In [None]:
import tensorflow as tf
tf.random.set_seed(1)
tf.compat.v1.enable_eager_execution()

import numpy as np
np.random.seed(1)

import gensim
import pandas as pd
import matplotlib.pyplot as plt
import talos as ta

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import LSTM, Embedding, Dense, \
    SpatialDropout1D, Bidirectional, Conv1D, concatenate
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import Callback

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report, confusion_matrix

%matplotlib inline

In [None]:
# Show which GPU device (if any) TensorFlow reports for this session.
print(tf.test.gpu_device_name())

/device:GPU:0


In [None]:
# Load the token-level dataset (the preview shows one row per token with
# `word` and `tag` columns). The path is relative to the notebook's directory.
df = pd.read_csv('../data/data.csv')

In [None]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,word,tag
0,Pengamat,O
1,politik,O
2,dari,O
3,Universitas,B-ORGANIZATION
4,Gadjah,I-ORGANIZATION


In [None]:
# Give every token row a running sentence number. The counter starts at 1 and
# advances right after any token whose text contains a period, so that token
# still belongs to the sentence it closes.
# NOTE(review): tokens such as abbreviations or decimal numbers also contain
# a period and therefore end a sentence too -- confirm this is intended.
sentences = []
cnt = 1

for token in df['word'].tolist():
    sentences.append(cnt)
    cnt += 1 if '.' in str(token) else 0

df['sentence #'] = sentences
df.head()

Unnamed: 0,word,tag,sentence #
0,Pengamat,O,1
1,politik,O,1
2,dari,O,1
3,Universitas,B-ORGANIZATION,1
4,Gadjah,I-ORGANIZATION,1


In [None]:
def agg_func(s):
    """Return one sentence group as a list of (word, tag) tuples in row order."""
    return list(zip(s['word'].values.tolist(), s['tag'].values.tolist()))


# One entry per sentence number; each entry is that sentence's (word, tag) list.
grouped = df.groupby('sentence #').apply(agg_func)
sentences = list(grouped)

In [None]:
# Build the vocabularies in a deterministic order. Iterating a bare set() of
# strings can yield a different order in every new Python process (string
# hashing is randomised), which would silently change every index in word2idx
# between kernel restarts and make saved weights/results impossible to line up.
# key=str keeps the sort working even if the column holds non-string cells
# (e.g. NaN produced by an empty CSV field).
words = sorted(set(df['word'].values), key=str)
# Extra last entry: its index (num_words - 1) is the value used to pad X.
words.append('PADDING')
num_words = len(words)
tags = sorted(set(df['tag'].values), key=str)
num_tags = len(tags)

In [None]:
# Order the entity tags by entity type first, then by position prefix
# (B, I, L, U), and put the outside tag 'O' last so it gets the highest index.
# The type is the whole text after the "X-" prefix (x[2:]) rather than only
# its first letter, so two entity types that share an initial letter still
# form separate, contiguous groups.
tags = sorted((t for t in tags if t != 'O'), key=lambda x: (x[2:], x[0]))
tags.append('O')

In [None]:
# Lookup tables: every word / tag maps to its position in the matching list.
word2idx = dict(zip(words, range(len(words))))
tag2idx = dict(zip(tags, range(len(tags))))

In [None]:
# Show the tag -> index mapping ('O' was appended last, so it has the highest index).
tag2idx

{'B-LOCATION': 0,
 'B-ORGANIZATION': 4,
 'B-PERSON': 8,
 'B-QUANTITY': 12,
 'B-TIME': 16,
 'I-LOCATION': 1,
 'I-ORGANIZATION': 5,
 'I-PERSON': 9,
 'I-QUANTITY': 13,
 'I-TIME': 17,
 'L-LOCATION': 2,
 'L-ORGANIZATION': 6,
 'L-PERSON': 10,
 'L-QUANTITY': 14,
 'L-TIME': 18,
 'O': 20,
 'U-LOCATION': 3,
 'U-ORGANIZATION': 7,
 'U-PERSON': 11,
 'U-QUANTITY': 15,
 'U-TIME': 19}

### Pad Sequence

In [None]:
# Fixed sequence length fed to the network.
max_len = 40

# Words -> vocabulary indices; shorter sentences are filled at the end with
# num_words - 1, the index of the 'PADDING' entry.
X = pad_sequences(
    sequences=[[word2idx[word] for word, _ in sent] for sent in sentences],
    maxlen=max_len,
    padding='post',
    value=num_words-1,
)

# Tags -> indices, filled at the end with the 'O' index, then one-hot encoded
# sentence by sentence (y stays a Python list of 2-D arrays).
y = pad_sequences(
    sequences=[[tag2idx[tag] for _, tag in sent] for sent in sentences],
    maxlen=max_len,
    padding='post',
    value=tag2idx['O'],
)
y = [to_categorical(row, num_classes=num_tags) for row in y]

In [None]:
# Hold out 30 % of the sentences, then cut that hold-out in half:
# 70 % train / 15 % validation / 15 % test. random_state=1 on both calls
# keeps the partition repeatable.
x_train, x_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.3, random_state=1)
x_val, x_test, y_val, y_test = train_test_split(x_val_test, y_val_test, test_size=0.5, random_state=1)

### Pre-trained Word2Vec Embedding

In [None]:
# Load the saved gensim Word2Vec model that supplies the pre-trained vectors.
# NOTE(review): the name `model` is reused as a local variable inside
# create_model; that local does not overwrite this global.
model = gensim.models.Word2Vec.load("../checkpoint/w2vec_wiki_id_case")

In [None]:
# Build the embedding matrix: row i holds the vector for words[i].
# The width is read from the loaded Word2Vec model instead of being hard-coded,
# so a checkpoint with a different vector size still works.
embedding_dim = model.wv.vector_size
embedding_matrix = np.zeros((len(words), embedding_dim))

for i, w in enumerate(words):
    try:
        embedding_matrix[i] = model.wv[w]
    except (KeyError, TypeError):
        # KeyError: the word is not in the Word2Vec vocabulary (this includes
        # 'PADDING' unless the checkpoint happens to contain it).
        # TypeError: the cell is not a string (e.g. NaN).
        # Anything else (interrupts, shape mismatches) is no longer swallowed.
        # Fallback: random vector drawn from N(0, 0.25), i.e. std = 0.5, using
        # NumPy's global random state.
        embedding_matrix[i] = np.random.normal(0, np.sqrt(0.25), embedding_dim)

### Initial parameters

In [None]:
def f1_score_micro(y_true, y_pred, ignore_idx=None):
    """Micro-averaged F1 over the tokens whose true label is not 'O'.

    Both inputs are reduced to class indices with an argmax over the last axis
    and flattened. Every position whose true class equals ``ignore_idx`` is
    then dropped. Because the targets are padded with the 'O' index, this also
    removes the padding positions.

    Args:
        y_true: one-hot targets with the classes on the last axis. Either an
            eager tensor (anything exposing ``.numpy()``) or an array-like.
        y_pred: predicted class scores with the same layout as ``y_true``.
        ignore_idx: class index to exclude. Defaults to ``tag2idx['O']``,
            looked up at call time, so the metric follows the tag mapping
            instead of relying on a hard-coded 20.

    Returns:
        The micro-averaged F1 as a float, or 0.0 when no token is left after
        the exclusion (e.g. a batch containing only 'O' labels), so that
        scikit-learn is never called on empty input.
    """
    if ignore_idx is None:
        ignore_idx = tag2idx['O']

    def _to_labels(values):
        # Accept eager tensors as well as plain arrays.
        arr = values.numpy() if hasattr(values, 'numpy') else np.asarray(values)
        return np.argmax(arr, axis=-1).reshape(-1)

    y_true = _to_labels(y_true)
    y_pred = _to_labels(y_pred)

    # Keep only the positions whose true label is a real entity tag.
    keep = y_true != ignore_idx
    y_true = y_true[keep]
    y_pred = y_pred[keep]

    if y_true.size == 0:
        return 0.0

    # compute f1 score with micro average
    return f1_score(y_true, y_pred, average='micro')

In [None]:
def create_model(x_train, y_train, x_val, y_val, params):
    """Build, compile and train one BiLSTM + Conv1D tagger for a Talos scan.

    Architecture: frozen pre-trained embedding -> spatial dropout ->
    bidirectional LSTM -> 1-D convolution -> per-token softmax over the tags.
    The function reads the notebook-level ``max_len``, ``embedding_matrix``
    and ``num_tags``.

    Args:
        x_train: padded word-index sequences used for training.
        y_train: one-hot tag sequences matching ``x_train``.
        x_val: padded word-index sequences used for validation.
        y_val: one-hot tag sequences matching ``x_val``.
        params: dict holding one value per key: 'dropout', 'lstm_units',
            'filters', 'filter_size', 'optimizer' ('Nadam', 'Adam' or
            'RMSprop'), 'lr' and 'batch_size'.

    Returns:
        (history, model): the object returned by ``model.fit`` and the
        trained Keras model.

    Raises:
        ValueError: if ``params['optimizer']`` is not a supported name
            (previously this surfaced later as an unbound local variable).
    """
    # Resolve the optimizer first so a bad name fails before any graph is built.
    optimizer_classes = {
        'Nadam': tf.keras.optimizers.Nadam,
        'Adam': tf.keras.optimizers.Adam,
        'RMSprop': tf.keras.optimizers.RMSprop,
    }
    optimizer_name = params['optimizer']
    if optimizer_name not in optimizer_classes:
        raise ValueError(
            'Unsupported optimizer {!r}; expected one of {}'.format(
                optimizer_name, sorted(optimizer_classes)
            )
        )
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optm = optimizer_classes[optimizer_name](learning_rate=params['lr'])

    input_sequence = Input(shape=(max_len, ))

    # frozen pre-trained embedding
    x = Embedding(
        input_dim=embedding_matrix.shape[0],
        weights=[embedding_matrix],
        output_dim=embedding_matrix.shape[1],
        input_length=max_len,
        trainable=False
    )(input_sequence)

    # dropout layer
    x = SpatialDropout1D(params['dropout'])(x)

    # lstm layer
    x = Bidirectional(LSTM(units=params['lstm_units'], return_sequences=True))(x)

    # convolution layer
    x = Conv1D(params['filters'], params['filter_size'], activation='relu', padding='same')(x)

    # output layer: one softmax over the tags per time step
    output_sequence = Dense(num_tags, activation='softmax')(x)

    model = Model(input_sequence, output_sequence)

    loss = tf.keras.losses.CategoricalCrossentropy()

    # run_eagerly=True is required because f1_score_micro converts the
    # tensors to NumPy arrays.
    model.compile(loss=loss, optimizer=optm, metrics=[f1_score_micro], run_eagerly=True)

    # With patience=10 training stops ten epochs after the best validation
    # score; restore_best_weights hands back that best model rather than the
    # weights of the last (worse) epoch. The recorded history is unaffected.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        patience=10,
        monitor='val_f1_score_micro',
        mode='max',
        restore_best_weights=True
    )

    history = model.fit(
        x_train, np.array(y_train),
        validation_data=(x_val, np.array(y_val)),
        epochs=100, verbose=1, callbacks=[early_stopping], batch_size=params['batch_size']
    )

    return history, model

In [None]:
# Search grid for the scan: every key maps to the list of candidate values.
# Only `dropout` has more than one candidate here; the other entries are
# pinned to a single value.
params = dict(
    optimizer=['Adam'],
    lr=[0.01],
    lstm_units=[50],
    filters=[64],
    filter_size=[3],
    dropout=[0.1, 0.3, 0.5, 0.7],
    batch_size=[128],
)

In [None]:
# Display the grid that the next scan will use.
params

{'batch_size': [128],
 'dropout': [0.1, 0.3, 0.5, 0.7],
 'filter_size': [3],
 'filters': [64],
 'lr': [0.01],
 'lstm_units': [50],
 'optimizer': ['Adam']}

In [None]:
# Run the Talos scan: create_model is trained once per combination in `params`
# (4 combinations for the dropout grid, matching the progress bar below).
# Explicit validation data is supplied through x_val / y_val.
# NOTE(review): `t` is overwritten every time this cell is re-run with a new
# grid, so only the most recent scan is available afterwards.
t = ta.Scan(x=x_train,
    y=np.array(y_train),
    x_val=x_val,
    y_val=np.array(y_val),
    model=create_model,
    params=params,
    experiment_name='bilstm_cnns_w2v_opt_v3', 
    val_split=None
)

  0%|          | 0/4 [00:00<?, ?it/s]

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100


 25%|██▌       | 1/4 [00:36<01:50, 36.98s/it]

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100


 50%|█████     | 2/4 [01:23<01:19, 39.91s/it]

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100


 75%|███████▌  | 3/4 [02:16<00:43, 43.89s/it]

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100


100%|██████████| 4/4 [03:23<00:00, 50.86s/it]


In [None]:
# Round 5 -- batch-size sweep: runs ranked by validation micro-F1, best first.
# NOTE(review): the saved table lists batch sizes 32-256, which is not the grid
# currently defined in `params`; it presumably comes from an earlier Scan run.
# Re-running this cell shows whatever scan `t` holds now.
t.data[['loss', 'f1_score_micro', 'val_loss', 'val_f1_score_micro', 'batch_size']] \
    .sort_values(by=['val_f1_score_micro'], ascending=False)

Unnamed: 0,loss,f1_score_micro,val_loss,val_f1_score_micro,batch_size
0,0.044741,0.820512,0.083335,0.782582,32
1,0.032525,0.867605,0.095214,0.771599,64
2,0.041588,0.820682,0.07886,0.758756,128
3,0.044483,0.810544,0.080802,0.714837,256


In [None]:
# Round 4 -- dropout sweep (0.1 / 0.3 / 0.5 / 0.7, the grid currently defined
# in `params`): runs ranked by validation micro-F1, best first.
t.data[['loss', 'f1_score_micro', 'val_loss', 'val_f1_score_micro', 'dropout']] \
    .sort_values(by=['val_f1_score_micro'], ascending=False)

Unnamed: 0,loss,f1_score_micro,val_loss,val_f1_score_micro,dropout
3,0.06819,0.805643,0.109679,0.783496,0.7
2,0.039366,0.89027,0.142401,0.764896,0.5
1,0.021966,0.937722,0.168943,0.745104,0.3
0,0.018275,0.951331,0.16679,0.731593,0.1


In [None]:
# Round 3 -- convolution sweep over `filters` and `filter_size`: runs ranked by
# validation micro-F1, best first.
# NOTE(review): the saved table presumably comes from an earlier Scan run whose
# grid is no longer in the notebook; re-running shows the scan `t` holds now.
t.data[['loss', 'f1_score_micro', 'val_loss', 'val_f1_score_micro', 'filters', 'filter_size']] \
    .sort_values(by=['val_f1_score_micro'], ascending=False)

Unnamed: 0,loss,f1_score_micro,val_loss,val_f1_score_micro,filters,filter_size
5,0.011246,0.972493,0.174134,0.745754,64,3
3,0.012673,0.969102,0.185548,0.736608,256,2
11,0.01643,0.956099,0.18022,0.728741,256,4
1,0.021444,0.943472,0.178046,0.726668,64,2
7,0.006171,0.984916,0.236037,0.724983,256,3
6,0.011119,0.970362,0.202876,0.724653,128,3
9,0.008628,0.977795,0.192755,0.718315,64,4
4,0.01698,0.958602,0.172604,0.718132,32,3
10,0.01119,0.972511,0.176814,0.712981,128,4
2,0.011189,0.971396,0.194367,0.711627,128,2


In [None]:
# Round 2 -- LSTM size sweep over `lstm_units`: runs ranked by validation
# micro-F1, best first.
# NOTE(review): the saved table presumably comes from an earlier Scan run whose
# grid is no longer in the notebook; re-running shows the scan `t` holds now.
t.data[['loss', 'f1_score_micro', 'val_loss', 'val_f1_score_micro', 'lstm_units']] \
    .sort_values(by=['val_f1_score_micro'], ascending=False)

Unnamed: 0,loss,f1_score_micro,val_loss,val_f1_score_micro,lstm_units
0,0.010389,0.97245,0.199703,0.717026,50
2,0.013843,0.962516,0.175725,0.710149,200
1,0.004368,0.988726,0.228762,0.704217,100


In [None]:
# Round 1 -- optimizer and learning-rate sweep: runs ranked by validation
# micro-F1, best first.
# NOTE(review): the saved table presumably comes from an earlier Scan run whose
# grid is no longer in the notebook; re-running shows the scan `t` holds now.
t.data[['loss', 'f1_score_micro', 'val_loss', 'val_f1_score_micro', 'optimizer', 'lr']] \
    .sort_values(by=['val_f1_score_micro'], ascending=False)

Unnamed: 0,loss,f1_score_micro,val_loss,val_f1_score_micro,optimizer,lr
1,0.007106,0.981749,0.210253,0.737883,Adam,0.01
2,0.010357,0.977084,0.215194,0.726728,RMSprop,0.01
0,0.00599,0.987432,0.21261,0.711857,Nadam,0.01
5,0.017705,0.958183,0.187242,0.688027,RMSprop,0.001
4,0.027014,0.931386,0.177369,0.685104,Adam,0.001
3,0.041103,0.888974,0.159306,0.621556,Nadam,0.001
6,0.136784,0.576052,0.171238,0.528891,Nadam,0.0001
8,0.115763,0.647499,0.162081,0.513431,RMSprop,0.0001
7,0.137029,0.574442,0.176208,0.467877,Adam,0.0001
