In [1]:
import pickle as pkl
import numpy as np

In [2]:
input_dim = 320

In [3]:
with open('Train_'+str(input_dim)+'.pkl', 'rb') as tr:
    X_train = pkl.load(tr)
tr.close()
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_train.shape

(87719, 1, 320)

In [4]:
with open('Val_'+str(input_dim)+'.pkl', 'rb') as val:
    X_val = pkl.load(val)
val.close()
X_val = X_val.reshape(X_val.shape[0], 1, X_val.shape[1])
X_val.shape

(998, 1, 320)

In [5]:
with open('Test_'+str(input_dim)+'.pkl', 'rb') as test:
    X_test = pkl.load(test)
val.close()
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
X_test.shape

(999, 1, 320)

In [6]:
with open('Train_labels.txt', 'r') as l:
    train_labels = l.read().split('\n')
l.close()
len(train_labels)

87719

In [7]:
with open('Val_labels.txt', 'r') as v:
    val_labels = v.read().split('\n')
v.close()
len(val_labels)

998

In [8]:
with open('Test_labels.txt', 'r') as t:
    test_labels = t.read().split('\n')
t.close()
len(test_labels)

999

In [9]:
from sklearn.preprocessing import LabelEncoder
enc = LabelEncoder()
enc.fit(train_labels)

LabelEncoder()

In [10]:
y_train_labels = enc.transform(train_labels)
y_train_labels.shape

(87719,)

In [11]:
y_val_labels = enc.transform(val_labels)
y_val_labels.shape

(998,)

In [12]:
y_test_labels = enc.transform(test_labels)
y_test_labels.shape

(999,)

In [13]:
import keras
from keras import backend as th
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, MaxPooling1D, LSTM, Activation, Masking, Bidirectional
from keras.layers.wrappers import TimeDistributed
from keras.layers.advanced_activations import LeakyReLU
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


In [14]:
y_train = to_categorical(y_train_labels)
y_train.shape

(87719, 10)

In [15]:
y_val = to_categorical(y_val_labels)
y_val.shape

(998, 10)

In [16]:
y_test = to_categorical(y_test_labels)
y_test.shape

(999, 10)

In [17]:
from sklearn.utils.class_weight import compute_class_weight

In [18]:
# Instantiate the label encoder
le = LabelEncoder()

# Fit the label encoder to our label series
le.fit(list(y_train_labels))

# Create integer based labels Series
y_integers = le.transform(list(y_train_labels))

In [19]:
class_weights = compute_class_weight('balanced', np.unique(y_train_labels), y_train_labels)
class_weights_dict = dict(zip(le.transform(list(le.classes_)), class_weights))

In [20]:
# Parameters
batch_size = 512
epochs = 100
num_classes = 10

In [21]:
model = Sequential()
model.add(Masking(mask_value=0., input_shape=(1, input_dim)))
model.add(Bidirectional(LSTM(100)))
# model.add(Dropout(0.2))
# model.add(Dense(200, activation='relu'))
# model.add(Dropout(0.2))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(10, activation='sigmoid'))

In [22]:
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking_1 (Masking)          (None, 1, 320)            0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 200)               336800    
_________________________________________________________________
dense_1 (Dense)              (None, 100)               20100     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                1010      
Total params: 357,910
Trainable params: 357,910
Non-trainable params: 0
_________________________________________________________________
None


In [23]:
from keras.utils.vis_utils import plot_model
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [24]:
earlystop = EarlyStopping(monitor='val_acc', min_delta = 0.0001, patience=5, verbose=1, mode='auto')
callbacks_list = [earlystop]

In [25]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, callbacks=callbacks_list,
          validation_data=(X_val, y_val), class_weight = class_weights_dict, verbose=1)

Train on 87719 samples, validate on 998 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 00026: early stopping


<keras.callbacks.History at 0x5427ee7048>

In [26]:
scores = model.evaluate(X_val, y_val, verbose=0)
print('Val loss:', scores[0])
print('Val accuracy:', scores[1])

Val loss: 0.6281728482915309
Val accuracy: 0.7955911822452812


In [27]:
y_pred = model.predict_classes(X_val)
confusion_matrix(y_pred, y_val_labels)

array([[72,  2,  3,  2,  3,  1,  4,  4,  3,  0],
       [ 1, 74,  2,  1,  0,  0,  0,  2,  1,  5],
       [ 4,  5, 86,  3,  1,  0,  1,  1,  2,  0],
       [12,  2,  1, 85,  0,  4,  1,  4,  0,  1],
       [ 2,  2,  0,  1, 88,  1,  0,  1,  7,  5],
       [ 1,  0,  0,  1,  0, 87,  1,  5,  1,  1],
       [ 1,  4,  0,  1,  0,  3, 85,  8,  3,  4],
       [ 3,  6,  4,  2,  0,  0,  5, 68,  3,  1],
       [ 1,  3,  2,  3,  4,  1,  1,  3, 71,  5],
       [ 3,  2,  2,  1,  4,  3,  1,  3,  9, 78]], dtype=int64)

In [28]:
test_scores = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', test_scores[0])
print('Test accuracy:', test_scores[1])

Test loss: 0.7225807339691364
Test accuracy: 0.7847847847847848


In [29]:
y_pred = model.predict_classes(X_test)
confusion_matrix(y_pred, y_test_labels)

array([[79,  2,  5,  3,  0,  1,  5,  6,  0,  2],
       [ 2, 76,  2,  0,  1,  0,  0,  2,  4,  3],
       [ 2,  3, 82,  1,  1,  2,  1,  3,  2,  1],
       [ 3,  7,  3, 81,  0,  0,  0,  4,  0,  0],
       [ 1,  2,  1,  3, 85,  0,  1,  3, 12,  3],
       [ 0,  0,  1,  2,  0, 87,  2,  1,  0,  2],
       [ 8,  4,  0,  5,  1,  2, 78,  8,  0,  3],
       [ 4,  1,  1,  2,  3,  4,  6, 69,  2,  2],
       [ 0,  3,  1,  0,  4,  0,  3,  0, 69,  6],
       [ 1,  2,  4,  3,  5,  4,  4,  3, 11, 78]], dtype=int64)