In [107]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
import numpy as np
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc, accuracy_score
import pandas as pd
import matplotlib.pyplot as plt
import time
import pickle
from sklearn.model_selection import StratifiedKFold

In [146]:
def initialize_nn(frame_size, n_hid, lr):
    model = Sequential() # The Keras Sequential model is a linear stack of layers.
    model.add(Dense(n_hid, kernel_initializer='uniform', input_dim=frame_size)) # Dense layer
    model.add(Activation('tanh')) # Activation layer
    model.add(Dropout(0.5)) # Dropout layer
    model.add(Dense(n_hid, kernel_initializer='uniform')) # Another dense layer
    model.add(Activation('tanh')) # Another activation layer
    model.add(Dropout(0.5)) # Another dropout layer
    model.add(Dense(12, init='uniform')) # Last dense layer
    model.add(Activation('softmax')) # Softmax activation at the end
    sgd = SGD(lr=lr, decay=1e-6, momentum=0.9, nesterov=True) # Using Nesterov momentum
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['categorical_accuracy']) # Using logloss
    return model

In [147]:
def get_data():
    file = pd.read_csv('embed_ikea_intent.csv')
    features = file.drop(['index', 'intent'], axis=1).values
    labels = file['intent'].values
    n_frames, frame_size = features.shape
    return features, labels, n_frames, frame_size

In [148]:
features, labels, n_frames, frame_size = get_data()

In [149]:
unique_labels = set(labels)
label_dictionary = {}
label_int_value = 0
for label in unique_labels:
    if not label in label_dictionary:
        label_dictionary[label]=label_int_value
        label_int_value+=1
new_labels = []
for label in labels:
    new_labels.append(label_dictionary[label])
new_labels = np.array(new_labels)
records = []
avg_records = []

In [163]:
for i in range(200,201):
    print('number of hidden nodes = ', i)
    n_hid = i
    lr = 0.1
    num_splits = 10
    mlp_model = initialize_nn(frame_size, n_hid, lr)
    skf = StratifiedKFold(n_splits=num_splits)
    avg_acc = 0
    avg_time = 0
    for train_index, test_index in skf.split(features, new_labels):
        X_train, X_test = features[train_index], features[test_index]
        Y_train, Y_test = new_labels[train_index], new_labels[test_index]
        Y_train = to_categorical(Y_train, num_classes=12, dtype='float32')
        Y_test = to_categorical(Y_test, num_classes=12, dtype='float32')
        print('Start training')
        start = time.time()
        mlp_model.fit(X_train, Y_train, validation_split=0.1, batch_size=32, epochs=20, verbose=2, shuffle=True)
        end = time.time()
        print('End training')
        predictions = mlp_model.predict(X_test)
        predictions = (predictions>0.5)
        time_taken = end-start
        score = accuracy_score(Y_test, predictions)
        print('accuracy = ', float('{0:.3f}'.format(score)), 'time_taken = ', float('{0:.3f}'.format(time_taken)))
        avg_acc+=score
        avg_time+=time_taken
        records.append([n_hid, lr, float('{0:.3f}'.format(score)), float('{0:.3f}'.format(time_taken))])
    avg_acc /= num_splits
    avg_time /= num_splits
    print('Accuracy Final = ', float('{0:.3f}'.format(avg_acc)), 'Time_taken average = ', float('{0:.3f}'.format(avg_time)))
    temp = [n_hid, lr, float('{0:.3f}'.format(avg_acc)), float('{0:.3f}'.format(avg_time))]
    avg_records.append(temp)
    with open('avg_records.csv', 'a') as fd:
        fd.write(temp)
    name_of_file = 'mlp_'+str(n_hid)+'_ikea.pkl'
    with open(name_of_file, 'wb') as fid:
        pickle.dump(mlp_model, fid)

number of hidden nodes =  200


  if __name__ == '__main__':


Start training
Train on 3762 samples, validate on 418 samples
Epoch 1/20
 - 5s - loss: 2.0080 - categorical_accuracy: 0.2730 - val_loss: 2.1543 - val_categorical_accuracy: 0.5383
Epoch 2/20
 - 1s - loss: 1.7525 - categorical_accuracy: 0.3884 - val_loss: 2.0175 - val_categorical_accuracy: 0.4330
Epoch 3/20
 - 1s - loss: 1.5338 - categorical_accuracy: 0.4747 - val_loss: 2.0014 - val_categorical_accuracy: 0.5789
Epoch 4/20
 - 1s - loss: 1.3534 - categorical_accuracy: 0.5473 - val_loss: 2.0914 - val_categorical_accuracy: 0.5789
Epoch 5/20
 - 1s - loss: 1.2981 - categorical_accuracy: 0.5662 - val_loss: 1.9512 - val_categorical_accuracy: 0.6196
Epoch 6/20
 - 1s - loss: 1.2261 - categorical_accuracy: 0.5984 - val_loss: 2.3520 - val_categorical_accuracy: 0.5885
Epoch 7/20
 - 1s - loss: 1.2322 - categorical_accuracy: 0.5909 - val_loss: 2.2281 - val_categorical_accuracy: 0.6005
Epoch 8/20
 - 1s - loss: 1.2002 - categorical_accuracy: 0.6063 - val_loss: 2.2343 - val_categorical_accuracy: 0.6172
Ep

 - 1s - loss: 10.5424 - categorical_accuracy: 0.3248 - val_loss: 10.4953 - val_categorical_accuracy: 0.3174
Epoch 8/20
 - 1s - loss: 11.0834 - categorical_accuracy: 0.2895 - val_loss: 11.2711 - val_categorical_accuracy: 0.3007
Epoch 9/20
 - 1s - loss: 11.2327 - categorical_accuracy: 0.2848 - val_loss: 10.1171 - val_categorical_accuracy: 0.3723
Epoch 10/20
 - 1s - loss: 10.9680 - categorical_accuracy: 0.2943 - val_loss: 5.7942 - val_categorical_accuracy: 0.5943
Epoch 11/20
 - 1s - loss: 10.7138 - categorical_accuracy: 0.3169 - val_loss: 10.1951 - val_categorical_accuracy: 0.3628
Epoch 12/20
 - 1s - loss: 10.6569 - categorical_accuracy: 0.3110 - val_loss: 10.6667 - val_categorical_accuracy: 0.3031
Epoch 13/20
 - 1s - loss: 10.6065 - categorical_accuracy: 0.3182 - val_loss: 6.4912 - val_categorical_accuracy: 0.5823
Epoch 14/20
 - 1s - loss: 10.6675 - categorical_accuracy: 0.3158 - val_loss: 8.0933 - val_categorical_accuracy: 0.4869
Epoch 15/20
 - 1s - loss: 10.6518 - categorical_accuracy:

Epoch 13/20
 - 1s - loss: 11.7489 - categorical_accuracy: 0.2666 - val_loss: 11.2711 - val_categorical_accuracy: 0.3007
Epoch 14/20
 - 1s - loss: 11.6994 - categorical_accuracy: 0.2600 - val_loss: 11.2711 - val_categorical_accuracy: 0.3007
Epoch 15/20
 - 1s - loss: 11.5626 - categorical_accuracy: 0.2648 - val_loss: 10.1556 - val_categorical_accuracy: 0.3699
Epoch 16/20
 - 1s - loss: 11.6089 - categorical_accuracy: 0.2613 - val_loss: 11.2711 - val_categorical_accuracy: 0.3007
Epoch 17/20
 - 1s - loss: 11.7112 - categorical_accuracy: 0.2632 - val_loss: 11.2711 - val_categorical_accuracy: 0.3007
Epoch 18/20
 - 1s - loss: 11.6100 - categorical_accuracy: 0.2706 - val_loss: 11.2711 - val_categorical_accuracy: 0.3007
Epoch 19/20
 - 1s - loss: 11.7102 - categorical_accuracy: 0.2571 - val_loss: 10.1556 - val_categorical_accuracy: 0.3699
Epoch 20/20
 - 1s - loss: 11.9001 - categorical_accuracy: 0.2523 - val_loss: 10.1556 - val_categorical_accuracy: 0.3699
End training
accuracy =  0.256 time_take

Epoch 19/20
 - 1s - loss: 11.6059 - categorical_accuracy: 0.2641 - val_loss: 6.6464 - val_categorical_accuracy: 0.5786
Epoch 20/20
 - 1s - loss: 11.5719 - categorical_accuracy: 0.2569 - val_loss: 12.3188 - val_categorical_accuracy: 0.2357
End training
accuracy =  0.276 time_taken =  16.53
Accuracy Final =  0.303 Time_taken average =  17.596


TypeError: write() argument must be str, not list

In [162]:
r_data = pd.DataFrame(avg_records)
r_data.to_csv('avg_records_mlp.csv', header=['n_hid', 'learn_rate', 'accuracy', 'time_taken'], index_label='index')

In [158]:
r_data

Unnamed: 0,0,1,2,3,4
0,1,1,0.1,0.000,9.090
1,1,1,0.1,0.034,6.116
2,1,1,0.1,0.024,6.094
3,1,1,0.1,0.140,6.254
4,1,1,0.1,0.114,6.118
5,1,1,0.1,0.065,5.897
6,1,1,0.1,0.019,5.746
7,1,1,0.1,0.000,9.644
8,1,1,0.1,0.002,6.093
9,1,1,0.1,0.096,5.890
