In [1]:
from __future__ import print_function
import keras
from keras.models  import Sequential 
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
from keras import backend as K
from keras import optimizers
from keras.datasets import mnist
from keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Training configuration shared by every experiment cell below.
batch_size = 128
num_classes = 10
epochs = 30

# Each CSV row holds the feature columns followed by the class label in the
# last column; the files have no header row.
train_data = pd.read_csv('optdigits-training.txt', header=None)
test_data = pd.read_csv('optdigits-test.txt', header=None)
y_train = np.array(train_data.iloc[:, -1])
y_test = np.array(test_data.iloc[:, -1])
x_train = np.array(train_data.iloc[:, :-1])
x_test = np.array(test_data.iloc[:, :-1])

# Rescale the features to [0, 1] using the largest value seen in the training
# set. The previous hard-coded divisor of 255 is the 8-bit image range; the
# optdigits features are presumably block counts in 0..16 (TODO confirm against
# the dataset description), which a /255 would squash into roughly [0, 0.06].
# The same training-derived factor is applied to the test set so both splits
# share one scale.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
feature_max = float(x_train.max())
if feature_max > 0:  # an all-zero training matrix is left unscaled
    x_train /= feature_max
    x_test /= feature_max

# Convert the integer class labels to one-hot rows of length num_classes.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

In [4]:
# Seed NumPy's global random generator for the cells that follow.
seed = 10
np.random.seed(seed)

# Hold out 20% of the training rows for validation. The split itself is
# governed by random_state=13, not by `seed`.
# NOTE: x_train is rebound to the remaining 80% while y_train keeps its full
# length, so re-run the loading cell before re-running this one.
x_train, x_valid, train_label, valid_label = train_test_split(
    x_train, y_train, test_size=0.2, random_state=13)

for split_name, split_x in (('x_train', x_train), ('x_valid', x_valid)):
    print(split_name + ' shape:', split_x.shape)
print(x_train.shape[0], 'train samples')
print(x_valid.shape[0], 'validation sample')

x_train shape: (3058, 64)
x_valid shape: (765, 64)
3058 train samples
765 validation sample


In [None]:
## One layer, cross-entropy with reLU and stochastic gradient descent
# NOTE(review): two Dense(units) layers precede the softmax output, so the
# layer count in the title evidently excludes the first Dense layer.

# Final-epoch training accuracy and test accuracy of each reported configuration.
train = []
test = []

# Hyper-parameter grid; every combination is trained from scratch.
learning_rate = [0.03, 0.05, 0.07, 0.09]
momentum = [0.95, 0.97, 0.99]
number_of_units = [128, 256, 512]

print('number of layers: 1')
# Same visiting order as three nested loops: learning rate outermost, units innermost.
for LR, MT, units in ((lr, mt, width)
                      for lr in learning_rate
                      for mt in momentum
                      for width in number_of_units):
    sgd = optimizers.SGD(lr=LR, decay=5e-6, momentum=MT, nesterov=True)

    # Only the first layer declares its input shape (64 features per sample);
    # later layers infer theirs. Dropout after each hidden layer helps prevent
    # overfitting. The softmax output layer is the same in every experiment.
    model11 = Sequential([
        Dense(units, activation='relu', input_shape=(64,)),
        Dropout(0.2),
        Dense(units, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax'),
    ])
    model11.compile(loss='categorical_crossentropy',
                    optimizer=sgd,
                    metrics=['accuracy'])

    # EarlyStopping with its default monitor and a patience of 2 epochs.
    early_stopping_monitor = EarlyStopping(patience=2)
    history = model11.fit(
        x_train, train_label,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0,
        validation_data=(x_valid, valid_label),
        callbacks=[early_stopping_monitor],
    )

    # test_eval[0] is the test loss, test_eval[1] the test accuracy.
    test_eval = model11.evaluate(x_test, y_test, verbose=0)

    accuracy, val_accuracy, loss, val_loss = (
        history.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss'))

    # Configurations whose final-epoch training accuracy does not exceed the
    # test accuracy are skipped: nothing is recorded or printed for them.
    # NOTE(review): confirm this filter is intentional.
    if not (accuracy[-1] > test_eval[1]):
        continue

    train.append(accuracy[-1])
    test.append(test_eval[1])
    print(''.join(['Learning rate: ', str(LR),
                   ', momentum: ', str(MT),
                   ', number of units: ', str(units),
                   ', train accuracy: ', str(round(accuracy[-1], 4)),
                   ' and Test accuracy:']),
          round(test_eval[1], 4))

    # Per-class report and confusion matrix on the training split.
    print(' ')
    print('Class accuracy and confusion matrix for training')
    Y_train_label = np.argmax(train_label, axis=1)  # one-hot -> class index
    y_train_fit = model11.predict_classes(x_train)
    print(classification_report(Y_train_label, y_train_fit))
    print(pd.DataFrame(confusion_matrix(Y_train_label, y_train_fit)))

    # Per-class report and confusion matrix on the test split.
    print('Class accuracy and confusion matrix for test')
    Y_test = np.argmax(y_test, axis=1)  # one-hot -> class index
    y_pred = model11.predict_classes(x_test)
    print(classification_report(Y_test, y_pred))
    print(pd.DataFrame(confusion_matrix(Y_test, y_pred)))

In [None]:
## Two layers, Cross-entropy with reLU and stochastic gradient descent
# NOTE(review): three Dense(units) layers precede the softmax output, so the
# layer count in the title evidently excludes the first Dense layer.

# Final-epoch training accuracy and test accuracy of each reported configuration.
train = []
test = []

# Hyper-parameter grid; every combination is trained from scratch.
learning_rate = [0.03, 0.05, 0.07, 0.09]
momentum = [0.95, 0.97, 0.99]
number_of_units = [128, 256, 512]

print('number of layers: 2')
# Same visiting order as three nested loops: learning rate outermost, units innermost.
for LR, MT, units in ((lr, mt, width)
                      for lr in learning_rate
                      for mt in momentum
                      for width in number_of_units):
    sgd = optimizers.SGD(lr=LR, decay=5e-6, momentum=MT, nesterov=True)

    # Only the first layer declares its input shape (64 features per sample);
    # later layers infer theirs. Dropout after each hidden layer helps prevent
    # overfitting. The softmax output layer is the same in every experiment.
    model12 = Sequential([
        Dense(units, activation='relu', input_shape=(64,)),
        Dropout(0.2),
        Dense(units, activation='relu'),
        Dropout(0.2),
        Dense(units, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax'),
    ])
    model12.compile(loss='categorical_crossentropy',
                    optimizer=sgd,
                    metrics=['accuracy'])

    # EarlyStopping with its default monitor and a patience of 2 epochs.
    early_stopping_monitor = EarlyStopping(patience=2)
    history = model12.fit(
        x_train, train_label,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0,
        validation_data=(x_valid, valid_label),
        callbacks=[early_stopping_monitor],
    )

    # test_eval[0] is the test loss, test_eval[1] the test accuracy.
    test_eval = model12.evaluate(x_test, y_test, verbose=0)

    accuracy, val_accuracy, loss, val_loss = (
        history.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss'))

    # Configurations whose final-epoch training accuracy does not exceed the
    # test accuracy are skipped: nothing is recorded or printed for them.
    # NOTE(review): confirm this filter is intentional.
    if not (accuracy[-1] > test_eval[1]):
        continue

    train.append(accuracy[-1])
    test.append(test_eval[1])
    print(''.join(['Learning rate: ', str(LR),
                   ', momentum: ', str(MT),
                   ', number of units: ', str(units),
                   ', train accuracy: ', str(round(accuracy[-1], 4)),
                   ' and Test accuracy:']),
          round(test_eval[1], 4))

    # Per-class report and confusion matrix on the training split.
    print(' ')
    print('Class accuracy and confusion matrix for training')
    Y_train_label = np.argmax(train_label, axis=1)  # one-hot -> class index
    y_train_fit = model12.predict_classes(x_train)
    print(classification_report(Y_train_label, y_train_fit))
    print(pd.DataFrame(confusion_matrix(Y_train_label, y_train_fit)))

    # Per-class report and confusion matrix on the test split.
    print('Class accuracy and confusion matrix for test')
    Y_test = np.argmax(y_test, axis=1)  # one-hot -> class index
    y_pred = model12.predict_classes(x_test)
    print(classification_report(Y_test, y_pred))
    print(pd.DataFrame(confusion_matrix(Y_test, y_pred)))

In [None]:
## Three layers, Cross-entropy with reLU and stochastic gradient descent
# NOTE(review): four Dense(units) layers precede the softmax output, so the
# layer count in the title evidently excludes the first Dense layer.

# Final-epoch training accuracy and test accuracy of each reported configuration.
train = []
test = []

# Hyper-parameter grid; every combination is trained from scratch.
# This grid uses smaller learning rates than the one- and two-layer cells.
learning_rate = [0.003, 0.007, 0.01, 0.03]
momentum = [0.95, 0.97, 0.99]
number_of_units = [128, 256, 512]

print('number of layers: 3')
# Same visiting order as three nested loops: learning rate outermost, units innermost.
for LR, MT, units in ((lr, mt, width)
                      for lr in learning_rate
                      for mt in momentum
                      for width in number_of_units):
    sgd = optimizers.SGD(lr=LR, decay=5e-6, momentum=MT, nesterov=True)

    # Only the first layer declares its input shape (64 features per sample);
    # later layers infer theirs. Dropout after each hidden layer helps prevent
    # overfitting. The softmax output layer is the same in every experiment.
    model13 = Sequential([
        Dense(units, activation='relu', input_shape=(64,)),
        Dropout(0.2),
        Dense(units, activation='relu'),
        Dropout(0.2),
        Dense(units, activation='relu'),
        Dropout(0.2),
        Dense(units, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax'),
    ])
    model13.compile(loss='categorical_crossentropy',
                    optimizer=sgd,
                    metrics=['accuracy'])

    # EarlyStopping with its default monitor and a patience of 2 epochs.
    early_stopping_monitor = EarlyStopping(patience=2)
    history = model13.fit(
        x_train, train_label,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0,
        validation_data=(x_valid, valid_label),
        callbacks=[early_stopping_monitor],
    )

    # test_eval[0] is the test loss, test_eval[1] the test accuracy.
    test_eval = model13.evaluate(x_test, y_test, verbose=0)

    accuracy, val_accuracy, loss, val_loss = (
        history.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss'))

    # Configurations whose final-epoch training accuracy does not exceed the
    # test accuracy are skipped: nothing is recorded or printed for them.
    # NOTE(review): confirm this filter is intentional.
    if not (accuracy[-1] > test_eval[1]):
        continue

    train.append(accuracy[-1])
    test.append(test_eval[1])
    print(''.join(['Learning rate: ', str(LR),
                   ', momentum: ', str(MT),
                   ', number of units: ', str(units),
                   ', train accuracy: ', str(round(accuracy[-1], 4)),
                   ' and Test accuracy:']),
          round(test_eval[1], 4))

    # Per-class report and confusion matrix on the training split.
    print(' ')
    print('Class accuracy and confusion matrix for training')
    Y_train_label = np.argmax(train_label, axis=1)  # one-hot -> class index
    y_train_fit = model13.predict_classes(x_train)
    print(classification_report(Y_train_label, y_train_fit))
    print(pd.DataFrame(confusion_matrix(Y_train_label, y_train_fit)))

    # Per-class report and confusion matrix on the test split.
    print('Class accuracy and confusion matrix for test')
    Y_test = np.argmax(y_test, axis=1)  # one-hot -> class index
    y_pred = model13.predict_classes(x_test)
    print(classification_report(Y_test, y_pred))
    print(pd.DataFrame(confusion_matrix(Y_test, y_pred)))

In [None]:
## One layer, Sum of squared with reLU and stochastic gradient descent
# NOTE(review): two Dense(units) layers precede the softmax output, so the
# layer count in the title evidently excludes the first Dense layer.
# The loss compiled below is Keras' 'mean_squared_error'.

# Final-epoch training accuracy and test accuracy of each reported configuration.
train = []
test = []

# Hyper-parameter grid; every combination is trained from scratch.
# The learning rates here are ten times those of the cross-entropy cells.
learning_rate = [0.3, 0.5, 0.7, 0.9]
momentum = [0.95, 0.97, 0.99]
number_of_units = [128, 256, 512]

print('number of layers: 1')
# Same visiting order as three nested loops: learning rate outermost, units innermost.
for LR, MT, units in ((lr, mt, width)
                      for lr in learning_rate
                      for mt in momentum
                      for width in number_of_units):
    sgd = optimizers.SGD(lr=LR, decay=5e-6, momentum=MT, nesterov=True)

    # Only the first layer declares its input shape (64 features per sample);
    # later layers infer theirs. Dropout after each hidden layer helps prevent
    # overfitting. The softmax output layer is the same in every experiment.
    model21 = Sequential([
        Dense(units, activation='relu', input_shape=(64,)),
        Dropout(0.2),
        Dense(units, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax'),
    ])
    model21.compile(loss='mean_squared_error',
                    optimizer=sgd,
                    metrics=['accuracy'])

    # EarlyStopping with its default monitor and a patience of 2 epochs.
    early_stopping_monitor = EarlyStopping(patience=2)
    history = model21.fit(
        x_train, train_label,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0,
        validation_data=(x_valid, valid_label),
        callbacks=[early_stopping_monitor],
    )

    # test_eval[0] is the test loss, test_eval[1] the test accuracy.
    test_eval = model21.evaluate(x_test, y_test, verbose=0)

    accuracy, val_accuracy, loss, val_loss = (
        history.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss'))

    # Configurations whose final-epoch training accuracy does not exceed the
    # test accuracy are skipped: nothing is recorded or printed for them.
    # NOTE(review): confirm this filter is intentional.
    if not (accuracy[-1] > test_eval[1]):
        continue

    train.append(accuracy[-1])
    test.append(test_eval[1])
    print(''.join(['Learning rate: ', str(LR),
                   ', momentum: ', str(MT),
                   ', number of units: ', str(units),
                   ', train accuracy: ', str(round(accuracy[-1], 4)),
                   ' and Test accuracy:']),
          round(test_eval[1], 4))

    # Per-class report and confusion matrix on the training split.
    print(' ')
    print('Class accuracy and confusion matrix for training')
    Y_train_label = np.argmax(train_label, axis=1)  # one-hot -> class index
    y_train_fit = model21.predict_classes(x_train)
    print(classification_report(Y_train_label, y_train_fit))
    print(pd.DataFrame(confusion_matrix(Y_train_label, y_train_fit)))

    # Per-class report and confusion matrix on the test split.
    print('Class accuracy and confusion matrix for test')
    Y_test = np.argmax(y_test, axis=1)  # one-hot -> class index
    y_pred = model21.predict_classes(x_test)
    print(classification_report(Y_test, y_pred))
    print(pd.DataFrame(confusion_matrix(Y_test, y_pred)))

In [None]:
## Two layers, sum of squared with reLU and stochastic gradient descent
# NOTE(review): three Dense(units) layers precede the softmax output, so the
# layer count in the title evidently excludes the first Dense layer.
# The loss compiled below is Keras' 'mean_squared_error'.

# Final-epoch training accuracy and test accuracy of each reported configuration.
train = []
test = []

# Hyper-parameter grid; every combination is trained from scratch.
learning_rate = [0.3, 0.5, 0.7, 0.9]
momentum = [0.95, 0.97, 0.99]
number_of_units = [128, 256, 512]

print('number of layers: 2')
# Same visiting order as three nested loops: learning rate outermost, units innermost.
for LR, MT, units in ((lr, mt, width)
                      for lr in learning_rate
                      for mt in momentum
                      for width in number_of_units):
    sgd = optimizers.SGD(lr=LR, decay=5e-6, momentum=MT, nesterov=True)

    # Only the first layer declares its input shape (64 features per sample);
    # later layers infer theirs. Dropout after each hidden layer helps prevent
    # overfitting. The softmax output layer is the same in every experiment.
    model22 = Sequential([
        Dense(units, activation='relu', input_shape=(64,)),
        Dropout(0.2),
        Dense(units, activation='relu'),
        Dropout(0.2),
        Dense(units, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax'),
    ])
    model22.compile(loss='mean_squared_error',
                    optimizer=sgd,
                    metrics=['accuracy'])

    # EarlyStopping with its default monitor and a patience of 2 epochs.
    early_stopping_monitor = EarlyStopping(patience=2)
    history = model22.fit(
        x_train, train_label,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0,
        validation_data=(x_valid, valid_label),
        callbacks=[early_stopping_monitor],
    )

    # test_eval[0] is the test loss, test_eval[1] the test accuracy.
    test_eval = model22.evaluate(x_test, y_test, verbose=0)

    accuracy, val_accuracy, loss, val_loss = (
        history.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss'))

    # Configurations whose final-epoch training accuracy does not exceed the
    # test accuracy are skipped: nothing is recorded or printed for them.
    # NOTE(review): confirm this filter is intentional.
    if not (accuracy[-1] > test_eval[1]):
        continue

    train.append(accuracy[-1])
    test.append(test_eval[1])
    print(''.join(['Learning rate: ', str(LR),
                   ', momentum: ', str(MT),
                   ', number of units: ', str(units),
                   ', train accuracy: ', str(round(accuracy[-1], 4)),
                   ' and Test accuracy:']),
          round(test_eval[1], 4))

    # Per-class report and confusion matrix on the training split.
    print(' ')
    print('Class accuracy and confusion matrix for training')
    Y_train_label = np.argmax(train_label, axis=1)  # one-hot -> class index
    y_train_fit = model22.predict_classes(x_train)
    print(classification_report(Y_train_label, y_train_fit))
    print(pd.DataFrame(confusion_matrix(Y_train_label, y_train_fit)))

    # Per-class report and confusion matrix on the test split.
    print('Class accuracy and confusion matrix for test')
    Y_test = np.argmax(y_test, axis=1)  # one-hot -> class index
    y_pred = model22.predict_classes(x_test)
    print(classification_report(Y_test, y_pred))
    print(pd.DataFrame(confusion_matrix(Y_test, y_pred)))

In [None]:
## Three layers, sum of squared with reLU and stochastic gradient descent
# NOTE(review): four Dense(units) layers precede the softmax output, so the
# layer count in the title evidently excludes the first Dense layer.
# The loss compiled below is Keras' 'mean_squared_error'.

# Final-epoch training accuracy and test accuracy of each reported configuration.
train = []
test = []

# Hyper-parameter grid; every combination is trained from scratch.
learning_rate = [0.3, 0.5, 0.7, 0.9]
momentum = [0.95, 0.97, 0.99]
number_of_units = [128, 256, 512]

# This banner previously read "2" (copied from the two-layer cell), which
# mislabelled the output of this three-layer experiment.
print('number of layers: 3')
for LR in learning_rate:
    for MT in momentum:
        for units in number_of_units:
            sgd = optimizers.SGD(lr=LR, decay=5e-6, momentum=MT, nesterov=True)
            model23 = Sequential()

            # Only the first layer declares its input shape (64 features per
            # sample); the following layers infer theirs automatically.
            model23.add(Dense(units, activation='relu', input_shape=(64,)))
            # Dropout after each hidden layer helps prevent overfitting.
            model23.add(Dropout(0.2))
            model23.add(Dense(units, activation='relu'))
            model23.add(Dropout(0.2))
            model23.add(Dense(units, activation='relu'))
            model23.add(Dropout(0.2))
            model23.add(Dense(units, activation='relu'))
            model23.add(Dropout(0.2))
            # Output layer, the same in every experiment.
            model23.add(Dense(num_classes, activation='softmax'))

            model23.compile(loss='mean_squared_error',
                            optimizer=sgd,
                            metrics=['accuracy'])

            # EarlyStopping with its default monitor and a patience of 2 epochs.
            early_stopping_monitor = EarlyStopping(patience=2)

            history = model23.fit(x_train, train_label,
                                  batch_size=batch_size,
                                  epochs=epochs,
                                  verbose=0,
                                  validation_data=(x_valid, valid_label),
                                  callbacks=[early_stopping_monitor])

            # test_eval[0] is the test loss, test_eval[1] the test accuracy.
            test_eval = model23.evaluate(x_test, y_test, verbose=0)

            accuracy = history.history['acc']
            val_accuracy = history.history['val_acc']
            loss = history.history['loss']
            val_loss = history.history['val_loss']

            # Only configurations whose final-epoch training accuracy exceeds
            # the test accuracy are recorded and printed; the rest are skipped.
            # NOTE(review): confirm this filter is intentional.
            if accuracy[-1] > test_eval[1]:
                train.append(accuracy[-1])
                test.append(test_eval[1])
                print('Learning rate: ' + str(LR) +
                      ', momentum: ' + str(MT) +
                      ', number of units: ' + str(units) +
                      ', train accuracy: ' + str(round(accuracy[-1], 4)) +
                      ' and Test accuracy:', round(test_eval[1], 4))

                # Per-class report and confusion matrix on the training split.
                print(' ')
                print('Class accuracy and confusion matrix for training')
                Y_train_label = np.argmax(train_label, axis=1)  # one-hot -> class index
                y_train_fit = model23.predict_classes(x_train)
                print(classification_report(Y_train_label, y_train_fit))
                print(pd.DataFrame(confusion_matrix(Y_train_label, y_train_fit)))

                # Per-class report and confusion matrix on the test split.
                print('Class accuracy and confusion matrix for test')
                Y_test = np.argmax(y_test, axis=1)  # one-hot -> class index
                y_pred = model23.predict_classes(x_test)
                print(classification_report(Y_test, y_pred))
                print(pd.DataFrame(confusion_matrix(Y_test, y_pred)))

In [None]:
## One layer, Cross-entropy with tanh and stochastic gradient descent
# NOTE(review): two Dense(units) layers precede the softmax output, so the
# layer count in the title evidently excludes the first Dense layer. That first
# layer uses relu; only the layer after it uses tanh - confirm this is intended.

# Final-epoch training accuracy and test accuracy of each reported configuration.
train = []
test = []

# Hyper-parameter grid; every combination is trained from scratch.
learning_rate = [0.03, 0.05, 0.07, 0.09]
momentum = [0.95, 0.97, 0.99]
number_of_units = [128, 256, 512]

print('number of layers: 1')
# Same visiting order as three nested loops: learning rate outermost, units innermost.
for LR, MT, units in ((lr, mt, width)
                      for lr in learning_rate
                      for mt in momentum
                      for width in number_of_units):
    sgd = optimizers.SGD(lr=LR, decay=5e-6, momentum=MT, nesterov=True)

    # Only the first layer declares its input shape (64 features per sample);
    # later layers infer theirs. Dropout after each hidden layer helps prevent
    # overfitting. The softmax output layer is the same in every experiment.
    model31 = Sequential([
        Dense(units, activation='relu', input_shape=(64,)),
        Dropout(0.2),
        Dense(units, activation='tanh'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax'),
    ])
    model31.compile(loss='categorical_crossentropy',
                    optimizer=sgd,
                    metrics=['accuracy'])

    # EarlyStopping with its default monitor and a patience of 2 epochs.
    early_stopping_monitor = EarlyStopping(patience=2)
    history = model31.fit(
        x_train, train_label,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0,
        validation_data=(x_valid, valid_label),
        callbacks=[early_stopping_monitor],
    )

    # test_eval[0] is the test loss, test_eval[1] the test accuracy.
    test_eval = model31.evaluate(x_test, y_test, verbose=0)

    accuracy, val_accuracy, loss, val_loss = (
        history.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss'))

    # Configurations whose final-epoch training accuracy does not exceed the
    # test accuracy are skipped: nothing is recorded or printed for them.
    # NOTE(review): confirm this filter is intentional.
    if not (accuracy[-1] > test_eval[1]):
        continue

    train.append(accuracy[-1])
    test.append(test_eval[1])
    print(''.join(['Learning rate: ', str(LR),
                   ', momentum: ', str(MT),
                   ', number of units: ', str(units),
                   ', train accuracy: ', str(round(accuracy[-1], 4)),
                   ' and Test accuracy:']),
          round(test_eval[1], 4))

    # Per-class report and confusion matrix on the training split.
    print(' ')
    print('Class accuracy and confusion matrix for training')
    Y_train_label = np.argmax(train_label, axis=1)  # one-hot -> class index
    y_train_fit = model31.predict_classes(x_train)
    print(classification_report(Y_train_label, y_train_fit))
    print(pd.DataFrame(confusion_matrix(Y_train_label, y_train_fit)))

    # Per-class report and confusion matrix on the test split.
    print('Class accuracy and confusion matrix for test')
    Y_test = np.argmax(y_test, axis=1)  # one-hot -> class index
    y_pred = model31.predict_classes(x_test)
    print(classification_report(Y_test, y_pred))
    print(pd.DataFrame(confusion_matrix(Y_test, y_pred)))

In [None]:
## Two layers, Cross-entropy with tanh and stochastic gradient descent
# NOTE(review): three Dense(units) layers precede the softmax output, so the
# layer count in the title evidently excludes the first Dense layer. That first
# layer uses relu; only the layers after it use tanh - confirm this is intended.

# Final-epoch training accuracy and test accuracy of each reported configuration.
train = []
test = []

# Hyper-parameter grid; every combination is trained from scratch.
learning_rate = [0.03, 0.05, 0.07, 0.09]
momentum = [0.95, 0.97, 0.99]
number_of_units = [128, 256, 512]

print('number of layers: 2')
# Same visiting order as three nested loops: learning rate outermost, units innermost.
for LR, MT, units in ((lr, mt, width)
                      for lr in learning_rate
                      for mt in momentum
                      for width in number_of_units):
    sgd = optimizers.SGD(lr=LR, decay=5e-6, momentum=MT, nesterov=True)

    # Only the first layer declares its input shape (64 features per sample);
    # later layers infer theirs. Dropout after each hidden layer helps prevent
    # overfitting. The softmax output layer is the same in every experiment.
    model32 = Sequential([
        Dense(units, activation='relu', input_shape=(64,)),
        Dropout(0.2),
        Dense(units, activation='tanh'),
        Dropout(0.2),
        Dense(units, activation='tanh'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax'),
    ])
    model32.compile(loss='categorical_crossentropy',
                    optimizer=sgd,
                    metrics=['accuracy'])

    # EarlyStopping with its default monitor and a patience of 2 epochs.
    early_stopping_monitor = EarlyStopping(patience=2)
    history = model32.fit(
        x_train, train_label,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0,
        validation_data=(x_valid, valid_label),
        callbacks=[early_stopping_monitor],
    )

    # test_eval[0] is the test loss, test_eval[1] the test accuracy.
    test_eval = model32.evaluate(x_test, y_test, verbose=0)

    accuracy, val_accuracy, loss, val_loss = (
        history.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss'))

    # Configurations whose final-epoch training accuracy does not exceed the
    # test accuracy are skipped: nothing is recorded or printed for them.
    # NOTE(review): confirm this filter is intentional.
    if not (accuracy[-1] > test_eval[1]):
        continue

    train.append(accuracy[-1])
    test.append(test_eval[1])
    print(''.join(['Learning rate: ', str(LR),
                   ', momentum: ', str(MT),
                   ', number of units: ', str(units),
                   ', train accuracy: ', str(round(accuracy[-1], 4)),
                   ' and Test accuracy:']),
          round(test_eval[1], 4))

    # Per-class report and confusion matrix on the training split.
    print(' ')
    print('Class accuracy and confusion matrix for training')
    Y_train_label = np.argmax(train_label, axis=1)  # one-hot -> class index
    y_train_fit = model32.predict_classes(x_train)
    print(classification_report(Y_train_label, y_train_fit))
    print(pd.DataFrame(confusion_matrix(Y_train_label, y_train_fit)))

    # Per-class report and confusion matrix on the test split.
    print('Class accuracy and confusion matrix for test')
    Y_test = np.argmax(y_test, axis=1)  # one-hot -> class index
    y_pred = model32.predict_classes(x_test)
    print(classification_report(Y_test, y_pred))
    print(pd.DataFrame(confusion_matrix(Y_test, y_pred)))

In [None]:
## Three layers, Cross-entropy with tanh and stochastic gradient descent
# NOTE(review): four Dense(units) layers precede the softmax output, so the
# layer count in the title evidently excludes the first Dense layer. That first
# layer uses relu; only the layers after it use tanh - confirm this is intended.

# Final-epoch training accuracy and test accuracy of each reported configuration.
train = []
test = []

# Hyper-parameter grid; every combination is trained from scratch.
learning_rate = [0.005, 0.007, 0.01, 0.09]
momentum = [0.95, 0.97, 0.99]
number_of_units = [128, 256, 512]

print('number of layers: 3')
# Same visiting order as three nested loops: learning rate outermost, units innermost.
for LR, MT, units in ((lr, mt, width)
                      for lr in learning_rate
                      for mt in momentum
                      for width in number_of_units):
    sgd = optimizers.SGD(lr=LR, decay=5e-6, momentum=MT, nesterov=True)

    # Only the first layer declares its input shape (64 features per sample);
    # later layers infer theirs. Dropout after each hidden layer helps prevent
    # overfitting. The softmax output layer is the same in every experiment.
    model33 = Sequential([
        Dense(units, activation='relu', input_shape=(64,)),
        Dropout(0.2),
        Dense(units, activation='tanh'),
        Dropout(0.2),
        Dense(units, activation='tanh'),
        Dropout(0.2),
        Dense(units, activation='tanh'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax'),
    ])
    model33.compile(loss='categorical_crossentropy',
                    optimizer=sgd,
                    metrics=['accuracy'])

    # EarlyStopping with its default monitor and a patience of 2 epochs.
    early_stopping_monitor = EarlyStopping(patience=2)
    history = model33.fit(
        x_train, train_label,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0,
        validation_data=(x_valid, valid_label),
        callbacks=[early_stopping_monitor],
    )

    # test_eval[0] is the test loss, test_eval[1] the test accuracy.
    test_eval = model33.evaluate(x_test, y_test, verbose=0)

    accuracy, val_accuracy, loss, val_loss = (
        history.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss'))

    # Configurations whose final-epoch training accuracy does not exceed the
    # test accuracy are skipped: nothing is recorded or printed for them.
    # NOTE(review): confirm this filter is intentional.
    if not (accuracy[-1] > test_eval[1]):
        continue

    train.append(accuracy[-1])
    test.append(test_eval[1])
    print(''.join(['Learning rate: ', str(LR),
                   ', momentum: ', str(MT),
                   ', number of units: ', str(units),
                   ', train accuracy: ', str(round(accuracy[-1], 4)),
                   ' and Test accuracy:']),
          round(test_eval[1], 4))
    print(' ')
    # Unlike the other experiment cells, the per-class classification reports
    # and confusion matrices (training and test) are disabled for this grid;
    # only the summary line above is printed.

In [None]:
# Learning curves for whatever `history` currently holds, i.e. the last model
# fitted by the most recently executed training cell.
accuracy = history.history['acc']
val_accuracy = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Use a dedicated name for the x-axis values. Rebinding `epochs` here would
# overwrite the global training setting (epochs = 30) and make any training
# cell re-run afterwards pass a range object to fit(epochs=...).
epoch_index = range(len(accuracy))

plt.plot(epoch_index, accuracy, 'bo', label='Training accuracy')
plt.plot(epoch_index, val_accuracy, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Second figure: loss curves.
plt.figure()
plt.plot(epoch_index, loss, 'bo', label='Training loss')
plt.plot(epoch_index, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()