## Setup

In [28]:
import random

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import tensorflow as tf
from sklearn.model_selection import KFold

from pyMLaux import show_img_data, plot_history, evaluate_classification_result

In [9]:
data_dir = 'splits/'

In [10]:
tf.get_logger().setLevel('ERROR')

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


## Load Interpolation and Extrapolation datasets

In [11]:
training = pd.read_csv("splits/training.csv")
training.drop(["index_number"], axis=1, inplace=True)
training

X_train = training.iloc[:, :4]
y_train = training.iloc[:, -1]

## Random Search for ANN

In [54]:
def create_hyperparams(n, 
                       lr, 
                       epoch_options, 
                       no_layers_options, 
                       hidden_layer_options,
                       dropout_options, 
                       activation_options):
    df = pd.DataFrame(index=range(n),
                      columns=['no_hidden_layers', 'hidden_layers', 'activation', 'dropout', 'lr', 'epochs'])

    for i in range(n):
        df.loc[i, 'lr'] = lr * 5.**random.uniform(-2., 2.)
        df.loc[i, 'epochs'] = random.sample(epoch_options, 1)[0]
    
        no_layers = int(random.sample(no_hidden_layers, 1)[0])
        df.loc[i, 'no_hidden_layers'] = no_layers
        df.loc[i, 'hidden_layers'] = sorted([int(random.sample(hidden_layer_options, 1)[0]) for i in range(no_layers)], reverse=True)
        df.loc[i, 'dropout'] = random.sample(dropout_options, 1)[0]
        df.loc[i, 'activation'] = random.sample(activation_options, 1)[0]
    
    return(df)

In [20]:
def create_network(hp, no_inputs, no_outputs, output_activation='linear', **kwargs):
    hidden_layers = hp['hidden_layers']
    
    dropout = hp['dropout']
    hidden_activation = hp['activation']
    
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=(no_inputs, )))

    for cl in hidden_layers:
        model.add(tf.keras.layers.Dense(cl, activation=hidden_activation))
        if dropout > 0:
            model.add(tf.keras.layers.Dropout(dropout))
            
    model.add(tf.keras.layers.Dense(no_outputs, activation=output_activation)) 

    opt = tf.keras.optimizers.Adam(learning_rate=hp['lr'])

    model.compile(optimizer=opt, **kwargs)

    return(model)

In [21]:
def find_best(df, crit='ACC'):
    index = np.where(df[crit] == np.amax(df[crit]))[0]
    return(df.iloc[list(index), :])

## Perform Model Selection and Determine Best Model

### Set of 100 models

In [55]:
random.seed(4232)
batch_size = 32
no_models = 5

In [56]:
model_sel = create_hyperparams(no_models, 
                               0.001, 
                               [32, 64], 
                               [2, 3], 
                               [64, 128, 256],
                               [0.2, 0.3], 
                               ['relu'])
model_sel['MSE'] = 0

In [57]:
model_sel

Unnamed: 0,no_hidden_layers,hidden_layers,activation,dropout,lr,epochs,MSE
0,3,[256],relu,0.2,0.000714,64,0
1,2,[],relu,0.2,5.2e-05,64,0
2,2,[],relu,0.3,0.006553,32,0
3,3,[128],relu,0.3,0.01994,64,0
4,3,[128],relu,0.3,0.016225,32,0


In [50]:
n_splits = 5
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=4232)

In [51]:
for train_index, test_index in kfold.split(X_train):
    X_train_sel, X_test_sel = X_train.loc[train_index], X_train.loc[test_index]
    y_train_sel, y_test_sel = y_train.loc[train_index], y_train.loc[test_index]

    for i in tqdm(range(no_models)):
        model = create_network(model_sel.iloc[i], no_inputs=X_train_sel.shape[1],
                               no_outputs=1, loss='mse', 
                               metrics=[tf.keras.metrics.MeanSquaredError()])


        history = model.fit(x=X_train_sel, y=y_train_sel, 
                            epochs=model_sel['epochs'][i],
                            batch_size=batch_size, 
                            verbose=2)

        pred = model.predict(x=X_test_sel, verbose=0)
        
        model_sel.loc[i, 'MSE'] += mean_squared_error(y_test_sel, pred)

        tf.keras.backend.clear_session()
    model_sel.loc[i, 'MSE'] /= n_splits

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/648
1682/1682 - 4s - loss: 0.0872 - mean_squared_error: 0.0872 - 4s/epoch - 2ms/step
Epoch 2/648
1682/1682 - 4s - loss: 0.0324 - mean_squared_error: 0.0324 - 4s/epoch - 2ms/step
Epoch 3/648
1682/1682 - 4s - loss: 0.0294 - mean_squared_error: 0.0294 - 4s/epoch - 2ms/step
Epoch 4/648
1682/1682 - 4s - loss: 0.0247 - mean_squared_error: 0.0247 - 4s/epoch - 2ms/step
Epoch 5/648
1682/1682 - 4s - loss: 0.0239 - mean_squared_error: 0.0239 - 4s/epoch - 2ms/step
Epoch 6/648


KeyboardInterrupt: 

In [None]:
model_sel.sort_values(by='MSE', ascending=True)

In [17]:
find_best(model_sel)

Unnamed: 0,no_hidden_layers,hidden_layers,activation,dropout,lr,epochs,ACC
38,4,"[512, 256, 256, 64]",relu,0.25,0.000113,256,0.955556
97,3,"[512, 64, 64]",relu,0.25,0.000766,256,0.955556


In [18]:
best_index = find_best(model_sel).index[0]

### Narrow down to 50
The best 10 models before, only rarely featured only 1 hidden layer, and only used RELU as an activation function. Also, 32 Epochs were never used. I'd therefor remove these options.

In [19]:
no_models = 50
model_sel = create_hyperparams(no_models, 
                               0.001, 
                               [128, 256], 
                               [2, 3, 4], 
                               [64, 256, 512],
                               [0.2, 0.25, 0.3], 
                               ['relu'])
model_sel['ACC'] = -1

In [20]:
model_sel

Unnamed: 0,no_hidden_layers,hidden_layers,activation,dropout,lr,epochs,ACC
0,1,[256],relu,0.3,0.000331,256,-1
1,4,"[512, 256, 64, 64]",relu,0.3,0.000416,256,-1
2,2,"[64, 64]",relu,0.25,5.6e-05,256,-1
3,3,"[256, 256, 64]",relu,0.2,0.000277,128,-1
4,1,[512],relu,0.2,0.00168,256,-1
5,3,"[512, 64, 64]",relu,0.25,0.004181,256,-1
6,2,"[512, 64]",relu,0.25,0.021843,256,-1
7,3,"[512, 256, 256]",relu,0.25,0.000103,256,-1
8,2,"[512, 64]",relu,0.25,0.000719,256,-1
9,1,[64],relu,0.2,0.000696,256,-1


In [21]:
for i in tqdm(range(no_models)):
    model = create_network(model_sel.iloc[i], no_inputs=X_train.shape[1],
                           no_outputs=10, loss='sparse_categorical_crossentropy', 
                           metrics=['accuracy'])

    history = model.fit(x=X_train, y=y_train, 
                        epochs=model_sel['epochs'][i],
                        batch_size=batch_size, 
                        verbose=0)

    pred = model.predict(x=X_val, verbose=0)
    predC = np.argmax(pred, axis=1)

    model_sel.loc[i, 'ACC'] = accuracy_score(y_val, predC)

    tf.keras.backend.clear_session()

  0%|          | 0/50 [00:00<?, ?it/s]

In [22]:
model_sel.sort_values(by='ACC', ascending=False).head(10)

Unnamed: 0,no_hidden_layers,hidden_layers,activation,dropout,lr,epochs,ACC
10,4,"[512, 256, 64, 64]",relu,0.25,0.000796,256,0.957778
37,2,"[512, 64]",relu,0.3,0.000441,128,0.956667
8,2,"[512, 64]",relu,0.25,0.000719,256,0.956667
38,1,[512],relu,0.2,0.000781,256,0.956667
1,4,"[512, 256, 64, 64]",relu,0.3,0.000416,256,0.956667
40,3,"[512, 256, 256]",relu,0.3,0.00085,128,0.956667
23,3,"[256, 256, 64]",relu,0.2,0.000205,256,0.955556
4,1,[512],relu,0.2,0.00168,256,0.955556
41,1,[512],relu,0.2,0.001038,256,0.954444
17,4,"[512, 512, 512, 64]",relu,0.25,6.8e-05,128,0.953333


In [23]:
find_best(model_sel)

Unnamed: 0,no_hidden_layers,hidden_layers,activation,dropout,lr,epochs,ACC
10,4,"[512, 256, 64, 64]",relu,0.25,0.000796,256,0.957778


In [24]:
best_index = find_best(model_sel).index[0]

## Train Model on Entire Training Set Using Best Parameters 


In [25]:
model = create_network(model_sel.loc[best_index], no_inputs=X_train.shape[1],
                       no_outputs=10, loss='sparse_categorical_crossentropy', 
                       metrics=['accuracy'])

In [26]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               12800     
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense_2 (Dense)             (None, 64)                16448     
                                                                 
 dropout_2 (Dropout)         (None, 64)                0         
                                                                 
 dense_3 (Dense)             (None, 64)                4

In [27]:
history = model.fit(x=dig_train['data'], y=dig_train['target'], 
                    epochs=model_sel.loc[best_index, 'epochs'],
                    batch_size=batch_size)

Epoch 1/256
Epoch 2/256
Epoch 3/256
Epoch 4/256
Epoch 5/256
Epoch 6/256
Epoch 7/256
Epoch 8/256
Epoch 9/256
Epoch 10/256
Epoch 11/256
Epoch 12/256
Epoch 13/256
Epoch 14/256
Epoch 15/256
Epoch 16/256
Epoch 17/256
Epoch 18/256
Epoch 19/256
Epoch 20/256
Epoch 21/256
Epoch 22/256
Epoch 23/256
Epoch 24/256
Epoch 25/256
Epoch 26/256
Epoch 27/256
Epoch 28/256
Epoch 29/256
Epoch 30/256
Epoch 31/256
Epoch 32/256
Epoch 33/256
Epoch 34/256
Epoch 35/256
Epoch 36/256
Epoch 37/256
Epoch 38/256
Epoch 39/256
Epoch 40/256
Epoch 41/256
Epoch 42/256
Epoch 43/256
Epoch 44/256
Epoch 45/256
Epoch 46/256
Epoch 47/256
Epoch 48/256
Epoch 49/256
Epoch 50/256
Epoch 51/256
Epoch 52/256
Epoch 53/256
Epoch 54/256
Epoch 55/256
Epoch 56/256
Epoch 57/256
Epoch 58/256
Epoch 59/256
Epoch 60/256
Epoch 61/256
Epoch 62/256
Epoch 63/256
Epoch 64/256
Epoch 65/256
Epoch 66/256
Epoch 67/256
Epoch 68/256
Epoch 69/256
Epoch 70/256
Epoch 71/256
Epoch 72/256
Epoch 73/256
Epoch 74/256
Epoch 75/256
Epoch 76/256
Epoch 77/256
Epoch 78

## Test Model on Test Data 

In [28]:
pred = model.predict(dig_test['data'])

evaluate_classification_result(dig_test['target'], pred);

[[208   3   0   0   0   0   1   0   0   0]
 [  0 200   1   0   1   0   0   1   0   0]
 [  0   2 183   0   0   0   0   0   0   1]
 [  1   2   1 190   1   4   0   1   0   2]
 [  0   4   0   0 202   0   1   0   0   1]
 [  1   2   0   1   0 179   7   0   0   0]
 [  2   2   0   0   2   2 195   0   1   0]
 [  0   2   1   1   0   0   0 195   1   2]
 [  2   7   0   3   1   1   2   1 173   3]
 [  2  10   2   3   1   3   0   0   0 183]]


Class 0:
    Sensitivity (TPR):  98.113% (208 of 212)
    Specificity (TNR):  99.554% (1784 of 1792)
    Precision:          96.296% (208 of 216)
    Neg. pred. value:   99.776% (1784 of 1788)
Class 1:
    Sensitivity (TPR):  98.522% (200 of 203)
    Specificity (TNR):  98.112% (1767 of 1801)
    Precision:          85.470% (200 of 234)
    Neg. pred. value:   99.831% (1767 of 1770)
Class 2:
    Sensitivity (TPR):  98.387% (183 of 186)
    Specificity (TNR):  99.725% (1813 of 1818)
    Precision:          97.340% (183 of 188)
    Neg. pred. value:   99.835% (18

In [29]:
model.save("Pritz_Sebastian.hdf5")