# Optimising Character Classification Network

## Import

In [None]:
from lab3.nn_general import NeuralNetwork, batchTrain, test
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
# Set matplotlib's default font size to 13 for every figure drawn afterwards.
plt.rcParams.update({'font.size': 13})
import pandas as pd

## Get Data

Get full training dataset

In [None]:
# Read every line of the MNIST training CSV. The context manager closes the
# file even if reading fails. Forward slashes avoid the invalid "\m" escape
# in the old literal and resolve on Windows as well as POSIX systems.
with open("datasources/mnist_train.csv", 'r') as file:
    data_training = file.readlines()

# Keep everything except the final 5000 lines for training; those last
# 5000 lines are held out so they can serve as validation data.
data_training = data_training[:-5000]

Get the validation data: the 5000 lines at the end of the full dataset, which were held out of the training set above

In [None]:
# data_training has already had its last 5000 lines removed, so slicing it
# again would return records the network is trained on. Re-read the tail of
# the full file instead, so that validation uses the held-out 5000 lines.
with open("datasources/mnist_train.csv", 'r') as file:
    data_validation = file.readlines()[-5000:]

Check the split has been done correctly (the last training record and the first validation record must not be the same record)

In [None]:
# Compare the complete boundary records, not just their first column: that
# column is the label, and two different records often share a label, which
# would make a label-only comparison fail for a perfectly valid split.
assert data_training[-1] != data_validation[0], (
    "last training record and first validation record are the same line"
)

Get full test dataset

In [None]:
# Read every line of the MNIST test CSV. The context manager closes the
# file even if reading fails. Forward slashes avoid the invalid "\m" escape
# in the old literal and resolve on Windows as well as POSIX systems.
with open("datasources/mnist_test.csv", 'r') as file:
    data_testing = file.readlines()

## Get models and set parameters

Load the 3 best-performing models from the Excel sheet

In [None]:
# Load the sheet of candidate models, using its first column as the index.
# Forward slashes avoid the invalid "\d" and "\s" escapes in the old literal
# and resolve on Windows as well as POSIX systems.
selected = pd.read_excel('./datasources/selected.xlsx', index_col=0)

# Rank by the 'performanceUpdated' column, highest first, renumber the rows
# from 0 and keep the top three.
selected = selected.sort_values(by='performanceUpdated',
                                ascending=False,
                                ignore_index=True).head(3)
selected

In [None]:
# Training hyper-parameters handed to batchTrain for every candidate network.
epochs, batchSize = 55, 5000

## Training

In [None]:
# One record per candidate network, keyed by its hidden-layer size (kept as
# a string). Each record starts with empty curves and no trained network.
results = {
    hidden_size: {'training': [], 'validation': [], 'nn': None}
    for hidden_size in ('900', '700', '200')
}
results

In [None]:
# Settings shared by every candidate; only the hidden-layer size varies.
network_settings = dict(input_nodes=784,
                        output_nodes=10,
                        lr=0.3,
                        error_function='difference-squared')

# Build and train one network per hidden-layer size, storing the trained
# network and both curves returned by batchTrain in that size's record.
for hid, entry in results.items():
    nn = NeuralNetwork(hidden_nodes=int(hid), **network_settings)

    nn, trainingCurve, validationCurve = batchTrain(
        data_training=data_training,
        data_validation=data_validation,
        nn=nn,
        batchSize=batchSize,
        epochs=epochs,
    )

    entry['nn'] = nn
    entry['training'] = trainingCurve
    entry['validation'] = validationCurve

In [None]:
# Take the trained network with 200 hidden nodes as the final model.
# NOTE(review): the '200' key is chosen by hand here; confirm it is still the
# preferred candidate whenever the training cell is re-run.
finalModel = results['200']['nn']

In [None]:
# Evaluate the final model on the test set, which this notebook uses nowhere
# else (neither for training nor for validation).
# NOTE(review): test() comes from lab3.nn_general; its return value is
# presumably a performance score — confirm against that module.
finalScore = test(data_testing, finalModel)

In [None]:
# Display the value returned by test() for the final model.
finalScore

In [None]:
# Display the full results dictionary: the curves and network stored per hidden size.
results

In [None]:
# One (training colour, validation colour) pair per network, in the same
# order as the entries of results.
colours = [
    ('navy', 'cornflowerblue'),
    ('darkred', 'lightcoral'),
    ('darkgreen', 'lightgreen'),
]

In [None]:
# Draw two curves per network against the epoch index: the training curve in
# the first colour of its pair and the validation curve, dashed, in the second.
for hid, (training_colour, validation_colour) in zip(results, colours):
    curves = results[hid]
    hidden_size = int(hid)

    training_curve = curves['training']
    plt.plot(range(len(training_curve)),
             training_curve,
             label=f'{hidden_size} training',
             c=training_colour)

    validation_curve = curves['validation']
    plt.plot(range(len(validation_curve)),
             validation_curve,
             label=f'{hidden_size} validation',
             c=validation_colour,
             linestyle='--')

plt.ylabel('performance')
plt.xlabel('epochs')
plt.legend()
plt.show()

In [None]:
# Show the field names stored for the 700-node network.
list(results['700'])

In [None]:
# Show how many points the training curve of the 200-node network contains.
len(results['200']['training'])