# Using MLP Classifier
Resources:<br>
https://analyticsindiamag.com/a-beginners-guide-to-scikit-learns-mlpclassifier/<br>
https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html<br>
<br>

### Import Libraries

In [None]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.neural_network import MLPClassifier

### Read Data sets and create validation set (15% of training set)

In [None]:
# Load the training CSV. header=None makes pandas read the first row as data
# rather than as column names.
data = pd.read_csv("./../dataset/csv/mnist_train.csv", header=None)

# Shuffle every row (sampling as many rows as the frame holds), then reserve
# the first 3/20 (15%) of the shuffled rows for validation.
size_data = len(data)
data = data.sample(n=size_data)
limit = math.floor(3 * size_data / 20)

validation_data, train_data = data.iloc[:limit], data.iloc[limit:]

# The test set lives in its own file and is used as-is.
test_data = pd.read_csv("./../dataset/csv/mnist_test.csv", header=None)

### Create sparse matrix of shape and classification for train, validation and test data

In [None]:
# Every column after the first is a feature. Dividing by 255 converts the
# integer values to floats, which gives better precision.
train_sparse_matrix = train_data.iloc[:, 1:].to_numpy() / 255
train_sparse_matrix

In [None]:
# The first column holds the class of each row; keep it as a flat 1-D array.
train_classes = train_data.iloc[:, 0].to_numpy()
train_classes

In [None]:
# Same feature / class split as for the training data: column 0 is the class,
# the remaining columns are the features scaled by 1/255.
validation_sparse_matrix = validation_data.iloc[:, 1:].to_numpy() / 255
validation_classes = validation_data.iloc[:, 0].to_numpy()

test_sparse_matrix = test_data.iloc[:, 1:].to_numpy() / 255
test_classes = test_data.iloc[:, 0].to_numpy()

## Use MLP Classifier
### Define functions for accuracy and loss

In [None]:
def accuracy(confusion_matrix):
    """Return the share of a confusion matrix that lies on its main diagonal.

    The result is the matrix trace (sum of the diagonal entries) divided by
    the sum of all entries.
    """
    return confusion_matrix.trace() / confusion_matrix.sum()

In [None]:
# # For the loss of the classifier, we use the cross-entropy
# def loss_function(data_pred, data_classes):
#     loss = 0
# #     for i in range(len(data_classes)):
# #         if (data_pred[i] != data_classes[i]):
# #             loss += 1
# #     loss = loss/len(data_classes)

# #     print(data_classes)
# #     print(np.log(data_pred))

# #     print(np.dot(data_classes, np.log(data_pred)))
    
#     temp = 0
#     for i in range(len(data_classes)):
#         print("data_classes[i]: {}".format(data_classes[i]))
#         print("data_pred[i]: {}".format(data_pred[i]))
#         print("log data_pred[i]: {}".format(np.log(data_pred)[i]))
#         print("product: {}".format(data_classes[i]*np.log(data_pred)[i]))
        
#         temp += data_classes[i]*np.log(data_pred)[i]
#         print(temp)
#         print()
    
#     loss = -np.dot(data_classes, np.log(data_pred))
    
#     return loss

### Function to initialize and optimize classifier

In [None]:
def create_classifier(num_hl, learning_rate, max_iter):
    """Build an unfitted MLPClassifier from the three tunable settings.

    num_hl is forwarded as hidden_layer_sizes, learning_rate as
    learning_rate_init and max_iter as max_iter. The remaining options are
    fixed: ReLU activation, the Adam solver, a constant learning-rate
    schedule and random_state=1.
    """
    settings = dict(
        hidden_layer_sizes=num_hl,
        max_iter=max_iter,
        activation='relu',
        solver='adam',
        learning_rate='constant',
        learning_rate_init=learning_rate,
        random_state=1,
    )
    return MLPClassifier(**settings)


In [None]:
def classification(max_iter, nhl, lr):
    """Train an MLP on the training split and score it on validation and training data.

    Parameters
    ----------
    max_iter : value passed to create_classifier as max_iter.
    nhl : value passed to create_classifier as the hidden layer size.
    lr : value passed to create_classifier as the initial learning rate.

    Returns
    -------
    pandas.DataFrame
        One row (index 0) with the columns 'nhl', 'lr', 'acc_valid',
        'loss_valid', 'acc_train' and 'loss_train'.

    Reads the module-level train_* and validation_* arrays and prints the
    computed accuracies and losses.
    """
    classification_results = pd.DataFrame(columns=['nhl', 'lr', 'acc_valid', 'loss_valid', 'acc_train', 'loss_train'])

    classifier = create_classifier(nhl, lr, max_iter)

    # Training network
    classifier.fit(train_sparse_matrix, train_classes)

    # Validation set
    validation_pred = classifier.predict(validation_sparse_matrix)

    # confusion_matrix expects (y_true, y_pred); accuracy is the same either way.
    cm_valid = confusion_matrix(validation_classes, validation_pred)
    acc_valid = accuracy(cm_valid)
    print("Accuracy valid: {}".format(acc_valid))

    # classifier.loss_ is the loss reached on the *training* data during fit,
    # so the validation loss has to be computed separately: cross-entropy of
    # the predicted class probabilities. labels= keeps the probability columns
    # aligned even if a class is missing from the validation split.
    validation_proba = classifier.predict_proba(validation_sparse_matrix)
    loss_valid = log_loss(validation_classes, validation_proba, labels=classifier.classes_)
    print("Loss: {}".format(loss_valid))

    # Training set
    train_pred = classifier.predict(train_sparse_matrix)

    cm_train = confusion_matrix(train_classes, train_pred)
    acc_train = accuracy(cm_train)
    print("\nAccuracy train: {}".format(acc_train))

    # Training loss as reported by the fitted classifier.
    loss_train = classifier.loss_
    print("Loss: {}\n-----\n".format(loss_train))

    classification_results.loc[len(classification_results)] = [nhl, lr, acc_valid, loss_valid, acc_train, loss_train]

    return classification_results

In [None]:
def classification_test(max_iter, nhl, lr):
    """Train an MLP on the training split and score it on the test set.

    Parameters
    ----------
    max_iter : value passed to create_classifier as max_iter.
    nhl : value passed to create_classifier as the hidden layer size.
    lr : value passed to create_classifier as the initial learning rate.

    Returns
    -------
    pandas.DataFrame
        One row (index 0) with the columns 'nhl', 'lr', 'acc' and 'loss',
        where 'loss' is the cross-entropy on the test set.

    Reads the module-level train_* and test_* arrays.
    """
    classification_results = pd.DataFrame(columns=['nhl', 'lr', 'acc', 'loss'])

    classifier = create_classifier(nhl, lr, max_iter)

    # Learning
    classifier.fit(train_sparse_matrix, train_classes)

    # Test set
    test_pred = classifier.predict(test_sparse_matrix)

    # confusion_matrix expects (y_true, y_pred); accuracy is the same either way.
    cm = confusion_matrix(test_classes, test_pred)
    acc = accuracy(cm)

    # The hand-written loss_function helper only exists as commented-out code,
    # so calling it raised NameError. Use the imported cross-entropy metric on
    # the predicted class probabilities instead; labels= keeps the probability
    # columns aligned with the classes the classifier was trained on.
    test_proba = classifier.predict_proba(test_sparse_matrix)
    loss = log_loss(test_classes, test_proba, labels=classifier.classes_)

    classification_results.loc[len(classification_results)] = [nhl, lr, acc, loss]

    return classification_results

In [None]:
# Candidate hidden-layer sizes and initial learning rates tried during validation
nhl_range = [10, 50, 100]
learning_rates = [0.1, 0.01, 0.001]

# Metrics copied from each classification() result into the per-setting table
metric_columns = ['acc_valid', 'loss_valid', 'acc_train', 'loss_train']

# One (y-axis label, validation column, training column) entry per subplot
panels = (
    ("Accuracy (in %)", 'acc_valid', 'acc_train'),
    ("Loss", 'loss_valid', 'loss_train'),
)

# Try every (hidden-layer size, learning rate) combination
for nhl in nhl_range:
    for lr in learning_rates:
        # Fresh results table for this combination: one row per max_iter value
        classification_epoch = pd.DataFrame(columns=['iter'] + metric_columns)

        for m in range(1, 6):
            acc_loss_m = classification(m, nhl, lr)
            classification_epoch.loc[len(classification_epoch)] = [m] + [acc_loss_m[column][0] for column in metric_columns]

        print("\nNumber neurons in hidden layer: {}".format(nhl))
        print("learning rate: {}\n".format(lr))
        print(classification_epoch)

        # Two stacked plots: accuracy on top, loss below; validation in blue,
        # training in red.
        for position, (y_label, valid_column, train_column) in enumerate(panels, start=1):
            plt.subplot(2, 1, position)
            if position == 1:
                plt.title("Accuracy and Loss")
            plt.xlabel("Epochs")
            plt.ylabel(y_label)
            plt.plot(classification_epoch['iter'], classification_epoch[valid_column], "-b", label='Validation')
            plt.plot(classification_epoch['iter'], classification_epoch[train_column], "-r", label='Training')
            plt.legend()

        plt.show()
        print("\n------------------------------------------------------\n")

            
# classification_test(optimal_iter, optimal_nhl, optimal_lr)

After this validation phase, when we look at the previous tests, we clearly see that the best learning rate is 0.001 (the bigger ones give completely divergent accuracy).<br>
For the number of neurons in the hidden layer, the best choice is 100.<br>
<br>
Now we want to run the MLP classification on the test set.

In [None]:
# Settings chosen after the validation phase
optimal_nhl = 100
optimal_learning_rate = 0.001
max_iter = 15

# Train once with the chosen settings and record the test-set scores
classification_epoch = pd.DataFrame(columns=['iter', 'acc', 'loss'])
acc_loss_m = classification_test(max_iter, optimal_nhl, optimal_learning_rate)
classification_epoch.loc[len(classification_epoch)] = [max_iter] + [acc_loss_m[column][0] for column in ('acc', 'loss')]

# Accuracy is stored as a fraction; scale it to a percentage for the report
final_accuracy = classification_epoch.loc[0, 'acc'] * 100

print("Accuracy of MLP classifier with optimized parameter values: %.3f" % final_accuracy, '%')