In [None]:
#import packages
import numpy as np
import pandas as pd
import time
import sklearn
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
#load the training and the test tables from the two CSV files in the working directory
data, data_test = (pd.read_csv(csv_name) for csv_name in ("mnist_train.csv", "mnist_test.csv"))
import matplotlib.pyplot as plt
import seaborn as sn
import random

In [None]:
#first pair of datasets taken from the training table: WILL BE USED IN HYPERPARAMETER OPTIMIZATION
#xt: every column of the training table except 'label' (the values of each pixel)
#yt: the 'label' column of the training table, i.e. the digit (0,1,2,3,4,5,6,7,8,9)
xt = data.drop(columns='label')
yt = data['label']

In [None]:
#second pair of datasets taken from the training table: WILL BE USED TO TRAIN THE ALGORITHM AND OUTPUT THE PREDICTOR
#xT: every column of the training table except 'label' (the values of each pixel)
#yT: the 'label' column of the training table, i.e. the digit (0,1,2,3,4,5,6,7,8,9)
xT = data.drop(columns='label')
yT = data['label']

In [None]:
#last pair of datasets, taken from the test table: WILL BE USED FOR TESTING
#x_tst: every column of the test table except 'label' (the values of each pixel)
#y_tst: the 'label' column of the test table, i.e. the digit (0,1,2,3,4,5,6,7,8,9)
x_tst = data_test.drop(columns='label')
y_tst = data_test['label']

In [None]:
#split (xt, yt) into a TRAINING SET (70%) and a DEVELOPMENT SET (30%), stratified on the label;
#both sets are needed for the hyperparameter optimization
from sklearn.model_selection import train_test_split
x_train, x_dev, y_train, y_dev = train_test_split(
    xt,
    yt,
    test_size=0.3,
    stratify=yt,
    random_state=42,
)

In [None]:
#one-hot encode the labels (one int8 column per distinct label value),
#so that a binary classifier can be trained for each digit
y_train = pd.get_dummies(y_train).astype('int8')
y_dev = pd.get_dummies(y_dev).astype('int8')

In [None]:
#recode the one-hot labels in place as +1 / -1: every entry that is not 1 becomes -1
for label_table in (y_train, y_dev):
    label_table[label_table != 1] = -1

In [None]:
#convert the four tables to NumPy arrays, an easier object to work with
x_train, y_train, x_dev, y_dev = (
    table.to_numpy() for table in (x_train, y_train, x_dev, y_dev)
)

In [None]:
#standardize every entry with the global mean and standard deviation of the training split
mean_px = x_train.mean().astype(np.float32)
std_px = x_train.std().astype(np.float32)
x_train = (x_train - mean_px)/(std_px)
#fix: apply the SAME training statistics to the development split; without this the classifiers
#are trained on standardized inputs but evaluated on raw pixel values
x_dev = (x_dev - mean_px)/(std_px)
#NOTE: this cell rebinds x_train / x_dev, so running it twice standardizes the data twice

#use a smaller dataset to decrease the time of computation
x_train = x_train[:6000,:]
x_dev = x_dev[:2000,:]
y_train = y_train[:6000,:]
y_dev = y_dev[:2000,:]


In [None]:
#import the helper functions (kernel, Gram matrix, predictors, accuracy, metrics) from the Basic_functions_Kernel_Perceptron notebook
from ipynb.fs.full.Basic_functions_Kernel_Perceptron import sign
from ipynb.fs.full.Basic_functions_Kernel_Perceptron import gram_poly
from ipynb.fs.full.Basic_functions_Kernel_Perceptron import kernel_poly
from ipynb.fs.full.Basic_functions_Kernel_Perceptron import find_predictors
from ipynb.fs.full.Basic_functions_Kernel_Perceptron import define_bests
from ipynb.fs.full.Basic_functions_Kernel_Perceptron import mean_classifier
from ipynb.fs.full.Basic_functions_Kernel_Perceptron import accuracy_test
from ipynb.fs.full.Basic_functions_Kernel_Perceptron import accuracy_train
from ipynb.fs.full.Basic_functions_Kernel_Perceptron import kernel_perceptron
from ipynb.fs.full.Basic_functions_Kernel_Perceptron import confusion_matrix
from ipynb.fs.full.Basic_functions_Kernel_Perceptron import metrics

In [None]:
#trial run to estimate the computation time of the learning algorithm (grade 6, 10 epochs)
#the whole process will take about = max seconds * 60. (6 grades and 10 epochs)
import time
start_time = time.time()
result = kernel_perceptron(x_train, x_dev, y_train, y_dev, 6, 10)
elapsed_seconds = time.time() - start_time
print("--- %s max seconds ---" % elapsed_seconds)

In [None]:
###################################### HYPERPARAMETERS OPTIMIZATION ##########################################################
#shuffle the training dataset: features and labels are stacked side by side so that
#every row keeps its own labels while the rows are permuted
n_features = x_train.shape[1]  #number of feature columns (previously hard-coded as 784)
union = np.concatenate((x_train, y_train), axis = 1)
np.random.seed(223366)  #fixed seed, so the shuffle is reproducible
np.random.shuffle(union)  #in-place shuffle of the rows
result = []  #discards the output of the timing run
#split the shuffled rows back into features and labels
x_shuffled_train = union[:,:n_features]
y_shuffled_train = union[:,n_features:]

In [None]:
#for each combination of kernel grade (1-6) and number of epochs (1-10) run the kernel perceptron and
#collect the accuracies of both predictors (best binary predictor vs mean binary predictor).
#each list ends up with one (epochs x 2) array per grade
train_test_result_best = []
train_test_result_mean = []
for power in range(1,7):
    power_result = []
    for epoch in range(1,11):
        train_test = kernel_perceptron(x_shuffled_train, x_dev, y_shuffled_train, y_dev, power, epoch)
        #keep only the four leading values: entries 4 and 5 are used as confusion matrices later in
        #this notebook, and mixing them with scalars makes np.array() build a ragged array
        #(ValueError on NumPy >= 1.24)
        #assumes kernel_perceptron returns a tuple/list whose entries 0-3 are scalar accuracies - TODO confirm
        power_result.append(train_test[:4])
    power_result = np.array(power_result)
    power_result_best = power_result[:,0:2]  #columns 0-1: predictor built with the best binary classifiers
    power_result_mean = power_result[:,2:4]  #columns 2-3: predictor built with the mean binary classifiers
    train_test_result_best.append(power_result_best)
    #fix: the best-predictor slice used to be appended here a second time, so the
    #mean-predictor results were never stored
    train_test_result_mean.append(power_result_mean)

In [None]:
#plot, for every kernel grade, the two accuracy curves (columns 0 and 1 of the results) of the predictor
#built with the best binary classifiers, as a function of the number of epochs
epoch = list(range(1, 11))
#one colour per grade; the two curves of a grade share the colour and only the first one is labelled
for grade_idx, curve_color in enumerate(["g", "b", "k", "r", "y", "c"]):
    grade_curves = train_test_result_best[grade_idx]
    plt.plot(epoch, grade_curves[:,0], color = curve_color, label = "grade %d" % (grade_idx + 1))
    plt.plot(epoch, grade_curves[:,1], color = curve_color)

#axis names and title
plt.xlabel('Epochs')
plt.ylabel('Test and Training accuracy')
plt.title('Predictor_best_binary_classifiers')

#legend outside the axes, on the right
plt.legend(bbox_to_anchor=(1.04,1), loc="upper left")

#save the plot, then show it
plt.savefig('best_bin_class.png', bbox_inches = 'tight')
plt.show()

In [None]:
#plot, for every kernel grade, the two accuracy curves (columns 0 and 1 of the results) of the predictor
#built with the mean binary classifiers, as a function of the number of epochs
epoch = list(range(1, 11))
#one colour per grade; the two curves of a grade share the colour and only the first one is labelled
for grade_idx, curve_color in enumerate(["g", "b", "k", "r", "y", "c"]):
    grade_curves = train_test_result_mean[grade_idx]
    plt.plot(epoch, grade_curves[:,0], color = curve_color, label = "grade %d" % (grade_idx + 1))
    plt.plot(epoch, grade_curves[:,1], color = curve_color)

#axis names and title
plt.xlabel('Epochs')
plt.ylabel('Test and Training  accuracy')
plt.title('Predictor_mean_binary_classifiers')

#legend outside the axes, on the right
plt.legend(bbox_to_anchor=(1.04,1), loc="upper left")

#save the plot, then show it
plt.savefig('Mean_bin_class.png', bbox_inches = 'tight')
plt.show()

In [None]:
#for each predictor, locate the (grade, epochs) cell of the results grid with the highest value in column 0;
#the indices are zero-based, i.e. (grade - 1, epochs - 1)
#NOTE(review): this is meant to MAXIMIZE THE TEST ACCURACY - confirm that column 0 of the
#kernel_perceptron output is the test accuracy and not the training accuracy
search_best = np.array(train_test_result_best)
search_mean = np.array(train_test_result_mean)
scores_best = search_best[:,:,0]
scores_mean = search_mean[:,:,0]
ind_best = np.unravel_index(scores_best.argmax(), scores_best.shape)
ind_mean = np.unravel_index(scores_mean.argmax(), scores_mean.shape)
print(ind_best, ind_mean)

In [None]:
######################################################  TESTING PREDICTORS ##############################################################
#########################################################################################################################################
#one-hot encode the training labels and store them as int8
yT = pd.get_dummies(yT).astype('int8')
#recode the labels as +1 / -1: every entry that is not 1 becomes -1
yT[yT != 1] = -1

In [None]:
#one-hot encode the test labels and store them as int8
y_tst = pd.get_dummies(y_tst).astype('int8')
#recode the labels as +1 / -1: every entry that is not 1 becomes -1
y_tst[y_tst != 1] = -1

In [None]:
#convert the four tables to NumPy arrays, as required by the function that computes the predictor
xT, yT, x_tst, y_tst = (
    table.to_numpy() for table in (xT, yT, x_tst, y_tst)
)

In [None]:
#standardize the training set (excluding the labels) with its global mean and standard deviation
mean_px = xT.mean().astype(np.float32)
std_px = xT.std().astype(np.float32)
xT = (xT - mean_px)/(std_px)
xT = np.float32(xT)

#fix: standardize the test set with the TRAINING statistics before casting it to float32;
#it used to be left as raw pixel values, on a different scale from the data the predictor is trained on
x_tst = np.float32((x_tst - mean_px)/(std_px))

In [None]:
#use a smaller dataset to decrease the time of computation:
#the first 8000 training rows and the first 2500 test rows
n_train_rows, n_test_rows = 8000, 2500
xTr, yTr = xT[:n_train_rows], yT[:n_train_rows]
x_tst, y_tst = x_tst[:n_test_rows], y_tst[:n_test_rows]

In [None]:
#retrain the kernel perceptron with the hyperparameters found in the first part for the predictor built
#with the best binary classifiers, then evaluate it on the test set
#ind_best is zero-based, hence the +1 to recover the actual grade and number of epochs
grade_best, epochs_best = ind_best[0] + 1, ind_best[1] + 1
accuracy1 = kernel_perceptron(xTr, x_tst, yTr, y_tst, grade_best, epochs_best)
print(accuracy1[0], accuracy1[1], accuracy1[4])

In [None]:
#retrain the kernel perceptron with the hyperparameters found in the first part for the predictor built
#with the mean binary classifiers, then evaluate it on the test set
#ind_mean is zero-based, hence the +1 to recover the actual grade and number of epochs
grade_mean, epochs_mean = ind_mean[0] + 1, ind_mean[1] + 1
accuracy2 = kernel_perceptron(xTr, x_tst, yTr, y_tst, grade_mean, epochs_mean)
print(accuracy2[2], accuracy2[3], accuracy2[5])

In [None]:
#################################################### METRICS ###################################################################
#take the CONFUSION MATRIX of each predictor from the output of kernel_perceptron:
#entry 4 for the "best" predictor, entry 5 for the "mean" predictor
confusion_matrix_best, confusion_matrix_mean = accuracy1[4], accuracy2[5]

In [None]:
#PLOT, as an annotated heatmap, the CONFUSION MATRIX computed from the prediction of the "best_classifier"
conf_1 = pd.DataFrame(confusion_matrix_best, index=range(10), columns=range(10))
cmap = sn.cm.rocket_r
sn.set(font_scale=1.3) #label size
sn.heatmap(conf_1, annot=True, annot_kws={"size": 13}, cmap = cmap, fmt='g' ) #annotation font size 13

#save the plot, then show it
plt.savefig('best_confusion_matrix.png', bbox_inches = 'tight')
plt.show()

In [None]:
#PLOT, as an annotated heatmap, the CONFUSION MATRIX computed from the prediction of the "mean_classifier"
conf_2 = pd.DataFrame(confusion_matrix_mean, index=range(10), columns=range(10))
cmap = sn.cm.rocket_r
sn.set(font_scale=1.4) #label size
sn.heatmap(conf_2, annot=True, annot_kws={"size": 13}, cmap = cmap, fmt='g') #annotation font size 13

#save the plot, then show it
#NOTE(review): the file name says 'mena', presumably a typo for 'mean'; it is kept as is because
#other material may refer to this file name
plt.savefig('mena_confusion_matrix.png', bbox_inches = 'tight')
plt.show()

In [None]:
#compute the per-digit metrics of both predictors from their confusion matrices
#NOTE(review): this cell was described as computing "RECALL" and "SPECIFICITY", while the plots below
#treat entry 0 as precision and entry 1 as recall - confirm against the metrics() helper
metric_best, metric_mean = metrics(confusion_matrix_best), metrics(confusion_matrix_mean)

In [None]:
#show the metrics computed for the predictor built with the best binary classifiers
print(metric_best)

In [None]:
#the four following cells draw and save the bar plots of precision and recall for the two predictors

In [None]:
#bar plot of the per-digit precision (entry 0 of the metrics) of the predictor built with the best binary classifiers
x = [0,1,2,3,4,5,6,7,8,9]
y_precision_best = metric_best[0]
#bar colours: green above 0.95, red below 0.85, grey in between
clrs = ['green' if (precision > 0.95) else 'red'  if (precision < 0.85)  else 'grey' for precision in y_precision_best ]
#x and y are passed by keyword: seaborn >= 0.12 no longer accepts them as positional arguments
fig = sn.barplot(x=x, y=y_precision_best, palette=clrs)
fig.set(xlabel="Digit", ylabel = "Precision") 
#save plot (barplot returns the Axes; the Figure that owns it is the object to save)
fig = fig.get_figure()
fig.savefig('precision_best_classifier.png', bbox_inches = 'tight')

In [None]:
#bar plot of the per-digit recall (entry 1 of the metrics) of the predictor built with the best binary classifiers
x = [0,1,2,3,4,5,6,7,8,9]
y_recall_best = metric_best[1]
#bar colours: green above 0.95, red below 0.825, grey in between
clrs = ['green' if (recall > 0.95) else 'red'  if (recall < 0.825)  else 'grey' for recall in y_recall_best ]
#x and y are passed by keyword: seaborn >= 0.12 no longer accepts them as positional arguments
fig1 = sn.barplot(x=x, y=y_recall_best, palette=clrs)

fig1.set(xlabel="Digit", ylabel = "Recall") 
#save plot (barplot returns the Axes; the Figure that owns it is the object to save)
fig1 = fig1.get_figure()
fig1.savefig('recall_best_classifier.png', bbox_inches = 'tight')

In [None]:
#bar plot of the per-digit precision (entry 0 of the metrics) of the predictor built with the mean binary classifiers
x = [0,1,2,3,4,5,6,7,8,9]
y_precision_mean = metric_mean[0]
#bar colours: green above 0.95, red below 0.825, grey in between
clrs = ['green' if (precision > 0.95) else 'red'  if (precision < 0.825)  else 'grey' for precision in y_precision_mean ]
#x and y are passed by keyword: seaborn >= 0.12 no longer accepts them as positional arguments
fig2 = sn.barplot(x=x, y=y_precision_mean, palette=clrs)
fig2.set(xlabel="Digit", ylabel = "Precision") 
#save plot (barplot returns the Axes; the Figure that owns it is the object to save)
fig2 = fig2.get_figure()
fig2.savefig('precision_mean_classifier.png', bbox_inches = 'tight')

In [None]:
#bar plot of the per-digit recall (entry 1 of the metrics) of the predictor built with the mean binary classifiers
x = [0,1,2,3,4,5,6,7,8,9]
y_recall_mean = metric_mean[1]
#bar colours: green above 0.95, red below 0.825, grey in between
clrs = ['green' if (recall > 0.95) else 'red'  if (recall < 0.825)  else 'grey' for recall in y_recall_mean ]
#x and y are passed by keyword: seaborn >= 0.12 no longer accepts them as positional arguments
fig3 = sn.barplot(x=x, y=y_recall_mean, palette=clrs)
fig3.set(xlabel="Digit", ylabel = "Recall") 
#save plot (barplot returns the Axes; the Figure that owns it is the object to save)
fig3 = fig3.get_figure()
fig3.savefig('recall_mean_classifier.png', bbox_inches = 'tight')

In [None]:
#TRAINING AND TEST ACCURACY VARYING THE NUMBER OF EXAMPLES IN THE TRAINING SET
#the kernel perceptron is retrained on the first `number` training rows for each size in train_size
#NOTE(review): both predictors are run with the hyperparameters selected for the "best" predictor
#(ind_best); confirm that this is intended for the "mean" results as well
train_size = [2500, 5000, 7500, 10000, 15000]
result = []
for number in train_size:
    train_tst = kernel_perceptron(xT[:number,:], x_tst, yT[:number,:], y_tst, ind_best[0]+1, ind_best[1]+1)
    #keep only the four leading values: entries 4 and 5 are used as confusion matrices elsewhere in
    #this notebook, and mixing them with scalars makes np.array() build a ragged array
    #(ValueError on NumPy >= 1.24)
    #assumes kernel_perceptron returns a tuple/list whose entries 0-3 are scalar accuracies - TODO confirm
    result.append(train_tst[:4])
result = np.array(result)
result_best = result[:,0:2]  #columns 0-1: predictor built with the best binary classifiers
result_mean = result[:,2:4]  #columns 2-3: predictor built with the mean binary classifiers

In [None]:
#plot the two accuracy curves (columns 0 and 1) of each predictor against the training-set size;
#the two curves of a predictor share the colour and only the first one is labelled
for size_curves, curve_color, curve_label in ((result_best, "g", "best"), (result_mean, "b", "mean")):
    plt.plot(train_size, size_curves[:,0], color = curve_color, label = curve_label)
    plt.plot(train_size, size_curves[:,1], color = curve_color)

#axis names
plt.xlabel('Training_size')
plt.ylabel('Test and Training  accuracy')

#legend outside the axes, on the right
plt.legend(bbox_to_anchor=(1.04,1), loc="upper left")

#save the plot, then show it
plt.savefig('train_size.png', bbox_inches = 'tight')
plt.show()

In [None]:
#accuracies of the "best" predictor, one row per training-set size in train_size
print(result_best)

In [None]:
#accuracies of the "mean" predictor, one row per training-set size in train_size
print(result_mean)

In [None]:
#running the entire notebook takes about 25 minutes