#  **Exercise 1: Optical Character Recognition via Neural Networks**
Develop a neural network model that classifies handwritten digits into one of the
ten digit classes (0–9), using hyperparameter optimization and k-fold cross-validation with the scikit-learn library.

In [None]:
from sklearn.datasets import load_digits
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
%matplotlib inline
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")
from numpy.random import seed
seed(seed=3116)

### *Load the MNIST digits dataset*

In [None]:
from sklearn.datasets import fetch_openml

# Fetch the `mnist_784` dataset from OpenML.
mnist = fetch_openml('mnist_784')

# Depending on the scikit-learn version, `data` / `target` may be pandas
# objects rather than NumPy arrays. np.asarray() normalises both cases (it is
# a no-op for arrays) so that the positional indexing used further down, such
# as X_data[train_index], works regardless of the installed version.
X_data = np.asarray(mnist.data)
y_data = np.asarray(mnist.target)

In [None]:
# Report the dimensions of the feature matrix and of the label vector.
features_shape, labels_shape = np.shape(X_data), np.shape(y_data)
print("x", features_shape)
print("y", labels_shape)

x (70000, 784)
y (70000,)


In [None]:
# Working with partial data because the full set is too heavy for my laptop:
# keep only the first 5000 samples together with their labels.
X_data, y_data = X_data[:5000], y_data[:5000]

In [None]:
# Hold out 20% of the samples as the test set; the fixed random_state makes
# the split repeatable. X / Y are the remaining 80% (train + validation pool).
X, X_test, Y, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    random_state=1,
)

In [None]:
pool_shape = np.shape(X)            # 80%: train + validation pool
held_out_shape = np.shape(X_test)   # 20%: test
print("Shape X:", pool_shape)
print("Test data dimensions:", held_out_shape)

Shape X: (4000, 784)
Test data dimensions: (1000, 784)


### *K-fold cross-validation*

In [None]:
# 4 splits of the 80% train/validation pool: each fold trains on 60% of the
# data and validates on 20%; the remaining 20% is the separate test set.
kf = KFold(n_splits=4)

# kf.split(X) yields positions within X / Y (the train/validation pool), so
# they must be applied to X and Y. Applying them to the unsplit X_data /
# y_data would select different rows, some of which can also be in X_test,
# leaking test samples into the training data.
# NOTE: every iteration overwrites the previous one, so only the split of the
# final fold is kept once the loop has finished.
for train_index, test_index in kf.split(X):
    X_train, X_val = X[train_index], X[test_index]
    y_train, y_val = Y[train_index], Y[test_index]

In [None]:
fold_train_shape = np.shape(X_train)   # 60%: training part of the fold
fold_val_shape = np.shape(X_val)       # 20%: validation part of the fold
print("Train data dimensions:", fold_train_shape)
print("Validation dimensions:", fold_val_shape)

Train data dimensions: (3000, 784)
Validation dimensions: (1000, 784)


### *Tune MLPClassifier with randomized search, then train the model*

In [None]:
# Search space for the randomized hyperparameter search: every key is an
# MLPClassifier constructor argument and every list holds its candidate values.
hyperparameter_spec = dict(
    activation=['identity', 'logistic', 'tanh', 'relu'],
    solver=['lbfgs', 'sgd', 'adam'],
    alpha=[0.01, 0.001, 0.0001],
    batch_size=['auto', 200],
    learning_rate=['constant', 'adaptive', 'invscaling'],
    max_iter=[100, 150, 200],
)

In [None]:
# Base estimator with default settings; the search supplies the tuned arguments.
mlp = MLPClassifier()

# Randomized search over hyperparameter_spec, evaluating every sampled
# candidate with 4-fold cross-validation on the training split.
clf = RandomizedSearchCV(
    estimator=mlp,
    param_distributions=hyperparameter_spec,
    cv=4,
)
clf.fit(X_train, y_train)

RandomizedSearchCV(cv=4, error_score='raise-deprecating',
          estimator=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False),
          fit_params=None, iid='warn', n_iter=10, n_jobs=None,
          param_distributions={'activation': ['identity', 'logistic', 'tanh', 'relu'], 'solver': ['lbfgs', 'sgd', 'adam'], 'alpha': [0.01, 0.001, 0.0001], 'batch_size': ['auto', 200], 'learning_rate': ['constant', 'adaptive', 'invscaling'], 'max_iter': [100, 150, 200]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score='warn', scoring=None, verbose=0)

### *Best hyperparameters*

In [None]:
# Tabulate the per-candidate cross-validation results recorded by the search.
clf_cv_results = pd.DataFrame(data=clf.cv_results_)
print(clf_cv_results)

   mean_fit_time  std_fit_time  mean_score_time  std_score_time param_solver  \
0       3.746235      0.101013         0.004737        0.000826          sgd   
1       0.426361      0.016697         0.003990        0.000001          sgd   
2       3.441300      0.019499         0.004247        0.000833          sgd   
3       1.504478      0.435252         0.003740        0.000432        lbfgs   
4       5.640171      0.214972         0.004495        0.000505          sgd   
5       1.560322      0.203042         0.004488        0.000498         adam   
6       2.578357      0.044921         0.005236        0.000826          sgd   
7       7.498201      0.188289         0.004489        0.001115          sgd   
8       4.346628      0.130448         0.005236        0.000827         adam   
9       4.195533      0.700935         0.004986        0.000704         adam   

  param_max_iter param_learning_rate param_batch_size param_alpha  \
0            100          invscaling             a

In [None]:
# Hyperparameter combination reported as best by the fitted search object.
a = clf.best_params_
print(a)

{'solver': 'adam', 'max_iter': 100, 'learning_rate': 'constant', 'batch_size': 'auto', 'alpha': 0.01, 'activation': 'logistic'}


In [None]:
# The search was created with scoring=None, so candidates are ranked by the
# estimator's own score, which for a classifier is mean accuracy, not R2.
# best_score_ is the mean cross-validated score of the best candidate, i.e.
# the maximum of the mean_test_score column.
print("One with best CV accuracy:", round(clf.best_score_, 4), clf.best_params_)

One with best R2 score: 0.9003 {'solver': 'adam', 'max_iter': 100, 'learning_rate': 'constant', 'batch_size': 'auto', 'alpha': 0.01, 'activation': 'logistic'}


In [None]:
# Build the classifier from the hyperparameters the search actually selected
# instead of values copied by hand from one particular run: the search is
# randomized, so a re-run may select a different combination.
clf_best = MLPClassifier(**clf.best_params_)

# 4-fold cross-validation of that configuration on the training split.
clf_best_model_cv = cross_val_score(clf_best, X_train, y_train, cv=4)

In [None]:
# Per-fold scores, followed by their mean rounded to four decimals.
cv_mean = round(np.mean(clf_best_model_cv), 4)
print("CV results on best parameters:", clf_best_model_cv)
print("CV results mean on best parameters:", cv_mean)

CV results on best parameters: [0.88609272 0.87749667 0.92389853 0.90067114]
CV results mean on best parameters: 0.897


### *Test accuracy*

In [None]:
# Predict the held-out test set with the fitted search object and compare the
# predictions against the true labels.
y_pred = clf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print('The accuracy of the best model is :', test_accuracy)

The accuracy of the best model is : 0.961
