In [29]:
import numpy as np
import pandas as pd
import random

%matplotlib inline
from matplotlib import pyplot as plt

from sklearn import datasets
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score 
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import KFold 
from sklearn.metrics import accuracy_score

## Exercise 1 : Optical Character Recognition via Neural Networks

In [4]:
# Load the digits dataset that ships with scikit-learn (accessed through `sklearn.datasets`).
# The returned object is used below via its `images`, `target` and `target_names` entries.
data = datasets.load_digits()

In [11]:
# Show which fields the dataset object offers and the class labels it contains.
print(data.keys(), data.target_names, sep='\n')

dict_keys(['data', 'target', 'frame', 'feature_names', 'target_names', 'images', 'DESCR'])
[0 1 2 3 4 5 6 7 8 9]


In [30]:
# Flatten every image into a single row of pixel values and expose the result
# as a DataFrame (one row per sample) so the folds can be sliced with `.iloc`.
digits = pd.DataFrame(data.images.reshape(len(data.images), -1))

**Initialize parameter space**

In [17]:
# Candidate values for each MLP hyperparameter; the randomized search draws
# its trial configurations from these lists.
parameter_space = dict(
    hidden_layer_sizes=[(10, 30, 10), (20,)],
    activation=['tanh', 'relu'],
    solver=['sgd', 'adam', 'lbfgs'],
    alpha=[0.0001, 0.05],
    learning_rate=['constant', 'adaptive'],
)

1. Initialize `MLPClassifier`.
2. Run `RandomizedSearchCV` on the training split of every fold.
3. Finally, report the average of the test accuracies.
4. Also print the best parameters found for each fold.

#### Best hyperparameters and test accuracy of each fold (printed below):

In [32]:
# 5-fold outer cross-validation: in every fold the hyperparameters are tuned on
# the training split only, and the held-out split is used once to score the result.
k = 5
kf = KFold(n_splits=k)
acc_score = []

# Initialize MLPClassifier (base estimator handed to the search in every fold)
mlp_clf = MLPClassifier(random_state=1, max_iter=1000)

# For every fold, random search and report test accuracy
for train_index, test_index in kf.split(digits):
    X_train, X_test = digits.iloc[train_index, :], digits.iloc[test_index, :]
    y_train, y_test = data.target[train_index], data.target[test_index]

    # random_state=0 fixes the seed used to sample candidates from `parameter_space`
    clf = RandomizedSearchCV(mlp_clf, parameter_space, random_state=0)
    model = clf.fit(X_train, y_train)

    print(model.best_params_)

    pred_values = model.predict(X_test)

    # accuracy_score is documented as (y_true, y_pred): ground truth goes first
    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

# Average over the scores actually collected rather than the configured constant
avg_acc_score = sum(acc_score) / len(acc_score)

print('accuracy of each fold - {}'.format(acc_score))
print('Avg accuracy : {}'.format(avg_acc_score))

{'solver': 'lbfgs', 'learning_rate': 'constant', 'hidden_layer_sizes': (20,), 'alpha': 0.0001, 'activation': 'relu'}
{'solver': 'adam', 'learning_rate': 'adaptive', 'hidden_layer_sizes': (20,), 'alpha': 0.0001, 'activation': 'relu'}
{'solver': 'adam', 'learning_rate': 'adaptive', 'hidden_layer_sizes': (20,), 'alpha': 0.0001, 'activation': 'relu'}
{'solver': 'adam', 'learning_rate': 'adaptive', 'hidden_layer_sizes': (20,), 'alpha': 0.0001, 'activation': 'relu'}
{'solver': 'adam', 'learning_rate': 'adaptive', 'hidden_layer_sizes': (20,), 'alpha': 0.0001, 'activation': 'relu'}
accuracy of each fold - [0.9333333333333333, 0.9027777777777778, 0.9526462395543176, 0.9415041782729805, 0.9303621169916435]
Avg accuracy : 0.9321247291860105
