# COMP47590: Advanced Machine Learning
# Assignment 1: Implementing Perceptrons

- Student Name: Shuhao Guan
- Student Number: 20211120

## Import Packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import cross_validate
import math
from sklearn import neural_network
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted, check_random_state
from sklearn.utils.multiclass import unique_labels
from sklearn import preprocessing
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
import tensorflow as tf
import tensorflow.keras
from sklearn.model_selection import cross_validate

## Task 1: The Perceptron Classifier

Define the PerceptronClassifier class

In [2]:
class PerceptronClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, layers, activation='sigmoid',learning_rate=0.1, epochs=40000,l2=None):
        self.activation=activation
        self.activation_deriv=activation+'_deriv'
        self.layers=layers
        self.learning_rate=learning_rate
        self.epochs=epochs
        self.l2=l2
        if l2==None:
            self.l2=0
        self.weights = []  
        self.b=[]
        for i in range(1, len(layers) - 1):  #Set the initial weight
            self.b.append(np.random.randn(1 , layers[i] ))
            self.weights.append(np.random.randn(layers[i - 1] , layers[i] )/np.sqrt(layers[i-1]) )  
            #The gaussian distribution generates a random matrix 
        self.weights.append(np.random.randn(layers[i] , layers[i + 1]))
        self.b.append(np.random.randn(1 , layers[i + 1]))
        
    def sigmoid(self,x):
        return .5 * (1 + np.tanh(.5 * x))
#         return 1/(1 + np.exp(-x))
    def sigmoid_deriv(self,x):  
        return x*(1-x)
    def tanh(self,x):  
        return  np.tanh(x)   
    def tanh_deriv(self,x):  
        return (1+(x))*(1-(x))
        
    # The fit function to train a classifier
    def fit(self, X, y):         
        X = np.atleast_2d(X)                 
        y = np.array(y)
        for k in range(self.epochs):  #epchos represents the number of iterations
            i = np.random.randint(X.shape[0])   
            #Randomly select any one sample point in the sample
            a = [X[i]]
    
            for l in range(len(self.weights)):  
            #Feed-forward neural network, calculating the value of each neuron
                a.append(eval('PerceptronClassifier.'+self.activation)(self,np.dot(a[l], self.weights[l])+self.b[l]))  
                #Compute the l+1 layer neuron parameters from the l layer, a(l+1)=f(x(l)W(l)),where f is the activation function
            error = y[i] - a[-1]  #Calculate the error in the last layer
#             print('error:',error)
            deltas = [error * eval('PerceptronClassifier.'+self.activation_deriv)(self,a[-1])]
            #Start BP algorithm
            for l in range(len(a) - 2, 0, -1): 
                # Calculate delta from the penultimate level 
                deltas.append(deltas[-1].dot(self.weights[l].T)*eval('PerceptronClassifier.'+self.activation)(self,a[l]))  
                #delta(l)=(delta(l+1)*W(l)^T).*h(a(l))
            deltas.reverse()  #Arrange delta in order of feedforward neural network
            for i in range(len(self.weights)):  
                layer =np.atleast_2d(a[i])
                delta = np.atleast_2d(deltas[i]) 
                self.weights[i] = self.weights[i]+self.learning_rate * (layer.T.dot(delta)+self.l2*self.weights[i])
                self.b[i]=self.b[i]+self.learning_rate * (delta+self.l2*self.b[i])#The partial derivative of the layer update is the transpose rank of a[i] * delta[i]  
        
    # The predict_proba function to make a set of predictions for a set of query instances. This returns a set of class distributions.
    def predict_proba(self, x):
        x = np.array(x)         
        a=x
        for l in range(0, len(self.weights)):             
            a = eval('PerceptronClassifier.'+self.activation)(self,np.dot(a, self.weights[l])+self.b[l])         
        return a

    # The predict function to make a set of predictions for a set of query instances
    def predict(self, x):
        yy=self.predict_proba(x)
        for i in range(len(yy)):
            if yy[i][0]>yy[i][1]:
                yy[i][0]=1
                yy[i][1]=0
            else:
                yy[i][0]=0
                yy[i][1]=1
        return yy


## Task 2: Evaluation

Load the Diabethic Retinopathy dataset

In [3]:
data=pd.read_csv('messidor_features.csv')

Pre-processing the data.

In [4]:
Y = data.pop('Class')
X = data
Y_=list(set(Y))
for i in range(len(Y)):
    if Y[i]==Y_[0]:
        Y[i]=0
    else:
        Y[i]=1
X=np.array(X)
Y=np.array(Y)
z_scaler = preprocessing.StandardScaler()
X = z_scaler.fit_transform(X)
Y = tensorflow.keras.utils.to_categorical(Y, len(set(Y)))
Y=Y.astype(int)

Split the data into a **training set** and **test set**

In [5]:
X_train, X_test, y_train, y_test \
    = train_test_split(X, Y, \
                       shuffle=True, \
                       stratify = Y, \
                       train_size = 0.7)

### My PerceptronClassifier

In [6]:
MY_MLP1=PerceptronClassifier([19,300,100,2], 'sigmoid',0.01,80000,0)
MY_MLP1.fit(X_train,y_train)

Assess the performance on the training set

In [7]:
# Make a set of predictions for the training data
y_pred = MY_MLP1.predict(X_train)
accuracy = metrics.accuracy_score(y_train, y_pred)

In [8]:
print("Accuracy: " +  str(accuracy))
print(metrics.classification_report(y_train, y_pred))

Accuracy: 0.7490683229813665
              precision    recall  f1-score   support

           0       0.72      0.75      0.74       378
           1       0.77      0.74      0.76       427

   micro avg       0.75      0.75      0.75       805
   macro avg       0.75      0.75      0.75       805
weighted avg       0.75      0.75      0.75       805
 samples avg       0.75      0.75      0.75       805



Assess the performance on the test set

In [9]:
# Make a set of predictions for the training data
y_pred = MY_MLP1.predict(X_test)
accuracy = metrics.accuracy_score(y_test, y_pred)

In [10]:
print("Accuracy: " +  str(accuracy))
print(metrics.classification_report(y_test, y_pred))

Accuracy: 0.7167630057803468
              precision    recall  f1-score   support

           0       0.68      0.73      0.71       162
           1       0.75      0.70      0.72       184

   micro avg       0.72      0.72      0.72       346
   macro avg       0.72      0.72      0.72       346
weighted avg       0.72      0.72      0.72       346
 samples avg       0.72      0.72      0.72       346



Evaluate the perfomrance of the perceptron classifier on the daibetic retinopathy dataset with cross validate.

In [11]:
# MY_MLP
MY_MLP=PerceptronClassifier([19,300,100,2], 'sigmoid',0.01,80000,0)
scores1 = cross_validate(MY_MLP, X, Y,cv=5,return_train_score=True)
print(scores1)

{'fit_time': array([22.15905643, 22.2041688 , 22.3154428 , 22.29046607, 22.6674161 ]), 'score_time': array([0.00598431, 0.00299263, 0.00398993, 0.00299168, 0.00299144]), 'test_score': array([0.74458874, 0.7       , 0.70869565, 0.70869565, 0.74782609]), 'train_score': array([0.75434783, 0.74049946, 0.7339848 , 0.69489685, 0.7752443 ])}


In [12]:
print('The mean score of my MLP model in cross validation is '+str(scores1['test_score'].mean()))

The mean score of my MLP model in cross validation is 0.7219612271786185


### Sklearn PerceptronClassifier

In [13]:
SK_MLP = neural_network.MLPClassifier(hidden_layer_sizes=(300, 100),max_iter=80000,learning_rate_init=0.01,activation='logistic')
SK_MLP.fit(X_train,y_train)

MLPClassifier(activation='logistic', hidden_layer_sizes=(300, 100),
              learning_rate_init=0.01, max_iter=80000)

Assess the performance on the training set

In [14]:
# Make a set of predictions for the training data
y_pred = SK_MLP.predict(X_train)
accuracy = metrics.accuracy_score(y_train, y_pred)

In [15]:
print("Accuracy: " +  str(accuracy))
print(metrics.classification_report(y_train, y_pred))

Accuracy: 0.6645962732919255
              precision    recall  f1-score   support

           0       0.59      0.94      0.73       378
           1       0.88      0.45      0.60       427

   micro avg       0.67      0.68      0.68       805
   macro avg       0.74      0.70      0.66       805
weighted avg       0.75      0.68      0.66       805
 samples avg       0.67      0.68      0.68       805



Assess the performance on the test set

In [16]:
# Make a set of predictions for the training data
y_pred = SK_MLP.predict(X_test)
accuracy = metrics.accuracy_score(y_test, y_pred)

In [17]:
print("Accuracy: " +  str(accuracy))
print(metrics.classification_report(y_test, y_pred))

Accuracy: 0.6647398843930635
              precision    recall  f1-score   support

           0       0.60      0.93      0.72       162
           1       0.86      0.46      0.60       184

   micro avg       0.67      0.68      0.67       346
   macro avg       0.73      0.69      0.66       346
weighted avg       0.73      0.68      0.66       346
 samples avg       0.67      0.68      0.67       346



Evaluate the perfomrance of the perceptron classifier on the daibetic retinopathy dataset with cross validate.

In [18]:
# SK_MLP
SK_MLP = neural_network.MLPClassifier(hidden_layer_sizes=(300, 100),max_iter=80000,learning_rate_init=0.01,activation='logistic')
scores2 = cross_validate(SK_MLP, X, Y,cv=5,return_train_score=True)
print(scores2)

{'fit_time': array([1.53190494, 1.3354311 , 1.36734533, 1.67252946, 2.3148129 ]), 'score_time': array([0.00199461, 0.00199437, 0.00199437, 0.00199461, 0.00199485]), 'test_score': array([0.76190476, 0.73913043, 0.73913043, 0.73043478, 0.74347826]), 'train_score': array([0.81521739, 0.81758958, 0.82301846, 0.84907709, 0.88925081])}


In [19]:
print('The mean score of sklearn MLP model in cross validation is '+str(scores2['test_score'].mean()))

The mean score of sklearn MLP model in cross validation is 0.7428157349896479


## Task 3 & 4: Add Different Activations & Regularisation

Reimplement the PerceptronClassifier class adding an activation function option and L2 regularisation. 

In [20]:
class PerceptronClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, layers=[19,1,2], activation='sigmoid',learning_rate=0.1, epochs=40000,l2=None):
        self.activation=activation
        self.activation_deriv=activation+'_deriv'
        self.layers=layers
        self.learning_rate=learning_rate
        self.epochs=epochs
        self.l2=l2
        if l2==None:
            self.l2=0
        self.weights = []  
        self.b=[]
        for i in range(1, len(layers) - 1):  #Set the initial weight
            self.b.append(np.random.randn(1 , layers[i] ))
            self.weights.append(np.random.randn(layers[i - 1] , layers[i] )/np.sqrt(layers[i-1]) )  
            #The gaussian distribution generates a random matrix 
        self.weights.append(np.random.randn(layers[i] , layers[i + 1]))
        self.b.append(np.random.randn(1 , layers[i + 1]))
        
    def sigmoid(self,x):
        return .5 * (1 + np.tanh(.5 * x))
#         return 1/(1 + np.exp(-x))
    def sigmoid_deriv(self,x):  
        return x*(1-x)
    def tanh(self,x):  
        return  np.tanh(x)   
    def tanh_deriv(self,x):  
        return (1+(x))*(1-(x))
        
    # The fit function to train a classifier
    def fit(self, X, y):         
        X = np.atleast_2d(X)                 
        y = np.array(y)
        for k in range(self.epochs):  #epchos represents the number of iterations
            i = np.random.randint(X.shape[0])   
            #Randomly select any one sample point in the sample
            a = [X[i]]
    
            for l in range(len(self.weights)):  
            #Feed-forward neural network, calculating the value of each neuron
                a.append(eval('PerceptronClassifier.'+self.activation)(self,np.dot(a[l], self.weights[l])+self.b[l]))  
                #Compute the l+1 layer neuron parameters from the l layer, a(l+1)=f(x(l)W(l)),where f is the activation function
            error = y[i] - a[-1]  #Calculate the error in the last layer
#             print('error:',error)
            deltas = [error * eval('PerceptronClassifier.'+self.activation_deriv)(self,a[-1])]
            #Start BP algorithm
            for l in range(len(a) - 2, 0, -1): 
                # Calculate delta from the penultimate level 
                deltas.append(deltas[-1].dot(self.weights[l].T)*eval('PerceptronClassifier.'+self.activation)(self,a[l]))  
                #delta(l)=(delta(l+1)*W(l)^T).*h(a(l))
            deltas.reverse()  #Arrange delta in order of feedforward neural network
            for i in range(len(self.weights)):  
                layer =np.atleast_2d(a[i])
                delta = np.atleast_2d(deltas[i]) 
                self.weights[i] = self.weights[i]+self.learning_rate * (layer.T.dot(delta)+self.l2*self.weights[i])
                self.b[i]=self.b[i]+self.learning_rate * (delta+self.l2*self.b[i])
                #The partial derivative of the layer update is the transpose rank of a[i] * delta[i]  
        
    # The predict_proba function to make a set of predictions for a set of query instances. This returns a set of class distributions.
    def predict_proba(self, x):
        x = np.array(x)         
        a=x
        for l in range(0, len(self.weights)):             
            a = eval('PerceptronClassifier.'+self.activation)(self,np.dot(a, self.weights[l])+self.b[l])         
        return a

    # The predict function to make a set of predictions for a set of query instances
    def predict(self, x):
        yy=self.predict_proba(x)
        for i in range(len(yy)):
            if yy[i][0]>yy[i][1]:
                yy[i][0]=1
                yy[i][1]=0
            else:
                yy[i][0]=0
                yy[i][1]=1
        return yy

## Task 5: Reflect on the Performance of the Different Models Evaluated

Perform hyper-parameter tuning and evaluate models. 

In [27]:
# Set up the parameter grid to seaerch
from sklearn.model_selection import GridSearchCV
param_grid ={'layers': [[19,30,20,2],[19,300,100,2], [19,20,2]], \
             'activation': ['sigmoid','tanh'], \
             'learning_rate': [0.01,0.05,0.1],
            'epochs':[80000,20000],
             'l2':[0.0001,0.001,0]
            }

# Perform the search
my_tuned_MLP = GridSearchCV(PerceptronClassifier(), \
                                param_grid, cv=5,verbose = 2, \
                            return_train_score=True)
my_tuned_MLP.fit(X_train,y_train)

Fitting 5 folds for each of 108 candidates, totalling 540 fits
[CV] END activation=sigmoid, epochs=80000, l2=0.0001, layers=[19, 30, 20, 2], learning_rate=0.01; total time=   6.2s
[CV] END activation=sigmoid, epochs=80000, l2=0.0001, layers=[19, 30, 20, 2], learning_rate=0.01; total time=   6.2s
[CV] END activation=sigmoid, epochs=80000, l2=0.0001, layers=[19, 30, 20, 2], learning_rate=0.01; total time=   6.2s
[CV] END activation=sigmoid, epochs=80000, l2=0.0001, layers=[19, 30, 20, 2], learning_rate=0.01; total time=   6.2s
[CV] END activation=sigmoid, epochs=80000, l2=0.0001, layers=[19, 30, 20, 2], learning_rate=0.01; total time=   6.2s
[CV] END activation=sigmoid, epochs=80000, l2=0.0001, layers=[19, 30, 20, 2], learning_rate=0.05; total time=   6.1s
[CV] END activation=sigmoid, epochs=80000, l2=0.0001, layers=[19, 30, 20, 2], learning_rate=0.05; total time=   6.1s
[CV] END activation=sigmoid, epochs=80000, l2=0.0001, layers=[19, 30, 20, 2], learning_rate=0.05; total time=   6.1s
[

[CV] END activation=sigmoid, epochs=80000, l2=0.001, layers=[19, 300, 100, 2], learning_rate=0.1; total time=   6.5s
[CV] END activation=sigmoid, epochs=80000, l2=0.001, layers=[19, 300, 100, 2], learning_rate=0.1; total time=   6.8s
[CV] END activation=sigmoid, epochs=80000, l2=0.001, layers=[19, 300, 100, 2], learning_rate=0.1; total time=   6.6s
[CV] END activation=sigmoid, epochs=80000, l2=0.001, layers=[19, 300, 100, 2], learning_rate=0.1; total time=   6.5s
[CV] END activation=sigmoid, epochs=80000, l2=0.001, layers=[19, 300, 100, 2], learning_rate=0.1; total time=   6.4s
[CV] END activation=sigmoid, epochs=80000, l2=0.001, layers=[19, 20, 2], learning_rate=0.01; total time=   6.5s
[CV] END activation=sigmoid, epochs=80000, l2=0.001, layers=[19, 20, 2], learning_rate=0.01; total time=   6.5s
[CV] END activation=sigmoid, epochs=80000, l2=0.001, layers=[19, 20, 2], learning_rate=0.01; total time=   6.4s
[CV] END activation=sigmoid, epochs=80000, l2=0.001, layers=[19, 20, 2], learni

[CV] END activation=sigmoid, epochs=20000, l2=0.0001, layers=[19, 30, 20, 2], learning_rate=0.05; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.0001, layers=[19, 30, 20, 2], learning_rate=0.1; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.0001, layers=[19, 30, 20, 2], learning_rate=0.1; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.0001, layers=[19, 30, 20, 2], learning_rate=0.1; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.0001, layers=[19, 30, 20, 2], learning_rate=0.1; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.0001, layers=[19, 30, 20, 2], learning_rate=0.1; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.0001, layers=[19, 300, 100, 2], learning_rate=0.01; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.0001, layers=[19, 300, 100, 2], learning_rate=0.01; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.0001, layers=[19,

[CV] END activation=sigmoid, epochs=20000, l2=0.001, layers=[19, 20, 2], learning_rate=0.05; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.001, layers=[19, 20, 2], learning_rate=0.05; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.001, layers=[19, 20, 2], learning_rate=0.05; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.001, layers=[19, 20, 2], learning_rate=0.05; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.001, layers=[19, 20, 2], learning_rate=0.05; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.001, layers=[19, 20, 2], learning_rate=0.1; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.001, layers=[19, 20, 2], learning_rate=0.1; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.001, layers=[19, 20, 2], learning_rate=0.1; total time=   1.5s
[CV] END activation=sigmoid, epochs=20000, l2=0.001, layers=[19, 20, 2], learning_rate=0.1; total time=   1

[CV] END activation=tanh, epochs=80000, l2=0.0001, layers=[19, 300, 100, 2], learning_rate=0.01; total time=   5.4s
[CV] END activation=tanh, epochs=80000, l2=0.0001, layers=[19, 300, 100, 2], learning_rate=0.05; total time=   5.4s
[CV] END activation=tanh, epochs=80000, l2=0.0001, layers=[19, 300, 100, 2], learning_rate=0.05; total time=   5.5s
[CV] END activation=tanh, epochs=80000, l2=0.0001, layers=[19, 300, 100, 2], learning_rate=0.05; total time=   5.5s
[CV] END activation=tanh, epochs=80000, l2=0.0001, layers=[19, 300, 100, 2], learning_rate=0.05; total time=   5.5s
[CV] END activation=tanh, epochs=80000, l2=0.0001, layers=[19, 300, 100, 2], learning_rate=0.05; total time=   5.4s
[CV] END activation=tanh, epochs=80000, l2=0.0001, layers=[19, 300, 100, 2], learning_rate=0.1; total time=   5.5s
[CV] END activation=tanh, epochs=80000, l2=0.0001, layers=[19, 300, 100, 2], learning_rate=0.1; total time=   5.4s
[CV] END activation=tanh, epochs=80000, l2=0.0001, layers=[19, 300, 100, 2

[CV] END activation=tanh, epochs=80000, l2=0, layers=[19, 30, 20, 2], learning_rate=0.01; total time=   5.5s
[CV] END activation=tanh, epochs=80000, l2=0, layers=[19, 30, 20, 2], learning_rate=0.01; total time=   5.5s
[CV] END activation=tanh, epochs=80000, l2=0, layers=[19, 30, 20, 2], learning_rate=0.05; total time=   5.4s
[CV] END activation=tanh, epochs=80000, l2=0, layers=[19, 30, 20, 2], learning_rate=0.05; total time=   5.5s
[CV] END activation=tanh, epochs=80000, l2=0, layers=[19, 30, 20, 2], learning_rate=0.05; total time=   5.4s
[CV] END activation=tanh, epochs=80000, l2=0, layers=[19, 30, 20, 2], learning_rate=0.05; total time=   5.4s
[CV] END activation=tanh, epochs=80000, l2=0, layers=[19, 30, 20, 2], learning_rate=0.05; total time=   5.5s
[CV] END activation=tanh, epochs=80000, l2=0, layers=[19, 30, 20, 2], learning_rate=0.1; total time=   5.5s
[CV] END activation=tanh, epochs=80000, l2=0, layers=[19, 30, 20, 2], learning_rate=0.1; total time=   5.5s
[CV] END activation=t

[CV] END activation=tanh, epochs=20000, l2=0.0001, layers=[19, 20, 2], learning_rate=0.01; total time=   1.3s
[CV] END activation=tanh, epochs=20000, l2=0.0001, layers=[19, 20, 2], learning_rate=0.01; total time=   1.3s
[CV] END activation=tanh, epochs=20000, l2=0.0001, layers=[19, 20, 2], learning_rate=0.01; total time=   1.3s
[CV] END activation=tanh, epochs=20000, l2=0.0001, layers=[19, 20, 2], learning_rate=0.05; total time=   1.3s
[CV] END activation=tanh, epochs=20000, l2=0.0001, layers=[19, 20, 2], learning_rate=0.05; total time=   1.3s
[CV] END activation=tanh, epochs=20000, l2=0.0001, layers=[19, 20, 2], learning_rate=0.05; total time=   1.3s
[CV] END activation=tanh, epochs=20000, l2=0.0001, layers=[19, 20, 2], learning_rate=0.05; total time=   1.3s
[CV] END activation=tanh, epochs=20000, l2=0.0001, layers=[19, 20, 2], learning_rate=0.05; total time=   1.3s
[CV] END activation=tanh, epochs=20000, l2=0.0001, layers=[19, 20, 2], learning_rate=0.1; total time=   1.3s
[CV] END ac

[CV] END activation=tanh, epochs=20000, l2=0, layers=[19, 300, 100, 2], learning_rate=0.01; total time=   1.4s
[CV] END activation=tanh, epochs=20000, l2=0, layers=[19, 300, 100, 2], learning_rate=0.01; total time=   1.3s
[CV] END activation=tanh, epochs=20000, l2=0, layers=[19, 300, 100, 2], learning_rate=0.01; total time=   1.3s
[CV] END activation=tanh, epochs=20000, l2=0, layers=[19, 300, 100, 2], learning_rate=0.01; total time=   1.4s
[CV] END activation=tanh, epochs=20000, l2=0, layers=[19, 300, 100, 2], learning_rate=0.05; total time=   1.3s
[CV] END activation=tanh, epochs=20000, l2=0, layers=[19, 300, 100, 2], learning_rate=0.05; total time=   1.3s
[CV] END activation=tanh, epochs=20000, l2=0, layers=[19, 300, 100, 2], learning_rate=0.05; total time=   1.4s
[CV] END activation=tanh, epochs=20000, l2=0, layers=[19, 300, 100, 2], learning_rate=0.05; total time=   1.3s
[CV] END activation=tanh, epochs=20000, l2=0, layers=[19, 300, 100, 2], learning_rate=0.05; total time=   1.4s
[

GridSearchCV(cv=5, estimator=PerceptronClassifier(l2=0),
             param_grid={'activation': ['sigmoid', 'tanh'],
                         'epochs': [80000, 20000], 'l2': [0.0001, 0.001, 0],
                         'layers': [[19, 30, 20, 2], [19, 300, 100, 2],
                                    [19, 20, 2]],
                         'learning_rate': [0.01, 0.05, 0.1]},
             return_train_score=True, verbose=2)

In [28]:
# Print details
print("Best parameters set found on development set:")
display(my_tuned_MLP.best_params_)

# display(my_tuned_MLP.best_score_)
# display(my_tuned_MLP.cv_results_)

Best parameters set found on development set:


{'activation': 'sigmoid',
 'epochs': 80000,
 'l2': 0.001,
 'layers': [19, 300, 100, 2],
 'learning_rate': 0.01}

Make a set of predictions for the training data

In [29]:
y_pred = my_tuned_MLP.predict(X_train)
accuracy = metrics.accuracy_score(y_train, y_pred)

In [30]:
print("Accuracy: " +  str(accuracy))
print(metrics.classification_report(y_train, y_pred))

Accuracy: 0.7403726708074534
              precision    recall  f1-score   support

           0       0.74      0.70      0.72       378
           1       0.74      0.78      0.76       427

   micro avg       0.74      0.74      0.74       805
   macro avg       0.74      0.74      0.74       805
weighted avg       0.74      0.74      0.74       805
 samples avg       0.74      0.74      0.74       805



Make a set of predictions for the test data

In [31]:
y_pred = my_tuned_MLP.predict(X_test)
accuracy = metrics.accuracy_score(y_test, y_pred)

In [32]:
print("Accuracy: " +  str(accuracy))
print(metrics.classification_report(y_test, y_pred))

Accuracy: 0.7138728323699421
              precision    recall  f1-score   support

           0       0.71      0.66      0.68       162
           1       0.72      0.76      0.74       184

   micro avg       0.71      0.71      0.71       346
   macro avg       0.71      0.71      0.71       346
weighted avg       0.71      0.71      0.71       346
 samples avg       0.71      0.71      0.71       346

