In [75]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix
# Load the raw dataset from the CSV file (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="dataset4.csv")

# Preview the first five rows to check which columns were read.
df.head(n=5)

Unnamed: 0,feature1,feature2,class
0,1.400628,-1.062722,1
1,1.572726,1.03096,0
2,-0.397654,-0.56427,2
3,-0.489704,1.137081,2
4,1.761848,1.191341,0


In [76]:
## Feature matrix: every column except the target column.
## The CSV also carries an "Unnamed: 0" column (visible in df.head()) that
## appears to be a saved copy of the row index. It is dropped too, so the
## classifier is not trained on the row number. errors='ignore' keeps this cell
## working if the file is later read without that column.
features = df.drop(columns=['class']).drop(columns=['Unnamed: 0'], errors='ignore')

## naming the response variable as target
target = df['class']
target

0      1
1      0
2      2
3      2
4      0
      ..
495    1
496    1
497    0
498    0
499    1
Name: class, Length: 500, dtype: int64

## MLPClassifier with StandardScaler and GridSearchCV


In [77]:
from sklearn.pipeline import Pipeline

# Two-step pipeline: standardise the inputs, then fit a multi-layer perceptron.
# The step names ("scaling", "classifier") become the prefixes used when
# addressing step parameters, e.g. "classifier__max_iter".
pipe = Pipeline(
    steps=[
        ("scaling", StandardScaler()),
        ("classifier", MLPClassifier(random_state=42)),  # fixed seed for the classifier
    ]
)



In [78]:
# List every parameter name the pipeline exposes. Step parameters carry a
# "<step name>__" prefix (e.g. "classifier__alpha"), which is the spelling a
# parameter grid for this pipeline has to use.
# A notebook cell displays only its last expression, so this is the single
# lookup kept in the cell.
pipe.get_params().keys()

dict_keys(['memory', 'steps', 'verbose', 'scaling', 'classifier', 'scaling__copy', 'scaling__with_mean', 'scaling__with_std', 'classifier__activation', 'classifier__alpha', 'classifier__batch_size', 'classifier__beta_1', 'classifier__beta_2', 'classifier__early_stopping', 'classifier__epsilon', 'classifier__hidden_layer_sizes', 'classifier__learning_rate', 'classifier__learning_rate_init', 'classifier__max_fun', 'classifier__max_iter', 'classifier__momentum', 'classifier__n_iter_no_change', 'classifier__nesterovs_momentum', 'classifier__power_t', 'classifier__random_state', 'classifier__shuffle', 'classifier__solver', 'classifier__tol', 'classifier__validation_fraction', 'classifier__verbose', 'classifier__warm_start'])

In [79]:
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Candidate hidden-layer layouts, grouped by the width of the first layer.
hidden_layer_candidates = [
    (2,), (2, 2),
    (5,), (5, 2), (5, 5),
    (10,), (10, 2), (10, 5), (10, 10),
]

# Hyper-parameter grid for the pipeline. The "classifier__" prefix routes each
# entry to the pipeline step named "classifier".
# NOTE(review): scikit-learn documents MLPClassifier's learning_rate as only
# used when solver='sgd'; the pipeline leaves solver at its default, so the
# "adaptive" entry probably has no effect — confirm.
grid_parameters = {
    "classifier__hidden_layer_sizes": hidden_layer_candidates,
    "classifier__learning_rate": ["adaptive"],
    "classifier__max_iter": [5000],
}


In [80]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.20,
    random_state=42,
)

# Search the parameter grid on the training rows only, ranking candidates by
# accuracy. The cell ends on fit() so the fitted search object is displayed.
mlp_classifier = GridSearchCV(
    estimator=pipe,
    param_grid=grid_parameters,
    scoring='accuracy',
)
mlp_classifier.fit(X_train, y_train)

GridSearchCV(estimator=Pipeline(steps=[('scaling', StandardScaler()),
                                       ('classifier',
                                        MLPClassifier(random_state=42))]),
             param_grid={'classifier__hidden_layer_sizes': [(2,), (2, 2), (5,),
                                                            (5, 2), (5, 5),
                                                            (10,), (10, 2),
                                                            (10, 5), (10, 10)],
                         'classifier__learning_rate': ['adaptive'],
                         'classifier__max_iter': [5000]},
             scoring='accuracy')

In [82]:
# Print the mean cross-validated accuracy of every grid candidate, together
# with twice the standard deviation of its per-fold scores.
print("Grid scores for all the models based on CV:\n")
cv_results = mlp_classifier.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']

for mean, std, params in zip(means, stds, cv_results['params']):
    spread = std * 2
    print("%0.5f (+/-%0.05f) for %r" % (mean, spread, params))

# Parameter combination that achieved the best mean cross-validated score
print("\nBest parameters set found on development set:", mlp_classifier.best_params_)

# Mean cross-validated accuracy of that best combination
print("Best model validation accuracy:", mlp_classifier.best_score_)

# Take the pipeline selected by the search and predict the held-out test rows
gs_best = mlp_classifier.best_estimator_
tuned_y_pred = gs_best.predict(X_test)


Grid scores for all the models based on CV:

0.72000 (+/-0.04637) for {'classifier__hidden_layer_sizes': (2,), 'classifier__learning_rate': 'adaptive', 'classifier__max_iter': 5000}
0.33750 (+/-0.00000) for {'classifier__hidden_layer_sizes': (2, 2), 'classifier__learning_rate': 'adaptive', 'classifier__max_iter': 5000}
0.91750 (+/-0.05148) for {'classifier__hidden_layer_sizes': (5,), 'classifier__learning_rate': 'adaptive', 'classifier__max_iter': 5000}
0.89750 (+/-0.02449) for {'classifier__hidden_layer_sizes': (5, 2), 'classifier__learning_rate': 'adaptive', 'classifier__max_iter': 5000}
0.93750 (+/-0.05244) for {'classifier__hidden_layer_sizes': (5, 5), 'classifier__learning_rate': 'adaptive', 'classifier__max_iter': 5000}
0.91750 (+/-0.05148) for {'classifier__hidden_layer_sizes': (10,), 'classifier__learning_rate': 'adaptive', 'classifier__max_iter': 5000}
0.93250 (+/-0.03742) for {'classifier__hidden_layer_sizes': (10, 2), 'classifier__learning_rate': 'adaptive', 'classifier__max