In [1]:
# import statements
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup, Comment
import time
from datetime import datetime
import csv
from collections import defaultdict
import matplotlib.pyplot as plt
import sklearn
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor
# Import necessary modules
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import mean_squared_error
from math import sqrt
import sklearn.metrics
from sklearn.metrics import r2_score
from sklearn.metrics import classification_report,confusion_matrix,make_scorer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
#from basketball_reference_web_scraper import 
import pickle
import random
from client import Nba_Season

In [10]:
# load list of features and samples from saved data
def load_past_data(files):
    '''
    files: list of tuples in form of (features,samples,is_norm) file paths
    returns: samples,features_norm
    '''
    samples = []
    features_norm = []

    for file in files:
        features_yr = np.genfromtxt(file[0],delimiter=',')
        if file[2]:
            features_norm.extend(features_yr)
        else:
            features_yr_norm = [[float(i)/sum(j) for i in j ]for j in features_yr]
            features_norm.extend(features_yr_norm)
        
        samples_yr = np.genfromtxt(file[1],delimiter=',')
        samples.extend(samples_yr)

    return samples,features_norm

## Using client.py

### 1. Loading from saved data

In [3]:
with open('old_on_off_stats/2022-2023_on_off.pkl', 'rb') as f:
    loaded_on_off = pickle.load(f)

with open('old_on_off_stats/2022-2023_team_stats.pkl', 'rb') as f:
    loaded_stats = pickle.load(f)

nba_szn_2022_2023 = Nba_Season('2022','2023',team_stats=loaded_stats,team_on_off=loaded_on_off,
                               features=np.genfromtxt('old_samps_feats/2022-2023_nba_features_inj.csv',delimiter=',')
                               ,samples=np.genfromtxt('old_samps_feats/2022-2023_nba_samples_inj.csv',delimiter=','))

### 2. Generating from CSV of games in form of [date, away team, away pts, home team, home pts]

In [6]:
nba_szn_2022_2023 = Nba_Season('2022','2023')
nba_szn_2022_2023.pop_const_new()
# probably need to wait before calling generate features
features, samples = nba_szn_2022_2023.generate_features('old_games_inj/2022-2023_season_injury.csv')
# save generated data
nba_szn_2022_2023.save_data(save_path='old_samps_feats/')

# manually save on off or team stats using commented code below
# with open('old_on_off_stats/2022-2023_team_stats.pkl', 'wb') as f:
#     pickle.dump(nba_szn_2022_2023.team_stats, f)

## 2013 - 2023 NBA Seasons

In [None]:
# TODO fix scraper for this season!
nba_szn_2014 = Nba_Season('2013','2014')
nba_szn_2014.pop_const_new()

In [None]:
features_2014, samples_2014 = nba_szn_2014.generate_features('old_games_inj/2013-2014_season_inj.csv')
nba_szn_2014.save_data(save_path='old_samps_feats/')

In [2]:
# load old gay boy stuff :3
features_norm = np.genfromtxt('old_samps_feats/2015-2023_nba_features_norm_inj.csv',delimiter=',')
samples = np.genfromtxt('old_samps_feats/2015-2023_nba_samples_inj.csv',delimiter=',')

## 2014 - 2023 NBA Seasons

In [4]:
samples_1d = [0 if j[0] == 0 else 1 for j in samples]
feat_train, feat_test, samp_train, samp_test = train_test_split(features_norm,samples_1d, test_size=0.25, random_state=1)

In [9]:
mlp = MLPClassifier(hidden_layer_sizes=(16,32,64,64,32,16), alpha=0.0075, learning_rate='invscaling', activation='tanh', tol=0.001, solver='lbfgs',max_iter=1500)
mlp.fit(feat_train,samp_train)

predict_train = mlp.predict(feat_train)
predict_test = mlp.predict(feat_test)

print('TRAINING SET \n')
print('TN, FP, FN, TP')
print(confusion_matrix(samp_train,predict_train).ravel())
print(classification_report(samp_train,predict_train))
print('TEST SET \n')
print('TN, FP, FN, TP')
print(confusion_matrix(samp_test,predict_test).ravel())
print(classification_report(samp_test,predict_test))

TRAINING SET 

TN, FP, FN, TP
[3725  675  687 3550]
              precision    recall  f1-score   support

           0       0.84      0.85      0.85      4400
           1       0.84      0.84      0.84      4237

    accuracy                           0.84      8637
   macro avg       0.84      0.84      0.84      8637
weighted avg       0.84      0.84      0.84      8637

TEST SET 

TN, FP, FN, TP
[738 753 684 705]
              precision    recall  f1-score   support

           0       0.52      0.49      0.51      1491
           1       0.48      0.51      0.50      1389

    accuracy                           0.50      2880
   macro avg       0.50      0.50      0.50      2880
weighted avg       0.50      0.50      0.50      2880



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [18]:
mlp = MLPClassifier(hidden_layer_sizes=(16,32,64,64,32,16), alpha=0.0001, learning_rate='invscaling', activation='tanh', solver='adam',max_iter=10000)
mlp.fit(feat_train,samp_train)

predict_train = mlp.predict(feat_train)
predict_test = mlp.predict(feat_test)

print('TRAINING SET \n')
print('TN, FP, FN, TP')
print(confusion_matrix(samp_train,predict_train).ravel())
print(classification_report(samp_train,predict_train))
print('TEST SET \n')
print('TN, FP, FN, TP')
print(confusion_matrix(samp_test,predict_test).ravel())
print(classification_report(samp_test,predict_test))

TN, FP, FN, TP
[3508  597  927 3029]
              precision    recall  f1-score   support

           0       0.79      0.85      0.82      4105
           1       0.84      0.77      0.80      3956

    accuracy                           0.81      8061
   macro avg       0.81      0.81      0.81      8061
weighted avg       0.81      0.81      0.81      8061

TN, FP, FN, TP
[967 819 923 747]
              precision    recall  f1-score   support

           0       0.51      0.54      0.53      1786
           1       0.48      0.45      0.46      1670

    accuracy                           0.50      3456
   macro avg       0.49      0.49      0.49      3456
weighted avg       0.49      0.50      0.49      3456



In [19]:
mlp = MLPClassifier(hidden_layer_sizes=(32,64,128,64,32), alpha=0.5, learning_rate='invscaling', activation='relu', tol=0.001,solver='lbfgs',max_iter=2500)
mlp.fit(feat_train,samp_train)

predict_train = mlp.predict(feat_train)
predict_test = mlp.predict(feat_test)

print('TRAINING SET \n')
print('TN, FP, FN, TP')
print(confusion_matrix(samp_train,predict_train).ravel())
print(classification_report(samp_train,predict_train))
print('TEST SET \n')
print('TN, FP, FN, TP')
print(confusion_matrix(samp_test,predict_test).ravel())
print(classification_report(samp_test,predict_test))

TRAINING SET 

TN, FP, FN, TP
[3609  496  504 3452]
              precision    recall  f1-score   support

           0       0.88      0.88      0.88      4105
           1       0.87      0.87      0.87      3956

    accuracy                           0.88      8061
   macro avg       0.88      0.88      0.88      8061
weighted avg       0.88      0.88      0.88      8061

TEST SET 

TN, FP, FN, TP
[920 866 887 783]
              precision    recall  f1-score   support

           0       0.51      0.52      0.51      1786
           1       0.47      0.47      0.47      1670

    accuracy                           0.49      3456
   macro avg       0.49      0.49      0.49      3456
weighted avg       0.49      0.49      0.49      3456



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [20]:
GRID = [
    {'scaler': [StandardScaler()],
     'estimator': [MLPClassifier(random_state=1)],
     'estimator__solver': ['adam'],
     'estimator__learning_rate_init': [0.0001,0.001,0.01],
     'estimator__learning_rate' :['invscaling','constant'],
     'estimator__max_iter': [10000],
     'estimator__hidden_layer_sizes': [(32,64,32), (64,64,64), (64,128,64), (32,64,64,32), (16,32,64,32,16), (32,64,128,64,32), (16,32,64,64,32,16)],
     'estimator__activation': ['logistic', 'tanh', 'relu'],
     'estimator__alpha': [0.0001, 0.001, 0.005],
     'estimator__tol' : [0.01, 0.0001, 0.001],
     'estimator__early_stopping': [True, False]
     }
]

PIPELINE = Pipeline([('scaler', None), ('estimator', MLPClassifier())])

grid_search = GridSearchCV(estimator=PIPELINE, param_grid=GRID, 
                            scoring=make_scorer(sklearn.metrics.accuracy_score),# average='macro'), 
                            n_jobs=-1, refit=True, verbose=1, 
                            return_train_score=False)

grid_search.fit(feat_train,samp_train)
print('Best parameters found:\n', grid_search.best_params_)
print('Best parameters score:\n', grid_search.best_score_)

Fitting 5 folds for each of 2268 candidates, totalling 11340 fits
Best parameters found:
 {'estimator': MLPClassifier(alpha=0.005, hidden_layer_sizes=(16, 32, 64, 32, 16),
              learning_rate='invscaling', max_iter=10000, random_state=1), 'estimator__activation': 'relu', 'estimator__alpha': 0.005, 'estimator__early_stopping': False, 'estimator__hidden_layer_sizes': (16, 32, 64, 32, 16), 'estimator__learning_rate': 'invscaling', 'estimator__learning_rate_init': 0.001, 'estimator__max_iter': 10000, 'estimator__solver': 'adam', 'estimator__tol': 0.0001, 'scaler': StandardScaler()}
Best parameters score:
 0.5117242965422075


In [27]:
mlp = MLPClassifier(hidden_layer_sizes=(128,256,256,128), alpha=0.005, learning_rate='invscaling', learning_rate_init=0.001, activation='relu', tol=0.0001,solver='adam',max_iter=10000)
mlp.fit(feat_train,samp_train)

predict_train = mlp.predict(feat_train)
predict_test = mlp.predict(feat_test)

print('TRAINING SET \n')
print('TN, FP, FN, TP')
print(confusion_matrix(samp_train,predict_train).ravel())
print(classification_report(samp_train,predict_train))
print('TEST SET \n')
print('TN, FP, FN, TP')
print(confusion_matrix(samp_test,predict_test).ravel())
print(classification_report(samp_test,predict_test))

TRAINING SET 

TN, FP, FN, TP
[3902  498  431 3806]
              precision    recall  f1-score   support

           0       0.90      0.89      0.89      4400
           1       0.88      0.90      0.89      4237

    accuracy                           0.89      8637
   macro avg       0.89      0.89      0.89      8637
weighted avg       0.89      0.89      0.89      8637

TEST SET 

TN, FP, FN, TP
[743 748 672 717]
              precision    recall  f1-score   support

           0       0.53      0.50      0.51      1491
           1       0.49      0.52      0.50      1389

    accuracy                           0.51      2880
   macro avg       0.51      0.51      0.51      2880
weighted avg       0.51      0.51      0.51      2880



In [41]:
mlp = MLPClassifier(hidden_layer_sizes=(128,256,512,256,128), alpha=0.005, learning_rate='invscaling', learning_rate_init=0.00001, activation='relu',solver='adam',max_iter=10000)
mlp.fit(feat_train,samp_train)

predict_train = mlp.predict(feat_train)
predict_test = mlp.predict(feat_test)

print('TRAINING SET \n')
print('TN, FP, FN, TP')
print(confusion_matrix(samp_train,predict_train).ravel())
print(classification_report(samp_train,predict_train))
print('TEST SET \n')
print('TN, FP, FN, TP')
print(confusion_matrix(samp_test,predict_test).ravel())
print(classification_report(samp_test,predict_test))

TRAINING SET 

TN, FP, FN, TP
[3370 1030 1456 2781]
              precision    recall  f1-score   support

           0       0.70      0.77      0.73      4400
           1       0.73      0.66      0.69      4237

    accuracy                           0.71      8637
   macro avg       0.71      0.71      0.71      8637
weighted avg       0.71      0.71      0.71      8637

TEST SET 

TN, FP, FN, TP
[824 667 763 626]
              precision    recall  f1-score   support

           0       0.52      0.55      0.54      1491
           1       0.48      0.45      0.47      1389

    accuracy                           0.50      2880
   macro avg       0.50      0.50      0.50      2880
weighted avg       0.50      0.50      0.50      2880



## 2022-2023 NBA Season with Injuries 

In [23]:
features_norm = []
samples = []

pop_team_stats('2023')

time.sleep(15)

pop_team_on_off('2023')

time.sleep(15)

# with open('2022-2023_on_off.pkl', 'wb') as f:
#     pickle.dump(TEAM_ON_OFF, f)

# with open('2022-2023_on_off.pkl', 'rb') as f:
#     loaded_dict = pickle.load(f)

features_2023,samples_2023 = generate_features('2023','old_games_inj/2022-2023_season_injury.csv')

np.savetxt('2022-2023_nba_features_inj.csv', features_2023, delimiter=',')
np.savetxt('2022-2023_nba_samples_inj.csv', samples_2023, delimiter=',')

#features_2023_ext,samples_2023_ext = generate_features('2023','old_games_inj/2022-2023_season_inj_ext.csv')

#features_2023.extend(features_2023_ext)
#samples_2023.extend(samples_2023_ext)
features_2023_norm = [[float(i)/sum(j) for i in j ]for j in features_2023]

features_norm.extend(features_2023_norm)
samples.extend(samples_2023)

#features_2023_norm = [[float(i)/sum(j) for i in j ]for j in features_2023]
samples_2023_1d = [0 if j[0] == 0 else 1 for j in samples_2023]
feat_train, feat_test, samp_train, samp_test = train_test_split(features_2023_norm,samples_2023_1d, test_size=0.30, random_state=1)

In [58]:
mlp = MLPClassifier(hidden_layer_sizes=(16,32,64,64,32,16), alpha=0.075, learning_rate='invscaling', activation='tanh', solver='lbfgs',max_iter=10000)
mlp.fit(feat_train,samp_train)

predict_train = mlp.predict(feat_train)
predict_test = mlp.predict(feat_test)

print('TN, FP, FN, TP')
print(confusion_matrix(samp_train,predict_train).ravel())
print(classification_report(samp_train,predict_train))
print('TN, FP, FN, TP')
print(confusion_matrix(samp_test,predict_test).ravel())
print(classification_report(samp_test,predict_test))

TN, FP, FN, TP
[496   1   1 426]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       497
           1       1.00      1.00      1.00       427

    accuracy                           1.00       924
   macro avg       1.00      1.00      1.00       924
weighted avg       1.00      1.00      1.00       924

TN, FP, FN, TP
[114 108  85  89]
              precision    recall  f1-score   support

           0       0.57      0.51      0.54       222
           1       0.45      0.51      0.48       174

    accuracy                           0.51       396
   macro avg       0.51      0.51      0.51       396
weighted avg       0.52      0.51      0.51       396



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [64]:
mlp = MLPClassifier(hidden_layer_sizes=(16,32,64,64,32,16), alpha=0.0001, learning_rate='invscaling', activation='tanh', solver='adam',max_iter=10000)
mlp.fit(feat_train,samp_train)

predict_train = mlp.predict(feat_train)
predict_test = mlp.predict(feat_test)

print('TN, FP, FN, TP')
print(confusion_matrix(samp_train,predict_train).ravel())
print(classification_report(samp_train,predict_train))
print('TN, FP, FN, TP')
print(confusion_matrix(samp_test,predict_test).ravel())
print(classification_report(samp_test,predict_test))

TN, FP, FN, TP
[451  46  14 413]
              precision    recall  f1-score   support

           0       0.97      0.91      0.94       497
           1       0.90      0.97      0.93       427

    accuracy                           0.94       924
   macro avg       0.93      0.94      0.93       924
weighted avg       0.94      0.94      0.94       924

TN, FP, FN, TP
[111 111  95  79]
              precision    recall  f1-score   support

           0       0.54      0.50      0.52       222
           1       0.42      0.45      0.43       174

    accuracy                           0.48       396
   macro avg       0.48      0.48      0.48       396
weighted avg       0.48      0.48      0.48       396



In [65]:
arcs = [(128,256,128),(64,128,128,64),(128,256,256,128)]
for arc in arcs:
    mlp = MLPClassifier(hidden_layer_sizes=arc, alpha=0.0075, learning_rate_init=0.001, activation='tanh', solver='adam', epsilon=0.0000001, max_iter=10000)
    mlp.fit(feat_train,samp_train)
    print(arc)
    predict_train = mlp.predict(feat_train)
    predict_test = mlp.predict(feat_test)

    print('TN, FP, FN, TP')
    print(confusion_matrix(samp_train,predict_train).ravel())
    print(classification_report(samp_train,predict_train))
    print('TN, FP, FN, TP')
    print(confusion_matrix(samp_test,predict_test).ravel())
    print(classification_report(samp_test,predict_test))


(128, 256, 128)
TN, FP, FN, TP
[487  10   8 419]
              precision    recall  f1-score   support

           0       0.98      0.98      0.98       497
           1       0.98      0.98      0.98       427

    accuracy                           0.98       924
   macro avg       0.98      0.98      0.98       924
weighted avg       0.98      0.98      0.98       924

TN, FP, FN, TP
[118 104  94  80]
              precision    recall  f1-score   support

           0       0.56      0.53      0.54       222
           1       0.43      0.46      0.45       174

    accuracy                           0.50       396
   macro avg       0.50      0.50      0.50       396
weighted avg       0.50      0.50      0.50       396

(64, 128, 128, 64)
TN, FP, FN, TP
[488   9  29 398]
              precision    recall  f1-score   support

           0       0.94      0.98      0.96       497
           1       0.98      0.93      0.95       427

    accuracy                           0.96     

In [68]:
mlp = MLPClassifier(hidden_layer_sizes=(16,32,64,64,32,16), alpha=0.05, learning_rate='invscaling', activation='tanh', solver='lbfgs',max_iter=5000)
mlp.fit(feat_train,samp_train)

predict_train = mlp.predict(feat_train)
predict_test = mlp.predict(feat_test)

print(classification_report(samp_train,predict_train))
print(classification_report(samp_test,predict_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       497
           1       1.00      1.00      1.00       427

    accuracy                           1.00       924
   macro avg       1.00      1.00      1.00       924
weighted avg       1.00      1.00      1.00       924

              precision    recall  f1-score   support

           0       0.54      0.55      0.55       222
           1       0.41      0.40      0.40       174

    accuracy                           0.48       396
   macro avg       0.48      0.48      0.48       396
weighted avg       0.48      0.48      0.48       396



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


## 2018 - 2023 NBA Seasons with Injuries

### 1. Generate Samples and Features for each Season

### 2. Test Model and Tune Hyper Paramaters

In [21]:
samples_1d =  [0 if j[0] == 0 else 1 for j in samples]
feat_train, feat_test, samp_train, samp_test = train_test_split(features_norm,samples_1d, test_size=0.30, random_state=1)

In [31]:
mlp = MLPClassifier(hidden_layer_sizes=(16,32,64,64,32,16), alpha=0.0001, learning_rate_init=0.025, learning_rate='adaptive', activation='tanh', solver='adam', epsilon=0.0001, max_iter=10000)
mlp.fit(feat_train,samp_train)

predict_train = mlp.predict(feat_train)
predict_test = mlp.predict(feat_test)

print('TN, FP, FN, TP')
print(confusion_matrix(samp_train,predict_train).ravel())
print(classification_report(samp_train,predict_train))
print('TN, FP, FN, TP')
print(confusion_matrix(samp_test,predict_test).ravel())
print(classification_report(samp_test,predict_test))

TN, FP, FN, TP
[2715   76   67 2448]
              precision    recall  f1-score   support

           0       0.98      0.97      0.97      2791
           1       0.97      0.97      0.97      2515

    accuracy                           0.97      5306
   macro avg       0.97      0.97      0.97      5306
weighted avg       0.97      0.97      0.97      5306

TN, FP, FN, TP
[571 597 588 519]
              precision    recall  f1-score   support

           0       0.49      0.49      0.49      1168
           1       0.47      0.47      0.47      1107

    accuracy                           0.48      2275
   macro avg       0.48      0.48      0.48      2275
weighted avg       0.48      0.48      0.48      2275



In [36]:
mlp = MLPClassifier(hidden_layer_sizes=(16,32,64,64,32,16), alpha=0.0001, learning_rate_init=0.0025, learning_rate='adaptive', activation='relu', solver='adam', epsilon=0.000001, max_iter=5000)
mlp.fit(feat_train,samp_train)

predict_train = mlp.predict(feat_train)
predict_test = mlp.predict(feat_test)

print('TN, FP, FN, TP')
print(confusion_matrix(samp_train,predict_train).ravel())
print(classification_report(samp_train,predict_train))
print('TN, FP, FN, TP')
print(confusion_matrix(samp_test,predict_test).ravel())
print(classification_report(samp_test,predict_test))

TN, FP, FN, TP
[2441  350 1301 1214]
              precision    recall  f1-score   support

           0       0.65      0.87      0.75      2791
           1       0.78      0.48      0.60      2515

    accuracy                           0.69      5306
   macro avg       0.71      0.68      0.67      5306
weighted avg       0.71      0.69      0.68      5306

TN, FP, FN, TP
[824 344 788 319]
              precision    recall  f1-score   support

           0       0.51      0.71      0.59      1168
           1       0.48      0.29      0.36      1107

    accuracy                           0.50      2275
   macro avg       0.50      0.50      0.48      2275
weighted avg       0.50      0.50      0.48      2275



In [43]:
mlp = MLPClassifier(max_iter=10000)

parameter_space = {
    'hidden_layer_sizes': [(16,32,16), (32,64,32), (64,64,64), (64,128,48), (16,32,64,32,16)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05, 0.001],
    'learning_rate': ['constant','adaptive'],
}

clf = GridSearchCV(mlp,parameter_space,n_jobs=-1)
clf.fit(feat_train,samp_train)

print('Best parameters found:\n', clf.best_params_)

Best parameters found:
 {'activation': 'relu', 'alpha': 0.05, 'hidden_layer_sizes': (64, 128, 48), 'learning_rate': 'adaptive', 'solver': 'sgd'}


In [55]:
GRID = [
    {'scaler': [StandardScaler()],
     'estimator': [MLPClassifier(random_state=1)],
     'estimator__solver': ['adam'],
     'estimator__learning_rate_init': [0.0001],
     'estimator__max_iter': [10000],
     'estimator__hidden_layer_sizes': [(16,32,16), (32,64,32), (64,64,64), (64,128,48), (16,32,64,32,16)],
     'estimator__activation': ['logistic', 'tanh', 'relu'],
     'estimator__alpha': [0.0001, 0.001, 0.005],
     'estimator__epsilon' : [0.001,0.00001,0.00000001],
     'estimator__tol' : [0.01, 0.0001, 0.000001],
     'estimator__early_stopping': [True, False]
     }
]

PIPELINE = Pipeline([('scaler', None), ('estimator', MLPClassifier())])

grid_search = GridSearchCV(estimator=PIPELINE, param_grid=GRID, 
                            scoring=make_scorer(sklearn.metrics.accuracy_score),# average='macro'), 
                            n_jobs=-1, refit=True, verbose=1, 
                            return_train_score=False)

grid_search.fit(feat_train,samp_train)
print('Best parameters found:\n', grid_search.best_params_)

Fitting 5 folds for each of 810 candidates, totalling 4050 fits
Best parameters found:
 {'estimator': MLPClassifier(hidden_layer_sizes=(32, 64, 32), learning_rate_init=0.0001,
              max_iter=10000, random_state=1), 'estimator__activation': 'relu', 'estimator__alpha': 0.0001, 'estimator__early_stopping': False, 'estimator__epsilon': 1e-08, 'estimator__hidden_layer_sizes': (32, 64, 32), 'estimator__learning_rate_init': 0.0001, 'estimator__max_iter': 10000, 'estimator__solver': 'adam', 'estimator__tol': 0.0001, 'scaler': StandardScaler()}


In [None]:
means = grid_search.cv_results_['mean_test_score']
stds = grid_search.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, grid_search.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

In [78]:
mlp = MLPClassifier(hidden_layer_sizes=(32,64,128,64,32), alpha=0.075, learning_rate='invscaling', activation='tanh', solver='lbfgs',max_iter=10000)
mlp.fit(feat_train,samp_train)

predict_train = mlp.predict(feat_train)
predict_test = mlp.predict(feat_test)

print('TN, FP, FN, TP')
print(confusion_matrix(samp_train,predict_train).ravel())
print(classification_report(samp_train,predict_train))
print('TN, FP, FN, TP')
print(confusion_matrix(samp_test,predict_test).ravel())
print(classification_report(samp_test,predict_test))

TN, FP, FN, TP
[2787    4   11 2504]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2791
           1       1.00      1.00      1.00      2515

    accuracy                           1.00      5306
   macro avg       1.00      1.00      1.00      5306
weighted avg       1.00      1.00      1.00      5306

TN, FP, FN, TP
[593 575 602 505]
              precision    recall  f1-score   support

           0       0.50      0.51      0.50      1168
           1       0.47      0.46      0.46      1107

    accuracy                           0.48      2275
   macro avg       0.48      0.48      0.48      2275
weighted avg       0.48      0.48      0.48      2275



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [93]:
mlp = MLPClassifier(hidden_layer_sizes=(32,64,128,64,32), alpha=0.5, learning_rate='invscaling', activation='relu', tol=0.001,solver='lbfgs',max_iter=2500)
mlp.fit(feat_train,samp_train)

predict_train = mlp.predict(feat_train)
predict_test = mlp.predict(feat_test)

print('TRAINING SET \n')
print('TN, FP, FN, TP')
print(confusion_matrix(samp_train,predict_train).ravel())
print(classification_report(samp_train,predict_train))
print('TEST SET \n')
print('TN, FP, FN, TP')
print(confusion_matrix(samp_test,predict_test).ravel())
print(classification_report(samp_test,predict_test))

TRAINING SET 

TN, FP, FN, TP
[2637  154  182 2333]
              precision    recall  f1-score   support

           0       0.94      0.94      0.94      2791
           1       0.94      0.93      0.93      2515

    accuracy                           0.94      5306
   macro avg       0.94      0.94      0.94      5306
weighted avg       0.94      0.94      0.94      5306

TEST SET 

TN, FP, FN, TP
[609 559 558 549]
              precision    recall  f1-score   support

           0       0.52      0.52      0.52      1168
           1       0.50      0.50      0.50      1107

    accuracy                           0.51      2275
   macro avg       0.51      0.51      0.51      2275
weighted avg       0.51      0.51      0.51      2275



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
