In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import re
import sklearn
# Compare the version numerically: a plain string comparison is lexicographic,
# so e.g. "0.9" >= "0.20" would wrongly pass. Only major.minor is inspected,
# which also tolerates suffixes such as "rc1" or ".dev0".
_sk_major, _sk_minor = map(int, re.match(r"(\d+)\.(\d+)", sklearn.__version__).groups())
assert (_sk_major, _sk_minor) >= (0, 20)
from sklearn.model_selection import train_test_split, cross_val_score, KFold, cross_val_predict
from sklearn.linear_model import Perceptron
from sklearn import preprocessing
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, classification_report
from sklearn.metrics import roc_curve, roc_auc_score, f1_score, roc_curve, roc_auc_score 

# Common imports
import numpy as np
import pandas as pd
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# Font sizes for axis labels and tick labels, applied in a single update.
mpl.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

In [3]:
# Read the CSV, discard its first column, then split the remaining columns
# positionally: the first eight are the features, the ninth is the target.
url = "brain_stroke_in_numeric_without_useless_data.csv"
brain = pd.read_csv(url)
first_column = brain.columns[0]
brain = brain.drop(columns=first_column)
X = brain.iloc[:, 0:8]
y = brain.iloc[:, 8]

In [4]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [5]:
# Perceptron baseline: fit on the training split, then report accuracy on
# the held-out test split.
per_clf = Perceptron(max_iter=1000, tol=1e-3, random_state=42)
per_clf.fit(X_train, y_train)
y_pred = per_clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.9478260869565217


In [6]:
# 10-fold cross-validated accuracy of the perceptron on the training split.
kf = KFold(n_splits=10)
score = cross_val_score(per_clf, X_train, y_train, cv=kf, scoring='accuracy')
print("Accuracy Cross Validation score :{}".format(score.mean()))

# Out-of-fold predictions feed the confusion matrix and the derived metrics.
y_train_pred = cross_val_predict(per_clf, X_train, y_train, cv=kf)
tn, fp, fn, tp = confusion_matrix(y_train, y_train_pred).ravel()
print("TN: %.0f, FP: %.0f, FN: %.0f, TP: %.0f" % (tn, fp, fn, tp))
for label, metric in (
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("f1 score:", f1_score),
):
    print(label, metric(y_train, y_train_pred))

Accuracy Cross Validation score :0.8125424035833086
TN: 2778, FP: 538, FN: 116, TP: 54
Precision: 0.09121621621621621
Recall: 0.3176470588235294
f1 score: 0.14173228346456693


In [7]:
# To change the number and size of layers, we need to use the MLPClassifier from sklearn.

In [8]:
# Multi-layer perceptron with default hyper-parameters: fit on the training
# split, then report accuracy on the held-out test split.
mlp_clf = MLPClassifier(random_state=1)
mlp_clf.fit(X_train, y_train)
y_pred = mlp_clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.9491638795986622


In [9]:
# 10-fold cross-validated accuracy of the MLP on the training split.
kf = KFold(n_splits=10)
score = cross_val_score(mlp_clf, X_train, y_train, cv=kf, scoring='accuracy')
print("Accuracy Cross Validation score :{}".format(score.mean()))

# Out-of-fold predictions feed the confusion matrix and the derived metrics.
y_train_pred = cross_val_predict(mlp_clf, X_train, y_train, cv=kf)
tn, fp, fn, tp = confusion_matrix(y_train, y_train_pred).ravel()
print("TN: %.0f, FP: %.0f, FN: %.0f, TP: %.0f" % (tn, fp, fn, tp))
for label, metric in (
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("f1 score:", f1_score),
):
    print(label, metric(y_train, y_train_pred))

Accuracy Cross Validation score :0.9503647531535092
TN: 3313, FP: 3, FN: 170, TP: 0
Precision: 0.0
Recall: 0.0
f1 score: 0.0


In [10]:
# learning rate change 

In [17]:
# Candidate estimators to compare; each entry produces one row of `clf_compare`.
clf = [
    MLPClassifier(random_state=1),
]

# Column names are kept exactly as before (including their spelling) so that
# anything reading `clf_compare` by column name keeps working.
clf_columns = ['Accuracy', 'TN', 'FP', 'FN', 'TP', 'Sensitivity',
               'Specificity', 'Precission', 'Recall', 'AURC']

# One splitter shared by all candidates so they are scored on identical folds.
kf = KFold(n_splits=10)

rows = []
for alg in clf:
    # Hold-out accuracy: fit on the training split, predict the test split.
    predicted = alg.fit(X_train, y_train).predict(X_test)

    # Out-of-fold predictions for the candidate itself. Use `alg` (not the
    # global `mlp_clf`) so the cross-validated metrics belong to this row's
    # estimator and the cell does not depend on an earlier cell's variable.
    y_train_pred = cross_val_predict(alg, X_train, y_train, cv=kf)
    tn, fp, fn, tp = confusion_matrix(y_train, y_train_pred).ravel()

    rows.append({
        'Accuracy': accuracy_score(y_test, predicted),
        'TN': tn,
        'FP': fp,
        'FN': fn,
        'TP': tp,
        'Sensitivity': tp / (tp + fn),
        'Specificity': tn / (tn + fp),
        'Precission': precision_score(y_train, y_train_pred, zero_division=0),
        'Recall': recall_score(y_train, y_train_pred, zero_division=0),
        # AUC is computed from the hard class predictions, not probabilities.
        'AURC': roc_auc_score(y_train, y_train_pred),
    })

# Build the table once from the collected rows, best hold-out accuracy first.
clf_compare = pd.DataFrame(rows, columns=clf_columns).sort_values(by=['Accuracy'], ascending=False)
clf_compare

Unnamed: 0,Accuracy,TN,FP,FN,TP,Sensitivity,Specificity,Precission,Recall,AURC
0,0.949164,3313.0,3.0,170.0,0.0,0.0,0.999095,0.0,0.0,0.499548


In [None]:
# number of layers

In [None]:
# Candidate estimators to compare; each entry produces one row of `clf_compare`.
clf = [
    MLPClassifier(random_state=1),
]

# Column names are kept exactly as before (including their spelling) so that
# anything reading `clf_compare` by column name keeps working.
clf_columns = ['Accuracy', 'TN', 'FP', 'FN', 'TP', 'Sensitivity',
               'Specificity', 'Precission', 'Recall', 'AURC']

# One splitter shared by all candidates so they are scored on identical folds.
kf = KFold(n_splits=10)

rows = []
for alg in clf:
    # Hold-out accuracy: fit on the training split, predict the test split.
    predicted = alg.fit(X_train, y_train).predict(X_test)

    # Out-of-fold predictions for the candidate itself. Use `alg` (not the
    # global `mlp_clf`) so the cross-validated metrics belong to this row's
    # estimator and the cell does not depend on an earlier cell's variable.
    y_train_pred = cross_val_predict(alg, X_train, y_train, cv=kf)
    tn, fp, fn, tp = confusion_matrix(y_train, y_train_pred).ravel()

    rows.append({
        'Accuracy': accuracy_score(y_test, predicted),
        'TN': tn,
        'FP': fp,
        'FN': fn,
        'TP': tp,
        'Sensitivity': tp / (tp + fn),
        'Specificity': tn / (tn + fp),
        'Precission': precision_score(y_train, y_train_pred, zero_division=0),
        'Recall': recall_score(y_train, y_train_pred, zero_division=0),
        # AUC is computed from the hard class predictions, not probabilities.
        'AURC': roc_auc_score(y_train, y_train_pred),
    })

# Build the table once from the collected rows, best hold-out accuracy first.
clf_compare = pd.DataFrame(rows, columns=clf_columns).sort_values(by=['Accuracy'], ascending=False)
clf_compare

In [None]:
# size of layers

In [None]:
# Candidate estimators to compare; each entry produces one row of `clf_compare`.
clf = [
    MLPClassifier(random_state=1),
]

# Column names are kept exactly as before (including their spelling) so that
# anything reading `clf_compare` by column name keeps working.
clf_columns = ['Accuracy', 'TN', 'FP', 'FN', 'TP', 'Sensitivity',
               'Specificity', 'Precission', 'Recall', 'AURC']

# One splitter shared by all candidates so they are scored on identical folds.
kf = KFold(n_splits=10)

rows = []
for alg in clf:
    # Hold-out accuracy: fit on the training split, predict the test split.
    predicted = alg.fit(X_train, y_train).predict(X_test)

    # Out-of-fold predictions for the candidate itself. Use `alg` (not the
    # global `mlp_clf`) so the cross-validated metrics belong to this row's
    # estimator and the cell does not depend on an earlier cell's variable.
    y_train_pred = cross_val_predict(alg, X_train, y_train, cv=kf)
    tn, fp, fn, tp = confusion_matrix(y_train, y_train_pred).ravel()

    rows.append({
        'Accuracy': accuracy_score(y_test, predicted),
        'TN': tn,
        'FP': fp,
        'FN': fn,
        'TP': tp,
        'Sensitivity': tp / (tp + fn),
        'Specificity': tn / (tn + fp),
        'Precission': precision_score(y_train, y_train_pred, zero_division=0),
        'Recall': recall_score(y_train, y_train_pred, zero_division=0),
        # AUC is computed from the hard class predictions, not probabilities.
        'AURC': roc_auc_score(y_train, y_train_pred),
    })

# Build the table once from the collected rows, best hold-out accuracy first.
clf_compare = pd.DataFrame(rows, columns=clf_columns).sort_values(by=['Accuracy'], ascending=False)
clf_compare

In [None]:
# number of iterations

In [None]:
# Candidate estimators to compare; each entry produces one row of `clf_compare`.
clf = [
    MLPClassifier(random_state=1),
]

# Column names are kept exactly as before (including their spelling) so that
# anything reading `clf_compare` by column name keeps working.
clf_columns = ['Accuracy', 'TN', 'FP', 'FN', 'TP', 'Sensitivity',
               'Specificity', 'Precission', 'Recall', 'AURC']

# One splitter shared by all candidates so they are scored on identical folds.
kf = KFold(n_splits=10)

rows = []
for alg in clf:
    # Hold-out accuracy: fit on the training split, predict the test split.
    predicted = alg.fit(X_train, y_train).predict(X_test)

    # Out-of-fold predictions for the candidate itself. Use `alg` (not the
    # global `mlp_clf`) so the cross-validated metrics belong to this row's
    # estimator and the cell does not depend on an earlier cell's variable.
    y_train_pred = cross_val_predict(alg, X_train, y_train, cv=kf)
    tn, fp, fn, tp = confusion_matrix(y_train, y_train_pred).ravel()

    rows.append({
        'Accuracy': accuracy_score(y_test, predicted),
        'TN': tn,
        'FP': fp,
        'FN': fn,
        'TP': tp,
        'Sensitivity': tp / (tp + fn),
        'Specificity': tn / (tn + fp),
        'Precission': precision_score(y_train, y_train_pred, zero_division=0),
        'Recall': recall_score(y_train, y_train_pred, zero_division=0),
        # AUC is computed from the hard class predictions, not probabilities.
        'AURC': roc_auc_score(y_train, y_train_pred),
    })

# Build the table once from the collected rows, best hold-out accuracy first.
clf_compare = pd.DataFrame(rows, columns=clf_columns).sort_values(by=['Accuracy'], ascending=False)
clf_compare

In [None]:
# batch size

In [None]:
# Candidate estimators to compare; each entry produces one row of `clf_compare`.
clf = [
    MLPClassifier(random_state=1),
]

# Column names are kept exactly as before (including their spelling) so that
# anything reading `clf_compare` by column name keeps working.
clf_columns = ['Accuracy', 'TN', 'FP', 'FN', 'TP', 'Sensitivity',
               'Specificity', 'Precission', 'Recall', 'AURC']

# One splitter shared by all candidates so they are scored on identical folds.
kf = KFold(n_splits=10)

rows = []
for alg in clf:
    # Hold-out accuracy: fit on the training split, predict the test split.
    predicted = alg.fit(X_train, y_train).predict(X_test)

    # Out-of-fold predictions for the candidate itself. Use `alg` (not the
    # global `mlp_clf`) so the cross-validated metrics belong to this row's
    # estimator and the cell does not depend on an earlier cell's variable.
    y_train_pred = cross_val_predict(alg, X_train, y_train, cv=kf)
    tn, fp, fn, tp = confusion_matrix(y_train, y_train_pred).ravel()

    rows.append({
        'Accuracy': accuracy_score(y_test, predicted),
        'TN': tn,
        'FP': fp,
        'FN': fn,
        'TP': tp,
        'Sensitivity': tp / (tp + fn),
        'Specificity': tn / (tn + fp),
        'Precission': precision_score(y_train, y_train_pred, zero_division=0),
        'Recall': recall_score(y_train, y_train_pred, zero_division=0),
        # AUC is computed from the hard class predictions, not probabilities.
        'AURC': roc_auc_score(y_train, y_train_pred),
    })

# Build the table once from the collected rows, best hold-out accuracy first.
clf_compare = pd.DataFrame(rows, columns=clf_columns).sort_values(by=['Accuracy'], ascending=False)
clf_compare

In [None]:
# epochs

In [None]:
# Candidate estimators to compare; each entry produces one row of `clf_compare`.
clf = [
    MLPClassifier(random_state=1),
]

# Column names are kept exactly as before (including their spelling) so that
# anything reading `clf_compare` by column name keeps working.
clf_columns = ['Accuracy', 'TN', 'FP', 'FN', 'TP', 'Sensitivity',
               'Specificity', 'Precission', 'Recall', 'AURC']

# One splitter shared by all candidates so they are scored on identical folds.
kf = KFold(n_splits=10)

rows = []
for alg in clf:
    # Hold-out accuracy: fit on the training split, predict the test split.
    predicted = alg.fit(X_train, y_train).predict(X_test)

    # Out-of-fold predictions for the candidate itself. Use `alg` (not the
    # global `mlp_clf`) so the cross-validated metrics belong to this row's
    # estimator and the cell does not depend on an earlier cell's variable.
    y_train_pred = cross_val_predict(alg, X_train, y_train, cv=kf)
    tn, fp, fn, tp = confusion_matrix(y_train, y_train_pred).ravel()

    rows.append({
        'Accuracy': accuracy_score(y_test, predicted),
        'TN': tn,
        'FP': fp,
        'FN': fn,
        'TP': tp,
        'Sensitivity': tp / (tp + fn),
        'Specificity': tn / (tn + fp),
        'Precission': precision_score(y_train, y_train_pred, zero_division=0),
        'Recall': recall_score(y_train, y_train_pred, zero_division=0),
        # AUC is computed from the hard class predictions, not probabilities.
        'AURC': roc_auc_score(y_train, y_train_pred),
    })

# Build the table once from the collected rows, best hold-out accuracy first.
clf_compare = pd.DataFrame(rows, columns=clf_columns).sort_values(by=['Accuracy'], ascending=False)
clf_compare

In [None]:
# momentum

In [None]:
# Candidate estimators to compare; each entry produces one row of `clf_compare`.
clf = [
    MLPClassifier(random_state=1),
]

# Column names are kept exactly as before (including their spelling) so that
# anything reading `clf_compare` by column name keeps working.
clf_columns = ['Accuracy', 'TN', 'FP', 'FN', 'TP', 'Sensitivity',
               'Specificity', 'Precission', 'Recall', 'AURC']

# One splitter shared by all candidates so they are scored on identical folds.
kf = KFold(n_splits=10)

rows = []
for alg in clf:
    # Hold-out accuracy: fit on the training split, predict the test split.
    predicted = alg.fit(X_train, y_train).predict(X_test)

    # Out-of-fold predictions for the candidate itself. Use `alg` (not the
    # global `mlp_clf`) so the cross-validated metrics belong to this row's
    # estimator and the cell does not depend on an earlier cell's variable.
    y_train_pred = cross_val_predict(alg, X_train, y_train, cv=kf)
    tn, fp, fn, tp = confusion_matrix(y_train, y_train_pred).ravel()

    rows.append({
        'Accuracy': accuracy_score(y_test, predicted),
        'TN': tn,
        'FP': fp,
        'FN': fn,
        'TP': tp,
        'Sensitivity': tp / (tp + fn),
        'Specificity': tn / (tn + fp),
        'Precission': precision_score(y_train, y_train_pred, zero_division=0),
        'Recall': recall_score(y_train, y_train_pred, zero_division=0),
        # AUC is computed from the hard class predictions, not probabilities.
        'AURC': roc_auc_score(y_train, y_train_pred),
    })

# Build the table once from the collected rows, best hold-out accuracy first.
clf_compare = pd.DataFrame(rows, columns=clf_columns).sort_values(by=['Accuracy'], ascending=False)
clf_compare

In [None]:
# validation threshold

In [None]:
# Candidate estimators to compare; each entry produces one row of `clf_compare`.
clf = [
    MLPClassifier(random_state=1),
]

# Column names are kept exactly as before (including their spelling) so that
# anything reading `clf_compare` by column name keeps working.
clf_columns = ['Accuracy', 'TN', 'FP', 'FN', 'TP', 'Sensitivity',
               'Specificity', 'Precission', 'Recall', 'AURC']

# One splitter shared by all candidates so they are scored on identical folds.
kf = KFold(n_splits=10)

rows = []
for alg in clf:
    # Hold-out accuracy: fit on the training split, predict the test split.
    predicted = alg.fit(X_train, y_train).predict(X_test)

    # Out-of-fold predictions for the candidate itself. Use `alg` (not the
    # global `mlp_clf`) so the cross-validated metrics belong to this row's
    # estimator and the cell does not depend on an earlier cell's variable.
    y_train_pred = cross_val_predict(alg, X_train, y_train, cv=kf)
    tn, fp, fn, tp = confusion_matrix(y_train, y_train_pred).ravel()

    rows.append({
        'Accuracy': accuracy_score(y_test, predicted),
        'TN': tn,
        'FP': fp,
        'FN': fn,
        'TP': tp,
        'Sensitivity': tp / (tp + fn),
        'Specificity': tn / (tn + fp),
        'Precission': precision_score(y_train, y_train_pred, zero_division=0),
        'Recall': recall_score(y_train, y_train_pred, zero_division=0),
        # AUC is computed from the hard class predictions, not probabilities.
        'AURC': roc_auc_score(y_train, y_train_pred),
    })

# Build the table once from the collected rows, best hold-out accuracy first.
clf_compare = pd.DataFrame(rows, columns=clf_columns).sort_values(by=['Accuracy'], ascending=False)
clf_compare