In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
df = pd.read_csv("../datasets/diabetes.csv")
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
X = df.iloc[:, :8]
X.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [4]:
y = df.Outcome
y.head()
print(f'X Shape: {X.shape} \nY Shape: {y.shape}')

X Shape: (768, 8) 
Y Shape: (768,)


In [5]:
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X_std, y, test_size=0.3, random_state=0)

In [7]:
print(f'X_train Shape: {X_train.shape} \ny_train Shape: {y_train.shape}')
print(f'X_test Shape: {X_test.shape} \ny_test Shape: {y_test.shape}')

X_train Shape: (537, 8) 
y_train Shape: (537,)
X_test Shape: (231, 8) 
y_test Shape: (231,)


In [8]:
mlp = MLPClassifier(hidden_layer_sizes=(55), activation='relu', solver='sgd', max_iter=200, verbose=True)
mlp

MLPClassifier(hidden_layer_sizes=55, solver='sgd', verbose=True)

In [9]:
mlp.fit(X_train, y_train)

Iteration 1, loss = 0.78723177
Iteration 2, loss = 0.78476270
Iteration 3, loss = 0.78096166
Iteration 4, loss = 0.77653339
Iteration 5, loss = 0.77128427
Iteration 6, loss = 0.76572132
Iteration 7, loss = 0.75993280
Iteration 8, loss = 0.75407951
Iteration 9, loss = 0.74852353
Iteration 10, loss = 0.74254054
Iteration 11, loss = 0.73691780
Iteration 12, loss = 0.73147906
Iteration 13, loss = 0.72609850
Iteration 14, loss = 0.72076817
Iteration 15, loss = 0.71560539
Iteration 16, loss = 0.71076776
Iteration 17, loss = 0.70583140
Iteration 18, loss = 0.70123901
Iteration 19, loss = 0.69674073
Iteration 20, loss = 0.69228461
Iteration 21, loss = 0.68808209
Iteration 22, loss = 0.68397665
Iteration 23, loss = 0.67999384
Iteration 24, loss = 0.67607168
Iteration 25, loss = 0.67222624
Iteration 26, loss = 0.66863754
Iteration 27, loss = 0.66515794
Iteration 28, loss = 0.66163474
Iteration 29, loss = 0.65828358
Iteration 30, loss = 0.65507519
Iteration 31, loss = 0.65191146
Iteration 32, los



MLPClassifier(hidden_layer_sizes=55, solver='sgd', verbose=True)

In [10]:
print(mlp.score(X_train,y_train)*100)

77.46741154562383


In [11]:
print(mlp.score(X_test, y_test)*100) # Overfitting

80.08658008658008


In [12]:
y_pred = mlp.predict(X_test)
y_pred

array([1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0], dtype=int64)

In [13]:
y_test.ravel()

array([1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0], dtype=int64)

In [14]:
cm = confusion_matrix(y_test, y_pred)
print(cm)

[[145  12]
 [ 34  40]]


### Confusion Matrix
| Not Diabetic  -  140 | Not Diabetic but Shows you have Diabetes  -  17
| ----------- | ----------- |
| Diabetic but shows not Diabetic  -  36 | Diabetic  -  38 |

In [15]:
cr = classification_report(y_test, y_pred)
print(cr)

              precision    recall  f1-score   support

           0       0.81      0.92      0.86       157
           1       0.77      0.54      0.63        74

    accuracy                           0.80       231
   macro avg       0.79      0.73      0.75       231
weighted avg       0.80      0.80      0.79       231



In [16]:
X_test

array([[-0.84488505,  2.44447821,  0.35643175, ...,  1.38436175,
         2.784923  , -0.95646168],
       [-0.54791859, -0.43485916,  0.25303625, ...,  0.20401277,
        -0.20499449, -0.87137393],
       [ 0.04601433, -1.40507067, -0.36733675, ...,  0.25478047,
        -0.24425603, -0.70119842],
       ...,
       [-0.84488505, -0.81042491, -0.47073225, ..., -1.02710391,
        -0.63989158, -0.95646168],
       [-0.84488505, -0.46615631,  0.35643175, ...,  0.69899783,
        -0.83015905, -0.61611067],
       [ 1.82781311,  0.03459802,  0.45982725, ..., -0.5575027 ,
         0.12117833,  1.00055664]])

In [17]:
df.tail()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.34,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1
767,1,93,70,31,0,30.4,0.315,23,0


In [18]:
sample = np.array([5,121,72,23,	112,26.2,0.245,30])
sample.shape

(8,)

In [19]:
print(f'X Test Shape: {X_test.shape}')

X Test Shape: (231, 8)


In [20]:
sample = np.expand_dims(sample, 0)
sample.shape

(1, 8)

In [21]:
print(mlp.predict(sample)) # wrong prediction

[1]


In [22]:
sample2 = np.array([5,126,60,0,0,30.1,0.349,47])
sample2 = np.expand_dims(sample2, 0)
print(mlp.predict(sample2))

[1]
