# ANN Implementation

## Step 1 - Load the required libraries and modules

In [20]:
import pandas as pd
from sklearn.metrics import r2_score, accuracy_score
from sklearn.model_selection import train_test_split

## Step 2 - Reading the Data and Performing Basic Data Checks

In [2]:
df = pd.read_csv('diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
df.shape

(768, 9)

## Step 3- Creating Arrays for the Independent and Dependent variables

In [28]:
target_column = df['Outcome']
predictors = df.iloc[:,0:8]

## Step 4 - Feature Scaling

In [30]:
X = df[predictors.columns]
y = target_column

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.20,random_state=42)

## Step 5 - Run the model

In [31]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(hidden_layer_sizes=(8, 8, 8), activation='relu', solver='adam', max_iter=500)
mlp.fit(X_train, y_train)
predict_train = mlp.predict(X_train)
predict_test = mlp.predict(X_test)

In [32]:
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_train, predict_train))
print(classification_report(y_train, predict_train))

[[378  23]
 [176  37]]
              precision    recall  f1-score   support

           0       0.68      0.94      0.79       401
           1       0.62      0.17      0.27       213

    accuracy                           0.68       614
   macro avg       0.65      0.56      0.53       614
weighted avg       0.66      0.68      0.61       614



### For hidden layer of size = 5, 5, 5

In [33]:
from sklearn.neural_network import MLPClassifier

mlp1 = MLPClassifier(hidden_layer_sizes=(5, 5, 5), activation='relu', solver='adam', max_iter=500)
mlp1.fit(X_train, y_train)
predict_train1 = mlp1.predict(X_train)
predict_test1 = mlp1.predict(X_test)



In [34]:
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_train, predict_train1))
print(classification_report(y_train, predict_train1))

[[388  13]
 [165  48]]
              precision    recall  f1-score   support

           0       0.70      0.97      0.81       401
           1       0.79      0.23      0.35       213

    accuracy                           0.71       614
   macro avg       0.74      0.60      0.58       614
weighted avg       0.73      0.71      0.65       614



### For hidden layer of size = 3, 3, 3

In [35]:
from sklearn.neural_network import MLPClassifier

mlp2 = MLPClassifier(hidden_layer_sizes=(3, 3, 3), activation='relu', solver='adam', max_iter=500)
mlp2.fit(X_train, y_train)
predict_train2 = mlp2.predict(X_train)
predict_test2 = mlp2.predict(X_test)

In [36]:
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_train, predict_train2))
print(classification_report(y_train, predict_train2))

[[375  26]
 [172  41]]
              precision    recall  f1-score   support

           0       0.69      0.94      0.79       401
           1       0.61      0.19      0.29       213

    accuracy                           0.68       614
   macro avg       0.65      0.56      0.54       614
weighted avg       0.66      0.68      0.62       614



## Accuracy Score

In [37]:
print("For size (8, 8, 8): ", accuracy_score(y_train, predict_train))
print("For size (5, 5, 5): ", accuracy_score(y_train, predict_train1))
print("For size (3, 3, 3): ", accuracy_score(y_train, predict_train2))

For size (8, 8, 8):  0.6758957654723127
For size (5, 5, 5):  0.7100977198697068
For size (3, 3, 3):  0.6775244299674267


In [40]:
df1 = pd.read_csv('diabetes.csv')
df1.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [41]:
X = df.drop('Outcome', axis=1)
y = df['Outcome']

In [42]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

In [43]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

knn1 = KNeighborsClassifier(n_neighbors=7)
knn1.fit(X_train, y_train)

y_pred1 = knn1.predict(X_test)

In [45]:
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier(max_depth=5)
model.fit(X_train, y_train)

y_pred2 = model.predict(X_test)

In [49]:
models = []
train_accu = []
test_accu = []

def store_results(model, train_acc, test_acc):
    models.append(model)
    train_accu.append(train_acc)
    test_accu.append(test_acc)

In [50]:
store_results('MLPClassifier', accuracy_score(y_train, predict_train), accuracy_score(y_test, predict_test))
store_results('MLPClassifier', accuracy_score(y_train, predict_train1), accuracy_score(y_test, predict_test1))
store_results('KNeighborsClassifier', accuracy_score(y_train, knn.predict(X_train)), accuracy_score(y_test, y_pred))
store_results('KNeighborsClassifier', accuracy_score(y_train, knn1.predict(X_train)), accuracy_score(y_test, y_pred1))
store_results('DecisionTreeClassifier', accuracy_score(y_train, model.predict(X_train)), accuracy_score(y_test, y_pred2))

In [51]:
result = pd.DataFrame({
    'ML Model': models,
    'Train Accuracy': train_accu,
    'Test Accuracy': test_accu,
})

In [52]:
result

Unnamed: 0,ML Model,Train Accuracy,Test Accuracy
0,MLPClassifier,0.628664,0.642857
1,MLPClassifier,0.610749,0.597403
2,KNeighborsClassifier,0.798046,0.714286
3,KNeighborsClassifier,0.776873,0.753247
4,DecisionTreeClassifier,0.81759,0.766234
