In [None]:
### Load Packages

In [None]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve, precision_recall_curve
from sklearn.ensemble import RandomForestClassifier

In [None]:
from keras.models import Sequential
from keras.layers import Input, Dense, Flatten, Dropout, BatchNormalization
from keras.optimizers import Adam, SGD, RMSprop

In [None]:
## Load in the data set
# header=0 consumes the header row stored in the CSV so that the descriptive
# column names below replace it.
names = ["times_pregnant", "glucose_tolerance_test", "blood_pressure", "skin_thickness", "insulin",
         "bmi", "pedigree_function", "age", "has_diabetes"]
# Plain relative path: the previous r'.\diabetes.csv' spelling only resolves on
# Windows (elsewhere the backslash is part of the file name).
diabetes_df = pd.read_csv('diabetes.csv', names=names, header=0)

In [None]:
# Preview the first rows to confirm the renamed columns line up with the data.
diabetes_df.head()

In [None]:
# (rows, columns) of the loaded frame.
diabetes_df.shape

In [None]:
# Feature matrix: every column except the last. Target vector: the last column.
X = diabetes_df.iloc[:, :-1].to_numpy()
y = diabetes_df.iloc[:, -1].to_numpy()

In [None]:
# Hold out 25% of the rows for testing (75% train); the fixed seed keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=11111,
)

#### Preparing a baseline model to compare NN with:

In [None]:
# Baseline: a 200-tree random forest. Seeding the forest makes the baseline
# accuracy / ROC-AUC repeatable across Restart & Run All (same seed value as
# the train/test split).
rf_model = RandomForestClassifier(n_estimators=200, random_state=11111)

In [None]:
# Fit the baseline on the raw (unscaled) training features.
rf_model.fit(X_train, y_train)

In [None]:
# Hard class labels feed the accuracy score; column 1 of predict_proba feeds
# the ROC-AUC.
y_pred_class_rf = rf_model.predict(X_test)
y_pred_prob_rf = rf_model.predict_proba(X_test)

rf_accuracy = accuracy_score(y_test, y_pred_class_rf)
rf_roc_auc = roc_auc_score(y_test, y_pred_prob_rf[:, 1])

print('accuracy is {:.3f}'.format(rf_accuracy))
print('roc-auc is {:.3f}'.format(rf_roc_auc))

In [None]:
def plot_roc(y_test, y_pred, model_name):
    """Plot a model's ROC curve together with the chance diagonal.

    Parameters
    ----------
    y_test : array-like
        True binary labels.
    y_pred : array-like
        Scores for the positive class (e.g. predicted probabilities).
    model_name : str
        Name inserted into the figure title.
    """
    fpr, tpr, _ = roc_curve(y_test, y_pred)  # thresholds are not needed for the plot
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.plot(fpr, tpr, 'k-')
    ax.plot([0, 1], [0, 1], 'k--', linewidth=.5)  # roc curve for random model
    ax.grid(True)
    # Axis labels let the figure stand on its own when the notebook is skimmed.
    ax.set(title='ROC Curve for {} on PIMA diabetes problem'.format(model_name),
           xlabel='False Positive Rate', ylabel='True Positive Rate',
           xlim=[-0.01, 1.01], ylim=[-0.01, 1.01])


In [None]:
# ROC curve for the random-forest baseline, using the column-1 probabilities.
plot_roc(y_test, y_pred_prob_rf[:, 1], 'RF')

### Building a Neural network:

In [None]:
# Fit the scaler on the training split only and reuse those statistics for the
# test split. Re-fitting on X_test would scale it with its own mean/std, which
# leaks test information and puts the two splits on different scales.
normalizer = StandardScaler()
X_train_norm = normalizer.fit_transform(X_train)
X_test_norm = normalizer.transform(X_test)

In [None]:
# Sanity check: (training rows, features) after scaling.
X_train_norm.shape

In [None]:
# The data has 8 input features, so the network's input layer has 8 nodes.

##### Case 1 : 
Hidden Layers : 1 <br>
Hidden Nodes in Layer : 12 <br>
Activation Function : Sigmoid <br>
Loss function : Binary-crossentropy <br>
Optimizer : SGD <br>
Regularization : - <br>

In [None]:
# Start an empty layer stack for Case 1; layers are appended in the next cells.
model_1 = Sequential()

In [None]:
# Hidden layer: 12 sigmoid units fed by the 8 input features.
model_1.add(
    Dense(
        units=12,
        input_shape=(8,),
        activation='sigmoid'
    )
)

In [None]:
# Output layer: one sigmoid unit for the binary prediction.
model_1.add(
    Dense(
        units=1,
        activation='sigmoid'
    )
)

In [None]:
# Show the layers and parameter counts of model_1.
model_1.summary()

In [None]:
# `learning_rate` is the supported keyword; the legacy `lr` alias is deprecated
# and rejected by recent Keras releases.
model_1.compile(
    optimizer=SGD(learning_rate=0.01),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# NOTE: the held-out test split doubles as the validation set here, so the
# validation curve is not an independent estimate.
run_hist_1 = model_1.fit(
    X_train_norm,
    y_train,
    validation_data=(X_test_norm, y_test),
    epochs=200,
)

In [None]:
# predict() returns the sigmoid output of the final layer (one score per row),
# not class labels; threshold it (e.g. > 0.5) if hard predictions are needed.
y_pred_prob_nn_1 = model_1.predict(X_test_norm)

In [None]:
# Test-set ROC-AUC for model_1.
roc_auc_nn_1 = roc_auc_score(y_test, y_pred_prob_nn_1)
print('roc-auc is {:.3f}'.format(roc_auc_nn_1))

In [None]:
#plot_roc(y_test, y_pred_prob_nn_1, 'NN')

In [None]:
# Names of the per-epoch series recorded while training model_1.
run_hist_1.history.keys()

In [None]:
# Training vs. validation loss per epoch for model_1.
fig, ax = plt.subplots()
ax.plot(run_hist_1.history["loss"],'r', marker='.', label="Train Loss")
ax.plot(run_hist_1.history["val_loss"],'b', marker='.', label="Validation Loss")
# Title and axis labels so the figure is readable on its own.
ax.set(title='Model 1: loss per epoch', xlabel='Epoch', ylabel='Binary cross-entropy loss')
ax.legend();  # trailing ';' hides the Legend repr in the cell output

In [None]:
# Case 2 :
# Hidden Layers : 2
# Hidden Nodes in Layer : 6 each
# Activation Function : ReLU x 2, Sigmoid (final layer)
# Loss function : Binary-crossentropy
# Optimizer : SGD
# Regularization : -

In [None]:
# Start an empty layer stack for the second network; layers are appended below.
model_2 = Sequential()

In [None]:
# First hidden layer: 6 ReLU units fed by the 8 input features.
# 'relu' is the canonical activation identifier; the capitalised 'ReLU' is the
# name of a layer class and is not accepted as an activation string by every
# Keras version.
model_2.add(
    Dense(
        units=6,
        input_shape=(8,),
        activation='relu'
    )
)

In [None]:
# Second hidden layer: 6 ReLU units.
# 'relu' is the canonical activation identifier; the capitalised 'ReLU' is the
# name of a layer class and is not accepted as an activation string by every
# Keras version.
model_2.add(
    Dense(
        units=6,
        activation='relu'
    )
)

In [None]:
# Output layer: one sigmoid unit for the binary prediction.
model_2.add(
    Dense(
        units=1,
        activation="sigmoid"
    )
)

In [None]:
# Show the layers and parameter counts of model_2.
model_2.summary()

In [None]:
# `learning_rate` is the supported keyword; the legacy `lr` alias is deprecated
# and rejected by recent Keras releases.
model_2.compile(
    optimizer=SGD(learning_rate=0.003),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

In [None]:
# NOTE: the held-out test split doubles as the validation set here, so the
# validation curve is not an independent estimate.
run_hist_2 = model_2.fit(
    X_train_norm,
    y_train,
    validation_data=(X_test_norm, y_test),
    epochs=1500,
)

In [None]:
# predict() returns the sigmoid output of the final layer (one score per row),
# not class labels; threshold it (e.g. > 0.5) if hard predictions are needed.
y_pred_prob_nn_2 = model_2.predict(X_test_norm)

In [None]:

# Test-set ROC-AUC for model_2.
roc_auc_nn_2 = roc_auc_score(y_test, y_pred_prob_nn_2)
print('roc-auc is {:.3f}'.format(roc_auc_nn_2))


In [None]:
# ROC curve for model_2 on the test split.
plot_roc(y_test, y_pred_prob_nn_2, 'NN')

# Names of the per-epoch series recorded while training model_2.
run_hist_2.history.keys()



In [None]:
# Training vs. validation loss per epoch for model_2.
fig, ax = plt.subplots()
ax.plot(run_hist_2.history["loss"],'r', marker='.', label="Train Loss")
ax.plot(run_hist_2.history["val_loss"],'b', marker='.', label="Validation Loss")
# Title and axis labels so the figure is readable on its own.
ax.set(title='Model 2: loss per epoch', xlabel='Epoch', ylabel='Binary cross-entropy loss')
ax.legend();  # trailing ';' hides the Legend repr in the cell output