In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

In [None]:
# Location of the raw CSV. The default is the path this notebook was written
# against; set the DIABETES_CSV environment variable to run it elsewhere
# without editing the cell.
import os

DIABETES_CSV = os.environ.get("DIABETES_CSV", "/Users/kjx/Downloads/diabetes_data_upload.csv")
diabetes = pd.read_csv(DIABETES_CSV)

In [None]:
# Overview of the raw frame: its dimensions, the frequency of each distinct
# row, and the summary statistics pandas computes by default.
print(
    diabetes.shape,
    diabetes.value_counts(),
    diabetes.describe(),
    sep="\n",
)


In [None]:
# Number of distinct values found in each column of the raw frame.
diabetes.nunique()

In [None]:
# Encode the categorical answers as integers. An explicit mapping keeps every
# label next to its code instead of relying on two parallel lists lining up.
LABEL_CODES = {
    'Yes': 1, 'No': 0,
    'Male': 1, 'Female': 0,
    'Positive': 1, 'Negative': 0,
}

# infer_objects() gives the replaced columns a numeric dtype on pandas
# versions where replace() leaves them as object; it is a no-op otherwise.
data = diabetes.replace(LABEL_CODES).infer_objects()

# Show the first 17 columns of the encoded frame as the cell output.
data.iloc[:, 0:17]

In [None]:

from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Rescale the first 16 columns of the encoded frame into the [0, 1] range.
# NOTE(review): the scaler is fitted on every row, i.e. before the train/test
# split below, so test rows influence the scaling — consider fitting on the
# training rows only.
sc = MinMaxScaler(feature_range=(0, 1))
data_scaled2 = sc.fit_transform(data.iloc[:, 0:16])

print(data_scaled2, data["class"], sep="\n")



In [None]:

# Unscaled features and target, both taken positionally from the encoded frame.
X, Y = data.iloc[:, 0:16].values, data.iloc[:, 16].values

# Scaled features, with the target looked up by its column name.
X2, Y2 = data_scaled2, data["class"].values
print(X2)
print(Y2)

X2.shape


In [None]:
# Split both feature representations into training and test sets.
from sklearn.model_selection import train_test_split

# Shared options: with the same random_state and the same number of rows the
# scaled and unscaled splits select the same rows.
SPLIT_OPTIONS = dict(test_size = 0.3, random_state = 0, shuffle = True)

X2_train, X2_test, Y2_train, Y2_test = train_test_split(X2, Y2, **SPLIT_OPTIONS)

# Unscaled counterpart. Later cells train on X_train / Y_train and validate on
# X_test / Y_test, which were otherwise never defined in this notebook.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, **SPLIT_OPTIONS)

X2_train[0].shape



In [None]:
#Training model using function

def get_model(hiddenLayerOne = 6, dropout = 0.5, learnRate = 0.01):
    """Build and compile a one-hidden-layer binary classifier.

    Args:
        hiddenLayerOne: number of units in the hidden ReLU layer.
        dropout: rate passed to the Dropout layer that follows the hidden layer.
        learnRate: learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``tf.keras`` Sequential model using binary cross-entropy
        loss and reporting accuracy. Its input shape is taken from the
        module-level ``X2_train``.
    """
    model = tf.keras.models.Sequential()

    # Declare the input shape up front. It used to be passed as `input_shape`
    # to the Dense layer that comes *after* Flatten, where it does not define
    # the model's input, so the model stayed unbuilt until the first fit.
    model.add(tf.keras.Input(shape=X2_train[0].shape))

    # Flatten each sample to one dimension without touching the batch axis.
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(hiddenLayerOne, activation = 'relu'))

    # Drop hidden activations during training to limit overfitting.
    model.add(tf.keras.layers.Dropout(dropout))

    # Single sigmoid unit: the predicted probability of the positive class.
    model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

    model.compile(
        optimizer = tf.keras.optimizers.Adam(learning_rate=learnRate),
        loss = 'binary_crossentropy',
        metrics = ['accuracy'],
    )

    return model


# Seed TensorFlow before the model is created.
tf.random.set_seed(1)

# Baseline network with the default (untuned) hyperparameters; the test split
# doubles as validation data so its loss/accuracy are logged every epoch.
model = get_model()
fitting = model.fit(
    X2_train,
    Y2_train,
    validation_data=(X2_test, Y2_test),
    batch_size = 100,
    epochs = 300,
)

# evaluate() returns the loss followed by the compiled metrics; index 1 is accuracy.
print("Evaluating model ...")
evaluation = model.evaluate(X2_test, Y2_test)
accuracy = evaluation[1]
print("accuracy: {: .2f}%".format(accuracy * 100))

    

In [None]:


from sklearn.metrics import confusion_matrix

# Training vs. validation loss per epoch for the most recent fit.
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'test')):
    plt.plot(fitting.history[history_key], label = curve_label)
plt.xlabel('Epoch')
plt.ylabel('Model Loss')
plt.legend()

# Threshold the sigmoid outputs at 0.5 to obtain hard class labels.
Y_pred = model.predict(X2_test)
y_pred = (Y_pred > 0.5).astype(int)

print(confusion_matrix(Y2_test, y_pred))



In [None]:
# Training vs. validation accuracy per epoch for the most recent fit.
for history_key, curve_label in (('accuracy', 'train'), ('val_accuracy', 'test')):
    plt.plot(fitting.history[history_key], label = curve_label)
plt.xlabel('Epoch')
plt.ylabel('Model Accuracy')
plt.legend()


In [None]:
# Patient to score, written as label -> encoded value so the description and
# the numbers cannot drift apart (1 = yes / male, 0 = no / female, matching
# the encoding applied to the training data).
#
# The values are exactly the ones this cell has always fed to the model. The
# note that used to sit here described a different patient (age 68, male,
# weakness and visual blurring "yes", obesity "no"); if that patient was the
# intended one, change the values below.
patient = {
    'age': 48,
    'gender': 0,
    'polyuria': 0,
    'polydipsia': 0,
    'sudden weight loss': 0,
    'weakness': 0,
    'polyphagia': 1,
    'genital thrush': 0,
    'visual blurring': 0,
    'itching': 0,
    'irritability': 0,
    'delayed healing': 0,
    'partial paresis': 0,
    'muscle stiffness': 0,
    'alopecia': 0,
    'obesity': 1,
}

# NOTE(review): the key order above is assumed to match the 16 columns the
# scaler was fitted on — confirm against data.columns[:16].
new = np.array(list(patient.values()))
new2 = new.reshape(1, -1)   # one row, 16 features
new3 = sc.transform(new2)   # same scaling as the training features

print(new3)

model.predict(new3)


In [None]:
# Draw the layer graph of `model`, annotated with tensor shapes and layer names.
from keras.utils import plot_model
plot_model(model, show_shapes=True, show_layer_names= True)

In [None]:
# Build a fresh network with a wider hidden layer and lighter dropout. The
# result has to be assigned: otherwise the new model is thrown away and the
# fit below simply continues training the previous `model`.
model = get_model(hiddenLayerOne = 16, dropout = 0.3, learnRate = 0.01)

#no hyperparameter tuning
fitting = model.fit(X2_train, Y2_train, validation_data=(X2_test, Y2_test), batch_size = 90, epochs = 250)

# Training vs. validation loss per epoch.
plt.plot(fitting.history['loss'], label = 'train')
plt.plot(fitting.history['val_loss'], label = 'test')
plt.ylabel('Model Loss')
plt.xlabel('Epoch')
plt.legend()

In [None]:
# Training vs. validation accuracy per epoch for the most recent fit.
accuracy_curves = {'train': 'accuracy', 'test': 'val_accuracy'}
for curve_label, history_key in accuracy_curves.items():
    plt.plot(fitting.history[history_key], label = curve_label)
plt.xlabel('Epoch')
plt.ylabel('Model Accuracy')
plt.legend()

In [None]:
# TensorFlow hyperparameter tuning.
# KerasClassifier wraps a Keras model builder so it can be used with
# scikit-learn utilities (here: the hyperparameter search).
# NOTE(review): this wrapper module is, as far as I know, not shipped with
# recent TensorFlow releases — confirm the TensorFlow version this notebook
# is pinned to.
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

# Scaled train/test arrays that the search below works on.
print(X2_test, X2_train, Y2_test, Y2_train)

In [None]:
tf.random.set_seed(1)

# Wrap the model builder so scikit-learn can clone and fit it.
model = KerasClassifier(build_fn=get_model, verbose = 0)

# Candidate values for each hyperparameter. The first three are forwarded to
# get_model(); batch_size and epochs are fit-time settings of the wrapper.
hiddenLayerOne = [6,8,10,12]
learnRate = [1e-2, 1e-3, 1e-4]
dropout = [0.3, 0.4, 0.5]
batchSize = [30, 40, 50, 60, 70, 80, 90, 100]
epochs = [50, 100, 150, 200, 250, 300, 350, 400]

# Search space handed to the randomized search.
grid = dict(
    hiddenLayerOne = hiddenLayerOne,
    learnRate = learnRate,
    dropout = dropout,
    batch_size = batchSize,
    epochs = epochs
)

# Randomized search with 3-fold cross-validation. random_state fixes which
# combinations get sampled; without it every run evaluates a different set of
# candidates and the reported "best" parameters are not reproducible.
print("Random Search ...")
searcher = RandomizedSearchCV(
    estimator= model,
    n_jobs= 1,
    cv = 3,
    param_distributions= grid,
    scoring = "accuracy",
    random_state = 1,
)
searchResults = searcher.fit(X2_train, Y2_train)

# Summarise the outcome of the search.
bestScore = searchResults.best_score_
bestPara = searchResults.best_params_
print("[INFO] best score is {:.2f} using {}".format(bestScore,
    bestPara))


#plotting loss 





In [None]:
# Import the submodule explicitly: `import sklearn` on its own does not
# guarantee that `sklearn.metrics` has been loaded, so the attribute access
# below would only work if some earlier cell had already imported it.
import sklearn.metrics

# List the scorer names accepted by the `scoring=` argument.
sklearn.metrics.get_scorer_names()

In [None]:
# Pull the best estimator out of the finished search and score it on the
# test split.
bestModel = searchResults.best_estimator_

print("[INFO] evaluating the best model...")
accuracy = bestModel.score(X2_test, Y2_test)
print("accuracy:", accuracy)

In [None]:
# Seed before the model is built so the weight initialisation is covered too.
tf.random.set_seed(1)

# Train with the chosen hyperparameters. Previously the configured model was
# built and discarded, and a default get_model() was trained in its place.
model = get_model(hiddenLayerOne = 16, dropout = 0.4, learnRate = 0.01)

#after hyperparameter tuning
fitting = model.fit(X2_train, Y2_train, validation_data=(X2_test, Y2_test), batch_size = 70, epochs = 350)

from keras.utils import plot_model
plot_model(model, show_shapes=True, show_layer_names= True)

# Training vs. validation loss per epoch.
plt.plot(fitting.history['loss'], label = 'train')
plt.plot(fitting.history['val_loss'], label = 'test')
plt.ylabel('Model Loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()  # must be called; a bare `plt.show` only names the function


In [None]:
# Training vs. validation accuracy per epoch for the most recent fit.
plt.plot(fitting.history['accuracy'], label = 'train')
plt.plot(fitting.history['val_accuracy'], label = 'test')
plt.ylabel('Model Accuracy')  # this figure shows accuracy, not loss
plt.xlabel('Epoch')
plt.legend()
plt.show()  # must be called; a bare `plt.show` only names the function

In [None]:
# First attempt (16, 16, 1): one hidden ReLU layer of 16 units feeding a
# single sigmoid output; the input shape is read from one training sample.
ann = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units = 16, activation = 'relu', input_shape=X_train[0].shape),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

In [None]:
# Compile the first network, then train it while logging loss/accuracy on the
# test split after every epoch.
ann.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy'],
)

fitting = ann.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    batch_size = 40,
    epochs = 100,
)

In [None]:
# Training vs. validation loss per epoch for the most recent fit.
plt.plot(fitting.history['loss'], label = 'train')
plt.plot(fitting.history['val_loss'], label = 'test')
plt.ylabel('Model Loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()  # must be called; a bare `plt.show` only names the function



In [None]:
# Print the layer-by-layer summary of the first network.
ann.summary()

In [None]:
# Draw the layer graph of `ann`, annotated with tensor shapes and layer names.
from keras.utils import plot_model
plot_model(ann, show_shapes=True, show_layer_names= True)

In [None]:
# Second attempt (16, 14, 1): one hidden ReLU layer of 14 units feeding a
# single sigmoid output.
ann_2 = tf.keras.models.Sequential()
ann_2.add(tf.keras.layers.Dense(units = 14, activation = 'relu', input_shape=X_train[0].shape))
ann_2.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Compile and train, logging loss/accuracy on the test split every epoch.
ann_2.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

fitting_2 = ann_2.fit(X_train, Y_train, validation_data=(X_test, Y_test), batch_size = 40, epochs = 100)

# Training vs. validation loss per epoch.
plt.plot(fitting_2.history['loss'], label = 'train')
plt.plot(fitting_2.history['val_loss'], label = 'test')
plt.ylabel('Model Loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()  # must be called; a bare `plt.show` only names the function




In [None]:
# Loss and accuracy of the second network on the test split.
test_loss, test_acc = ann_2.evaluate(X_test, Y_test)
for metric_label, metric_value in (('Test loss:', test_loss), ('Test accuracy:', test_acc)):
    print(metric_label, metric_value)

# Layer graph of the second network, with shapes and layer names.
plot_model(ann_2, show_shapes=True, show_layer_names= True)


In [None]:

from sklearn.metrics import confusion_matrix

# Sigmoid outputs of the second network, thresholded at 0.5 into 0/1 labels.
Y_pred = ann_2.predict(X_test)
y_pred = (Y_pred > 0.5).astype(int)
print(y_pred)

# Confusion matrix of true test labels against the thresholded predictions.
print(confusion_matrix(Y_test, y_pred))

In [None]:
# Third attempt: two hidden ReLU layers of 8 units each, then a single
# sigmoid output. (The earlier note here said "16, 16, 16, 1", which does not
# match the layers actually built.)
ann_3 = tf.keras.models.Sequential()
ann_3.add(tf.keras.layers.Dense(units = 8, activation = 'relu', input_shape=X_train[0].shape))
ann_3.add(tf.keras.layers.Dense(units=8, activation='relu'))
ann_3.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Compile and train, logging loss/accuracy on the test split every epoch.
ann_3.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

fitting_3 = ann_3.fit(X_train, Y_train, validation_data=(X_test, Y_test), batch_size = 40, epochs = 100)

# Training vs. validation loss per epoch.
plt.plot(fitting_3.history['loss'], label = 'train')
plt.plot(fitting_3.history['val_loss'], label = 'test')
plt.ylabel('Model Loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()  # must be called; a bare `plt.show` only names the function






In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score

def ann_two():
    """Build and compile a network with two 16-unit ReLU hidden layers.

    Returns:
        A compiled Sequential model (Adam optimizer, binary cross-entropy
        loss, accuracy metric) ending in one sigmoid unit. The input shape is
        read from the module-level ``X_train``.
    """
    network = tf.keras.models.Sequential([
        tf.keras.layers.Dense(units = 16, activation = 'relu', input_shape=X_train[0].shape),
        tf.keras.layers.Dense(units=16, activation='relu'),
        tf.keras.layers.Dense(units=1, activation='sigmoid'),
    ])

    # Only compiled here; training is left to whoever calls fit().
    network.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

    return network

# Give the wrapper the same training settings as the hand-run experiments.
# Without epochs/batch_size it falls back to Keras' fit defaults (a single
# epoch), so the cross-validated scores would describe barely-trained models.
keras_2 = KerasClassifier(build_fn = ann_two, epochs = 100, batch_size = 40, verbose = 0)

# 10-fold cross-validated accuracy on the full unscaled data set.
scores = cross_val_score(keras_2, X, Y, cv=10, scoring='accuracy')
print(scores)

print(scores.mean())

In [None]:
def ann_one(units = 16):
    """Build and compile a network with a single ReLU hidden layer.

    Args:
        units: width of the hidden layer. The body used to read an undefined
            global ``i`` here, which raised NameError as soon as the builder
            was called. The default of 16 mirrors the notebook's first
            hand-built network.

    Returns:
        A compiled Sequential model (Adam optimizer, binary cross-entropy
        loss, accuracy metric) ending in one sigmoid unit. The input shape is
        read from the module-level ``X_train``.
    """
    ann = tf.keras.models.Sequential()

    # Input layer and the only hidden layer.
    ann.add(tf.keras.layers.Dense(units = units, activation = 'relu', input_shape=X_train[0].shape))

    # Output layer.
    ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

    ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

    return ann

# Give the wrapper the same training settings as the hand-run experiments.
# Without epochs/batch_size it falls back to Keras' fit defaults (a single
# epoch), so the cross-validated scores would describe barely-trained models.
keras_1 = KerasClassifier(build_fn = ann_one, epochs = 100, batch_size = 40, verbose = 0)

# 10-fold cross-validated accuracy on the full unscaled data set.
scores_1 = cross_val_score(keras_1, X, Y, cv=10, scoring='accuracy')
print(scores_1)

print(scores_1.mean())


In [None]:


tf.random.set_seed(1)

from sklearn.model_selection import KFold

kf = KFold(n_splits = 3)

for train_index, test_index in kf.split(X2):
    # Fold-local names: assigning to X2_train / X2_test / Y2_train / Y2_test
    # here would silently replace the notebook-wide train/test split that
    # other cells rely on.
    X2_fold_train, X2_fold_test = X2[train_index], X2[test_index]
    Y2_fold_train, Y2_fold_test = Y2[train_index], Y2[test_index]

    # Start every fold from a freshly initialised network. Re-fitting one
    # shared model would carry over weights learned on earlier folds, whose
    # training rows include this fold's held-out rows.
    # Pass hyperparameters to get_model() here to cross-validate a tuned setup.
    fold_model = get_model()

    # Validate on this fold's own held-out rows (scaled, like the training
    # rows) rather than on the unrelated X_test / Y_test arrays.
    fitting = fold_model.fit(
        X2_fold_train,
        Y2_fold_train,
        validation_data=(X2_fold_test, Y2_fold_test),
        batch_size = 30,
        epochs = 400,
    )

    # Training accuracy per epoch for this fold.
    plt.plot(fitting.history['accuracy'], label = 'train')
    plt.ylabel('Model Accuracy')
    plt.xlabel('Epoch')
    plt.legend()
    plt.show()  # must be called; a bare `plt.show` only names the function

    # Held-out accuracy per epoch for this fold.
    plt.plot(fitting.history['val_accuracy'], label = 'test')
    plt.ylabel('Model Accuracy')
    plt.xlabel('Epoch')
    plt.legend()
    plt.show()

    # Accuracy averaged over all epochs of this fold's training run.
    print("train_acc: ", np.mean(fitting.history['accuracy']))
    print("test_acc: ", np.mean(fitting.history['val_accuracy']))

In [None]:
from sklearn.model_selection import GridSearchCV


def ann_one(activation='relu', optimizer='adam'):
    """Build and compile a network with one 14-unit hidden layer.

    Args:
        activation: activation function of the hidden layer.
        optimizer: optimizer handed to ``compile``.

    Returns:
        A compiled Sequential model (binary cross-entropy loss, accuracy
        metric) ending in one sigmoid unit. The input shape is read from the
        module-level ``X_train``.
    """
    hidden_layer = tf.keras.layers.Dense(units = 14, activation = activation, input_shape=X_train[0].shape)
    output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')

    network = tf.keras.models.Sequential([hidden_layer, output_layer])
    network.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])

    return network

# Give the wrapper the same training settings as the hand-run experiments.
# Without epochs/batch_size it falls back to Keras' fit defaults (a single
# epoch), so the cross-validated scores would describe barely-trained models.
keras_1 = KerasClassifier(build_fn = ann_one, epochs = 100, batch_size = 40, verbose = 0)

# 10-fold cross-validated accuracy, computed on the training split only.
accuracies = cross_val_score(estimator=keras_1, scoring="accuracy",
    X=X_train, y=Y_train, cv=10)

print(accuracies.mean())

In [None]:
# Second architecture (16, 14, 1) again, this time on the scaled features:
# one hidden ReLU layer of 14 units feeding a single sigmoid output.
ann_2 = tf.keras.models.Sequential()
ann_2.add(tf.keras.layers.Dense(units = 14, activation = 'relu', input_shape=X2_train[0].shape))
ann_2.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Compile and train, logging loss/accuracy on the scaled test split every epoch.
ann_2.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

fitting_2 = ann_2.fit(X2_train, Y2_train, validation_data=(X2_test, Y2_test), batch_size = 40, epochs = 100)

# Training vs. validation loss per epoch.
plt.plot(fitting_2.history['loss'], label = 'train')
plt.plot(fitting_2.history['val_loss'], label = 'test')
plt.ylabel('Model Loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()  # must be called; a bare `plt.show` only names the function