In [1]:
# Import modules
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [2]:
# Read the CSV file into a Pandas DataFrame
df = pd.read_csv(
    Path("alzheimer_clean.csv")   
)

# Review the DataFrame
df.head()

Unnamed: 0,Age,Gender,Ethnicity,EducationLevel,BMI,Smoking,AlcoholConsumption,PhysicalActivity,DietQuality,SleepQuality,...,FunctionalAssessment,MemoryComplaints,BehavioralProblems,ADL,Confusion,Disorientation,PersonalityChanges,DifficultyCompletingTasks,Forgetfulness,Diagnosis
0,73,0,0,2,22.927749,0,13.297218,6.327112,1.347214,9.025679,...,6.518877,0,0,1.725883,0,0,0,1,0,0
1,89,0,0,0,26.827681,0,4.542524,7.619885,0.518767,7.151293,...,7.118696,0,0,2.592424,0,0,0,0,1,0
2,73,0,3,1,17.795882,0,19.555085,7.844988,1.826335,9.673574,...,5.895077,0,0,7.119548,0,1,0,1,0,0
3,74,1,0,1,33.800817,1,12.209266,8.428001,7.435604,8.392554,...,8.965106,0,1,6.481226,0,0,0,0,0,0
4,89,0,0,0,20.716974,0,18.454356,6.310461,0.795498,5.597238,...,6.045039,0,0,0.014691,0,0,1,1,0,0


In [3]:
o5_df = df[["Diagnosis", "FunctionalAssessment", "ADL", "MemoryComplaints", "MMSE", "BehavioralProblems"]]

In [4]:
y = o5_df["Diagnosis"]
x = o5_df.drop(columns="Diagnosis")

In [5]:
# Encode the categorical variables using get_dummies
X = pd.get_dummies(x)

In [6]:
# split original dataset X and y into a training set + temp set (test_size=0.2 for 20% of data going towards the temp set and 80% for training set)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=1)

# split temp set into validation set + test set (50/50) 
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=1)

# goal = train using 80% of original data, tune hyperparameters using the validation set + evaluate model performance on unseen data using test data

In [7]:
print(X_train.shape, X_val.shape, X_test.shape)

(1719, 5) (215, 5) (215, 5)


In [8]:
# Instantiate a StandardScaler instance
scaler = StandardScaler()

# Fit the training data to the standard scaler
X_scaler = scaler.fit(X_train)

# Transform the training data using the scaler
X_train_scaled = X_scaler.transform(X_train)

# Transform the testing data using the scaler
X_test_scaled = X_scaler.transform(X_test)

# Transform the validation data using the scaler
X_val_scaled = X_scaler.transform(X_val)

Doing a Grid Search for n_neighours optimal value using cross-validation in Python with SciKit-Learn

In [9]:
# Define the parameter grid with the correct parameter name 'n_neighbors'
param_grid = {'n_neighbors': [3, 5, 7, 9, 11]}

# Create a KNN classifier
knn = KNeighborsClassifier()

# Perform grid search with cross-validation
grid_search = GridSearchCV(knn, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Access the best parameters and print the best n_neighbors value
best_n_neighbors = grid_search.best_params_['n_neighbors']
print("Best n_neighbors value:", best_n_neighbors)

Best n_neighbors value: 3


Trying the "rule of thumb for n_neighbours" = square root of total number of samples in the dataset (1719 for training set = 41)

In [10]:
# Define the parameter grid with the correct parameter name 'n_neighbors'
param_grid = {'n_neighbors': [35, 37, 39, 41, 43]}

# Create a KNN classifier
knn = KNeighborsClassifier()

# Perform grid search with cross-validation
grid_search = GridSearchCV(knn, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Access the best parameters and print the best n_neighbors value
best_n_neighbors = grid_search.best_params_['n_neighbors']
print("Best n_neighbors value:", best_n_neighbors)

Best n_neighbors value: 35


try n_neighbors = 3

In [11]:
# Import the KNeighborsClassifier module from sklearn
from sklearn.neighbors import KNeighborsClassifier

# Instantiate the KNeighborsClassifier model with n_neighbors = 19 
knn = KNeighborsClassifier(n_neighbors=3)

In [12]:
# Train the model using the training data
knn.fit(X_train_scaled, y_train)


KNeighborsClassifier(n_neighbors=3)

In [13]:
validation_accuracy = knn.score(X_val, y_val)
print("Validation Accuracy:", validation_accuracy)

Validation Accuracy: 0.6651162790697674


In [14]:
# Create predictions using the testing data
y_pred = knn.predict(X_test_scaled)

In [15]:
# Print the classification report comparing the testing data to the model predictions
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.90      0.89      0.90       139
           1       0.81      0.82      0.81        76

    accuracy                           0.87       215
   macro avg       0.85      0.85      0.85       215
weighted avg       0.87      0.87      0.87       215



Try n_neighbors = 35

---> lowered validation and accuracy so keep n_neighbours = 3

In [16]:
# Import the KNeighborsClassifier module from sklearn
from sklearn.neighbors import KNeighborsClassifier

# Instantiate the KNeighborsClassifier model with n_neighbors = 19 
knn = KNeighborsClassifier(n_neighbors=35)

In [17]:
# Train the model using the training data
knn.fit(X_train_scaled, y_train)


KNeighborsClassifier(n_neighbors=35)

In [18]:
validation_accuracy = knn.score(X_val, y_val)
print("Validation Accuracy:", validation_accuracy)

Validation Accuracy: 0.641860465116279


In [19]:
# Create predictions using the testing data
y_pred = knn.predict(X_test_scaled)

In [20]:
# Print the classification report comparing the testing data to the model predictions
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.89      0.89      0.89       139
           1       0.80      0.80      0.80        76

    accuracy                           0.86       215
   macro avg       0.85      0.85      0.85       215
weighted avg       0.86      0.86      0.86       215



Normalization to scale features to a range between 0 and 1 (want to bring data to common scale)

---> decreased accuracy, recall, precision. Keep standardscaler 

In [21]:
from sklearn.preprocessing import MinMaxScaler

In [22]:
# Create a MinMaxScaler instance
scaler = MinMaxScaler()

# Fit the training data to the standard scaler
X_scaler = scaler.fit(X_train)

# Transform the training data using the scaler
X_train_scaled_norm = X_scaler.transform(X_train)

# Transform the testing data using the scaler
X_test_scaled_norm = X_scaler.transform(X_test)

# Transform the validation data using the scaler
X_val_scaled_norm = X_scaler.transform(X_val)

In [23]:
from sklearn.pipeline import make_pipeline

# Create a pipeline with StandardScaler and KNN classifier
pipeline = make_pipeline(MinMaxScaler(), KNeighborsClassifier(n_neighbors=3))

# Fit the pipeline on the training data and make predictions
pipeline.fit(X_train, y_train)

Pipeline(steps=[('minmaxscaler', MinMaxScaler()),
                ('kneighborsclassifier', KNeighborsClassifier(n_neighbors=3))])

In [24]:
# Instantiate the KNeighborsClassifier model with n_neighbors = 19 
knn = KNeighborsClassifier(n_neighbors=3)

In [25]:
# Train the model using the training data
knn.fit(X_train_scaled_norm, y_train)

KNeighborsClassifier(n_neighbors=3)

In [26]:
validation_accuracy = knn.score(X_val, y_val)
print("Validation Accuracy:", validation_accuracy)

Validation Accuracy: 0.6511627906976745


In [27]:
# Create predictions using the testing data
y_pred = knn.predict(X_test)

In [28]:
# Print the classification report comparing the testing data to the model predictions
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.64      0.98      0.78       139
           1       0.25      0.01      0.03        76

    accuracy                           0.64       215
   macro avg       0.45      0.50      0.40       215
weighted avg       0.51      0.64      0.51       215



Normalization and Pipeline Feature has the same validation accuracy, but lowers f1-score whilst increasing recall for [0], which decreasing for [1]. 

NO IMPROVEMENT WITH CHANGE IN WEIGHT BY DISTANCE (uniform and distance the same)

In [29]:
# Create a k-NN classifier with distance-based weighting
knn_distance = KNeighborsClassifier(n_neighbors=3, weights='distance')

In [30]:
# Train the model using the training data
knn_distance.fit(X_train_scaled, y_train)

KNeighborsClassifier(n_neighbors=3, weights='distance')

In [31]:
validation_accuracy = knn_distance.score(X_val, y_val)
print("Validation Accuracy:", validation_accuracy)

# validation decreased with weight change

Validation Accuracy: 0.6651162790697674


In [32]:
# Create predictions using the testing data
y_pred = knn_distance.predict(X_test_scaled)

In [33]:
# Print the classification report comparing the testing data to the model predictions
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.90      0.89      0.90       139
           1       0.81      0.82      0.81        76

    accuracy                           0.87       215
   macro avg       0.85      0.85      0.85       215
weighted avg       0.87      0.87      0.87       215



Try lowered n_neighbors (=19)

In [34]:
# Import the KNeighborsClassifier module from sklearn
from sklearn.neighbors import KNeighborsClassifier

# Instantiate the KNeighborsClassifier model with n_neighbors = 19 
knn = KNeighborsClassifier(n_neighbors=19)

In [35]:
# Train the model using the training data
knn.fit(X_train_scaled, y_train)

KNeighborsClassifier(n_neighbors=19)

In [36]:
validation_accuracy = knn.score(X_val, y_val)
print("Validation Accuracy:", validation_accuracy)

Validation Accuracy: 0.641860465116279


In [37]:
# Create predictions using the testing data
y_pred = knn.predict(X_test_scaled)

In [38]:
# Print the classification report comparing the testing data to the model predictions
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.90      0.91      0.90       139
           1       0.83      0.82      0.82        76

    accuracy                           0.87       215
   macro avg       0.86      0.86      0.86       215
weighted avg       0.87      0.87      0.87       215



TESTING DEEP LEARNING MODELS

In [39]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=16, activation="relu", input_dim=5))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

# results promising; but attempt to improve 

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
7/7 - 0s - loss: 0.3540 - accuracy: 0.8605
7/7 - 0s - loss: 0.2825 - accuracy: 0.8837
Loss: 0.35397177934646606, Accuracy: 0.8604651093482971
Validation Set - Loss: 0.2824665307998657, Accuracy: 0.8837209343910217


First start w/ grid search to see if random neuron number was optimal

In [40]:
# Define the deep learning model function

def create_model(neurons=16):
    nn_model = tf.keras.models.Sequential()
    nn_model.add(tf.keras.layers.Dense(units=neurons, activation="relu", input_dim=5))
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    return nn_model

# Create a KerasClassifier based on the model function
nn_model = KerasClassifier(build_fn=create_model, epochs=50, batch_size=10, verbose=0)

# Define the grid of hyperparameters to search
param_grid = {'neurons': [8, 16, 32, 64]}

# Perform grid search
grid = GridSearchCV(estimator=nn_model, param_grid=param_grid, cv=5)
grid_result = grid.fit(X_train_scaled, y_train)

# Get the best parameters and results
print("Best Parameters: ", grid_result.best_params_)
print("Best Score: ", grid_result.best_score_)

Best Parameters:  {'neurons': 64}
Best Score:  0.9197149038314819


Test model using 64 neurons in layer

-----> optimized 8 neurons x 1 layer (50 epochs to test)

In [41]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=64, activation="relu", input_dim=5))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
7/7 - 0s - loss: 0.3466 - accuracy: 0.8884
7/7 - 0s - loss: 0.2486 - accuracy: 0.9023
Loss: 0.3465898633003235, Accuracy: 0.8883720636367798
Validation Set - Loss: 0.24862170219421387, Accuracy: 0.9023255705833435


Increase neurons to = 64 increase accuracy for model

Checking optimization of epochs # using GridSearchCV

In [42]:
# Define the deep learning model function

def create_model(neurons=64):
    nn_model = tf.keras.models.Sequential()
    nn_model.add(tf.keras.layers.Dense(units=neurons, activation="relu", input_dim=5))
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    return nn_model

# Create a KerasClassifier based on the model function
nn_model = KerasClassifier(build_fn=create_model, epochs=50, batch_size=10, verbose=0)

# Define the grid of hyperparameters to search
param_grid = {'epochs': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]}

# Perform grid search
grid = GridSearchCV(estimator=nn_model, param_grid=param_grid, cv=5)
grid_result = grid.fit(X_train_scaled, y_train)

# Get the best parameters and results
print("Best Parameters: ", grid_result.best_params_)
print("Best Score: ", grid_result.best_score_)

Best Parameters:  {'epochs': 100}
Best Score:  0.9371686100959777


In [43]:
# Define the deep learning model function

def create_model(neurons=64):
    nn_model = tf.keras.models.Sequential()
    nn_model.add(tf.keras.layers.Dense(units=neurons, activation="relu", input_dim=5))
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    return nn_model

# Create a KerasClassifier based on the model function
nn_model = KerasClassifier(build_fn=create_model, epochs=50, batch_size=10, verbose=0)

# Define the grid of hyperparameters to search
param_grid = {'epochs': [100, 150, 200]}

# Perform grid search
grid = GridSearchCV(estimator=nn_model, param_grid=param_grid, cv=5)
grid_result = grid.fit(X_train_scaled, y_train)

# Get the best parameters and results
print("Best Parameters: ", grid_result.best_params_)
print("Best Score: ", grid_result.best_score_)

Best Parameters:  {'epochs': 200}
Best Score:  0.9412349939346314


In [44]:
# Define the deep learning model function

def create_model(neurons=64):
    nn_model = tf.keras.models.Sequential()
    nn_model.add(tf.keras.layers.Dense(units=neurons, activation="relu", input_dim=5))
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    return nn_model

# Create a KerasClassifier based on the model function
nn_model = KerasClassifier(build_fn=create_model, epochs=50, batch_size=10, verbose=0)

# Define the grid of hyperparameters to search
param_grid = {'epochs': [200, 300, 400]}

# Perform grid search
grid = GridSearchCV(estimator=nn_model, param_grid=param_grid, cv=5)
grid_result = grid.fit(X_train_scaled, y_train)

# Get the best parameters and results
print("Best Parameters: ", grid_result.best_params_)
print("Best Score: ", grid_result.best_score_)

Best Parameters:  {'epochs': 300}
Best Score:  0.9412417650222779


Keeping epochs=200 because lowest runtime and highest score

Checking validation set using new parameters (optimized epochs=200 as well as neuron =8 in single layer)

---> lower than for epochs = 50, so try epochs=100

---> epochs = 100 also has lower results than epochs=50. Keep epochs = 50

In [45]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=8, activation="relu", input_dim=5))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=200, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [46]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=8, activation="relu", input_dim=5))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=100, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Checking parameters neurons x optimizer for optimization

In [47]:
# Define the deep learning model function

def create_model(neurons=8, optimizer="adam"):
    nn_model = tf.keras.models.Sequential()
    nn_model.add(tf.keras.layers.Dense(units=neurons, activation="relu", input_dim=5))
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    nn_model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    return nn_model

# Create a KerasClassifier based on the model function
nn_model = KerasClassifier(build_fn=create_model, epochs=50, batch_size=10, verbose=0)

# Define the grid of hyperparameters to search
param_grid = {'neurons': [8, 16, 32],'optimizer': ["adam", "SGD", "RMSprop"]}

# Perform grid search
grid = GridSearchCV(estimator=nn_model, param_grid=param_grid, cv=5)
grid_result = grid.fit(X_train_scaled, y_train)

# Get the best parameters and results
print("Best Parameters: ", grid_result.best_params_)
print("Best Score: ", grid_result.best_score_)

Best Parameters:  {'neurons': 32, 'optimizer': 'adam'}
Best Score:  0.9063478946685791


In [48]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=64, activation="relu", input_dim=5))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="RMSprop", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
7/7 - 0s - loss: 0.3468 - accuracy: 0.8837
7/7 - 0s - loss: 0.2464 - accuracy: 0.8930
Loss: 0.34679630398750305, Accuracy: 0.8837209343910217
Validation Set - Loss: 0.24641434848308563, Accuracy: 0.8930232524871826


neuron =64, epochs =50, optimizer = RMSprop gives the best balance between test accuracy vs. validation accuracy

Testing other activation functions

---> AF = tanh decreases accuracy

---> PReLU also increases accuracy + validation

In [49]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=64, activation="tanh", input_dim=5))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="RMSprop", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
7/7 - 0s - loss: 0.3771 - accuracy: 0.8605
7/7 - 0s - loss: 0.3051 - accuracy: 0.8837
Loss: 0.37712523341178894, Accuracy: 0.8604651093482971
Validation Set - Loss: 0.30506715178489685, Accuracy: 0.8837209343910217


In [50]:
from tensorflow.keras.layers import PReLU

# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=64, input_dim=5))
nn_model.add(PReLU())  # Adding PReLU activation function here

# Output layer for binary classification (1 neuron for 0 or 1)
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="RMSprop", metrics=["accuracy"])

# Train the model with validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data and validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled, y_test, verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
7/7 - 0s - loss: 0.3525 - accuracy: 0.8791
7/7 - 0s - loss: 0.2447 - accuracy: 0.9209
Loss: 0.3525349497795105, Accuracy: 0.8790697455406189
Validation Set - Loss: 0.2446981966495514, Accuracy: 0.9209302067756653


Testing addition of more hidden layers

---> adding one hidden layer increased accuracy and decreased loss

---> adding two hidden layers increased accuracy and decreased loss

---> adding three hidden layers decreased accuracy and increased loss

In [51]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=64, input_dim=5))
nn_model.add(tf.keras.layers.Dense(units=64))

nn_model.add(PReLU())  # Adding PReLU activation function here
# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="RMSprop", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
7/7 - 0s - loss: 0.3412 - accuracy: 0.9070
7/7 - 0s - loss: 0.2299 - accuracy: 0.9395
Loss: 0.3412415683269501, Accuracy: 0.9069767594337463
Validation Set - Loss: 0.22990761697292328, Accuracy: 0.9395349025726318


In [52]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=64, input_dim=5))
nn_model.add(tf.keras.layers.Dense(units=64))
nn_model.add(tf.keras.layers.Dense(units=64))

nn_model.add(PReLU())  # Adding PReLU activation function here
# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="RMSprop", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
7/7 - 0s - loss: 0.3512 - accuracy: 0.9023
7/7 - 0s - loss: 0.2993 - accuracy: 0.8930
Loss: 0.35117873549461365, Accuracy: 0.9023255705833435
Validation Set - Loss: 0.2992880046367645, Accuracy: 0.8930232524871826


In [53]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=64, input_dim=5))
nn_model.add(tf.keras.layers.Dense(units=64))
nn_model.add(tf.keras.layers.Dense(units=64))
nn_model.add(tf.keras.layers.Dense(units=64))

nn_model.add(PReLU())  # Adding PReLU activation function here
# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="RMSprop", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
7/7 - 0s - loss: 0.3303 - accuracy: 0.9163
7/7 - 0s - loss: 0.2785 - accuracy: 0.8977
Loss: 0.3302818834781647, Accuracy: 0.9162790775299072
Validation Set - Loss: 0.27854907512664795, Accuracy: 0.8976744413375854


All modifications to learning speed had decreased accuracy for both testing and validation -- keep original settings. Do not specify. 

In [54]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=64, input_dim=5))
nn_model.add(tf.keras.layers.Dense(units=64))
nn_model.add(tf.keras.layers.Dense(units=64))

nn_model.add(PReLU())  # Adding PReLU activation function here
# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.005)

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
7/7 - 0s - loss: 0.3826 - accuracy: 0.8977
7/7 - 0s - loss: 0.2626 - accuracy: 0.9023
Loss: 0.38261446356773376, Accuracy: 0.8976744413375854
Validation Set - Loss: 0.2626054883003235, Accuracy: 0.9023255705833435


In [55]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=64, input_dim=5))
nn_model.add(tf.keras.layers.Dense(units=64))
nn_model.add(tf.keras.layers.Dense(units=64))

nn_model.add(PReLU())  # Adding PReLU activation function here
# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
7/7 - 0s - loss: 0.3491 - accuracy: 0.9116
7/7 - 0s - loss: 0.2697 - accuracy: 0.9163
Loss: 0.34906652569770813, Accuracy: 0.9116278886795044
Validation Set - Loss: 0.26971372961997986, Accuracy: 0.9162790775299072


In [56]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=64, input_dim=5))
nn_model.add(tf.keras.layers.Dense(units=64))
nn_model.add(tf.keras.layers.Dense(units=64))

nn_model.add(PReLU())  # Adding PReLU activation function here
# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.01)

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
7/7 - 0s - loss: 0.3245 - accuracy: 0.9209
7/7 - 0s - loss: 0.2703 - accuracy: 0.9116
Loss: 0.32449808716773987, Accuracy: 0.9209302067756653
Validation Set - Loss: 0.2703070044517517, Accuracy: 0.9116278886795044


In [57]:
# Calculate Spearman's rank correlation coefficients with the target variable
spearman_corr = df.corr(method='spearman')['Diagnosis'].abs().sort_values(ascending=False)

# Print the ranked features based on Spearman's correlation coefficients
print("Ranked Features based on Spearman's Correlation Coefficients:")
print(spearman_corr)

# Select the top 'k' features based on your criteria
# For example, selecting the top 5 features
top_features = spearman_corr.index[:10].tolist()
print("Top Features selected:")
print(top_features)

Ranked Features based on Spearman's Correlation Coefficients:
Diagnosis                    1.000000
FunctionalAssessment         0.366687
ADL                          0.330450
MemoryComplaints             0.306742
MMSE                         0.236271
BehavioralProblems           0.224350
SleepQuality                 0.056069
EducationLevel               0.043325
CholesterolHDL               0.042542
Hypertension                 0.035080
FamilyHistoryAlzheimers      0.032900
CholesterolLDL               0.032010
Diabetes                     0.031508
CardiovascularDisease        0.031490
BMI                          0.026402
Disorientation               0.024648
CholesterolTriglycerides     0.023072
HeadInjury                   0.021411
Gender                       0.020975
PersonalityChanges           0.020627
Confusion                    0.019186
Ethnicity                    0.017744
SystolicBP                   0.015822
DifficultyCompletingTasks    0.009069
DietQuality               

In [58]:
# Define the deep learning model 
final5_model = tf.keras.models.Sequential()
final5_model.add(tf.keras.layers.Dense(units=64, input_dim=5))
final5_model.add(tf.keras.layers.Dense(units=64))
final5_model.add(tf.keras.layers.Dense(units=64))

final5_model.add(PReLU())  # Adding PReLU activation function here
# Output layer/binary classification so 1 neuron for 0 or 1
final5_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
final5_model.compile(loss="binary_crossentropy", optimizer="RMSprop", metrics=["accuracy"])

# Train the model w/ validation dataset
fit5_model = final5_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = final5_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = final5_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
7/7 - 0s - loss: 0.3718 - accuracy: 0.8791
7/7 - 0s - loss: 0.2533 - accuracy: 0.9116
Loss: 0.37178295850753784, Accuracy: 0.8790697455406189
Validation Set - Loss: 0.25329574942588806, Accuracy: 0.9116278886795044


In [59]:
final5_model.save("NN_top5Features_model.h5")

In [60]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.90      0.91      0.90       139
           1       0.83      0.82      0.82        76

    accuracy                           0.87       215
   macro avg       0.86      0.86      0.86       215
weighted avg       0.87      0.87      0.87       215



Use random forest classifier model on top 5 features (to compare to full dataset analysis)

In [61]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Create a Random Forest classifier instance
rf_model = RandomForestClassifier(n_estimators=500, random_state=78)

# Fit the model with the training data
rf_model.fit(X_train, y_train)

# Predict on the validation set
predictions = rf_model.predict(X_val)

# Evaluate the model
accuracy = accuracy_score(y_val, predictions)
conf_matrix = confusion_matrix(y_val, predictions)
class_report = classification_report(y_val, predictions)

# Print or use the evaluation metrics as needed
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

Accuracy: 0.9488372093023256
Confusion Matrix:
 [[135   3]
 [  8  69]]
Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.98      0.96       138
           1       0.96      0.90      0.93        77

    accuracy                           0.95       215
   macro avg       0.95      0.94      0.94       215
weighted avg       0.95      0.95      0.95       215

