In [1]:
# Import modules
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import load_model

In [2]:
# Read the CSV file into a Pandas DataFrame
df = pd.read_csv(
    Path("alzheimer_clean.csv")   
)

# Review the DataFrame
df.head()

Unnamed: 0,Age,Gender,Ethnicity,EducationLevel,BMI,Smoking,AlcoholConsumption,PhysicalActivity,DietQuality,SleepQuality,...,FunctionalAssessment,MemoryComplaints,BehavioralProblems,ADL,Confusion,Disorientation,PersonalityChanges,DifficultyCompletingTasks,Forgetfulness,Diagnosis
0,73,0,0,2,22.927749,0,13.297218,6.327112,1.347214,9.025679,...,6.518877,0,0,1.725883,0,0,0,1,0,0
1,89,0,0,0,26.827681,0,4.542524,7.619885,0.518767,7.151293,...,7.118696,0,0,2.592424,0,0,0,0,1,0
2,73,0,3,1,17.795882,0,19.555085,7.844988,1.826335,9.673574,...,5.895077,0,0,7.119548,0,1,0,1,0,0
3,74,1,0,1,33.800817,1,12.209266,8.428001,7.435604,8.392554,...,8.965106,0,1,6.481226,0,0,0,0,0,0
4,89,0,0,0,20.716974,0,18.454356,6.310461,0.795498,5.597238,...,6.045039,0,0,0.014691,0,0,1,1,0,0


In [3]:
y = df["Diagnosis"]
x = df.drop(columns="Diagnosis")

In [4]:
# Encode the categorical variables using get_dummies
X = pd.get_dummies(x)

In [5]:
# split original dataset X and y into a training set + temp set (test_size=0.2 for 20% of data going towards the temp set and 80% for training set)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=1)

# split temp set into validation set + test set (50/50) 
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=1)

# goal = train using 80% of original data, tune hyperparameters using the validation set + evaluate model performance on unseen data using test data

In [6]:
print(X_train.shape, X_val.shape, X_test.shape)

(1719, 32) (215, 32) (215, 32)


In [7]:
# Instantiate a StandardScaler instance
scaler = StandardScaler()

# Fit the training data to the standard scaler
X_scaler = scaler.fit(X_train)

# Transform the training data using the scaler
X_train_scaled = X_scaler.transform(X_train)

# Transform the testing data using the scaler
X_test_scaled = X_scaler.transform(X_test)

# Transform the validation data using the scaler
X_val_scaled = X_scaler.transform(X_val)

Doing a Grid Search for n_neighours optimal value using cross-validation in Python with SciKit-Learn

In [45]:
# Define the parameter grid with the correct parameter name 'n_neighbors'
param_grid = {'n_neighbors': [3, 5, 7, 9, 11]}

# Create a KNN classifier
knn = KNeighborsClassifier()

# Perform grid search with cross-validation
grid_search = GridSearchCV(knn, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Access the best parameters and print the best n_neighbors value
best_n_neighbors = grid_search.best_params_['n_neighbors']
print("Best n_neighbors value:", best_n_neighbors)

Best n_neighbors value: 11


In [46]:
# Define the parameter grid with the correct parameter name 'n_neighbors'
param_grid = {'n_neighbors': [11, 13, 15, 17, 19]}

# Create a KNN classifier
knn = KNeighborsClassifier()

# Perform grid search with cross-validation
grid_search = GridSearchCV(knn, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Access the best parameters and print the best n_neighbors value
best_n_neighbors = grid_search.best_params_['n_neighbors']
print("Best n_neighbors value:", best_n_neighbors)

Best n_neighbors value: 19


Trying the "rule of thumb for n_neighbours" = square root of total number of samples in the dataset (1719 for training set = 41)

In [47]:
# Define the parameter grid with the correct parameter name 'n_neighbors'
param_grid = {'n_neighbors': [35, 37, 39, 41, 43]}

# Create a KNN classifier
knn = KNeighborsClassifier()

# Perform grid search with cross-validation
grid_search = GridSearchCV(knn, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Access the best parameters and print the best n_neighbors value
best_n_neighbors = grid_search.best_params_['n_neighbors']
print("Best n_neighbors value:", best_n_neighbors)

Best n_neighbors value: 43


Keep n_neighbors = 41 to attempt at best balance between bias and variance

In [48]:
# Import the KNeighborsClassifier module from sklearn
from sklearn.neighbors import KNeighborsClassifier

# Instantiate the KNeighborsClassifier model with n_neighbors = 19 
knn = KNeighborsClassifier(n_neighbors=41)

In [49]:
# Train the model using the training data
knn.fit(X_train_scaled, y_train)


KNeighborsClassifier(n_neighbors=41)

In [50]:
validation_accuracy = knn.score(X_val, y_val)
print("Validation Accuracy:", validation_accuracy)

Validation Accuracy: 0.641860465116279


In [51]:
# Create predictions using the testing data
y_pred = knn.predict(X_test_scaled)

In [52]:
# Print the classification report comparing the testing data to the model predictions
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.71      0.97      0.82       139
           1       0.85      0.29      0.43        76

    accuracy                           0.73       215
   macro avg       0.78      0.63      0.63       215
weighted avg       0.76      0.73      0.68       215



Normalization to scale features to a range between 0 and 1 (want to bring data to common scale)

In [53]:
from sklearn.preprocessing import MinMaxScaler

In [54]:
# Create a MinMaxScaler instance
scaler = MinMaxScaler()

# Fit the training data to the standard scaler
X_scaler = scaler.fit(X_train)

# Transform the training data using the scaler
X_train_scaled_norm = X_scaler.transform(X_train)

# Transform the testing data using the scaler
X_test_scaled_norm = X_scaler.transform(X_test)

# Transform the validation data using the scaler
X_val_scaled_norm = X_scaler.transform(X_val)

In [55]:
from sklearn.pipeline import make_pipeline

# Create a pipeline with StandardScaler and KNN classifier
pipeline = make_pipeline(MinMaxScaler(), KNeighborsClassifier(n_neighbors=41))

# Fit the pipeline on the training data and make predictions
pipeline.fit(X_train, y_train)

Pipeline(steps=[('minmaxscaler', MinMaxScaler()),
                ('kneighborsclassifier', KNeighborsClassifier(n_neighbors=41))])

In [56]:
# Instantiate the KNeighborsClassifier model with n_neighbors = 19 
knn = KNeighborsClassifier(n_neighbors=41)

In [57]:
# Train the model using the training data
knn.fit(X_train_scaled_norm, y_train)

KNeighborsClassifier(n_neighbors=41)

In [58]:
validation_accuracy = knn.score(X_val, y_val)
print("Validation Accuracy:", validation_accuracy)

Validation Accuracy: 0.641860465116279


In [59]:
# Create predictions using the testing data
y_pred = knn.predict(X_test)

In [60]:
# Print the classification report comparing the testing data to the model predictions
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.65      1.00      0.79       139
           1       0.00      0.00      0.00        76

    accuracy                           0.65       215
   macro avg       0.32      0.50      0.39       215
weighted avg       0.42      0.65      0.51       215



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Normalization and Pipeline Feature has the same validation accuracy, but lowers f1-score whilst increasing recall for [0], which decreasing for [1]. 

NO IMPROVEMENT WITH CHANGE IN WEIGHT BY DISTANCE (uniform and distance the same)

In [61]:
# Create a k-NN classifier with distance-based weighting
knn_distance = KNeighborsClassifier(n_neighbors=41, weights='uniform')

In [62]:
# Train the model using the training data
knn_distance.fit(X_train_scaled, y_train)

KNeighborsClassifier(n_neighbors=41)

In [64]:
validation_accuracy = knn_distance.score(X_val, y_val)
print("Validation Accuracy:", validation_accuracy)

# validation decreased with weight change

Validation Accuracy: 0.641860465116279


In [65]:
# Create predictions using the testing data
y_pred = knn_distance.predict(X_test_scaled)

In [66]:
# Print the classification report comparing the testing data to the model predictions
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.71      0.97      0.82       139
           1       0.85      0.29      0.43        76

    accuracy                           0.73       215
   macro avg       0.78      0.63      0.63       215
weighted avg       0.76      0.73      0.68       215



Try lowered n_neighbors (=19)

In [67]:
# Import the KNeighborsClassifier module from sklearn
from sklearn.neighbors import KNeighborsClassifier

# Instantiate the KNeighborsClassifier model with n_neighbors = 19 
knn = KNeighborsClassifier(n_neighbors=19)

In [68]:
# Train the model using the training data
knn.fit(X_train_scaled, y_train)

KNeighborsClassifier(n_neighbors=19)

In [69]:
validation_accuracy = knn.score(X_val, y_val)
print("Validation Accuracy:", validation_accuracy)

Validation Accuracy: 0.6232558139534884


In [70]:
# Create predictions using the testing data
y_pred = knn.predict(X_test_scaled)

In [71]:
# Print the classification report comparing the testing data to the model predictions
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.73      0.95      0.82       139
           1       0.79      0.34      0.48        76

    accuracy                           0.73       215
   macro avg       0.76      0.65      0.65       215
weighted avg       0.75      0.73      0.70       215



Tried even lower n_neighbors (11) = lowered accuracy, recall, precision and approximately the same validation score. 

In [72]:
# Import the KNeighborsClassifier module from sklearn
from sklearn.neighbors import KNeighborsClassifier

# Instantiate the KNeighborsClassifier model with n_neighbors = 19 
knn = KNeighborsClassifier(n_neighbors=11)

In [73]:
# Train the model using the training data
knn.fit(X_train_scaled, y_train)

KNeighborsClassifier(n_neighbors=11)

In [74]:
validation_accuracy = knn.score(X_val, y_val)
print("Validation Accuracy:", validation_accuracy)

Validation Accuracy: 0.6


In [75]:
# Create predictions using the testing data
y_pred = knn.predict(X_test_scaled)

In [76]:
# Print the classification report comparing the testing data to the model predictions
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.70      0.91      0.79       139
           1       0.64      0.28      0.39        76

    accuracy                           0.69       215
   macro avg       0.67      0.59      0.59       215
weighted avg       0.68      0.69      0.65       215



In [77]:
# Import the KNeighborsClassifier module from sklearn
from sklearn.neighbors import KNeighborsClassifier

# Instantiate the KNeighborsClassifier model with n_neighbors = 19 
knn = KNeighborsClassifier(n_neighbors=31)

In [78]:
# Train the model using the training data
knn.fit(X_train_scaled, y_train)

KNeighborsClassifier(n_neighbors=31)

In [79]:
validation_accuracy = knn.score(X_val, y_val)
print("Validation Accuracy:", validation_accuracy)

Validation Accuracy: 0.6325581395348837


In [80]:
# Create predictions using the testing data
y_pred = knn.predict(X_test_scaled)

In [81]:
# Print the classification report comparing the testing data to the model predictions
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.73      0.95      0.82       139
           1       0.79      0.34      0.48        76

    accuracy                           0.73       215
   macro avg       0.76      0.65      0.65       215
weighted avg       0.75      0.73      0.70       215



Lowered validation accuracy for n_neighbors = 31. Keep n_neighbors =41. Low accuracy and averaged f1-score of 0.73 has urged consideration of deeper learning. 

TESTING DEEP LEARNING MODELS

In [9]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=16, activation="relu", input_dim=32))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

# results promising; but attempt to improve 

Train on 1719 samples, validate on 215 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
215/215 - 0s - loss: 0.4964 - accuracy: 0.7721
215/215 - 0s - loss: 0.3319 - accuracy: 0.8558
Loss: 0.4963589056979778, Accuracy: 0.7720929980278015
Validation Set - Loss: 0.3318569258201954, Accuracy: 0.8558139801025391


First start w/ grid search to see if random neuron number was optimal

In [12]:
# Define the deep learning model function

def create_model(neurons=16):
    nn_model = tf.keras.models.Sequential()
    nn_model.add(tf.keras.layers.Dense(units=neurons, activation="relu", input_dim=32))
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    return nn_model

# Create a KerasClassifier based on the model function
nn_model = KerasClassifier(build_fn=create_model, epochs=50, batch_size=10, verbose=0)

# Define the grid of hyperparameters to search
param_grid = {'neurons': [8, 16, 32, 64]}

# Perform grid search
grid = GridSearchCV(estimator=nn_model, param_grid=param_grid, cv=5)
grid_result = grid.fit(X_train_scaled, y_train)

# Get the best parameters and results
print("Best Parameters: ", grid_result.best_params_)
print("Best Score: ", grid_result.best_score_)

Best Parameters:  {'neurons': 8}
Best Score:  0.840619695186615


Test model using 8 neurons in layer

-----> optimized 8 neurons x 1 layer (50 epochs to test)

In [11]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=8, activation="relu", input_dim=32))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Train on 1719 samples, validate on 215 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
215/215 - 0s - loss: 0.4471 - accuracy: 0.8093
215/215 - 0s - loss: 0.3593 - accuracy: 0.8372
Loss: 0.4471356630325317, Accuracy: 0.8093023300170898
Validation Set - Loss: 0.35927448757859165, Accuracy: 0.8372092843055725


Decrease neurons to = 8 increase accuracy for model, and slight decrease in validation (both values now over 0.8; keep neurons = 8)

Checking optimization of epochs # using GridSearchCV

In [13]:
# Define the deep learning model function

def create_model(neurons=8):
    nn_model = tf.keras.models.Sequential()
    nn_model.add(tf.keras.layers.Dense(units=neurons, activation="relu", input_dim=32))
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    return nn_model

# Create a KerasClassifier based on the model function
nn_model = KerasClassifier(build_fn=create_model, epochs=50, batch_size=10, verbose=0)

# Define the grid of hyperparameters to search
param_grid = {'epochs': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]}

# Perform grid search
grid = GridSearchCV(estimator=nn_model, param_grid=param_grid, cv=5)
grid_result = grid.fit(X_train_scaled, y_train)

# Get the best parameters and results
print("Best Parameters: ", grid_result.best_params_)
print("Best Score: ", grid_result.best_score_)

Best Parameters:  {'epochs': 20}
Best Score:  0.8429435849189758


Checking validation set using new parameters (optimized epochs=20 as well as neuron =8 in single layer)

In [14]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=8, activation="relu", input_dim=32))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=20, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Train on 1719 samples, validate on 215 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
215/215 - 0s - loss: 0.4425 - accuracy: 0.8093
215/215 - 0s - loss: 0.3755 - accuracy: 0.8279
Loss: 0.44248663131580795, Accuracy: 0.8093023300170898
Validation Set - Loss: 0.37551643321680467, Accuracy: 0.8279069662094116


Trying epochs=40 because epochs=20 is lower than 50 (balanced with validation) -- keep epochs =50

In [15]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=8, activation="relu", input_dim=32))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=40, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Train on 1719 samples, validate on 215 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
215/215 - 0s - loss: 0.4788 - accuracy: 0.7767
215/215 - 0s - loss: 0.3456 - accuracy: 0.8465
Loss: 0.47875270386074864, Accuracy: 0.7767441868782043
Validation Set - Loss: 0.3455992439458537, Accuracy: 0.8465116024017334


Increasing epochs further from 50 also lowers accuracy for both test and validation -- keep 50 

In [16]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=8, activation="relu", input_dim=32))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=100, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Train on 1719 samples, validate on 215 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100


Checking parameters neurons x optimizer for optimization

In [8]:
# Define the deep learning model function

def create_model(neurons=8, optimizer="adam"):
    nn_model = tf.keras.models.Sequential()
    nn_model.add(tf.keras.layers.Dense(units=neurons, activation="relu", input_dim=32))
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    nn_model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    return nn_model

# Create a KerasClassifier based on the model function
nn_model = KerasClassifier(build_fn=create_model, epochs=50, batch_size=10, verbose=0)

# Define the grid of hyperparameters to search
param_grid = {'neurons': [8, 16, 32],'optimizer': ["adam", "SGD", "RMSprop"]}

# Perform grid search
grid = GridSearchCV(estimator=nn_model, param_grid=param_grid, cv=5)
grid_result = grid.fit(X_train_scaled, y_train)

# Get the best parameters and results
print("Best Parameters: ", grid_result.best_params_)
print("Best Score: ", grid_result.best_score_)

Best Parameters:  {'neurons': 8, 'optimizer': 'RMSprop'}
Best Score:  0.8499186396598816


In [9]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=8, activation="relu", input_dim=32))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="RMSprop", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Train on 1719 samples, validate on 215 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
215/215 - 0s - loss: 0.4477 - accuracy: 0.8140
215/215 - 0s - loss: 0.3756 - accuracy: 0.8047
Loss: 0.44765095156292584, Accuracy: 0.8139534592628479
Validation Set - Loss: 0.375601616293885, Accuracy: 0.804651141166687


neuron =8, epochs =50, optimizer = RMSprop gives the best balance between test accuracy vs. validation accuracy

Testing other activation functions

---> AF = tanh decreases accuracy

---> PReLU also decreases accuracy

In [10]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=8, activation="tanh", input_dim=32))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="RMSprop", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Train on 1719 samples, validate on 215 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
215/215 - 0s - loss: 0.4665 - accuracy: 0.7860
215/215 - 0s - loss: 0.3858 - accuracy: 0.8279
Loss: 0.4665170201035433, Accuracy: 0.7860465049743652
Validation Set - Loss: 0.3858138135699339, Accuracy: 0.8279069662094116


In [11]:
from tensorflow.keras.layers import PReLU

# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=8, input_dim=32))
nn_model.add(PReLU())  # Adding PReLU activation function here

# Output layer for binary classification (1 neuron for 0 or 1)
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="RMSprop", metrics=["accuracy"])

# Train the model with validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data and validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled, y_test, verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

ERROR! Session/line number was not unique in database. History logging moved to new session 452
Train on 1719 samples, validate on 215 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
215/215 - 0s - loss: 0.4722 - accuracy: 0.8000
215/215 - 0s - loss: 0.3695 - accuracy: 0.8186
Loss: 0.47216000778730527, Accuracy: 0.800000011920929
Validation Set - Loss: 0.3694671855416409, Accuracy: 0.8186046481132507


Testing addition of more hidden layers

---> adding a hidden layer decreased accuracy and increased loss

In [12]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=8, activation="relu", input_dim=32))
nn_model.add(Dense(units=8, activation='relu'))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="RMSprop", metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Train on 1719 samples, validate on 215 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
215/215 - 0s - loss: 0.4555 - accuracy: 0.7907
215/215 - 0s - loss: 0.4072 - accuracy: 0.8047
Loss: 0.45547000114307845, Accuracy: 0.7906976938247681
Validation Set - Loss: 0.4072035289087961, Accuracy: 0.804651141166687


Optimizing learning speeds!

---> LR = 0.001 decreases accuracy 

---> LR = 0.005 increases accuracy

---> LR = 0.0001 decreases accuracy

---> LR = 0.01 decreases accuracy

In [11]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=8, activation="relu", input_dim=32))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Train on 1719 samples, validate on 215 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
215/215 - 0s - loss: 0.4570 - accuracy: 0.8000
215/215 - 0s - loss: 0.3604 - accuracy: 0.8326
Loss: 0.457002972170364, Accuracy: 0.800000011920929
Validation Set - Loss: 0.3603828771169795, Accuracy: 0.8325581550598145


In [12]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=8, activation="relu", input_dim=32))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.005)

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Train on 1719 samples, validate on 215 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
215/215 - 0s - loss: 0.5088 - accuracy: 0.8140
215/215 - 0s - loss: 0.3911 - accuracy: 0.8372
Loss: 0.5087994292724964, Accuracy: 0.8139534592628479
Validation Set - Loss: 0.39105257336483445, Accuracy: 0.8372092843055725


In [14]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=8, activation="relu", input_dim=32))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.01)

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Train on 1719 samples, validate on 215 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
215/215 - 0s - loss: 0.5780 - accuracy: 0.7907
215/215 - 0s - loss: 0.4254 - accuracy: 0.8372
Loss: 0.5780076121175012, Accuracy: 0.7906976938247681
Validation Set - Loss: 0.42541782315387283, Accuracy: 0.8372092843055725


In [13]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=8, activation="relu", input_dim=32))

# Output layer/binary classification so 1 neuron for 0 or 1
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.0001)

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# Train the model w/ validation dataset
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = nn_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Train on 1719 samples, validate on 215 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
215/215 - 0s - loss: 0.5132 - accuracy: 0.7163
215/215 - 0s - loss: 0.4844 - accuracy: 0.7349
Loss: 0.5132466205330782, Accuracy: 0.7162790894508362
Validation Set - Loss: 0.4843922340592673, Accuracy: 0.734883725643158


In [8]:
# Define the deep learning model 
final32_model = tf.keras.models.Sequential()
final32_model.add(tf.keras.layers.Dense(units=8, activation="relu", input_dim=32))

# Output layer/binary classification so 1 neuron for 0 or 1
final32_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.005)

# Compile the Sequential model together and customize metrics
final32_model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# Train the model w/ validation dataset
fit32_model = final32_model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

# Evaluate the model using the test data + validation data
model_loss, model_accuracy = final32_model.evaluate(X_test_scaled,y_test,verbose=2)
model_loss_val, model_accuracy_val = final32_model.evaluate(X_val_scaled, y_val, verbose=2)

print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Validation Set - Loss: {model_loss_val}, Accuracy: {model_accuracy_val}")

Train on 1719 samples, validate on 215 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
215/215 - 0s - loss: 0.4715 - accuracy: 0.8140
215/215 - 0s - loss: 0.3853 - accuracy: 0.8512
Loss: 0.4715019203895746, Accuracy: 0.8139534592628479
Validation Set - Loss: 0.3852890559407168, Accuracy: 0.8511627912521362


In [9]:
final32_model.save("NN_AllFeatures_model.h5")

In [10]:
final32_model.save("NN_AllFeatures_model")

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: No module named 'tensorflow_core.estimator'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: No module named 'tensorflow_core.estimator'
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: NN_AllFeatures_model\assets


In [13]:
# Calculate Spearman's rank correlation coefficients with the target variable
spearman_corr = df.corr(method='spearman')['Diagnosis'].abs().sort_values(ascending=False)

# Print the ranked features based on Spearman's correlation coefficients
print("Ranked Features based on Spearman's Correlation Coefficients:")
print(spearman_corr)

# Select the top 'k' features based on your criteria
# For example, selecting the top 5 features
top_features = spearman_corr.index[:10].tolist()
print("Top Features selected:")
print(top_features)

Ranked Features based on Spearman's Correlation Coefficients:
Diagnosis                    1.000000
FunctionalAssessment         0.366687
ADL                          0.330450
MemoryComplaints             0.306742
MMSE                         0.236271
BehavioralProblems           0.224350
SleepQuality                 0.056069
EducationLevel               0.043325
CholesterolHDL               0.042542
Hypertension                 0.035080
FamilyHistoryAlzheimers      0.032900
CholesterolLDL               0.032010
Diabetes                     0.031508
CardiovascularDisease        0.031490
BMI                          0.026402
Disorientation               0.024648
CholesterolTriglycerides     0.023072
HeadInjury                   0.021411
Gender                       0.020975
PersonalityChanges           0.020627
Confusion                    0.019186
Ethnicity                    0.017744
SystolicBP                   0.015822
DifficultyCompletingTasks    0.009069
DietQuality               