In [None]:
# Import the dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

In [None]:
# Location of the source CSV, relative to this notebook
file_path = Path("../Data/diabetes_binary_health_indicators_BRFSS2015.csv")

# Load the CSV contents into a DataFrame
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows (rendered as the cell output)
df.head(5)

In [None]:
# Discard every row that contains at least one missing value
df = df.dropna()

In [None]:
# Separate the target column from the remaining feature columns
y = df['Diabetes_binary']
X = df.drop(columns=['Diabetes_binary'])

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Report the dimensions of the feature and target sets for both partitions
print(
    f"Training set shape: {X_train.shape}, {y_train.shape}",
    f"Testing set shape: {X_test.shape}, {y_test.shape}",
    sep="\n",
)

In [None]:
# Build the scaler and fit it on the training features only,
# so the test set does not influence the scaling parameters
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to the training and testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

Compile, train and evaluate the model

In [None]:
# Define the model - a deep neural network with two hidden layers and one output node.

# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so that the initial weights are repeatable across fresh kernel runs.
tf.keras.utils.set_random_seed(42)

# Number of input features and the number of nodes in each layer
number_input_features = len(X_train.columns)
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 40
output_nodes = 1

nn = tf.keras.models.Sequential()

# Declare the input shape with an explicit Input object; newer Keras releases
# warn against passing `input_dim` to the first Dense layer of a Sequential model.
nn.add(tf.keras.Input(shape=(number_input_features,)))

# Add the first hidden layer
hidden_layer1 = tf.keras.layers.Dense(units=hidden_nodes_layer1,
                                      activation='relu')
nn.add(hidden_layer1)

# Add the second hidden layer
hidden_layer2 = tf.keras.layers.Dense(units=hidden_nodes_layer2,
                                      activation='relu')
nn.add(hidden_layer2)

# Add the output layer (a single sigmoid unit for the binary target)
output_layer = tf.keras.layers.Dense(units=output_nodes,
                                     activation='sigmoid')
nn.add(output_layer)

# Check the structure of the model
nn.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric
nn.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fit the network on the scaled training data for 100 epochs in batches of 32,
# setting aside 20% of the training rows for validation
fit_model = nn.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_split=0.2,
    verbose=1,
)

In [None]:
# Score the trained network on the held-out, scaled test data
evaluation = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)

# Unpack the loss and the accuracy metric, then report both
model_loss, model_accuracy = evaluation
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Export the model to a .h5 file
model_path = Path("Models/neural_network.h5")

# Create the target directory up front so the export does not depend on a
# manually created "Models" folder being present next to the notebook.
model_path.parent.mkdir(parents=True, exist_ok=True)

nn.save(model_path)