In [1]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [10]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder
import kerastuner as kt
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
import seaborn as sns
import matplotlib.pyplot as plt

#  Import and read the heart_disease_dataset.csv.
import pandas as pd

url= 'https://raw.githubusercontent.com/Nathanhans/project-4/main/heart_disease_dataset.csv'
heart_disease_df = pd.read_csv(url)
heart_disease_df.head()

Unnamed: 0,State,Sex,GeneralHealth,PhysicalHealthDays,MentalHealthDays,LastCheckupTime,PhysicalActivities,SleepHours,RemovedTeeth,HadHeartAttack,...,HeightInMeters,WeightInKilograms,BMI,AlcoholDrinkers,HIVTesting,FluVaxLast12,PneumoVaxEver,TetanusLast10Tdap,HighRiskLastYear,CovidPos
0,Alabama,Female,Very good,4,0,Within past year (anytime less than 12 months ...,Yes,9,None of them,No,...,1.6,71.67,27.99,No,No,Yes,Yes,"Yes, received Tdap",No,No
1,Alabama,Male,Very good,0,0,Within past year (anytime less than 12 months ...,Yes,6,None of them,No,...,1.78,95.25,30.13,No,No,Yes,Yes,"Yes, received tetanus shot but not sure what type",No,No
2,Alabama,Male,Very good,0,0,Within past year (anytime less than 12 months ...,No,8,"6 or more, but not all",No,...,1.85,108.86,31.66,Yes,No,No,Yes,"No, did not receive any tetanus shot in the pa...",No,Yes
3,Alabama,Female,Fair,5,0,Within past year (anytime less than 12 months ...,Yes,9,None of them,No,...,1.7,90.72,31.32,No,No,Yes,Yes,"No, did not receive any tetanus shot in the pa...",No,Yes
4,Alabama,Female,Good,3,15,Within past year (anytime less than 12 months ...,Yes,5,1 to 5,No,...,1.55,79.38,33.07,No,No,Yes,Yes,"No, did not receive any tetanus shot in the pa...",No,No


In [11]:
#confirming lenght to make sure no data was lost
len(heart_disease_df)

246022

In [12]:
# Drop the non-beneficial columns such as State
heart_disease_df.head()

Unnamed: 0,State,Sex,GeneralHealth,PhysicalHealthDays,MentalHealthDays,LastCheckupTime,PhysicalActivities,SleepHours,RemovedTeeth,HadHeartAttack,...,HeightInMeters,WeightInKilograms,BMI,AlcoholDrinkers,HIVTesting,FluVaxLast12,PneumoVaxEver,TetanusLast10Tdap,HighRiskLastYear,CovidPos
0,Alabama,Female,Very good,4,0,Within past year (anytime less than 12 months ...,Yes,9,None of them,No,...,1.6,71.67,27.99,No,No,Yes,Yes,"Yes, received Tdap",No,No
1,Alabama,Male,Very good,0,0,Within past year (anytime less than 12 months ...,Yes,6,None of them,No,...,1.78,95.25,30.13,No,No,Yes,Yes,"Yes, received tetanus shot but not sure what type",No,No
2,Alabama,Male,Very good,0,0,Within past year (anytime less than 12 months ...,No,8,"6 or more, but not all",No,...,1.85,108.86,31.66,Yes,No,No,Yes,"No, did not receive any tetanus shot in the pa...",No,Yes
3,Alabama,Female,Fair,5,0,Within past year (anytime less than 12 months ...,Yes,9,None of them,No,...,1.7,90.72,31.32,No,No,Yes,Yes,"No, did not receive any tetanus shot in the pa...",No,Yes
4,Alabama,Female,Good,3,15,Within past year (anytime less than 12 months ...,Yes,5,1 to 5,No,...,1.55,79.38,33.07,No,No,Yes,Yes,"No, did not receive any tetanus shot in the pa...",No,No


In [13]:
heart_disease_df= heart_disease_df.copy().drop(['State'],axis=1)


In [14]:
# nuniuqe to view the unique data in each column
heart_disease_df.nunique()

Sex                             2
GeneralHealth                   5
PhysicalHealthDays             31
MentalHealthDays               31
LastCheckupTime                 4
PhysicalActivities              2
SleepHours                     23
RemovedTeeth                    4
HadHeartAttack                  2
HadAngina                       2
HadStroke                       2
HadAsthma                       2
HadSkinCancer                   2
HadCOPD                         2
HadDepressiveDisorder           2
HadKidneyDisease                2
HadArthritis                    2
HadDiabetes                     4
DeafOrHardOfHearing             2
BlindOrVisionDifficulty         2
DifficultyConcentrating         2
DifficultyWalking               2
DifficultyDressingBathing       2
DifficultyErrands               2
SmokerStatus                    4
ECigaretteUsage                 4
ChestScan                       2
RaceEthnicityCategory           5
AgeCategory                    13
HeightInMeters

In [15]:
target_column = heart_disease_df['HadHeartAttack']
df = heart_disease_df.drop('HadHeartAttack', axis=1)  # Drop the target column from the DataFrame

# Apply get_dummies() to the remaining DataFrame
df_dummies = pd.get_dummies(df)

# Concatenate the target column with the DataFrame of dummy variables
df_processed = pd.concat([target_column, df_dummies], axis=1)


In [16]:
df_processed.head()

Unnamed: 0,HadHeartAttack,PhysicalHealthDays,MentalHealthDays,SleepHours,HeightInMeters,WeightInKilograms,BMI,Sex_Female,Sex_Male,GeneralHealth_Excellent,...,PneumoVaxEver_Yes,"TetanusLast10Tdap_No, did not receive any tetanus shot in the past 10 years","TetanusLast10Tdap_Yes, received Tdap","TetanusLast10Tdap_Yes, received tetanus shot but not sure what type","TetanusLast10Tdap_Yes, received tetanus shot, but not Tdap",HighRiskLastYear_No,HighRiskLastYear_Yes,CovidPos_No,CovidPos_Tested positive using home test without a health professional,CovidPos_Yes
0,No,4,0,9,1.6,71.67,27.99,1,0,0,...,1,0,1,0,0,1,0,1,0,0
1,No,0,0,6,1.78,95.25,30.13,0,1,0,...,1,0,0,1,0,1,0,1,0,0
2,No,0,0,8,1.85,108.86,31.66,0,1,0,...,1,1,0,0,0,1,0,0,0,1
3,No,5,0,9,1.7,90.72,31.32,1,0,0,...,1,1,0,0,0,1,0,0,0,1
4,No,3,15,5,1.55,79.38,33.07,1,0,0,...,1,1,0,0,0,1,0,1,0,0


In [17]:
# Split our preprocessed data into our features and target arrays
y = df_processed['HadHeartAttack']
X = df_processed.drop(['HadHeartAttack'], axis = 1)


In [18]:
# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

In [19]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [20]:
print(X_train_scaled.shape)
print(X_test_scaled.shape)

(184516, 100)
(61506, 100)


In [21]:
# Create instance of labelencoder
le= LabelEncoder()
# Fit the LabelEncoder on the target variable
y_train_encoded = le.fit_transform(y_train)
#Fit the LabelEncoder on the test variable
y_test_encoded= le.fit_transform(y_test)

In [None]:
print(f"X_train_scaled shape: {X_train_scaled.shape}")
print(f"y_train shape: {y_train_encoded.shape}")
print(f"X_train_scaled data type: {X_train_scaled.dtype}")
print(f"y_train data type: {y_train_encoded.dtype}")

X_train_scaled shape: (184516, 100)
y_train shape: (184516,)
X_train_scaled data type: float64
y_train data type: int64


**Model Architecture**:

Input Layer: Number of Input Features (100)

Hidden Layer 1: 7 nodes, Activation Function: relu

Hidden Layer 2: 1 nodes, Activation Function: relu

Hidden Layer 3: 9 nodes, Activation Function: relu

Output Layer: 1 node, Activation Function: sigmoid

Number of epochs: 7


In [23]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
nn = tf.keras.models.Sequential()

# First hidden layer ()
nn.add(tf.keras.layers.Dense(units=7, activation="relu", input_dim=100))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=1, activation="relu"))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=9, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 7)                 707       
                                                                 
 dense_5 (Dense)             (None, 1)                 8         
                                                                 
 dense_6 (Dense)             (None, 9)                 18        
                                                                 
 dense_7 (Dense)             (None, 1)                 10        
                                                                 
Total params: 743 (2.90 KB)
Trainable params: 743 (2.90 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [24]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [25]:
# Train the model
fit_model = nn.fit(X_train_scaled, y_train_encoded, epochs=7)

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


In [26]:
predictions= nn.predict(X_test_scaled)
predictions




array([[0.00057731],
       [0.15182947],
       [0.03910579],
       ...,
       [0.00074579],
       [0.00135344],
       [0.06515021]], dtype=float32)

In [27]:
round_array = np.array([int(np.round(i)) for i in predictions])
round_array

  round_array = np.array([int(np.round(i)) for i in predictions])


array([0, 0, 0, ..., 0, 0, 0])

In [28]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test_encoded, round_array)

array([[57925,   222],
       [ 2928,   431]])

In [29]:
from sklearn.metrics import classification_report
print(classification_report(y_test_encoded, round_array))

              precision    recall  f1-score   support

           0       0.95      1.00      0.97     58147
           1       0.66      0.13      0.21      3359

    accuracy                           0.95     61506
   macro avg       0.81      0.56      0.59     61506
weighted avg       0.94      0.95      0.93     61506



In [30]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test_encoded,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")


1923/1923 - 2s - loss: 0.1453 - accuracy: 0.9488 - 2s/epoch - 820us/step
Loss: 0.14530032873153687, Accuracy: 0.9487854838371277


In [31]:
# Export our model to HDF5 file
from google.colab import files

nn.save("/content/sally_auto_opt_colab_hyper_param_model_1_updated.h5")
files.download("/content/sally_auto_opt_colab_hyper_param_model_1_updated.h5")

  saving_api.save_model(


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>