In [24]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import pandas as pd
import tensorflow as tf
import keras_tuner as kt
import numpy as np
import pandas as pd

Unnamed: 0,gender,age,hypertension,heart_disease,smoking_history,bmi,HbA1c_level,blood_glucose_level,diabetes
0,Female,80.0,0,1,never,25.19,6.6,140,0
1,Female,54.0,0,0,No Info,27.32,6.6,80,0
2,Male,28.0,0,0,never,27.32,5.7,158,0
3,Female,36.0,0,0,current,23.45,5.0,155,0
4,Male,76.0,1,1,current,20.14,4.8,155,0
...,...,...,...,...,...,...,...,...,...
99995,Female,80.0,0,0,No Info,27.32,6.2,90,0
99996,Female,2.0,0,0,No Info,17.37,6.5,100,0
99997,Male,66.0,0,0,former,27.83,5.7,155,0
99998,Female,24.0,0,0,never,35.42,4.0,100,0


In [None]:
#  Read the .csv.
diabetes_df = pd.read_csv("../diabetes_prediction_dataset.csv")
diabetes_df


In [25]:
diabetes_df.describe()

Unnamed: 0,age,hypertension,heart_disease,bmi,HbA1c_level,blood_glucose_level,diabetes
count,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
mean,41.885856,0.07485,0.03942,27.320767,5.527507,138.05806,0.085
std,22.51684,0.26315,0.194593,6.636783,1.070672,40.708136,0.278883
min,0.08,0.0,0.0,10.01,3.5,80.0,0.0
25%,24.0,0.0,0.0,23.63,4.8,100.0,0.0
50%,43.0,0.0,0.0,27.32,5.8,140.0,0.0
75%,60.0,0.0,0.0,29.58,6.2,159.0,0.0
max,80.0,1.0,1.0,95.69,9.0,300.0,1.0


In [26]:
diabetes_df.dtypes

gender                  object
age                    float64
hypertension             int64
heart_disease            int64
smoking_history         object
bmi                    float64
HbA1c_level            float64
blood_glucose_level      int64
diabetes                 int64
dtype: object

In [27]:
#DATA PROCESSING
# Check the value counts for smoking_history,  
diabetes_df["smoking_history"].value_counts()

No Info        35816
never          35095
former          9352
current         9286
not current     6447
ever            4004
Name: smoking_history, dtype: int64

In [28]:
# change "ever" to never.
diabetes_df['smoking_history'] = diabetes_df['smoking_history'].replace("ever","never")
diabetes_df["smoking_history"].value_counts()

never          39099
No Info        35816
former          9352
current         9286
not current     6447
Name: smoking_history, dtype: int64

In [29]:
#creating for our smoking_history and gender columns
dummies_df = diabetes_df[["smoking_history", "gender"]]
dummies = pd.get_dummies(dummies_df, dtype=float)
dummies.head()


Unnamed: 0,smoking_history_No Info,smoking_history_current,smoking_history_former,smoking_history_never,smoking_history_not current,gender_Female,gender_Male,gender_Other
0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
3,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [30]:
# define our target variabale 
y = diabetes_df['diabetes'].values.reshape(-1,1)

In [31]:
diabetes_df.drop(columns="diabetes")

Unnamed: 0,gender,age,hypertension,heart_disease,smoking_history,bmi,HbA1c_level,blood_glucose_level
0,Female,80.0,0,1,never,25.19,6.6,140
1,Female,54.0,0,0,No Info,27.32,6.6,80
2,Male,28.0,0,0,never,27.32,5.7,158
3,Female,36.0,0,0,current,23.45,5.0,155
4,Male,76.0,1,1,current,20.14,4.8,155
...,...,...,...,...,...,...,...,...
99995,Female,80.0,0,0,No Info,27.32,6.2,90
99996,Female,2.0,0,0,No Info,17.37,6.5,100
99997,Male,66.0,0,0,former,27.83,5.7,155
99998,Female,24.0,0,0,never,35.42,4.0,100


In [32]:
diabetes_df = diabetes_df.merge(dummies, left_index=True, right_index=True)
diabetes_df.drop(columns=["smoking_history", "gender"], inplace=True)
diabetes_df.head()

Unnamed: 0,age,hypertension,heart_disease,bmi,HbA1c_level,blood_glucose_level,diabetes,smoking_history_No Info,smoking_history_current,smoking_history_former,smoking_history_never,smoking_history_not current,gender_Female,gender_Male,gender_Other
0,80.0,0,1,25.19,6.6,140,0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1,54.0,0,0,27.32,6.6,80,0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,28.0,0,0,27.32,5.7,158,0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
3,36.0,0,0,23.45,5.0,155,0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
4,76.0,1,1,20.14,4.8,155,0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [33]:
#check the data types again
diabetes_df.dtypes

age                            float64
hypertension                     int64
heart_disease                    int64
bmi                            float64
HbA1c_level                    float64
blood_glucose_level              int64
diabetes                         int64
smoking_history_No Info        float64
smoking_history_current        float64
smoking_history_former         float64
smoking_history_never          float64
smoking_history_not current    float64
gender_Female                  float64
gender_Male                    float64
gender_Other                   float64
dtype: object

In [34]:
# define our features 
X = diabetes_df

In [35]:
# create our training and test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=30)

In [36]:
# Create and scaling our data sets 
StanScale = StandardScaler()

X_scaler = StanScale.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [47]:
len(X_train_scaled[0])
# X_train

15

In [50]:
n_input_features = len(X_train_scaled[0])
n_nodes_L1 = 45
n_nodes_L2 = 20
# n_nodes_L3

nn = tf.keras.models.Sequential()

nn.add(tf.keras.layers.Dense(units=n_nodes_L1, input_dim=n_input_features, activation="relu"))
nn.add(tf.keras.layers.Dense(units=n_nodes_L2, activation="relu"))
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

nn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 45)                720       
                                                                 
 dense_1 (Dense)             (None, 20)                920       
                                                                 
 dense_2 (Dense)             (None, 1)                 21        
                                                                 
Total params: 1661 (6.49 KB)
Trainable params: 1661 (6.49 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [51]:
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [56]:
len(y_train)
len(X_train_scaled)

75000

In [60]:
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=15,
        max_value=30,
        step=2), activation=activation, input_dim=15))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 3)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [61]:
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=15,
    hyperband_iterations=2)

INFO:tensorflow:Reloading Tuner from .\untitled_project\tuner0.json


In [62]:
tuner.search(X_train_scaled, y_train, epochs=15,validation_data=(X_test_scaled,y_test))

Trial 9 Complete [00h 00m 08s]
val_accuracy: 1.0

Best val_accuracy So Far: 1.0
Total elapsed time: 00h 01m 00s

Search: Running Trial #10

Value             |Best Value So Far |Hyperparameter
tanh              |tanh              |activation
25                |23                |first_units
1                 |1                 |num_layers
7                 |9                 |units_0
9                 |3                 |units_1
3                 |None              |units_2
2                 |2                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
2                 |2                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/2
Epoch 2/2

KeyboardInterrupt: 