In [1]:
# Import our dependencies

!pip install keras-tuner -q
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import keras_tuner as kt


#  Import and read the charity_data.csv.
application_df = pd.read_csv("Resources/KaggleV2-May-2016.csv")
application_df.head()

Unnamed: 0,PatientId,AppointmentID,Gender,Day of Week,ScheduledDay,AppointmentDay,Age,Neighbourhood,Scholarship,Hipertension,Diabetes,Alcoholism,Handcap,SMS_received,No-show,Unnamed: 15
0,29872500000000.0,5642903,F,Friday,2016-04-29T18:38:08Z,2016-04-29T00:00:00Z,62,JARDIM DA PENHA,0,1,0,0,0,0,No,
1,558998000000000.0,5642503,M,Friday,2016-04-29T16:08:27Z,2016-04-29T00:00:00Z,56,JARDIM DA PENHA,0,0,0,0,0,0,No,
2,4262960000000.0,5642549,F,Friday,2016-04-29T16:19:04Z,2016-04-29T00:00:00Z,62,MATA DA PRAIA,0,0,0,0,0,0,No,
3,867951000000.0,5642828,F,Friday,2016-04-29T17:29:31Z,2016-04-29T00:00:00Z,8,PONTAL DE CAMBURI,0,0,0,0,0,0,No,
4,8841190000000.0,5642494,F,Friday,2016-04-29T16:07:23Z,2016-04-29T00:00:00Z,56,JARDIM DA PENHA,0,1,1,0,0,0,No,


In [2]:
# Drop the non-beneficial ID columns.
application_df = application_df.drop(columns=["PatientId","AppointmentID", "ScheduledDay", "AppointmentDay", "Neighbourhood", "Unnamed: 15"], axis=1)
application_df.head()

Unnamed: 0,Gender,Day of Week,Age,Scholarship,Hipertension,Diabetes,Alcoholism,Handcap,SMS_received,No-show
0,F,Friday,62,0,1,0,0,0,0,No
1,M,Friday,56,0,0,0,0,0,0,No
2,F,Friday,62,0,0,0,0,0,0,No
3,F,Friday,8,0,0,0,0,0,0,No
4,F,Friday,56,0,1,1,0,0,0,No


In [3]:
cleaned_df = application_df.rename(columns={'Hipertension':'Hypertension', 'Handcap':'Handicap'})
cleaned_df.head()

Unnamed: 0,Gender,Day of Week,Age,Scholarship,Hypertension,Diabetes,Alcoholism,Handicap,SMS_received,No-show
0,F,Friday,62,0,1,0,0,0,0,No
1,M,Friday,56,0,0,0,0,0,0,No
2,F,Friday,62,0,0,0,0,0,0,No
3,F,Friday,8,0,0,0,0,0,0,No
4,F,Friday,56,0,1,1,0,0,0,No


In [4]:
# Using the map() method
cleaned_df['No-show'] = cleaned_df['No-show'].map({'Yes': 1, 'No': 0})

# Using the replace() method
#cleaned_df = cleaned_df['No-show'].replace({'yes': 1, 'no': 0}, inplace=True)
cleaned_df.head()

Unnamed: 0,Gender,Day of Week,Age,Scholarship,Hypertension,Diabetes,Alcoholism,Handicap,SMS_received,No-show
0,F,Friday,62,0,1,0,0,0,0,0
1,M,Friday,56,0,0,0,0,0,0,0
2,F,Friday,62,0,0,0,0,0,0,0
3,F,Friday,8,0,0,0,0,0,0,0
4,F,Friday,56,0,1,1,0,0,0,0


In [8]:
no_scholarship_df = cleaned_df.drop(columns={'Scholarship'})
no_scholarship_df.head()

Unnamed: 0,Gender,Day of Week,Age,Hypertension,Diabetes,Alcoholism,Handicap,SMS_received,No-show
0,F,Friday,62,1,0,0,0,0,0
1,M,Friday,56,0,0,0,0,0,0
2,F,Friday,62,0,0,0,0,0,0
3,F,Friday,8,0,0,0,0,0,0
4,F,Friday,56,1,1,0,0,0,0


In [9]:
application_convert = pd.get_dummies(no_scholarship_df)
application_convert

Unnamed: 0,Age,Hypertension,Diabetes,Alcoholism,Handicap,SMS_received,No-show,Gender_F,Gender_M,Day of Week_Friday,Day of Week_Monday,Day of Week_Saturday,Day of Week_Thursday,Day of Week_Tuesday,Day of Week_Wednesday
0,62,1,0,0,0,0,0,1,0,1,0,0,0,0,0
1,56,0,0,0,0,0,0,0,1,1,0,0,0,0,0
2,62,0,0,0,0,0,0,1,0,1,0,0,0,0,0
3,8,0,0,0,0,0,0,1,0,1,0,0,0,0,0
4,56,1,1,0,0,0,0,1,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110522,56,0,0,0,0,1,0,1,0,0,0,0,0,1,0
110523,51,0,0,0,0,1,0,1,0,0,0,0,0,1,0
110524,21,0,0,0,0,1,0,1,0,0,0,0,0,0,1
110525,38,0,0,0,0,1,0,1,0,0,0,0,0,0,1


In [10]:
X = application_convert.drop(["No-show"], axis=1)
y = application_convert["No-show"].values

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [12]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [13]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value= 10,
        step=5), activation=activation, input_dim=X_train_scaled.shape[1]))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 5)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=5),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [14]:
tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

INFO:tensorflow:Reloading Tuner from .\untitled_project\tuner0.json


In [15]:
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

Trial 60 Complete [00h 01m 52s]
val_accuracy: 0.7980602383613586

Best val_accuracy So Far: 0.7981687784194946
Total elapsed time: 00h 41m 23s
INFO:tensorflow:Oracle triggered exit


In [16]:
top_hyper = tuner.get_best_hyperparameters()[0]
top_hyper.values

{'activation': 'relu',
 'first_units': 6,
 'num_layers': 1,
 'units_0': 6,
 'units_1': 1,
 'units_2': 6,
 'units_3': 6,
 'units_4': 1,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 0,
 'tuner/round': 0}

In [17]:
best_model = tuner.get_best_models()[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

864/864 - 1s - loss: 0.4922 - accuracy: 0.7982
Loss: 0.49215075373649597, Accuracy: 0.7981687784194946


In [20]:
best_model.save('../Downloads/no_scholarship_no_show.h5')