In [None]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from google.colab import drive
# Mount Google Drive inside the Colab runtime at /content/drive/; the trained
# model is written under /content/drive/MyDrive at the end of the notebook.
drive.mount('/content/drive/')

In [None]:
# Read the source CSV (path is relative to the notebook's working directory)
# into a DataFrame, then preview its first five rows.
jeo_gen_df = pd.read_csv(filepath_or_buffer='./Resources/final_gender_jeop.csv')
jeo_gen_df.head(n=5)

In [None]:
# Drop the six listed columns so they are not carried into the encoding and
# modelling steps; the raw frame `jeo_gen_df` is left untouched.
new_df = jeo_gen_df.drop(
    columns=['First_Name', 'Last_Name', 'city', 'state', 'Year', 'Gender'],
)
new_df.head(n=5)

In [None]:
# One-hot encode the categorical (object/category) columns with pd.get_dummies;
# columns of other dtypes are passed through unchanged.
new_df_converted_to_numerical = pd.get_dummies(new_df)

# Preview only the first rows, like the other cells, instead of rendering the
# whole encoded frame.
new_df_converted_to_numerical.head()

In [None]:
# Target array: the 'winner_flg' column. Feature array: every other encoded column.
y = new_df_converted_to_numerical['winner_flg']
X = new_df_converted_to_numerical.drop(columns=['winner_flg'])

# Hold out a test set using scikit-learn's default split size. The split is
# stratified on the target and seeded so it is repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=1,
)
     

In [None]:
# Standardise the features. The scaler is fitted on the training split only,
# and that same fitted scaler is then applied to both splits.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
# Model-building function for the tuner: given a hyperparameter container it
# returns a freshly compiled Sequential model.

# Width of the input layer = number of columns in one scaled training row.
number_input_features = len(X_train_scaled[0])

def create_model(hp):
    """Build and compile a binary classifier from tuner-chosen hyperparameters.

    Args:
        hp: Hyperparameter container supplied by the tuner; this function
            calls its ``Choice`` and ``Int`` methods.

    Returns:
        A compiled ``tf.keras`` Sequential model whose last layer is a single
        sigmoid unit, using binary cross-entropy loss, the Adam optimizer and
        the accuracy metric.
    """
    network = tf.keras.models.Sequential()

    # A single activation function is chosen and shared by all hidden layers.
    hidden_activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # First hidden layer: tunable width, also declares the input size.
    first_width = hp.Int('first_units', min_value=1, max_value=100, step=5)
    network.add(tf.keras.layers.Dense(
        units=first_width,
        activation=hidden_activation,
        input_dim=number_input_features,
    ))

    # One to five further hidden layers, each with its own tunable width.
    extra_layer_count = hp.Int('num_layers', 1, 5)
    for layer_idx in range(extra_layer_count):
        layer_width = hp.Int(
            'units_' + str(layer_idx),
            min_value=1,
            max_value=100,
            step=5,
        )
        network.add(tf.keras.layers.Dense(units=layer_width, activation=hidden_activation))

    # Output layer: one sigmoid unit for the binary target.
    network.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    network.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    return network

In [None]:
#Import kerastuner library and instantiate the tuner to perform the hypertuning.
!pip install keras-tuner --upgrade
import keras_tuner as kt
tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=2,
    hyperband_iterations=2)

# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))
     

In [None]:
# Show a summary of the three best trials found by the search.
tuner.results_summary(num_trials=3)

In [None]:
# Fetch the hyperparameter sets of the three best trials and print each set's
# name -> value mapping on its own line.
top_hyper = tuner.get_best_hyperparameters(num_trials=3)
for param in top_hyper:
    print(param.values)


In [None]:
# Retrieve the three best models from the tuner and report each one's loss and
# accuracy on the scaled test split.
top_model = tuner.get_best_models(num_models=3)
for model in top_model:
    model_loss, model_accuracy = model.evaluate(
        X_test_scaled,
        y_test,
        verbose=2,
    )
    print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Define the final model: a deep neural net with a fixed architecture.
# NOTE(review): the layer widths and sigmoid activation presumably come from
# the tuner results above - confirm against the search output.

# Input width is taken from the training split, matching create_model().
number_input_features = len(X_train_scaled[0])

nn_model = tf.keras.models.Sequential()

# First hidden layer: 81 sigmoid units; also declares the input size.
nn_model.add(tf.keras.layers.Dense(units=81, activation="sigmoid", input_dim=number_input_features))

# Second hidden layer: a single sigmoid unit.
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Third hidden layer: a single sigmoid unit.
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Fourth hidden layer: a single sigmoid unit.
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))


# Output layer: one sigmoid unit for the binary target.
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn_model.summary()
     
     

In [None]:
# Compile the model
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train on the training split only, so the test split used for evaluation
# afterwards is data the model has never seen.
# NOTE(review): the comment previously on this line said epochs were reduced
# to 50, but the call passes 100 - confirm which value is intended.
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=100)

In [None]:
# Score the trained model on the scaled test split and print its loss and accuracy.
model_loss, model_accuracy = nn_model.evaluate(
    x=X_test_scaled,
    y=y_test,
    verbose=2,
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Persist the trained model as an HDF5 file on the mounted Google Drive.
filename = '/content/drive/MyDrive/Jeopardy_Gender_Optimization.h5'
nn_model.save(filepath=filename)