# Tuning, Part 2

We've been able to obtain a fairly reliable 73% accuracy after tuning once, removing less-impactful features, and consolidating less-impactful values for some of the features, but we are still 1.7% short of our goal, so we're going to do one more round of hyperparameter tuning, using the features obtained from previous tests.

~~If this doesn't get to 75% accuracy, I will have to admit defeat, because I will know no other ways of optimizing it.~~

In [1]:
# Import our dependencies
from pathlib import Path
import shutil
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import keras_tuner as kt

Using TensorFlow backend


In [2]:
#  Import and read the charity_data.csv.
#application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")
application_df = pd.read_csv("Data/charity_data.csv")[["APPLICATION_TYPE", "AFFILIATION", "CLASSIFICATION", "ORGANIZATION", "INCOME_AMT", "IS_SUCCESSFUL"]]
application_df.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,ORGANIZATION,INCOME_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,Association,0,1
1,T3,Independent,C2000,Co-operative,1-9999,1
2,T5,CompanySponsored,C3000,Association,0,0
3,T3,CompanySponsored,C2000,Trust,10000-24999,1
4,T3,Independent,C1000,Trust,100000-499999,1


In [3]:
# Choose a cutoff value and create a list of application types to be replaced
for app in application_df['APPLICATION_TYPE'].value_counts().index[8:]:
    application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(app,"Other")

# Choose a cutoff value and create a list of affiliations to be replaced
for app in application_df['AFFILIATION'].value_counts().index[2:]:
    application_df['AFFILIATION'] = application_df['AFFILIATION'].replace(app,"Other")

# Choose a cutoff value and create a list of classifications to be replaced
for cls in application_df['CLASSIFICATION'].value_counts().index[5:]:
    application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(cls,"Other")

# Choose a cutoff value and create a list of organizations to be replaced
for app in application_df['ORGANIZATION'].value_counts().index[2:]:
    application_df['ORGANIZATION'] = application_df['ORGANIZATION'].replace(app,"Other")

# Choose a cutoff value and create a list of income amounts to be replaced
for inc in application_df['INCOME_AMT'].value_counts().index[3:]:
    application_df['INCOME_AMT'] = application_df['INCOME_AMT'].replace(inc,"Other")

# Convert categorical data to numeric with `pd.get_dummies`
application_df = pd.get_dummies(application_df)
application_df.head()

Unnamed: 0,IS_SUCCESSFUL,APPLICATION_TYPE_Other,APPLICATION_TYPE_T10,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,APPLICATION_TYPE_T6,APPLICATION_TYPE_T7,APPLICATION_TYPE_T8,...,CLASSIFICATION_C2100,CLASSIFICATION_C3000,CLASSIFICATION_Other,ORGANIZATION_Association,ORGANIZATION_Other,ORGANIZATION_Trust,INCOME_AMT_0,INCOME_AMT_100000-499999,INCOME_AMT_25000-99999,INCOME_AMT_Other
0,1,0,1,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,0
1,1,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1
2,0,0,0,0,0,0,1,0,0,0,...,0,1,0,1,0,0,1,0,0,0
3,1,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
4,1,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,0


In [4]:
# Split our preprocessed data into our features and target arrays
features = application_df.drop("IS_SUCCESSFUL", axis=1)
target = application_df["IS_SUCCESSFUL"]

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(features, target, random_state=1)

In [5]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

### Compile, Train and Evaluate the Model

In [6]:
num_features = len(features.columns)
optimizer_map = {
    'Adadelta': tf.keras.optimizers.Adadelta,
    'Adafactor': tf.keras.optimizers.Adafactor,
    'Adagrad': tf.keras.optimizers.Adagrad,
    'Adam': tf.keras.optimizers.Adam,
    'AdamW': tf.keras.optimizers.AdamW,
    'Adamax': tf.keras.optimizers.Adamax,
    'Ftrl': tf.keras.optimizers.Ftrl,
    'Lion': tf.keras.optimizers.Lion,
    'Nadam': tf.keras.optimizers.Nadam,
    'RMSprop': tf.keras.optimizers.RMSprop,
    'SGD': tf.keras.optimizers.SGD
}

# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    model = tf.keras.models.Sequential()

    # Register hyperparams up here so they appear in the order we want
    min_neurons = 2
    max_neurons = 128
    min_layers = 2
    max_layers = 4
    num_layers = hp.Int('num_hidden_layers', min_layers, max_layers)
    activation_funcs = ['relu', 'selu', 'gelu', 'tanh', 'sigmoid']
    activations = []
    unit_counts = []
    for i in range(max_layers):
        activations.append(hp.Choice(f'activation_{i}', activation_funcs))
        unit_counts.append(hp.Int(f'units_{i}', min_neurons, max_neurons))

    # Allow kerastuner to decide number of layers and neurons, and activation funcs
    for i in range(num_layers):
        if i == 0:
            model.add(tf.keras.layers.Dense(unit_counts[i], activation=activations[i], input_dim=num_features))
        else:
            model.add(tf.keras.layers.Dense(unit_counts[i], activation=activations[i]))
    
    activation = hp.Choice(f'activation_output', activation_funcs)
    model.add(
        tf.keras.layers.Dense(
            hp.Fixed("units_output", 1),
            activation=activation
        )
    )

    optimizer = hp.Choice(
        "optimizer",
        [
            'Adadelta',
            'Adafactor',
            'Adagrad',
            'Adam',
            'AdamW',
            'Adamax',
            'Ftrl',
            'Lion',
            'Nadam',
            'RMSprop',
            'SGD'
        ]
    )
    # Compile the model
    model.compile(loss="binary_crossentropy", optimizer=optimizer_map[optimizer](), metrics=["accuracy"])

    return model

In [7]:
# Create a `Hyperband()` tuner instance
tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=100,
    hyperband_iterations=4
)

In [8]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=20,batch_size=128,validation_data=(X_test_scaled,y_test))

Trial 1016 Complete [00h 00m 49s]
val_accuracy: 0.7292128205299377

Best val_accuracy So Far: 0.731195330619812
Total elapsed time: 02h 19m 21s


In [9]:
# Get top 3 model hyperparameters and print the values
best_hypers = tuner.get_best_hyperparameters(5)
for best_hyper in best_hypers:
    print(best_hyper.values)

{'num_hidden_layers': 3, 'activation_0': 'tanh', 'units_0': 53, 'activation_1': 'gelu', 'units_1': 96, 'activation_2': 'tanh', 'units_2': 65, 'activation_3': 'tanh', 'units_3': 102, 'activation_output': 'tanh', 'units_output': 1, 'optimizer': 'SGD', 'tuner/epochs': 100, 'tuner/initial_epoch': 34, 'tuner/bracket': 2, 'tuner/round': 2, 'tuner/trial_id': '0990'}
{'num_hidden_layers': 3, 'activation_0': 'tanh', 'units_0': 53, 'activation_1': 'gelu', 'units_1': 96, 'activation_2': 'tanh', 'units_2': 65, 'activation_3': 'tanh', 'units_3': 102, 'activation_output': 'tanh', 'units_output': 1, 'optimizer': 'SGD', 'tuner/epochs': 34, 'tuner/initial_epoch': 12, 'tuner/bracket': 2, 'tuner/round': 1, 'tuner/trial_id': '0989'}
{'num_hidden_layers': 3, 'activation_0': 'selu', 'units_0': 51, 'activation_1': 'gelu', 'units_1': 114, 'activation_2': 'relu', 'units_2': 45, 'activation_3': 'sigmoid', 'units_3': 107, 'activation_output': 'sigmoid', 'units_output': 1, 'optimizer': 'Adamax', 'tuner/epochs': 1

In [10]:
# Evaluate the top 3 models against the test dataset
best_models = tuner.get_best_models(5)
for best_model in best_models:
    model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
    print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - loss: 0.5641 - accuracy: 0.7312 - 499ms/epoch - 2ms/step
Loss: 0.5640894174575806, Accuracy: 0.731195330619812
268/268 - 0s - loss: 0.5619 - accuracy: 0.7307 - 491ms/epoch - 2ms/step
Loss: 0.5618554353713989, Accuracy: 0.7307288646697998
268/268 - 0s - loss: 0.5582 - accuracy: 0.7306 - 497ms/epoch - 2ms/step
Loss: 0.5582293272018433, Accuracy: 0.7306122183799744
268/268 - 1s - loss: 0.5577 - accuracy: 0.7305 - 509ms/epoch - 2ms/step
Loss: 0.5576839447021484, Accuracy: 0.7304956316947937
268/268 - 0s - loss: 0.5774 - accuracy: 0.7305 - 488ms/epoch - 2ms/step
Loss: 0.5773752331733704, Accuracy: 0.7304956316947937


In [11]:
# Clear checkpoints from tuning run
shutil.rmtree(Path('./untitled_project/'))

# Analysis

In the end, it looks like we couldn't increase our accuracy at all. Random chance fluctuations end up outweighing any gains we've made due to tuning and tweaking, given that our best accuracies now are less than 0.25% higher than our initial attempt.

This may simply be the result of a good initial set of guesses on my part, but I can't help feeling there should be a way to get the accuracy to the 75% suggested by the assignment.