In [1]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

In [2]:
# Loading the data
url = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
df = pd.read_csv(url)

In [3]:
# Preprocessing the data
# Drop columns 'EIN' and 'NAME'
df = df.drop(columns=['EIN', 'NAME'])

In [4]:
# Binning for 'APPLICATION_TYPE' column
application_type_counts = df['APPLICATION_TYPE'].value_counts()
cutoff = 500
other_types = list(application_type_counts[application_type_counts < cutoff].index)
df['APPLICATION_TYPE'] = df['APPLICATION_TYPE'].apply(lambda x: 'Other' if x in other_types else x)

In [5]:
# Binning for 'CLASSIFICATION' column
classification_counts = df['CLASSIFICATION'].value_counts()
cutoff = 1000
other_classifications = list(classification_counts[classification_counts < cutoff].index)
df['CLASSIFICATION'] = df['CLASSIFICATION'].apply(lambda x: 'Other' if x in other_classifications else x)

In [6]:
# Convert categorical data to numeric using one-hot encoding
df = pd.get_dummies(df)

In [7]:
# Splitting the data into features and target
X = df.drop(columns=['IS_SUCCESSFUL'])
y = df['IS_SUCCESSFUL']

In [8]:
# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [9]:
# Standardizing the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['sigmoid', 'relu','tanh'])

    # Allow kerastuner to decide number of neurons in first layer
    model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
    min_value=1,
    max_value=30,
    step=5), activation=activation, input_dim=X_train_scaled.shape[1]))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 8)):
        model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=30,
            step=5),
            activation=activation))

    model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    return model

In [11]:
! pip install keras_tuner



In [12]:
# Import the kerastuner library
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

Reloading Tuner from ./untitled_project/tuner0.json


In [13]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

In [14]:
# Get top 3 model hyperparameters and print the values
top_hyper = tuner.get_best_hyperparameters(3)
for param in top_hyper:
    print(param.values)

{'activation': 'relu', 'first_units': 26, 'num_layers': 2, 'units_0': 21, 'units_1': 6, 'units_2': 21, 'units_3': 21, 'units_4': 21, 'units_5': 6, 'units_6': 6, 'units_7': 21, 'tuner/epochs': 20, 'tuner/initial_epoch': 7, 'tuner/bracket': 1, 'tuner/round': 1, 'tuner/trial_id': '0019'}
{'activation': 'relu', 'first_units': 21, 'num_layers': 1, 'units_0': 6, 'units_1': 6, 'units_2': 21, 'units_3': 1, 'units_4': 16, 'units_5': 1, 'units_6': 21, 'units_7': 1, 'tuner/epochs': 3, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0}
{'activation': 'tanh', 'first_units': 6, 'num_layers': 6, 'units_0': 1, 'units_1': 26, 'units_2': 1, 'units_3': 6, 'units_4': 11, 'units_5': 1, 'units_6': 21, 'units_7': 21, 'tuner/epochs': 20, 'tuner/initial_epoch': 0, 'tuner/bracket': 0, 'tuner/round': 0}


In [15]:
# Evaluate the top 3 models against the test dataset
top_models = tuner.get_best_models(3)
for i, model in enumerate(top_models, start=1):
    model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test, verbose=2)
    print(f"Model {i}: Loss: {model_loss}, Accuracy: {model_accuracy}")


268/268 - 1s - loss: 0.5534 - accuracy: 0.7279 - 1s/epoch - 5ms/step
Model 1: Loss: 0.5534289479255676, Accuracy: 0.7279300093650818
268/268 - 1s - loss: 0.5613 - accuracy: 0.7265 - 1s/epoch - 4ms/step
Model 2: Loss: 0.5612772107124329, Accuracy: 0.7265306115150452
268/268 - 1s - loss: 0.5798 - accuracy: 0.7264 - 959ms/epoch - 4ms/step
Model 3: Loss: 0.579780101776123, Accuracy: 0.7264139652252197


In [16]:
# Saving the model
model.save("AlphabetSoupCharity_Optimization.h5")

  saving_api.save_model(
