In [146]:
# Set up dependencies   
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.callbacks import LambdaCallback
import tensorflow as tf
import pandas as pd 

In [147]:
#  Import and read the charity_data.csv.
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")
application_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [148]:
# Drop the 'EIN' and 'NAME' columns.
colums_to_drop = ["EIN", "NAME"]    
application_df = application_df.drop(colums_to_drop, axis=1)
application_df.head()   

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [149]:
# Determine the number of unique values in each column.
application_df.nunique(axis=0)

APPLICATION_TYPE            17
AFFILIATION                  6
CLASSIFICATION              71
USE_CASE                     5
ORGANIZATION                 4
STATUS                       2
INCOME_AMT                   9
SPECIAL_CONSIDERATIONS       2
ASK_AMT                   8747
IS_SUCCESSFUL                2
dtype: int64

In [150]:
# Look at APPLICATION_TYPE value counts for binning
application_type = application_df.APPLICATION_TYPE.value_counts()
print(application_type)

T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T25        3
T14        3
T29        2
T15        2
T17        1
Name: APPLICATION_TYPE, dtype: int64


## Optimization Attempt 1
Change cut off from 250 to 800    

In [151]:
# Cut off at 800 and create a list of application types to be replaced
cutoff_value_app = 800
application_types_to_replace = application_type[application_type < cutoff_value_app].index.tolist()

# Replace with "Other" in the dataframe
for app in application_types_to_replace:
    application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(app, "Other")

# Check to make sure binning was successful
print(application_df['APPLICATION_TYPE'].value_counts())

T3       27037
Other     2266
T4        1542
T6        1216
T5        1173
T19       1065
Name: APPLICATION_TYPE, dtype: int64


In [152]:
# Look at CLASSIFICATION value counts for binning
classification_counts = application_df.CLASSIFICATION.value_counts()
print(classification_counts)

C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
         ...  
C4120        1
C8210        1
C2561        1
C4500        1
C2150        1
Name: CLASSIFICATION, Length: 71, dtype: int64


In [180]:
# Cut off at 1000 and create a list of classifications to be replaced
cut_off_value_class = 2000  
classifications_to_replace = classification_counts[classification_counts < cut_off_value_class].index.tolist()  

# Replace with "Other" in the dataframe
for cls in classifications_to_replace:
    application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(cls,"Other")
    
# Check to make sure binning was successful
application_df['CLASSIFICATION'].value_counts()  

C1000    17326
C2000     6074
Other     6062
C1200     4837
Name: CLASSIFICATION, dtype: int64

In [181]:
# Check the data types of each column.  
application_df.dtypes

APPLICATION_TYPE          object
AFFILIATION               object
CLASSIFICATION            object
USE_CASE                  object
ORGANIZATION              object
STATUS                     int64
INCOME_AMT                object
SPECIAL_CONSIDERATIONS    object
ASK_AMT                    int64
IS_SUCCESSFUL              int64
dtype: object

In [182]:
# Convert categorical data to numeric with `pd.get_dummies`
application_df_dummies = pd.get_dummies(application_df)

# Display the resulting DataFrame
application_df_dummies.head()

Unnamed: 0,STATUS,ASK_AMT,IS_SUCCESSFUL,APPLICATION_TYPE_Other,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,APPLICATION_TYPE_T6,AFFILIATION_CompanySponsored,...,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,1,5000,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,1,108590,1,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
2,1,5000,0,0,0,0,0,1,0,1,...,0,0,0,0,0,0,0,0,1,0
3,1,6692,1,0,0,1,0,0,0,1,...,0,1,0,0,0,0,0,0,1,0
4,1,142590,1,0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0


In [183]:
# Split the preprocessed data into target and features arrays   
y = application_df_dummies["IS_SUCCESSFUL"].values
X = application_df_dummies.drop(["IS_SUCCESSFUL"], axis=1).values   

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [184]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler with the training data
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [185]:
number_input_features = len(X_train[0])
print(number_input_features)

38


## Optimization attempt 1 

Increase the number of neurons in the input layer

Add two hidden layers 

Increase neurons in each layer 

Change activation functions 

In [186]:
# Define the model
optimize_nn = tf.keras.models.Sequential()  

# Layer 0
optimize_nn.add(tf.keras.layers.Dense(units=128, input_dim=number_input_features, activation='relu'))   

# Layer 1
optimize_nn.add(tf.keras.layers.Dense(units=128, activation='relu'))

# Layer 2
optimize_nn.add(tf.keras.layers.Dense(units=128, activation='tanh'))

# Layer 3
optimize_nn.add(tf.keras.layers.Dense(units=128, activation='relu'))

# Layer 4
optimize_nn.add(tf.keras.layers.Dense(units=128, activation='tanh'))

# Layer 5
optimize_nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Check the structure of the model
optimize_nn.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_16 (Dense)            (None, 128)               4992      
                                                                 
 dense_17 (Dense)            (None, 128)               16512     
                                                                 
 dense_18 (Dense)            (None, 128)               16512     
                                                                 
 dense_19 (Dense)            (None, 128)               16512     
                                                                 
 dense_20 (Dense)            (None, 128)               16512     
                                                                 
 dense_21 (Dense)            (None, 1)                 129       
                                                                 
Total params: 71169 (278.00 KB)
Trainable params: 7116

In [187]:
# Compile the model
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=0.001)
optimize_nn.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [188]:
# Train the model
optimize_nn.fit(X_train_scaled, y_train, epochs=100, callbacks=None)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x2a08fca00>

In [191]:
# Evaluate the model using the test data    
model_loss, model_accuracy = optimize_nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - loss: 0.5682 - accuracy: 0.7251 - 116ms/epoch - 431us/step
Loss: 0.568240761756897, Accuracy: 0.7251312136650085


## Attempt 2
Increase the number of neurons in the hidden layers 

Increase the number of epochs 

Use relu for the hidden layers and sigmoid for the output layer  

In [192]:
# Define the model 
optimize_nn2 = tf.keras.models.Sequential()  

# Layer 0
optimize_nn2.add(tf.keras.layers.Dense(units=200, input_dim=number_input_features, activation='relu'))   

# Layer 1
optimize_nn2.add(tf.keras.layers.Dense(units=200, activation='relu'))

# Layer 2
optimize_nn2.add(tf.keras.layers.Dense(units=200, activation='relu'))

# Layer 3 
optimize_nn2.add(tf.keras.layers.Dense(units=200, activation='relu'))

# Layer 4
optimize_nn.add(tf.keras.layers.Dense(units=200, activation='relu'))

# Layer 5
optimize_nn2.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Check the structure of the model
optimize_nn2.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_22 (Dense)            (None, 200)               7800      
                                                                 
 dense_23 (Dense)            (None, 200)               40200     
                                                                 
 dense_24 (Dense)            (None, 200)               40200     
                                                                 
 dense_25 (Dense)            (None, 200)               40200     
                                                                 
 dense_27 (Dense)            (None, 1)                 201       
                                                                 
Total params: 128601 (502.35 KB)
Trainable params: 128601 (502.35 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [193]:
# Compile the model
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=0.001)
optimize_nn2.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [194]:
# Train the model
optimize_nn2.fit(X_train_scaled, y_train, epochs=200, callbacks=None)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.src.callbacks.History at 0x2a1118c10>

In [195]:
# Evaluate the model using the test data    
model_loss, model_accuracy = optimize_nn2.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - loss: 0.8017 - accuracy: 0.7244 - 191ms/epoch - 714us/step
Loss: 0.8017259836196899, Accuracy: 0.7244315147399902


## Use keras tuner to try to find the best hyperparameters for the model   

In [197]:
# Set up keras tuner
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner.tuners import RandomSearch


def build_model(hp):
    tuner_model = keras.Sequential()

    # Choose the number of layers
    for i in range(hp.Int('num_layers', min_value=1, max_value=5)):
        # Choose the number of neurons in each layer
        tuner_model.add(layers.Dense(units=hp.Int(f'units_{i}', min_value=32, max_value=512, step=32), 
                                     activation=hp.Choice(f'activation_{i}', ['relu', 'tanh', 'elu'])))
    
    tuner_model.add(layers.Dense(1, activation='sigmoid'))

    tuner_model.compile(optimizer=keras.optimizers.legacy.Adam(hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return tuner_model

# Initialize the tuner
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    project_name='Tuner_hypers' 
)

# Search for the best hyperparameter configuration
tuner.search(X_train_scaled, y_train, epochs=100, validation_data=(X_train_scaled, y_train))

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build the final model with the best hyperparameters
final_model = tuner.hypermodel.build(best_hps)

Trial 10 Complete [00h 01m 34s]
val_accuracy: 0.7395817041397095

Best val_accuracy So Far: 0.7395817041397095
Total elapsed time: 00h 24m 55s


In [198]:
# Print the best hyperparameters
print("Best Hyperparameters:")
print(f"Number of Layers: {best_hps.get('num_layers')}")
for i in range(best_hps.get('num_layers')):
    print(f"Units in Layer {i}: {best_hps.get(f'units_{i}')}")
    print(f"Activation in Layer {i}: {best_hps.get(f'activation_{i}')}")
print(f"Learning Rate: {best_hps.get('learning_rate')}")

Best Hyperparameters:
Number of Layers: 3
Units in Layer 0: 32
Activation in Layer 0: tanh
Units in Layer 1: 192
Activation in Layer 1: tanh
Units in Layer 2: 256
Activation in Layer 2: tanh
Learning Rate: 0.001


## Attempt 3
Use the results from the RandomSearch to build a new model with the best hyperparameters.    

In [203]:
# Define the model
optimize_nn3 = tf.keras.models.Sequential()  

# Layer 0
optimize_nn3.add(tf.keras.layers.Dense(units=32, input_dim=number_input_features, activation='tanh'))   

# Layer 1
optimize_nn3.add(tf.keras.layers.Dense(units=192, activation='tanh'))

# Layer 2
optimize_nn3.add(tf.keras.layers.Dense(units=256, activation='tanh'))

# Output layer 
optimize_nn3.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Check the structure of the model

optimize_nn3.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_16 (Dense)            (None, 32)                1248      
                                                                 
 dense_17 (Dense)            (None, 192)               6336      
                                                                 
 dense_18 (Dense)            (None, 256)               49408     
                                                                 
 dense_19 (Dense)            (None, 1)                 257       
                                                                 
Total params: 57249 (223.63 KB)
Trainable params: 57249 (223.63 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [204]:
# Compile the model
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=0.001)
optimize_nn3.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy']) 

In [205]:
# Train the model
optimize_nn3.fit(X_train_scaled, y_train, epochs=100, callbacks=None)   

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x2c575ba00>

In [206]:
# # Evaluate the model using the test data
model_loss, model_accuracy = optimize_nn3.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - loss: 0.5657 - accuracy: 0.7163 - 159ms/epoch - 592us/step
Loss: 0.5657157897949219, Accuracy: 0.7162682414054871
