In [9]:
# Importing necessary libraries
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Source CSV for the charity funding dataset
CHARITY_DATA_URL = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"

# Load the data and discard the two identifier columns in a single step
application_df = pd.read_csv(CHARITY_DATA_URL).drop(columns=['EIN', 'NAME'])

# Preview the first rows
application_df.head()


Optimizer: SGD
  Units: 16, Layers: 2
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
  Units: 16, Layers: 3
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
  Units: 16, Layers: 4
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
  Units: 64, Layers: 2
Epoch 1/25
Epoch 2/2

In [None]:

# Collapse infrequent categories into a single "Other" bucket.

# APPLICATION_TYPE: any value seen fewer than 500 times becomes "Other"
application_counts = application_df['APPLICATION_TYPE'].value_counts()
application_types_to_replace = application_counts.loc[application_counts.lt(500)].index.tolist()
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].mask(
    application_df['APPLICATION_TYPE'].isin(application_types_to_replace),
    "Other",
)

# CLASSIFICATION: any value seen fewer than 1000 times becomes "Other"
classification_counts = application_df['CLASSIFICATION'].value_counts()
classification_types_to_replace = classification_counts.loc[classification_counts.lt(1000)].index.tolist()
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].mask(
    application_df['CLASSIFICATION'].isin(classification_types_to_replace),
    "Other",
)


In [None]:
# Expand the remaining categorical columns into indicator columns
application_df = pd.get_dummies(application_df)

# Separate the label vector (y) from the feature matrix (X)
y = application_df['IS_SUCCESSFUL'].to_numpy()
X = application_df.drop(columns='IS_SUCCESSFUL').to_numpy()

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply them to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Defining model creation function
def create_model(units, layers, first_units=256, input_dim=None):
    """Build an uncompiled feed-forward binary classifier.

    The network is: an input of ``input_dim`` features, one ReLU layer of
    ``first_units`` neurons, ``layers - 1`` further ReLU layers of ``units``
    neurons each, and a single sigmoid output neuron.

    Parameters
    ----------
    units : int
        Width of every hidden layer after the first one.
    layers : int
        Total number of hidden layers, counting the first one. Values of 1
        or less produce only the first hidden layer.
    first_units : int, default 256
        Width of the first hidden layer. The default keeps the width this
        notebook has always used.
    input_dim : int, optional
        Number of input features. When omitted, it is read from the
        notebook-level ``X_train`` at call time.

    Returns
    -------
    tf.keras.Sequential
        The assembled model; the caller is responsible for compiling it.
    """
    if input_dim is None:
        # Default to the training matrix defined earlier in the notebook.
        input_dim = X_train.shape[1]

    nn = tf.keras.Sequential()
    # Declare the input explicitly instead of passing `input_dim` to the first
    # Dense layer, which newer Keras releases warn about.
    nn.add(tf.keras.Input(shape=(int(input_dim),)))
    nn.add(tf.keras.layers.Dense(units=int(first_units), activation='relu'))
    # int() lets callers pass numpy integers, e.g. values read from a DataFrame row.
    for _ in range(int(layers) - 1):
        nn.add(tf.keras.layers.Dense(units=int(units), activation='relu'))
    nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))
    return nn


In [None]:
# Hyperparameter grid: every hidden-layer width paired with every depth
units_values = [16, 64, 128]
layers_values = [2, 3, 4]

# Widths vary slowest and depths fastest, so all depths for 16 units come first
combinations = []
for width in units_values:
    combinations.extend((width, depth) for depth in layers_values)



In [None]:
# Custom loss function
def custom_loss_function(y_true, y_pred):
    """Return the mean of the squared differences between labels and predictions.

    ``y_true`` is first cast to the dtype of ``y_pred`` so the subtraction is
    performed on tensors of the same type.
    """
    labels = tf.cast(y_true, dtype=y_pred.dtype)
    squared_error = tf.square(labels - y_pred)
    return tf.reduce_mean(squared_error)



In [None]:
# Optimizers to compare, keyed by display name.
# Classes (not instances) are stored so every run gets a fresh optimizer with default settings.
Optimizers = {
    'SGD': tf.keras.optimizers.SGD,
    'RMSprop': tf.keras.optimizers.RMSprop,
    'Adagrad': tf.keras.optimizers.Adagrad,
    'Adadelta': tf.keras.optimizers.Adadelta,
    'Adam': tf.keras.optimizers.Adam,
    'Adamax': tf.keras.optimizers.Adamax,
    'Nadam': tf.keras.optimizers.Nadam,
    'Ftrl': tf.keras.optimizers.Ftrl
}

# One record per (optimizer, units, layers) run
results = []

# Grid search: every optimizer against every (units, layers) combination
for optimizer_name, optimizer_class in Optimizers.items():
    print(f"Optimizer: {optimizer_name}")

    for units, layers in combinations:
        print(f"  Units: {units}, Layers: {layers}")

        # Release Keras state left behind by the previous model so memory
        # does not keep growing over the whole grid.
        tf.keras.backend.clear_session()

        # Build a fresh model; the loss is passed as a callable, so no
        # custom-object scope or string lookup is needed.
        model = create_model(units, layers)
        model.compile(loss=custom_loss_function, optimizer=optimizer_class(), metrics=['accuracy'])

        # Train silently: per-epoch logs for every run would swamp the notebook output.
        model.fit(X_train_scaled, y_train, epochs=25, batch_size=4096, verbose=0)

        # evaluate() returns [loss, accuracy]; only the accuracy is recorded.
        # NOTE(review): this score comes from the test split, which is then also
        # used to pick the winner — consider a separate validation split.
        _, accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)

        results.append({'Optimizer': optimizer_name, 'Units': units, 'Layers': layers, 'Accuracy': accuracy})

# Build the results table once from the collected records
results_df = pd.DataFrame(results)

In [12]:
# Sanity check: number of (optimizer, units, layers) runs recorded in the results table
len(results_df)

72

In [13]:
# Inspect the column dtypes and non-null counts of the results table
results_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Optimizer  72 non-null     object 
 1   Units      72 non-null     int64  
 2   Layers     72 non-null     int64  
 3   Accuracy   72 non-null     float64
dtypes: float64(1), int64(2), object(1)
memory usage: 2.4+ KB


In [14]:
# Rank the runs: highest accuracy first; ties go to fewer layers, then fewer units
results_df_sorted = results_df.sort_values(by=['Accuracy', 'Layers', 'Units'], ascending=[False, True, True])

# Show the ranking in two parts: the first 50 rows, then whatever remains.
# iloc[50:] is simply empty when there are 50 rows or fewer, whereas
# tail(len(df) - 50) would receive a negative count and re-display rows
# already shown by head(50).
display(results_df_sorted.head(50))
display(results_df_sorted.iloc[50:])

Unnamed: 0,Optimizer,Units,Layers,Accuracy
44,Adam,128,4,0.730758
13,RMSprop,64,3,0.730029
57,Nadam,64,2,0.7293
54,Nadam,16,2,0.729155
55,Nadam,16,3,0.728863
53,Adamax,128,4,0.728863
40,Adam,64,3,0.728571
56,Nadam,16,4,0.728571
39,Adam,64,2,0.728426
50,Adamax,64,4,0.728426


Unnamed: 0,Optimizer,Units,Layers,Accuracy
20,Adagrad,16,4,0.565889
23,Adagrad,64,4,0.564286
25,Adagrad,128,3,0.554082
34,Adadelta,128,3,0.55
29,Adadelta,16,4,0.534402
18,Adagrad,16,2,0.534111
63,Ftrl,16,2,0.534111
66,Ftrl,64,2,0.534111
69,Ftrl,128,2,0.534111
64,Ftrl,16,3,0.534111


In [15]:
# The sorted table is ordered best-first, so its first row is the winning run
best_model_row = results_df_sorted.iloc[0]

# Hyperparameters of the winning run
best_units = best_model_row['Units']
best_layers = best_model_row['Layers']

# Rebuild that architecture from scratch and compile it with the winning
# optimizer, referenced here by its name string.
# NOTE(review): the hyperparameter search cell trained with custom_loss_function
# for 25 epochs, whereas this retrain uses binary_crossentropy for 50 epochs, so
# the accuracy printed below need not match the searched score — confirm this
# difference is intended.
best_model = create_model(best_units, best_layers)
best_model.compile(
    loss='binary_crossentropy',
    optimizer=best_model_row['Optimizer'],
    metrics=['accuracy'],
)

# Train the rebuilt model on the scaled training data
best_model.fit(
    X_train_scaled,
    y_train,
    epochs=50,
    batch_size=4096,
    verbose=1,
)

# Score on the held-out test split; evaluate() returns [loss, accuracy]
_, best_accuracy = best_model.evaluate(X_test_scaled, y_test, verbose=1)

# Persist the trained model in HDF5 format
best_model.save('AlphabetSoupCharity_Optimization.h5')

# Report the final test accuracy
print(f"Accuracy of the best model: {best_accuracy}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Accuracy of the best model: 0.7208454608917236


In [16]:
# Show the top-ranked search row (optimizer, units, layers and its search accuracy)
best_model_row

Optimizer        Adam
Units             128
Layers              4
Accuracy     0.730758
Name: 44, dtype: object