# Alphabet Soup Charity Model Training and Optimization

In [None]:

# Import necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

# ---------------------------------------------------------------------------
# Preprocessing: load the charity dataset, bucket rare categories, one-hot
# encode the categorical columns, then split and scale the features.
# ---------------------------------------------------------------------------

# Load the charity_data.csv into a Pandas DataFrame
url = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
application_df = pd.read_csv(url)

# Display the first few rows of the dataframe.
# A bare `application_df.head()` is only rendered when it is the last
# expression of a notebook cell, so it is printed explicitly here.
print(application_df.head())

# Drop the EIN and NAME columns
application_df = application_df.drop(columns=["EIN", "NAME"])

# Check the number of unique values in each column
unique_values = application_df.nunique()
print(unique_values)

# For columns with more than 10 unique values, count the occurrences for each
# unique value (reuses the counts computed above instead of recomputing them).
for column in unique_values[unique_values > 10].index:
    print(application_df[column].value_counts())

# Values that occur fewer times than this are collapsed into a single bucket.
RARE_VALUE_CUTOFF = 1000

# Group rare categorical variables in 'APPLICATION_TYPE' as 'Other'.
# Bracket access is used for the assignment so that a column is always
# written, and the rare labels are passed to replace() as a plain list.
application_counts = application_df["APPLICATION_TYPE"].value_counts()
rare_application_types = application_counts[
    application_counts < RARE_VALUE_CUTOFF
].index
application_df["APPLICATION_TYPE"] = application_df["APPLICATION_TYPE"].replace(
    list(rare_application_types), 'Other'
)

# Check if the grouping was successful
print(application_df["APPLICATION_TYPE"].value_counts())

# Apply the same logic to 'CLASSIFICATION'
classification_counts = application_df["CLASSIFICATION"].value_counts()
rare_classifications = classification_counts[
    classification_counts < RARE_VALUE_CUTOFF
].index
application_df["CLASSIFICATION"] = application_df["CLASSIFICATION"].replace(
    list(rare_classifications), 'Other'
)

# Check if the grouping was successful
print(application_df["CLASSIFICATION"].value_counts())

# Use pd.get_dummies() to encode categorical variables
application_df = pd.get_dummies(application_df)

# Split the preprocessed data into features (X) and target (y)
X = application_df.drop(columns=["IS_SUCCESSFUL"])
y = application_df["IS_SUCCESSFUL"]

# Split the data into training and testing datasets (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale the features: the scaler is fitted on the training split only and the
# same transformation is then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Print the shapes of the datasets to confirm
print(X_train_scaled.shape, X_test_scaled.shape, y_train.shape, y_test.shape)


## Step 2: Compile, Train, and Evaluate the Model

In [None]:

# Import TensorFlow and Keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import ModelCheckpoint

# ---------------------------------------------------------------------------
# Baseline network: two ReLU hidden layers (80 and 30 units) feeding a single
# sigmoid output unit, trained with Adam and binary cross-entropy.
# ---------------------------------------------------------------------------

# Define the number of input features
input_features = X_train_scaled.shape[1]

# Create the Sequential model
model = Sequential()

# Declare the input shape with an explicit Input object instead of passing
# the deprecated `input_dim` keyword to the first Dense layer.
model.add(tf.keras.Input(shape=(input_features,)))

# Add the first hidden layer
model.add(Dense(units=80, activation='relu'))

# Add a second hidden layer
model.add(Dense(units=30, activation='relu'))

# Add the output layer
model.add(Dense(units=1, activation='sigmoid'))

# Check the structure of the model
model.summary()

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Define a callback that saves the model at the end of every epoch.
# save_weights_only=False means the full model is written, not just the
# weights. The target file is the same one written by model.save() below, so
# each epoch overwrites the previous checkpoint.
checkpoint = ModelCheckpoint(
    filepath="AlphabetSoupCharity.h5",
    save_weights_only=False,
    save_freq='epoch',
    verbose=1,
)

# Train the model.
# NOTE(review): the test split is also passed as validation data, so the
# val_* metrics reported during training and the evaluate() call below are
# computed on the same samples.
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=100,
    callbacks=[checkpoint],
    validation_data=(X_test_scaled, y_test),
)

# Evaluate the model using the test data
loss, accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Model Loss: {loss}, Model Accuracy: {accuracy}")

# Save the model to an HDF5 file
model.save("AlphabetSoupCharity.h5")


## Step 3: Optimize the Model

In [None]:

from tensorflow.keras.layers import Dropout

# ---------------------------------------------------------------------------
# Optimized network: three ReLU hidden layers (100, 50 and 25 units) with
# dropout after the first two, feeding a single sigmoid output unit.
# ---------------------------------------------------------------------------

# Create a new Sequential model with additional layers and dropout
model_optimized = Sequential()

# Declare the input shape with an explicit Input object instead of passing
# the deprecated `input_dim` keyword to the first Dense layer.
model_optimized.add(tf.keras.Input(shape=(input_features,)))

# First hidden layer with more neurons
model_optimized.add(Dense(units=100, activation='relu'))

# Add a dropout layer to prevent overfitting
model_optimized.add(Dropout(0.2))

# Second hidden layer with increased neurons
model_optimized.add(Dense(units=50, activation='relu'))

# Add another dropout layer
model_optimized.add(Dropout(0.2))

# Third hidden layer
model_optimized.add(Dense(units=25, activation='relu'))

# Output layer with sigmoid activation for binary classification
model_optimized.add(Dense(units=1, activation='sigmoid'))

# Check the structure of the optimized model
model_optimized.summary()

# Compile the optimized model
model_optimized.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Define a callback that saves the optimized model at the end of every epoch.
# save_weights_only=False means the full model is written, not just the
# weights. The target file is the same one written by save() below, so each
# epoch overwrites the previous checkpoint.
checkpoint_optimized = ModelCheckpoint(
    filepath="AlphabetSoupCharity_Optimization.h5",
    save_weights_only=False,
    save_freq='epoch',
    verbose=1,
)

# Train the optimized model.
# NOTE(review): the test split is also passed as validation data, so the
# val_* metrics reported during training and the evaluate() call below are
# computed on the same samples.
history_optimized = model_optimized.fit(
    X_train_scaled,
    y_train,
    epochs=150,
    callbacks=[checkpoint_optimized],
    validation_data=(X_test_scaled, y_test),
)

# Evaluate the optimized model using the test data
loss_optimized, accuracy_optimized = model_optimized.evaluate(X_test_scaled, y_test)
print(f"Optimized Model Loss: {loss_optimized}, Optimized Model Accuracy: {accuracy_optimized}")

# Save the optimized model to an HDF5 file
model_optimized.save("AlphabetSoupCharity_Optimization.h5")
