## Preprocessing

In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense
from keras.callbacks import ModelCheckpoint
import numpy as np
#  Import and read the charity_data.csv.
import pandas as pd 
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")
application_df.head()

In [None]:
# Drop the non-beneficial ID columns, 'EIN' and 'NAME'.

application_df.drop(['EIN', 'NAME'], axis=1, inplace=True)
application_df.head()

In [None]:
# Determine the number of unique values in each column.
unique_value_counts = application_df.nunique()
print(unique_value_counts)

In [None]:
# Look at APPLICATION_TYPE value counts for binning
application_type_counts = application_df['APPLICATION_TYPE'].value_counts()
application_type_counts

In [None]:
# Choose a cutoff value
cutoff_value = 500  # You can adjust this cutoff value as needed

# Get the value counts of application types
application_type_counts = application_df['APPLICATION_TYPE'].value_counts()

# Identify application types to be replaced with "Other"
application_types_to_replace = application_type_counts[application_type_counts < cutoff_value].index

# Replace in the DataFrame
application_df['APPLICATION_TYPE'].replace(application_types_to_replace, "Other", inplace=True)

# Check to make sure binning was successful
print(application_df['APPLICATION_TYPE'].value_counts())

In [None]:
# Look at CLASSIFICATION value counts for binning
application_df['CLASSIFICATION'].value_counts()

In [None]:
# You may find it helpful to look at CLASSIFICATION value counts >1

classification_counts=application_df['CLASSIFICATION'].value_counts().loc[lambda x : x >1]

In [None]:
# Choose a cutoff value (e.g., 1000) for classification counts
cutoff_value = 1000

# Get the counts of each classification
classification_counts = application_df['CLASSIFICATION'].value_counts()

# Create a list of classifications to be replaced based on the cutoff value
classifications_to_replace = classification_counts[classification_counts < cutoff_value].index.tolist()


# Replace in dataframe
for cls in classifications_to_replace:
    application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(cls,"Other")
    
# Check to make sure binning was successful
application_df['CLASSIFICATION'].value_counts()

In [None]:
# Convert categorical data to numeric with `pd.get_dummies`

df = pd.DataFrame(application_df)

# Convert 'Category' column to dummy variables
df = pd.get_dummies(df).astype(int)

In [None]:
# Split our preprocessed data into our features and target arrays
# Assuming you have a DataFrame 'application_df' with preprocessed data

# Define your features (X) and target (y)
X = df.drop(columns=['IS_SUCCESSFUL'],axis=1)
y = df['IS_SUCCESSFUL']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=70)

In [None]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Compile, Train and Evaluate the Model

In [135]:


# Load your dataset and preprocess it as needed
# X_train, y_train, X_val, y_val, X_test, y_test = ...
# nn_model = tf.keras.models.Sequential()
# Define the neural network model
nn_model = tf.keras.models.Sequential()

# Hidden layers
nn_model.add(Dense(units=80, activation='sigmoid', input_dim=43))
nn_model.add(Dense(units=30, activation='sigmoid')) # Fully connected layer with ReLU activation
nn_model.add(Dense(units=1, activation='sigmoid'))  # Fully connected layer with ReLU activation
# Output layer
# nn_model.add(10, activation='softmax')  # Output layer with softmax activation (e.g., for classification)


# Compile the model with different optimization methods
# optimizers = {
# 'sgd': tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9),
# 'Adam': tf.keras.optimizers.Adam(learning_rate=0.001),
# 'RMSprop': tf.keras.optimizers.RMSprop(learning_rate=0.001)
# }
optimizers = {
'sgd',
'Adam',
'RMSprop'
}
# nn_model.summary()
# Iterate through different optimizers and train the model
for optimizer_name in optimizers:
    nn_model.compile(optimizer=optimizer_name, loss='binary_crossentropy', metrics=['accuracy'])
    # Train the model
    # nn_model.fit(X_train, y_train, epochs=10, batch_size=32)
    fit_model = nn_model.fit(X_train_scaled, y_train, epochs=10)
    # Evaluate the model on the test data
    model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)

    print(f"Optimizer: {optimizer_name}")
    print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
268/268 - 0s - loss: 0.5571 - accuracy: 0.7262 - 186ms/epoch - 693us/step
Optimizer: RMSprop
Loss: 0.5571078658103943, Accuracy: 0.7261807322502136
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
268/268 - 0s - loss: 0.5566 - accuracy: 0.7257 - 179ms/epoch - 669us/step
Optimizer: sgd
Loss: 0.5565654635429382, Accuracy: 0.7257142663002014
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
268/268 - 0s - loss: 0.5559 - accuracy: 0.7272 - 175ms/epoch - 652us/step
Optimizer: Adam
Loss: 0.555928647518158, Accuracy: 0.7272303104400635


In [None]:
# Define the ModelCheckpoint callback
model_checkpoint = ModelCheckpoint(
    filepath='model_weights_epoch.h5',
    save_weights_only=True,
    period=5  # Save weights every 5 epochs
)

In [None]:
# # Export our model to HDF5 file
# nn_model.save("AlphabetSoupCharity.h5")