# <span style="color: red;">MODEL 1: remove "STATUS" & "SPECIAL_CONSIDERATIONS" columns from dataframe</span>


## <span style="color: orange;"> Preprocessing - Model 1</span>

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint # Import the ModelCheckpoint callback from tensorflow.keras.callbacks

#  Import and read the charity_data.csv.
import pandas as pd 
application1_df = pd.read_csv("https://static.bc-edx.com/data/dla-1-2/m21/lms/starter/charity_data.csv")
application1_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [2]:
# Look at STATUS value counts
status_counts = application1_df['STATUS'].value_counts()
print(status_counts)

1    34294
0        5
Name: STATUS, dtype: int64


In [3]:
# Look at SPECIAL_CONSIDERATION value counts
special_con_counts = application1_df['SPECIAL_CONSIDERATIONS'].value_counts()
print(special_con_counts)

N    34272
Y       27
Name: SPECIAL_CONSIDERATIONS, dtype: int64


In [4]:
# Drop the non-beneficial ID columns, 'EIN','NAME','STATUS' and 'SPECIAL_CONSIDERATIONS'.
application1_df=application1_df.drop(columns=['EIN','NAME','STATUS','SPECIAL_CONSIDERATIONS'])

In [5]:
# Determine the number of unique values in each column.
application1_df.nunique()

APPLICATION_TYPE      17
AFFILIATION            6
CLASSIFICATION        71
USE_CASE               5
ORGANIZATION           4
INCOME_AMT             9
ASK_AMT             8747
IS_SUCCESSFUL          2
dtype: int64

In [6]:
# Look at APPLICATION_TYPE value counts for binning
application1_type_counts = application1_df['APPLICATION_TYPE'].value_counts()
print(application1_type_counts)

T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T25        3
T14        3
T29        2
T15        2
T17        1
Name: APPLICATION_TYPE, dtype: int64


In [7]:
# Choose a cutoff value and create a list of application types to be replaced
# use the variable name `application_types_to_replace`

# Choose cutoff value
cutoff_value = 'T10'

# Create a list of application types to be replaced
application1_types_to_replace = application1_type_counts[application1_type_counts < application1_type_counts[cutoff_value]].index.tolist()

# Replace in dataframe
for app in application1_types_to_replace:
    application1_df['APPLICATION_TYPE'] = application1_df['APPLICATION_TYPE'].replace(app,"Other")

# Check to make sure binning was successful
application1_df['APPLICATION_TYPE'].value_counts()

T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
Other      276
Name: APPLICATION_TYPE, dtype: int64

In [8]:
# Look at CLASSIFICATION value counts for binning
classification_type_counts1 = application1_df['CLASSIFICATION'].value_counts()
print(classification_type_counts1)

C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
         ...  
C4120        1
C8210        1
C2561        1
C4500        1
C2150        1
Name: CLASSIFICATION, Length: 71, dtype: int64


In [9]:
# Choose a cutoff value and create a list of classifications to be replaced
# use the variable name `classifications_to_replace`
# Choose a cutoff value
cutoff_value = 'C2100'

# Create a list of classifications to be replaced with "Other"
classifications_to_replace = classification_type_counts1[classification_type_counts1 < classification_type_counts1[cutoff_value]].index.tolist()

# Replace in dataframe
for cls in classifications_to_replace:
    application1_df['CLASSIFICATION'] = application1_df['CLASSIFICATION'].replace(cls,"Other")
    
# Check to make sure binning was successful
application1_df['CLASSIFICATION'].value_counts()

C1000    17326
C2000     6074
C1200     4837
Other     2261
C3000     1918
C2100     1883
Name: CLASSIFICATION, dtype: int64

In [10]:
# Convert categorical data to numeric with `pd.get_dummies`

# Use pd.get_dummies to one-hot encode the categorical columns
application1_df = pd.get_dummies(application1_df)

In [11]:
# Split our preprocessed data into our features and target arrays
X1 = application1_df.drop(columns=['IS_SUCCESSFUL'])  # Features
y1 = application1_df['IS_SUCCESSFUL']  # Target

# Split the preprocessed data into a training and testing dataset
X_train1, X_test1, y_train1, y_test1 = train_test_split(X1, y1, random_state=1)

In [12]:
# Create a StandardScaler instances
scaler1 = StandardScaler()

# Fit the StandardScaler
X_scaler1 = scaler1.fit(X_train1)

# Scale the data
X_train_scaled1 = X_scaler1.transform(X_train1)
X_test_scaled1 = X_scaler1.transform(X_test1)
print(X_test_scaled1.shape)
print(X_train_scaled1.shape)

(8575, 40)
(25724, 40)


## <span style="color: orange;">Compile, Train and Evaluate the Model - Model 1</span>

In [13]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
nn_model1 = tf.keras.models.Sequential()

# First hidden layer
nn_model1.add(tf.keras.layers.Dense(units=80, activation="relu", input_dim=40))

# Second hidden layer
nn_model1.add(tf.keras.layers.Dense(units=30, activation="relu"))

# Output layer
nn_model1.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn_model1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 80)                3280      
                                                                 
 dense_1 (Dense)             (None, 30)                2430      
                                                                 
 dense_2 (Dense)             (None, 1)                 31        
                                                                 
Total params: 5741 (22.43 KB)
Trainable params: 5741 (22.43 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [14]:
# Compile the model
nn_model1.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Define a callback to save the model's weights every five epochs
# Define the checkpoint path and filename
checkpoint_path_model1 = "checkpoints_model1/weights.{epoch:02d}.hdf5"

# Create a ModelCheckpoint callback that saves the model's weights every five epochs
checkpoint_callback_model1 = ModelCheckpoint(
    filepath=checkpoint_path_model1,
    save_weights_only=True,
    save_freq='epoch',
    period=5
)



In [15]:
# Train the model
fit_model1 = nn_model1.fit(X_test_scaled1, y_test1, epochs=100, callbacks=[checkpoint_callback_model1])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [16]:
# Evaluate the model using the test data
model1_loss, model1_accuracy = nn_model1.evaluate(X_test_scaled1,y_test1,verbose=2)
print(f"Loss: {model1_loss}, Accuracy: {model1_accuracy}")

268/268 - 0s - loss: 0.5218 - accuracy: 0.7479 - 374ms/epoch - 1ms/step
Loss: 0.5217640399932861, Accuracy: 0.7478716969490051


<span style="color: red;">______________________________________________________________________________________________________________________________</span>

# <span style="color: red;">MODEL 2 - Add More Hidden Layers and Neurons</span>

## <span style="color: orange;">Preprocessing - Model 2</span>

In [17]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint # Import the ModelCheckpoint callback from tensorflow.keras.callbacks

#  Import and read the charity_data.csv.
import pandas as pd 
application2_df = pd.read_csv("https://static.bc-edx.com/data/dla-1-2/m21/lms/starter/charity_data.csv")
application2_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [18]:
# Drop the non-beneficial ID columns, 'EIN' and 'NAME'.
application2_df=application2_df.drop(columns=['EIN','NAME'])

In [19]:
# Determine the number of unique values in each column.
application2_df.nunique()

APPLICATION_TYPE            17
AFFILIATION                  6
CLASSIFICATION              71
USE_CASE                     5
ORGANIZATION                 4
STATUS                       2
INCOME_AMT                   9
SPECIAL_CONSIDERATIONS       2
ASK_AMT                   8747
IS_SUCCESSFUL                2
dtype: int64

In [20]:
# Look at APPLICATION_TYPE value counts for binning
application2_type_counts = application2_df['APPLICATION_TYPE'].value_counts()
print(application2_type_counts)

T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T25        3
T14        3
T29        2
T15        2
T17        1
Name: APPLICATION_TYPE, dtype: int64


In [21]:
# Choose a cutoff value and create a list of application types to be replaced
# use the variable name `application_types_to_replace`

# Choose cutoff value
cutoff_value = 'T10'

# Create a list of application types to be replaced
application2_types_to_replace = application2_type_counts[application2_type_counts < application2_type_counts[cutoff_value]].index.tolist()

# Replace in dataframe
for app in application2_types_to_replace:
    application2_df['APPLICATION_TYPE'] = application2_df['APPLICATION_TYPE'].replace(app,"Other")

# Check to make sure binning was successful
application2_df['APPLICATION_TYPE'].value_counts()

T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
Other      276
Name: APPLICATION_TYPE, dtype: int64

In [22]:
# Look at CLASSIFICATION value counts for binning
classification_type_counts1 = application2_df['CLASSIFICATION'].value_counts()
print(classification_type_counts1)

C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
         ...  
C4120        1
C8210        1
C2561        1
C4500        1
C2150        1
Name: CLASSIFICATION, Length: 71, dtype: int64


In [23]:
# Choose a cutoff value and create a list of classifications to be replaced
# use the variable name `classifications_to_replace`
# Choose a cutoff value
cutoff_value = 'C2100'

# Create a list of classifications to be replaced with "Other"
classifications_to_replace = classification_type_counts1[classification_type_counts1 < classification_type_counts1[cutoff_value]].index.tolist()

# Replace in dataframe
for cls in classifications_to_replace:
    application2_df['CLASSIFICATION'] = application2_df['CLASSIFICATION'].replace(cls,"Other")
    
# Check to make sure binning was successful
application2_df['CLASSIFICATION'].value_counts()

C1000    17326
C2000     6074
C1200     4837
Other     2261
C3000     1918
C2100     1883
Name: CLASSIFICATION, dtype: int64

In [24]:
# Convert categorical data to numeric with `pd.get_dummies`

# Use pd.get_dummies to one-hot encode the categorical columns
application2_df = pd.get_dummies(application2_df)

In [25]:
# Split our preprocessed data into our features and target arrays
X2 = application2_df.drop(columns=['IS_SUCCESSFUL'])  # Features
y2 = application2_df['IS_SUCCESSFUL']  # Target

# Split the preprocessed data into a training and testing dataset
X_train2, X_test2, y_train2, y_test2 = train_test_split(X2, y2, random_state=1)

In [26]:
# Create a StandardScaler instances
scaler2 = StandardScaler()

# Fit the StandardScaler
X_scaler2 = scaler2.fit(X_train2)

# Scale the data
X_train_scaled2 = X_scaler2.transform(X_train2)
X_test_scaled2 = X_scaler2.transform(X_test2)
print(X_test_scaled2.shape)
print(X_train_scaled2.shape)

(8575, 43)
(25724, 43)


## <span style="color: orange;">Compile, Train and Evaluate the Model - Model 2</span>

In [27]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
nn_model2 = tf.keras.models.Sequential()

# First hidden layer
nn_model2.add(tf.keras.layers.Dense(units=100, activation="relu", input_dim=43))

# Second hidden layer
nn_model2.add(tf.keras.layers.Dense(units=80, activation="relu"))

# Third hidden layer
nn_model2.add(tf.keras.layers.Dense(units=60, activation="relu"))

# Fourth hidden layer
nn_model2.add(tf.keras.layers.Dense(units=30, activation="relu"))

# Fifth hidden layer
nn_model2.add(tf.keras.layers.Dense(units=10, activation="relu"))

# Output layer
nn_model2.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn_model2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 100)               4400      
                                                                 
 dense_4 (Dense)             (None, 80)                8080      
                                                                 
 dense_5 (Dense)             (None, 60)                4860      
                                                                 
 dense_6 (Dense)             (None, 30)                1830      
                                                                 
 dense_7 (Dense)             (None, 10)                310       
                                                                 
 dense_8 (Dense)             (None, 1)                 11        
                                                                 
Total params: 19491 (76.14 KB)
Trainable params: 19491

In [28]:
# Compile the model
nn_model2.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Define a callback to save the model's weights every five epochs
# Define the checkpoint path and filename
checkpoint_path_model2 = "checkpoints_model2/weights.{epoch:02d}.hdf5"

# Create a ModelCheckpoint callback that saves the model's weights every five epochs
checkpoint_callback_model2 = ModelCheckpoint(
    filepath=checkpoint_path_model2,
    save_weights_only=True,
    save_freq='epoch',
    period=5
)



In [29]:
# Train the model
fit_model2 = nn_model2.fit(X_test_scaled2, y_test2, epochs=100, callbacks=[checkpoint_callback_model2])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [30]:
# Evaluate the model using the test data
model2_loss, model2_accuracy = nn_model2.evaluate(X_test_scaled2,y_test2,verbose=2)
print(f"Loss: {model2_loss}, Accuracy: {model2_accuracy}")

268/268 - 0s - loss: 0.5175 - accuracy: 0.7472 - 387ms/epoch - 1ms/step
Loss: 0.5175169110298157, Accuracy: 0.7471719980239868


<span style="color: red;">______________________________________________________________________________________________________________________________</span>

# <span style="color: red;">MODEL 3 - Bin 'ASK_AMT' Column and add EPOCs </span>

## <span style="color: orange;">Preprocessing - Model 3</span>

In [31]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint # Import the ModelCheckpoint callback from tensorflow.keras.callbacks

#  Import and read the charity_data.csv.
import pandas as pd 
application3_df = pd.read_csv("https://static.bc-edx.com/data/dla-1-2/m21/lms/starter/charity_data.csv")
application3_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [32]:
# Drop the non-beneficial ID columns, 'EIN' and 'NAME'.
application3_df=application3_df.drop(columns=['EIN','NAME'])
application3_df.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [33]:
# Determine the number of unique values in each column.
application3_df.nunique()

APPLICATION_TYPE            17
AFFILIATION                  6
CLASSIFICATION              71
USE_CASE                     5
ORGANIZATION                 4
STATUS                       2
INCOME_AMT                   9
SPECIAL_CONSIDERATIONS       2
ASK_AMT                   8747
IS_SUCCESSFUL                2
dtype: int64

In [34]:
# Look at APPLICATION_TYPE value counts for binning
application3_type_counts = application3_df['APPLICATION_TYPE'].value_counts()
print(application3_type_counts)

T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T25        3
T14        3
T29        2
T15        2
T17        1
Name: APPLICATION_TYPE, dtype: int64


In [35]:
# Choose a cutoff value and create a list of application types to be replaced
# use the variable name `application_types_to_replace`

# Choose cutoff value
cutoff_value = 'T10'

# Create a list of application types to be replaced
application3_types_to_replace = application3_type_counts[application3_type_counts < application3_type_counts[cutoff_value]].index.tolist()

# Replace in dataframe
for app in application3_types_to_replace:
    application3_df['APPLICATION_TYPE'] = application3_df['APPLICATION_TYPE'].replace(app,"Other")

# Check to make sure binning was successful
application3_df['APPLICATION_TYPE'].value_counts()

T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
Other      276
Name: APPLICATION_TYPE, dtype: int64

In [36]:
# Look at CLASSIFICATION value counts for binning
classification_type_counts1 = application3_df['CLASSIFICATION'].value_counts()
print(classification_type_counts1)

C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
         ...  
C4120        1
C8210        1
C2561        1
C4500        1
C2150        1
Name: CLASSIFICATION, Length: 71, dtype: int64


In [37]:
# Choose a cutoff value and create a list of classifications to be replaced
# use the variable name `classifications_to_replace`
# Choose a cutoff value
cutoff_value = 'C2100'

# Create a list of classifications to be replaced with "Other"
classifications_to_replace = classification_type_counts1[classification_type_counts1 < classification_type_counts1[cutoff_value]].index.tolist()

# Replace in dataframe
for cls in classifications_to_replace:
    application3_df['CLASSIFICATION'] = application3_df['CLASSIFICATION'].replace(cls,"Other")
    
# Check to make sure binning was successful
application3_df['CLASSIFICATION'].value_counts()

C1000    17326
C2000     6074
C1200     4837
Other     2261
C3000     1918
C2100     1883
Name: CLASSIFICATION, dtype: int64

In [38]:
# Look at ASK_AMT value counts for binning
ASK_AMT_counts1 = application3_df['ASK_AMT'].value_counts()
print(ASK_AMT_counts1)

5000        25398
10478           3
15583           3
63981           3
6725            3
            ...  
5371754         1
30060           1
43091152        1
18683           1
36500179        1
Name: ASK_AMT, Length: 8747, dtype: int64


In [39]:
# Create bins for the ASK_AMT column and put in new column ASK_AMT_Binned
# Define bin edges
bin_edges = [0, 1000000, 10000000, float("inf")]

# Create labels for the bins
bin_labels = ["1", "2", "3"]

# Use pd.cut() to create the 'ASK_AMT_Binned' column
application3_df['ASK_AMT_Binned'] = pd.cut(application3_df['ASK_AMT'], bins=bin_edges, labels=bin_labels, include_lowest=True)

# Display the DataFrame with the 'ASK_AMT_Binned' column
print(application3_df[['ASK_AMT', 'ASK_AMT_Binned']])

        ASK_AMT ASK_AMT_Binned
0          5000              1
1        108590              1
2          5000              1
3          6692              1
4        142590              1
...         ...            ...
34294      5000              1
34295      5000              1
34296      5000              1
34297      5000              1
34298  36500179              3

[34299 rows x 2 columns]


In [40]:
# Drop ASK_AMT column as it is replaced by the ASK_AMT_Binned column
application3_df = application3_df.drop('ASK_AMT', axis=1)
application3_df

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,IS_SUCCESSFUL,ASK_AMT_Binned
0,T10,Independent,C1000,ProductDev,Association,1,0,N,1,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,1,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,0,1
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,1,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,1,1
...,...,...,...,...,...,...,...,...,...,...
34294,T4,Independent,C1000,ProductDev,Association,1,0,N,0,1
34295,T4,CompanySponsored,C3000,ProductDev,Association,1,0,N,0,1
34296,T3,CompanySponsored,C2000,Preservation,Association,1,0,N,0,1
34297,T5,Independent,C3000,ProductDev,Association,1,0,N,1,1


In [41]:
# Check ASK_AMT_Binned column value counts for binning worked
application3_type_counts = application3_df['ASK_AMT_Binned'].value_counts()
print(application3_type_counts)

1    32722
2     1165
3      412
Name: ASK_AMT_Binned, dtype: int64


In [42]:
# Convert categorical data to numeric with `pd.get_dummies`

# Use pd.get_dummies to one-hot encode the categorical columns
application3_df = pd.get_dummies(application3_df)

In [43]:
# Split our preprocessed data into our features and target arrays
X3 = application3_df.drop(columns=['IS_SUCCESSFUL'])  # Features
y3 = application3_df['IS_SUCCESSFUL']  # Target

# Split the preprocessed data into a training and testing dataset
X_train3, X_test3, y_train3, y_test3 = train_test_split(X3, y3, random_state=1)

In [44]:
# Create a StandardScaler instances
scaler3 = StandardScaler()

# Fit the StandardScaler
X_scaler3 = scaler3.fit(X_train3)

# Scale the data
X_train_scaled3 = X_scaler3.transform(X_train3)
X_test_scaled3 = X_scaler3.transform(X_test3)
print(X_test_scaled3.shape)
print(X_train_scaled3.shape)

(8575, 45)
(25724, 45)


## <span style="color: orange;">Compile, Train and Evaluate the Model - Model 3</span>

In [45]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
nn_model3 = tf.keras.models.Sequential()

# First hidden layer
nn_model3.add(tf.keras.layers.Dense(units=100, activation="relu", input_dim=45))

# Second hidden layer
nn_model3.add(tf.keras.layers.Dense(units=80, activation="relu"))

# Output layer
nn_model3.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn_model3.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_9 (Dense)             (None, 100)               4600      
                                                                 
 dense_10 (Dense)            (None, 80)                8080      
                                                                 
 dense_11 (Dense)            (None, 1)                 81        
                                                                 
Total params: 12761 (49.85 KB)
Trainable params: 12761 (49.85 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [46]:
# Compile the model
nn_model3.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Define a callback to save the model's weights every five epochs
# Define the checkpoint path and filename
checkpoint_path_model3 = "checkpoints_model3/weights.{epoch:02d}.hdf5"

# Create a ModelCheckpoint callback that saves the model's weights every five epochs
checkpoint_callback_model3 = ModelCheckpoint(
    filepath=checkpoint_path_model3,
    save_weights_only=True,
    save_freq='epoch',
    period=5
)



In [47]:
# Train the model
fit_model3 = nn_model3.fit(X_test_scaled3, y_test3, epochs=150, callbacks=[checkpoint_callback_model3])

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

Epoch 82/150
Epoch 83/150
Epoch 84/150
Epoch 85/150
Epoch 86/150
Epoch 87/150
Epoch 88/150
Epoch 89/150
Epoch 90/150
Epoch 91/150
Epoch 92/150
Epoch 93/150
Epoch 94/150
Epoch 95/150
Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150
Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150
Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150


In [48]:
# Evaluate the model using the test data
model3_loss, model3_accuracy = nn_model3.evaluate(X_test_scaled3,y_test3,verbose=2)
print(f"Model 3 Loss: {model3_loss}, Model 3 Accuracy: {model3_accuracy}")

268/268 - 0s - loss: 0.5150 - accuracy: 0.7493 - 350ms/epoch - 1ms/step
Model 3 Loss: 0.515017032623291, Model 3 Accuracy: 0.7492711544036865


<span style="color: red;">______________________________________________________________________________________________________________________________</span>

# <span style="color: red;">Summary Results</span>

In [49]:
print(f"Model 1 Loss: {model1_loss}, Model 1 Accuracy: {model1_accuracy}")
print(f"Model 2 Loss: {model2_loss}, Model 2 Accuracy: {model2_accuracy}")
print(f"Model 3 Loss: {model3_loss}, Model 3 Accuracy: {model3_accuracy}")

Model 1 Loss: 0.5217640399932861, Model 1 Accuracy: 0.7478716969490051
Model 2 Loss: 0.5175169110298157, Model 2 Accuracy: 0.7471719980239868
Model 3 Loss: 0.515017032623291, Model 3 Accuracy: 0.7492711544036865


### <span style="color: blue;">Model 3 is the best model</span>


In [50]:
# Export best model to HDF5 file
nn_model3.save("AlphabetSoupCharity_Optimisation.h5")

  saving_api.save_model(
