# Optimization
---
This file contains the optimized methods (2) by manual and 1 by the top recommended model using `Keras Tuner`.

## Preprocessing

In [114]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

#  Import and read the charity_data.csv.
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")
application_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


What variable(s) are the target(s) for the model?
*   IS_SUCCESSFUL

What variable(s) are the feature(s) for your model?
*   <b>For OPT1:</b> Removal of a feature(s), add additional target, and/or adding one additional hidden layer. 
*   <b>For OPT2:</b> APPLICATION_TYPE, AFFILIATION, CLASSIFICATION, USE_CASE, ORGANIZATION, STATUS, INCOME_AMT, SPECIAL_CONSIDERATIONS, and ASK_AMT
    *   With more restricted binning and 2 additional layers with first having 15 neurons and second having 10 neurons using same activation as original model. Also increase the number of epochs to 50 from 100.
*   <b>For OPT3:</b> Same number of features from original model and use the top model suggested by Keras Tuner



In [115]:
# Drop the non-beneficial ID columns, 'EIN' and 'NAME'.
application_df = application_df.drop(columns = ['EIN', 'NAME'])
application_df.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


### Custom Binning Function

In [116]:
def col_to_bin(df, col, filter_rng, replacer):
    
    # Set up the list for values to replace
    value_to_replace = [val for val in df[col].value_counts()[df[col].value_counts() < filter_rng].index]
    
    # Loop through and replace the matching values with the replacer value
    for val in value_to_replace:
        df[col] = df[col].replace(val, replacer)
    
    return df[col]

# Optimization 1
---

In [172]:
opt_1_df = application_df.drop(columns = ['ASK_AMT'])
opt_1_df.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,1


In [173]:
# Binning using the same binning as original model
# Classification
opt_1_df['CLASSIFICATION'] = col_to_bin(opt_1_df, 'CLASSIFICATION', 1000, 'Other')

# Application Type
opt_1_df['APPLICATION_TYPE'] = col_to_bin(opt_1_df, 'APPLICATION_TYPE', 500, 'Other')

In [174]:
# Print the value counts for Classification
opt_1_df['CLASSIFICATION'].value_counts()

CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
Other     2261
C3000     1918
C2100     1883
Name: count, dtype: int64

In [175]:
# Print the value counts for Application Type
opt_1_df['APPLICATION_TYPE'].value_counts()

APPLICATION_TYPE
T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
Other      276
Name: count, dtype: int64

In [176]:
# Convert categorical data to numeric
opt_1_cat_df = pd.get_dummies(opt_1_df, dtype = 'int')
opt_1_cat_df.head()

Unnamed: 0,STATUS,IS_SUCCESSFUL,APPLICATION_TYPE_Other,APPLICATION_TYPE_T10,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,APPLICATION_TYPE_T6,APPLICATION_TYPE_T7,...,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,1,1,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,1,1,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
2,1,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,0
3,1,1,0,0,0,1,0,0,0,0,...,0,1,0,0,0,0,0,0,1,0
4,1,1,0,0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0


In [177]:
# Split the preprocessed data for opt_1
y_opt_1 = opt_1_cat_df[['IS_SUCCESSFUL']]
X_opt_1 = opt_1_cat_df.drop(columns = ['IS_SUCCESSFUL'])

# Preview the shape of our features and target
print(y_opt_1.shape)
print(X_opt_1.shape)

# Preview the first five entries of target variable
print(y_opt_1[:5])

# Preview the features data
display(X_opt_1.head())

(34299, 1)
(34299, 42)
   IS_SUCCESSFUL
0              1
1              1
2              0
3              1
4              1


Unnamed: 0,STATUS,APPLICATION_TYPE_Other,APPLICATION_TYPE_T10,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,APPLICATION_TYPE_T6,APPLICATION_TYPE_T7,APPLICATION_TYPE_T8,...,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,1,0,0,0,1,0,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
2,1,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,1,0,0,0,1,0,0,0,0,0,...,0,1,0,0,0,0,0,0,1,0
4,1,0,0,0,1,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0


In [178]:
# Split the preprocessed data into a training and testing dataset
X_train_opt1, X_test_opt1, y_train_opt1, y_test_opt1 = train_test_split(X_opt_1, y_opt_1, random_state = 1)

In [179]:
# Create a StandardScaler instances
scaler_opt1 = StandardScaler()

# Fit the scaler
X_scaler_opt1 = scaler_opt1.fit(X_train_opt1)

# Scale the data
X_train_opt1_scaled = X_scaler_opt1.transform(X_train_opt1)
X_test_opt1_scaled = X_scaler_opt1.transform(X_test_opt1)

### Compile, Train and Evaluate the Optimized Model 1

In [180]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
# Get total number of features
input_features_opt1 = len(X_opt_1.columns)

nn_opt1 = tf.keras.models.Sequential()

# First hidden layer
nn_opt1.add(tf.keras.layers.Dense(units = 80, activation = 'relu', input_dim = input_features_opt1))

# Second hidden layer
nn_opt1.add(tf.keras.layers.Dense(units = 30, activation = 'relu'))

# Third hidden layer
nn_opt1.add(tf.keras.layers.Dense(units = 15, activation = 'relu'))

# Output layer
nn_opt1.add(tf.keras.layers.Dense(units = 1, activation = 'sigmoid'))

# Check the structure of the model
nn_opt1.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [181]:
# Compile the model
nn_opt1.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

In [182]:
# Train the model
fit_model_opt1 = nn_opt1.fit(X_train_opt1_scaled, y_train_opt1, epochs = 100)

Epoch 1/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 577us/step - accuracy: 0.7097 - loss: 0.5859
Epoch 2/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 588us/step - accuracy: 0.7301 - loss: 0.5547
Epoch 3/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 582us/step - accuracy: 0.7294 - loss: 0.5537
Epoch 4/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 578us/step - accuracy: 0.7367 - loss: 0.5459
Epoch 5/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 573us/step - accuracy: 0.7375 - loss: 0.5466
Epoch 6/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 574us/step - accuracy: 0.7327 - loss: 0.5507
Epoch 7/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 572us/step - accuracy: 0.7379 - loss: 0.5435
Epoch 8/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 581us/step - accuracy: 0.7313 - loss: 0.5464
Epoch 9/100
[1m

[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 572us/step - accuracy: 0.7411 - loss: 0.5369
Epoch 68/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 574us/step - accuracy: 0.7450 - loss: 0.5320
Epoch 69/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 571us/step - accuracy: 0.7412 - loss: 0.5333
Epoch 70/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 571us/step - accuracy: 0.7435 - loss: 0.5325
Epoch 71/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 584us/step - accuracy: 0.7432 - loss: 0.5320
Epoch 72/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 576us/step - accuracy: 0.7373 - loss: 0.5378
Epoch 73/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 576us/step - accuracy: 0.7399 - loss: 0.5375
Epoch 74/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 572us/step - accuracy: 0.7398 - loss: 0.5368
Epoch 75/100
[1m804/

In [183]:
# Evaluate the model using the test data
model_loss_opt1, model_accuracy_opt1 = nn_opt1.evaluate(X_test_opt1_scaled, y_test_opt1, verbose = 2)
print(f"Loss: {model_loss_opt1}, Accuracy: {model_accuracy_opt1}")

268/268 - 0s - 731us/step - accuracy: 0.7283 - loss: 0.5557
Loss: 0.5557341575622559, Accuracy: 0.7282798886299133


### Conclusion
---
Optimized model 1 by dropping 1 feature `SPECIAL_CONSIDERATIONS` and adding additional hidden layer yielded a slight increase in accuracy but also a slight increase in loss. This is probably due to the dropping of the feature which indicates that by retaining the same number of features and increasing the number of layers and possibly neurons would help increase the accuracy and decrease in loss. Overall, this model is less reliable than the original model.

In `Optimized model 2` we will look at a more restricted binning with more layers and neurons as well as number of epochs (100 to 150) to see how this model performs against the `original model` and `optimized model 1`.


# Optimization 2
---

In [16]:
opt_2_df = application_df.copy()
opt_2_df.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [17]:
# Binning with further restriction on the Application, thus reducing the number of features after categorization
# Classification
opt_2_df['CLASSIFICATION'] = col_to_bin(opt_2_df, 'CLASSIFICATION', 1000, 'Other')

# Application Type (restricting further to only 6 category)
opt_2_df['APPLICATION_TYPE'] = col_to_bin(opt_2_df, 'APPLICATION_TYPE', 1000, 'Other')

In [18]:
# Look at CLASSIFICATION value counts for binning
opt_2_df['CLASSIFICATION'].value_counts()

CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
Other     2261
C3000     1918
C2100     1883
Name: count, dtype: int64

In [19]:
# Print the value counts for Application Type
opt_2_df['APPLICATION_TYPE'].value_counts()

APPLICATION_TYPE
T3       27037
Other     2266
T4        1542
T6        1216
T5        1173
T19       1065
Name: count, dtype: int64

In [20]:
# Convert categorical data to numeric
opt_2_df = pd.get_dummies(opt_2_df, dtype = 'int')
opt_2_df.head()

Unnamed: 0,STATUS,ASK_AMT,IS_SUCCESSFUL,APPLICATION_TYPE_Other,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,APPLICATION_TYPE_T6,AFFILIATION_CompanySponsored,...,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,1,5000,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,1,108590,1,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
2,1,5000,0,0,0,0,0,1,0,1,...,0,0,0,0,0,0,0,0,1,0
3,1,6692,1,0,0,1,0,0,0,1,...,0,1,0,0,0,0,0,0,1,0
4,1,142590,1,0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0


In [21]:
# Split the preprocessed data for opt_1
y_opt_2 = opt_2_df['IS_SUCCESSFUL']
X_opt_2 = opt_2_df.drop(columns = 'IS_SUCCESSFUL')

# Preview the shape of our features and target
print(y_opt_2.shape)
print(X_opt_2.shape)

# Preview the first five entries of target variable
print(y_opt_2[:5])

# Preview the features data
display(X_opt_2.head())

(34299,)
(34299, 40)
0    1
1    1
2    0
3    1
4    1
Name: IS_SUCCESSFUL, dtype: int64


Unnamed: 0,STATUS,ASK_AMT,APPLICATION_TYPE_Other,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,APPLICATION_TYPE_T6,AFFILIATION_CompanySponsored,AFFILIATION_Family/Parent,...,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,1,5000,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,1,108590,0,0,1,0,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
2,1,5000,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,1,0
3,1,6692,0,0,1,0,0,0,1,0,...,0,1,0,0,0,0,0,0,1,0
4,1,142590,0,0,1,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0


In [22]:
# Split the preprocessed data into a training and testing dataset
X_train_opt2, X_test_opt2, y_train_opt2, y_test_opt2 = train_test_split(X_opt_2, y_opt_2, random_state = 1)

In [23]:
# Create a StandardScaler instances
scaler_opt2 = StandardScaler()

# Fit the scaler
X_scaler_opt2 = scaler_opt2.fit(X_train_opt2)

# Scale the data
X_train_opt2_scaled = X_scaler_opt2.transform(X_train_opt2)
X_test_opt2_scaled = X_scaler_opt2.transform(X_test_opt2)

### Compile, Train and Evaluate the Optimized Model 2

In [24]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
# Get total number of features
input_features_opt2 = len(X_opt_2.columns)

nn_opt2 = tf.keras.models.Sequential()

# First hidden layer
nn_opt2.add(tf.keras.layers.Dense(units = 80, activation = 'relu', input_dim = input_features_opt2))

# Second hidden layer
nn_opt2.add(tf.keras.layers.Dense(units = 30, activation = 'relu'))

# Third hidden layer
nn_opt2.add(tf.keras.layers.Dense(units = 15, activation = 'relu'))

# Fourth hidden layer
nn_opt2.add(tf.keras.layers.Dense(units = 10, activation = 'relu'))

# Output layer
nn_opt2.add(tf.keras.layers.Dense(units = 1, activation = 'sigmoid'))

# Check the structure of the model
nn_opt2.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [25]:
# Compile the model
nn_opt2.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

In [26]:
# Train the model
fit_model_opt2 = nn_opt2.fit(X_train_opt2_scaled, y_train_opt2, epochs = 150)

Epoch 1/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 608us/step - accuracy: 0.6879 - loss: 0.6023
Epoch 2/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 586us/step - accuracy: 0.7271 - loss: 0.5583
Epoch 3/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 591us/step - accuracy: 0.7273 - loss: 0.5568
Epoch 4/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 587us/step - accuracy: 0.7311 - loss: 0.5526
Epoch 5/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 587us/step - accuracy: 0.7325 - loss: 0.5521
Epoch 6/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 585us/step - accuracy: 0.7261 - loss: 0.5566
Epoch 7/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 585us/step - accuracy: 0.7247 - loss: 0.5606
Epoch 8/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 587us/step - accuracy: 0.7300 - loss: 0.5544
Epoch 9/150
[1m

[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 671us/step - accuracy: 0.7371 - loss: 0.5391
Epoch 68/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 594us/step - accuracy: 0.7398 - loss: 0.5377
Epoch 69/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 676us/step - accuracy: 0.7373 - loss: 0.5393
Epoch 70/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 588us/step - accuracy: 0.7416 - loss: 0.5328
Epoch 71/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 586us/step - accuracy: 0.7365 - loss: 0.5410
Epoch 72/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 582us/step - accuracy: 0.7367 - loss: 0.5424
Epoch 73/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 582us/step - accuracy: 0.7391 - loss: 0.5419
Epoch 74/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 598us/step - accuracy: 0.7390 - loss: 0.5375
Epoch 75/150
[1m804/

[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 583us/step - accuracy: 0.7407 - loss: 0.5351
Epoch 133/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 585us/step - accuracy: 0.7372 - loss: 0.5390
Epoch 134/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 583us/step - accuracy: 0.7419 - loss: 0.5343
Epoch 135/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 580us/step - accuracy: 0.7365 - loss: 0.5392
Epoch 136/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 604us/step - accuracy: 0.7410 - loss: 0.5373
Epoch 137/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 640us/step - accuracy: 0.7437 - loss: 0.5297
Epoch 138/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 588us/step - accuracy: 0.7373 - loss: 0.5369
Epoch 139/150
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 583us/step - accuracy: 0.7421 - loss: 0.5357
Epoch 140/150


In [27]:
# Evaluate the model using the test data
model_loss_opt2, model_accuracy_opt2 = nn_opt2.evaluate(X_test_opt2_scaled, y_test_opt2, verbose = 2)
print(f"Loss: {model_loss_opt2}, Accuracy: {model_accuracy_opt2}")

268/268 - 0s - 771us/step - accuracy: 0.7286 - loss: 0.5682
Loss: 0.5682134628295898, Accuracy: 0.7286297082901001


### Conclusion
---
In this `Optimized model 2`, it seems to have been worse than the first optimized model and the original model. There was only a very miniscule increase in accuracy but with an increase of 2% in loss output versus the first optimized model. This clearly indicated that by losing features, we are at risk of loss increase with insignificant accuracy increase, even with the help of adding additional layers.

As a result, I believe that we should keep the same preprocessed data config as the original model and simply increase the number of layers and neurons. However, to further optimize the third model, we will take advantage of `Keras Tuner` to help with model suggestion based on the preprocessed data. With that said, this will be the approach to our `third optimization` trial.


# Optimization 3
---

In [28]:
opt_3_df = application_df.copy()
opt_3_df.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [29]:
# Binning using the same binning as original model
# Classification
opt_3_df['CLASSIFICATION'] = col_to_bin(opt_3_df, 'CLASSIFICATION', 1000, 'Other')

# Application Type
opt_3_df['APPLICATION_TYPE'] = col_to_bin(opt_3_df, 'APPLICATION_TYPE', 500, 'Other')

In [30]:
# Look at CLASSIFICATION value counts for binning
opt_3_df['CLASSIFICATION'].value_counts()

CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
Other     2261
C3000     1918
C2100     1883
Name: count, dtype: int64

In [31]:
# Print the value counts for Application Type
opt_3_df['APPLICATION_TYPE'].value_counts()

APPLICATION_TYPE
T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
Other      276
Name: count, dtype: int64

In [32]:
# Convert categorical data to numeric with `pd.get_dummies`
opt_3_df = pd.get_dummies(opt_3_df, dtype = 'int')
opt_3_df.head()

Unnamed: 0,STATUS,ASK_AMT,IS_SUCCESSFUL,APPLICATION_TYPE_Other,APPLICATION_TYPE_T10,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,APPLICATION_TYPE_T6,...,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,1,5000,1,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,1,108590,1,0,0,0,1,0,0,0,...,1,0,0,0,0,0,0,0,1,0
2,1,5000,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,1,0
3,1,6692,1,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,1,0
4,1,142590,1,0,0,0,1,0,0,0,...,0,0,1,0,0,0,0,0,1,0


In [33]:
opt_3_df.columns

Index(['STATUS', 'ASK_AMT', 'IS_SUCCESSFUL', 'APPLICATION_TYPE_Other',
       'APPLICATION_TYPE_T10', 'APPLICATION_TYPE_T19', 'APPLICATION_TYPE_T3',
       'APPLICATION_TYPE_T4', 'APPLICATION_TYPE_T5', 'APPLICATION_TYPE_T6',
       'APPLICATION_TYPE_T7', 'APPLICATION_TYPE_T8',
       'AFFILIATION_CompanySponsored', 'AFFILIATION_Family/Parent',
       'AFFILIATION_Independent', 'AFFILIATION_National', 'AFFILIATION_Other',
       'AFFILIATION_Regional', 'CLASSIFICATION_C1000', 'CLASSIFICATION_C1200',
       'CLASSIFICATION_C2000', 'CLASSIFICATION_C2100', 'CLASSIFICATION_C3000',
       'CLASSIFICATION_Other', 'USE_CASE_CommunityServ', 'USE_CASE_Heathcare',
       'USE_CASE_Other', 'USE_CASE_Preservation', 'USE_CASE_ProductDev',
       'ORGANIZATION_Association', 'ORGANIZATION_Co-operative',
       'ORGANIZATION_Corporation', 'ORGANIZATION_Trust', 'INCOME_AMT_0',
       'INCOME_AMT_1-9999', 'INCOME_AMT_10000-24999',
       'INCOME_AMT_100000-499999', 'INCOME_AMT_10M-50M', 'INCOME_AMT_1M-5M

In [34]:
# Split the preprocessed data for opt_1
y_opt_3 = opt_3_df['IS_SUCCESSFUL']
X_opt_3 = opt_3_df.drop(columns = 'IS_SUCCESSFUL')

# Preview the shape of our features and target
print(y_opt_3.shape)
print(X_opt_3.shape)

# Preview the first five entries of target variable
print(y_opt_3[:5])

# Preview the features data
display(X_opt_3.head())

(34299,)
(34299, 43)
0    1
1    1
2    0
3    1
4    1
Name: IS_SUCCESSFUL, dtype: int64


Unnamed: 0,STATUS,ASK_AMT,APPLICATION_TYPE_Other,APPLICATION_TYPE_T10,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,APPLICATION_TYPE_T6,APPLICATION_TYPE_T7,...,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,1,5000,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,1,108590,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
2,1,5000,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,0
3,1,6692,0,0,0,1,0,0,0,0,...,0,1,0,0,0,0,0,0,1,0
4,1,142590,0,0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0


In [35]:
# Split the preprocessed data into a training and testing dataset
X_train_opt3, X_test_opt3, y_train_opt3, y_test_opt3 = train_test_split(X_opt_3, y_opt_3, random_state = 1)

In [36]:
# Create a StandardScaler instances
scaler_opt3 = StandardScaler()

# Fit the scaler
X_scaler_opt3 = scaler_opt3.fit(X_train_opt3)

# Scale the data
X_train_opt3_scaled = X_scaler_opt3.transform(X_train_opt3)
X_test_opt3_scaled = X_scaler_opt3.transform(X_test_opt3)

In [42]:
# Create a method that creates a new Sequential model with yperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()
    
    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation', ['relu', 'sigmoid', 'tanh'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(
        units = hp.Int(
            'first_units',
            min_value = 1,
            max_value = 30,
            step = 5
        ), 
        activation = activation
    ))
    
    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 5)):
        nn_model.add(tf.keras.layers.Dense(
            units = hp.Int(
                'units_' + str(i),
                min_value = 1,
                max_value = 30,
                step =  5
            ),
            activation = activation
        ))
    
    # Allow kerastuner to add the Output layer
    nn_model.add(tf.keras.layers.Dense(units = 1, activation = 'sigmoid'))
    
    # Compile the model
    nn_model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
    
    return nn_model

In [38]:
# Import the kerastuner library
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective = 'val_accuracy',
    max_epochs = 20,
    hyperband_iterations = 2
)

Reloading Tuner from .\untitled_project\tuner0.json


In [39]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_opt3_scaled,
             y_train_opt3,
             epochs = 20,
             validation_data = (X_test_opt3_scaled,y_test_opt3)
            )

Trial 60 Complete [00h 00m 15s]
val_accuracy: 0.7331778407096863

Best val_accuracy So Far: 0.7334110736846924
Total elapsed time: 00h 08m 46s


In [57]:
# Get top 3 model hyperparameters and print the values
top_hyper = tuner.get_best_hyperparameters(3)
for param in top_hyper:
    print(param.values)

{'activaiton': 'sigmoid', 'first_units': 1, 'num_layers': 2, 'units_0': 21, 'units_1': 26, 'units_2': 6, 'units_3': 11, 'units_4': 16, 'tuner/epochs': 20, 'tuner/initial_epoch': 7, 'tuner/bracket': 2, 'tuner/round': 2, 'tuner/trial_id': '0012'}
{'activaiton': 'sigmoid', 'first_units': 21, 'num_layers': 5, 'units_0': 21, 'units_1': 16, 'units_2': 21, 'units_3': 16, 'units_4': 16, 'tuner/epochs': 3, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0}
{'activaiton': 'sigmoid', 'first_units': 21, 'num_layers': 5, 'units_0': 21, 'units_1': 16, 'units_2': 21, 'units_3': 16, 'units_4': 16, 'tuner/epochs': 20, 'tuner/initial_epoch': 7, 'tuner/bracket': 2, 'tuner/round': 2, 'tuner/trial_id': '0042'}


In [68]:
# Get top 1 model hyperparameters and print the values
top_1_hyper = tuner.get_best_hyperparameters(2)[1]
top_1_hyper.values

{'activaiton': 'sigmoid',
 'first_units': 21,
 'num_layers': 5,
 'units_0': 21,
 'units_1': 16,
 'units_2': 21,
 'units_3': 16,
 'units_4': 16,
 'tuner/epochs': 3,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 2,
 'tuner/round': 0}

In [129]:
# Define the model - using the top hyper
# Get total number of features
input_features_opt3 = len(X_opt_3.columns)

nn_opt3 = tf.keras.models.Sequential()

# First hidden layer
nn_opt3.add(tf.keras.layers.Dense(units = 21, activation = 'sigmoid', input_dim = input_features_opt3))

# Second hidden layer
nn_opt3.add(tf.keras.layers.Dense(units = 26, activation = 'relu'))

# Output layer
nn_opt3.add(tf.keras.layers.Dense(units = 1, activation = 'sigmoid'))

# Check the structure of the model
nn_opt3.summary()

In [130]:
# Compile the model
nn_opt3.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

In [131]:
# Train the model
fit_model_opt3 = nn_opt3.fit(X_train_opt3_scaled, y_train_opt3, epochs = 100)

Epoch 1/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 551us/step - accuracy: 0.6592 - loss: 0.6244
Epoch 2/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 554us/step - accuracy: 0.7211 - loss: 0.5708
Epoch 3/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 593us/step - accuracy: 0.7213 - loss: 0.5684
Epoch 4/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 583us/step - accuracy: 0.7291 - loss: 0.5559
Epoch 5/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 558us/step - accuracy: 0.7321 - loss: 0.5500
Epoch 6/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 543us/step - accuracy: 0.7316 - loss: 0.5531
Epoch 7/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 534us/step - accuracy: 0.7347 - loss: 0.5485
Epoch 8/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 563us/step - accuracy: 0.7353 - loss: 0.5465
Epoch 9/100
[1m

[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 538us/step - accuracy: 0.7396 - loss: 0.5365
Epoch 68/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 539us/step - accuracy: 0.7355 - loss: 0.5403
Epoch 69/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 538us/step - accuracy: 0.7350 - loss: 0.5409
Epoch 70/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 536us/step - accuracy: 0.7368 - loss: 0.5398
Epoch 71/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 532us/step - accuracy: 0.7334 - loss: 0.5424
Epoch 72/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 533us/step - accuracy: 0.7373 - loss: 0.5388
Epoch 73/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 534us/step - accuracy: 0.7373 - loss: 0.5378
Epoch 74/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 532us/step - accuracy: 0.7348 - loss: 0.5444
Epoch 75/100
[1m804/

In [132]:
# Evaluate the model using the test data
model_loss_opt3, model_accuracy_opt3 = nn_opt3.evaluate(X_test_opt3_scaled, y_test_opt3, verbose = 2)
print(f"Loss: {model_loss_opt3}, Accuracy: {model_accuracy_opt3}")

268/268 - 0s - 710us/step - accuracy: 0.7287 - loss: 0.5510
Loss: 0.5510286688804626, Accuracy: 0.7287463545799255
