## Preprocessing

In [1]:
# Dependencies.
# Data Cleaning/Engineering.
import pandas as pd
import numpy as np

# Data Scaling/Splitting.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# NN Model.
import tensorflow as tf
import keras_tuner as kt

# Visualization.
import matplotlib.pyplot as plt
import hvplot.pandas
import seaborn as sns
from bokeh.resources import INLINE


In [2]:
# Read in data.
raw_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")
raw_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [3]:
# Check out the data.
display(raw_df.info())
display(raw_df.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34299 entries, 0 to 34298
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   EIN                     34299 non-null  int64 
 1   NAME                    34299 non-null  object
 2   APPLICATION_TYPE        34299 non-null  object
 3   AFFILIATION             34299 non-null  object
 4   CLASSIFICATION          34299 non-null  object
 5   USE_CASE                34299 non-null  object
 6   ORGANIZATION            34299 non-null  object
 7   STATUS                  34299 non-null  int64 
 8   INCOME_AMT              34299 non-null  object
 9   SPECIAL_CONSIDERATIONS  34299 non-null  object
 10  ASK_AMT                 34299 non-null  int64 
 11  IS_SUCCESSFUL           34299 non-null  int64 
dtypes: int64(4), object(8)
memory usage: 3.1+ MB


None

Unnamed: 0,EIN,STATUS,ASK_AMT,IS_SUCCESSFUL
count,34299.0,34299.0,34299.0,34299.0
mean,519185200.0,0.999854,2769199.0,0.532406
std,245147200.0,0.012073,87130450.0,0.498956
min,10520600.0,0.0,5000.0,0.0
25%,274848200.0,1.0,5000.0,0.0
50%,465631700.0,1.0,5000.0,1.0
75%,752611700.0,1.0,7742.0,1.0
max,996086900.0,1.0,8597806000.0,1.0


In [4]:
# Drop the EIN and Name columns.
df = raw_df.drop(columns = ['EIN', 'NAME'])
df.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [5]:
# Find unique values among all columns.
df.nunique()

APPLICATION_TYPE            17
AFFILIATION                  6
CLASSIFICATION              71
USE_CASE                     5
ORGANIZATION                 4
STATUS                       2
INCOME_AMT                   9
SPECIAL_CONSIDERATIONS       2
ASK_AMT                   8747
IS_SUCCESSFUL                2
dtype: int64

In [6]:
# Determine number of data points for columns containing more than 10 unique values.
df.APPLICATION_TYPE.value_counts()

APPLICATION_TYPE
T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T25        3
T14        3
T29        2
T15        2
T17        1
Name: count, dtype: int64

In [7]:
# Create a cutoff for APPLICATION_TYPE and submit points under the cutoff as Other.
cutoff = ['T3', 'T4', 'T6', 'T5', 'T19'] # Note: could have made this 'automatic' but such a small amt that easier to just hard code.
df['APPLICATION_TYPE'] = df.APPLICATION_TYPE.map(lambda x: x if x in cutoff else "Other")
df.APPLICATION_TYPE.value_counts()

APPLICATION_TYPE
T3       27037
Other     2266
T4        1542
T6        1216
T5        1173
T19       1065
Name: count, dtype: int64

In [8]:
# Look at the next column.
display(df.CLASSIFICATION.value_counts())

CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
         ...  
C4120        1
C8210        1
C2561        1
C4500        1
C2150        1
Name: count, Length: 71, dtype: int64

In [9]:
# Create a cutoff and assign all values below it to Other.
cutoff = ['C1000', 'C2000', 'C1200', 'C3000', 'C2100'] # Note: could have made this 'automatic' but such a small amt that easier to just hard code.
df['CLASSIFICATION'] = df.CLASSIFICATION.map(lambda x: x if x in cutoff else "Other")
df.CLASSIFICATION.value_counts()

CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
Other     2261
C3000     1918
C2100     1883
Name: count, dtype: int64

In [10]:
# Convert categories to numerical data with get_dummies.
df_sub = pd.get_dummies(
    df,
    prefix = ['APPLICATION_TYPE', 'AFFILIATION', 'CLASSIFICATION', 'USE_CASE', 'ORGANIZATION', 'INCOME_AMT', 'SPECIAL_CONSIDERATIONS'],
    columns = ['APPLICATION_TYPE', 'AFFILIATION', 'CLASSIFICATION', 'USE_CASE', 'ORGANIZATION', 'INCOME_AMT', 'SPECIAL_CONSIDERATIONS'],
    dtype = 'int64'
)

df_sub.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34299 entries, 0 to 34298
Data columns (total 41 columns):
 #   Column                        Non-Null Count  Dtype
---  ------                        --------------  -----
 0   STATUS                        34299 non-null  int64
 1   ASK_AMT                       34299 non-null  int64
 2   IS_SUCCESSFUL                 34299 non-null  int64
 3   APPLICATION_TYPE_Other        34299 non-null  int64
 4   APPLICATION_TYPE_T19          34299 non-null  int64
 5   APPLICATION_TYPE_T3           34299 non-null  int64
 6   APPLICATION_TYPE_T4           34299 non-null  int64
 7   APPLICATION_TYPE_T5           34299 non-null  int64
 8   APPLICATION_TYPE_T6           34299 non-null  int64
 9   AFFILIATION_CompanySponsored  34299 non-null  int64
 10  AFFILIATION_Family/Parent     34299 non-null  int64
 11  AFFILIATION_Independent       34299 non-null  int64
 12  AFFILIATION_National          34299 non-null  int64
 13  AFFILIATION_Other             3

In [11]:
# Isolate features and target.
features = df_sub.drop(columns = ['IS_SUCCESSFUL'])
target = df_sub.loc[:, 'IS_SUCCESSFUL']

In [12]:
# Create X and y variables.
X = features
y = target

display(X.head())
display(y.head())

Unnamed: 0,STATUS,ASK_AMT,APPLICATION_TYPE_Other,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,APPLICATION_TYPE_T6,AFFILIATION_CompanySponsored,AFFILIATION_Family/Parent,...,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,1,5000,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,1,108590,0,0,1,0,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
2,1,5000,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,1,0
3,1,6692,0,0,1,0,0,0,1,0,...,0,1,0,0,0,0,0,0,1,0
4,1,142590,0,0,1,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0


0    1
1    1
2    0
3    1
4    1
Name: IS_SUCCESSFUL, dtype: int64

In [13]:
# Create a train_test_split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 1, stratify = y)
print(X_train.shape, X_test.shape)

(25724, 40) (8575, 40)


In [14]:
# Scale the training and testing features.
scaler = StandardScaler()

scaler.fit(X_train)
X_train_sc = scaler.transform(X_train)
X_test_sc = scaler.transform(X_test)

# Model 1

In [15]:
# Now, going to create the first NN model.
nn_model_1 = tf.keras.models.Sequential()

# First layer.
nn_model_1.add(tf.keras.layers.Dense(units = 7, activation = 'relu', input_dim = len(X.columns)))

# Output layer (uses a Sigmoid activation for probability).
nn_model_1.add(tf.keras.layers.Dense(units = 1, activation = "sigmoid"))

# Summarize this.
nn_model_1.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [16]:
# Make custom callback to save every five epochs.
class MyModelCheckpoint(tf.keras.callbacks.ModelCheckpoint):
    def on_epoch_end(self, epoch, logs = None):
        if (epoch + 1) % 5 == 0:
            super().on_epoch_end(epoch, logs)

custom_callback = MyModelCheckpoint(
    filepath = 'model_checkpoints/nn_1_weights_{epoch:02d}.weights.h5',
    save_weights_only = True,
    save_freq = 'epoch',
)

In [17]:
# Compile and fit model.
nn_model_1.compile(loss = "binary_crossentropy", optimizer = "adam", metrics = ["accuracy"])

# Fit that model!
fit_1 = nn_model_1.fit(
    X_train_sc,
    y_train,
    epochs = 100,
    callbacks = [custom_callback]
)

Epoch 1/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 286us/step - accuracy: 0.6417 - loss: 0.7038
Epoch 2/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 274us/step - accuracy: 0.7123 - loss: 0.5797
Epoch 3/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 273us/step - accuracy: 0.7179 - loss: 0.5723
Epoch 4/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 283us/step - accuracy: 0.7250 - loss: 0.5638
Epoch 5/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 284us/step - accuracy: 0.7295 - loss: 0.5569
Epoch 6/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 274us/step - accuracy: 0.7303 - loss: 0.5572
Epoch 7/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 272us/step - accuracy: 0.7264 - loss: 0.5599
Epoch 8/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 273us/step - accuracy: 0.7281 - loss: 0.5574
Epoch 9/100
[1m

[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 281us/step - accuracy: 0.7323 - loss: 0.5524
Epoch 68/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 277us/step - accuracy: 0.7331 - loss: 0.5448
Epoch 69/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 273us/step - accuracy: 0.7306 - loss: 0.5521
Epoch 70/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 282us/step - accuracy: 0.7320 - loss: 0.5504
Epoch 71/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 287us/step - accuracy: 0.7348 - loss: 0.5483
Epoch 72/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 275us/step - accuracy: 0.7266 - loss: 0.5549
Epoch 73/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 277us/step - accuracy: 0.7335 - loss: 0.5495
Epoch 74/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 273us/step - accuracy: 0.7286 - loss: 0.5529
Epoch 75/100
[1m804/

In [19]:
# Plot loss and accuracy over time.
v_df = pd.DataFrame(fit_1.history)
v_df.index += 1

plot = v_df.hvplot.line(y = 'loss', color = 'firebrick') + v_df.hvplot.line(y = 'accuracy', color = 'cornflowerblue')

hvplot.save(plot, "/private/var/christina/Downloads/model_1_metrics.png")

plot

In [20]:
# Evaluate the model.
model_1_loss, model_1_acc = nn_model_1.evaluate(X_test_sc, y_test, verbose = 2)
print(
f"""
Model 1 Metrics
--------------------------------------------------
Loss: {model_1_loss}
Accuracy: {model_1_acc}
"""
)

268/268 - 0s - 437us/step - accuracy: 0.7292 - loss: 0.5573

Model 1 Metrics
--------------------------------------------------
Loss: 0.5573484897613525
Accuracy: 0.7292128205299377



# Model 2

In [21]:
# Now, going to create the NN model.
nn_model_2 = tf.keras.models.Sequential()

# First layer.
nn_model_2.add(tf.keras.layers.Dense(units = 5, activation = 'relu', input_dim = len(X.columns)))

# Second Layer.
nn_model_2.add(tf.keras.layers.Dense(units = 3, activation = 'relu'))

# Output layer (uses a Sigmoid activation for probability).
nn_model_2.add(tf.keras.layers.Dense(units = 1, activation = "sigmoid"))

# Summarize this.
nn_model_2.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [22]:
# Compile and fit model.
nn_model_2.compile(loss = "binary_crossentropy", optimizer = "adam", metrics = ["accuracy"])

# Custom callback.
custom_callback = MyModelCheckpoint(
    filepath = 'model_checkpoints/nn_2_weights_{epoch:02d}.weights.h5',
    save_weights_only = True,
    save_freq = 'epoch',
)

# Fit that model!
fit_2 = nn_model_2.fit(
    X_train_sc,
    y_train,
    epochs = 100,
    callbacks = [custom_callback]
)

Epoch 1/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 301us/step - accuracy: 0.5604 - loss: 0.7481
Epoch 2/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 300us/step - accuracy: 0.7171 - loss: 0.5977
Epoch 3/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 312us/step - accuracy: 0.7180 - loss: 0.5817
Epoch 4/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 309us/step - accuracy: 0.7216 - loss: 0.5776
Epoch 5/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 314us/step - accuracy: 0.7224 - loss: 0.5722
Epoch 6/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 302us/step - accuracy: 0.7242 - loss: 0.5655
Epoch 7/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 308us/step - accuracy: 0.7270 - loss: 0.5624
Epoch 8/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 298us/step - accuracy: 0.7275 - loss: 0.5630
Epoch 9/100
[1m

[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 302us/step - accuracy: 0.7313 - loss: 0.5507
Epoch 68/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 296us/step - accuracy: 0.7319 - loss: 0.5517
Epoch 69/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 295us/step - accuracy: 0.7311 - loss: 0.5518
Epoch 70/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 308us/step - accuracy: 0.7298 - loss: 0.5548
Epoch 71/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 303us/step - accuracy: 0.7314 - loss: 0.5532
Epoch 72/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 297us/step - accuracy: 0.7302 - loss: 0.5520
Epoch 73/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 293us/step - accuracy: 0.7303 - loss: 0.5533
Epoch 74/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 305us/step - accuracy: 0.7302 - loss: 0.5548
Epoch 75/100
[1m804/

In [23]:
# Plot loss and accuracy over time.
v_df = pd.DataFrame(fit_2.history)
v_df.index += 1

plot = v_df.hvplot.line(y = 'loss', color = 'firebrick') + v_df.hvplot.line(y = 'accuracy', color = 'cornflowerblue')

hvplot.save(plot, "/private/var/christina/Downloads/model_2_metrics.png")

plot

In [24]:
# Evaluate the model.
model_2_loss, model_2_acc = nn_model_2.evaluate(X_test_sc, y_test, verbose = 2)
print(
f"""
Model 1 Metrics
--------------------------------------------------
Loss: {model_2_loss}
Accuracy: {model_2_acc}
"""
)

268/268 - 0s - 461us/step - accuracy: 0.7287 - loss: 0.5575

Model 1 Metrics
--------------------------------------------------
Loss: 0.5574648976325989
Accuracy: 0.7287463545799255



# Model 3

In [25]:
# Now, going to create the NN model.
nn_model_3 = tf.keras.models.Sequential()

# First layer.
nn_model_3.add(tf.keras.layers.Dense(units = 100, activation = 'tanh', input_dim = len(X.columns)))

# Second Layer.
nn_model_3.add(tf.keras.layers.Dense(units = 100, activation = 'tanh'))

# Third Layer.
nn_model_3.add(tf.keras.layers.Dense(units = 100, activation = 'tanh'))

# Output layer (uses a Sigmoid activation for probability).
nn_model_3.add(tf.keras.layers.Dense(units = 1, activation = "sigmoid"))

# Summarize this.
nn_model_3.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [26]:
# Compile and fit model.
nn_model_3.compile(loss = "binary_crossentropy", optimizer = "adam", metrics = ["accuracy"])

# Custom callback.
custom_callback = MyModelCheckpoint(
    filepath = 'model_checkpoints/nn_4_weights_{epoch:02d}.weights.h5',
    save_weights_only = True,
    save_freq = 'epoch',
)

# Fit that model!
fit_3 = nn_model_3.fit(
    X_train_sc,
    y_train,
    epochs = 100,
    callbacks = [custom_callback]
)

Epoch 1/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 457us/step - accuracy: 0.7041 - loss: 0.5921
Epoch 2/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 457us/step - accuracy: 0.7229 - loss: 0.5696
Epoch 3/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 442us/step - accuracy: 0.7236 - loss: 0.5649
Epoch 4/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 448us/step - accuracy: 0.7290 - loss: 0.5572
Epoch 5/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 449us/step - accuracy: 0.7307 - loss: 0.5550
Epoch 6/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 442us/step - accuracy: 0.7351 - loss: 0.5469
Epoch 7/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 448us/step - accuracy: 0.7308 - loss: 0.5524
Epoch 8/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 441us/step - accuracy: 0.7287 - loss: 0.5531
Epoch 9/100
[1m

[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 439us/step - accuracy: 0.7477 - loss: 0.5313
Epoch 68/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 456us/step - accuracy: 0.7401 - loss: 0.5364
Epoch 69/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 436us/step - accuracy: 0.7413 - loss: 0.5346
Epoch 70/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 444us/step - accuracy: 0.7399 - loss: 0.5347
Epoch 71/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 442us/step - accuracy: 0.7384 - loss: 0.5401
Epoch 72/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 438us/step - accuracy: 0.7438 - loss: 0.5333
Epoch 73/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 436us/step - accuracy: 0.7395 - loss: 0.5368
Epoch 74/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 443us/step - accuracy: 0.7392 - loss: 0.5376
Epoch 75/100
[1m804/

In [27]:
# Plot loss and accuracy over time.
v_df = pd.DataFrame(fit_3.history)
v_df.index += 1

plot = v_df.hvplot.line(y = 'loss', color = 'firebrick') + v_df.hvplot.line(y = 'accuracy', color = 'cornflowerblue')

hvplot.save(plot, "/private/var/christina/Downloads/model_3_metrics.png")

plot

In [28]:
# Evaluate the model.
model_3_loss, model_3_acc = nn_model_3.evaluate(X_test_sc , y_test, verbose = 2)
print(
f"""
Model 1 Metrics
--------------------------------------------------
Loss: {model_3_loss}
Accuracy: {model_3_acc}
"""
)

268/268 - 0s - 492us/step - accuracy: 0.7264 - loss: 0.5658

Model 1 Metrics
--------------------------------------------------
Loss: 0.5657696723937988
Accuracy: 0.7264139652252197



# Model 4

In [29]:
from keras.models import Sequential
from keras.layers import Dense, Input

In [30]:
# Function to use for hyperparameter tuner.
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow the tuner to choose which activation function to use.
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])

    # Allow the tuner to decide the number of nodes in the first layer.
    nn_model.add(
        tf.keras.layers.Dense(
            units = hp.Int(
                'first_units',
                min_value = 1,
                max_value = 10,
                step = 2),
            activation = activation,
            input_dim = len(X.columns)))

    # Allow the tuner to flesh out the rest of the model at will.
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))

    # Make the output layer.
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    return nn_model

In [31]:
tuner = kt.Hyperband(
    create_model,
    objective = 'val_accuracy',
    max_epochs = 25,
    hyperband_iterations = 2
)

Reloading Tuner from ./untitled_project/tuner0.json


In [32]:
tuner.search(
    X_train_sc,
    y_train,
    epochs = 25,
    validation_data = (X_test_sc, y_test)
)


In [33]:
# Get the best model from the tuner.
best_param = tuner.get_best_hyperparameters(1)[0]
print(best_param.values)

# Evaluate the best model against the test data.
nn_model_4 = tuner.get_best_models(1)[0]
model_4_loss, model_4_acc = nn_model_4.evaluate(X_test_sc, y_test, verbose = 2)
print(
f"""
Model 1 Metrics
--------------------------------------------------
Loss: {model_4_loss}
Accuracy: {model_4_acc}
"""
)

# Custom callback.
custom_callback = MyModelCheckpoint(
    filepath = 'model_checkpoints/nn_5_weights_{epoch:02d}.weights.h5',
    save_weights_only = True,
    save_freq = 'epoch',
)

# Fit that model!
fit_4 = nn_model_4.fit(
    X_train_sc,
    y_train,
    epochs = 100,
    callbacks = [custom_callback]
)

{'activation': 'tanh', 'first_units': 5, 'num_layers': 5, 'units_0': 3, 'units_1': 5, 'units_2': 5, 'units_3': 3, 'units_4': 7, 'units_5': 5, 'tuner/epochs': 25, 'tuner/initial_epoch': 9, 'tuner/bracket': 1, 'tuner/round': 1, 'tuner/trial_id': '0053'}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


268/268 - 0s - 600us/step - accuracy: 0.7312 - loss: 0.5603

Model 1 Metrics
--------------------------------------------------
Loss: 0.5602715015411377
Accuracy: 0.731195330619812

Epoch 1/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 401us/step - accuracy: 0.7280 - loss: 0.5583
Epoch 2/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 391us/step - accuracy: 0.7312 - loss: 0.5542
Epoch 3/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 391us/step - accuracy: 0.7310 - loss: 0.5544
Epoch 4/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 396us/step - accuracy: 0.7321 - loss: 0.5541
Epoch 5/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 408us/step - accuracy: 0.7306 - loss: 0.5550
Epoch 6/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 393us/step - accuracy: 0.7286 - loss: 0.5557
Epoch 7/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39

[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 427us/step - accuracy: 0.7272 - loss: 0.5577
Epoch 66/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 395us/step - accuracy: 0.7313 - loss: 0.5489
Epoch 67/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 393us/step - accuracy: 0.7313 - loss: 0.5511
Epoch 68/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 399us/step - accuracy: 0.7302 - loss: 0.5517
Epoch 69/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 392us/step - accuracy: 0.7325 - loss: 0.5499
Epoch 70/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 403us/step - accuracy: 0.7357 - loss: 0.5484
Epoch 71/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 398us/step - accuracy: 0.7278 - loss: 0.5543
Epoch 72/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 392us/step - accuracy: 0.7315 - loss: 0.5500
Epoch 73/100
[1m804/

In [34]:
# Plot loss and accuracy over time.
v_df = pd.DataFrame(fit_4.history)
v_df.index += 1

plot = v_df.hvplot.line(y = 'loss', color = 'firebrick') + v_df.hvplot.line(y = 'accuracy', color = 'cornflowerblue')

hvplot.save(plot, "/private/var/christina/Downloads/model_4_metrics.png")

plot


In [35]:
# Metrics for updated model 5.
model_4_loss, model_4_acc = nn_model_4.evaluate(X_test_sc, y_test, verbose = 2)
print(
f"""
Model 1 Metrics
--------------------------------------------------
Loss: {model_4_loss}
Accuracy: {model_4_acc}
"""
)

268/268 - 0s - 322us/step - accuracy: 0.7311 - loss: 0.5593

Model 1 Metrics
--------------------------------------------------
Loss: 0.5592790842056274
Accuracy: 0.7310787439346313



# Model 5

In [116]:
# Now, going to create the NN model.
nn_model_5 = tf.keras.models.Sequential()

# First layer.
nn_model_5.add(tf.keras.layers.Dense(units = 10, activation = 'relu', input_dim = len(X.columns)))

# Second layer.
nn_model_5.add(tf.keras.layers.Dense(units = 8, activation = 'relu'))

# Third layer.
nn_model_5.add(tf.keras.layers.Dense(units = 6, activation = 'relu'))

# Fourth layer.
nn_model_5.add(tf.keras.layers.Dense(units = 4, activation = 'relu'))

# Fifth layer.
nn_model_5.add(tf.keras.layers.Dense(units = 2, activation = 'relu'))
                                     
# Output layer (uses a Sigmoid activation for probability).
nn_model_5.add(tf.keras.layers.Dense(units = 1, activation = "sigmoid"))

# Summarize this.
nn_model_5.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [127]:
# Custom callback.
custom_callback = MyModelCheckpoint(
    filepath = 'model_checkpoints/nn_5_weights_{epoch:02d}.weights.h5',
    save_weights_only = True,
    save_freq = 'epoch',
)

In [128]:
# Compile and fit model.
nn_model_5.compile(loss = "binary_crossentropy", optimizer = "adam", metrics = ["accuracy"])

# Fit that model!
fit_5 = nn_model_5.fit(
    X_train_sc,
    y_train,
    epochs = 100,
    callbacks = [custom_callback]
)

Epoch 1/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 377us/step - accuracy: 0.7396 - loss: 0.5388
Epoch 2/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 375us/step - accuracy: 0.7355 - loss: 0.5439
Epoch 3/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 368us/step - accuracy: 0.7356 - loss: 0.5456
Epoch 4/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 367us/step - accuracy: 0.7375 - loss: 0.5447
Epoch 5/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 409us/step - accuracy: 0.7351 - loss: 0.5441
Epoch 6/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 367us/step - accuracy: 0.7384 - loss: 0.5402
Epoch 7/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 368us/step - accuracy: 0.7341 - loss: 0.5475
Epoch 8/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 377us/step - accuracy: 0.7360 - loss: 0.5433
Epoch 9/100
[1m

[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 381us/step - accuracy: 0.7347 - loss: 0.5439
Epoch 68/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 382us/step - accuracy: 0.7389 - loss: 0.5407
Epoch 69/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 377us/step - accuracy: 0.7376 - loss: 0.5404
Epoch 70/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 396us/step - accuracy: 0.7394 - loss: 0.5398
Epoch 71/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 385us/step - accuracy: 0.7370 - loss: 0.5402
Epoch 72/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 382us/step - accuracy: 0.7327 - loss: 0.5473
Epoch 73/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 378us/step - accuracy: 0.7366 - loss: 0.5409
Epoch 74/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 373us/step - accuracy: 0.7392 - loss: 0.5428
Epoch 75/100
[1m804/

In [129]:
# Plot loss and accuracy over time.
v_df = pd.DataFrame(fit_5.history)
v_df.index += 1

plot = v_df.hvplot.line(y = 'loss', color = 'firebrick') + v_df.hvplot.line(y = 'accuracy', color = 'cornflowerblue')

hvplot.save(plot, "/private/var/christina/Downloads/model_5_metrics.png")

plot

In [130]:
# Evaluate the model.
model_5_loss, model_5_acc = nn_model_5.evaluate(X_test_sc, y_test, verbose = 2)
print(
f"""
Model 5 Metrics
--------------------------------------------------
Loss: {model_5_loss}
Accuracy: {model_5_acc}
"""
)

268/268 - 0s - 454us/step - accuracy: 0.7303 - loss: 0.5604

Model 5 Metrics
--------------------------------------------------
Loss: 0.5604257583618164
Accuracy: 0.7302623987197876

