In [2]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import pandas as pd
import tensorflow as tf

# Import checkpoint dependencies
import os
from tensorflow.keras.callbacks import ModelCheckpoint

In [4]:
# Import our input dataset
mortgage_df = pd.read_csv('Final Project Data.csv')
mortgage_df.head()

Unnamed: 0,Loan Identifier,Origination Channel,Seller Name,Original Interest Rate,Original UPB,Original Loan Term,Origination Date,First Payment Date,Original Loan-to-value (LTV),Original Combined Loan-to-value (CLTV),...,Property Type,Number of Units,Occupancy Type,Property State,Zip Code Short,Primary Mortgage Insurance Percent,Product Type,Co-borrower Credit Score at Origination,Mortgage Insurance Type,Relocation Mortgage Indicator
0,100000913397,C,"JPMORGAN CHASE BANK, NATIONAL ASSOCIATION",6,324000,360,2018-09-01,2018-11-01,80,80,...,PU,1,P,CA,925,,FRM,665.0,,N
1,100017539727,B,OTHER,5,307000,360,2018-12-01,2019-02-01,90,90,...,PU,1,P,TX,770,25.0,FRM,,1.0,N
2,100018053040,R,"JPMORGAN CHASE BANK, NATIONAL ASSOCIATION",5,256000,360,2018-11-01,2019-01-01,90,90,...,SF,1,S,NC,286,25.0,FRM,738.0,1.0,N
3,100019764317,C,"WELLS FARGO BANK, N.A.",5,248000,360,2018-12-01,2019-02-01,90,90,...,SF,1,P,IL,600,25.0,FRM,791.0,1.0,N
4,100019765730,B,"UNITED SHORE FINANCIAL SERVICES, LLC DBA UNITE...",4,490000,360,2019-03-01,2019-05-01,67,67,...,CO,1,P,CA,945,,FRM,,,N


In [5]:
# Drop the unwanted variables from the data
mortgage_input = mortgage_df.drop(columns = ['Loan Identifier'])
mortgage_input.head()

Unnamed: 0,Origination Channel,Seller Name,Original Interest Rate,Original UPB,Original Loan Term,Origination Date,First Payment Date,Original Loan-to-value (LTV),Original Combined Loan-to-value (CLTV),Number of Borrowers,...,Property Type,Number of Units,Occupancy Type,Property State,Zip Code Short,Primary Mortgage Insurance Percent,Product Type,Co-borrower Credit Score at Origination,Mortgage Insurance Type,Relocation Mortgage Indicator
0,C,"JPMORGAN CHASE BANK, NATIONAL ASSOCIATION",6,324000,360,2018-09-01,2018-11-01,80,80,2,...,PU,1,P,CA,925,,FRM,665.0,,N
1,B,OTHER,5,307000,360,2018-12-01,2019-02-01,90,90,1,...,PU,1,P,TX,770,25.0,FRM,,1.0,N
2,R,"JPMORGAN CHASE BANK, NATIONAL ASSOCIATION",5,256000,360,2018-11-01,2019-01-01,90,90,2,...,SF,1,S,NC,286,25.0,FRM,738.0,1.0,N
3,C,"WELLS FARGO BANK, N.A.",5,248000,360,2018-12-01,2019-02-01,90,90,2,...,SF,1,P,IL,600,25.0,FRM,791.0,1.0,N
4,B,"UNITED SHORE FINANCIAL SERVICES, LLC DBA UNITE...",4,490000,360,2019-03-01,2019-05-01,67,67,1,...,CO,1,P,CA,945,,FRM,,,N


In [6]:
# Generate our categorical variable list
mortgage_cat = mortgage_input.dtypes[mortgage_input.dtypes == "object"].index.tolist()
mortgage_cat

['Origination Channel',
 'Seller Name',
 'Origination Date',
 'First Payment Date',
 'First Time Home Buyer Indicator',
 'Loan Purpose ',
 'Property Type',
 'Occupancy Type',
 'Property State',
 'Product Type',
 'Relocation Mortgage Indicator']

In [7]:
# Check the number of unique values in each column
mortgage_input[mortgage_cat].nunique()

Origination Channel                 3
Seller Name                        17
Origination Date                   27
First Payment Date                 27
First Time Home Buyer Indicator     2
Loan Purpose                        3
Property Type                       5
Occupancy Type                      3
Property State                     54
Product Type                        1
Relocation Mortgage Indicator       2
dtype: int64

In [8]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(mortgage_input[mortgage_cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names(mortgage_cat)
encode_df.head()

Unnamed: 0,Origination Channel_B,Origination Channel_C,Origination Channel_R,"Seller Name_AMERIHOME MORTGAGE COMPANY, LLC",Seller Name_DITECH FINANCIAL LLC,"Seller Name_EAGLE HOME MORTGAGE, LLC",Seller Name_FAIRWAY INDEPENDENT MORTGAGE CORPORATION,"Seller Name_FLAGSTAR BANK, FSB",Seller Name_FREEDOM MORTGAGE CORP.,"Seller Name_JPMORGAN CHASE BANK, NATIONAL ASSOCIATION",...,Property State_VA,Property State_VI,Property State_VT,Property State_WA,Property State_WI,Property State_WV,Property State_WY,Product Type_FRM,Relocation Mortgage Indicator_N,Relocation Mortgage Indicator_Y
0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
3,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0


In [18]:
# Merge one-hot encoded features and drop the originals
mortgage_merged = mortgage_input.merge(encode_df,left_index=True, right_index=True)
mortgage_merged = mortgage_merged.drop(mortgage_cat,1)
mortgage_merged.head()

Unnamed: 0,Original Interest Rate,Original UPB,Original Loan Term,Original Loan-to-value (LTV),Original Combined Loan-to-value (CLTV),Number of Borrowers,Original Debt to Income Ratio,Borrower Credit Score at Origination,Number of Units,Zip Code Short,...,Property State_VI,Property State_VT,Property State_WA,Property State_WI,Property State_WV,Property State_WY,Product Type_FRM,Relocation Mortgage Indicator_N,Relocation Mortgage Indicator_Y,Foreclosure
0,6,324000,360,80,80,2,49.0,692.0,1,925,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,False
1,5,307000,360,90,90,1,44.0,722.0,1,770,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,False
2,5,256000,360,90,90,2,41.0,728.0,1,286,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,False
3,5,248000,360,90,90,2,40.0,730.0,1,600,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,False
4,4,490000,360,67,67,1,35.0,727.0,1,945,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,False


In [19]:
import random

def decision(probability):
    return random.random() < probability

foreclosure_rng = [decision(0.066) for i in range(len(mortgage_merged.index))]
foreclosure_rng

[False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,

In [21]:
mortgage_merged["Foreclosure"] = foreclosure_rng
mortgage_merged

Unnamed: 0,Original Interest Rate,Original UPB,Original Loan Term,Original Loan-to-value (LTV),Original Combined Loan-to-value (CLTV),Number of Borrowers,Original Debt to Income Ratio,Borrower Credit Score at Origination,Number of Units,Zip Code Short,...,Property State_VI,Property State_VT,Property State_WA,Property State_WI,Property State_WV,Property State_WY,Product Type_FRM,Relocation Mortgage Indicator_N,Relocation Mortgage Indicator_Y,Foreclosure
0,6,324000,360,80,80,2,49.0,692.0,1,925,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,False
1,5,307000,360,90,90,1,44.0,722.0,1,770,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,False
2,5,256000,360,90,90,2,41.0,728.0,1,286,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,False
3,5,248000,360,90,90,2,40.0,730.0,1,600,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,False
4,4,490000,360,67,67,1,35.0,727.0,1,945,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
297447,5,250000,360,80,80,1,42.0,753.0,1,282,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,False
297448,5,123000,360,95,95,1,34.0,690.0,1,799,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,True
297449,5,280000,240,69,69,1,40.0,760.0,1,531,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,False
297450,5,155000,360,97,97,1,36.0,781.0,1,936,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,False


In [22]:
# Split our preprocessed data into our features and target arrays
y = mortgage_merged["Foreclosure"].values
X = mortgage_merged.drop(["Foreclosure"],1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [23]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [24]:
# Define the model - deep neural net
number_input_features = len(X_train[0])
hidden_nodes_layer1 = 10
hidden_nodes_layer2 = 6
activ_func = "tanh"

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation=activ_func)
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation=activ_func))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                1580      
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 66        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 7         
Total params: 1,653
Trainable params: 1,653
Non-trainable params: 0
_________________________________________________________________


In [25]:
# Define the checkpoint path and filenames
os.makedirs("checkpoints/",exist_ok=True)
checkpoint_path = "checkpoints/cp.{acc:.4f}-{epoch:02d}.hdf5"

In [26]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="nadam", metrics=["accuracy"])

# Create a callback that saves the model's weights every 5 epochs
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='accuracy',
    verbose=1,
    #save_best_only=True,
    save_weights_only=False,
    save_freq='epoch')

# Train the model
fit_model = nn.fit(X_train_scaled,y_train,epochs=100,callbacks=[cp_callback])

# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/100
Epoch 00001: saving model to checkpoints/cp.0.9344-01.hdf5
Epoch 2/100
Epoch 00002: saving model to checkpoints/cp.0.9344-02.hdf5
Epoch 3/100
Epoch 00003: saving model to checkpoints/cp.0.9344-03.hdf5
Epoch 4/100
Epoch 00004: saving model to checkpoints/cp.0.9344-04.hdf5
Epoch 5/100
Epoch 00005: saving model to checkpoints/cp.0.9344-05.hdf5
Epoch 6/100
Epoch 00006: saving model to checkpoints/cp.0.9344-06.hdf5
Epoch 7/100
Epoch 00007: saving model to checkpoints/cp.0.9344-07.hdf5
Epoch 8/100

KeyboardInterrupt: 