# Import Libraries and Data

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

import sys
# Importing files from src directory
sys.path.append('../')
from src.create_features import create_feature_matrix
from src.reduce_features import get_top_features

In [2]:
# Locations of the data directory and the saved model parameters,
# relative to the notebook's working directory.
DATA_PATH = '../data'
MODEL_PATH = '../src/models'

# Every generated artifact (cached feature matrices, predictions) lives
# in the same "processed" sub-directory of DATA_PATH.
processed_dir = f'{DATA_PATH}/processed'
feature_matrix_path = f'{processed_dir}/HOLDOUT_feature_matrix.csv'
top_feature_matrix_path = f'{processed_dir}/HOLDOUT_SNN_feature_matrix.csv'
prediction_path = f'{processed_dir}/holdout_predictions_group1.csv'

In [3]:
# Read the four raw HOLDOUT parquet files from the "raw" sub-directory
# of DATA_PATH into DataFrames.
raw_dir = f'{DATA_PATH}/raw'
cons = pd.read_parquet(f'{raw_dir}/q2_consDF_HOLDOUT_notags_final.pqt')
acct = pd.read_parquet(f'{raw_dir}/q2_acctDF_HOLDOUT_final.pqt')
inflows = pd.read_parquet(f'{raw_dir}/q2_inflows_HOLDOUT_final.pqt')
outflows = pd.read_parquet(f'{raw_dir}/q2_outflows_HOLDOUT_final.pqt')

In [4]:
# Additional Util Functions to import/create data
def get_feature_matrix() -> pd.DataFrame:
    """Return the full feature matrix, using the CSV cache when it exists.

    Reads the CSV at ``feature_matrix_path``. If that file does not exist,
    the matrix is built with ``create_feature_matrix(cons, acct, inflows,
    outflows)`` and written to ``feature_matrix_path`` (without the index)
    so later calls can reuse it.

    Returns:
        The feature matrix as loaded from the cache or freshly built.

    Raises:
        Any error other than ``FileNotFoundError`` raised while reading the
        cache (e.g. a parse or permission error) is propagated instead of
        silently triggering a rebuild that would overwrite the file.
    """
    try:
        feature_matrix = pd.read_csv(feature_matrix_path)
    except FileNotFoundError:
        # Cache miss only: a bare ``except`` here would also swallow
        # KeyboardInterrupt and hide a corrupt or unreadable cache file.
        feature_matrix = create_feature_matrix(cons, acct, inflows, outflows)
        feature_matrix.to_csv(feature_matrix_path, index=False)
    return feature_matrix

def get_top_feature_matrix() -> pd.DataFrame:
    """Return the reduced (top-feature) matrix, using the CSV cache when it exists.

    Reads the CSV at ``top_feature_matrix_path``. If that file does not
    exist, the full matrix is obtained from ``get_feature_matrix()``,
    reduced with ``get_top_features(False, feature_matrix)`` and written to
    ``top_feature_matrix_path`` (without the index).

    Returns:
        The reduced feature matrix as loaded from the cache or freshly built.

    Raises:
        Any error other than ``FileNotFoundError`` raised while reading the
        cache is propagated instead of silently triggering a rebuild.
    """
    try:
        top_feature_matrix = pd.read_csv(top_feature_matrix_path)
    except FileNotFoundError:
        # Cache miss only: a bare ``except`` here would also swallow
        # KeyboardInterrupt and hide a corrupt or unreadable cache file.
        feature_matrix = get_feature_matrix()
        # First argument is ``include_y``; it is passed as False here.
        top_feature_matrix = get_top_features(False, feature_matrix)
        top_feature_matrix.to_csv(top_feature_matrix_path, index=False)
    return top_feature_matrix

In [5]:
# Obtain the reduced feature matrix (read from the CSV cache when present)
# and preview its first five rows.
feature_matrix = get_top_feature_matrix()
feature_matrix.head(n=5)

Unnamed: 0,prism_consumer_id,maxSELF_TRANSFER,maxACCOUNT_FEES,maxGIFTS_DONATIONS,maxEDUCATION,minHEALTHCARE_MEDICAL,maxOVERDRAFT,SELF_TRANSFER_mean,maxAUTOMOTIVE,EDUCATION_slp,...,SELF_TRANSFER_count,PRR,medianMORTGAGE,unsufficient_balance,maxGENERAL_MERCHANDISE,ATM_CASH_prop,FOOD_AND_BEVERAGES_mean,ENTERTAINMENT_count,BILLS_UTILITIES_slp,INSURANCE_count
0,1568,600.0,34.0,0.0,0.0,7.58,0.0,109.85409,150.0,0.0,...,6.0,0.384615,0.0,5,130.19,0.100398,20.73514,1.8,0.0,1.0
1,598,1500.0,0.0,100.0,0.0,4.81,0.0,925.0,318.6,0.0,...,1.0,0.615385,0.0,2,106.99,0.038417,50.494167,1.428571,0.0,1.166667
2,1489,12000.0,104.95,0.0,0.0,4.36,0.0,2274.358974,553.5,0.0,...,3.230769,0.384615,0.0,13,5717.36,0.065047,172.21775,1.0,0.0,2.583333
3,821,14000.0,69.0,294.0,0.0,1.99,0.0,799.877408,52.89,0.0,...,7.0,0.538462,0.0,6,400.0,0.0,33.156015,2.555556,0.0,0.0
4,50,0.0,38.7,0.0,0.0,3.0,0.0,0.0,985.84,0.0,...,0.0,0.461538,0.0,5,521.55,0.075944,24.168468,1.769231,0.0,3.307692


In [6]:
# Model inputs: every column except the consumer identifier, as float32.
# The identifier is dropped by name rather than by position (iloc[:, 1:])
# so that a reordered or re-generated CSV cannot silently feed the ID to
# the network; a missing column raises a KeyError instead.
X_tensor = torch.tensor(
    feature_matrix.drop(columns=['prism_consumer_id']).to_numpy(),
    dtype=torch.float32
)

# Define Model

In [7]:
# Fully connected network ending in a sigmoid, so the single output lies
# in [0, 1]. The layer order must stay exactly as below: the saved
# state_dict is keyed by each layer's position inside nn.Sequential.
model = nn.Sequential(
        nn.Linear(X_tensor.shape[1], 12),  # input width = number of feature columns
        nn.ReLU(),
        nn.Linear(12, 24),
        nn.ReLU(),
        nn.Linear(24, 24),
        nn.ReLU(),
        nn.Linear(24, 12),
        nn.ReLU(),
        nn.Linear(12, 6),
        nn.ReLU(),
        nn.Linear(6, 1),
        nn.Sigmoid()
    )
# Inference-only notebook: switch to eval mode. This is a no-op for the
# layers above but keeps predictions correct if Dropout/BatchNorm layers
# are ever added to the architecture.
model.eval()
# map_location='cpu' lets the checkpoint load on a CPU-only machine even if
# it was saved from a GPU. NOTE(review): torch.load unpickles the file, so
# only load checkpoints from a trusted source.
model.load_state_dict(
    torch.load(f'{MODEL_PATH}/nn_base_params.pt', map_location='cpu')
)

<All keys matched successfully>

# Make Prediction

In [8]:
# Forward pass without autograd tracking (no gradients are needed for
# prediction); column 0 of the (n_rows, 1) output becomes a flat array.
# NOTE(review): the saved output of this notebook shows probabilities that
# are exactly 0.0 or vanishingly small, which may mean the sigmoid is
# saturating on unscaled inputs - confirm that the holdout features get the
# same preprocessing/scaling that was applied when the model was trained.
with torch.no_grad():
    pred_prob = model(X_tensor).numpy()[:, 0]

In [9]:
# Pair each consumer id with its predicted probability (row order is the
# same as feature_matrix), then preview the first rows.
pred_df = feature_matrix[['prism_consumer_id']].assign(prediction=pred_prob)
pred_df.head()

Unnamed: 0,prism_consumer_id,prediction
0,1568,0.01989167
1,598,0.0
2,1489,0.0
3,821,3.690235e-31
4,50,0.0


In [10]:
# Persist the predictions as CSV at prediction_path, omitting the index.
pred_df.to_csv(path_or_buf=prediction_path, index=False)