This notebook adjusts all dollar amounts for inflation and converts loan interest rates to be relative to the prime rate at the time of loan issuance.

In [1]:
import warnings
# Blanket filter: suppresses every warning category for the rest of the session.
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from pathlib import Path
# Show up to 50 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 50)

In [2]:
# Directory prefix for the files this notebook reads and writes. File names are
# appended directly to this string, so the trailing slash is required.
data_path = 'data/'

In [3]:
# Read the pickled train and test frames from the data directory.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
train_df, test_df = (
    pd.read_pickle(f'{data_path}{fname}')
    for fname in ('api_train_df.pkl', 'api_test_df.pkl')
)

To help with generalizing over time, convert income and payment amounts to inflation-adjusted amounts. Likewise, to generalize across different interest rate environments over time, convert interest rates to rates relative to the prime rate.

Prime rate and inflation (CPI index) data were downloaded from the St. Louis Fed (FRED).

prime rate data: https://fred.stlouisfed.org/series/MPRIME
inflation rates: https://fred.stlouisfed.org/series/CPIAUCSL

In [4]:
# Load the monthly prime rate series (MPRIME.csv) and parse its dates.
prime_rate = pd.read_csv(data_path+'MPRIME.csv')
prime_rate['DATE'] = pd.to_datetime(prime_rate['DATE'])


def _attach_prime_rate(loans):
    """Return `loans` joined to the prime rate table, with derived columns.

    Adds the prime rate table's columns (DATE, MPRIME), an `issue_m` column
    holding the month number of `issue_d`, and replaces `int_rate` with
    `int_rate - MPRIME`.

    Raises:
        pandas.errors.MergeError: if a DATE occurs more than once in the
            prime rate table.
    """
    # validate='many_to_one' makes a duplicated date in the rate table an
    # error instead of silently duplicating loan rows.
    # The join is inner: loans whose issue_d has no matching DATE are dropped.
    merged = loans.merge(prime_rate, how='inner', left_on='issue_d',
                         right_on='DATE', validate='many_to_one')
    merged['issue_m'] = merged['issue_d'].dt.month
    # NOTE(review): this uses the issue month's own MPRIME value, while the CPI
    # step uses the previous month's figure to avoid look-ahead -- confirm the
    # same-month prime rate is known at investment time.
    merged['int_rate'] = merged['int_rate'] - merged['MPRIME']
    return merged


train_df = _attach_prime_rate(train_df)
test_df = _attach_prime_rate(test_df)



In [5]:
# Dollar-denominated columns that get rescaled for inflation.
to_adj = [
    'annual_inc',
    'loan_amnt',
    'installment',
    'revol_bal',
    'avg_cur_bal',
    'bc_open_to_buy',
    'total_il_high_credit_limit',
    'total_bc_limit',
    'total_rev_hi_lim',
    'tot_hi_cred_lim',
    'total_bal_ex_mort',
    'tot_cur_bal',
    'tot_coll_amt',
    'delinq_amnt',
]

In [6]:
# load inflation info
inflation = pd.read_csv(data_path+'CPIAUCSL.csv')
inflation['DATE'] = pd.to_datetime(inflation['DATE'])

# use prev month inflation because current month inflation will not be available at time of investment
# number provided is CPI index benchmarked to past date
inflation['prev_mo_cpi'] = inflation['CPIAUCSL'].shift()

train_df = train_df.merge(inflation, left_on='issue_d', right_on='DATE')
test_df = test_df.merge(inflation, left_on='issue_d', right_on='DATE')

In [7]:
# CPI index value for 2016-01-01, the base that all amounts are rescaled to.
jan_2016_cpi = inflation.loc[inflation['DATE'] == '2016-01-01', 'CPIAUCSL']
inflation_adj_2016 = jan_2016_cpi.values[0]

In [8]:
# Display the CPI value selected for 2016-01-01.
inflation_adj_2016

237.833

In [9]:
# adjust all dollar amounts to be relative to jan 2016
for val in to_adj:
    train_df[val] = train_df[val]/train_df['prev_mo_cpi']
    train_df[val] = train_df[val]*inflation_adj_2016
    train_df[val] = train_df[val].round(decimals=2)
    
    test_df[val] = test_df[val]/test_df['prev_mo_cpi']
    test_df[val] = test_df[val]*inflation_adj_2016
    test_df[val] = test_df[val].round(decimals=2)

In [10]:
# Preview the first rows of the adjusted training frame.
train_df.head()

Unnamed: 0,id,member_id,loan_amnt,term,int_rate,installment,grade,sub_grade,emp_length,home_ownership,annual_inc,verification_status,desc,purpose,zip_code,addr_state,initial_list_status,emp_title,acc_now_delinq,acc_open_past_24mths,bc_open_to_buy,percent_bc_gt_75,bc_util,dti,delinq_2yrs,...,sec_app_revol_util,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,revol_bal_joint,open_act_il,sec_app_open_act_il,issue_d,loan_status,total_pymnt,total_rec_int,total_rec_late_fee,total_rec_prncp,recoveries,collection_recovery_fee,last_pymnt_d,loan_length,returns,DATE_x,MPRIME,issue_m,DATE_y,CPIAUCSL,prev_mo_cpi
0,10129506,,21131.68,36,10.28,717.42,B,B5,10+ years,RENT,82799.61,Verified,Borrower added on 12/31/13 > My goal is to p...,debt_consolidation,100xx,NY,f,Operations Manager,0.0,9.0,6919.61,50.0,54.6,16.73,0.0,...,,,,,,,,,2013-12-01,Fully Paid,23926.640008,3126.64,0.0,20800.0,0.0,0.0,May-2015,516.0,0.104131,2013-12-01,3.25,12,2013-12-01,234.719,234.1
1,10148122,,12191.35,36,4.37,379.9,A,A3,3 years,MORTGAGE,98038.81,Not Verified,Borrower added on 12/31/13 > Bought a new ho...,debt_consolidation,782xx,TX,f,Systems Engineer,0.0,4.0,2479.92,100.0,83.5,12.61,0.0,...,,,,,,,,,2013-12-01,Fully Paid,13397.539998,1397.54,0.0,12000.0,0.0,0.0,Jun-2016,913.0,0.045026,2013-12-01,3.25,12,2013-12-01,234.719,234.1
2,10149342,,27481.34,36,7.74,899.58,B,B2,10+ years,OWN,55877.04,Verified,Borrower added on 12/31/13 > Combining high ...,debt_consolidation,481xx,MI,w,Team Leadern Customer Ops & Systems,0.0,3.0,16735.68,25.0,53.9,22.87,0.0,...,,,,,,,,,2013-12-01,Fully Paid,31752.53,4702.53,0.0,27050.0,0.0,0.0,Jul-2016,943.0,0.064005,2013-12-01,3.25,12,2013-12-01,234.719,234.1
3,10129454,,12191.35,36,7.74,399.07,B,B2,4 years,RENT,60956.77,Not Verified,Borrower added on 12/31/13 > I would like to...,debt_consolidation,281xx,NC,f,Project Manager,0.0,8.0,15458.64,0.0,15.9,4.62,0.0,...,,,,,,,,,2013-12-01,Fully Paid,13988.609996,1988.61,0.0,12000.0,0.0,0.0,Apr-2016,852.0,0.067896,2013-12-01,3.25,12,2013-12-01,234.719,234.1
4,10149488,,4876.54,36,7.74,159.64,B,B2,2 years,MORTGAGE,40231.47,Source Verified,Borrower added on 12/31/13 > Just bought a h...,home_improvement,782xx,TX,w,Surgical Technician,0.0,0.0,21907.86,0.0,16.1,2.49,0.0,...,,,,,,,,,2013-12-01,Fully Paid,5157.519457,357.52,0.0,4800.0,0.0,0.0,Sep-2014,274.0,0.100428,2013-12-01,3.25,12,2013-12-01,234.719,234.1


In [11]:
# Each of the two merges left behind a copy of its join key (DATE_x, DATE_y);
# remove both from the train and test frames.
merge_key_cols = ['DATE_x', 'DATE_y']
train_df = train_df.drop(columns=merge_key_cols)
test_df = test_df.drop(columns=merge_key_cols)

In [12]:
# Write the adjusted frames back to the data directory as pickles.
for frame, fname in ((train_df, 'train_adj.pkl'), (test_df, 'test_adj.pkl')):
    frame.to_pickle(f'{data_path}{fname}')