# ROI Analysis of the CatBoost-Selected Loan Portfolio

## Set Up the Data Set

In [1]:
import pandas as pd
import numpy as np

import sys
# Add the project's `lending_club` directory to the import search path so the
# local modules imported below can be found (presumably `config` and
# `model_prep` live there -- confirm). The path is relative, so it is resolved
# against the working directory the notebook is run from.
sys.path.append('../lending_club')
import config
from model_prep import divide_by_term, split_data

In [2]:
# Load the approved-loans table from the configured data directory.
accepted_loans = pd.read_parquet(
    config.DATAPATH / 'approved.parquet',
    engine='fastparquet',
)

# Split the loans by term length (60 first, then 36), one subset per term.
accepted_60, accepted_36 = (
    divide_by_term(accepted_loans, term_length=months) for months in (60, 36)
)

# Keep only the selected feature columns plus the target column in each subset.
model_columns = config.SELECTED_FEATURES + [config.TARGET_COL]
accepted_60 = accepted_60.loc[:, model_columns]
accepted_36 = accepted_36.loc[:, model_columns]

## Load the Trained CatBoost Models

In [3]:
from catboost import CatBoostClassifier

In [4]:
# Load the saved CatBoost classifiers for the 60- and 36-term subsets from the
# working directory. `load_model` returns the classifier it was called on, so
# each name is bound to a loaded model.
best_60, best_36 = (
    CatBoostClassifier().load_model(model_file)
    for model_file in ('CatBoost60.model', 'CatBoost36.model')
)

## ROI on the Entire Portfolio

In [6]:
# Columns removed before prediction: two descriptive columns and the target.
non_model_inputs = ['addr_state', 'emp_title', config.TARGET_COL]

# Score every loan in each term subset with that term's model and store the
# result in a new `prediction` column on the same DataFrame.
for term_loans, term_model in ((accepted_60, best_60), (accepted_36, best_36)):
    term_loans['prediction'] = term_model.predict(
        term_loans.drop(columns=non_model_inputs)
    )

# Stack the two scored subsets row-wise (60-term rows first).
combined = pd.concat([accepted_60, accepted_36], axis=0)

In [10]:
# The portfolio is the set of loans whose prediction equals 1.
is_selected = combined['prediction'] == 1
portfolio_df = combined.loc[is_selected, :]

# Attach each loan's PnL from the full loans table, matching rows on the index.
PnL_series = accepted_loans['PnL']
portfolio_df = pd.merge(
    portfolio_df,
    PnL_series,
    how='inner',
    left_index=True,
    right_index=True,
)

# ROI as computed here: total PnL divided by total loan amount.
total_pnl = portfolio_df['PnL'].sum()
total_loan_amnt = portfolio_df['loan_amnt'].sum()
print(total_pnl / total_loan_amnt)

0.12441867617314067


In [11]:
# Persist the selected portfolio (with its PnL column) to the data directory.
portfolio_path = config.DATAPATH / 'accepted_portfolio.pickle'
portfolio_df.to_pickle(portfolio_path)