# Portfolio Analysis: ROI of the 36-Month Loan Portfolio

## Set up data set

In [1]:
import pandas as pd
import numpy as np

import sys
# Add the project directory to the import path; the local modules imported
# right after this (config, model_prep) are presumably located there.
sys.path.append('../lending_club')
import config
from model_prep import divide_by_term, split_data

In [2]:
# Read the approved-loans parquet file from the configured data directory.
accepted_loans = pd.read_parquet(
    config.DATAPATH / 'approved.parquet',
    engine='fastparquet',
)

# Take the term_length=36 subset via the project helper, then keep only the
# configured feature columns plus the target column.
model_columns = config.SELECTED_FEATURES + [config.TARGET_COL]
accepted_36 = divide_by_term(accepted_loans, term_length=36)
accepted_36 = accepted_36.loc[:, model_columns]

In [3]:
# emp_title and addr_state are removed before the data is split.
# The seed is fixed at 42 so the split is repeatable.
model_frame = accepted_36.drop(columns=['emp_title', 'addr_state'])
X_train, X_test, y_train, y_test = split_data(model_frame, random_state=42)

In [4]:
# Take a copy of config.VARS_TO_DUMMIFY so the value held in config is not
# the object bound here.
# NOTE(review): dummify_vars is not referenced again in the visible part of
# this notebook -- confirm whether it is still needed.
dummify_vars = config.VARS_TO_DUMMIFY.copy()

## CatBoost Modeling 

In [5]:
from catboost import CatBoostClassifier

In [6]:
# Restore the previously saved classifier from 'CatBoost36.model'.
# load_model populates the instance it is called on (and returns that same
# instance), so constructing first and loading second is equivalent.
best_cbc = CatBoostClassifier()
best_cbc.load_model('CatBoost36.model')

## Confusion Matrix

In [7]:
from sklearn.metrics import confusion_matrix,classification_report
# Confusion matrix of the loaded model on the held-out test split.
# The bare expression stays last so the notebook displays the array.
test_predictions = best_cbc.predict(X_test)
confusion_matrix(y_test, test_predictions)

array([[ 12339,  12842],
       [ 50129, 108626]], dtype=int64)

In [8]:
# Same matrix on the training split, for comparison with the test split.
# The bare expression stays last so the notebook displays the array.
train_predictions = best_cbc.predict(X_train)
confusion_matrix(y_train, train_predictions)

array([[ 44269,  14786],
       [101979, 268150]], dtype=int64)

In [9]:
# Per-class precision / recall / F1 summary on the test split.
test_report = classification_report(y_test, best_cbc.predict(X_test))
print(test_report)

              precision    recall  f1-score   support

           0       0.20      0.49      0.28     25181
           1       0.89      0.68      0.78    158755

    accuracy                           0.66    183936
   macro avg       0.55      0.59      0.53    183936
weighted avg       0.80      0.66      0.71    183936



## ROI on the Train Portfolio

In [14]:
from model_analysis import calculate_PnL_return
# Value returned by calculate_PnL_return for the training split, scaled by
# 100 and rounded to two decimals.
# NOTE(review): PnL comes from the full accepted_loans frame rather than the
# 36-month subset -- presumably matched to X_train inside the helper; confirm.
train_roi = calculate_PnL_return(best_cbc, X_train, y_train, accepted_loans.PnL)
print(round(train_roi * 100, 2))

11.57


## ROI on the Test Portfolio

In [11]:
# Value returned by calculate_PnL_return for the test split, scaled by 100
# and rounded to two decimals.
# NOTE(review): PnL comes from the full accepted_loans frame rather than the
# 36-month subset -- presumably matched to X_test inside the helper; confirm.
test_roi = calculate_PnL_return(best_cbc, X_test, y_test, accepted_loans.PnL)
print(round(test_roi * 100, 2))

8.7


## ROI on the Entire Portfolio

In [13]:
# Stack the train and test splits back together (row-wise) and evaluate the
# same helper on the combined data.
# NOTE(review): PnL comes from the full accepted_loans frame rather than the
# 36-month subset -- presumably matched to the rows inside the helper; confirm.
X_all = pd.concat([X_train, X_test], axis=0)
y_all = pd.concat([y_train, y_test], axis=0)
overall_roi = calculate_PnL_return(best_cbc, X_all, y_all, accepted_loans.PnL)
print(round(overall_roi * 100, 2))

10.71
