# Portfolio Analysis: ROI of the 60-Month Portfolio

## Set up data set

In [1]:
import pandas as pd
import numpy as np

import sys
sys.path.append('../lending_club') # path to the directory
import config
from model_prep import divide_by_term, split_data

In [2]:
# Read the approved-loans table, pass it through divide_by_term with
# term_length=60 (the 60-month portfolio this notebook analyses), and keep
# only the configured feature columns plus the target column.
accepted_loans = pd.read_parquet(
    config.DATAPATH / 'approved.parquet',
    engine='fastparquet',
)
kept_columns = config.SELECTED_FEATURES + [config.TARGET_COL]
accepted_60 = divide_by_term(accepted_loans, term_length=60)
accepted_60 = accepted_60.loc[:, kept_columns]

In [3]:
# Remove the emp_title and addr_state columns before splitting; random_state
# is pinned so the split is reproducible between runs.
split_input = accepted_60.drop(columns=['emp_title', 'addr_state'])
X_train, X_test, y_train, y_test = split_data(split_input, random_state=42)

In [4]:
# Work on a copy of the configured list of variables to dummify
# (presumably so later edits do not mutate config.VARS_TO_DUMMIFY -- confirm).
dummify_vars = config.VARS_TO_DUMMIFY.copy()

## CatBoost Modeling 

In [5]:
from catboost import CatBoostClassifier

In [6]:
# Restore the previously saved classifier from 'CatBoost60.model'
# (path is relative to the notebook's working directory).
best_cbc = CatBoostClassifier()
best_cbc.load_model('CatBoost60.model')

## Confusion Matrix

In [10]:
from sklearn.metrics import confusion_matrix,classification_report
# Confusion matrix on the held-out test split
# (scikit-learn layout: rows = true class, columns = predicted class).
test_predictions = best_cbc.predict(X_test)
confusion_matrix(y_test, test_predictions)

array([[1035, 2376],
       [2562, 8362]], dtype=int64)

In [11]:
# Confusion matrix on the training split, for comparison with the test split
# (scikit-learn layout: rows = true class, columns = predicted class).
train_predictions = best_cbc.predict(X_train)
confusion_matrix(y_train, train_predictions)

array([[ 6268,  1579],
       [ 2702, 22899]], dtype=int64)

In [12]:
# Per-class precision / recall / F1 on the test split.
test_report = classification_report(y_test, best_cbc.predict(X_test))
print(test_report)

              precision    recall  f1-score   support

           0       0.29      0.30      0.30      3411
           1       0.78      0.77      0.77     10924

    accuracy                           0.66     14335
   macro avg       0.53      0.53      0.53     14335
weighted avg       0.66      0.66      0.66     14335



## ROI on the Train Portfolio

In [16]:
from model_analysis import calculate_PnL_return
# ROI on the training split, printed as a percentage with two decimals.
# NOTE(review): the PnL column comes from the full accepted_loans table, not
# the 60-month subset -- presumably matched to rows by index inside
# calculate_PnL_return; confirm.
train_roi = calculate_PnL_return(best_cbc, X_train, y_train, accepted_loans.PnL)
print(round(train_roi * 100, 2))

28.99


## ROI on the Test Portfolio

In [17]:
# ROI on the held-out test split, printed as a percentage with two decimals.
# NOTE(review): the PnL column comes from the full accepted_loans table, not
# the 60-month subset -- presumably matched to rows by index inside
# calculate_PnL_return; confirm.
test_roi = calculate_PnL_return(best_cbc, X_test, y_test, accepted_loans.PnL)
print(round(test_roi * 100, 2))

17.85


## ROI on the Entire Portfolio

In [18]:
# ROI over train and test combined: stack the two splits row-wise (features
# and targets in the same order) and score them together, printed as a
# percentage with two decimals.
X_all = pd.concat([X_train, X_test], axis=0)
y_all = pd.concat([y_train, y_test], axis=0)
overall_roi = calculate_PnL_return(best_cbc, X_all, y_all, accepted_loans.PnL)
print(round(overall_roi * 100, 2))

25.6
