# Portfolio Simulator

When training machine learning models it is essential to perform cross validation to ensure we're not overfitting. To that end, I have constructed a portfolio simulator to test our models on loans they have not seen before.

The portfolio simulator works as follows: {UPDATE}

The necessary classes and functions are contained in the `src/portfolio.py` file.

In [1]:
import pandas as pd
import numpy as np
import datetime
from src.portfolio import *

In [2]:
payments = pd.read_pickle('data/df_payments_cleaned_all.pkl.bz2', compression='bz2')
payments.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,RECEIVED_AMT_INVESTORS,PBAL_END_PERIOD_INVESTORS,IssuedDate,mths_since_issue
RECEIVED_D,LOAN_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-08-01,169200115,840.02002,34773.839844,2020-06-01,2
2020-08-01,169216811,217.649994,1819.47998,2020-06-01,2
2020-08-01,169224081,652.97998,19557.210938,2020-06-01,2
2020-08-01,169225459,1394.140015,1560.839966,2020-06-01,2
2020-08-01,169232425,8095.52002,0.0,2020-06-01,2


Next let's read in the predictions that are trained models have already made for the loans in the testing set.

In [231]:
predictions_xgb = pd.read_pickle('data/model_xgb_predictions.pkl.bz2')
predictions_random = pd.read_pickle('data/model_random_pick.pkl.bz2')
predictions_high_interest = pd.read_pickle('data/model_high_interest_rate.pkl.bz2')
predictions_low_interest = pd.read_pickle('data/model_low_interest_rate.pkl.bz2')

In [58]:
payments2 = payments.reset_index(1)

In [86]:
payments2.loc['2020-07-01']

Unnamed: 0_level_0,LOAN_ID,RECEIVED_AMT_INVESTORS,PBAL_END_PERIOD_INVESTORS,IssuedDate,mths_since_issue
RECEIVED_D,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-07-01,288279,413.880005,3211.699951,2016-03-01,52
2020-07-01,364688,470.149994,3987.159912,2016-04-01,51
2020-07-01,401654,289.209991,3194.330078,2016-07-01,48
2020-07-01,556854,644.669983,9586.780273,2016-11-01,44
2020-07-01,568842,342.850006,3254.929932,2016-05-01,50
...,...,...,...,...,...
2020-07-01,169233515,127.239998,3902.830078,2020-06-01,1
2020-07-01,169233741,986.090027,30130.419922,2020-06-01,1
2020-07-01,169234187,480.920013,14638.580078,2020-06-01,1
2020-07-01,169235363,362.730011,9426.540039,2020-06-01,1


In [227]:
min_roi = 1
myPortfolio = Portfolio(20000, 25, datetime.date(2017,8,1), predictions_xgb, payments, min_roi=roi)
#while myPortfolio.date < datetime.date(2020,8,1):
#    myPortfolio.simulate_month()
#    print(myPortfolio.total_balance)
#rois.append(roi)
#balances.append(myPortfolio.total_balance)

In [228]:
myPortfolio.get_loans_available_for_current_date(myPortfolio.all_loans_available)

Unnamed: 0_level_0,id,loan_amnt,predicted_roi
issue_d,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-08-01,114844590,15400,-6.257183
2017-08-01,113880484,5500,2.972790
2017-08-01,115705737,5000,3.279590
2017-08-01,115412547,6000,1.677537
2017-08-01,115402601,3000,5.930532
...,...,...,...
2017-08-01,116804539,3000,5.551862
2017-08-01,115120907,4800,0.662874
2017-08-01,116670505,10000,5.156517
2017-08-01,116736999,9600,2.011774


In [130]:
balances

[]

In [184]:
from dateutil.relativedelta import relativedelta

class Loan:
    def __init__(self, loan_id, borrowed_amount, investment_amount):
            self.id = int(loan_id)
            self.size = borrowed_amount
            self.initial_investment = min(investment_amount, borrowed_amount)
            self.fractional_investment = self.initial_investment / self.size
            self.principal_balance = min(investment_amount, borrowed_amount)
            self.status = 'Current'
            self.months_since_last_payment = 0
    
    def default(self):
        self.status = 'Default'
        self.principal_balance = 0

    def update_investment_principal_balance(self, overall_principal):
        self.principal_balance = overall_principal * self.fractional_investment
        
    def add_one_month_since_payment(self):
        self.months_since_last_payment += 1

class Portfolio:
    def __init__(self, starting_balance, investment_per_loan, start_date, loans_df, payments_df, min_roi=5.0):
        self.active_loans = []
        self.defaulted_loans = []
        self.cash_balance = starting_balance
        self.total_balance = starting_balance
        self.invested_principal_balance = 0
        self.investment_per_loan = investment_per_loan
        self.date = start_date
        self.min_roi = min_roi
        self.all_loans_available = loans_df
        self.all_payments_data = payments_df

    def update_invested_principal_balance(self):
        self.invested_principal_balance = sum([loan.principal_balance for loan in self.active_loans])

    def increment_date_by_one_month(self):
        self.date += relativedelta(months=1)

    def purchase_loans(self, loans):
        for loan in loans:
            self.active_loans.append(loan)
            self.cash_balance -= loan.initial_investment

    def convert_df_rows_to_loans(self, df):
        loans = []
        rows = df.to_dict(orient='records')
        for row in rows:
            loans.append(Loan(row['id'], row['loan_amnt'], self.investment_per_loan))
        return loans

    def get_loans_available_for_current_date(self, loans_df):
        '''
        date parameter needs to be of type datetime.date
        '''
        return self.all_loans_available.loc[str(self.date)]
    
    def get_loans_over_required_roi_threshold(self, df):
        return df[df['predicted_roi'] >= self.min_roi]

    def get_top_loans_to_buy(self, loans):
        # We want to take as many loans as we can from the top predicted roi.
        n = int(self.cash_balance // self.investment_per_loan)
        sorted_loans = loans.sort_values(by='predicted_roi', ascending=False)
        return sorted_loans.head(n)
    
    def buy_loans_for_current_month(self):
        loans = self.get_loans_available_for_current_date(self.all_loans_available)
        loans = self.get_loans_over_required_roi_threshold(loans)
        loans_to_buy = self.get_top_loans_to_buy(loans)
        loan_objects = self.convert_df_rows_to_loans(loans_to_buy)
        self.purchase_loans(loan_objects)
        
    def get_payments_for_current_month(self):
        active_loan_ids = [loan.id for loan in self.active_loans]
        #latest_payments = self.all_payments_data.loc[self.date].loc[active_loan_ids,:].dropna()
        apfd = self.all_payments_data.loc[str(self.date)]
        # The line below is for when we have a multi index
        #latest_payments = apfd.loc[apfd.index.get_level_values(1).isin(active_loan_ids),:]
        # The line below is for when we don't have a multi index.
        latest_payments = apfd.loc[apfd['LOAN_ID'].isin(active_loan_ids), :]
        return latest_payments
    
    def apply_payments(self, payments_for_month):
        for loan in self.active_loans:
            # The line below is when we have a multi index.
            #if loan.id in payments_for_month.index.get_level_values(1):
            # The line below is when we don't have a multi index.
            if loan.id in payments_for_month.index:
                total_payments = 0
                end_principal_total = 0
                try:
                    # In case we have more than 1 payment per month
                    #total_payment = sum(payments_for_month.loc[loan.id].RECEIVED_AMT_INVESTORS)
                    total_payments = payments_for_month.loc[loan.id, 'RECEIVED_AMT_INVESTORS'].sum()
                    end_principal_total = min(payments_for_month.loc[loan.id].PBAL_END_PERIOD_INVESTORS)
                except:
                    total_payments = payments_for_month.loc[loan.id].RECEIVED_AMT_INVESTORS
                    end_principal_total = payments_for_month.loc[loan.id].PBAL_END_PERIOD_INVESTORS
                    
                self.update_portfolio_cash_balance(loan.fractional_investment * total_payments)
                loan.update_investment_principal_balance(end_principal_total)
                loan.months_since_last_payment = 0
                
    def get_and_apply_payments_for_current_month(self):
        payments = self.get_payments_for_current_month()
        self.apply_payments(payments)
                
    def update_portfolio_cash_balance(self, payment):
        self.cash_balance += payment
        
    def add_one_month_since_loan_payment(self):
        for loan in self.active_loans:
            loan.add_one_month_since_payment()
            
    def clear_defaulted_loans(self):
        for loan in self.active_loans:
            if loan.months_since_last_payment > 4:
                self.defaulted_loans.append(loan)
                loan.default()
        self.active_loans = [loan for loan in self.active_loans if loan.status != 'Default']
    
    def update_portfolio_total_balance(self):
        self.total_balance = self.invested_principal_balance + self.cash_balance
        
    def simulate_month(self):
        self.buy_loans_for_current_month()
        self.get_and_apply_payments_for_current_month()
        self.add_one_month_since_loan_payment()
        self.clear_defaulted_loans()
        self.update_invested_principal_balance()
        self.update_portfolio_total_balance()
        self.increment_date_by_one_month()

In [202]:
myPortfolio = Portfolio(20000, 25, datetime.date(2017,8,1), predictions_xgb, payments2, min_roi=roi)

In [203]:
myPortfolio.get_loans_available_for_current_date(myPortfolio.all_loans_available)

Unnamed: 0_level_0,id,loan_amnt,predicted_roi
issue_d,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-08-01,114844590,15400,-6.257183
2017-08-01,113880484,5500,2.972790
2017-08-01,115705737,5000,3.279590
2017-08-01,115412547,6000,1.677537
2017-08-01,115402601,3000,5.930532
...,...,...,...
2017-08-01,116804539,3000,5.551862
2017-08-01,115120907,4800,0.662874
2017-08-01,116670505,10000,5.156517
2017-08-01,116736999,9600,2.011774


In [204]:
myPortfolio.buy_loans_for_current_month()

In [205]:
len(myPortfolio.active_loans)

800

In [206]:
800*25

20000

In [207]:
myPortfolio.get_and_apply_payments_for_current_month()
myPortfolio.add_one_month_since_loan_payment()
myPortfolio.clear_defaulted_loans()
myPortfolio.update_invested_principal_balance()
myPortfolio.update_portfolio_total_balance()
myPortfolio.increment_date_by_one_month()

In [191]:
myPortfolio.date

datetime.date(2017, 9, 1)

In [192]:
myPortfolio.cash_balance

0

In [208]:
myPortfolio.get_and_apply_payments_for_current_month()

In [209]:
myPortfolio.cash_balance

0

In [195]:
def apply_payments(self, payments_for_month):
        for loan in self.active_loans:
            if loan.id in payments_for_month.index:
                total_payments = 0
                end_principal_total = 0
                try:
                    # In case we have more than 1 payment per month
                    #total_payment = sum(payments_for_month.loc[loan.id].RECEIVED_AMT_INVESTORS)
                    total_payments = payments_for_month.loc[loan.id, 'RECEIVED_AMT_INVESTORS'].sum()
                    end_principal_total = min(payments_for_month.loc[loan.id].PBAL_END_PERIOD_INVESTORS)
                except:
                    total_payments = payments_for_month.loc[loan.id].RECEIVED_AMT_INVESTORS
                    end_principal_total = payments_for_month.loc[loan.id].PBAL_END_PERIOD_INVESTORS
                    
                self.update_portfolio_cash_balance(loan.fractional_investment * total_payments)
                loan.update_investment_principal_balance(end_principal_total)
                loan.months_since_last_payment = 0

In [210]:
payments = myPortfolio.get_payments_for_current_month()

In [211]:
payments.head()

Unnamed: 0_level_0,LOAN_ID,RECEIVED_AMT_INVESTORS,PBAL_END_PERIOD_INVESTORS,IssuedDate,mths_since_issue
RECEIVED_D,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-09-01,112748566,99.559998,2524.601074,2017-08-01,1
2017-09-01,112780028,111.279999,3509.929932,2017-08-01,1
2017-09-01,112781445,124.150002,3900.350098,2017-08-01,1
2017-09-01,112789580,163.490005,4881.968262,2017-08-01,1
2017-09-01,112794476,342.170013,9775.163086,2017-08-01,1


In [200]:
apfd = myPortfolio.all_payments_data.loc[str(myPortfolio.date)]
apfd

Unnamed: 0_level_0,LOAN_ID,RECEIVED_AMT_INVESTORS,PBAL_END_PERIOD_INVESTORS,IssuedDate,mths_since_issue
RECEIVED_D,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-09-01,55716,139.149994,2458.739502,2016-08-01,13
2017-09-01,56121,245.160004,3747.781494,2016-01-01,20
2017-09-01,56705,354.890015,4979.461914,2015-11-01,22
2017-09-01,65419,500.000000,4117.140137,2015-06-01,27
2017-09-01,66128,505.600006,13724.662109,2015-03-01,30
...,...,...,...,...,...
2017-09-01,119397119,664.000000,4336.000000,2017-09-01,1
2017-09-01,119402360,550.000000,6225.000000,2017-09-01,1
2017-09-01,119408927,79.110001,11920.889648,2017-09-01,1
2017-09-01,119689954,514.000000,14486.000000,2017-09-01,1


In [201]:
latest_payments = apfd.loc[apfd.index.isin(active_loan_ids), :]
latest_payments

Unnamed: 0_level_0,LOAN_ID,RECEIVED_AMT_INVESTORS,PBAL_END_PERIOD_INVESTORS,IssuedDate,mths_since_issue
RECEIVED_D,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


In [180]:
active_loan_ids = [loan.id for loan in myPortfolio.active_loans]

In [226]:
latest_payments = apfd.loc[apfd['LOAN_ID'].isin(active_loan_ids), :]
payments_for_month = latest_payments
payments_for_month.set_index('LOAN_ID')

Unnamed: 0_level_0,RECEIVED_AMT_INVESTORS,PBAL_END_PERIOD_INVESTORS,IssuedDate,mths_since_issue
LOAN_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
112748566,99.559998,2524.601074,2017-08-01,1
112780028,111.279999,3509.929932,2017-08-01,1
112781445,124.150002,3900.350098,2017-08-01,1
112789580,163.490005,4881.968262,2017-08-01,1
112794476,342.170013,9775.163086,2017-08-01,1
...,...,...,...,...
117555816,134.050003,3908.016602,2017-08-01,1
117558583,141.020004,3530.620117,2017-08-01,1
117561241,61.950001,1950.066650,2017-08-01,1
117840699,100.000000,2925.000000,2017-08-01,1
