In [55]:
from io import BytesIO
import boto3
import datetime
import multiprocessing as mp
import pickle
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [56]:
payments = pd.read_pickle('data/cleaned_payments_data_indexed_sorted.pkl.bz2')

In [57]:
payments.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,RECEIVED_AMT_INVESTORS,PBAL_END_PERIOD_INVESTORS,mths_since_issue
RECEIVED_D,LOAN_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2007-07-01,72176,7.189307,219.55983,1
2007-07-01,73582,7.289357,219.637436,1
2007-07-01,74505,7.25625,219.611313,1
2007-07-01,77792,3.975833,121.962997,1
2007-07-01,81085,9.03231,268.539795,1


In [58]:
loans_available = pd.read_pickle('data/test_predictions_xgb.pkl.bz2')
loans_available.head()

Unnamed: 0_level_0,id,loan_amnt,predicted_roi
issue_d,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-12-01,68407277,3600.0,6.819523
2015-12-01,68495092,8650.0,-12.377034
2015-12-01,68466961,28000.0,5.547388
2015-12-01,68466916,25000.0,4.509906
2015-12-01,68354783,9600.0,4.441494


# Loan Class

In [59]:
from dateutil.relativedelta import relativedelta

class Loan:
    def __init__(self, loan_id, borrowed_amount, investment_amount):
            self.id = int(loan_id)
            self.size = borrowed_amount
            self.initial_investment = min(investment_amount, borrowed_amount)
            self.fractional_investment = self.initial_investment / self.size
            self.principal_balance = min(investment_amount, borrowed_amount)
            self.status = 'Current'
            self.months_since_last_payment = 0
    
    def default(self):
        self.status = 'Default'
        self.principal_balance = 0

    def update_investment_principal_balance(self, overall_principal):
        self.principal_balance = overall_principal * self.fractional_investment
        
    def add_one_month_since_payment(self):
        self.months_since_last_payment += 1

# Portfolio Class

In [60]:
class Portfolio:
    def __init__(self, starting_balance, investment_per_loan, start_date, loans_df, payments_df, min_roi=5.0):
        self.active_loans = []
        self.defaulted_loans = []
        self.cash_balance = starting_balance
        self.total_balance = starting_balance
        self.invested_principal_balance = 0
        self.investment_per_loan = investment_per_loan
        self.date = start_date
        self.min_roi = min_roi
        self.all_loans_available = loans_df
        self.all_payments_data = payments_df

    def update_invested_principal_balance(self):
        self.invested_principal_balance = sum([loan.principal_balance for loan in self.active_loans])

    def increment_date_by_one_month(self):
        self.date += relativedelta(months=1)

    def purchase_loans(self, loans):
        for loan in loans:
            self.active_loans.append(loan)
            self.cash_balance -= loan.initial_investment

    def convert_df_rows_to_loans(self, df):
        loans = []
        rows = df.to_dict(orient='records')
        for row in rows:
            loans.append(Loan(row['id'], row['loan_amnt'], self.investment_per_loan))
        return loans

    def get_loans_available_for_current_date(self, loans_df):
        '''
        date parameter needs to be of type datetime.date
        '''
        return self.all_loans_available.loc[self.date]
    
    def get_loans_over_required_roi_threshold(self, df):
        return df[df['predicted_roi'] >= self.min_roi]

    def get_top_loans_to_buy(self, loans):
        # We want to take as many loans as we can from the top predicted roi.
        n = int(self.cash_balance // self.investment_per_loan)
        sorted_loans = loans.sort_values(by='predicted_roi', ascending=False)
        return sorted_loans.head(n)
    
    def buy_loans_for_current_month(self):
        loans = self.get_loans_available_for_current_date(self.all_loans_available)
        loans = self.get_loans_over_required_roi_threshold(loans)
        loans_to_buy = self.get_top_loans_to_buy(loans)
        loan_objects = self.convert_df_rows_to_loans(loans_to_buy)
        self.purchase_loans(loan_objects)
        
    def get_payments_for_current_month(self):
        active_loan_ids = [loan.id for loan in self.active_loans]
        latest_payments = self.all_payments_data.loc[self.date].loc[active_loan_ids].dropna()
        return latest_payments
    
    def apply_payments(self, payments_for_month):
        for loan in self.active_loans:
            if loan.id in payments_for_month.index:
                total_payment = payments_for_month.loc[loan.id].RECEIVED_AMT_INVESTORS
                self.update_portfolio_cash_balance(loan.fractional_investment * total_payment)
                end_principal_total = payments_for_month.loc[loan.id].PBAL_END_PERIOD_INVESTORS
                loan.update_investment_principal_balance(end_principal_total)
                loan.months_since_last_payment = 0
                
    def get_and_apply_payments_for_current_month(self):
        payments = self.get_payments_for_current_month()
        self.apply_payments(payments)
                
    def update_portfolio_cash_balance(self, payment):
        self.cash_balance += payment
        
    def add_one_month_since_loan_payment(self):
        for loan in self.active_loans:
            loan.add_one_month_since_payment()
            
    def clear_defaulted_loans(self):
        for loan in self.active_loans:
            if loan.months_since_last_payment > 4:
                self.defaulted_loans.append(loan)
                loan.default()
        self.active_loans = [loan for loan in self.active_loans if loan.status != 'Default']
        
    def update_invested_principal_balance(self):
        self.invested_principal_balance = sum([loan.principal_balance for loan in self.active_loans])
    
    def update_portfolio_total_balance(self):
        self.total_balance = self.invested_principal_balance + self.cash_balance
        
    def simulate_month(self):
        self.buy_loans_for_current_month()
        self.get_and_apply_payments_for_current_month()
        self.add_one_month_since_loan_payment()
        self.clear_defaulted_loans()
        self.update_invested_principal_balance()
        self.update_portfolio_total_balance()
        self.increment_date_by_one_month()

In [61]:
myPortfolio = Portfolio(20000, 25, datetime.date(2015,10,1), loans_available, payments)

In [62]:
myPortfolio.simulate_month()
len(myPortfolio.active_loans)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


800

In [63]:
myPortfolio.buy_loans_for_current_month()
len(myPortfolio.active_loans)

802

In [64]:
payments = myPortfolio.get_payments_for_current_month()
payments.head()

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


Unnamed: 0_level_0,RECEIVED_AMT_INVESTORS,PBAL_END_PERIOD_INVESTORS,mths_since_issue
LOAN_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
61422649,51.459999,1370.928345,1.0
62358032,74.32,1958.996704,1.0
61539420,68.739998,1764.244995,1.0
61543430,40.029999,1172.26001,1.0
63424162,51.75,1466.5625,1.0


In [65]:
def apply_payments(self, payments_for_month):
    for loan in self.active_loans:
        if loan.id in payments_for_month.index:
            total_payment = payments_for_month.loc[loan.id].RECEIVED_AMT_INVESTORS
            self.update_portfolio_cash_balance(loan.fractional_investment * total_payment)
            end_principal_total = payments_for_month.loc[loan.id].PBAL_END_PERIOD_INVESTORS
            loan.update_investment_principal_balance(end_principal_total)
            loan.months_since_last_payment = 0

In [66]:
myPortfolio.cash_balance

10.990114785451738

In [78]:
payments.loc[61523442].PBAL_END_PERIOD_INVESTORS.min()


33002.598

In [67]:
output = []
for loan in myPortfolio.active_loans:
    if loan.id in payments.index:
        total_payments = payments.loc[loan.id].RECEIVED_AMT_INVESTORS
        for payment in total_payments:
            cash_received = loan.fractional_investment * total_payment
            myPortfolio.update_portfolio_cash_balance(cash_received)

61422649 51.459999084472656
62358032 74.31999969482422
61539420 68.73999786376953
61543430 40.029998779296875
63424162 51.75
61502859 34.18000030517578
61319959 62.939998626708984
60940147 96.62000274658203
60742685 98.9800033569336
60665846 66.86000061035156
60497507 125.08000183105469
60657517 118.7699966430664
63424208 49.11000061035156
62236029 35.650001525878906
62165376 54.68000030517578
61331600 106.29000091552734
62326817 120.08000183105469
62012581 60.38999938964844
62227058 64.68000030517578
62105271 29.760000228881836
61218145 33.0
61514128 35.939998626708984
61464591 124.7699966430664
61018667 94.02999877929688
62207125 122.37999725341797
63395858 126.44000244140625
60753415 65.47000122070312
61362250 58.93000030517578
61372829 103.94000244140625
60794097 78.56999969482422
61508869 51.75
63395828 100.05999755859375
62157398 121.94000244140625
60843991 79.72000122070312
61551011 49.33000183105469
61424627 102.5199966430664
61512439 34.970001220703125
61363046 151.80000305175

61543264 86.41000366210938
61374357 192.74000549316406
61422337 124.7699966430664
62127661 1129.18994140625
61441610 65.98999786376953
61479957 144.3800048828125
61412494 200.22999572753906
60617156 46.93000030517578
62694368 119.4000015258789
61368412 120.26000213623047
63335308 432.0899963378906
62884745 982.02001953125
61442333 855.6699829101562
61399744 359.75
60833453 LOAN_ID
60833453    229.139999
60833453    197.839996
Name: RECEIVED_AMT_INVESTORS, dtype: float32


ValueError: cannot reindex from a duplicate axis

In [None]:
myPortfolio.cash_balance

In [None]:
payments.index

In [8]:
while myPortfolio.date < datetime.date(2016,10,1):
    myPortfolio.simulate_month()
    print(myPortfolio.total_balance)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


20000.436376304624


ValueError: cannot reindex from a duplicate axis