In [1]:
from io import BytesIO
import boto3
import datetime
import multiprocessing as mp
import pickle
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
payments = pd.read_pickle('data/cleaned_payments_data_indexed_sorted.pkl.bz2')

In [3]:
payments.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,RECEIVED_AMT_INVESTORS,PBAL_END_PERIOD_INVESTORS,mths_since_issue
RECEIVED_D,LOAN_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2007-07-01,72176,7.189307,219.55983,1
2007-07-01,73582,7.289357,219.637436,1
2007-07-01,74505,7.25625,219.611313,1
2007-07-01,77792,3.975833,121.962997,1
2007-07-01,81085,9.03231,268.539795,1


In [4]:
loans_available = pd.read_pickle('data/test_predictions_xgb.pkl.bz2')
loans_available.head()

Unnamed: 0_level_0,id,loan_amnt,predicted_roi
issue_d,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-12-01,68407277,3600.0,6.819523
2015-12-01,68495092,8650.0,-12.377034
2015-12-01,68466961,28000.0,5.547388
2015-12-01,68466916,25000.0,4.509906
2015-12-01,68354783,9600.0,4.441494


# Loan Class

In [27]:
from dateutil.relativedelta import relativedelta

class Loan:
    def __init__(self, loan_id, borrowed_amount, investment_amount):
            self.id = int(loan_id)
            self.size = borrowed_amount
            self.initial_investment = min(investment_amount, borrowed_amount)
            self.fractional_investment = self.initial_investment / self.size
            self.principal_balance = min(investment_amount, borrowed_amount)
            self.status = 'Current'
            self.months_since_last_payment = 0
    
    def default(self):
        self.status = 'Default'
        self.principal_balance = 0

    def update_investment_principal_balance(self, overall_principal):
        self.principal_balance = overall_principal * self.fractional_investment
        
    def add_one_month_since_payment(self):
        self.months_since_last_payment += 1

# Portfolio Class

In [60]:
class Portfolio:
    def __init__(self, starting_balance, investment_per_loan, start_date, loans_df, payments_df, min_roi=5.0):
        self.active_loans = []
        self.defaulted_loans = []
        self.cash_balance = starting_balance
        self.invested_principal_balance = 0
        self.investment_per_loan = investment_per_loan
        self.date = start_date
        self.min_roi = min_roi
        self.all_loans_available = loans_df
        self.all_payments_data = payments_df

    def update_invested_principal_balance(self):
        self.invested_principal_balance = sum([loan.principal_balance for loan in self.active_loans])

    def increment_date_by_one_month(self):
        self.date += relativedelta(months=1)

    def purchase_loans(self, loans):
        for loan in loans:
            self.active_loans.append(loan)
            self.cash_balance -= loan.initial_investment

    def convert_df_rows_to_loans(self, df):
        loans = []
        rows = df.to_dict(orient='records')
        for row in rows:
            loans.append(Loan(row['id'], row['loan_amnt'], self.investment_per_loan))
        return loans

    def get_loans_available_for_current_date(self, loans_df):
        '''
        date parameter needs to be of type datetime.date
        '''
        return self.all_loans_available.loc[self.date]
    
    def get_loans_over_required_roi_threshold(self, df):
        return df[df['predicted_roi'] >= self.min_roi]

    def get_top_loans_to_buy(self, loans):
        # We want to take as many loans as we can from the top predicted roi.
        n = self.cash_balance // self.investment_per_loan
        sorted_loans = loans.sort_values(by='predicted_roi', ascending=False)
        return sorted_loans.head(n)
    
    def buy_loans_for_current_month(self):
        loans = self.get_loans_available_for_current_date(self.all_loans_available)
        loans = self.get_loans_over_required_roi_threshold(loans)
        loans_to_buy = self.get_top_loans_to_buy(loans)
        loan_objects = self.convert_df_rows_to_loans(loans_to_buy)
        self.purchase_loans(loan_objects)
        
    def get_payments_for_current_month(self):
        active_loan_ids = [loan.id for loan in myPortfolio.active_loans]
        latest_payments = self.all_payments_data.loc[myPortfolio.date].loc[active_loan_ids].dropna()
        return latest_payments
    
    def apply_payments(self, payments_for_month):
        for loan in self.active_loans:
            if loan.id in payments_for_month.index:
                total_payment = payments_for_month.loc[loan.id].RECEIVED_AMT_INVESTORS
                self.update_portfolio_cash_balance(loan.fractional_investment * total_payment)
                end_principal_total = payments_for_month.loc[loan.id].PBAL_END_PERIOD_INVESTORS
                loan.update_investment_principal_balance(end_principal_total)
                loan.months_since_last_payment = 0
                
    def get_and_apply_payments_for_current_month(self):
        payments = self.get_payments_for_current_month()
        self.apply_payments(payments)
                
    def update_portfolio_cash_balance(self, payment):
        self.cash_balance += payment

In [61]:
myPortfolio = Portfolio(10000, 50, datetime.date(2015,10,1), loans_available, payments)

In [62]:
myPortfolio.active_loans

[]

In [63]:
myPortfolio.buy_loans_for_current_month()

In [64]:
len(myPortfolio.active_loans)

200

In [65]:
myPortfolio.cash_balance

0

In [66]:
myPortfolio.get_and_apply_payments_for_current_month()
myPortfolio.cash_balance

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


5.555555555555555