In [2]:
import multiprocessing as mp
import numpy as np
from numpy_financial import irr
import pickle
import boto3
import pandas as pd
from io import BytesIO

In [9]:
def read_pickle_from_s3(filename, bucket='loan-analysis-data'):
    '''
    Download a pickled object from S3 and deserialize it.

    Args:
        filename (str): Key of the pickle file inside the bucket.
        bucket (str): Name of the S3 bucket to read from.

    Returns:
        object: Whatever Python object was stored in the pickle.
    '''
    response = boto3.client('s3').get_object(Bucket=bucket, Key=filename)
    # The whole body is read into memory and unpickled from an in-memory buffer.
    # NOTE: unpickling can execute arbitrary code, so only point this at trusted buckets.
    raw_bytes = response['Body'].read()
    return pickle.load(BytesIO(raw_bytes))

In [21]:
def read_dataframe_from_s3(filename, bucket='loan-analysis-data'):
    '''
    Download a bz2-compressed pickled DataFrame from S3 and load it.

    Args:
        filename (str): Key of the compressed pickle file inside the bucket.
        bucket (str): Name of the S3 bucket to read from.

    Returns:
        DataFrame: The object stored in the pickle, as returned by pandas.
    '''
    response = boto3.client('s3').get_object(Bucket=bucket, Key=filename)
    buffer = BytesIO(response['Body'].read())
    # Compression is stated explicitly: an in-memory buffer has no file
    # extension for pandas to infer it from.
    return pd.read_pickle(buffer, compression='bz2')

In [18]:
# Load the lookup of loan ID -> loan amount; get_roi_for_loan_id indexes it by
# loan ID to obtain the starting balance.
loan_amounts = read_pickle_from_s3('loan_amounts.pickle')

In [19]:
# Load the collection of loan IDs that ROI will be computed for.
training_loan_ids = read_pickle_from_s3('training_loan_ids.pickle')

In [22]:
# Load the payment history for the training loans and preview the first rows.
# The frame is indexed by (RECEIVED_D, LOAN_ID), as the preview below shows.
df_payments = read_dataframe_from_s3('df_payments_training_loans.pkl.bz2')
df_payments.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,IssuedDate,RECEIVED_AMT_INVESTORS,PBAL_END_PERIOD_INVESTORS,mths_since_issue
RECEIVED_D,LOAN_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-02-01,395561,2010-01-01,849.835388,22141.585938,1
2010-02-01,420666,2010-01-01,291.996674,8524.081055,1
2010-02-01,462538,2010-01-01,662.18927,19144.474609,1
2010-02-01,465492,2010-01-01,237.115005,6888.518555,1
2010-02-01,466800,2010-01-01,417.496155,12306.734375,1


In [23]:
def get_one_loan_payment_data(df_payments, loan_id):
    '''
    Function to extract payments made by a single loan ID.

    Args:
        df_payments (dataframe): The dataframe containing all loan payments data for our training loans.
            Only training loans are relevant since ROI needs to be calculated as our label to use in model training.
            It must be indexed by a two-level MultiIndex whose second level is the loan ID
            (e.g. (RECEIVED_D, LOAN_ID)) and must contain the columns
            'RECEIVED_AMT_INVESTORS' and 'mths_since_issue'.
        loan_id (int): The loan ID that we want to get payments for.

    Returns:
        DataFrame: Returns a dataframe containing payment history for a single loan
            (columns 'RECEIVED_AMT_INVESTORS' and 'mths_since_issue'), or an empty
            dataframe if the lookup raises a KeyError (e.g. the loan ID is not in the index).
    '''
    payment_columns = ['RECEIVED_AMT_INVESTORS', 'mths_since_issue']
    try:
        # Slicing the first index level with ':' and selecting the columns as a list
        # yields a dataframe (not a series), even for a loan with only 1 payment.
        return df_payments.loc[pd.IndexSlice[:, loan_id], payment_columns]
    except KeyError:
        # Need to return an empty dataframe if no payments were found for the given loan_id.
        # Only KeyError is caught so that unrelated failures (bad arguments,
        # interrupts) are not silently turned into "no payments".
        return pd.DataFrame()

In [26]:
def convert_monthly_return_to_annual(irr):
    '''
    Compound a monthly rate of return over 12 months.

    Args:
        irr (float): Monthly rate of return expressed as a fraction (0.01 == 1%).

    Returns:
        float: The equivalent annual rate of return, also as a fraction.
    '''
    monthly_growth_factor = 1 + irr
    annual_growth_factor = monthly_growth_factor ** 12
    return annual_growth_factor - 1

In [27]:
def get_roi_for_loan_id(loan_id):
    '''
    Compute the annualized return, in percent, for a single loan.

    Reads the notebook-level `loan_amounts` and `df_payments` objects.

    Args:
        loan_id: Loan ID used to index `loan_amounts` and to look up payments in `df_payments`.

    Returns:
        -100 if no payments are found for the loan; otherwise 100 times the
        annualized internal rate of return of the loan's monthly cash flows.
    '''
    principal = loan_amounts[loan_id]
    history = get_one_loan_payment_data(df_payments, loan_id)
    if not len(history):
        # No payments at all is reported as a -100% return.
        return -100

    months = history['mths_since_issue']
    amounts = history['RECEIVED_AMT_INVESTORS']

    # One cash-flow slot per month since issue; slot 0 is the initial outlay.
    cash_flows = np.zeros(months.max() + 1)
    cash_flows[0] = -principal
    for month, amount in zip(months, amounts):
        # Accumulate so several payments in the same month are summed.
        cash_flows[month] += amount

    monthly_rate = irr(cash_flows)
    return 100 * convert_monthly_return_to_annual(monthly_rate)

In [29]:
# Spot-check the ROI calculation on a single loan ID before running it in bulk.
get_roi_for_loan_id(466800)

8.209543086516513

In [6]:
# Create a pool with one worker process per CPU reported by the machine.
# NOTE(review): the pool is never closed/joined in the visible cells — confirm
# whether cleanup happens elsewhere.
num_cpus = mp.cpu_count()
pool = mp.Pool(processes=num_cpus)

In [None]:
# Compute ROI in parallel for the first 1000 training loan IDs only.
loan_ids_batch = list(training_loan_ids)[:1000]
results = pool.map(get_roi_for_loan_id, loan_ids_batch)

In [None]:
# Pair each of the first 1000 training loan IDs with its computed ROI.
loan_rois = {
    loan_id: roi
    for loan_id, roi in zip(list(training_loan_ids)[:1000], results)
}

In [8]:
# Serialize the loan ID -> ROI mapping and upload it to S3 as one pickle object.
bucket, key = 'loan-analysis-data', 'loan_rois.pickle'
pickle_byte_obj = pickle.dumps(loan_rois)
s3_resource = boto3.resource('s3')
destination = s3_resource.Object(bucket, key)
# Left as the last expression so the cell displays the S3 response.
destination.put(Body=pickle_byte_obj)

{'ResponseMetadata': {'RequestId': 'DC9FFA4B5188EB0C',
  'HostId': 'rtXDKJp6pVw/0susCMORaQqXeStJRKSHjMn/h6zY38J47kWKaidoJinWnymTsNEV71AdlwwWjxk=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'rtXDKJp6pVw/0susCMORaQqXeStJRKSHjMn/h6zY38J47kWKaidoJinWnymTsNEV71AdlwwWjxk=',
   'x-amz-request-id': 'DC9FFA4B5188EB0C',
   'date': 'Thu, 13 Aug 2020 16:26:37 GMT',
   'etag': '"05a161cece359adefaedea21340765eb"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"05a161cece359adefaedea21340765eb"'}

In [12]:
# Stop the EC2 instance with the hard-coded ID below.
# NOTE(review): presumably this is the instance running the analysis, stopped
# to avoid idle cost — confirm before re-running this cell.
ec2 = boto3.resource('ec2')
loan_instance = ['i-05c63d902d7d04e7b']
ec2.instances.filter(InstanceIds=loan_instance).stop()

[{'StoppingInstances': [{'CurrentState': {'Code': 80, 'Name': 'stopped'},
    'InstanceId': 'i-05c63d902d7d04e7b',
    'PreviousState': {'Code': 80, 'Name': 'stopped'}}],
  'ResponseMetadata': {'RequestId': '7bf106fc-9e43-435b-a9d4-54a4416c73a0',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amzn-requestid': '7bf106fc-9e43-435b-a9d4-54a4416c73a0',
    'content-type': 'text/xml;charset=UTF-8',
    'content-length': '578',
    'date': 'Thu, 13 Aug 2020 16:28:20 GMT',
    'server': 'AmazonEC2'},
   'RetryAttempts': 0}}]