In [1]:
__author__ = 'HK Dambanemuya'
__version__ = 'Python Interactive'

import pickle
import warnings
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
from tqdm import tqdm, tqdm_notebook
# Notebook-wide configuration.
# NOTE(review): this silences *every* warning, including pandas deprecation
# and SettingWithCopy warnings; consider narrowing the filter.
warnings.filterwarnings('ignore')
# None means "do not truncate cell contents". The legacy value -1 was
# deprecated in pandas 1.0 and is rejected with a ValueError by pandas 2.x.
pd.set_option('display.max_colwidth', None)
# Render floats with three decimal places.
pd.set_option('display.float_format', lambda x: '%.3f' % x)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

def filter_bids_by_n_sample(n, bids_map=None):
    """Return the first ``n`` bids of every listing, stacked into one frame.

    Parameters
    ----------
    n : int
        Number of leading rows to keep from each listing's bids frame
        (passed straight to ``DataFrame.head``).
    bids_map : dict, optional
        Mapping of listing key -> DataFrame holding that listing's bids.
        When omitted, the notebook-level ``listing_bids_map`` is used, which
        must already be defined at call time.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of ``frame.head(n)`` for every frame, in the
        mapping's iteration order.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when the mapping is empty.
    """
    if bids_map is None:
        # Fall back to the notebook global so existing one-argument calls
        # keep working unchanged.
        bids_map = listing_bids_map
    return pd.concat([frame.head(n) for frame in bids_map.values()], axis=0)


In [2]:
# Load the raw, pipe-delimited bid records
bids = pd.read_csv("../../Data/lending/bid_notick.txt", sep="|")
# Filter winning bids
bids = bids[bids.Status.isin(["Winning", "Partially Participating"])]
# Remove current listings (keep only expired or completed ones)
bids = bids[bids.ListingStatus.isin(["Expired", "Completed"])]
# Filter relevant columns. The explicit copy makes the column assignments
# below write to an independent frame instead of a slice of the filtered one
# (which triggers SettingWithCopyWarning and may silently not stick).
bids = bids[["ListingKey", "Bid_Key", "MemberKey", "CreationDate", "Amount", "ListingStatus"]].copy()
# Convert creation date to datetime format
bids["CreationDate"] = pd.to_datetime(bids["CreationDate"])
# YYYYMM string (e.g. '200703') to allow monthly time unit analysis;
# vectorized .dt accessor instead of a per-row Python lambda
bids["time_trunc"] = bids["CreationDate"].dt.strftime('%Y%m')
# Rename status column for consistency with other projects
# (index=str additionally converts the row labels to strings)
bids = bids.rename(index=str, columns={"ListingStatus": "Status"})
# Keep only bids whose listing appears in the final listing list.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load this from a trusted source.
with open("../../Data/lending/final_listings", 'rb') as f:
    filtered = pickle.load(f)
bids = bids[bids['ListingKey'].isin(filtered)]

In [3]:
# Load the raw, pipe-delimited listing records
listings = pd.read_csv('../../Data/lending/listing.txt', sep="|")
# Select relevant columns (copy so later column assignments do not write
# into a slice of the raw frame)
columns = ["Lst_Key", "AmountRequested", "AmountFunded", "StartDate", "EndDate", "Status", "BorrowerRate", "ProsperScore", "Duration", "CreditGrade", "DebtToIncomeRatio", "IsBorrowerHomeowner"]
listings = listings[columns].copy()
# Rename columns (index=str additionally converts the row labels to strings)
listings = listings.rename(index=str, columns={"Lst_Key": "ProjectID",
                                               "AmountFunded": "AmountRaised"})
# Pre-process credit information: encode letter grades as ordinal integers
# (AA is highest). Only the CreditGrade column is mapped; the previous
# frame-wide applymap ran the lookup on every cell of every column, is
# deprecated in recent pandas, and could rewrite unrelated values that
# happen to equal a grade label.
creditmap = {'AA':8, 'A':7, 'B':6, 'C':5, 'D':4, 'E':3, 'HR':2, 'NC':1}
listings['CreditGrade'] = listings['CreditGrade'].map(creditmap)
# Drop records with no credit information. Missing grades and labels absent
# from creditmap both become NaN after the mapping above.
listings = listings.dropna(subset=['CreditGrade']).astype({'CreditGrade': int})
# Remove current listings
listings = listings[listings.Status.isin(["Expired", "Completed"])]
# Remove listings that raised $0
listings = listings[listings.AmountRaised > 0]
# Show sample listings
listings.head()

In [4]:
# Create listing key variable: the distinct listing keys present in `bids`,
# in order of first appearance. Series.unique() is used instead of
# list(set(...)) because iterating a set of strings yields a different order
# on every kernel start (str hashing is randomized per process), which would
# make the row order of everything derived from this list non-reproducible.
listing_keys = bids['ListingKey'].unique().tolist()
# Sanity check: the number of distinct listings with bids must equal the
# number of retained listings.
assert len(listing_keys) == len(listings), (
    f"{len(listing_keys)} distinct listing keys in bids vs {len(listings)} rows in listings"
)
len(listing_keys)

143770

In [5]:
# Map ProjectID to AmountRequested
amount_requested_map = dict(zip(listings.ProjectID, listings.AmountRequested))

# Map listing keys to bids DataFrames, each ordered by CreationDate.
# One stable sort followed by a single groupby replaces the previous
# per-key `bids.query(...)`, which rescanned the whole bids table once per
# listing (reported as ~15 hours) and built the query from an f-string that
# breaks on keys containing a quote character.
bids_sorted = bids.sort_values(by='CreationDate', kind='mergesort')
# sort=False: no need to order the groups themselves; rows inside each group
# keep the CreationDate order established above.
bids_by_listing = {key: frame for key, frame in bids_sorted.groupby('ListingKey', sort=False)}
# Rebuild in listing_keys order so downstream concatenation order follows it.
listing_bids_map = {key: bids_by_listing[key] for key in listing_keys}
    


In [6]:
# Directory that receives one CSV per cutoff. Create it up front so the
# export does not fail on a fresh checkout after the expensive upstream
# cells have already run.
output_dir = Path("../../Data/lending/filtered")
output_dir.mkdir(parents=True, exist_ok=True)

# bids_<i> holds the first i bids of every listing, for i = 1..10
filtered_map = {}

for i in tqdm_notebook(range(1, 11)):
    var_df = filter_bids_by_n_sample(i)
    filtered_map[f'bids_{i}'] = var_df
    var_df.to_csv(output_dir / f"bids_{i}.csv", index=False)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


