In [1]:
import pandas as pd
import numpy as np
from ambry import library
# Open the Ambry library handle; later cells fetch bundles through `l.get(...)`.
l = library()

# Fetch the partition of the 'ffiec.gov-hmda-2010e-lar-2010-ca' bundle. The bare
# expression on the last line makes the cell display the partition's
# `identity.fqname` converted to a string.
p = l.get('ffiec.gov-hmda-2010e-lar-2010-ca').partition
str(p.identity.fqname)


'ffiec.gov-hmda-2010e-lar-2010-ca-0.0.7~p02X00X007'

In [2]:
# Load the 'ffiec.gov-hmda-2010e-ts' partition through its `.pandas` accessor.
# `get_ts` reads this module-level `ts` (activity_year, respondent_id, respondent_name).
ts = l.get('ffiec.gov-hmda-2010e-ts').partition.pandas

In [3]:
def get_ts(year):
    """Build a respondent_id -> respondent_name lookup for one activity year.

    Reads the module-level ``ts`` frame, keeps the rows whose
    ``activity_year`` equals ``year``, casts ``respondent_id`` to ``str``,
    keeps the first row for each distinct ``respondent_name`` and returns the
    result indexed by ``respondent_id`` with ``respondent_name`` as its only
    column.
    """
    in_year = ts.activity_year == year
    names = ts.loc[in_year, ['respondent_id', 'respondent_name']].copy()
    names['respondent_id'] = names['respondent_id'].astype(str)

    # De-duplication is by name, not by id, so an id can still appear more
    # than once in the returned index if it carries several names.
    unique_names = names.drop_duplicates(subset=['respondent_name'])
    return unique_names.set_index(['respondent_id'])

def get_lar(year):
    """Load one year's LAR rows with ``county_code = 73`` as a DataFrame.

    The bundle is looked up through the module-level library handle ``l``;
    its partition is filtered with a SQL ``select`` and converted via the
    ``.pandas`` accessor. A ``tract`` column duplicating
    ``census_tract_number`` is added before the frame is returned.
    """
    # Bundle names look like 'ffiec.gov-hmda-2010e-lar-2012-ca'.
    bundle_name = 'ffiec.gov-hmda-2010e-lar-{}-ca'.format(year)
    partition = l.get(bundle_name).partition
    county_rows = partition.select("SELECT * FROM lar WHERE county_code = 73")
    lar = county_rows.pandas

    # Expose the tract number under the shorter name 'tract' so later code
    # does not have to rename columns.
    lar['tract'] = lar.census_tract_number
    return lar



In [4]:
def generate_share_frame(df, ts_name, group):
    """Summarise loan counts and amounts per respondent, keyed by respondent name.

    Parameters
    ----------
    df : DataFrame
        Loan rows with at least ``respondent_id``, ``id`` and ``loan_amount``.
    ts_name : DataFrame
        Lookup indexed by ``respondent_id`` with a ``respondent_name`` column.
    group : object
        Accepted for interface compatibility; not used by the computation.

    Returns
    -------
    DataFrame
        Indexed by ``respondent_name`` with columns, in this order:
        ``count``, ``count_pct``, ``amount``, ``amount_pct``, ``amount_avg``,
        ``amount_median``. Respondents missing from ``ts_name`` are dropped by
        the inner merge.
    """
    by_respondent = df.groupby('respondent_id')

    # Pick the column *before* aggregating. Aggregating the whole frame and
    # then indexing one column is wasteful, and mean()/median() over
    # non-numeric columns raises TypeError on pandas >= 2.0.
    loan_amounts = by_respondent['loan_amount']

    dfx = pd.DataFrame({'count': by_respondent['id'].count()})
    dfx['count_pct'] = (dfx['count'] / dfx['count'].sum() * 100.0).round(2)

    dfx['amount'] = loan_amounts.sum()
    dfx['amount_pct'] = (dfx['amount'] / dfx['amount'].sum() * 100.0).round(2)

    dfx['amount_avg'] = loan_amounts.mean().round(0)
    dfx['amount_median'] = loan_amounts.median().round(0)

    # Swap the respondent_id index for the human-readable respondent name.
    named = dfx.merge(ts_name, left_index=True, right_index=True)
    return named.set_index(['respondent_name'])
    

In [5]:
def build_market_share_report(lar, year):
    """Build the per-lender market-share table for ``lar`` plus a 'Total' row.

    The respondent-name lookup for ``year`` comes from ``get_ts`` and the
    per-respondent statistics from ``generate_share_frame``; the six columns
    are then relabelled with display titles and a final row labelled
    ``'Total'`` holding the column-wise sums is appended.

    NOTE(review): the 'Total' row sums every column, including the
    percentage, average and median columns -- confirm that is the intended
    meaning of those totals.
    """
    cols = [u'Count',u'Market Share % By Originations',u'Total Amount', u'Market Share % By Amount',
            u'Average Loan Size', u'Median Loan Size']

    report = generate_share_frame(lar, get_ts(year), 'all')

    # Positional relabel: relies on generate_share_frame's column order.
    report.columns = cols

    totals = report.sum()
    total_row = totals.to_frame('Total').T

    return pd.concat([report[cols], total_row])


In [6]:
def combined_report(lar, year):
    """Join the 'Purchases' and 'No Imprv' market-share reports side by side.

    Two reports are built with ``build_market_share_report``:

    * rows with ``action_type == 1`` and ``loan_purpose == 1``, columns
      prefixed ``'Purchases: '``;
    * rows with ``action_type == 1`` and ``loan_purpose != 2``, columns
      prefixed ``'No Imprv: '``.

    They are inner-joined on their index, sorted by ``'Purchases: Count'``
    in descending order and cut to the first 31 rows.

    Uses ``DataFrame.sort_values``; the original ``DataFrame.sort`` was
    removed in pandas 0.20 and raises ``AttributeError`` there.
    """
    # NOTE(review): action_type == 1 is presumably "loan originated" in the
    # HMDA coding -- confirm against the LAR schema.
    action_is_1 = lar.action_type == 1

    purchases = build_market_share_report(lar[action_is_1 & (lar.loan_purpose == 1)], year)
    purchases.columns = ['Purchases: ' + c for c in purchases.columns]

    no_improvement = build_market_share_report(lar[action_is_1 & (lar.loan_purpose != 2)], year)
    no_improvement.columns = ['No Imprv: ' + c for c in no_improvement.columns]

    merged = purchases.merge(no_improvement, left_index=True, right_index=True)

    # 31 rows: each sub-report carries a 'Total' row, which is sorted along
    # with the lender rows.
    return merged.sort_values('Purchases: Count', ascending=False)[:31]
    

In [7]:
from IPython.html.widgets import FloatProgress
from IPython.display import display
from time import sleep

# Tab-separated tract-to-place table, narrowed to rows whose county is '06073'.
# NOTE(review): the filter compares against a string; if read_csv parses
# `county` as an integer column no row will match -- confirm the column dtype.
places = pd.read_csv('data/ca_tract_to_place.csv', sep='\t')
places = places.loc[places['county'] == '06073']

n = 0

# Progress bar sized for four passes over (one county report + one report per
# distinct placefp); the 4 matches the number of years the report loop visits.
f = FloatProgress(min=0, max=4 * (len(places.placefp.unique()) + 1))
display(f)
f.value = 0

def ensure_dir(path):
    """Create the parent directory of ``path`` if it does not exist yet.

    ``path`` is a file path; only its directory part is created (with any
    missing intermediate directories). A path with no directory component
    (for example ``'report.csv'``) is a no-op rather than an error, and a
    directory that appears between the check and the creation (another
    process, or a repeated call) is tolerated.

    Raises
    ------
    OSError
        If the directory cannot be created for any reason other than it
        already existing as a directory.
    """
    import errno
    import os

    d = os.path.dirname(path)
    if not d:
        # Nothing to create: the file lives in the current directory.
        return

    try:
        os.makedirs(d)
    except OSError as exc:
        # Only swallow "already exists", and only when it really is a directory.
        if exc.errno != errno.EEXIST or not os.path.isdir(d):
            raise
                  
# Write one county-level CSV and one CSV per place for each report year.
for year in (2010, 2011, 2012, 2013):
    lar = get_lar(year)

    # County files
    f.value += 1

    report = combined_report(lar, year)

    path = 'market-share/{}/county/san_diego-county.csv'.format(year)
    ensure_dir(path)
    report.to_csv(path)

    # The tract-to-place join does not depend on which place is being
    # reported, so do it once per year instead of once per place.
    larp = lar.merge(places, on='tract')

    for placefp in places.placefp.unique():

        f.value += 1

        larp_sub = larp[larp.placefp == placefp]

        # A place with no loans in this year's data has no name to read and
        # nothing to report; skip it instead of failing on `unique()[0]`.
        if larp_sub.empty:
            continue

        report = combined_report(larp_sub, year)

        raw_place_name = larp_sub.placenm.unique()[0]
        place_name = raw_place_name.replace(' ','_').replace(',','').lower()

        # Choose the output sub-directory from the normalised place name.
        place_type = 'other'

        if 'cdp' in place_name:
            place_type = 'cdp'
        elif 'city' in place_name:
            place_type = 'city'

        path = "market-share/{}/{}/{}.csv".format(year,place_type,place_name)

        ensure_dir(path)

        report.to_csv(path)
        


