In [1]:
# Notebook setup: autoreload configuration, imports, display options and the Ambry library handle.
# With "%autoreload 1", only modules registered through %aimport are reloaded before each execution.
%load_ext autoreload
%autoreload 1
# Put the parent directory on sys.path so the local `lib` package can be imported.
import sys; sys.path.append("..")
%aimport lib.util
# NOTE(review): reformat_tract is imported but not used in the cells visible here.
from lib.util import reformat_tract, add_race_eth, add_purpose_type, add_income_group, add_all_category_columns
import pandas as pd
# Allow up to 50 columns and 1500 rows when a DataFrame is displayed.
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1500
import numpy as np
from ambry import library
# Library handle used by later cells to fetch bundles/partitions via l.get(...).
l = library()


In [2]:
# Module-level table read by get_ts(): it carries activity_year, respondent_id and
# respondent_name columns. Presumably the HMDA transmittal sheet ("ts") -- confirm against the bundle.
ts = l.get('ffiec.gov-hmda-2010e-ts').partition.pandas

def get_ts(year):
    """Look up respondent names for a single activity year.

    Reads the module-level ``ts`` frame and keeps the rows whose
    ``activity_year`` equals ``year``.

    Returns a ``(ts_name, rep_map)`` tuple:
      * ``ts_name`` -- frame indexed by ``respondent_id`` (as a string) with a
        ``respondent_name`` column, holding one row per distinct name.
      * ``rep_map`` -- dict mapping every string ``respondent_id`` of that year
        to its ``respondent_name``; for a repeated id the last row wins.
    """
    in_year = ts.activity_year == year
    names = ts.loc[in_year, ['respondent_id', 'respondent_name']].copy()
    names['respondent_id'] = names['respondent_id'].astype(str)

    # Built before de-duplication so that every id of the year gets an entry.
    rep_map = dict(zip(names['respondent_id'], names['respondent_name']))

    unique_names = names.drop_duplicates(subset=['respondent_name'])

    return unique_names.set_index(['respondent_id']), rep_map


In [3]:
# Function to create each section of the standard report. 
def standard_report_section(df, column_name, totalcount, totalamount):
    """Summarise ``loan_amount`` for every value of one categorical column.

    Parameters
    ----------
    df : DataFrame with a ``loan_amount`` column and a ``column_name`` column.
    column_name : name of the column to group by.
    totalcount : denominator used for the ``%count`` column.
    totalamount : denominator used for the ``%amount`` column.

    Returns
    -------
    DataFrame indexed by the values of ``column_name`` with the columns
    ``count``, ``%count``, ``amount``, ``%amount`` and ``median`` (in that order).
    Percentages are rounded to one decimal place.
    """
    loan_amounts = df.groupby(column_name)['loan_amount']

    # Build the aggregates explicitly instead of passing a {name: func} dict to
    # SeriesGroupBy.agg(): that renaming form is rejected by current pandas,
    # whereas size()/sum()/median() behave the same on old and new releases.
    dfo = pd.DataFrame({
        'count': loan_amounts.size(),    # group size, like np.size: rows with a missing amount are counted
        'amount': loan_amounts.sum(),
        'median': loan_amounts.median(),
    })
    dfo['%count'] = np.round(100*dfo['count']/totalcount,1)
    dfo['%amount'] = np.round(100*dfo['amount']/totalamount,1)
    return dfo[['count','%count','amount','%amount','median']]

In [4]:
def standard_report_group(df, group):
    """Build the standard report for one group of loans.

    Parameters
    ----------
    df : DataFrame of loans; must carry ``loan_amount`` and the categorical
        columns ``purpose_type``, ``race_eth_c``, ``income_group_c``,
        ``applicant_sex_c`` and ``occupancy_c``.
    group : label stored in the ``respondent`` column of the result.

    Returns
    -------
    DataFrame with one row per statistic (``count``, ``%count``, ``amount``,
    ``%amount``, ``median``), one column per (section title, category) pair,
    plus a ``totals`` column and a ``respondent`` column.
    """
    totalcount = df.loan_amount.count()
    totalamount = df.loan_amount.sum()

    # Each section title is kept next to the column it summarises so the two
    # lists handed to pd.concat always have the same length. (The previous
    # version passed an extra 'Totals' key that had no matching frame.)
    sections = (
        ('Loan Purpose and Type', 'purpose_type'),
        ('Applicant Race/Eth', 'race_eth_c'),
        ('Applicant Income', 'income_group_c'),
        ('Applicant Sex', 'applicant_sex_c'),
        ('Occupancy', 'occupancy_c'),
    )

    frames = [standard_report_section(df, column_name, totalcount, totalamount)
              for _, column_name in sections]

    table = pd.concat(frames, keys=[title for title, _ in sections])

    table = table.T
    # After the transpose the rows are count, %count, amount, %amount, median,
    # i.e. the column order returned by standard_report_section. Only the two
    # absolute figures have an overall total.
    table['totals'] = [totalcount,None,totalamount,None,None]
    table['respondent'] = group

    return table

In [5]:
def build_report(year, county_code=73, msa_md=41740, top_n=7):
    """Write the standard summary report for the largest respondents of one year.

    Loads the LAR rows of ``county_code`` for ``year``, adds the derived and
    categorical columns, ranks respondents by their number of loans in the
    county, and builds one report group per top respondent from that
    respondent's loans inside ``msa_md``.

    Parameters
    ----------
    year : activity year; selects the LAR partition, the respondent names and
        the output file names.
    county_code : county to load (default 73, the previously hard-coded value).
    msa_md : MSA/MD code the per-respondent loans are restricted to
        (default 41740, the previously hard-coded value).
    top_n : number of respondents to include (default 7, matching the previous
        ``i > 6`` cut-off).

    Side effects
    ------------
    Writes ``standard_summary_report_<year>.csv`` and
    ``standard_summary_report_t_<year>.csv`` (its transpose) to the working
    directory.
    """
    # Single-argument print(...) emits the same text under Python 2 and 3.
    print("Build  {}".format(year))

    # The partition name used to be fixed to '...-lar-2012-ca', so every "yearly"
    # report was built from 2012 loans regardless of ``year``.
    # NOTE(review): assumes the bundle has one LAR partition per year named
    # after this pattern -- confirm for every year that is requested.
    partition_name = 'ffiec.gov-hmda-2010e-lar-{}-ca'.format(year)
    p = l.get(partition_name).partition
    df = p.select("SELECT * FROM lar WHERE county_code = {:d}".format(int(county_code))).pandas

    add_race_eth(df) # Adds race_eth column
    add_purpose_type(df) # Adds purpose_type column Combines loan_purpose and loan_type in the same way as the summary report
    add_income_group(df) # Adds income_group column
    add_all_category_columns(df) # Add '_c' categorical columns

    _unused_names, rep_map = get_ts(year)

    # Rank on the county-wide loan count (non-null ``id`` values per respondent);
    # the MSA restriction below only applies to the rows that are summarised.
    loans_per_respondent = df.groupby('respondent_id')['id'].count()
    top_respondents = loans_per_respondent.nlargest(top_n).index

    # The MSA filter does not depend on the respondent, so it is applied once.
    msa_df = df[df.msa_md == msa_md]

    groups = []
    for respondent_id in top_respondents:
        # Fall back to the raw id when the year has no name for the respondent.
        label = rep_map.get(str(respondent_id), str(respondent_id))
        groups.append(standard_report_group(msa_df[msa_df.respondent_id == respondent_id], label))

    report = pd.concat(groups).set_index('respondent',append=True).reorder_levels([1,0])

    report.T.to_csv('standard_summary_report_{}.csv'.format(year))
    report.to_csv('standard_summary_report_t_{}.csv'.format(year))

In [6]:
# Build the report files for every activity year from 2010 through 2013.
for year in range(2010, 2014):
    build_report(year)

Build  2010
Adding category column for  loan_type
Adding category column for  loan_purpose
Adding category column for  occupancy
Adding category column for  income_group
Adding category column for  applicant_sex
Adding category column for  race_eth
Build  2011
Adding category column for  loan_type
Adding category column for  loan_purpose
Adding category column for  occupancy
Adding category column for  income_group
Adding category column for  applicant_sex
Adding category column for  race_eth
Build  2012
Adding category column for  loan_type
Adding category column for  loan_purpose
Adding category column for  occupancy
Adding category column for  income_group
Adding category column for  applicant_sex
Adding category column for  race_eth
Build  2013
Adding category column for  loan_type
Adding category column for  loan_purpose
Adding category column for  occupancy
Adding category column for  income_group
Adding category column for  applicant_sex
Adding category column for  race_eth
