In [1]:
import os
import csv
import pandas as pd
import numpy as np

In [2]:
# Build the merged bank-tract/census file once; skip the work when it is already on disk.
if not os.path.exists('../input_data_clean/bank_tract_clean_WITH_CENSUS.csv'):

    # Census rows are the left ("one") side and bank-tract rows the right ("many") side.
    # validate="1:m" makes pandas raise if census_tract is not unique in the census file.
    pd.merge(
        pd.read_csv('../input_data_clean/census_clean.csv'),
        pd.read_csv('../input_data_clean/bank_tract_clean.csv'),
        on="census_tract",
        how="inner",
        validate="1:m",
    ).to_csv('../input_data_clean/bank_tract_clean_WITH_CENSUS.csv')

In [3]:
# Load the merged bank-tract + census table that every later cell works on.
data = pd.read_csv(
    filepath_or_buffer='../input_data_clean/bank_tract_clean_WITH_CENSUS.csv',
)

In [4]:
# HMDA count columns that are averaged in every summary table of this notebook.
# Candidates currently left out: 'median__all_income', 'sum_rate_spread', 'mean_LTV'.
hmda_vars = ['num_approved_loans', 'num_applications']

In [5]:
# Percentage (0-100) of each row's population that is not counted in Tot.WhitePop.
data['pct_minority'] = (
    data['Tot.Pop']
    .sub(data['Tot.WhitePop'])
    .div(data['Tot.Pop'])
    .mul(100)
)

In [6]:
# Keep only the rows where the non-White share of the population is above 50%.
majmin_tracts = data[data['pct_minority'] > 50]

# Average the HMDA counts over rows that share exactly the same pct_minority value.
# NOTE(review): pct_minority is continuous, so most groups probably hold very few
# rows -- confirm that grouping on the raw percentage (rather than on bins) is intended.
major_minority_group_loan_table = majmin_tracts.groupby('pct_minority')[hmda_vars].mean()

# Approval rate per group = mean approved loans / mean applications.
major_minority_group_loan_table['loan_approvel_rate'] = (
    major_minority_group_loan_table['num_approved_loans']
    / major_minority_group_loan_table['num_applications']
)

# Show up to 20 groups. The fixed seed keeps the displayed rows identical across
# re-runs, and the min() avoids a ValueError when fewer than 20 groups exist.
major_minority_group_loan_table.sample(
    n=min(20, len(major_minority_group_loan_table)),
    random_state=0,
)

Unnamed: 0_level_0,num_approved_loans,num_applications,loan_approvel_rate
pct_minority,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
75.523791,163.0,201.0,0.810945
60.423654,86.0,108.0,0.796296
67.938631,83.5,95.5,0.874346
77.064579,83.0,102.0,0.813725
56.149518,62.0,76.0,0.815789
66.307929,123.0,143.5,0.857143
59.73844,162.0,191.0,0.848168
67.535728,167.0,193.0,0.865285
75.589293,203.0,232.0,0.875
80.446804,104.5,121.5,0.860082


In [7]:
# Flag rows whose Hispanic/Latino population share is above 18.9%, the figure the
# notebook uses as the national average (for comparison: AZ = 32.3%, CA = 39.4%).
data['high_hisp_pop'] = (
    data['Tot.Hispanic/Latino']
    .div(data['Tot.Pop'])
    .gt(0.189)
)

# Subset holding only the flagged rows.
high_hispanic = data.loc[data['high_hisp_pop']]

In [8]:
# Loan summary split by the high_hisp_pop flag: mean HMDA counts per group, the
# ratio of those means as the approval rate, highest rate first.
high_hisp_loan_table = (
    data.groupby('high_hisp_pop')[hmda_vars]
    .mean()
    .assign(loan_approvel_rate=lambda tbl: tbl['num_approved_loans'] / tbl['num_applications'])
    .sort_values(by='loan_approvel_rate', ascending=False)
)
high_hisp_loan_table

Unnamed: 0_level_0,num_approved_loans,num_applications,loan_approvel_rate
high_hisp_pop,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,182.065429,207.535751,0.877273
True,150.863527,177.37027,0.850557


In [9]:
# Assign every row to a median-income quintile (Q1 = lowest, Q5 = highest).

# Keep the rows ordered by median income, as before. The result is rebound instead of
# sorting in place so that re-running the cell never mutates a frame shown earlier.
data = data.sort_values(by='median__all_income')

# One qcut call both finds the quintile edges and attaches the labels; the previous
# second step (pd.cut over the integer quintile codes) only existed to add the labels.
data['Median_Income_Quintiles'] = pd.qcut(
    data['median__all_income'],
    q=5,
    labels=['Q1', 'Q2', 'Q3', 'Q4', 'Q5'],
)

In [10]:
# Loan summary per median-income quintile.
# Median_Income_Quintiles is categorical, so `observed` is passed explicitly: this pins
# the behaviour the notebook was run with (every category gets a row) and avoids
# depending on the pandas default, which is deprecated and differs between versions.
Quintiles_loan_table = data.groupby('Median_Income_Quintiles', observed=False)[hmda_vars].mean()

# Approval rate per quintile = mean approved loans / mean applications.
Quintiles_loan_table['loan_approvel_rate'] = (
    Quintiles_loan_table['num_approved_loans'] / Quintiles_loan_table['num_applications']
)

# Highest approval rate first.
Quintiles_loan_table = Quintiles_loan_table.sort_values(by='loan_approvel_rate', ascending=False)
Quintiles_loan_table

Unnamed: 0_level_0,num_approved_loans,num_applications,loan_approvel_rate
Median_Income_Quintiles,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Q5,111.763565,127.065768,0.879573
Q4,201.191447,228.849013,0.879145
Q3,219.339166,252.857335,0.867442
Q2,186.308158,219.666667,0.84814
Q1,117.792595,142.417431,0.827094


In [11]:
# Express every population count as a percentage (0-100) of Tot.Pop.
# Maps each new percentage column to the census count column it is derived from.
pct_sources = {
    'pct_White': 'Tot.WhitePop',
    'pct_Black': 'Tot.BlackPop',
    'pct_AmericIndian': 'Tot.AmericIndianPop',
    'pct_Asian': 'Tot.AsianPop',
    'pct_NativeHawaiianPacific': 'Tot.NativeHawaiianPacificPop',
    'pct_OtherRaceAlone': 'Tot.OtherRaceAlonePop',
    'pct_TwoOrMoreRace': 'Tot.TwoOrMoreRace',
    'pct_TwoOrMoreRace(Some other race)': 'Tot.TwoOrMoreRace(Some other race)',
    'pct_Hispanic/Latino': 'Tot.Hispanic/Latino',
    'pct_NotHispanic/Latino': 'Tot.NotHispanic/Latino',
}

for pct_col, count_col in pct_sources.items():
    data[pct_col] = data[count_col] / data['Tot.Pop'] * 100

In [12]:
# Name the largest non-White group of each row.
# Maps every candidate percentage column to the label shown in the tables. The key
# order is also the column order handed to idxmax, so ties resolve as before.
minority_labels = {
    'pct_Black': 'Black',
    'pct_AmericIndian': 'American Indian',
    'pct_Asian': 'Asian',
    'pct_NativeHawaiianPacific': 'Native Hawaiian Pacific',
    'pct_OtherRaceAlone': 'Other Race Alone',
    'pct_TwoOrMoreRace': 'Two Or More Race',
    'pct_TwoOrMoreRace(Some other race)': 'Two Or More Race(Some other race)',
    'pct_Hispanic/Latino': 'Hispanic/Latino',
}

# idxmax returns the name of the column with the largest share; map() turns it into
# the display label. A result that is not one of the columns above (for example when
# every share of a row is missing) stays missing, whereas the previous catch-all
# `else` branch labelled such rows 'Hispanic/Latino'.
data['major_minority'] = (
    data[list(minority_labels)]
    .idxmax(axis=1)
    .map(minority_labels)
)

In [13]:
# Largest percentage among the non-White group columns of each row.
minority_pct_cols = [
    'pct_Black',
    'pct_AmericIndian',
    'pct_Asian',
    'pct_NativeHawaiianPacific',
    'pct_OtherRaceAlone',
    'pct_TwoOrMoreRace',
    'pct_TwoOrMoreRace(Some other race)',
    'pct_Hispanic/Latino',
]
data['pct_major_minority'] = data[minority_pct_cols].max(axis='columns')

In [14]:
# Loan summary per largest non-White group: mean HMDA counts, the ratio of those
# means as the approval rate, highest rate first.
minority_group_loan_table = (
    data.groupby('major_minority')[hmda_vars]
    .mean()
    .assign(loan_approvel_rate=lambda tbl: tbl['num_approved_loans'] / tbl['num_applications'])
    .sort_values(by='loan_approvel_rate', ascending=False)
)
minority_group_loan_table

Unnamed: 0_level_0,num_approved_loans,num_applications,loan_approvel_rate
major_minority,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Asian,171.071038,194.124469,0.881244
Two Or More Race,151.939759,175.861446,0.863974
Hispanic/Latino,161.591724,188.744204,0.856141
Black,133.165644,157.067485,0.847824
Native Hawaiian Pacific,28.5,34.0,0.838235
Other Race Alone,89.333333,107.833333,0.828439
American Indian,65.290698,81.802326,0.798152


In [15]:
# Name the largest group of each row, this time with White included.
# Maps every candidate percentage column to the label shown in the tables. The key
# order is also the column order handed to idxmax, so ties resolve as before.
ethnicity_labels = {
    'pct_White': 'White',
    'pct_Black': 'Black',
    'pct_AmericIndian': 'American Indian',
    'pct_Asian': 'Asian',
    'pct_NativeHawaiianPacific': 'Native Hawaiian Pacific',
    'pct_OtherRaceAlone': 'Other Race Alone',
    'pct_TwoOrMoreRace': 'Two Or More Race',
    'pct_TwoOrMoreRace(Some other race)': 'Two Or More Race(Some other race)',
    'pct_Hispanic/Latino': 'Hispanic/Latino',
}

# idxmax returns the name of the column with the largest share; map() turns it into
# the display label. A result that is not one of the columns above (for example when
# every share of a row is missing) stays missing, whereas the previous catch-all
# `else` branch labelled such rows 'Hispanic/Latino'.
data['major_ethnicity'] = (
    data[list(ethnicity_labels)]
    .idxmax(axis=1)
    .map(ethnicity_labels)
)

In [16]:
# Largest percentage among all group columns (White included) of each row.
ethnicity_pct_cols = [
    'pct_White',
    'pct_Black',
    'pct_AmericIndian',
    'pct_Asian',
    'pct_NativeHawaiianPacific',
    'pct_OtherRaceAlone',
    'pct_TwoOrMoreRace',
    'pct_TwoOrMoreRace(Some other race)',
    'pct_Hispanic/Latino',
]
data['pct_major_ethnicity'] = data[ethnicity_pct_cols].max(axis='columns')

In [17]:
# Loan summary per largest group (White included): mean HMDA counts, the ratio of
# those means as the approval rate, highest rate first.
all_ethnicity_loan_table = (
    data.groupby('major_ethnicity')[hmda_vars]
    .mean()
    .assign(loan_approvel_rate=lambda tbl: tbl['num_approved_loans'] / tbl['num_applications'])
    .sort_values(by='loan_approvel_rate', ascending=False)
)
all_ethnicity_loan_table

Unnamed: 0_level_0,num_approved_loans,num_applications,loan_approvel_rate
major_ethnicity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Other Race Alone,1.5,1.5,1.0
Asian,149.829376,170.868953,0.876867
White,184.108816,212.398206,0.86681
Hispanic/Latino,98.340163,119.269057,0.824524
Black,118.936306,145.044586,0.819998
American Indian,13.23913,22.304348,0.593567


In [18]:
# Decide, per row, whether the Hispanic/Latino or the non-Hispanic/Latino share is larger.
# Maps the two percentage columns to the labels shown in the tables. The key order is
# also the column order handed to idxmax, so ties resolve as before.
hispanic_labels = {
    'pct_Hispanic/Latino': 'Hispanic/Latino',
    'pct_NotHispanic/Latino': 'Not-Hispanic/Latino',
}

# idxmax returns the name of the larger column; map() turns it into the display label.
# A result that is neither column (for example when both shares of a row are missing)
# stays missing, whereas the previous catch-all `else` branch labelled such rows
# 'Not-Hispanic/Latino'.
data['majority_between_hispanic_nonhispanic'] = (
    data[list(hispanic_labels)]
    .idxmax(axis=1)
    .map(hispanic_labels)
)

In [19]:
# Larger of the Hispanic/Latino and non-Hispanic/Latino percentages of each row.
hispanic_pct_cols = ['pct_Hispanic/Latino', 'pct_NotHispanic/Latino']
data['pct_majority_between_hispanic_nonhispanic'] = data[hispanic_pct_cols].max(axis='columns')

In [20]:
# Hispanic / non-Hispanic loan table.
# Author's notes on the census columns:
#   - Hispanic population + non-Hispanic population = total population
#   - Tot.WhitePop + Tot.BlackPop + Tot.AmericIndianPop + Tot.AsianPop
#     + Tot.NativeHawaiianPacificPop + Tot.OtherRaceAlonePop + Tot.TwoOrMoreRace
#     + Tot.TwoOrMoreRace(Some other race) = total population
# Mean HMDA counts per group, the ratio of those means as the approval rate,
# highest rate first.
hispanic_nonhispanic_loan_table = (
    data.groupby('majority_between_hispanic_nonhispanic')[hmda_vars]
    .mean()
    .assign(loan_approvel_rate=lambda tbl: tbl['num_approved_loans'] / tbl['num_applications'])
    .sort_values(by='loan_approvel_rate', ascending=False)
)
hispanic_nonhispanic_loan_table

Unnamed: 0_level_0,num_approved_loans,num_applications,loan_approvel_rate
majority_between_hispanic_nonhispanic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Not-Hispanic/Latino,180.512924,207.741944,0.868929
Hispanic/Latino,110.705767,133.704545,0.827988


In [21]:
# Flag rows whose RatioIncomeToPovertyLevel is below 100.
# NOTE(review): presumably the ratio is a percentage, so < 100 means income under the
# poverty line -- confirm the units against the census cleaning step.
data['majority_poor'] = data['RatioIncomeToPovertyLevel'].lt(100)

# Loan summary split by that flag: mean HMDA counts, the ratio of those means as the
# approval rate, highest rate first.
major_poor_loan_table = (
    data.groupby('majority_poor')[hmda_vars]
    .mean()
    .assign(loan_approvel_rate=lambda tbl: tbl['num_approved_loans'] / tbl['num_applications'])
    .sort_values(by='loan_approvel_rate', ascending=False)
)
major_poor_loan_table

Unnamed: 0_level_0,num_approved_loans,num_applications,loan_approvel_rate
majority_poor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,162.601149,188.772368,0.861361
True,2.892857,3.892857,0.743119
