In [22]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
pd.set_option('display.max_colwidth', None)
from statsmodels.iolib.summary2 import summary_col 

In [23]:
import warnings
# Silence only library deprecation/future-change chatter.
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
# Deliberately no catch-all filter: a message pattern of ".*" matches every
# warning, which made the category filter above redundant and also hid
# numerical diagnostics (e.g. RuntimeWarning from np.log or a division) that
# matter for the regressions fitted later in this notebook.

In [24]:
# Read the cleaned bank-by-tract file and strip spaces out of the bank labels
# in one chained expression (later cells refer to a level named `BankofWest`).
bank_tract = (
    pd.read_csv("../input_data_clean/bank_tract_clean_WITH_CENSUS.csv")
    .assign(which_bank=lambda frame: frame['which_bank'].str.replace(' ', ''))
)

In [25]:
# Hispanic/Latino share of each tract's total population, in percent.
bank_tract['hisp_rate'] = (bank_tract['HispanicLatinoPop'] / bank_tract['Tot.Pop']) * 100

# 1.0 / 0.0 indicator for tracts whose share is above the median share
# (Series.median skips NaN, matching the previous np.median(...dropna())).
# A comparison with NaN is False, so rows with a missing hisp_rate used to be
# coded 0 and silently entered the indicator models while the hisp_rate models
# dropped them. Keep those rows missing so both specifications use the same
# sample.
bank_tract['hisp_over_med'] = (
    (bank_tract['hisp_rate'] > bank_tract['hisp_rate'].median())
    .astype(float)
    .where(bank_tract['hisp_rate'].notna())
)

# Log of the number of applications. Non-positive counts are masked to NaN
# first: np.log(0) is -inf, which would poison the OLS fit instead of being
# dropped as a missing value. Positive counts are unaffected.
bank_tract['log_num_apps'] = np.log(
    bank_tract['num_applications'].where(bank_tract['num_applications'] > 0)
)

In [26]:
def order_front(df, to_front):
    '''
    Return ``df`` with the columns named in ``to_front`` moved to the left.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are reordered; it is not modified.
    to_front : column label or iterable of column labels
        Columns to place first, in the order given. Any iterable is accepted
        (list, tuple, Index, ...); a single string is treated as one label.
        Labels listed more than once are used once, at their first position.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the ``to_front`` columns first and the remaining columns
        after them in their original relative order.

    Raises
    ------
    ValueError
        If any requested label is not a column of ``df``.
    '''
    # A bare string would otherwise be iterated character by character.
    if isinstance(to_front, str):
        to_front = [to_front]

    # dict.fromkeys drops repeats while keeping first-seen order.
    front = list(dict.fromkeys(to_front))

    missing = [label for label in front if label not in df.columns]
    if missing:
        raise ValueError(f"columns not found in df: {missing}")

    front_lookup = set(front)
    remaining = [label for label in df.columns if label not in front_lookup]
    return df[front + remaining]

# Show the bank and census-tract identifiers as the leftmost columns.
bank_tract = order_front(
    df=bank_tract,
    to_front=['which_bank', 'census_tract'],
)

## Linear Regression with Interaction Terms - median income as control

In [27]:
# Four OLS specifications with tract median income as the only control.
# Each one interacts a Hispanic-share measure with the bank indicator:
#   model1 / model3 use the continuous hisp_rate,
#   model2 / model4 use the above-median indicator hisp_over_med;
#   model1 / model2 explain denial_rate, model3 / model4 explain log_num_apps.
model1 = smf.ols('denial_rate ~ hisp_rate * which_bank + median_all_income', data=bank_tract).fit()
model2 = smf.ols('denial_rate ~ hisp_over_med * which_bank + median_all_income', data=bank_tract).fit()
model3 = smf.ols('log_num_apps ~ hisp_rate * which_bank + median_all_income', data=bank_tract).fit()
model4 = smf.ols('log_num_apps ~ hisp_over_med * which_bank + median_all_income', data=bank_tract).fit()

# Extra fit statistics to report in the table alongside the coefficients.
info_dict={'R-squared' : lambda x: f"{x.rsquared:.4f}",
           'Adj R-squared' : lambda x: f"{x.rsquared_adj:.4f}",
           'No. observations' : lambda x: f"{int(x.nobs):d}"}

# summary_col combines the fitted models into one side-by-side table.
# The column headers are given as distinct names on purpose: repeated names get
# "I", "II", ... appended, and the earlier ' log_num_apps' / 'log_num_apps'
# pair (differing only by a leading space) both came out as "log_num_apps I",
# making the last two columns indistinguishable.
print(summary_col(results=[model1,model2,model3,model4], # one column per fitted model
                  float_format='%0.4f',
                  stars = True, # significance stars next to the coefficients
                  model_names=['Denial Rate (1)','Denial Rate (2)','log_num_apps (3)','log_num_apps (4)'],
                  info_dict=info_dict,
                  regressor_order=[ 'Intercept', "hisp_rate", "hisp_over_med", "which_bank[T.BankofWest]", "hisp_rate:which_bank[T.BankofWest]",
                                  "hisp_over_med:which_bank[T.BankofWest]", "median_all_income"]
                  )
     )


                                       Denial Rate I Denial Rate II  log_num_apps I log_num_apps I
--------------------------------------------------------------------------------------------------
Intercept                              0.1299***     0.1437***      4.3562***       4.1152***     
                                       (0.0028)      (0.0024)       (0.0143)        (0.0126)      
hisp_rate                              0.0011***                    -0.0133***                    
                                       (0.0001)                     (0.0003)                      
hisp_over_med                                        0.0496***                      -0.4828***    
                                                     (0.0032)                       (0.0167)      
which_bank[T.BankofWest]               0.1168***     0.0901***      -3.0934***      -2.9949***    
                                       (0.0048)      (0.0039)       (0.0243)        (0.0205)      
hisp_rate

## Linear Regression with Interaction Terms - median income and mean LTV as controls

In [28]:
# Same four OLS specifications, now controlling for both tract median income
# and mean_LTV. Each one interacts a Hispanic-share measure with the bank
# indicator:
#   model1 / model3 use the continuous hisp_rate,
#   model2 / model4 use the above-median indicator hisp_over_med;
#   model1 / model2 explain denial_rate, model3 / model4 explain log_num_apps.
# NOTE: this rebinds model1..model4, replacing any models previously stored
# under those names.
model1 = smf.ols('denial_rate ~ hisp_rate * which_bank + median_all_income + mean_LTV', data=bank_tract).fit()
model2 = smf.ols('denial_rate ~ hisp_over_med * which_bank + median_all_income+ mean_LTV', data=bank_tract).fit()
model3 = smf.ols('log_num_apps ~ hisp_rate * which_bank + median_all_income+ mean_LTV', data=bank_tract).fit()
model4 = smf.ols('log_num_apps ~ hisp_over_med * which_bank + median_all_income+ mean_LTV', data=bank_tract).fit()

# Extra fit statistics to report in the table alongside the coefficients.
info_dict={'R-squared' : lambda x: f"{x.rsquared:.4f}",
           'Adj R-squared' : lambda x: f"{x.rsquared_adj:.4f}",
           'No. observations' : lambda x: f"{int(x.nobs):d}"}

# summary_col combines the fitted models into one side-by-side table.
# The column headers are given as distinct names on purpose: repeated names get
# "I", "II", ... appended, and the earlier ' log_num_apps' / 'log_num_apps'
# pair (differing only by a leading space) both came out as "log_num_apps I",
# making the last two columns indistinguishable.
print(summary_col(results=[model1,model2,model3,model4], # one column per fitted model
                  float_format='%0.4f',
                  stars = True, # significance stars next to the coefficients
                  model_names=['Denial Rate (1)','Denial Rate (2)','log_num_apps (3)','log_num_apps (4)'],
                  info_dict=info_dict,
                  regressor_order=[ 'Intercept', "hisp_rate", "hisp_over_med", "which_bank[T.BankofWest]", "hisp_rate:which_bank[T.BankofWest]",
                                  "hisp_over_med:which_bank[T.BankofWest]", "median_all_income", "mean_LTV"]
                  )
     )


                                       Denial Rate I Denial Rate II  log_num_apps I log_num_apps I
--------------------------------------------------------------------------------------------------
Intercept                              0.1144***     0.1263***      4.2558***       4.0557***     
                                       (0.0049)      (0.0048)       (0.0251)        (0.0253)      
hisp_rate                              0.0010***                    -0.0135***                    
                                       (0.0001)                     (0.0003)                      
hisp_over_med                                        0.0476***                      -0.4891***    
                                                     (0.0032)                       (0.0169)      
which_bank[T.BankofWest]               0.1169***     0.0907***      -3.0917***      -2.9929***    
                                       (0.0047)      (0.0039)       (0.0242)        (0.0205)      
hisp_rate