In [2]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
pd.set_option('display.max_colwidth', None)
from statsmodels.iolib.summary2 import summary_col 


In [3]:
import warnings
# Silence only the deprecation-style categories that clutter notebook output.
# A blanket `message=".*"` filter matches every warning regardless of category,
# which would also hide RuntimeWarnings (e.g. from a log or a division on bad
# input rows) that point at real data problems.
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [4]:
# Load the cleaned tract-level bank file and remove spaces from the bank labels,
# so the labels are compact (the regression tables show e.g. `BankofWest`).
df = (
    pd.read_csv("../input_data_clean/bank_tract_clean_WITH_CENSUS.csv")
    .assign(which_bank=lambda frame: frame['which_bank'].str.replace(' ', ''))
)

In [5]:
# Hispanic/Latino share of each tract's total population, in percent.
df['hisp_rate'] = (df['HispanicLatinoPop'] / df['Tot.Pop']) * 100

# Dummy for tracts whose Hispanic share is above the median share.
# A plain `rate > median` comparison is False for a missing rate, which would
# code tracts with unknown composition as 0 ("not above the median") and keep
# them in the dummy-based models even though the rate-based models drop them.
# Keep the dummy missing for those tracts so both specifications treat them
# the same way.
hisp_rate_known = df['hisp_rate'].notna()
hisp_median = np.median(df.loc[hisp_rate_known, 'hisp_rate'])
df['hisp_over_med'] = (df['hisp_rate'] > hisp_median).astype(float).where(hisp_rate_known)

# Natural log of the number of applications. A non-positive count has no finite
# log (np.log(0) is -inf), so such rows are set to missing instead of feeding
# -inf/NaN produced by the log into the regressions.
positive_apps = df['num_applications'].where(df['num_applications'] > 0)
df['log_num_apps'] = np.log(positive_apps)

## Linear Regression With Interaction Terms - CA & AZ

In [13]:
# Hispanic-share specifications, controlling for tract median income only.
# `x * which_bank` expands to both main effects plus their interaction, so each
# model estimates a separate slope on the Hispanic-share measure per bank.
#   model1 / model2: outcome is the denial rate
#   model3 / model4: outcome is the log number of applications
#   odd models use the continuous rate, even models the above-median dummy
model1 = smf.ols('denial_rate ~ hisp_rate * which_bank + median_all_income', data=df).fit()
model2 = smf.ols('denial_rate ~ hisp_over_med * which_bank + median_all_income', data=df).fit()
model3 = smf.ols('log_num_apps ~ hisp_rate * which_bank + median_all_income', data=df).fit()
model4 = smf.ols('log_num_apps ~ hisp_over_med * which_bank + median_all_income', data=df).fit()

# Side-by-side coefficient table (standard errors in parentheses, significance
# stars on). The column headers are set through `model_names`: `summary_col`
# returns a Summary object rather than a DataFrame, so assigning to a
# `.columns` attribute afterwards would not rename anything.
table = summary_col([model1, model2, model3, model4],
                    model_names=['Model 1', 'Model 2', 'Model 3', 'Model 4'],
                    float_format='%.5f',
                    stars=True,
                    info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                               'R2': lambda x: "{:.2f}".format(x.rsquared)})

# Display the table
print(table)


                                         Model 1     Model 2     Model 3     Model 4  
--------------------------------------------------------------------------------------
Intercept                              0.87010***  0.85632***  4.35621***  4.11517*** 
                                       (0.00280)   (0.00238)   (0.01426)   (0.01260)  
R-squared                              0.05296     0.04529     0.75889     0.73870    
R-squared Adj.                         0.05267     0.04500     0.75882     0.73862    
hisp_over_med                                      -0.04963***             -0.48278***
                                                   (0.00316)               (0.01669)  
hisp_over_med:which_bank[T.BankofWest]             0.07198***              0.13091*** 
                                                   (0.00574)               (0.03039)  
hisp_rate                              -0.00109***             -0.01331***            
                                       (0.

In [14]:
# Hispanic-share specifications, controlling for tract median income and the
# mean loan-to-value ratio (`mean_LTV`).
# `x * which_bank` expands to both main effects plus their interaction, so each
# model estimates a separate slope on the Hispanic-share measure per bank.
#   model1 / model2: outcome is the denial rate
#   model3 / model4: outcome is the log number of applications
#   odd models use the continuous rate, even models the above-median dummy
model1 = smf.ols('denial_rate ~ hisp_rate * which_bank + median_all_income + mean_LTV', data=df).fit()
model2 = smf.ols('denial_rate ~ hisp_over_med * which_bank + median_all_income + mean_LTV', data=df).fit()
model3 = smf.ols('log_num_apps ~ hisp_rate * which_bank + median_all_income + mean_LTV', data=df).fit()
model4 = smf.ols('log_num_apps ~ hisp_over_med * which_bank + median_all_income + mean_LTV', data=df).fit()

# Side-by-side coefficient table (standard errors in parentheses, significance
# stars on). The column headers are set through `model_names`: `summary_col`
# returns a Summary object rather than a DataFrame, so assigning to a
# `.columns` attribute afterwards would not rename anything.
table = summary_col([model1, model2, model3, model4],
                    model_names=['Model 1', 'Model 2', 'Model 3', 'Model 4'],
                    float_format='%.5f',
                    stars=True,
                    info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                               'R2': lambda x: "{:.2f}".format(x.rsquared)})

# Display the table
print(table)


                                         Model 1     Model 2     Model 3     Model 4  
--------------------------------------------------------------------------------------
Intercept                              0.88560***  0.87371***  4.25579***  4.05566*** 
                                       (0.00492)   (0.00479)   (0.02512)   (0.02535)  
R-squared                              0.05392     0.04655     0.75938     0.73895    
R-squared Adj.                         0.05357     0.04620     0.75929     0.73885    
hisp_over_med                                      -0.04756***             -0.48907***
                                                   (0.00319)               (0.01687)  
hisp_over_med:which_bank[T.BankofWest]             0.07024***              0.13601*** 
                                                   (0.00575)               (0.03045)  
hisp_rate                              -0.00105***             -0.01352***            
                                       (0.

In [None]:
# NOTE(review): this cell has never been executed and fits the same four
# Hispanic-share models (median income + mean_LTV controls) as the cell directly
# before it. Consider deleting it, or changing the specification if a different
# model was intended here.
model1 = smf.ols('denial_rate ~ hisp_rate * which_bank + median_all_income + mean_LTV', data=df).fit()
model2 = smf.ols('denial_rate ~ hisp_over_med * which_bank + median_all_income + mean_LTV', data=df).fit()
model3 = smf.ols('log_num_apps ~ hisp_rate * which_bank + median_all_income + mean_LTV', data=df).fit()
model4 = smf.ols('log_num_apps ~ hisp_over_med * which_bank + median_all_income + mean_LTV', data=df).fit()

# Side-by-side coefficient table. The column headers are set through
# `model_names`: `summary_col` returns a Summary object rather than a DataFrame,
# so assigning to a `.columns` attribute afterwards would not rename anything.
table = summary_col([model1, model2, model3, model4],
                    model_names=['Model 1', 'Model 2', 'Model 3', 'Model 4'],
                    float_format='%.5f',
                    stars=True,
                    info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                               'R2': lambda x: "{:.2f}".format(x.rsquared)})

# Display the table
print(table)

In [15]:
# Black share of each tract's total population, in percent.
df['black_rate'] = (df['BlackPop'] / df['Tot.Pop']) * 100

# Dummy for tracts whose Black share is above the median share.
# A plain `rate > median` comparison is False for a missing rate, which would
# code tracts with unknown composition as 0 ("not above the median") and keep
# them in the dummy-based models even though the rate-based models drop them.
# Keep the dummy missing for those tracts so both specifications treat them
# the same way.
black_rate_known = df['black_rate'].notna()
black_median = np.median(df.loc[black_rate_known, 'black_rate'])
df['black_over_med'] = (df['black_rate'] > black_median).astype(float).where(black_rate_known)


In [16]:
# Black-share specifications, controlling for tract median income and the mean
# loan-to-value ratio (`mean_LTV`).
# `x * which_bank` expands to both main effects plus their interaction, so each
# model estimates a separate slope on the Black-share measure per bank.
#   model1 / model2: outcome is the denial rate
#   model3 / model4: outcome is the log number of applications
#   odd models use the continuous rate, even models the above-median dummy
model1 = smf.ols('denial_rate ~ black_rate * which_bank + median_all_income + mean_LTV', data=df).fit()
model2 = smf.ols('denial_rate ~ black_over_med * which_bank + median_all_income + mean_LTV', data=df).fit()
model3 = smf.ols('log_num_apps ~ black_rate * which_bank + median_all_income + mean_LTV', data=df).fit()
model4 = smf.ols('log_num_apps ~ black_over_med * which_bank + median_all_income + mean_LTV', data=df).fit()

# Side-by-side coefficient table (standard errors in parentheses, significance
# stars on). The column headers are set through `model_names`: `summary_col`
# returns a Summary object rather than a DataFrame, so assigning to a
# `.columns` attribute afterwards would not rename anything.
table = summary_col([model1, model2, model3, model4],
                    model_names=['Model 1', 'Model 2', 'Model 3', 'Model 4'],
                    float_format='%.5f',
                    stars=True,
                    info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                               'R2': lambda x: "{:.2f}".format(x.rsquared)})

# Display the table
print(table)


                                          Model 1     Model 2     Model 3     Model 4  
---------------------------------------------------------------------------------------
Intercept                               0.86441***  0.86299***  3.93252***  3.91428*** 
                                        (0.00478)   (0.00487)   (0.02590)   (0.02645)  
R-squared                               0.03365     0.03070     0.72236     0.72059    
R-squared Adj.                          0.03329     0.03034     0.72225     0.72048    
black_over_med                                      -0.01365***             -0.07036***
                                                    (0.00318)               (0.01726)  
black_over_med:which_bank[T.BankofWest]             0.02340***              -0.12820***
                                                    (0.00574)               (0.03117)  
black_rate                              -0.00154***             -0.00966***            
                               

In [19]:
# White share of each tract's total population, in percent.
df['white_rate'] = (df['WhitePop'] / df['Tot.Pop']) * 100

# Dummy for tracts whose White share is above the median share.
# A plain `rate > median` comparison is False for a missing rate, which would
# code tracts with unknown composition as 0 ("not above the median") and keep
# them in the dummy-based models even though the rate-based models drop them.
# Keep the dummy missing for those tracts so both specifications treat them
# the same way.
white_rate_known = df['white_rate'].notna()
white_median = np.median(df.loc[white_rate_known, 'white_rate'])
df['white_over_med'] = (df['white_rate'] > white_median).astype(float).where(white_rate_known)


In [18]:
# White-share specifications, controlling for tract median income and the mean
# loan-to-value ratio (`mean_LTV`).
# `x * which_bank` expands to both main effects plus their interaction, so each
# model estimates a separate slope on the White-share measure per bank.
#   model1 / model2: outcome is the denial rate
#   model3 / model4: outcome is the log number of applications
#   odd models use the continuous rate, even models the above-median dummy
model1 = smf.ols('denial_rate ~ white_rate * which_bank + median_all_income + mean_LTV', data=df).fit()
model2 = smf.ols('denial_rate ~ white_over_med * which_bank + median_all_income + mean_LTV', data=df).fit()
model3 = smf.ols('log_num_apps ~ white_rate * which_bank + median_all_income + mean_LTV', data=df).fit()
model4 = smf.ols('log_num_apps ~ white_over_med * which_bank + median_all_income + mean_LTV', data=df).fit()

# Side-by-side coefficient table (standard errors in parentheses, significance
# stars on). The column headers are set through `model_names`: `summary_col`
# returns a Summary object rather than a DataFrame, so assigning to a
# `.columns` attribute afterwards would not rename anything.
table = summary_col([model1, model2, model3, model4],
                    model_names=['Model 1', 'Model 2', 'Model 3', 'Model 4'],
                    float_format='%.5f',
                    stars=True,
                    info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                               'R2': lambda x: "{:.2f}".format(x.rsquared)})

# Display the table
print(table)


                                          Model 1     Model 2     Model 3     Model 4  
---------------------------------------------------------------------------------------
Intercept                               0.85096***  0.85561***  3.32782***  3.75213*** 
                                        (0.00645)   (0.00488)   (0.03434)   (0.02619)  
R-squared                               0.02952     0.02920     0.73132     0.72733    
R-squared Adj.                          0.02916     0.02883     0.73122     0.72723    
mean_LTV                                -0.00044*** -0.00043*** -0.00134*** -0.00109***
                                        (0.00007)   (0.00007)   (0.00036)   (0.00036)  
median_all_income                       0.00001***  0.00001***  0.00004*    0.00004**  
                                        (0.00000)   (0.00000)   (0.00002)   (0.00002)  
which_bank[T.BankofWest]                -0.06490*** -0.05484*** -2.37853*** -2.77143***
                               