### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import os
import glob as gb
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
from statsmodels.graphics.tsaplots import plot_pacf
from functools import reduce


from linearmodels import PanelOLS
from linearmodels import PooledOLS
from linearmodels import RandomEffects
import statsmodels.api as sm
from statsmodels.stats.diagnostic import het_white, het_breuschpagan
from statsmodels.stats.stattools import durbin_watson
import numpy.linalg as la
from scipy import stats
import numpy as np

### Set Path

In [2]:
path = 'C:\\Users\\padu\Dropbox (UNC Charlotte)\\Providence Adu’s files\\Data\\US Census Bureau NPA'
os.chdir(path)

### Read Data

In [3]:
Housing =  pd.read_csv("AllVariables.csv")
print(Housing.columns)

Index(['Unnamed: 0', 'NPA', 'MSAge2010', 'MSAge2011', 'MSAge2012', 'MSAge2013',
       'MSAge2014', 'MSAge2015', 'MSAge2016', 'MSAge2017',
       ...
       'PercentWhite2019', 'PercentHispanic2019',
       'PercentAdultsWithAtLeastBachelors2019', 'PercentOwnerOccupied2019',
       'PercentAsian2020', 'PercentBlack2020', 'PercentWhite2020',
       'PercentHispanic2020', 'PercentAdultsWithAtLeastBachelors2020',
       'PercentOwnerOccupied2020'],
      dtype='object', length=185)


### Account of Percent Without Bachelors for 2010 and 2011

In [4]:
Housing['PercentAdultsWithAtLeastBachelors2010'] = Housing['PercentAdultsWithAtLeastBachelors2012']
Housing['PercentAdultsWithAtLeastBachelors2011'] = Housing['PercentAdultsWithAtLeastBachelors2012']

### Percent New Residential Renovation

In [5]:
for row in Housing:
    if row.startswith('NewResReno'):
        Housing['PercNewResReno'+str(row[-4:])] = (Housing['NewResReno'+str(row[-4:])]/Housing['HousingUnits'+str(row[-4:])])*100


In [6]:
# print(list(Housing.columns))

### Percent New Residential Demolition

In [7]:
for row in Housing:
    if row.startswith('NewResDems'):
        Housing['PercNewResDems'+str(row[-4:])] = (Housing['NewResDems'+str(row[-4:])]/Housing['HousingUnits'+str(row[-4:])])*100


### Percent New Residential Construction

In [8]:
for row in Housing:
    if row.startswith('NewResCons'):
        Housing['PercNewResCons'+str(row[-4:])] = (Housing['NewResCons'+str(row[-4:])]/Housing['HousingUnits'+str(row[-4:])])*100


### Percent Housing Violations

In [9]:
for row in Housing:
    if row.startswith('HousingViolation'):
        Housing['ShareHousingViolation'+str(row[-4:])] = (Housing['HousingViolations'+str(row[-4:])]/
                                                          Housing['HousingUnits'+str(row[-4:])])*100

### Percent All Violations

In [10]:
for row in Housing:
    if row.startswith('ALLViolation'):
        Housing['ShareALLViolation'+str(row[-4:])] = (Housing['ALLViolations'+str(row[-4:])]/
                                                          Housing['HousingUnits'+str(row[-4:])])*100

### Percent Minority

In [11]:
for row in Housing:
    if row.startswith('PercentHispanic') or row.startswith('PercentAsian') or row.startswith('PercentBlack'):
        Housing['PercentMinority'+str(row[-4:])] = (Housing['PercentAsian'+str(row[-4:])]+
                                                    Housing['PercentHispanic'+str(row[-4:])]+
                                                   Housing['PercentBlack'+str(row[-4:])])

### Housing Investment

In [12]:
for row in Housing:
    if row.startswith('NewResCon') or row.startswith('NewResReno'):
        Housing['HousingInvestment'+str(row[-4:])] = (Housing['NewResCons'+str(row[-4:])]+
                                              Housing['NewResReno'+str(row[-4:])])

### Share of Housing Investment

In [13]:
for row in Housing:
    if row.startswith('PercNewResCon') or row.startswith('PercNewResReno'):
        Housing['ShareHousingInvestment'+str(row[-4:])] = (Housing['PercNewResCons'+str(row[-4:])]+
                                              Housing['PercNewResReno'+str(row[-4:])])

In [15]:
Housing.replace([np.inf, -np.inf], np.nan, inplace=True)

In [20]:
LatentHousing = pd.DataFrame(Housing)

['Unnamed: 0', 'NPA', 'MSAge2010', 'MSAge2011', 'MSAge2012', 'MSAge2013', 'MSAge2014', 'MSAge2015', 'MSAge2016', 'MSAge2017', 'MSAge2018', 'MSAge2019', 'MSAge2020', 'MHIncome2010', 'MHIncome2011', 'MHIncome2012', 'MHIncome2013', 'MHIncome2014', 'MHIncome2015', 'MHIncome2016', 'MHIncome2017', 'MHIncome2018', 'MHIncome2019', 'MHIncome2020', 'MGRent2010', 'MGRent2011', 'MGRent2012', 'MGRent2013', 'MGRent2014', 'MGRent2015', 'MGRent2016', 'MGRent2017', 'MGRent2018', 'MGRent2019', 'MGRent2020', 'HousingUnits2021', 'NewResReno2022', 'NewResReno2021', 'NewResReno2020', 'NewResReno2019', 'NewResReno2018', 'NewResReno2017', 'NewResReno2016', 'NewResReno2015', 'NewResReno2014', 'NewResReno2013', 'NewResReno2012', 'NewResReno2011', 'NewResReno2010', 'NewResDems2022', 'NewResDems2021', 'NewResDems2020', 'NewResDems2019', 'NewResDems2018', 'NewResDems2017', 'NewResDems2016', 'NewResDems2015', 'NewResDems2014', 'NewResDems2013', 'NewResDems2012', 'NewResDems2011', 'NewResDems2010', 'NewResCons2022',

### Filter columns for Melting
- exclude 2021 and 2022

In [25]:
ALLViolations = [row for row in LatentHousing if row.startswith('ALLViolations') and not row.endswith('2022') and not row.endswith('2021')]
ShareALLViolations = [row for row in LatentHousing if row.startswith('ShareALLViolation')and not row.endswith('2022') and not row.endswith('2021')]
HousingViolatins = [row for row in LatentHousing if row.startswith('HousingViolations') and not row.endswith('2022') and not row.endswith('2021')]
ShareHousingViolatins = [row for row in LatentHousing if row.startswith('ShareHousingViolation') and not row.endswith('2022') and not row.endswith('2021')]

PerNewResReno =  [row for row in LatentHousing if row.startswith('PercNewResReno') and not row.endswith('2022') and not row.endswith('2021')]
PerNewResCon  =  [row for row in LatentHousing if row.startswith('PercNewResCon') and not row.endswith('2022') and not row.endswith('2021')]
PerNewResDem  =  [row for row in LatentHousing if row.startswith('PercNewResDem') and not row.endswith('2022') and not row.endswith('2021')]

MSAge = [row for row in LatentHousing if row.startswith('MSAge') and not row.endswith('2022') and not row.endswith('2021')]
MHIncome = [row for row in LatentHousing if row.startswith('MHIncome') and not row.endswith('2022') and not row.endswith('2021')]
MGRent = [row for row in LatentHousing if row.startswith('MGRent') and not row.endswith('2022') and not row.endswith('2021')]

PercentBlack = [row for row in LatentHousing if row.startswith('PercentBlack') and not row.endswith('2022') and not row.endswith('2021')]
PercentWhite = [row for row in LatentHousing if row.startswith('PercentWhite') and not row.endswith('2022') and not row.endswith('2021')]
PercentAsian = [row for row in LatentHousing if row.startswith('PercentAsian') and not row.endswith('2022') and not row.endswith('2021')]
PercentHispanic = [row for row in LatentHousing if row.startswith('PercentHispanic') and not row.endswith('2022') and not row.endswith('2021')]

PercentOwnerOccupied = [row for row in LatentHousing if row.startswith('PercentOwnerOccupied') and not row.endswith('2022') and not row.endswith('2021')]
PercentAdultsWithAtLeastBachelors = [row for row in LatentHousing if row.startswith('PercentAdultsWithAtLeastBachelors') and not row.endswith('2022') and not row.endswith('2021')]


PercentMinority = [row for row in LatentHousing if row.startswith('PercentMinority') and not row.endswith('2022') and not row.endswith('2021')]
ShareHousingInvestment = [row for row in LatentHousing if row.startswith('ShareHousingInvestment') and not row.endswith('2022') and not row.endswith('2021')]

HousingInvestment = [row for row in LatentHousing if row.startswith('HousingInvestment') and not row.endswith('2022') and not row.endswith('2021')]

### Melt data for each individual category

In [26]:
AllviolationsPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= ALLViolations, value_name= 'AllViolations')
ShareALLViolationsPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= ShareALLViolations, value_name= 'ShareALLViolations')
HousingViolatinsPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= HousingViolatins, value_name= 'HousingViolations')
ShareHousingViolatinsPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= ShareHousingViolatins, value_name= 'ShareHousingViolations')

PerNewResRenoPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= PerNewResReno, value_name= 'PerNewResReno')
PerNewResConPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= PerNewResCon, value_name= 'PerNewResCon')
PerNewResDemPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= PerNewResDem, value_name= 'PerNewResDem')

MSAgePooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= MSAge, value_name= 'MedianAge')
MHIncomePooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= MHIncome, value_name= 'MedianIncome')
MGRentPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= MGRent, value_name= 'MedianGrossRent')

PercentBlackPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= PercentBlack, value_name= 'PercentBlack')
PercentWhitePooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= PercentWhite, value_name= 'PercentWhite')
PercentAsianPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= PercentAsian, value_name= 'PercentAsian')
PercentHispanicPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= PercentHispanic, value_name= 'PercentHispanic')
PercentMinorityPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= PercentHispanic, value_name= 'PercentMinority')


PercentOwnerOccupiedPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= PercentOwnerOccupied, value_name= 'PercentOwnerOccupied')
PercentAdultsWithAtLeastBachelorsPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= PercentAdultsWithAtLeastBachelors, value_name= 'PercentAdultsWithAtLeastBachelors')

ShareHousingInvestmentPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= ShareHousingInvestment, value_name= 'ShareHousingInvestment')
HousingInvestmentPooled = pd.melt(LatentHousing, id_vars= 'NPA', value_vars= HousingInvestment, value_name= 'HousingInvestment')


### Make sure axis are the same for join

In [27]:
AllviolationsPooled.shape,ShareALLViolationsPooled.shape,HousingViolatinsPooled.shape,ShareHousingViolatinsPooled.shape,PerNewResRenoPooled.shape,PerNewResConPooled.shape,PerNewResDemPooled.shape,MSAgePooled.shape,MHIncomePooled.shape,MGRentPooled.shape,PercentBlackPooled.shape,PercentWhitePooled.shape,PercentAsianPooled.shape,PercentHispanicPooled.shape,PercentOwnerOccupiedPooled.shape,PercentAdultsWithAtLeastBachelorsPooled.shape,ShareHousingInvestmentPooled.shape,PercentMinorityPooled.shape

((5082, 3),
 (5082, 3),
 (5082, 3),
 (5082, 3),
 (5082, 3),
 (5082, 3),
 (5082, 3),
 (5082, 3),
 (5082, 3),
 (5082, 3),
 (5082, 3),
 (5082, 3),
 (5082, 3),
 (5082, 3),
 (5082, 3),
 (5082, 3),
 (5082, 3),
 (5082, 3))

### Extract year variable from columns

In [28]:
AllviolationsPooled['Year'] = [int(row[-4:]) for row in AllviolationsPooled['variable']]
ShareALLViolationsPooled['Year'] = [int(row[-4:]) for row in ShareALLViolationsPooled['variable']]
ShareHousingViolatinsPooled['Year'] = [int(row[-4:]) for row in ShareHousingViolatinsPooled['variable']]
HousingViolatinsPooled['Year'] = [int(row[-4:]) for row in HousingViolatinsPooled['variable']]

PercentBlackPooled['Year']  = [int(row[-4:]) for row in PercentBlackPooled['variable']]
PercentWhitePooled['Year'] =  [int(row[-4:]) for row in PercentWhitePooled['variable']]
PercentAsianPooled['Year'] =  [int(row[-4:]) for row in PercentAsianPooled['variable']]
PercentHispanicPooled['Year'] =  [int(row[-4:]) for row in PercentHispanicPooled['variable']]

PerNewResRenoPooled['Year'] = [int(row[-4:]) for row in PerNewResRenoPooled['variable']]
PerNewResConPooled['Year'] = [int(row[-4:]) for row in PerNewResConPooled['variable']]
PerNewResDemPooled['Year'] = [int(row[-4:]) for row in PerNewResDemPooled['variable']]


MSAgePooled['Year'] = [int(row[-4:]) for row in MSAgePooled['variable']]
MHIncomePooled['Year'] = [int(row[-4:]) for row in MHIncomePooled['variable']]
MGRentPooled['Year'] = [int(row[-4:]) for row in MGRentPooled['variable']]


PercentOwnerOccupiedPooled['Year'] = [int(row[-4:]) for row in PercentOwnerOccupiedPooled['variable']]
PercentAdultsWithAtLeastBachelorsPooled['Year'] = [int(row[-4:]) for row in PercentAdultsWithAtLeastBachelorsPooled['variable']]


PercentMinorityPooled['Year'] = [int(row[-4:]) for row in PercentMinorityPooled['variable']]
ShareHousingInvestmentPooled['Year'] = [int(row[-4:]) for row in ShareHousingInvestmentPooled['variable']]

HousingInvestmentPooled['Year'] = [int(row[-4:]) for row in HousingInvestmentPooled['variable']]


### Create a list of DataFrames for Join in reduce

In [29]:
PanelDataFrame = [AllviolationsPooled,ShareALLViolationsPooled,HousingViolatinsPooled,MSAgePooled,MHIncomePooled,
                  MGRentPooled,PerNewResRenoPooled,PerNewResDemPooled,PercentBlackPooled,PercentWhitePooled,PercentAsianPooled,
                 PercentHispanicPooled, PerNewResConPooled,ShareHousingViolatinsPooled,PercentOwnerOccupiedPooled,
                 PercentAdultsWithAtLeastBachelorsPooled,PercentMinorityPooled,ShareHousingInvestmentPooled,
                 HousingInvestmentPooled]


### Join all dataframes on NPA and Year

In [30]:
PooledData = reduce(lambda right,left: pd.merge(right, left, on = ['NPA', 'Year'], how = 'left'), PanelDataFrame)
PooledData.columns

Index(['NPA', 'variable_x', 'AllViolations', 'Year', 'variable_y',
       'ShareALLViolations', 'variable_x', 'HousingViolations', 'variable_y',
       'MedianAge', 'variable_x', 'MedianIncome', 'variable_y',
       'MedianGrossRent', 'variable_x', 'PerNewResReno', 'variable_y',
       'PerNewResDem', 'variable_x', 'PercentBlack', 'variable_y',
       'PercentWhite', 'variable_x', 'PercentAsian', 'variable_y',
       'PercentHispanic', 'variable_x', 'PerNewResCon', 'variable_y',
       'ShareHousingViolations', 'variable_x', 'PercentOwnerOccupied',
       'variable_y', 'PercentAdultsWithAtLeastBachelors', 'variable_x',
       'PercentMinority', 'variable_y', 'ShareHousingInvestment', 'variable',
       'HousingInvestment'],
      dtype='object')

#### Delete unneeded variable column

In [31]:
columns = [row for row in PooledData if row.startswith('variable')]
PooledData = PooledData.drop(columns, axis = 1)
PooledData

Unnamed: 0,NPA,AllViolations,Year,ShareALLViolations,HousingViolations,MedianAge,MedianIncome,MedianGrossRent,PerNewResReno,PerNewResDem,...,PercentWhite,PercentAsian,PercentHispanic,PerNewResCon,ShareHousingViolations,PercentOwnerOccupied,PercentAdultsWithAtLeastBachelors,PercentMinority,ShareHousingInvestment,HousingInvestment
0,2,187,2020,25.757576,7,30.0,54557.0,998.0,0.000000,0.000000,...,57.703428,0.495663,8.756712,0.000000,0.964187,34.513274,44.730539,8.756712,0.000000,0
1,3,309,2020,8.612040,4,34.0,98056.0,1458.0,2.257525,0.501672,...,88.362551,4.587840,1.827676,0.334448,0.111483,57.585335,88.409704,1.827676,2.591973,93
2,4,27,2020,6.352941,0,45.1,250001.0,,0.000000,0.000000,...,87.411348,4.166667,8.421986,0.000000,0.000000,96.912114,89.750000,8.421986,0.000000,0
3,5,156,2020,76.470588,6,32.4,27292.0,738.0,1.470588,0.000000,...,5.081301,0.000000,0.000000,1.470588,2.941176,15.740741,4.941860,0.000000,2.941176,6
4,6,448,2020,66.468843,23,38.0,38056.0,1004.0,0.000000,0.000000,...,19.011628,0.058140,6.511628,0.000000,3.412463,31.268012,13.625304,6.511628,0.000000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5077,471,0,2010,0.000000,0,37.4,62770.0,935.0,0.000000,0.000000,...,92.813370,3.231198,0.000000,0.000000,0.000000,46.934461,54.072682,0.000000,0.000000,0
5078,472,0,2010,0.000000,0,45.6,101330.0,1607.0,0.000000,0.000000,...,89.743590,0.827130,9.429280,0.000000,0.000000,90.557940,48.387097,9.429280,0.000000,0
5079,473,0,2010,0.000000,0,32.8,44692.0,627.0,0.000000,0.000000,...,53.319283,0.000000,3.635406,0.000000,0.000000,56.753555,46.423358,3.635406,0.000000,0
5080,474,0,2010,0.000000,0,43.3,73750.0,1135.0,0.000000,0.000000,...,92.319749,1.933124,3.030303,0.000000,0.000000,74.741676,49.409449,3.030303,0.000000,0


### Regressions AAG

In [36]:
PooledData.columns

Index(['NPA', 'AllViolations', 'Year', 'ShareALLViolations',
       'HousingViolations', 'MedianAge', 'MedianIncome', 'MedianGrossRent',
       'PerNewResReno', 'PerNewResDem', 'PercentBlack', 'PercentWhite',
       'PercentAsian', 'PercentHispanic', 'PerNewResCon',
       'ShareHousingViolations', 'PercentOwnerOccupied',
       'PercentAdultsWithAtLeastBachelors', 'PercentMinority',
       'ShareHousingInvestment', 'HousingInvestment'],
      dtype='object')

In [37]:
PooledData.set_index(['NPA', 'Year'], inplace=True)

In [38]:
PooledData = PooledData.sort_values(by=['NPA','Year'])
PooledData.columns

Index(['AllViolations', 'ShareALLViolations', 'HousingViolations', 'MedianAge',
       'MedianIncome', 'MedianGrossRent', 'PerNewResReno', 'PerNewResDem',
       'PercentBlack', 'PercentWhite', 'PercentAsian', 'PercentHispanic',
       'PerNewResCon', 'ShareHousingViolations', 'PercentOwnerOccupied',
       'PercentAdultsWithAtLeastBachelors', 'PercentMinority',
       'ShareHousingInvestment', 'HousingInvestment'],
      dtype='object')

In [39]:
corrmat = PooledData.corr()
f, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(corrmat, vmax=0.9, square=True,fmt= '.1f',annot_kws={'size': 10}, 
            annot= True, cmap="Spectral")
plt.savefig('Correlation.jpg', bbox_inches='tight', dpi=1200)
plt.show();

In [40]:
Initialcol = PooledData.columns


Variables =  ['MedianIncome', 'MedianGrossRent','HousingInvestment','PercentMinority',
              'HousingViolations','PerNewResReno','PerNewResDem',
       'PercentBlack', 'PercentWhite', 'PercentAsian', 'PercentHispanic',
       'PerNewResCon', 'ShareHousingViolations', 'PercentOwnerOccupied',
       'PercentAdultsWithAtLeastBachelors', 'PercentMinority']

for row in Variables:
    VarLag = f'lag_{row}'
    PooledData[VarLag] = PooledData.groupby(level = 'NPA')[row].shift(1)
    
new_cols = set(PooledData.columns) - set(Initialcol)
print(new_cols)

{'lag_PercentWhite', 'lag_PercentMinority', 'lag_MedianGrossRent', 'lag_PerNewResDem', 'lag_PercentOwnerOccupied', 'lag_PercentHispanic', 'lag_PercentAsian', 'lag_PerNewResCon', 'lag_HousingInvestment', 'lag_MedianIncome', 'lag_PerNewResReno', 'lag_PercentBlack', 'lag_HousingViolations', 'lag_PercentAdultsWithAtLeastBachelors', 'lag_ShareHousingViolations'}


In [41]:
PooledData.replace([np.inf, -np.inf], np.nan, inplace=True)

In [42]:
PooledData = PooledData.dropna()

In [43]:
lags = [row for row in PooledData if row.startswith('lag')]
lags

['lag_MedianIncome',
 'lag_MedianGrossRent',
 'lag_HousingInvestment',
 'lag_PercentMinority',
 'lag_HousingViolations',
 'lag_PerNewResReno',
 'lag_PerNewResDem',
 'lag_PercentBlack',
 'lag_PercentWhite',
 'lag_PercentAsian',
 'lag_PercentHispanic',
 'lag_PerNewResCon',
 'lag_ShareHousingViolations',
 'lag_PercentOwnerOccupied',
 'lag_PercentAdultsWithAtLeastBachelors']

In [44]:
lags= ['lag_MedianIncome',
 'lag_PercentMinority',
 'lag_HousingViolations',
 'lag_MedianGrossRent',
 'lag_PerNewResReno',
 'lag_PerNewResDem',
 'lag_PerNewResCon',
 'lag_PercentOwnerOccupied']

In [45]:
PooledData.replace([np.nan, -np.nan], 0, inplace=True)

In [46]:
Exog = sm.add_constant(PooledData[lags])

### Pooled Housing Violations

In [47]:
#pooled regression for share of black applicants (black) w/ percentiles on RHS
pmod_housing = PooledOLS(PooledData.HousingViolations, Exog)
pblk_housing = pmod_housing.fit()
fitv = pblk_housing.predict().fitted_values
resid = pblk_housing.resids
print(pblk_housing)

                          PooledOLS Estimation Summary                          
Dep. Variable:      HousingViolations   R-squared:                        0.4047
Estimator:                  PooledOLS   R-squared (Between):              0.8539
No. Observations:                3878   R-squared (Within):              -0.1263
Date:                Wed, Sep 13 2023   R-squared (Overall):              0.4047
Time:                        11:50:02   Log-likelihood                -1.472e+04
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      328.77
Entities:                         462   P-value                           0.0000
Avg Obs:                       8.3939   Distribution:                  F(8,3869)
Min Obs:                       0.0000                                           
Max Obs:                      10.0000   F-statistic (robust):             328.77
                            

In [48]:
#pooled regression for share of black applicants (black) w/ percentiles on RHS
pmod_housing = RandomEffects(PooledData.HousingViolations, Exog)
pblk_housing = pmod_housing.fit()
fitv = pblk_housing.predict().fitted_values
resid = pblk_housing.resids
print(pblk_housing)


                        RandomEffects Estimation Summary                        
Dep. Variable:      HousingViolations   R-squared:                        0.4047
Estimator:              RandomEffects   R-squared (Between):              0.8539
No. Observations:                3878   R-squared (Within):              -0.1263
Date:                Wed, Sep 13 2023   R-squared (Overall):              0.4047
Time:                        11:50:02   Log-likelihood                -1.472e+04
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      328.77
Entities:                         462   P-value                           0.0000
Avg Obs:                       8.3939   Distribution:                  F(8,3869)
Min Obs:                       0.0000                                           
Max Obs:                      10.0000   F-statistic (robust):             328.77
                            

In [49]:
#pooled regression for share of black applicants (black) w/ percentiles on RHS
pmod_housing = PanelOLS(PooledData.HousingViolations, Exog,entity_effects=True)
pblk_housing = pmod_housing.fit(cov_type='clustered', cluster_entity=True)
fitv = pblk_housing.predict().fitted_values
resid = pblk_housing.resids
print(pblk_housing)

                          PanelOLS Estimation Summary                           
Dep. Variable:      HousingViolations   R-squared:                        0.0309
Estimator:                   PanelOLS   R-squared (Between):              0.3150
No. Observations:                3878   R-squared (Within):               0.0309
Date:                Wed, Sep 13 2023   R-squared (Overall):              0.1857
Time:                        11:50:02   Log-likelihood                -1.415e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      13.662
Entities:                         462   P-value                           0.0000
Avg Obs:                       8.3939   Distribution:                  F(8,3426)
Min Obs:                       0.0000                                           
Max Obs:                      10.0000   F-statistic (robust):             6.2734
                            