In [1]:
import pandas as pd
import numpy as np

import statsmodels.formula.api as smf

### Merging stock data

In [None]:
# Read the stock-level SAS dataset with DATE as the frame's index,
# decoding text columns as UTF-8.
stkdata = pd.read_sas(
    'stkdata.sas7bdat',
    format='sas7bdat',
    index='DATE',
    encoding='utf-8',
)
# Keep only the three tickers analysed in this notebook.
stkdata = stkdata.loc[stkdata['TICKER'].isin(['CVX', 'JNJ', 'PFE'])]

# Read the market-level SAS dataset (no index column requested here).
mktdata = pd.read_sas(
    'mktdata.sas7bdat',
    format='sas7bdat',
    encoding='utf-8',
)

In [None]:
# Join the stock rows to the market rows on DATE, then build the
# dependent variable: the stock return minus RF.
Regdata = stkdata.merge(mktdata, on='DATE')
Regdata['RETRF'] = Regdata['RET'].sub(Regdata['RF'])

### Collect regression residuals to compute squared residuals

In [None]:
# For each ticker, regress the excess return (RETRF) on MKTRF and keep the
# residuals alongside the observations they were estimated from.
resid_frames = []

for TIC in ['CVX', 'JNJ', 'PFE']:
    # .copy() makes the per-ticker subset an independent frame, so adding
    # the 'resid' column below does not raise SettingWithCopyWarning.
    tempdf = Regdata[Regdata['TICKER'] == TIC].copy()
    mdl = smf.ols('RETRF ~ MKTRF', data=tempdf).fit()
    tempdf['resid'] = mdl.resid
    resid_frames.append(tempdf)

# Stack the per-ticker frames into a single DataFrame. The list is kept
# under its own name so that Regout is only ever a DataFrame.
Regout = pd.concat(resid_frames)

In [None]:
# Display the stacked per-ticker frame, which now carries the 'resid' column.
Regout

In [None]:
# Add the squared residual (e2) and the squared MKTRF regressor (MKTRF2);
# both are used by the auxiliary regression of the White test.
Regout['e2'] = Regout['resid'].pow(2)
Regout['MKTRF2'] = Regout['MKTRF'].pow(2)

### White Test for Heteroskedasticity

In [None]:
# Auxiliary regression for the White test: squared residuals on MKTRF and
# its square, run separately for each listed ticker.
for TIC in ['CVX']:
    is_ticker = Regout['TICKER'].eq(TIC)
    tempdf = Regout.loc[is_ticker]
    mdl_White_Test = smf.ols('e2 ~ MKTRF+MKTRF2', data=tempdf).fit()
    # Blank line, ticker label, blank line, then the regression table.
    print(f'\n{TIC}\n')
    print(mdl_White_Test.summary())

### White Standard Errors (equivalent to SAS PROC SURVEYREG)

In [None]:
# Three-factor regression per ticker, fitted with the HC1 covariance
# estimator, followed by a joint test on the SMB and HML coefficients.
for TIC in ['CVX']:
    is_ticker = Regdata['TICKER'].eq(TIC)
    tempdf = Regdata.loc[is_ticker]
    mdl_White_SE = smf.ols(
        'RETRF ~ MKTRF + SMB + HML',
        data=tempdf,
    ).fit(cov_type='HC1')
    # Blank line, ticker label, blank line, then the regression table.
    print(f'\n{TIC}\n')
    print(mdl_White_SE.summary())
    print()
    # Joint null hypothesis: SMB = 0 and HML = 0.
    hypotheses = '(SMB = 0), (HML=0)'
    f_test = mdl_White_SE.f_test(hypotheses)
    print(f_test)

### Lagrange Multiplier test for Serial Correlation

In [None]:
# Order rows by ticker and then by date, so that within a ticker each row's
# predecessor is the previous date (the lagged residual below relies on
# shift(1) over this ordering).
Regout = Regout.sort_values(['TICKER', 'DATE'])
Regout

In [None]:
# Auxiliary regression for the LM serial-correlation test: the residual is
# regressed on MKTRF and on its own one-period lag.
for TIC in ['CVX']:
    # .copy() makes the subset an independent frame, so adding the lag
    # column below does not raise SettingWithCopyWarning.
    tempdf = Regout[Regout['TICKER'] == TIC].copy()
    # Lagged residual: the previous row's 'resid'. The first row has no
    # predecessor and gets NaN.
    tempdf['le'] = tempdf['resid'].shift(1)
    mdl_LM_Test = smf.ols('resid ~ MKTRF+le', data=tempdf).fit()
    # Blank line, ticker label, blank line, then the regression table.
    print(f'\n{TIC}\n')
    print(mdl_LM_Test.summary())

In [None]:
# Three-factor regression per ticker on the date-sorted frame, fitted with
# the 'hac' covariance option (maxlags=3), followed by a joint test on the
# SMB and HML coefficients.
for TIC in ['CVX']:
    is_ticker = Regout['TICKER'].eq(TIC)
    tempdf = Regout.loc[is_ticker]
    mdl_HAC_SE = smf.ols('RETRF ~ MKTRF + SMB + HML', data=tempdf).fit(
        cov_type='hac',
        cov_kwds={'maxlags': 3},
    )
    # Blank line, ticker label, blank line, then the regression table.
    print(f'\n{TIC}\n')
    print(mdl_HAC_SE.summary())
    print()
    # Joint null hypothesis: SMB = 0 and HML = 0.
    hypotheses = '(SMB = 0), (HML=0)'
    f_test = mdl_HAC_SE.f_test(hypotheses)
    print(f_test)

### Panel data

In [None]:
# Load the NLS panel dataset. No encoding is passed here, unlike the
# stock-data reads; the original kept `encoding='utf-8'` commented out.
nls_panel2 = pd.read_sas(
    'nls_panel2.sas7bdat',
    format='sas7bdat',
)

In [None]:
# Display the panel dataset as loaded from 'nls_panel2.sas7bdat'.
nls_panel2

In [None]:
# Separate cross-sectional wage regression for each listed year.
for yr in [87, 88]:
    in_year = nls_panel2['year'].eq(yr)
    tempdf = nls_panel2.loc[in_year]
    mdl_OLS_YR = smf.ols(
        'lwage ~ educ+exper+exper2+black+south+union',
        data=tempdf,
    ).fit()
    # Blank line, year label, blank line, then the regression table.
    print(f'\n{yr}\n')
    print(mdl_OLS_YR.summary())

### Pooled regression with non-robust OLS standard errors

In [None]:
# Pooled wage regression over all rows of the panel, with the default
# (non-robust) covariance.
mdl_poolreg = smf.ols(
    'lwage ~ educ+exper+exper2+black+south+union',
    data=nls_panel2,
).fit()
print(mdl_poolreg.summary())

### Pooled regression with Cluster-robust standard errors

In [None]:
# Same pooled specification, but with standard errors clustered on 'id'.
# NOTE(review): despite the '_hac' suffix, the covariance type requested
# here is 'cluster', not HAC.
mdl_poolreg_hac = smf.ols(
    'lwage ~ educ+exper+exper2+black+south+union',
    data=nls_panel2,
).fit(
    cov_type='cluster',
    cov_kwds={'groups': nls_panel2['id']},
)
print(mdl_poolreg_hac.summary())

### Fixed-effects Model

In [None]:
!pip install linearmodels
from linearmodels import PanelOLS
from linearmodels import RandomEffects

!pip uninstall statsmodels
!pip install statsmodels
import statsmodels.api as sm

In [None]:
# Re-index the panel by (id, year) before handing it to PanelOLS.
nls_panel2_new = nls_panel2.set_index(['id', 'year'])

# Dependent variable, and the regressors with a constant column added.
yvar = nls_panel2_new['lwage']
xvar = sm.add_constant(
    nls_panel2_new[['educ', 'exper', 'exper2', 'black', 'south', 'union']]
)

# Fit with entity effects. drop_absorbed=True is passed as well —
# presumably so regressors that do not vary within an entity are dropped
# instead of raising; confirm against the linearmodels documentation.
mdl_fixed_effects = PanelOLS(
    yvar,
    xvar,
    entity_effects=True,
    drop_absorbed=True,
).fit()

print(mdl_fixed_effects)