# panel_fe_1

Slightly modified panel data estimation example from [Cunningham (2021) chapter 8](https://mixtape.scunning.com/panel-data.html?panelset=python-code) with different estimation routines.

In [1]:
import ssl
import numpy as np 
import pandas as pd 
import statsmodels.api as sm
import rpy2
%load_ext rpy2.ipython

In [2]:
%%R
library(estimatr)
library(plm)

## Prepare data

In [3]:
# Read data
# NOTE(review): this replaces the ssl module's default HTTPS context factory with
# the unverified one, so server certificates are not checked by downloads that rely
# on that default. It is kept so the download behaves as before; prefer fixing the
# local certificate store and removing this line.
ssl._create_default_https_context = ssl._create_unverified_context


def read_data(file, base_url="https://raw.github.com/scunning1975/mixtape/master/"):
    """Download a Stata file and return it as a DataFrame.

    Parameters
    ----------
    file : str
        File name appended to ``base_url``, e.g. ``"sasp_panel.dta"``.
    base_url : str, optional
        URL prefix the file is fetched from. Defaults to the location the
        notebook has always used, so existing calls are unaffected.

    Returns
    -------
    pandas.DataFrame
        The result of ``pd.read_stata`` on ``base_url + file``.
    """
    return pd.read_stata(base_url + file)


sasp_safe = read_data("sasp_panel.dta")

In [4]:
# Derive the working frame from the raw download without mutating anything in
# place, so `sasp_safe` stays intact and re-running this cell is repeatable.
sasp = sasp_safe.dropna().sort_values('id')

# Create balanced data: keep only the ids whose number of rows equals the
# number of distinct sessions observed in the data.
times = sasp['session'].nunique()
in_all_times = (
    sasp.groupby('id')['session']
    .size()
    .eq(times)
    .rename('in_all_times')
    .reset_index()
)
balanced_sasp = pd.merge(in_all_times, sasp, how='left', on='id')
# .copy() makes the filtered frame independent of the merge result, so the
# column assignment below (and later ones) never writes into a slice.
balanced_sasp = balanced_sasp[balanced_sasp['in_all_times']].copy()

# Dummy for column provider_second: 1.0 where the value is "2. Yes", else 0.0
balanced_sasp['provider_second'] = (
    balanced_sasp['provider_second'] == "2. Yes"
).astype(float)

In [5]:
# Report the number of rows, then preview the first two of them
n_rows = balanced_sasp.shape[0]
print("Length of frame {}".format(n_rows))
balanced_sasp.head(2)

Length of frame 1028


Unnamed: 0,id,in_all_times,session,age,age_cl,appearance_cl,bmi,schooling,asq_cl,provider_second,...,hispanic,other,white,asq,cohab,married,divorced,separated,nevermarried,widowed
3,6.0,True,3.0,29.0,45.0,4.0,30.893555,16.0,2025.0,0.0,...,0.0,0.0,1.0,841.0,1.0,0.0,0.0,0.0,0.0,0.0
4,6.0,True,1.0,29.0,32.5,6.0,30.893555,16.0,1056.25,0.0,...,0.0,0.0,1.0,841.0,1.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Prepare demeaned columns for the demeaned OLS estimator
# Do not demean columns in exceptions
exceptions = ['session', 'id', 'in_all_times']
# Columns that already carry the demean_ prefix are skipped so that re-running
# this cell does not produce demean_demean_* columns.
features = [
    x for x in balanced_sasp.columns
    if x not in exceptions and not x.startswith("demean_")
]
demean_features = ["demean_{}".format(x) for x in features]

# Subtract each id's own mean from every feature. transform('mean') returns one
# row per input row on the original index, so the subtraction lines up row by
# row and does not rely on how groupby.apply / np.mean treat whole frames in a
# particular pandas version.
group_means = balanced_sasp.groupby('id')[features].transform('mean')
demeaned = (balanced_sasp[features] - group_means).add_prefix("demean_")

# Attach all demeaned columns in one step (dropping stale ones from an earlier
# run of this cell first) instead of inserting them one at a time.
balanced_sasp = pd.concat(
    [balanced_sasp.drop(columns=demean_features, errors='ignore'), demeaned],
    axis=1,
)

## Pooled OLS

In [7]:
# Leave out the outcome plus white, widowed and nevermarried so the dummies
# do not run into the dummy variable trap.
omitted = ("lnw", "white", "widowed", "nevermarried")
regressors = sorted(col for col in features if col not in omitted)
formula = "lnw ~ " + " + ".join(regressors)
print(formula)

# Pooled OLS on the balanced panel with the default (non-robust) covariance
pooled_spec = sm.OLS.from_formula(formula, data=balanced_sasp)
pols = pooled_spec.fit()
print(pols.summary())

lnw ~ age + age_cl + appearance_cl + asian + asian_cl + asq + asq_cl + black + black_cl + bmi + cohab + divorced + hispanic + hispanic_cl + hot + llength + married + massage_cl + other + othrace_cl + provider_second + reg + schooling + separated + unsafe
                            OLS Regression Results                            
Dep. Variable:                    lnw   R-squared:                       0.303
Model:                            OLS   Adj. R-squared:                  0.285
Method:                 Least Squares   F-statistic:                     17.39
Date:                Sun, 11 Apr 2021   Prob (F-statistic):           3.97e-62
Time:                        19:03:48   Log-Likelihood:                -570.00
No. Observations:                1028   AIC:                             1192.
Df Residuals:                    1002   BIC:                             1320.
Df Model:                          25                                         
Covariance Type:            nonrob

## LSDV in Python *statsmodels*

This is the least-squares dummy variable (LSDV) regression, which yields results equivalent to the FE within estimator for the estimates we are interested in. Unlike the within estimator, it can also be used to obtain estimates of the fixed effects themselves. The LSDV estimator becomes infeasible if the number of units N is very large.

Here the provider estimates are non-zero, but a warning about singularity is raised.

In [8]:
# Leave out the outcome plus white, widowed and nevermarried to avoid the dummy
# variable trap; C(id) adds the id dummies and -1 removes the common intercept.
excluded = ("lnw", "white", "widowed", "nevermarried")
covariates = sorted(name for name in features if name not in excluded)
formula = "lnw ~ -1 + C(id) + " + " + ".join(covariates)
print(formula)

# Standard errors clustered on id
cluster_opts = {'groups': balanced_sasp['id']}
lsdv_spec = sm.OLS.from_formula(formula, data=balanced_sasp)
fe = lsdv_spec.fit(cov_type='cluster', cov_kwds=cluster_opts)
print(fe.summary())

lnw ~ -1 + C(id) + age + age_cl + appearance_cl + asian + asian_cl + asq + asq_cl + black + black_cl + bmi + cohab + divorced + hispanic + hispanic_cl + hot + llength + married + massage_cl + other + othrace_cl + provider_second + reg + schooling + separated + unsafe
                            OLS Regression Results                            
Dep. Variable:                    lnw   R-squared:                       0.832
Model:                            OLS   Adj. R-squared:                  0.773
Method:                 Least Squares   F-statistic:                       nan
Date:                Sun, 11 Apr 2021   Prob (F-statistic):                nan
Time:                        19:03:49   Log-Likelihood:                 162.25
No. Observations:                1028   AIC:                             215.5
Df Residuals:                     758   BIC:                             1548.
Df Model:                         269                                         
Covariance Type:     

  cond2 = cond0 & (x <= _a)


## De-meaned OLS

Here the provider estimates are zero.

In [9]:
# Escape white, widowed, nevermarried to avoid dummy variable trap
formula = "demean_lnw ~ " + " + ".join(
    sorted([x for x in demean_features if x not in
        ["demean_lnw", "demean_white", "demean_widowed", "demean_nevermarried"]])
)
# print() returns None, so the former print(print(formula)) emitted the formula
# followed by a stray "None" line; a single print shows just the formula.
print(formula)

# OLS on the id-demeaned variables with standard errors clustered on id
dm_ols = sm.OLS.from_formula(formula, data=balanced_sasp).fit(
    cov_type='cluster',
    cov_kwds={'groups': balanced_sasp['id']}
)
print(dm_ols.summary())

demean_lnw ~ demean_age + demean_age_cl + demean_appearance_cl + demean_asian + demean_asian_cl + demean_asq + demean_asq_cl + demean_black + demean_black_cl + demean_bmi + demean_cohab + demean_divorced + demean_hispanic + demean_hispanic_cl + demean_hot + demean_llength + demean_married + demean_massage_cl + demean_other + demean_othrace_cl + demean_provider_second + demean_reg + demean_schooling + demean_separated + demean_unsafe
None
                            OLS Regression Results                            
Dep. Variable:             demean_lnw   R-squared:                       0.516
Model:                            OLS   Adj. R-squared:                  0.510
Method:                 Least Squares   F-statistic:                     32.39
Date:                Sun, 11 Apr 2021   Prob (F-statistic):           9.91e-47
Time:                        19:03:49   Log-Likelihood:                 162.25
No. Observations:                1028   AIC:                            -296.5
Df Re



## Fixed effects in R *estimatr*

Here provider estimates are NA due to singularity!

In [10]:
# Formula string handed to the R cell below: every feature except the outcome
# and the white / widowed / nevermarried columns, in alphabetical order.
dropped = ("lnw", "white", "widowed", "nevermarried")
rhs_terms = [name for name in features if name not in dropped]
rhs_terms.sort()
formula = "lnw ~ " + " + ".join(rhs_terms)
print(formula)

lnw ~ age + age_cl + appearance_cl + asian + asian_cl + asq + asq_cl + black + black_cl + bmi + cohab + divorced + hispanic + hispanic_cl + hot + llength + married + massage_cl + other + othrace_cl + provider_second + reg + schooling + separated + unsafe


In [11]:
%%R -i balanced_sasp,formula -o out
# `formula` is imported from Python as a string; convert it to an R formula
formula = as.formula(formula)
# id fixed effects with Stata-style robust standard errors. The argument is
# spelled out in full (`fixed_effects`); the shorter `fixed_effect` only worked
# through R's partial argument matching.
model_fe = lm_robust(
    formula=formula,
    data=balanced_sasp,
    fixed_effects=~id,
    se_type="stata"
)
# `out` is exported back to Python by the -o flag above
out = summary(model_fe)

  res = PandasDataFrame.from_items(items)


In [12]:
# Print the lm_robust summary that the %%R cell exported to Python as `out`
print(out)



Call:

lm_robust(formula = formula, data = balanced_sasp, fixed_effects = ~id, 

    se_type = "stata")



Standard error type:  HC1 



Coefficients: (12 not defined because the design matrix is rank deficient)

                  Estimate Std. Error   t value  Pr(>|t|)   CI Lower   CI Upper

age                     NA         NA        NA        NA         NA         NA

age_cl           0.0023054  6.393e-03   0.36059 7.185e-01 -0.0102454  0.0148562

appearance_cl    0.0056350  5.483e-03   1.02774 3.044e-01 -0.0051285  0.0163985

asian                   NA         NA        NA        NA         NA         NA

asian_cl        -0.0098734  3.474e-02  -0.28419 7.763e-01 -0.0780755  0.0583288

asq                     NA         NA        NA        NA         NA         NA

asq_cl          -0.0000147  7.006e-05  -0.20984 8.338e-01 -0.0001522  0.0001228

black                   NA         NA        NA        NA         NA         NA

black_cl         0.0265076  4.224e-02   0.62755 5.305e-0

## Fixed effects in R *plm*

In [13]:
# Formula string handed to the R cell below: every feature except the outcome
# and the white / widowed / nevermarried columns, in alphabetical order.
left_out = ("lnw", "white", "widowed", "nevermarried")
plm_terms = [name for name in features if name not in left_out]
plm_terms.sort()
formula = "lnw ~ " + " + ".join(plm_terms)
print(formula)

lnw ~ age + age_cl + appearance_cl + asian + asian_cl + asq + asq_cl + black + black_cl + bmi + cohab + divorced + hispanic + hispanic_cl + hot + llength + married + massage_cl + other + othrace_cl + provider_second + reg + schooling + separated + unsafe


In [14]:
%%R -i balanced_sasp,formula -o out
# Turn the formula string imported from Python into an R formula object
formula <- as.formula(formula)
# Within (fixed effects) estimator with id as the individual index
model_fe <- plm(
    formula = formula,
    data = balanced_sasp,
    model = "within",
    index = c("id")
)
# `out` is exported back to Python by the -o flag above
out <- summary(model_fe)

In [15]:
# Print the plm summary that the %%R cell exported to Python as `out`
print(out)

Oneway (individual) effect Within Model



Call:

plm(formula = formula, data = balanced_sasp, model = "within", 

    index = c("id"))



Balanced Panel: n = 257, T = 4, N = 1028



Residuals:

     Min.   1st Qu.    Median   3rd Qu.      Max. 

-1.052181 -0.112350 -0.010595  0.106355  0.976679 



Coefficients:

                   Estimate  Std. Error  t-value  Pr(>|t|)    

age_cl           2.3054e-03  5.6515e-03   0.4079  0.683443    

appearance_cl    5.6350e-03  5.1154e-03   1.1016  0.270995    

asian_cl        -9.8734e-03  3.8936e-02  -0.2536  0.799890    

asq_cl          -1.4702e-05  5.9090e-05  -0.2488  0.803585    

black_cl         2.6508e-02  4.0876e-02   0.6485  0.516870    

hispanic_cl     -6.2088e-02  4.8895e-02  -1.2698  0.204541    

hot              5.2478e-02  2.4949e-02   2.1034  0.035761 *  

llength         -4.3451e-01  1.5746e-02 -27.5946 < 2.2e-16 ***

massage_cl      -1.0344e-03  2.5069e-02  -0.0413  0.967099    

othrace_cl       1.4217e-01  5.4900e-02   2.