**Linear Panel Data and Production Technology**

In [70]:
import pandas as pd 
import numpy as np
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from linearmodels.panel import PanelOLS, RandomEffects
from scipy.stats import chi2

**1 Cobb-Douglas Production**

In [72]:
# Peek at five randomly drawn rows of the `dat` DataFrame.
dat.sample(n=5)

Unnamed: 0,firmid,year,lcap,lemp,ldsa
1578,132,1974,0.18052,-0.848608,-0.311142
4812,402,1968,1.42346,1.559597,1.385296
762,64,1974,0.30808,-0.363891,-0.757899
3407,284,1979,0.912276,1.400748,0.878726
1079,90,1979,-1.07205,-0.954158,-1.13016


In [73]:
# List the distinct values found in the `year` column of `dat` (returned as a NumPy array).
dat["year"].unique()

array([1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978,
       1979])

In [74]:
# Show the column labels of `dat`.
print(dat.columns)


Index(['firmid', 'year', 'lcap', 'lemp', 'ldsa'], dtype='object')


In [82]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
from linearmodels.panel import PanelOLS, RandomEffects
from scipy.stats import chi2

# Load the firm panel from firms.csv and index it for panel estimation.
# Re-reading the file and assigning `dat` in one statement keeps this cell
# idempotent when it is re-run.
#
# linearmodels expects a two-level MultiIndex ordered (entity, time). The file
# already carries the firm identifier (`firmid`) and the calendar year (`year`),
# so those are used directly. The previous code overwrote `year` with
# `dat.index % 12` and indexed on (year remainder, row number), which made the
# year remainder the "entity" and meant EntityEffects did not absorb firm effects.
dat = pd.read_csv('firms.csv').set_index(['firmid', 'year']).sort_index()

# Each (firm, year) pair must identify exactly one observation.
assert dat.index.is_unique, "duplicate (firmid, year) observations in firms.csv"

# Pooled OLS: ignores the panel structure entirely.
pooled_ols = smf.ols('ldsa ~ lcap + lemp', data=dat).fit()

# Fixed effects (FE): EntityEffects adds one intercept per firm.
fe_model = PanelOLS.from_formula('ldsa ~ lcap + lemp + EntityEffects', data=dat).fit()

# Random effects (RE) on the same regressors.
re_model = RandomEffects.from_formula('ldsa ~ lcap + lemp', data=dat).fit()

# Hypothesis test for constant returns to scale, H0: beta_k + beta_l = 1,
# based on the fixed-effects estimates.
beta_k, beta_l = fe_model.params['lcap'], fe_model.params['lemp']
fe_cov = fe_model.cov
# Var(b_k + b_l) = Var(b_k) + Var(b_l) + 2 * Cov(b_k, b_l). The covariance term
# was previously omitted, which misstates the variance whenever the two
# estimates are correlated. Label-based lookups avoid relying on the order of
# the coefficients in the covariance matrix.
var_sum = (
    fe_cov.loc['lcap', 'lcap']
    + fe_cov.loc['lemp', 'lemp']
    + 2 * fe_cov.loc['lcap', 'lemp']
)
wald_stat = (beta_k + beta_l - 1) ** 2 / var_sum
# One linear restriction, so the statistic is compared with a chi-squared(1)
# distribution; sf() is the upper-tail probability (1 - cdf).
p_value = chi2.sf(wald_stat, 1)

# Hausman test of FE against RE. H0: the RE estimator is consistent (and
# efficient), so the FE and RE coefficients differ only by sampling noise.
# Compare only coefficients present in both models, aligned by name.
common = fe_model.params.index.intersection(re_model.params.index)
diff = fe_model.params[common] - re_model.params[common]
# Under H0, Var(b_FE - b_RE) = Var(b_FE) - Var(b_RE): the variances are
# subtracted, not added as before.
var_diff = fe_model.cov.loc[common, common] - re_model.cov.loc[common, common]
# Quadratic form d' V^{-1} d, computed with solve() instead of an explicit inverse.
# NOTE: in finite samples var_diff need not be positive definite, so the
# statistic can come out negative; treat such a result as inconclusive.
diff_values = diff.to_numpy()
hausman_stat = float(diff_values @ np.linalg.solve(var_diff.to_numpy(), diff_values))
# Degrees of freedom equal the number of coefficients compared.
hausman_p_value = chi2.sf(hausman_stat, len(diff))

# Report the three model summaries, each under its own heading.
# statsmodels exposes `summary` as a method, linearmodels as a property.
model_reports = (
    ("Pooled OLS:\n", pooled_ols.summary()),
    ("\nFixed Effects:\n", fe_model.summary),
    ("\nRandom Effects:\n", re_model.summary),
)
for heading, report in model_reports:
    print(heading, report)

# Report the two test statistics with their p-values, rounded to three decimals.
wald_line = f"\nWald-test for konstant skalaafkast: Test-statistik = {wald_stat:.3f}, p-værdi = {p_value:.3f}"
hausman_line = f"\nHausman-test mellem FE og RE: Test-statistik = {hausman_stat:.3f}, p-værdi = {hausman_p_value:.3f}"
print(wald_line)
print(hausman_line)


Pooled OLS:
                             OLS Regression Results                            
Dep. Variable:                   ldsa   R-squared:                       0.914
Model:                            OLS   Adj. R-squared:                  0.914
Method:                 Least Squares   F-statistic:                 2.807e+04
Date:                Tue, 18 Feb 2025   Prob (F-statistic):               0.00
Time:                        11:32:46   Log-Likelihood:                -2125.9
No. Observations:                5292   AIC:                             4258.
Df Residuals:                    5289   BIC:                             4277.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   1.536e-08      0.005   3.09