**Imports**

In [None]:
import os

import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

**Load Final Dataset**

In [None]:
# Read the merged sector-year table that already carries the shock indicator.
# NOTE(review): this is an absolute Colab path, so the notebook only runs where
# the file exists at exactly this location.
master_csv_path = "/content/master_with_shocks.csv"
df = pd.read_csv(master_csv_path)

In [None]:
# Quick look at the panel identifiers and the shock indicator.
preview_cols = ["year", "sector", "prod_shock"]
df[preview_cols].head()

Unnamed: 0,year,sector,prod_shock
0,2005,Basic metals,0
1,2006,Basic metals,0
2,2007,Basic metals,0
3,2008,Basic metals,1
4,2009,Basic metals,0


**Drop Missing Growth Observations**

In [None]:
# Keep only rows where all three growth outcomes are observed, so every
# regression fitted on reg_df uses the same estimation sample.
growth_outcomes = [
    "sector_output_growth",
    "output_per_worker_growth",
    "output_per_hour_growth",
]
reg_df = df.dropna(subset=growth_outcomes)

**Baseline Regression**

**Model:**
$$\Delta y_{s,t} = \alpha + \beta \cdot \mathrm{Shock}_{t} + \varepsilon_{s,t}$$

In [None]:
# Pooled OLS of sector output growth on the shock indicator, no fixed effects.
# cov_type="HC1" requests heteroskedasticity-robust standard errors.
# NOTE(review): HC1 treats observations as independent; if the shock is common
# to all sectors in a year, clustered errors may be more appropriate - confirm.
baseline_spec = smf.ols(formula="sector_output_growth ~ prod_shock", data=reg_df)
model1 = baseline_spec.fit(cov_type="HC1")

model1.summary()

0,1,2,3
Dep. Variable:,sector_output_growth,R-squared:,0.001
Model:,OLS,Adj. R-squared:,-0.004
Method:,Least Squares,F-statistic:,0.5972
Date:,"Mon, 12 Jan 2026",Prob (F-statistic):,0.441
Time:,13:27:23,Log-Likelihood:,-20.642
No. Observations:,207,AIC:,45.28
Df Residuals:,205,BIC:,51.95
Df Model:,1,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.0015,0.021,-0.072,0.943,-0.043,0.040
prod_shock,0.0216,0.028,0.773,0.440,-0.033,0.076

0,1,2,3
Omnibus:,184.233,Durbin-Watson:,1.983
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5563.997
Skew:,-3.144,Prob(JB):,0.0
Kurtosis:,27.608,Cond. No.,3.07


**Sector Fixed Effects**

In [None]:
# Same regression as the baseline, adding one dummy per sector via C(sector)
# so that time-invariant sector differences are absorbed.
sector_fe_spec = smf.ols(
    formula="sector_output_growth ~ prod_shock + C(sector)",
    data=reg_df,
)
model2 = sector_fe_spec.fit(cov_type="HC1")

model2.summary()

0,1,2,3
Dep. Variable:,sector_output_growth,R-squared:,0.09
Model:,OLS,Adj. R-squared:,-0.042
Method:,Least Squares,F-statistic:,0.7838
Date:,"Mon, 12 Jan 2026",Prob (F-statistic):,0.764
Time:,13:27:26,Log-Likelihood:,-10.986
No. Observations:,207,AIC:,75.97
Df Residuals:,180,BIC:,166.0
Df Model:,26,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0724,0.024,2.991,0.003,0.025,0.120
C(sector)[T.Chemicals and chemical products],-0.1288,0.091,-1.412,0.158,-0.308,0.050
"C(sector)[T.Coke, refined petroleum products and nuclear fuel]",-0.0664,0.045,-1.485,0.137,-0.154,0.021
C(sector)[T.Electrical machinery and apparatus n.e.c.],0.0494,0.149,0.333,0.739,-0.242,0.341
C(sector)[T.Electricity],-0.1834,0.166,-1.106,0.269,-0.508,0.142
"C(sector)[T.Fabricated metal products, except machinery and equipment]",-0.0473,0.063,-0.751,0.453,-0.171,0.076
C(sector)[T.Food products and beverages],-0.0918,0.084,-1.096,0.273,-0.256,0.072
C(sector)[T.Furniture manufacturing n.e.c.],-0.1068,0.062,-1.728,0.084,-0.228,0.014
C(sector)[T.General Index],-0.0413,0.044,-0.946,0.344,-0.127,0.044

0,1,2,3
Omnibus:,168.194,Durbin-Watson:,2.008
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3449.457
Skew:,-2.885,Prob(JB):,0.0
Kurtosis:,22.148,Cond. No.,28.9


**Time Fixed Effects**

In [None]:
# Two-way fixed effects: sector dummies plus year dummies, HC1 robust errors.
model3 = smf.ols(
    "sector_output_growth ~ prod_shock + C(sector) + C(year)",
    data=reg_df
).fit(cov_type="HC1")

# Identification guard. If prod_shock takes the same value for every sector
# within a year, it is an exact linear combination of the C(year) dummies.
# OLS.fit does not raise in that case: its default pseudo-inverse solver still
# returns a coefficient, but that number is arbitrary rather than an estimate.
# Typical symptoms are a condition number around 1e16 and fewer estimated
# parameters than design-matrix columns.
design3 = model3.model.exog
rank3 = np.linalg.matrix_rank(design3)
if rank3 < design3.shape[1]:
    print(
        f"WARNING (model3): design matrix has {design3.shape[1]} columns but "
        f"rank {rank3}; at least one coefficient (possibly prod_shock) is not "
        "identified. Do not interpret it."
    )

model3.summary()



0,1,2,3
Dep. Variable:,sector_output_growth,R-squared:,0.551
Model:,OLS,Adj. R-squared:,0.469
Method:,Least Squares,F-statistic:,6.876
Date:,"Mon, 12 Jan 2026",Prob (F-statistic):,8.42e-18
Time:,13:27:31,Log-Likelihood:,62.211
No. Observations:,207,AIC:,-58.42
Df Residuals:,174,BIC:,51.56
Df Model:,32,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.4622,0.087,-5.329,0.000,-0.632,-0.292
C(sector)[T.Chemicals and chemical products],-0.0620,0.027,-2.277,0.023,-0.115,-0.009
"C(sector)[T.Coke, refined petroleum products and nuclear fuel]",0.0004,0.040,0.011,0.992,-0.078,0.079
C(sector)[T.Electrical machinery and apparatus n.e.c.],0.1163,0.138,0.843,0.399,-0.154,0.387
C(sector)[T.Electricity],-0.1165,0.098,-1.192,0.233,-0.308,0.075
"C(sector)[T.Fabricated metal products, except machinery and equipment]",0.0196,0.029,0.682,0.495,-0.037,0.076
C(sector)[T.Food products and beverages],-0.0250,0.028,-0.896,0.370,-0.080,0.030
C(sector)[T.Furniture manufacturing n.e.c.],-0.0399,0.037,-1.066,0.286,-0.113,0.034
C(sector)[T.General Index],0.0255,0.037,0.682,0.495,-0.048,0.099

0,1,2,3
Omnibus:,159.597,Durbin-Watson:,1.718
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5777.459
Skew:,-2.423,Prob(JB):,0.0
Kurtosis:,28.424,Cond. No.,1.36e+16


**Productivity Transmission Channel**

**Output per worker**

In [None]:
# Output-per-worker growth on the shock with sector and year fixed effects.
model4 = smf.ols(
    "output_per_worker_growth ~ prod_shock + C(sector) + C(year)",
    data=reg_df
).fit(cov_type="HC1")

# Identification guard: a shock that is constant across sectors within a year
# is spanned by the C(year) dummies. The default pseudo-inverse solver still
# returns a prod_shock coefficient in that case, but it is not an estimate.
# NOTE(review): the recorded run shows R-squared of 1.0 and sector effects of
# order 1e-16, which suggests this outcome may not vary across sectors within
# a year, so the year dummies alone reproduce it exactly - confirm against the
# data construction.
design4 = model4.model.exog
rank4 = np.linalg.matrix_rank(design4)
if rank4 < design4.shape[1]:
    print(
        f"WARNING (model4): design matrix has {design4.shape[1]} columns but "
        f"rank {rank4}; at least one coefficient (possibly prod_shock) is not "
        "identified. Do not interpret it."
    )

model4.summary()



0,1,2,3
Dep. Variable:,output_per_worker_growth,R-squared:,1.0
Model:,OLS,Adj. R-squared:,1.0
Method:,Least Squares,F-statistic:,3.9350000000000004e+29
Date:,"Mon, 12 Jan 2026",Prob (F-statistic):,0.0
Time:,13:27:36,Log-Likelihood:,6983.6
No. Observations:,207,AIC:,-13900.0
Df Residuals:,174,BIC:,-13790.0
Df Model:,32,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.3985,2.7e-16,-1.47e+15,0.000,-0.399,-0.399
C(sector)[T.Chemicals and chemical products],-9.492e-17,3.01e-16,-0.316,0.752,-6.85e-16,4.95e-16
"C(sector)[T.Coke, refined petroleum products and nuclear fuel]",7.725e-19,3.26e-16,0.002,0.998,-6.38e-16,6.4e-16
C(sector)[T.Electrical machinery and apparatus n.e.c.],3.994e-16,4.4e-16,0.907,0.364,-4.63e-16,1.26e-15
C(sector)[T.Electricity],-2.298e-16,2.77e-16,-0.828,0.408,-7.74e-16,3.14e-16
"C(sector)[T.Fabricated metal products, except machinery and equipment]",1.086e-16,3.55e-16,0.306,0.760,-5.88e-16,8.05e-16
C(sector)[T.Food products and beverages],-4.16e-16,2.55e-16,-1.634,0.102,-9.15e-16,8.29e-17
C(sector)[T.Furniture manufacturing n.e.c.],-2.042e-16,2.78e-16,-0.733,0.463,-7.5e-16,3.41e-16
C(sector)[T.General Index],-4.978e-17,3.12e-16,-0.159,0.873,-6.62e-16,5.62e-16

0,1,2,3
Omnibus:,0.051,Durbin-Watson:,0.308
Prob(Omnibus):,0.975,Jarque-Bera (JB):,0.013
Skew:,-0.018,Prob(JB):,0.993
Kurtosis:,2.987,Cond. No.,1.36e+16


**Output per hour**

In [None]:
# Output-per-hour growth on the shock with sector and year fixed effects.
model5 = smf.ols(
    "output_per_hour_growth ~ prod_shock + C(sector) + C(year)",
    data=reg_df
).fit(cov_type="HC1")

# Identification guard: a shock that is constant across sectors within a year
# is spanned by the C(year) dummies. The default pseudo-inverse solver still
# returns a prod_shock coefficient in that case, but it is not an estimate.
# NOTE(review): the recorded run shows R-squared of 1.0 and sector effects of
# order 1e-16, which suggests this outcome may not vary across sectors within
# a year - confirm against the data construction.
design5 = model5.model.exog
rank5 = np.linalg.matrix_rank(design5)
if rank5 < design5.shape[1]:
    print(
        f"WARNING (model5): design matrix has {design5.shape[1]} columns but "
        f"rank {rank5}; at least one coefficient (possibly prod_shock) is not "
        "identified. Do not interpret it."
    )

model5.summary()



0,1,2,3
Dep. Variable:,output_per_hour_growth,R-squared:,1.0
Model:,OLS,Adj. R-squared:,1.0
Method:,Least Squares,F-statistic:,6.696e+29
Date:,"Mon, 12 Jan 2026",Prob (F-statistic):,0.0
Time:,13:27:40,Log-Likelihood:,6990.8
No. Observations:,207,AIC:,-13920.0
Df Residuals:,174,BIC:,-13810.0
Df Model:,32,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.3612,2.52e-16,-1.43e+15,0.000,-0.361,-0.361
C(sector)[T.Chemicals and chemical products],-8.267e-17,3.02e-16,-0.274,0.784,-6.75e-16,5.1e-16
"C(sector)[T.Coke, refined petroleum products and nuclear fuel]",1.233e-18,3.18e-16,0.004,0.997,-6.22e-16,6.24e-16
C(sector)[T.Electrical machinery and apparatus n.e.c.],3.297e-16,4.13e-16,0.798,0.425,-4.8e-16,1.14e-15
C(sector)[T.Electricity],-2.132e-16,2.7e-16,-0.789,0.430,-7.43e-16,3.16e-16
"C(sector)[T.Fabricated metal products, except machinery and equipment]",1.107e-16,3.47e-16,0.319,0.750,-5.7e-16,7.91e-16
C(sector)[T.Food products and beverages],-3.874e-16,2.48e-16,-1.561,0.119,-8.74e-16,9.91e-17
C(sector)[T.Furniture manufacturing n.e.c.],-1.866e-16,2.79e-16,-0.668,0.504,-7.34e-16,3.61e-16
C(sector)[T.General Index],-3.114e-17,3.04e-16,-0.102,0.919,-6.28e-16,5.66e-16

0,1,2,3
Omnibus:,0.969,Durbin-Watson:,0.166
Prob(Omnibus):,0.616,Jarque-Bera (JB):,0.683
Skew:,0.117,Prob(JB):,0.711
Kurtosis:,3.156,Cond. No.,1.36e+16


**Lagged Shock Effects (Persistence Test)**

In [None]:
# One-period lag of the shock within each sector.
# shift(1) takes the previous ROW of the group, so rows must be in year order
# inside every sector; sort explicitly rather than relying on file order.
# The assignment aligns on the index, so df keeps its original row order.
# NOTE(review): this still treats the previous available row as "last year";
# if a sector has missing years the lag spans the gap - confirm the panel has
# no gaps.
shock_by_sector = df.sort_values(["sector", "year"]).groupby("sector")["prod_shock"]
df["prod_shock_lag1"] = shock_by_sector.shift(1)

# Each sector's first year has no lag and is dropped here.
lag_df = df.dropna(subset=["sector_output_growth", "prod_shock_lag1"])

model6 = smf.ols(
    "sector_output_growth ~ prod_shock + prod_shock_lag1 + C(sector) + C(year)",
    data=lag_df
).fit(cov_type="HC1")

# Identification guard: if the shock (and therefore its lag) is constant across
# sectors within a year, both regressors are spanned by the C(year) dummies.
# The default pseudo-inverse solver still returns coefficients for them, but
# they are not estimates.
design6 = model6.model.exog
rank6 = np.linalg.matrix_rank(design6)
if rank6 < design6.shape[1]:
    print(
        f"WARNING (model6): design matrix has {design6.shape[1]} columns but "
        f"rank {rank6}; at least one coefficient (possibly prod_shock or "
        "prod_shock_lag1) is not identified. Do not interpret it."
    )

model6.summary()



0,1,2,3
Dep. Variable:,sector_output_growth,R-squared:,0.338
Model:,OLS,Adj. R-squared:,0.202
Method:,Least Squares,F-statistic:,2.59
Date:,"Mon, 12 Jan 2026",Prob (F-statistic):,7.19e-05
Time:,13:27:45,Log-Likelihood:,141.9
No. Observations:,182,AIC:,-219.8
Df Residuals:,150,BIC:,-117.3
Df Model:,31,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.1492,0.026,5.817,0.000,0.099,0.199
C(sector)[T.Chemicals and chemical products],-0.0423,0.017,-2.424,0.015,-0.077,-0.008
"C(sector)[T.Coke, refined petroleum products and nuclear fuel]",-0.0319,0.024,-1.306,0.192,-0.080,0.016
C(sector)[T.Electrical machinery and apparatus n.e.c.],0.0890,0.155,0.575,0.565,-0.214,0.392
C(sector)[T.Electricity],-0.0199,0.022,-0.917,0.359,-0.062,0.023
"C(sector)[T.Fabricated metal products, except machinery and equipment]",0.0041,0.024,0.167,0.868,-0.044,0.052
C(sector)[T.Food products and beverages],-0.0177,0.028,-0.636,0.525,-0.072,0.037
C(sector)[T.Furniture manufacturing n.e.c.],-0.0577,0.036,-1.622,0.105,-0.128,0.012
C(sector)[T.General Index],-0.0095,0.010,-0.973,0.331,-0.029,0.010

0,1,2,3
Omnibus:,133.25,Durbin-Watson:,1.626
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2416.901
Skew:,2.412,Prob(JB):,0.0
Kurtosis:,20.188,Cond. No.,8670000000000000.0


**Sector Heterogeneity**

In [None]:
# Sector-specific shock responses: prod_shock * C(sector) expands to the main
# effects plus a prod_shock:C(sector) interaction for each non-reference sector.
model7 = smf.ols(
    "sector_output_growth ~ prod_shock * C(sector) + C(year)",
    data=reg_df
).fit(cov_type="HC1")

# Identification guard: if prod_shock is constant across sectors within a year,
# its main effect is spanned by the C(year) dummies. The default pseudo-inverse
# solver still returns a value for every term, so check the rank before
# reading any of them.
design7 = model7.model.exog
rank7 = np.linalg.matrix_rank(design7)
if rank7 < design7.shape[1]:
    print(
        f"WARNING (model7): design matrix has {design7.shape[1]} columns but "
        f"rank {rank7}; at least one coefficient (possibly the prod_shock main "
        "effect) is not identified. Do not interpret it."
    )

model7.summary()



0,1,2,3
Dep. Variable:,sector_output_growth,R-squared:,0.571
Model:,OLS,Adj. R-squared:,0.407
Method:,Least Squares,F-statistic:,6.183
Date:,"Mon, 12 Jan 2026",Prob (F-statistic):,4.94e-15
Time:,13:28:00,Log-Likelihood:,66.954
No. Observations:,207,AIC:,-17.91
Df Residuals:,149,BIC:,175.4
Df Model:,57,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.4601,0.092,-5.002,0.000,-0.640,-0.280
C(sector)[T.Chemicals and chemical products],-0.0646,0.034,-1.926,0.054,-0.130,0.001
"C(sector)[T.Coke, refined petroleum products and nuclear fuel]",-0.0018,0.049,-0.037,0.970,-0.099,0.095
C(sector)[T.Electrical machinery and apparatus n.e.c.],0.0846,0.166,0.509,0.611,-0.241,0.410
C(sector)[T.Electricity],-0.1349,0.119,-1.132,0.258,-0.368,0.099
"C(sector)[T.Fabricated metal products, except machinery and equipment]",0.0243,0.034,0.709,0.478,-0.043,0.091
C(sector)[T.Food products and beverages],-0.0141,0.030,-0.470,0.638,-0.073,0.045
C(sector)[T.Furniture manufacturing n.e.c.],-0.0537,0.044,-1.229,0.219,-0.139,0.032
C(sector)[T.General Index],0.0278,0.046,0.607,0.544,-0.062,0.118

0,1,2,3
Omnibus:,156.056,Durbin-Watson:,1.788
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5955.663
Skew:,-2.316,Prob(JB):,0.0
Kurtosis:,28.866,Cond. No.,1.51e+16


**Compare Coefficients**

In [None]:
# Side-by-side table of the prod_shock coefficient and its p-value from each
# of the first five specifications, one row per model.
# NOTE(review): the rows for models with year fixed effects are only meaningful
# if prod_shock is identified in those regressions - confirm before citing.
labelled_fits = {
    "Baseline": model1,
    "Sector FE": model2,
    "Sector + Time FE": model3,
    "OPW Growth": model4,
    "OPH Growth": model5,
}

results = pd.DataFrame({
    "Model": list(labelled_fits),
    "Shock Coefficient": [fit.params["prod_shock"] for fit in labelled_fits.values()],
    "p-value": [fit.pvalues["prod_shock"] for fit in labelled_fits.values()],
})

results

Unnamed: 0,Model,Shock Coefficient,p-value
0,Baseline,0.021605,0.4396645
1,Sector FE,0.021199,0.4829316
2,Sector + Time FE,0.245753,2.609264e-08
3,OPW Growth,0.212308,0.0
4,OPH Growth,0.189477,0.0


In [None]:
# Write the coefficient comparison table to disk for use outside the notebook.
# The path is relative to the notebook's working directory.
output_dir = "../outputs"

# Create the directory if it does not exist (no-op when it already does).
os.makedirs(output_dir, exist_ok=True)

results.to_csv(os.path.join(output_dir, "regression_summary.csv"), index=False)