In [161]:
import warnings

import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.stats import norm
from statsmodels.sandbox.regression.gmm import GMM

warnings.filterwarnings('ignore')


In [163]:
# Read the tab-delimited file into a DataFrame, then preview the first five rows.
data = pd.read_csv("minwage.txt", delimiter="\t")
data.head(n=5)

Unnamed: 0,chain,own,state,empft,emppt,nmgrs,wagest,empft2,emppt2,nmgrs2,wagest2
0,1,0,0,30.0,15.0,3,.,3.5,35,3,4.3
1,2,0,0,6.5,6.5,4,.,0.0,15,4,4.45
2,2,1,0,3.0,7.0,2,.,3.0,7,4,5.0
3,4,1,0,20.0,20.0,4,5,0.0,36,2,5.25
4,4,1,0,6.0,26.0,5,5.5,28.0,3,6,4.75


In [165]:
# '.' is the missing-value marker in the raw file: convert it to NaN and keep
# only rows that are complete in every column.
data = data.replace('.', np.nan).dropna()

# These columns were read as text because of the '.' markers; cast them to float.
columns_to_convert = ["empft", "nmgrs", "emppt", "empft2", "nmgrs2", "emppt2", 'wagest', 'wagest2']
data = data.astype({name: float for name in columns_to_convert})

# Full-time-equivalent employment for each period:
# full-timers + managers + half of the part-timers.
data = data.assign(
    fte=lambda frame: frame["empft"] + frame["nmgrs"] + 0.5 * frame["emppt"],
    fte2=lambda frame: frame["empft2"] + frame["nmgrs2"] + 0.5 * frame["emppt2"],
)

### Problem (a)

In [167]:
# Mean FTE per state before and after, with state == 1 (NJ) sorted to the top.
diff_table = (
    data.groupby("state")[["fte", "fte2"]]
    .mean()
    .reset_index()
    .sort_values(by="state", ascending=False)
)
diff_table.columns = ["state", "before", "after"]
diff_table["state"] = diff_table["state"].replace({1: "NJ", 0: "PA"})
diff_table["diffs"] = diff_table["after"] - diff_table["before"]

# Extra bottom row: first row (NJ) minus second row (PA) in every numeric column.
# Its "diffs" entry is therefore the difference of the two within-state changes.
top, bottom = diff_table.iloc[0], diff_table.iloc[1]
diff_row = {"state": "diffs"}
for col in ("before", "after", "diffs"):
    diff_row[col] = top[col] - bottom[col]

diff_table = pd.concat([diff_table, pd.DataFrame([diff_row])], ignore_index=True)



In [169]:
# Show the assembled table: per-state before/after means, their changes,
# and the NJ-minus-PA row appended at the bottom.
diff_table

Unnamed: 0,state,before,after,diffs
0,NJ,20.678246,21.076316,0.39807
1,PA,23.704545,21.825758,-1.878788
2,diffs,-3.0263,-0.749442,2.276858


In [171]:
# Per-state means of wagest / wagest2 (before / after) and their change,
# with state == 1 (NJ) listed first. No NJ-minus-PA row is added here.
state_labels = {1: "NJ", 0: "PA"}
wage_table = (
    data.groupby("state")[["wagest", "wagest2"]]
    .mean()
    .reset_index()
    .sort_values(by="state", ascending=False)
)
wage_table.columns = ["state", "before", "after"]
wage_table["state"] = wage_table["state"].replace(state_labels)
wage_table["diffs"] = wage_table["after"].sub(wage_table["before"])
wage_table

Unnamed: 0,state,before,after,diffs
1,NJ,4.612982,5.08214,0.469158
0,PA,4.653636,4.618788,-0.034848


### Problem (b)

In [174]:
# Stack the two periods into long format: every row of `data` appears twice,
# once with its `fte` (post = 0) and once with its `fte2` (post = 1).
# `.assign` returns a new frame, so no column is ever set on a slice of `data`
# (setting one directly on `data[[...]]` triggers SettingWithCopyWarning).
shared_cols = ['chain', 'own', 'state']
data_before = data[shared_cols + ['fte']].assign(post=0)
data_after = (
    data[shared_cols + ['fte2']]
    .rename(columns={"fte2": "fte"})
    .assign(post=1)
)
data_2 = pd.concat([data_before, data_after])

# DiD is the period-by-state interaction; `state` is then renamed to `treat`.
data_2['DiD'] = data_2['post'] * data_2['state']
data_2 = data_2.rename(columns={"state": "treat"})

In [176]:
# OLS of FTE on treat, post and their interaction, plus an intercept.
# The coefficient on DiD is the difference-in-differences estimate.
X = sm.add_constant(data_2[['treat', 'post', 'DiD']])

model_did = sm.OLS(endog=data_2['fte'], exog=X).fit()

print(model_did.summary())

                            OLS Regression Results                            
Dep. Variable:                    fte   R-squared:                       0.009
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     1.995
Date:                Thu, 05 Dec 2024   Prob (F-statistic):              0.113
Time:                        14:08:35   Log-Likelihood:                -2561.8
No. Observations:                 702   AIC:                             5132.
Df Residuals:                     698   BIC:                             5150.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         23.7045      1.148     20.640      0.0

### Problem (c)

In [179]:
# Same DiD regression with controls: `own` and chain dummies
# (the first chain level is dropped and serves as the baseline).
data_3 = pd.get_dummies(data_2, columns=['chain'], drop_first=True)

regressors = ['treat', 'post', 'DiD', 'own'] + [f'chain_{k}' for k in (2, 3, 4)]
X = sm.add_constant(data_3[regressors].astype('float'))

model_did = sm.OLS(endog=data_3['fte'], exog=X).fit()

print(model_did.summary())

                            OLS Regression Results                            
Dep. Variable:                    fte   R-squared:                       0.218
Model:                            OLS   Adj. R-squared:                  0.210
Method:                 Least Squares   F-statistic:                     27.70
Date:                Thu, 05 Dec 2024   Prob (F-statistic):           1.16e-33
Time:                        14:08:38   Log-Likelihood:                -2478.3
No. Observations:                 702   AIC:                             4973.
Df Residuals:                     694   BIC:                             5009.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         26.3896      1.090     24.220      0.0

### Problem (d)

In [196]:
# gap = proportional raise needed to reach 5.05, counted only for rows with
# state == 1 whose wagest is below 5.05; it is zero for every other row.
below_new_minimum = data['wagest'] < 5.05
proportional_raise = (5.05 - data['wagest']) / data['wagest']
data['gap'] = proportional_raise * data['state'] * below_new_minimum

# Change in FTE employment between the two periods.
data['fte_change'] = data['fte2'].sub(data['fte'])

# Regress the employment change on gap (with intercept).
X = sm.add_constant(data[['gap']])
model_3 = sm.OLS(endog=data['fte_change'], exog=X).fit()

print(model_3.summary())

                            OLS Regression Results                            
Dep. Variable:             fte_change   R-squared:                       0.022
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     7.839
Date:                Thu, 05 Dec 2024   Prob (F-statistic):            0.00540
Time:                        14:11:14   Log-Likelihood:                -1254.9
No. Observations:                 351   AIC:                             2514.
Df Residuals:                     349   BIC:                             2522.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -1.4777      0.694     -2.130      0.0

In [194]:
# Mean gap among state == 1 rows, scaled by the estimated gap coefficient.
nj_mean_gap = data.loc[data["state"] == 1, "gap"].mean()
product = nj_mean_gap * model_3.params["gap"]
print('The product is: ', product)

The product is:  1.7829285215831572


### Problem (e)

In [217]:
# Expand `chain` into dummy columns (first level dropped as the baseline).
# Guarded so re-running this cell is a no-op rather than a KeyError: after the
# first run `data` no longer has a `chain` column left to encode.
if 'chain' in data.columns:
    data = pd.get_dummies(data, columns=['chain'], drop_first=True)

# Gap regression with controls: state, ownership and chain dummies.
X = data[['gap', 'state', 'own', 'chain_2', 'chain_3', 'chain_4']].astype('float')
X = sm.add_constant(X)

model_4 = sm.OLS(data['fte_change'], X).fit()
print(model_4.summary())

# LaTeX export of this table, if needed: print(model_4.summary().as_latex())

                            OLS Regression Results                            
Dep. Variable:             fte_change   R-squared:                       0.030
Model:                            OLS   Adj. R-squared:                  0.013
Method:                 Least Squares   F-statistic:                     1.797
Date:                Thu, 05 Dec 2024   Prob (F-statistic):             0.0988
Time:                        14:22:33   Log-Likelihood:                -1253.4
No. Observations:                 351   AIC:                             2521.
Df Residuals:                     344   BIC:                             2548.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -1.6635      1.211     -1.374      0.1