In [None]:
import pandas as pd
import statsmodels.api as sm

# Load the store-level survey data from the Stata file.
df = pd.read_stata('assignment8.dta')

# Full meal price = entree + medium soda + small fries.
# Plain addition is used so a missing component makes the meal price missing.
df['pricemeal'] = df['priceentree'] + df['pricesoda'] + df['pricefry']

# Split the panel into the two survey waves: time == 0 is the pre-period,
# time == 1 is the post-period.
pre = df[df['time'] == 0].copy()
post = df[df['time'] == 1].copy()

# Put each store's pre and post observations side by side. Only stores
# observed in both waves are kept (inner join). validate='one_to_one' makes
# pandas raise MergeError if a store ID appears more than once within a wave;
# without it, duplicates would silently multiply rows and inflate the sample.
merged = pre.merge(
    post,
    how='inner',
    on='store',
    suffixes=('_pre', '_post'),
    validate='one_to_one',
)

# Change in full meal price (ΔP) for each store: post minus pre.
merged['delta_price'] = merged['pricemeal_post'] - merged['pricemeal_pre']

# Control variables (Xi), all measured in the pre-period.
# Start with the pre-period `co_owned` column; chain dummies are appended below.
control_candidates = ['co_owned_pre']

# Chain codes: 1=BK, 2=KFC, 3=Roys, 4=Wendys.
# Chain 1 is the omitted reference category, so only chains 2-4 get an
# indicator; including all four alongside the constant would be collinear.
for chain_code in (2, 3, 4):
    dummy_name = f'chain_{chain_code}'
    merged[dummy_name] = (merged['chain_pre'] == chain_code).astype(int)
    control_candidates.append(dummy_name)

# NJ treatment indicator, copied from the pre-period state column.
# The summary statistics further down treat NJ == 1 as New Jersey and
# NJ == 0 as Pennsylvania.
merged['NJ'] = merged['state_pre']

# Estimation sample: keep only the outcome, the treatment indicator and the
# controls, then drop any store with a missing value in one of them.
analysis_columns = ['delta_price', 'NJ', *control_candidates]
regression_data = merged.loc[:, analysis_columns].dropna()

# Specification: ΔPi = α0 + βXi + γNJi + εi
# y is the price change; X is NJ followed by the controls, plus an intercept.
y = regression_data['delta_price']
X = sm.add_constant(regression_data.drop(columns='delta_price'))

# Fit by ordinary least squares (statsmodels' default covariance estimator).
model = sm.OLS(y, X)
results = model.fit()

# Full regression table under a banner heading.
print(
    "=" * 80,
    "Effect of Minimum Wage Increase on Full Meal Price",
    "=" * 80,
    sep="\n",
)
print(results.summary())

# Headline numbers for the NJ coefficient (γ), pulled out of the fitted model.
print("\n" + "=" * 80, "Key Results:", "=" * 80, sep="\n")
print(
    f"Coefficient on NJ (treatment effect): {results.params['NJ']:.4f}",
    f"Standard error: {results.bse['NJ']:.4f}",
    f"t-statistic: {results.tvalues['NJ']:.4f}",
    f"p-value: {results.pvalues['NJ']:.4f}",
    f"Number of observations: {int(results.nobs)}",
    sep="\n",
)

# Raw comparison of mean price changes by state, on the estimation sample.
print("\n" + "=" * 80, "Summary Statistics:", "=" * 80, sep="\n")

# Compute each group mean once and reuse it for the difference line.
price_change = regression_data['delta_price']
nj_mean_change = price_change[regression_data['NJ'] == 1].mean()
pa_mean_change = price_change[regression_data['NJ'] == 0].mean()

print(f"Mean change in meal price (NJ): ${nj_mean_change:.4f}")
print(f"Mean change in meal price (PA): ${pa_mean_change:.4f}")
print(f"Difference: ${nj_mean_change - pa_mean_change:.4f}")

# Robustness check: regress ΔP on the NJ indicator alone (plus an intercept).
# It reuses regression_data, so the sample is the same one used for the
# specification with controls.
print(
    "\n" + "=" * 80,
    "Alternative Specification (without controls):",
    "=" * 80,
    sep="\n",
)

y_simple = regression_data['delta_price']
X_simple = sm.add_constant(regression_data['NJ'])

model_simple = sm.OLS(y_simple, X_simple)
results_simple = model_simple.fit()
print(results_simple.summary())

Effect of Minimum Wage Increase on Full Meal Price
                            OLS Regression Results                            
Dep. Variable:            delta_price   R-squared:                       0.058
Model:                            OLS   Adj. R-squared:                  0.045
Method:                 Least Squares   F-statistic:                     4.314
Date:                Tue, 02 Dec 2025   Prob (F-statistic):           0.000803
Time:                        00:35:18   Log-Likelihood:                -136.90
No. Observations:                 356   AIC:                             285.8
Df Residuals:                     350   BIC:                             309.1
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------