In [1]:
import os

import pandas as pd
import statsmodels.formula.api as smf

# 1. Load the provided dataset
# The CSV location can be overridden with the GEO_LIFT_DATA environment variable so the
# notebook is runnable on machines other than the author's; when the variable is unset
# the original path is used unchanged.
DATA_PATH = os.environ.get(
    "GEO_LIFT_DATA",
    "C:\\Users\\KShah1\\Downloads\\Incrementality Project\\Geo-Lift_Simulated_Dataset.csv",
)
df = pd.read_csv(DATA_PATH)

# Fail early, with a readable message, if the columns this cell groups on are absent.
missing_cols = {'treated', 'period', 'revenue'} - set(df.columns)
assert not missing_cols, f"dataset is missing expected columns: {sorted(missing_cols)}"

# 2. Pre/Post revenue summary
# Mean revenue for every (treated, period) combination, reshaped so that each group is a
# row and each period is a column. The 0/1 `treated` flag is relabelled for display.
summary = (
    df
    .groupby(['treated', 'period'])['revenue']
    .mean()
    .unstack('period')
    .rename(index={0: 'Control', 1: 'Treated'})
)
# Within-group change in mean revenue from the 'Pre' period to the 'Post' period.
summary['Delta'] = summary['Post'] - summary['Pre']
print("Pre/Post Revenue Summary:\n", summary, "\n")

Pre/Post Revenue Summary:
 period      Post       Pre     Delta
treated                             
Control  102.896  99.37975   3.51625
Treated  118.975  96.09250  22.88250 



In [4]:
# 3. Difference-in-Differences regression
# Design columns: `post` is 1 for rows whose period is 'Post', and `treat_post` is the
# treated x post interaction whose coefficient is the DiD estimate.
df['post'] = df['period'].eq('Post').astype(int)
df['treat_post'] = df['treated'].mul(df['post'])

print(df.head())

# OLS on the classic DiD specification; standard errors are clustered on `state`.
cluster_spec = {'groups': df['state']}
model = smf.ols('revenue ~ treated + post + treat_post', data=df)
result = model.fit(cov_type='cluster', cov_kwds=cluster_spec)

# 4. Extract and display results
# Point estimate and confidence bounds for the interaction term.
did_coef = result.params['treat_post']
treat_post_ci = result.conf_int().loc['treat_post']
ci_lower, ci_upper = treat_post_ci

# Express the DiD estimate as a percentage of a baseline revenue level.
# NOTE(review): the baseline here is the control group's pre-period mean rather than a
# treated-group counterfactual -- confirm this is the intended denominator.
control_pre_mean = summary.loc['Control', 'Pre']
lift_pct = did_coef / control_pre_mean * 100

print(did_coef)
print(control_pre_mean)

print(result.summary(), "\n")
print(
    f"DiD Coefficient (Incremental Lift): {did_coef:.2f}",
    f"95% CI for Lift: [{ci_lower:.2f}, {ci_upper:.2f}]",
    f"Incremental Lift Percentage: {lift_pct:.2f}%",
    sep="\n",
)

  state  week period  treated  revenue  post  treat_post
0    CA     1    Pre        1   108.69     0           0
1    CA     2    Pre        1   113.55     0           0
2    CA     3    Pre        1   105.25     0           0
3    CA     4    Pre        1   105.74     0           0
4    CA     5    Pre        1   115.29     0           0
19.366249999999976
99.37975
                            OLS Regression Results                            
Dep. Variable:                revenue   R-squared:                       0.446
Model:                            OLS   Adj. R-squared:                  0.432
Method:                 Least Squares   F-statistic:                     96.83
Date:                Tue, 05 Aug 2025   Prob (F-statistic):           3.61e-07
Time:                        16:37:45   Log-Likelihood:                -430.97
No. Observations:                 120   AIC:                             869.9
Df Residuals:                     116   BIC:                             881.