In [126]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from joblib import Parallel, delayed

from src.data_gen import data_gen
from src.CSC_IPCA import CSC_IPCA

# use 8pt Times New Roman as the default font for every figure
plt.rcParams.update({
    'font.family': 'Times New Roman',
    'font.size': 8,
})
# grab the current seaborn palette; the bare name below makes the cell display it
colors = sns.color_palette()
colors

In [127]:
# simulation design: 5 treated units and 5 post-treatment periods
N_tr = 5
T1 = 5
# 9 covariates in total and 2 factors
L = 9
K = 2

# drift value handed to the data generator
drift = 2

# instantiate the estimator that the simulations fit
model = CSC_IPCA()

In [128]:
# define a function that simulates the true and the estimated ATT and summarises the estimation error
def simulation_fun(T0, N_co, alpha, n_simulations, seed=None):
    """Run a Monte Carlo study of the ATT estimate and summarise its error.

    Each replication draws a panel with ``data_gen``, adds a constant column,
    picks a random subset of the covariates as "observed", refits the
    module-level ``model`` on that subset (plus the constant) and compares the
    estimated average ATT over the last ``T1`` periods with the true one.

    Relies on the module-level settings ``T1``, ``N_tr``, ``L``, ``K`` and
    ``drift`` and on the shared ``model`` instance.

    Parameters
    ----------
    T0 : int
        Forwarded to ``data_gen`` as its first argument (presumably the number
        of pre-treatment periods -- confirm against ``data_gen``).
    N_co : int
        Forwarded to ``data_gen`` (presumably the number of control units --
        confirm against ``data_gen``).
    alpha : float
        Share of the ``L`` covariates that the model gets to see;
        ``floor(alpha * L)`` covariates are drawn without replacement.
    n_simulations : int
        Number of replications; must be at least 1.
    seed : int, optional
        When given, seeds NumPy's global random state before the first
        replication so the covariate draws are repeatable. Whether
        ``data_gen`` draws from the same global state is not visible here.
        ``None`` (the default) leaves the random state untouched.

    Returns
    -------
    tuple of float
        ``(avg_bias, sd_att_est, rmse)``: mean of ``att_est - att``, standard
        deviation of ``att_est`` and root mean squared bias across
        replications.

    Raises
    ------
    ValueError
        If ``n_simulations`` is smaller than 1.
    """
    if n_simulations < 1:
        raise ValueError("n_simulations must be at least 1")
    if seed is not None:
        np.random.seed(seed)

    # covariate names and the number observed do not change between replications
    covariates = ['x' + str(j) for j in range(1, L+1)]
    # the small tolerance keeps float round-off (e.g. 2.9999999999999996) from
    # dropping one covariate when alpha * L is meant to be a whole number
    L_obs = int(np.floor(alpha * len(covariates) + 1e-9))

    records = []
    for _ in range(n_simulations):
        # generate data
        df = data_gen(T0, T1, N_co, N_tr, L, K, drift)
        # add a constant
        df['const'] = 1
        # true average ATT: mean effect of the treated group over the last T1 periods
        # (.iloc makes the "last T1 rows" slice explicitly positional)
        att = df.query("tr_group==1").groupby('time')['eff'].mean().iloc[-T1:].mean()

        # draw the observed covariates; the constant is always included
        obs_covariates = list(np.random.choice(covariates, size=L_obs, replace=False)) + ['const']

        # fit the model
        model.fit(df, 'id', 'time', 'y', 'treated', obs_covariates, K)
        # predict
        y_syn = model.predict()

        # estimated average ATT: treated outcome minus synthetic outcome over the
        # last T1 periods (computed after the fit, in the same order as before)
        y_treated = df.query("tr_group==1").groupby('time')['y'].mean().iloc[-T1:]
        att_est = (y_treated - y_syn.mean(axis=0)[-T1:]).mean()

        records.append({'att': att, 'att_est': att_est, 'bias': att_est - att})

    results_df = pd.DataFrame(records)
    avg_bias = results_df['bias'].mean()
    sd_att_est = results_df['att_est'].std()
    rmse = np.sqrt((results_df['bias'] ** 2).mean())

    return avg_bias, sd_att_est, rmse

In [129]:
# Define a function to wrap the call to simulation_fun for readability and ease of use with joblib
def simulate(alpha, t, n, n_simulations=1000):
    """Run ``simulation_fun`` for one grid cell and label the result.

    Parameters
    ----------
    alpha : float
        Passed to ``simulation_fun`` as ``alpha``.
    t : int
        Passed to ``simulation_fun`` as ``T0``.
    n : int
        Passed to ``simulation_fun`` as ``N_co``.
    n_simulations : int, optional
        Number of replications per grid cell. Defaults to 1000, the value that
        was previously hard-coded; lower it for a quick trial run.

    Returns
    -------
    dict
        The three inputs under ``"alpha"``, ``"T0"`` and ``"N_co"`` together
        with ``"bias"``, ``"std_att"`` and ``"rmse_att"`` as returned by
        ``simulation_fun``.
    """
    bias, std_att, rmse_att = simulation_fun(T0=t, N_co=n, alpha=alpha, n_simulations=n_simulations)
    return {"alpha": alpha, "T0": t, "N_co": n, "bias": bias, "std_att": std_att, "rmse_att": rmse_att}

In [130]:
# simulation grid: values tried for alpha, T0 and N_co
alphas = [1/3, 2/3, 1]
T0 = [10, 20, 40]
N_co = [10, 20, 40]

# one joblib task per (alpha, T0, N_co) combination;
# n_jobs=-1 spreads the tasks over all available CPU cores
results = Parallel(n_jobs=-1)(
    delayed(simulate)(alpha, t, n)
    for alpha in alphas
    for t in T0
    for n in N_co
)

# every task returns a dict, so the list becomes one DataFrame row per combination
df = pd.DataFrame(results)

In [141]:
# results of the 1000 simulations, with the constant included
df.pivot_table(
    index=['T0', 'N_co'],
    columns='alpha',
    values=['bias', 'std_att', 'rmse_att'],
).round(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,bias,bias,bias,rmse_att,rmse_att,rmse_att,std_att,std_att,std_att
Unnamed: 0_level_1,alpha,0.333333,0.666667,1.000000,0.333333,0.666667,1.000000,0.333333,0.666667,1.000000
T0,N_co,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
10,10,2.382,0.747,0.189,4.619,3.011,1.712,3.975,2.943,1.732
10,20,1.452,0.42,0.063,3.538,2.18,0.984,3.273,2.186,1.076
10,40,0.92,0.222,0.008,2.747,1.745,0.789,2.65,1.786,0.917
20,10,2.534,1.121,0.237,4.441,3.015,1.192,3.688,2.829,1.271
20,20,1.52,0.421,0.048,3.276,1.84,0.872,2.946,1.849,0.977
20,40,1.008,0.258,0.036,2.632,1.451,0.539,2.498,1.505,0.705
40,10,2.746,1.148,0.227,4.982,2.863,1.167,4.166,2.665,1.201
40,20,1.733,0.54,0.089,3.964,1.783,0.732,3.607,1.757,0.874
40,40,0.807,0.281,0.044,2.53,1.632,0.531,2.457,1.654,0.677


In [139]:
# write the same rounded summary table to a LaTeX file
df.pivot_table(
    index=['T0', 'N_co'],
    columns='alpha',
    values=['bias', 'std_att', 'rmse_att'],
).round(3).to_latex('figs/sim_results.tex', float_format="%.3f")