In [47]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.data_gen import data_gen
from src.CSC_IPCA import CSC_IPCA

# set the global font to be Times New Roman
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 8
colors = sns.color_palette()
colors

### 0. Estimation preparing

In [49]:
# fix the number of treated units and post treatment periods to 5
N_tr, T1 = 5, 5
# fix the total number of covariates to 10, and the number of factors to 2
L, K = 10, 2 

# fix the drift
drift = 2

# issue the model
model = CSC_IPCA()

In [62]:
# define a function to get true and estimated att
def repeat_fun(T0, N_co, alpha):
    # generate data
    df = data_gen(T0, T1, N_co, N_tr, L, K, drift)
    # add a constant
    df['const'] = 1
    # compute the true avg ATT
    att = df.query("tr_group==1").groupby('time')['eff'].mean()[-T1:].mean()

    # gen the covariates and observed covariates
    covariates = ['x' + str(i) for i in range(1, L+1)]
    L_obs = int(alpha*len(covariates))
    obs_covariates = list(np.random.choice(covariates, size=L_obs, replace=False)) + ['const']

    # fit the model
    model.fit(df, 'id', 'time', 'y', 'treated', obs_covariates, K)
    # predict
    y_syn = model.predict()

    # compute the estimated avg ATT
    att_est = (df.query("tr_group==1").groupby('time')['y'].mean()[-T1:] - y_syn.mean(axis=0)[-T1:]).mean()

    return att, att_est

### 1. First senario $\alpha=1/3$: We observe only 1/3 of the covariates
#### 1.1. $N_{co}=10, T_0=20$

In [66]:
results11 = {}
for i in range(100):
    results11[i] = repeat_fun(T0=20, N_co=20, alpha=1/3)

In [68]:
pd.DataFrame(results11, index=['att', 'att_est']).T.head(50)

Unnamed: 0,att,att_est
0,3.371952,7.73269
1,2.923049,5.207689
2,2.985819,4.005886
3,2.928923,3.314601
4,2.709587,4.055337
5,2.747286,12.659323
6,2.941054,2.191982
7,2.230193,2.484956
8,3.230158,1.363828
9,3.543913,6.634116


#### 1.2 $N_{co}=20, T_0=20$

In [None]:
results12 = {}
for i in range(1000):
    results12[i] = repeat_fun(T0=20, N_co=5, alpha=1/3)

#### 1.3 $N_{co}=40, T_0=20$

In [None]:
results13 = {}
for i in range(1000):
    results13[i] = repeat_fun(T0=20, N_co=5, alpha=1/3)