In [6]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression, LogisticRegression

from doubleml.data import DoubleMLPanelData
from doubleml.did import DoubleMLDIDMulti

In [7]:
# Load the simulated panel used throughout this notebook (pd / np come from the
# import cell at the top; they are not re-imported here).
dta = pd.read_csv("https://raw.githubusercontent.com/d2cml-ai/csdid/main/data/sim_data.csv")

# Recode G == 0 to missing. Presumably 0 marks never-treated units in this file and
# the panel container expects them as NaN -- confirm against the installed DoubleML version.
# The column is cast to float first so NaN can be stored without relying on pandas'
# implicit int -> float upcast on assignment (deprecated since pandas 2.1).
# Both uses of dta["G"] on the right-hand side read the original column, and the
# statement is idempotent, so re-running the cell is safe.
dta["G"] = dta["G"].astype("float64").mask(dta["G"] == 0)

# Preview the recoded frame as the cell's output.
dta.head()

In [8]:
# Wrap the long-format frame in the panel data container, naming the role of each column:
# outcome, treatment-group column, unit id, time period and covariates.
dml_data = DoubleMLPanelData(
    dta,
    y_col="Y",
    d_cols="G",
    id_col="id",
    t_col="period",
    x_cols=["X"],
)

In [9]:
# Comparison group used for every group-time effect.
# "not_yet_treated" is the alternative setting; switch the string here to try it.
control_group = "never_treated"

# Nuisance learners handed to the estimator: a regressor as ml_g and a classifier as ml_m.
ml_g = LinearRegression()
ml_m = LogisticRegression()

# Fix NumPy's global random state before the model object is created so that a
# Restart & Run All reproduces the same estimates.
# NOTE(review): presumably the cross-fitting sample splits (and the bootstrap in a
# later cell) draw from this global state -- confirm for the installed DoubleML version.
np.random.seed(42)

dml_obj = DoubleMLDIDMulti(
    obj_dml_data=dml_data,
    ml_g=ml_g,
    ml_m=ml_m,
    gt_combinations="standard",
    control_group=control_group,
)

dml_obj.fit()

# Last expression of the cell: the table of ATT estimates per group-time combination.
dml_obj.summary

Unnamed: 0,coef,std err,t,P>|t|,2.5 %,97.5 %
"ATT(2.0,1,2)",0.920659,0.064105,14.361647,0.0,0.795014,1.046303
"ATT(2.0,1,3)",1.987829,0.06466,30.742884,0.0,1.861098,2.11456
"ATT(2.0,1,4)",2.955122,0.063113,46.822594,0.0,2.831422,3.078821
"ATT(3.0,1,2)",-0.042606,0.066026,-0.645298,0.518734,-0.172015,0.086802
"ATT(3.0,2,3)",1.107568,0.065475,16.915916,0.0,0.979239,1.235896
"ATT(3.0,2,4)",2.057568,0.06547,31.427868,0.0,1.92925,2.185886
"ATT(4.0,1,2)",0.004233,0.068274,0.062002,0.950562,-0.129581,0.138048
"ATT(4.0,2,3)",0.061837,0.066472,0.930267,0.352233,-0.068446,0.192119
"ATT(4.0,3,4)",0.95301,0.067443,14.130675,0.0,0.820825,1.085195


In [10]:
# Confidence level shared by both interval types below.
level = 0.95

# Draw the bootstrap replications up front; the joint interval is requested only after this call.
dml_obj.bootstrap(n_rep_boot=5000)

# Intervals at the chosen level: default (joint=False) and joint=True.
ci = dml_obj.confint(level=level)
ci_joint = dml_obj.confint(joint=True, level=level)

In [11]:
# Print the fitted model's text report: data summary, score and control-group settings,
# the learners, and their out-of-sample performance.
print(dml_obj)


------------------ Data summary      ------------------
Outcome variable: Y
Treatment variable(s): ['G']
Covariates: ['X']
Instrument variable(s): None
Time variable: period
Id variable: id
No. Observations: 3979

------------------ Score & algorithm ------------------
Score function: observational
GT combinations: ['(2.0,1,2)', '(2.0,1,3)', '(2.0,1,4)', '(3.0,1,2)', '(3.0,2,3)', '(3.0,2,4)', '(4.0,1,2)', '(4.0,2,3)', '(4.0,3,4)']
Control group: never_treated
Anticipation periods: 0
------------------ Machine learner   ------------------
Learner ml_g: LinearRegression()
Learner ml_m: LogisticRegression()
Out-of-sample Performance:
Regression:
Learner ml_g0 RMSE: [[1.42730197 1.41113036 1.39623362 1.4271475  1.40723812 1.41836799
  1.423926   1.40487993 1.42254218]]
Learner ml_g1 RMSE: [[1.40496958 1.43560178 1.39842847 1.4127457  1.42704868 1.38713382
  1.45719691 1.41487036 1.41335501]]
Classification:
Learner ml_m Log Loss: [[0.69060261 0.69041707 0.69043153 0.67919869 0.67914682 0.

In [12]:
# Aggregate the group-time effects with the "group" scheme and print the result:
# one overall effect plus one row per value of G.
aggregated = dml_obj.aggregate("group")
print(aggregated)

 Group Aggregation 

------------------ Overall Aggregated Effects ------------------
    coef  std err         t  P>|t|    2.5 %   97.5 %
1.487968 0.034215 43.488305    0.0 1.420908 1.555029
------------------ Aggregated Effects         ------------------
         coef   std err          t  P>|t|     2.5 %    97.5 %
2.0  1.954536  0.052243  37.412432    0.0  1.852142  2.056931
3.0  1.582568  0.056295  28.111848    0.0  1.472231  1.692905
4.0  0.953010  0.067443  14.130675    0.0  0.820825  1.085195
------------------ Additional Information     ------------------
Control Group: never_treated
Anticipation Periods: 0
Score: observational



In [13]:
# Aggregate the group-time effects with the "time" scheme and print the result:
# one overall effect plus one row per period.
aggregated_time = dml_obj.aggregate("time")
print(aggregated_time)

 Time Aggregation 

------------------ Overall Aggregated Effects ------------------
    coef  std err         t  P>|t|    2.5 %   97.5 %
1.480608 0.035103 42.178921    0.0 1.411807 1.549409
------------------ Aggregated Effects         ------------------
       coef   std err          t  P>|t|     2.5 %    97.5 %
2  0.920659  0.064105  14.361647    0.0  0.795014  1.046303
3  1.549048  0.051383  30.147284    0.0  1.448340  1.649757
4  1.972117  0.046579  42.339413    0.0  1.880824  2.063410
------------------ Additional Information     ------------------
Control Group: never_treated
Anticipation Periods: 0
Score: observational



In [14]:
# Aggregate the group-time effects with the "eventstudy" scheme and print the result:
# one overall effect plus one row per event-time index (negative and non-negative).
aggregated_eventstudy = dml_obj.aggregate("eventstudy")
print(aggregated_eventstudy)

 Event Study Aggregation 

------------------ Overall Aggregated Effects ------------------
    coef  std err         t  P>|t|    2.5 %   97.5 %
1.990196 0.038709 51.414618    0.0 1.914328 2.066064
------------------ Aggregated Effects         ------------------
          coef   std err          t     P>|t|     2.5 %    97.5 %
-2.0  0.004233  0.068274   0.062002  0.950562 -0.129581  0.138048
-1.0  0.010997  0.040478   0.271675  0.785872 -0.068339  0.090333
0.0   0.992875  0.030721  32.319092  0.000000  0.932663  1.053087
1.0   2.022591  0.045690  44.267485  0.000000  1.933040  2.112143
2.0   2.955122  0.063113  46.822594  0.000000  2.831422  3.078821
------------------ Additional Information     ------------------
Control Group: never_treated
Anticipation Periods: 0
Score: observational

