# RCT Benchmark: t-test vs Linear Regression vs DML ATE

In [1]:
import numpy as np
import pandas as pd

from causalkit.data.generators import generate_rct
from causalkit.data.causaldata import CausalData
from causalkit.inference.atte.ttest import ttest
from causalkit.inference.ate.dml_ate_source import dml_ate_source

np.random.seed(42)


## Generate RCT data
We'll generate a balanced (50/50) RCT with a continuous outcome where the treated group's mean exceeds control by 0.5 units.


In [2]:
# Generate clean RCT without legacy ancillary columns
n = 10000
theta = 0.5

df = generate_rct(
    n=n,
    split=0.5,
    random_state=42,
    target_type="normal",
    target_params={"mean": {"A": 0.0, "B": theta}, "std": 1.0},
    k=5,                  # 5 pre-treatment covariates X independent of T
    add_ancillary=False   # <- no legacy/post-treatment columns
)

# Use only baseline X columns as confounders
confounders = [c for c in df.columns if c.startswith("x")]

# Wrap in CausalData with new column names
causal_data = CausalData(
    df=df,
    treatment='t',
    outcome='y',
    confounders=confounders
)

In [3]:
causal_data.df.head(100)

Unnamed: 0,y,t,x1,x2,x3,x4,x5
0,1.242044,1.0,0.304717,-1.039984,0.750451,0.940565,-1.951035
1,0.983041,0.0,-1.302180,0.127840,-0.316243,-0.016801,-0.853044
2,1.886970,1.0,0.879398,0.777792,0.066031,1.127241,0.467509
3,1.473005,1.0,-0.859292,0.368751,-0.958883,0.878450,-0.049926
4,-0.383807,1.0,-0.184862,-0.680930,1.222541,-0.154529,-0.428328
...,...,...,...,...,...,...,...
95,0.436576,1.0,-0.339258,1.063852,-1.141938,0.006339,2.597674
96,-0.826800,0.0,0.223080,1.433215,0.091520,0.580777,-0.056783
97,1.375048,1.0,-0.170408,-0.779482,0.430301,-0.851537,0.665585
98,1.878698,0.0,1.085287,0.366531,-0.286249,0.453966,-0.308673


## Wrap in CausalData
We provide a few covariates as confounders for DML (although the data is truly randomized).


In [4]:
from causalkit.eda import CausalEDA
eda = CausalEDA(causal_data)

In [5]:
# 1) Outcome statistics by treatment
eda.outcome_stats()

Unnamed: 0_level_0,count,mean,std,min,p10,p25,median,p75,p90,max
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.0,4984,0.001936,0.994248,-3.888142,-1.27391,-0.670266,-0.017448,0.665774,1.269994,3.890842
1.0,5016,0.467173,1.010009,-2.941354,-0.82884,-0.193604,0.464554,1.146434,1.750687,4.528347


In [6]:
# Shows means of confounders for control/treated groups, absolute differences, and SMD values
confounders_balance_df = eda.confounders_means()
display(confounders_balance_df)

Unnamed: 0_level_0,mean_t_0,mean_t_1,abs_diff,smd
confounders,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
x1,0.000371,-0.031262,0.031633,-0.031594
x3,0.023309,-0.000897,0.024206,-0.024414
x2,0.010625,-0.007365,0.01799,-0.017972
x5,0.005163,-0.00405,0.009214,-0.009177
x4,-0.001071,0.000134,0.001205,0.001187


## 1) t-test (difference in means)


In [7]:
tt_res = ttest(causal_data, confidence_level=0.95)
tt_res


{'p_value': 3.9956686710790904e-116,
 'absolute_difference': 0.4652369511509771,
 'absolute_ci': (0.4259470794208948, 0.5045268228810593),
 'relative_difference': 24026.959435932917,
 'relative_ci': (21997.851146132984, 26056.06772573285)}

## 2) Linear regression
The coefficient on treatment equals the difference in group means in an RCT.


In [8]:
# Python
import numpy as np
import statsmodels.api as sm

# Outcome
y = causal_data.target.to_numpy()

# Base X (no centering)
X_base = causal_data.df[confounders].to_numpy()
xbar = X_base.mean(axis=0)  # means of confounders, shape (p,)

# Treatment and interactions
T = causal_data.treatment.to_numpy().reshape(-1, 1)
TX = X_base * T

# Design matrix: intercept + T + X + T*X
X_design = np.column_stack([np.ones(len(T)), T, X_base, TX])

# Fit OLS with robust SE
res = sm.OLS(y, X_design).fit(cov_type="HC3")

# Dimensions and index bookkeeping
p = X_base.shape[1]
idx_const = 0
idx_T = 1
idx_X_start = 2
idx_X_end = idx_X_start + p       # exclusive
idx_TX_start = idx_X_end
idx_TX_end = idx_TX_start + p     # exclusive

# Parameter vector is [const, beta_T, beta_X (p), gamma_TX (p)]
beta = res.params
V = res.cov_params()

# Average treatment effect under the linear-interaction model:
# theta = beta_T + xbar' * gamma
theta_hat = float(beta[idx_T] + (xbar @ beta[idx_TX_start:idx_TX_end]))

# Delta-method variance: Var(a' beta) = a' V a
a = np.zeros_like(beta)
a[idx_T] = 1.0
a[idx_TX_start:idx_TX_end] = xbar
var_theta = float(a @ V @ a)
se_theta = float(np.sqrt(max(var_theta, 0.0)))

# 95% CI (normal approx)
from scipy.stats import norm
z = norm.ppf(0.975)
ci_low = theta_hat - z * se_theta
ci_high = theta_hat + z * se_theta

# Two-sided p-value for H0: theta = 0
zstat = theta_hat / se_theta if se_theta > 0 else np.inf
pval = 2 * (1 - norm.cdf(abs(zstat)))

theta_hat, (ci_low, ci_high), se_theta, pval

(0.4657689577517796,
 (np.float64(0.4264644134368944), np.float64(0.5050735020666648)),
 0.020053707427745846,
 np.float64(0.0))

## 3) Double Machine Learning (ATE)
We estimate ATE using DoubleML with default learners.


In [9]:
dml_res = dml_ate_source(causal_data, n_folds=3, confidence_level=0.95)
dml_res


{'coefficient': 0.4623862491849162,
 'std_error': 0.020992597581481868,
 'p_value': 1.6172661119929188e-107,
 'confidence_interval': (0.42124151398326914, 0.5035309843865633),
 'model': <doubleml.irm.irm.DoubleMLIRM at 0x13ec930e0>}

## Compare estimates


In [10]:
tt_ci_low, tt_ci_high = tt_res['absolute_ci']              # from the t-test
lin_ci_low, lin_ci_high = ci_low, ci_high                  # from your delta-method calc
dml_ci_low, dml_ci_high = dml_res['confidence_interval']   # from DoubleML

comparison = pd.DataFrame({
    'method': ['t-test', 'linear_regression', 'dml_ate'],
    'estimate': [
        tt_res['absolute_difference'],
        theta_hat,
        dml_res['coefficient']
    ],
    'ci_lower': [
        tt_ci_low,
        lin_ci_low,
        dml_ci_low
    ],
    'ci_upper': [
        tt_ci_high,
        lin_ci_high,
        dml_ci_high
    ]
})

comparison


Unnamed: 0,method,estimate,ci_lower,ci_upper
0,t-test,0.465237,0.425947,0.504527
1,linear_regression,0.465769,0.426464,0.505074
2,dml_ate,0.462386,0.421242,0.503531


Ground truth theta = 0.5