In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from causalkit.data import CausalDatasetGenerator, CausalData

# Reproducibility
np.random.seed(42)

confounder_specs = [
    {"name": "tenure_months", "dist": "normal", "mu": 24, "sd": 12},
    {"name": "avg_sessions_week", "dist": "normal", "mu": 5, "sd": 2},
    {"name": "spend_last_month", "dist": "uniform", "a": 0, "b": 200},
    {"name": "premium_user", "dist": "bernoulli", "p": 0.25},
    {"name": "urban_resident", "dist": "bernoulli", "p": 0.60},
]

# Causal effect and noise
theta = 1.8  # ATE: +1.8 ARPU units if new_feature = 1
sigma_y = 3.5  # ARPU noise std
target_t_rate = 0.35  # ~35% treated

# Effects of confounders on ARPU (baseline, additive)
# Order: tenure_months, avg_sessions_week, spend_last_month, premium_user, urban_resident
beta_y = np.array([
    0.05,  # tenure_months: small positive effect
    0.40,  # avg_sessions_week: strong positive effect
    0.02,  # spend_last_month: recent spend correlates with ARPU
    2.00,  # premium_user: premium users have higher ARPU
    1.00,  # urban_resident: urban users slightly higher ARPU
], dtype=float)

# Effects of confounders on treatment assignment (log-odds scale)
beta_t = np.array([
    0.015,  # tenure_months
    0.10,  # avg_sessions_week
    0.002,  # spend_last_month
    0.75,  # premium_user
    0.30,  # urban_resident: more likely to get the feature
], dtype=float)

gen = CausalDatasetGenerator(
    theta=theta,
    outcome_type="continuous",
    sigma_y=sigma_y,
    target_t_rate=target_t_rate,
    seed=42,
    confounder_specs=confounder_specs,
    beta_y=beta_y,
    beta_t=beta_t,
)


# Create dataset
causal_data = gen.to_causal_data(
    n=5000,
    cofounders = [
    "tenure_months",
    "avg_sessions_week",
    "spend_last_month",
    "premium_user",
    "urban_resident",
]
)

print(f"Generated {len(causal_data.df)} observations")
print(f"True causal effect: {gen.theta}")
print(f"Treatment rate: {causal_data.df['t'].mean():.1%}")
print(f"Average outcome: {causal_data.df['y'].mean():.2f}")

# Show first few rows
causal_data.df.head()


TypeError: CausalDatasetGenerator.to_causal_data() got an unexpected keyword argument 'cofounders'. Did you mean 'confounders'?

In [2]:
from causalkit.eda import CausalEDA
eda = CausalEDA(causal_data)

In [5]:
# Example of using the new summary() method for beautiful text output
print("=== DESIGN REPORT SUMMARY ===")
print(report.summary())

=== DESIGN REPORT SUMMARY ===
CAUSAL DESIGN REPORT SUMMARY

📊 TREATMENT & OUTCOME SUMMARY
------------------------------
Treatment Rate: 34.1%
Naive Difference (Treated - Control): 2.4398

Outcome by Treatment:
     count    mean     std
t                         
0.0   3294  6.0493  3.9618
1.0   1706  8.4891  3.9371

🎯 TREATMENT PREDICTABILITY
------------------------------
Treatment AUC: 0.6230
  ✅ Low predictability - minimal confounding risk

🔄 POSITIVITY/OVERLAP ASSESSMENT
------------------------------
Propensity Score Bounds: (0.05, 0.95)
Share Below Lower Bound: 0.0%
Share Above Upper Bound: 0.0%
Total in Extreme Regions: 0.0%
  ✅ Good overlap - positivity assumption satisfied

⚖️  COVARIATE BALANCE ASSESSMENT
------------------------------
Total Variables: 5
Imbalanced Before Weighting: 4
Imbalanced After Weighting: 0
  ✅ Perfect balance achieved after weighting

Worst Imbalanced Variables (Before Weighting):
  premium_user: 0.329 → -0.001
  avg_sessions_week: 0.199 → 0.006
  