In [1]:
import numpy as np
from sklearn.linear_model import LinearRegression
# Number of units simulated from the data generating process below.
NUM_DATA_POINTS = 100_000

# Introduction

![figures/identification_estimation_flowchart.png](figures/identification_estimation_flowchart.png)

**Causal estimand.** The average treatment effect (ATE): $\mathbb{E}[Y(1) - Y(0)] = \mathbb{E}[Y \mid do(T=1)] - \mathbb{E}[Y \mid do(T=0)]$

# Causal Model

## Graphical Model (DAG)

![figures/dag_1.png](figures/dag_1.png)

## Structural Causal Model (SCM)

## Data generating process

In [2]:
def f_x():
    """Structural equation for the covariate: X := U_X.

    U_X is exogenous noise drawn uniformly from {0, 1} using NumPy's
    global random state.

    Returns:
        0 or 1.
    """
    return np.random.choice([0, 1])

def f_t(x):
    """Structural equation for the treatment: T := 1[0.5 * X + U_T > 0].

    U_T is exogenous standard-normal noise drawn from NumPy's global
    random state, so a larger ``x`` makes treatment more likely.

    Args:
        x: Scalar covariate value for one unit.

    Returns:
        1 if the unit is treated, otherwise 0.
    """
    latent = 0.5 * x + np.random.normal()
    if latent > 0:
        return 1
    return 0

def f_y(x, t):
    """Structural equation for the outcome: Y := 0.8 * X + 1.2 * T + U_Y.

    U_Y is exogenous standard-normal noise drawn from NumPy's global
    random state. The coefficient on ``t`` (1.2) is the treatment effect
    built into the simulation.

    Args:
        x: Scalar covariate value for one unit.
        t: Treatment indicator (0 or 1) for the same unit.

    Returns:
        The simulated outcome for that unit.
    """
    systematic = 0.8 * x + 1.2 * t
    return systematic + np.random.normal()

# Seed NumPy's global RNG so that Restart Kernel -> Run All reproduces the
# same sample; f_x, f_t and f_y all draw their noise terms from np.random.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

# Sample in causal order: X first, then T given X, then Y given (X, T).
X = np.array([f_x() for _ in range(NUM_DATA_POINTS)])
T = np.array([f_t(x) for x in X])
Y = np.array([f_y(x, t) for x, t in zip(X, T)])

# Identification

Backdoor criterion

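Backdoor adjustment: $X$ blocks the only backdoor path $T \leftarrow X \rightarrow Y$, so $\mathbb{E}[Y \mid do(T=t)] = \sum_x \mathbb{E}[Y \mid T=t, X=x]\,P(X=x)$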

# Estimation

Linear model

Propensity score

Matching?

In [3]:
# Regress Y on (X, T); column 0 of the design matrix is X, column 1 is T.
model = LinearRegression()
XT = np.array([X, T]).T
model.fit(XT, Y)

# Counterfactual design matrices: keep each unit's observed X but set the
# treatment column to 1 (everyone treated) or 0 (no one treated).
XT_1 = np.array([X, np.ones(len(X))]).T
XT_0 = np.array([X, np.zeros(len(X))]).T

# Average the per-unit difference in predicted outcomes to estimate the ATE.
y_treated = model.predict(XT_1)
y_control = model.predict(XT_0)
ate_estimate = np.mean(y_treated - y_control)
print("ATE estimate:", ate_estimate)

ATE estimate: 1.2060046013593746


In [4]:
# The fitted model is linear in (X, T) with no interaction term, so the
# predicted difference between T=1 and T=0 is the coefficient on T
# (column 1 of the design matrix) for every unit.
treatment_coef = model.coef_[1]
print("ATE estimate:", treatment_coef)

ATE estimate: 1.2060046013593742


# Colliders?

# Unobserved confounding

![figures/dag_2.png](figures/dag_2.png)

## Estimation

## Sensitivity analysis

# Conclusion

TODO

This notebook covers only a small part of causal inference.