In [1]:
import seaborn as sns
import pandas as pd
import numpy as np

In [2]:
# Fetch seaborn's Titanic dataset and discard passengers whose survival
# flag or sex is missing, since both columns are required below.
required_columns = ['survived', 'sex']
data = sns.load_dataset('titanic').dropna(subset=required_columns)

In [3]:
# Work on an independent two-column copy so that columns added later
# do not touch the full dataset.
df = data.loc[:, ['survived', 'sex']].copy()
df.head()

Unnamed: 0,survived,sex
0,0,male
1,1,female
2,1,female
3,1,female
4,0,male


In [4]:
# Binary "treatment" indicator derived from sex: female -> 1, male -> 0.
sex_to_treatment = {'male': 0, 'female': 1}
df['true_treatment'] = df['sex'].map(sex_to_treatment)
df.head()

Unnamed: 0,survived,sex,true_treatment
0,0,male,0
1,1,female,1
2,1,female,1
3,1,female,1
4,0,male,0


In [5]:
# Observed survival rate within each group; these are used as the "true"
# outcome probabilities by the simulation that follows.
is_female = df['true_treatment'] == 1
is_male = df['true_treatment'] == 0
p_female = df.loc[is_female, 'survived'].mean()
p_male = df.loc[is_male, 'survived'].mean()

p_female, p_male

(np.float64(0.7420382165605095), np.float64(0.18890814558058924))

In [6]:
# Simulated randomized trial: each of N participants is assigned to
# treatment (1) or control (0) by a fair coin flip.
np.random.seed(42)  # fixed seed so the assignment is reproducible
N = 1000
coin_flips = np.random.binomial(n=1, p=0.5, size=N)
simulated = pd.DataFrame({'treatment': coin_flips})

simulated

Unnamed: 0,treatment
0,0
1,1
2,1
3,1
4,0
...,...
995,0
996,1
997,0
998,1


In [7]:
# Draw each participant's binary outcome from a Bernoulli distribution whose
# success probability is the observed survival rate of their arm:
# p_female when treatment == 1, p_male otherwise.
# np.random.binomial accepts an array of probabilities, so one vectorized
# call replaces a Python-level lambda invoked once per row.
outcome_prob = np.where(simulated['treatment'] == 1, p_female, p_male)
simulated['outcome'] = np.random.binomial(1, outcome_prob)

simulated

Unnamed: 0,treatment,outcome
0,0,0
1,1,1
2,1,0
3,1,1
4,0,0
...,...,...
995,0,0
996,1,0
997,0,0
998,1,1


In [8]:
# Average treatment effect in the simulated trial: mean outcome of the
# treated arm minus mean outcome of the control arm.
treated_outcomes = simulated.loc[simulated['treatment'] == 1, 'outcome']
control_outcomes = simulated.loc[simulated['treatment'] == 0, 'outcome']
ate = treated_outcomes.mean() - control_outcomes.mean()

print(f"Simulated RCT ATE (female vs. male): {ate:.3f}")

Simulated RCT ATE (female vs. male): 0.544


In [9]:
# Difference between the two groups' observed survival rates, shown as the
# reference value for the simulated ATE.
survival_rate_gap = p_female - p_male
survival_rate_gap

np.float64(0.5531300709799203)