In [1]:
import numpy as np
import pandas as pd
from scipy import stats

# Simulate a randomized experiment with known potential outcomes.
# Fix the global NumPy seed so every draw below is reproducible.
np.random.seed(5)

n = 100  # number of simulated units

# Potential outcome WITHOUT treatment: y0 ~ N(100, 30^2)
y0 = np.random.normal(loc=100, scale=30, size=n)

# Systematic treatment effect, identical for every unit
te = 20

# Potential outcome WITH treatment: y1 = y0 + te + N(0, 10^2).
# Because of the extra noise term, the realized difference y1 - y0 varies
# across units even though `te` itself is constant.
y1 = y0 + te + np.random.normal(loc=0, scale=10, size=n)

# Random treatment assignment: draw a standard normal per unit
# (the equivalent of Stata's `drawnorm random`) and treat when it is positive.
random_normal = np.random.normal(loc=0, scale=1, size=n)
D = (random_normal > 0).astype(int)  # 1 if treated, 0 if control

# Observed outcome: y1 for treated units, y0 for controls
y = D * y1 + (1 - D) * y0

# Assemble a DataFrame laid out like the Stata dataset.
# `te` and `random` are appended after the original four columns so that
# cells which read df['te'] also work on the simulated frame; the scalar
# `te` is broadcast to all n rows by the DataFrame constructor.
df = pd.DataFrame({
    'y0': y0,
    'y1': y1,
    'D': D,
    'y': y,
    'te': float(te),
    'random': random_normal
})

In [3]:
import os

import pandas as pd
import pyreadstat as st

# Show every column when a frame is displayed (this option is global).
pd.set_option('display.max_columns', None)

# Location of the problem-set Stata file. Set the PS5_EX2_PATH environment
# variable to point at your own copy; when it is unset, the original
# machine-specific location is used so existing runs behave as before.
path = os.environ.get(
    "PS5_EX2_PATH",
    r"C:\Users\HP\OneDrive\Escritorio\David Guzzi\DiTella\MEC\Materias\2025\2025 2T\[MT08-MT13] Microeconometría II\Clases prácticas\PS\PS5-20251103\ps5_ex2.dta",
)

# Fail early with an actionable message instead of a reader-library error.
if not os.path.isfile(path):
    raise FileNotFoundError(
        f"Stata file not found: {path!r}. "
        "Set the PS5_EX2_PATH environment variable to the location of ps5_ex2.dta."
    )

# read_dta returns the data together with a metadata object.
# NOTE: this rebinds `df`, replacing any frame of that name built in an
# earlier cell; every cell below works on the frame loaded from the file.
df, meta = st.read_dta(path)
df.head(1)

Unnamed: 0,y0,te,y1,random,D,y,U
0,89.203667,20.0,102.014397,0.775717,1.0,102.014397,0.0


In [4]:
# Observed outcome `y`, split by treatment indicator D (1 = treated, 0 = control).
y_treated = df['y'][df['D'].eq(1)]
y_control = df['y'][df['D'].eq(0)]

# Raw difference in group means (treated minus control).
ate_sample = y_treated.mean() - y_control.mean()
ate_sample

np.float64(19.67476337727264)

In [5]:
# Two-sample t test of treated vs. control outcomes; equal_var=True requests
# the pooled-variance (equal variances) version of the test.
ttest_result = stats.ttest_ind(y_treated, y_control, equal_var=True)
t_stat, p_value = ttest_result.statistic, ttest_result.pvalue
t_stat, p_value

(np.float64(3.226834906864113), np.float64(0.001702316698383185))

In [6]:
# Plain-text summary of the comparison, one item per line, rounded to 3 decimals.
print(
    "Two-sample t test with equal variances",
    f"Mean treated (D=1): {y_treated.mean():.3f}",
    f"Mean control (D=0): {y_control.mean():.3f}",
    f"ATE sample (treated - control): {ate_sample:.3f}",
    f"t statistic: {t_stat:.3f}",
    f"p-value: {p_value:.3f}",
    sep="\n",
)


Two-sample t test with equal variances
Mean treated (D=1): 116.619
Mean control (D=0): 96.944
ATE sample (treated - control): 19.675
t statistic: 3.227
p-value: 0.002


In [7]:
# Average of the `te` column over the whole sample.
# NOTE(review): `te` is not the realized unit-level difference y1 - y0; in the
# row displayed after loading, y1 - y0 is about 12.81 while te is 20.
# Presumably `te` is the systematic effect only; confirm this is the intended
# definition of the "true" ATE.
ATE_true = df.loc[:, 'te'].mean()
ATE_true

np.float64(20.0)

In [8]:
# Average of the `te` column over treated units only (D == 1).
ATT_true = df['te'][df['D'] == 1].mean()
ATT_true


np.float64(20.0)

In [9]:
# Average of the `te` column over untreated units only (D == 0).
ATU_true = df['te'][df['D'] == 0].mean()
ATU_true


np.float64(20.0)