In [6]:
import numpy as np
from scipy import stats
import pandas as pd
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings("ignore", category=DataConversionWarning)


In [2]:
df = pd.read_pickle("data.pkl")

# LINEAR DR

In [21]:
from econml.dr import LinearDRLearner
from sklearn.linear_model import LogisticRegression
from econml.inference import BootstrapInference

ycol = "callback"
controls = ["ba_quality", "language_skills", 
              "exp_highquality", "ma", "certificate",
              'occ_Administrative', 'occ_Biotech and Pharmacy', 'occ_Civil Engineer',
              'occ_Clerical', 'occ_Ecommerce', 'occ_Education',
              'occ_Electrical Engineer', 'occ_Executive Assistant', 'occ_Finance',
              'occ_Food Services Managers', 'occ_Human Resources Payroll',
              'occ_Insurance', 'occ_Maintenance Technician',
              'occ_Marketing and Sales', 'occ_Media and Arts', 'occ_Production',
              'occ_Programmer', 'occ_Retail', 'occ_Social Worker', 'occ_Technology']

treatments = ['female_type1', 'female_type2', 'female_type3', 'female_type4']

# Select bootstrap for CI estimation
inf = BootstrapInference(n_bootstrap_samples=50, bootstrap_type='normal')
ate_dr = {}
ate_ci = {}
for i, treat in enumerate(treatments, 1):

    print(f"Running DR Learner for: {treat}")

    y = df[ycol].values
    t = df[treat].values
    X = df[controls].values

    est_dr = LinearDRLearner(
        discrete_outcome=True,
        model_propensity=LogisticRegression(solver='sag',n_jobs=-1),
        cv=2,
        min_propensity=0.05
    )

    # fit
    est_dr.fit(y, t, X=X, inference=inf)

    # ATE estimate
    ate_dr[treat] = est_dr.ate(X)
    ate_ci[treat] = est_dr.ate_interval(X)
    print(f"ATE: {ate_dr[treat]}")
    print(f"ATE: {ate_ci[treat]}")


Running DR Learner for: female_type1
ATE: [0.00797287]
ATE: (array([-0.07275457]), array([0.08870031]))
Running DR Learner for: female_type2
ATE: [0.00767577]
ATE: (array([-0.09220172]), array([0.10755326]))
Running DR Learner for: female_type3
ATE: [-0.01976987]
ATE: (array([-0.10538024]), array([0.0658405]))
Running DR Learner for: female_type4
ATE: [-0.03541502]
ATE: (array([-0.10972249]), array([0.03889245]))


# NON LINEAR DR

In [22]:
from econml.dr import ForestDRLearner
rf_ate_dr = {}
rf_ate_ci = {}
for i, treat in enumerate(treatments, 1):

    print(f"Running DR Learner for: {treat}")

    y = df[ycol].values
    t = df[treat].values
    X = df[controls].values

    est_dr = ForestDRLearner(
        discrete_outcome=True,
        model_propensity=LogisticRegression(solver='sag',n_jobs=-1),
        cv=2,
        min_propensity=0.05
    )

    # fit
    est_dr.fit(y, t, X=X)

    # ATE estimate
    rf_ate_dr[treat] = est_dr.ate(X)
    print(f"ATE: {rf_ate_dr[treat]}")

Running DR Learner for: female_type1
ATE: [0.00962935]
Running DR Learner for: female_type2
ATE: [0.00615931]
Running DR Learner for: female_type3
ATE: [-0.01854799]
Running DR Learner for: female_type4
ATE: [-0.03701956]


# Table

In [23]:
# ECONML results in arrays instead of values, so need to convert to float
def to_float(x):
    return float(x) if not hasattr(x, '__len__') else float(x[0])

In [26]:
rows = []

# Build table by looping across treatments
for treat in treatments:

    # convert ATEs
    lin_ate = to_float(ate_dr[treat])
    rf_ate  = to_float(rf_ate_dr[treat])

    # CI
    lb, ub = ate_ci[treat]
    lb, ub = to_float(lb), to_float(ub)

    rows.append({
        "Treatment": treat,
        "Linear DR ATE": f"{lin_ate:.4f}",
        "Linear DR 95% CI": f"({lb:.4f}, {ub:.4f})",
        "RF DR ATE": f"{rf_ate:.4f}"
    })

df_dr = pd.DataFrame(rows)
print(df_dr.to_latex(index=False,
                     escape=False,
                     column_format="lccc",
                     caption="ATE Estimates from Linear and RF Doubly Robust Learners",
                     label="tab:dr_results"))

\begin{table}
\caption{ATE Estimates from Linear and RF Doubly Robust Learners}
\label{tab:dr_results}
\begin{tabular}{lccc}
\toprule
Treatment & Linear DR ATE & Linear DR 95% CI & RF DR ATE \\
\midrule
female_type1 & 0.0080 & (-0.0728, 0.0887) & 0.0096 \\
female_type2 & 0.0077 & (-0.0922, 0.1076) & 0.0062 \\
female_type3 & -0.0198 & (-0.1054, 0.0658) & -0.0185 \\
female_type4 & -0.0354 & (-0.1097, 0.0389) & -0.0370 \\
\bottomrule
\end{tabular}
\end{table}

