In [18]:
# import pandas as pd
# import numpy as np
# import statsmodels.api as sm

# # ------------------------------------------------------------
# # Load and combine the data
# # The data has two sources e=1 (observational) and e=2 (experimental)
# q3 = pd.read_csv("data_for_HW4.csv")

# # We will construct a single model that uses both datasets.
# # Let's use a flexible parametric specification that allows for 
# # treatment effect heterogeneity via interactions between T and X.
# #
# # Model:
# #   Y = α + Xβ + T(γ0 + Xγ) + ε
# #
# # This model can be written as:
# #   Y = α + Xβ + Tγ0 + (T * X)γ + ε
# #
# # This is a linear model in parameters. The treatment effect at point X is:
# #   τ(X) = γ0 + Xγ
# #
# # By fitting this model on the combined data (e=1 and e=2), we share all 
# # parameters and assume a common structural relationship, using both datasets 
# # simultaneously.
# #
# # Note: This code assumes that treatment T is binary {0,1}. The model includes
# # all main effects of X and their interactions with T, allowing the treatment
# # effect to vary with X. Since T is randomized in e=2, that sample identifies
# # the treatment-effect parameters on its own. Pooling e=1 can increase
# # precision, but only if treatment in the observational sample is unconfounded
# # given X; otherwise the pooled estimates inherit confounding bias. We are not
# # separating the datasets, but fitting one unified model.

# # Extract variables
# Y = q3['y'].values
# T = q3['t'].values
# X = q3[['x.1','x.2','x.3','x.4','x.5']].values

# # Create interaction terms between T and X
# # We'll form a design matrix: [1, X, T, T*X]
# n = X.shape[0]
# intercept = np.ones((n,1))
# TX = (T.reshape(-1,1) * X)  # Element-wise multiplication

# # Final design matrix:
# # Z = [1, X, T, T*X]
# Z = np.hstack([intercept, X, T.reshape(-1,1), TX])

# # Fit the linear model using OLS
# model = sm.OLS(Y, Z).fit()

# print(model.summary())

# # ------------------------------------------------------------
# # Interpretation:
# # The fitted model gives estimates of α, β, γ0, and γ.
# # The treatment effect function at X is: τ(X) = γ0 + Xγ.
# # After fitting the model, we have a single set of parameters learned 
# # from both datasets together. This leverages the assumption that both 
# # datasets come from the same structural equation. The RCT data (e=2) 
# # secures identification, while the observational data (e=1) can provide 
# # additional precision.

# # You can extract the parameters and use them to calculate τ(X) at any point X.
# params = model.params

# # Parameter mapping:
# # params = [α, β_1, β_2, β_3, β_4, β_5, γ0, γ_1, γ_2, γ_3, γ_4, γ_5]
# alpha = params[0]
# beta = params[1:6]    # Coefficients for X
# gamma0 = params[6]    
# gamma = params[7:12]  # Coefficients for (T*X)

# # For a given X (row vector of length 5), tau(X) = γ0 + X * γ
# def tau(x_vec):
#     return gamma0 + np.dot(x_vec, gamma)

In [21]:
import numpy as np
import pandas as pd
import doubleml as dml
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Partially linear regression (PLR) double-ML estimate of the effect of `t`
# on `y`, using random-forest nuisance learners tuned by 5-fold grid search.

SEED = 42  # shared by the learners and by the cross-fitting sample split

# Load the combined dataset and normalise column names ("x.1" -> "x_1").
# The renamed frame is rebound rather than mutated in place, so re-running
# the cell always starts from the file on disk.
df = pd.read_csv("data_for_HW4.csv")
df = df.rename(columns=lambda c: c.replace(".", "_"))

# Specify outcome, treatment, and base covariates
y_col = 'y'
d_col = 't'
base_x_cols = ['x_1', 'x_2', 'x_3', 'x_4', 'x_5']

# Interact each base covariate with the shifted source indicator (e - 1).
# NOTE(review): presumably `e` is coded 1/2 so that (e - 1) is a 0/1 dummy
# for the second data source -- confirm against the data.
for col in base_x_cols:
    df[col + "_e"] = df[col] * (df['e'] - 1)

# Every column except the outcome and the treatment is used as a control;
# this includes `e` itself and the interaction columns created above.
x_cols = [col for col in df.columns if col not in (y_col, d_col)]

# Create a DoubleMLData object
obj_dml_data = dml.DoubleMLData(df, y_col, d_col, x_cols)

# Both forests are tuned over the same grid.
rf_param_grid = {
    'n_estimators': [50, 200, 500],
    'max_features': ['sqrt'],
    'max_depth': [1, 3, 5],
}

# Learners for the nuisance functions.
# ml_g: model for E[Y|X] (passed positionally as the first learner; recent
#       DoubleML releases name that argument `ml_l` -- TODO confirm against
#       the installed version).
# ml_m: model for P(T=1|X).
ml_g = GridSearchCV(
    estimator=RandomForestRegressor(max_features='sqrt', random_state=SEED),
    param_grid=rf_param_grid,
    cv=5,
    n_jobs=-1,
)
ml_m = GridSearchCV(
    estimator=RandomForestClassifier(max_features='sqrt', random_state=SEED),
    param_grid=rf_param_grid,
    cv=5,
    n_jobs=-1,
)

# DoubleML draws the cross-fitting folds when the model object is built and
# takes them from NumPy's global RNG. Seeding here makes the reported
# coefficient reproducible; the learners' own random_state does not cover
# the fold assignment.
np.random.seed(SEED)

# Initialize the DoubleMLPLR object
dml_plr_rf = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m, score='partialling out')

# Fit the model
dml_plr_rf.fit()

# Print the summary of the estimated causal effect
print(dml_plr_rf.summary)

       coef   std err          t  P>|t|     2.5 %    97.5 %
t  1.314469  0.020126  65.310582    0.0  1.275022  1.353917


In [20]:
import numpy as np
import pandas as pd
import doubleml as dml
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.model_selection import GridSearchCV

# Partially linear regression (PLR) double-ML estimate of the effect of `t`
# on `y`, using multi-layer-perceptron nuisance learners tuned by 5-fold
# grid search.

SEED = 42  # shared by the learners and by the cross-fitting sample split

# Load the combined dataset and normalise column names ("x.1" -> "x_1").
# The renamed frame is rebound rather than mutated in place, so re-running
# the cell always starts from the file on disk.
df = pd.read_csv("data_for_HW4.csv")
df = df.rename(columns=lambda c: c.replace(".", "_"))

# Specify outcome, treatment, and base covariates
y_col = 'y'
d_col = 't'
base_x_cols = ['x_1', 'x_2', 'x_3', 'x_4', 'x_5']

# Interact each base covariate with the shifted source indicator (e - 1).
# NOTE(review): presumably `e` is coded 1/2 so that (e - 1) is a 0/1 dummy
# for the second data source -- confirm against the data.
for col in base_x_cols:
    df[col + "_e"] = df[col] * (df['e'] - 1)

# Every column except the outcome and the treatment is used as a control;
# this includes `e` itself and the interaction columns created above.
x_cols = [col for col in df.columns if col not in (y_col, d_col)]

# Create a DoubleMLData object
obj_dml_data = dml.DoubleMLData(df, y_col, d_col, x_cols)

# Shared search grid for both networks.
# `learning_rate` is intentionally not searched: scikit-learn only uses that
# schedule when solver='sgd', so with solver='adam' every value gives the
# same fit and searching over it only repeats identical fits.
mlp_param_grid = {
    'hidden_layer_sizes': [(32, 32), (32, 32, 32, 32), (64, 32, 16)],
    'activation': ['relu'],
    'solver': ['adam'],
    'alpha': [0.01, .1, .5],
}

# Learners for the nuisance functions.
# ml_g: model for E[Y|X] (passed positionally as the first learner; recent
#       DoubleML releases name that argument `ml_l` -- TODO confirm against
#       the installed version).
# ml_m: model for P(T=1|X).
# NOTE(review): the recorded run of this cell warned that ml_m's propensity
# predictions are close to zero or one. The inputs are fed to the networks
# unscaled; standardising them or regularising more strongly may be worth
# checking -- not changed here.
ml_g = GridSearchCV(
    estimator=MLPRegressor(random_state=SEED, max_iter=3000),
    param_grid=mlp_param_grid,
    cv=5,
    n_jobs=-1,
)
ml_m = GridSearchCV(
    estimator=MLPClassifier(random_state=SEED, max_iter=3000),
    param_grid=mlp_param_grid,
    cv=5,
    n_jobs=-1,
)

# DoubleML draws the cross-fitting folds when the model object is built and
# takes them from NumPy's global RNG. Seeding here makes the reported
# coefficient reproducible; the learners' own random_state does not cover
# the fold assignment.
np.random.seed(SEED)

# Initialize the DoubleMLPLR object
dml_plr_dnn = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m, score='partialling out')

# Fit the model
dml_plr_dnn.fit()

# Print the summary of the estimated causal effect
print(dml_plr_dnn.summary)

     coef   std err          t  P>|t|     2.5 %    97.5 %
t  1.5903  0.018785  84.658239    0.0  1.553482  1.627117


             n_jobs=-1,
             param_grid={'activation': ['relu'], 'alpha': [0.01, 0.1, 0.5],
                         'hidden_layer_sizes': [(32, 32), (32, 32, 32, 32),
                                                (64, 32, 16)],
                         'learning_rate': ['adaptive'], 'solver': ['adam']}) for ml_m are close to zero or one (eps=1e-12).
