In [32]:
import pandas as pd
import numpy as np
from Gen_data import SimulationStudy
import Methods_all_sample as method
from Analysis_new import get_split
import subprocess
import pickle
import matplotlib.pyplot as plt


In [33]:
def save_to_pkl(data_name, data):
    """Pickle ``data`` into the file ``<data_name>.pkl``.

    Parameters
    ----------
    data_name : str
        Output path without extension; ``.pkl`` is appended to it.
    data : object
        Any picklable object.

    Returns
    -------
    None
    """
    target_path = f'{data_name}.pkl'
    # Binary write mode: pickle emits bytes, and an existing file is overwritten.
    with open(target_path, 'wb') as sink:
        pickle.dump(data, sink)

In [34]:
# Seed NumPy's global RNG; the same value is also passed to the simulator below.
np.random.seed(220924)

# Configure the simulation study and generate one dataset from it.
sim: SimulationStudy = SimulationStudy(
    p=30,
    mean_correlation=0.5,
    cor_variance=0.2,
    n=4000,
    no_feat_cate=3,
    non_linear='quadratic',
    seed=220924,
)
simulation = sim.create_dataset()

# Split the simulated data into the train/test pieces used by every estimator below.
(
    train_df, test_df,
    X_train, Y_train, T_train,
    X_test, T_test, Y_test,
    true_cate_train, true_cate_test,
) = get_split(simulation)

# Objects written to disk as '<key>.pkl' (test_df, Y_test and the true CATEs are
# not written). Presumably these are read by the external CausalML script, which
# runs in a different interpreter -- confirm against TLearner_feat_imp.py.
df_dict = dict(
    train_df=train_df,
    X_train=X_train,
    Y_train=Y_train,
    T_train=T_train,
    X_test=X_test,
    T_test=T_test,
)

for data_name, data in df_dict.items():
    save_to_pkl(data_name, data)


T-Learner Feature Importance

In [35]:
def run_TLearner_causalml(env=r'C:\Users\joaov\anaconda3\envs\causalml-py38\python.exe',
                          script='TLearner_feat_imp.py'):
    """Run the T-Learner feature-importance script with a separate Python interpreter.

    Parameters
    ----------
    env : str
        Path to the Python executable that launches the script.
        NOTE(review): the default is an absolute path on one specific machine;
        pass ``env`` explicitly when running anywhere else.
    script : str
        Script handed to the interpreter. A relative path is resolved against the
        current working directory, because no ``cwd`` is passed to the subprocess.

    Raises
    ------
    subprocess.CalledProcessError
        If the script exits with a non-zero status.
    FileNotFoundError
        If ``env`` does not point to an existing executable.
    """
    # check=True makes a failing script raise here instead of being ignored;
    # otherwise later cells would silently load stale or missing output files.
    subprocess.run([env, script], check=True)
    
# Launch the external script with the default interpreter; blocks until it exits.
# Presumably it writes the .npy files loaded further down -- confirm against
# TLearner_feat_imp.py.
run_TLearner_causalml()

In [36]:
# Fit the T-Learner through the project's `method` module and unpack the fitted
# estimator, the train/test CATE estimates and the two RMSE values it returns.
(
    est_t,
    estimated_cate_train,
    estimated_cate_test,
    RMSE_test,
    RMSE_train,
) = method.TLearner_estimator(
    Y_train, T_train, X_train,
    X_test, T_test,
    true_cate_train, true_cate_test,
)

Check that both libraries deliver the same CATEs (visual comparison of the two estimate distributions)

In [37]:
# Load the CATE estimates stored in 'causalml_cate_tlearner.npy'.
cate_t_causalml = np.load(
    'causalml_cate_tlearner.npy',
    allow_pickle=True,  # NOTE(review): enables unpickling -- only load trusted files
)

In [38]:
# Overlay the two sets of test-set CATE estimates in a single histogram.
fig, ax = plt.subplots(figsize=(8, 6))
ax.hist(
    # The CausalML array is flattened to 1-D so both inputs are plotted as
    # separate datasets.
    [estimated_cate_test, cate_t_causalml.flatten()],
    bins=30,
    alpha=0.5,
    label=['Econ ML', 'CausalML'],
)
ax.set_xlabel('CATE Estimates')
ax.set_ylabel('Frequency')
ax.set_title('Overlap of CATE Estimates from EconML and CausalML')
ax.legend(loc='upper right')
plt.show()

FigureCanvasPgf is non-interactive, and thus cannot be shown


In [39]:
import feature_importance

Feature Importance T-Learner

In [40]:
# 'feat_importance.npy' holds a pickled Python object wrapped in a 0-d array;
# .item() unwraps it.
feats_dict = np.load('feat_importance.npy', allow_pickle=True).item()

# NOTE(review): key 1 is presumably the treatment-group key used by the external
# script -- confirm against TLearner_feat_imp.py.
series = feats_dict[1]

# Only the index labels (feature names) are needed for the plots.
important_feats_t = series.index

feature_importance.partial_dependence_plots(X_test, important_feats_t, est=est_t)

GRF Feature Importance

In [41]:
# Fit the GRF estimator; besides the estimator, CATE estimates and RMSEs it also
# returns its feature importances.
(
    est_grf,
    feat_importance_grf,
    estimated_cate_train_grf,
    estimated_cate_test_grf,
    RMSE_test_grf,
    RMSE_train_grf,
) = method.GRF_estimator(
    Y_train, T_train, X_train,
    X_test, T_test,
    true_cate_train, true_cate_test,
)

# Select the important features from the importances, then draw their
# partial-dependence plots on the test features.
important_feats_grf = feature_importance.get_important_feats(X_test, feat_importance_grf)
feature_importance.partial_dependence_plots(X_test, important_feats_grf, est=est_grf)

CF DML Feature Importance

In [42]:
# Fit the CF DML estimator; besides the estimator, CATE estimates and RMSEs it
# also returns its feature importances.
(
    est_cfdml,
    feat_importance_cfdml,
    estimated_cate_train_cfdml,
    estimated_cate_test_cfdml,
    RMSE_test_cfdml,
    RMSE_train_cfdml,
) = method.CF_DML(
    Y_train, T_train, X_train,
    X_test, T_test,
    true_cate_train, true_cate_test,
)

# Select the important features from the importances, then draw their
# partial-dependence plots on the test features.
important_feats_cfdml = feature_importance.get_important_feats(X_test, feat_importance_cfdml)
feature_importance.partial_dependence_plots(X_test, important_feats_cfdml, est=est_cfdml)