# EconML

In [3]:
# Disabled install step for EconML; uncomment to run it.
# NOTE(review): `%pip install econml` would install into the running kernel's environment — consider it over `!pip`.
# !pip install econml

## モデルたち
+ Double Machine Learning
 + Linear final stage
 + Sparse linear final stage
 + Nonparametric last stage
+ Orthogonal Random Forests
+ Meta-Learners
 + XLearner
 + SLearner
 + TLearner
+ Doubly Robust Learners
 + Linear final stage
 + Sparse linear final stage
 + Nonparametric final stage
+ Orthogonal Instrumental Variables
 + Intent to Treat Doubly Robust Learner
+ Deep Instrumental Variables

※ソースコードを確認したところ、二段階最小二乗法（2SLS）の実装もあった。

## CATE modelとは
+ Conditional Average Treatment Effects Estimation
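 + 共変量 $X = x$ で条件付けた平均処置効果 $\tau(x) = E[Y(1) - Y(0) \mid X = x]$ を推定するモデル
   + 全体平均の ATE と異なり、個体・セグメントごとの処置効果の異質性を捉えられる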

In [4]:
from econml.cate_interpreter import SingleTreeCateInterpreter

# Build the single-tree CATE interpreter; all settings are passed as keyword arguments.
intrp = SingleTreeCateInterpreter(
    include_model_uncertainty=True,
    max_depth=2,
    min_samples_leaf=10,
)

In [None]:
# We interpret the CATE model's behavior based on the features used for heterogeneity
# NOTE(review): `est` is not defined and `plt` is not imported in any cell visible in this
# notebook, and `X` is only assigned in a later cell — this cell presumably relies on
# state from elsewhere; confirm before running on a fresh kernel.
intrp.interpret(est, X)
# Plot the tree
# The figure is created before intrp.plot(); presumably the tree is drawn onto the
# current pyplot figure, so keep this statement order — TODO confirm.
plt.figure(figsize=(25, 5))
intrp.plot(feature_names=['A', 'B', 'C', 'D'], fontsize=12)
plt.show()

# CausalML

In [7]:
# Disabled install step for CausalML; uncomment to run it.
# NOTE(review): `%pip install causalml` would install into the running kernel's environment — consider it over `!pip`.
# !pip install causalml

## モデルたち
+ Tree-based algorithms
 + Uplift tree/random forests on KL divergence, Euclidean Distance, and Chi-Square
 + Uplift tree/random forests on Contextual Treatment Selection
+ Meta-learner algorithms
 + S-learner
 + T-learner
 + X-learner
 + R-learner（知らないやつ）
+ Instrumental variables algorithms
 + 2-Stage Least Squares (2SLS)

In [3]:
import numpy as np
from causalml.dataset import synthetic_data
from causalml.inference.meta import LRSRegressor
from causalml.inference.meta import XGBTRegressor, MLPTRegressor
from causalml.inference.meta import BaseXRegressor
from causalml.inference.meta import BaseRRegressor
from xgboost import XGBRegressor

In [19]:
# Seed NumPy's global RNG so the generated data set (and therefore every ATE printed
# below) is the same on each Restart & Run All. 42 matches the random_state used by
# the learners in this notebook.
# NOTE(review): this assumes synthetic_data draws from NumPy's global RNG — confirm
# against the installed causalml version.
np.random.seed(42)

# Unpack into named variables rather than `_`: assigning to `_` in IPython overrides
# the "last output" variable for the rest of the session. `tau` and `b` follow the
# names used for the same call in the commented-out S-learner cell of this notebook.
y, X, treatment, tau, b, e = synthetic_data(mode=1, n=1000, p=5, sigma=1.0)

In [14]:
# Estimate the average treatment effect with LRSRegressor; estimate_ate returns three
# array-likes (unpacked as te, lb, ub) whose first element is reported.
lr = LRSRegressor()
te, lb, ub = lr.estimate_ate(X, treatment, y)
print(
    'Average Treatment Effect (Linear Regression): {:.2f} ({:.2f}, {:.2f})'.format(
        te[0], lb[0], ub[0]
    )
)

Average Treatment Effect (Linear Regression): 0.74 (0.63, 0.85)


In [15]:
# Same estimate as the linear-regression cell, this time with XGBTRegressor and a
# fixed random_state.
xg = XGBTRegressor(random_state=42)
te, lb, ub = xg.estimate_ate(X, treatment, y)
print(
    'Average Treatment Effect (XGBoost): {:.2f} ({:.2f}, {:.2f})'.format(
        te[0], lb[0], ub[0]
    )
)

Average Treatment Effect (XGBoost): 0.62 (0.54, 0.69)


In [16]:
# Estimate the average treatment effect with MLPTRegressor, configured through the
# keyword arguments below (fixed random_state).
nn = MLPTRegressor(
    hidden_layer_sizes=(10, 10),
    learning_rate_init=.1,
    early_stopping=True,
    random_state=42,
)
te, lb, ub = nn.estimate_ate(X, treatment, y)
print(
    'Average Treatment Effect (Neural Network (MLP)): {:.2f} ({:.2f}, {:.2f})'.format(
        te[0], lb[0], ub[0]
    )
)

Average Treatment Effect (Neural Network (MLP)): 0.82 (0.73, 0.91)


In [20]:
# Disabled X-learner example (entire cell is commented out).
# NOTE(review): the call below passes `e` as the second positional argument, whereas the
# R-learner cell in this notebook passes p=e and treatment=treatment by keyword — confirm
# the expected argument order (or switch to keywords) before re-enabling.
# xl = BaseXRegressor(learner=XGBRegressor(random_state=27))
# te, lb, ub = xl.estimate_ate(X, e, treatment, y)
# print('Average Treatment Effect (BaseXRegressor using XGBoost): {:.2f} ({:.2f}, {:.2f})'.format(te[0], lb[0], ub[0]))

In [21]:
# Estimate the average treatment effect with BaseRRegressor wrapping an XGBRegressor;
# `e` from the synthetic data is supplied as `p`.
rl = BaseRRegressor(learner=XGBRegressor(random_state=42))
te, lb, ub = rl.estimate_ate(
    X=X,
    p=e,
    treatment=treatment,
    y=y,
)
print(
    'Average Treatment Effect (BaseRRegressor using XGBoost): {:.2f} ({:.2f}, {:.2f})'.format(
        te[0], lb[0], ub[0]
    )
)

Average Treatment Effect (BaseRRegressor using XGBoost): 0.60 (0.59, 0.60)


In [32]:
# Exploration leftover: lists the attribute names of the lightgbm module.
# NOTE(review): no active cell visible in this notebook imports lightgbm (the only
# `import lightgbm` in view is commented out) — confirm it is imported before this
# cell runs, or remove the cell.
dir(lightgbm)

In [34]:
# Disabled example (the entire cell is commented out): S-learner ATE estimation on a
# larger synthetic data set, followed by feature-importance and SHAP plots.
# NOTE(review): `feature_names` is passed to get_importance below but is never assigned
# inside this cell — define it before re-enabling.
# from causalml.inference.meta import BaseSRegressor, BaseTRegressor, BaseXRegressor, BaseRRegressor
# from causalml.dataset.regression import synthetic_data
# import numpy as np
# from sklearn.ensemble import RandomForestRegressor
# import lightgbm

# # Load synthetic data
# y, X, treatment, tau, b, e = synthetic_data(mode=1, n=10000, p=25, sigma=0.5)
# w_multi = np.array(['treatment_A' if x==1 else 'control' for x in treatment]) # customize treatment/control names

# slearner = BaseSRegressor(lightgbm.LGBMRegressor(), control_name='control')
# slearner.estimate_ate(X, w_multi, y)
# slearner_tau = slearner.fit_predict(X, w_multi, y)

# model_tau_feature = RandomForestRegressor()  # specify model for model_tau_feature

# slearner.get_importance(X=X, tau=slearner_tau, model_tau_feature=model_tau_feature,
#                         normalize=True, method='auto', features=feature_names)

# # Using the feature_importances_ method in the base learner (LGBMRegressor() in this example)
# slearner.plot_importance(X=X, tau=slearner_tau, normalize=True, method='auto')

# # Using eli5's PermutationImportance
# slearner.plot_importance(X=X, tau=slearner_tau, normalize=True, method='permutation')

# # Using SHAP
# shap_slearner = slearner.get_shap_values(X=X, tau=slearner_tau)

# # Plot shap values without specifying shap_dict
# slearner.plot_shap_values(X=X, tau=slearner_tau)

# # Plot shap values WITH specifying shap_dict
# slearner.plot_shap_values(shap_dict=shap_slearner)

# # interaction_idx set to 'auto' (searches for feature with greatest approximate interaction)
# slearner.plot_shap_dependence(treatment_group='treatment_A',
#                               feature_idx=1,
#                               X=X,
#                               tau=slearner_tau,
#                               interaction_idx='auto')