In [8]:
from causalml.inference.meta import LRSRegressor

In [9]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import statsmodels.api as sm
from xgboost import XGBRegressor
import warnings

from causalml.inference.meta import LRSRegressor
from causalml.inference.meta import XGBTRegressor, MLPTRegressor
from causalml.inference.meta import BaseXRegressor, BaseRRegressor, BaseSRegressor, BaseTRegressor
from causalml.match import NearestNeighborMatch, MatchOptimizer, create_table_one
from causalml.propensity import ElasticNetPropensityModel
from causalml.dataset import *
from causalml.metrics import *

warnings.filterwarnings('ignore')
plt.style.use('fivethirtyeight')

In [10]:
import causalml
print(causalml.__version__)

0.13.0


In [11]:
# Generate synthetic data using mode 1
y, X, treatment, tau, b, e = synthetic_data(mode=1, n=10000, p=8, sigma=1.0)

In [13]:
treatment,y

(array([1, 1, 0, ..., 1, 0, 0]),
 array([2.34390778, 2.53615765, 1.0438236 , ..., 1.27761875, 1.10040105,
        1.52938174]))

In [7]:
# Ready-to-use S-Learner using LinearRegression
learner_s = LRSRegressor()
ate_s = learner_s.estimate_ate(X=X, treatment=treatment, y=y)
print(ate_s)
print('ATE estimate: {:.03f}'.format(ate_s[0][0]))
print('ATE lower bound: {:.03f}'.format(ate_s[1][0]))
print('ATE upper bound: {:.03f}'.format(ate_s[2][0]))

# After calling estimate_ate, add pretrain=True flag to skip training
# This flag is applicable for other meta learner
ate_s = learner_s.estimate_ate(X=X, treatment=treatment, y=y, pretrain=True)
print(ate_s)
print('ATE estimate: {:.03f}'.format(ate_s[0][0]))
print('ATE lower bound: {:.03f}'.format(ate_s[1][0]))
print('ATE upper bound: {:.03f}'.format(ate_s[2][0]))

(array([0.71000558]), array([0.66278554]), array([0.75722562]))
ATE estimate: 0.710
ATE lower bound: 0.663
ATE upper bound: 0.757
(array([0.71000558]), array([0.66278554]), array([0.75722562]))
ATE estimate: 0.710
ATE lower bound: 0.663
ATE upper bound: 0.757


In [20]:
# Ready-to-use T-Learner using XGB
learner_t = XGBTRegressor()
ate_t = learner_t.estimate_ate(X=X, treatment=treatment, y=y)
print('Using the ready-to-use XGBTRegressor class')
print(ate_t)

# Calling the Base Learner class and feeding in XGB
learner_t = BaseTRegressor(learner=XGBRegressor())
ate_t = learner_t.estimate_ate(X=X, treatment=treatment, y=y)
print('\nUsing the BaseTRegressor class and using XGB (same result):')
print(ate_t)

# Calling the Base Learner class and feeding in LinearRegression
learner_t = BaseTRegressor(learner=LinearRegression())
ate_t = learner_t.estimate_ate(X=X, treatment=treatment, y=y)
print('\nUsing the BaseTRegressor class and using Linear Regression (different result):')
print(ate_t)

Using the ready-to-use XGBTRegressor class
(array([0.53139259]), array([0.50619338]), array([0.5565918]))

Using the BaseTRegressor class and using XGB (same result):
(array([0.53139259]), array([0.50619338]), array([0.5565918]))

Using the BaseTRegressor class and using Linear Regression (different result):
(array([0.66450111]), array([0.62405815]), array([0.70494406]))


In [14]:
# Simulate randomized trial: mode=2
y, X, w, tau, b, e = synthetic_data(mode=2, n=2000, p=10, sigma=3.0)

df = pd.DataFrame(X)
feature_names = [f'feature_{i}' for i in range(X.shape[1])]
df.columns = feature_names
df['outcome'] = y
df['treatment'] = w
df['treatment_effect'] = tau

In [15]:
df

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,outcome,treatment,treatment_effect
0,-1.282953,-1.389113,0.000000,0.491388,-0.933886,-0.603955,0.453313,-0.801766,0.148728,0.757179,-5.115472,0,-1.060372
1,0.264404,-2.743457,0.000000,0.123914,-0.123881,0.864383,-1.047508,1.566007,-0.573055,-0.660420,0.290439,0,0.326766
2,0.650330,0.587039,1.237369,-0.094023,0.298703,1.672891,0.421792,-0.955444,-1.720199,0.252442,1.471926,0,1.679469
3,0.231283,0.296227,0.527510,-1.460268,-0.937364,-0.251391,2.115048,0.774319,-1.076403,0.350440,0.268651,1,1.083472
4,-1.171923,0.167832,0.000000,0.172589,1.079902,-0.118722,-1.528634,-0.006747,-0.346487,0.077435,0.034911,0,-0.391343
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,-0.493035,-1.908314,0.000000,-0.192043,-0.186568,0.491411,-0.123647,0.410428,-0.529426,-1.752098,1.766625,1,-0.354726
1996,-1.384667,-0.106182,0.000000,-0.255577,-0.174889,1.302332,-0.455120,-0.670361,-0.839470,0.628577,1.408221,1,-0.743202
1997,-0.875686,0.757639,0.000000,0.712265,1.636796,0.089836,0.518497,-0.855429,-0.169228,1.155269,-0.231889,1,0.266380
1998,1.487990,-0.578558,0.909432,1.506715,-0.659746,-0.014130,0.196534,-2.040742,0.215403,0.069996,3.693178,0,1.933129


In [14]:
import numpy as np
import pandas as pd

from causalml.optimize import get_pns_bounds

In [15]:
# Data processing
import pandas as pd
import numpy as np
# Create synthetic data
from causalml.dataset import synthetic_data
# Visualization
import seaborn as sns
# Machine learning model
from lightgbm import LGBMRegressor

In [16]:
# Set a seed for reproducibility
np.random.seed(42)
# Create a synthetic dataset
y, X, treatment, _, _, _ = synthetic_data(mode=1, n=1000, p=5, sigma=1.0)
# Save the data in a pandas dataframe
df = pd.DataFrame({'y': y, 'X1': X.T[0], 'X2': X.T[1], 'X3': X.T[2], 'X4': X.T[3], 'X5': X.T[4], 'treatment': treatment})
# Check treatment
df['treatment'].value_counts()

1    512
0    488
Name: treatment, dtype: int64

In [26]:
df

Unnamed: 0,y,X1,X2,X3,X4,X5,treatment
0,0.969019,0.374540,0.950714,0.731994,0.598658,0.156019,1
1,1.671670,0.155995,0.058084,0.866176,0.601115,0.708073,0
2,0.630293,0.020584,0.969910,0.832443,0.212339,0.181825,0
3,1.589116,0.183405,0.304242,0.524756,0.431945,0.291229,0
4,0.342692,0.611853,0.139494,0.292145,0.366362,0.456070,1
...,...,...,...,...,...,...,...
995,-0.007271,0.446726,0.123450,0.205980,0.000943,0.039265,0
996,2.285836,0.977284,0.242354,0.663311,0.839100,0.550660,1
997,1.647223,0.153145,0.728116,0.600251,0.731251,0.769747,1
998,1.957809,0.975410,0.573563,0.341641,0.648465,0.068441,1


In [18]:
# Features
features = df.loc[:, ['X1', 'X2', 'X3', 'X4', 'X5', 'treatment']]
# Dependent variable
y = df.loc[:, ['y']]

In [20]:
features

Unnamed: 0,X1,X2,X3,X4,X5,treatment
0,0.374540,0.950714,0.731994,0.598658,0.156019,1
1,0.155995,0.058084,0.866176,0.601115,0.708073,0
2,0.020584,0.969910,0.832443,0.212339,0.181825,0
3,0.183405,0.304242,0.524756,0.431945,0.291229,0
4,0.611853,0.139494,0.292145,0.366362,0.456070,1
...,...,...,...,...,...,...
995,0.446726,0.123450,0.205980,0.000943,0.039265,0
996,0.977284,0.242354,0.663311,0.839100,0.550660,1
997,0.153145,0.728116,0.600251,0.731251,0.769747,1
998,0.975410,0.573563,0.341641,0.648465,0.068441,1


In [21]:
# Initiate the light GBM model
s_learner = LGBMRegressor()
# Fit the model
s_learner.fit(features, y);

In [22]:
# Set treatment value to 1
with_treatment = features.assign(treatment = 1)
# With treatment predictions
with_treatment_predict = s_learner.predict(with_treatment)
# Set treatment value to 0
without_treatment = features.assign(treatment = 0)
# With treatment predictions
without_treatment_predict = s_learner.predict(without_treatment)

In [25]:
with_treatment

Unnamed: 0,X1,X2,X3,X4,X5,treatment
0,0.374540,0.950714,0.731994,0.598658,0.156019,1
1,0.155995,0.058084,0.866176,0.601115,0.708073,1
2,0.020584,0.969910,0.832443,0.212339,0.181825,1
3,0.183405,0.304242,0.524756,0.431945,0.291229,1
4,0.611853,0.139494,0.292145,0.366362,0.456070,1
...,...,...,...,...,...,...
995,0.446726,0.123450,0.205980,0.000943,0.039265,1
996,0.977284,0.242354,0.663311,0.839100,0.550660,1
997,0.153145,0.728116,0.600251,0.731251,0.769747,1
998,0.975410,0.573563,0.341641,0.648465,0.068441,1
