In [2]:
import econml
import dowhy
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

In [3]:
df = pd.read_pickle("df_causal_effects.p")
df.head()

Unnamed: 0,age,hasGraduateDegree,greaterThan50k
0,39,False,False
1,50,False,False
2,38,False,False
3,53,False,False
5,37,True,False


In [6]:
# define causal model
model = dowhy.CausalModel(
    data=df,
    treatment = "hasGraduateDegree",
    outcome = "greaterThan50k",
    common_causes = "age")

# define estimand
estimand = model.identify_effect(proceed_when_unidentifiable=True)

In [8]:
# Linear Regression
LR_estimate = model.estimate_effect(estimand,
                                    method_name = "backdoor.linear_regression")

#ATE
print(LR_estimate)


*** Causal Estimate ***

## Identified estimand
Estimand type: EstimandType.NONPARAMETRIC_ATE

### Estimand : 1
Estimand name: backdoor
Estimand expression:
         d                                 
────────────────────(E[greaterThan50k|age])
d[hasGraduateDegree]                       
Estimand assumption 1, Unconfoundedness: If U→{hasGraduateDegree} and U→greaterThan50k then P(greaterThan50k|hasGraduateDegree,age,U) = P(greaterThan50k|hasGraduateDegree,age)

## Realized estimand
b: greaterThan50k~hasGraduateDegree+age
Target units: ate

## Estimate
Mean value: 0.29760513570328795



  intercept_parameter = self.model.params[0]


In [9]:
# Double Machine Learning
DML_estimate = model.estimate_effect(estimand,
                                     method_name = "backdoor.econml.dml.DML",
                                     method_params = {"init_params":{
                                         'model_y':LinearRegression(),
                                         'model_t':LinearRegression(),
                                         'model_final': LinearRegression()},
                                                      "fit_params":{}
                                                     })
#DML ATE
print(DML_estimate)

*** Causal Estimate ***

## Identified estimand
Estimand type: EstimandType.NONPARAMETRIC_ATE

### Estimand : 1
Estimand name: backdoor
Estimand expression:
         d                                 
────────────────────(E[greaterThan50k|age])
d[hasGraduateDegree]                       
Estimand assumption 1, Unconfoundedness: If U→{hasGraduateDegree} and U→greaterThan50k then P(greaterThan50k|hasGraduateDegree,age,U) = P(greaterThan50k|hasGraduateDegree,age)

## Realized estimand
b: greaterThan50k~hasGraduateDegree+age | 
Target units: ate

## Estimate
Mean value: 0.29779791121709576
Effect estimates: [[0.29779791]]



The final model has a nonzero intercept for at least one outcome; it will be subtracted, but consider fitting a model without an intercept if possible.


In [10]:
# X-learner
Xlearner_estimate = model.estimate_effect(estimand,
                          method_name = "backdoor.econml.metalearners.XLearner",
                          method_params = {"init_params":{
                                              'models':DecisionTreeRegressor()
                                                      },
                                          "fit_params":{}
                                          })

# X-learner ATE
print(Xlearner_estimate)

*** Causal Estimate ***

## Identified estimand
Estimand type: EstimandType.NONPARAMETRIC_ATE

### Estimand : 1
Estimand name: backdoor
Estimand expression:
         d                                 
────────────────────(E[greaterThan50k|age])
d[hasGraduateDegree]                       
Estimand assumption 1, Unconfoundedness: If U→{hasGraduateDegree} and U→greaterThan50k then P(greaterThan50k|hasGraduateDegree,age,U) = P(greaterThan50k|hasGraduateDegree,age)

## Realized estimand
b: greaterThan50k~hasGraduateDegree+age
Target units: ate

## Estimate
Mean value: 0.20232049378002753
Effect estimates: [[ 0.31037666]
 [ 0.21099013]
 [ 0.36363636]
 ...
 [ 0.16049383]
 [-0.00342775]
 [ 0.2008029 ]]



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


In [11]:
# Dow is a good library to try out various models for ATE