In [None]:
# Optional environment setup: uncomment the lines below to install the
# pinned DoWhy and EconML versions.
#!pip install dowhy==0.6
#!pip install econml==0.12.0

In [None]:
from itertools import combinations

import numpy as np
import pandas as pd

import dowhy
from dowhy import CausalModel
import dowhy.datasets

from sklearn.linear_model import LassoCV
from sklearn.ensemble import GradientBoostingRegressor

import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

# Causal Inference in Python: An Introduction


Causality was an *enfant terrible* of the big data and statistical learning revolution of the early 2010s. Many people (myself included) believed that having large enough datasets and efficient learning algorithms was sufficient and that we did not need the concept of causality at all. Today, causal inference, modeling and discovery are being used more and more broadly across areas – from medical research and neuroscience to marketing and fraud detection. This talk briefly introduces the main causal concepts and two Python libraries – DoWhy and EconML – for performing causal inference.

## Causal model with DoWhy & EconML

### Generate a dataset

In [None]:
# Create the dataset
#
# Data-generating process (W influences both T and Y):
#   W = standard normal noise
#   T = standard normal noise + 0.2 * W + 3
#   Y = 6 * T + 2 * W - 13        (no noise term)
# The coefficient of T in the equation for Y is 6.
SEED = 42
N_SAMPLES = 1000

# Seed the global NumPy generator so that a fresh "Restart & Run All"
# reproduces exactly the same W and T draws.
np.random.seed(SEED)

W = np.random.randn(N_SAMPLES)
T = np.random.randn(N_SAMPLES) + .2*W + 3
Y = 6*T + 2*W - 13

# One column per variable, in the order W, T, Y.
df = pd.DataFrame({'W': W, 'T': T, 'Y': Y})

In [None]:
# Preview only the first rows rather than rendering the whole frame.
df.head()

In [None]:
# Scatter of the raw (unadjusted) relationship between treatment T and
# outcome Y. Axis labels and a title are set so the figure can be read
# on its own when the notebook is skimmed.
fig, ax = plt.subplots(figsize=(3, 3))
ax.scatter(df['T'], df['Y'], alpha=.2)
ax.set_xlabel('T')
ax.set_ylabel('Y')
ax.set_title('Y vs. T')
plt.show()

### Stage 1: Model the problem

#### Stage 1.1 - Define the graph - `GML`

In [None]:
# Create the graph describing the causal structure
#
# The graph is written as GML text: a directed graph with three nodes
# (T, W, Y) and three edges: W -> T, W -> Y and T -> Y.

graph = """
graph [
    directed 1
    
    node [
        id "T" 
        label "T"
    ]    
    node [
        id "W"
        label "W"
    ]
    node [
        id "Y"
        label "Y"
    ]
    
    edge [
        source "W"
        target "T"
    ]
    
    edge [
        source "W"
        target "Y"
    ]
    
    edge [
        source "T"
        target "Y"
    ]
]
"""

In [None]:
# Collapse the multi-line GML text into a single line.
# NOTE(review): presumably needed because DoWhy's graph-string handling
# does not accept line breaks — confirm against the DoWhy version in use.
graph = ''.join(graph.split('\n'))

#### Stage 1.2 - Define the DoWhy model

In [None]:
# Build the DoWhy model from the data frame, the names of the treatment
# and outcome columns, and the GML graph defined above.
model = CausalModel(data=df, treatment='T', outcome='Y', graph=graph)

In [None]:
# Visualise the causal graph held by the model.
# NOTE(review): whether view_model() draws onto the pyplot figure opened
# here (and therefore honours figsize) is not visible in this notebook —
# confirm against the DoWhy version in use.
plt.figure(figsize=(3, 3))
model.view_model()
plt.show()

### Stage 2: Identify the estimand

In [None]:
# Ask the model to identify the estimand implied by the graph, then
# print its textual description.
# NOTE(review): proceed_when_unidentifiable=True presumably lets
# identification continue without user confirmation — confirm in the
# DoWhy documentation.
estimand = model.identify_effect(
    proceed_when_unidentifiable=True,
)
print(estimand)

### Stage 3: Estimate the causal effect

#### Example 1 - Linear Regression

In [None]:
# Estimate the effect of T on Y from the identified estimand using
# DoWhy's 'backdoor.linear_regression' method, then report the value.
estimate = model.estimate_effect(
    identified_estimand=estimand, method_name='backdoor.linear_regression',
)

print(f'Estimate of causal effect (linear regression): {estimate.value}')

#### Example 2 - Double Machine Learning

In [None]:
# Estimate the effect with EconML's DML estimator, called through DoWhy.
#
# 'init_params' are forwarded to the DML constructor:
#   model_y     - regressor for the outcome
#   model_t     - regressor for the treatment
#   model_final - final-stage model (LassoCV without an intercept)
# 'fit_params' is left empty.
#
# A fixed random_state is passed to both gradient-boosting models and to
# the DML estimator itself so that repeated runs of this cell do not
# depend on unseeded randomness inside those components.
RANDOM_STATE = 42

estimate = model.estimate_effect(
    identified_estimand=estimand,
    method_name='backdoor.econml.dml.DML',
    method_params={
        'init_params': {
            'model_y': GradientBoostingRegressor(random_state=RANDOM_STATE),
            'model_t': GradientBoostingRegressor(random_state=RANDOM_STATE),
            'model_final': LassoCV(fit_intercept=False),
            'random_state': RANDOM_STATE,
        },
        'fit_params': {}}
)

print(f'Estimate of causal effect (DML): {estimate.value}')

### Stage 4: Run refutation tests

In [None]:
# Refutation test 1: run DoWhy's 'placebo_treatment_refuter' against the
# most recently computed estimate.
# NOTE(review): per the DoWhy documentation this refuter substitutes a
# placebo for the treatment, so the re-estimated effect is expected to be
# close to zero — confirm.
refute_results = model.refute_estimate(
    estimand=estimand, estimate=estimate, method_name='placebo_treatment_refuter',
)

In [None]:
# Show the textual summary of the refutation result computed above.
print(refute_results)

In [None]:
# Refutation test 2: run DoWhy's 'random_common_cause' refuter against the
# same estimate. Note that this rebinds `refute_results`.
# NOTE(review): per the DoWhy documentation this refuter adds a random
# common cause, so the re-estimated effect is expected to stay close to
# the original estimate — confirm.
refute_results = model.refute_estimate(
    estimand=estimand, estimate=estimate, method_name='random_common_cause',
)

In [None]:
# Show the textual summary of the refutation result computed above.
print(refute_results)