In [154]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [155]:
from sklearn.linear_model import LinearRegression
from causalml.inference.meta.slearner import BaseSLearner

import pandas as pd
import numpy as np

from scipy.stats import norm, logistic

In [156]:
import pyAgrum as gum
import pyAgrum.lib as gnb

import pyAgrum.causal as csl
import pyAgrum.causal.notebook as cslnb

import pyAgrum.skbn as skbn

In [157]:
def getDiscretizedVariable(name, tick_start, tick_end, num_intervals):
    variable = gum.DiscretizedVariable(name, name)
    ticks = np.linspace(tick_start, tick_end, num_intervals+1)
    for i in ticks:
        variable.addTick(i)
    variable.setEmpirical(True)
    return variable

def processDataFrame(df):
    for column in df.columns:
        if df[column].nunique() == 2:
            df[column] = df[column].astype(int)
        else:
            df[column] = df[column].apply(
                lambda x : sum([float(i) for i in x[1:-1].split(";")])/2
            )

def getATEfromCBN(cslbn):
    _, c0, _ = csl.causalImpact(cm=cslbn, doing="T", on="Y", values={"T":0})
    _, c1, _ = csl.causalImpact(cm=cslbn, doing="T", on="Y", values={"T":1})
    diff = c1 - c0
    return diff.expectedValue(lambda d : diff.variable(0).numerical(d[diff.variable(0).name()]))

In [158]:
n = int(1e6)

# Linear Randomized Contolled Trial

$$
X \sim \mathcal{N}(1,1) \\
T \sim \mathcal{B}(0.3) \\
Y \sim \mathcal{N}(X+2T, 1)
$$

$$ Y = X + 2T + \varepsilon \text{, } \varepsilon \sim \mathcal{N}(0,1)$$

$$
\begin{align*}
\tau &= \mathbb{E}[Y \mid do(T=1)] - \mathbb{E}[Y \mid do(T=0)] \\
&= \mathbb{E}[Y \mid T=1] - \mathbb{E}[Y \mid T=0] \\
&= \mathbb{E}[X + 2T + \varepsilon \mid T=1] - \mathbb{E}[X + 2T + \varepsilon \mid T=0] \\
&= \mathbb{E}[X] + 2 \cdot 1 + \mathbb{E}[\varepsilon] - \mathbb{E}[X] - 2 \cdot 0 - \mathbb{E}[\varepsilon]\\
&= 2
\end{align*}
$$

In [159]:
X = np.random.normal(1, 1, n)
T = np.random.binomial(1, 0.3, n)
Y = np.random.normal(X+2*T, 1, n)
df0 = pd.DataFrame({"X": X, "T": T, "Y":Y}, index=np.arange(0,n))
df0.head()

Unnamed: 0,X,T,Y
0,-0.956939,0,-1.270711
1,2.307382,1,4.932536
2,0.231758,0,0.894167
3,1.541802,0,1.70101
4,0.215875,0,0.008218


In [160]:
bn0 = gum.BayesNet()

bn0.beginTopologyTransformation()

bn0.add(getDiscretizedVariable("X", -4,6, 100))
bn0.add(getDiscretizedVariable("Y", -4,6, 100))
bn0.add(gum.IntegerVariable("T", "T", [0,1]))

bn0.addArc("X", "Y")
bn0.addArc("T", "Y")

bn0.cpt("X").fillFromDistribution(norm, loc=1, scale=1)
bn0.cpt("T").fillWith([0.7, 0.3])
bn0.cpt("Y").fillFromDistribution(norm, loc="X + 2*T", scale=1)

bn0.endTopologyTransformation()

cslbn0 = csl.CausalModel(bn0)

cslbn0

In [161]:
getATEfromCBN(cslbn0)

1.9655810046044915

In [162]:
po0 = csl.CausalEffectEstimation(df0, cslbn0)
po0.identifyAdjustmentSet(intervention="T", outcome="Y")

Randomized Controlled Trial adjustment found. 

Supported estimators include:
- CausalModelEstimator
- DM
If the outcome variable is a cause of other covariates in the causal graph,
Backdoor estimators may also be used.


'Randomized Controlled Trial'

In [163]:
print(po0)

<pyAgrum.causal.causalEffectEstimation._CausalEffectEstimation.CausalEffectEstimation object at 0x740730382560>

 Dataframe	: <pandas.core.frame.DataFrame object at 0x740732bfdc30>
	- shape		: (1000000, 3)
	- columns	: Index(['X', 'T', 'Y'], dtype='object')
	- memory usage	: 32.0 MB
 Causal Model	: <pyAgrum.causal._CausalModel.CausalModel object at 0x740732845900>
	- names		: {0: 'X', 1: 'Y', 2: 'T'}
	- causal BN	: BN{nodes: 3, arcs: 2, domainSize: 20000, dim: 19900, mem: 157Ko 48o}
	- observ. BN	: BN{nodes: 3, arcs: 2, domainSize: 20000, dim: 19900, mem: 157Ko 48o}
 Adjustment	: Randomized Controlled Trial
 Intervention	: T
 Outcome	: Y
 Confounders	: {'X'}


In [164]:
po0.fitCausalBNEstimator()
po0.estimateCausalEffect()

1.989180940121197

In [165]:
po0.fitSLearner()
po0.estimateCausalEffect()

1.9985396565983176

In [166]:
po0.fitCustomEstimator(BaseSLearner(learner=LinearRegression()))
po0.estimateCausalEffect()

array([1.99853966])

# Linear Backdoor

$$
X \sim \mathcal{N}(1,1) \\
T \sim \mathcal{B}((1+e^{-X})^{-1}) \\
Y \sim \mathcal{N}(X+2T, 1)
$$

$$ Y = X + 2T + \varepsilon \text{, } \varepsilon \sim \mathcal{N}(0,1)$$

$$
\begin{align*}
\tau &= \mathbb{E}[Y \mid do(T=1)] - \mathbb{E}[Y \mid do(T=0)] \\
&= \mathbb{E}_X [\mathbb{E}[Y \mid T=1, X] - \mathbb{E}[Y \mid T=0, X]] \\
&= \mathbb{E}_X [\mathbb{E}_{\varepsilon}[X + 2T + \varepsilon \mid T=1, X] - \mathbb{E}_{\varepsilon}[X + 2T + \varepsilon \mid T=0, X]] \\
&= \mathbb{E}_X [(X + 2 \cdot 1) - (X + 2 \cdot 0)] \\
&= 2
\end{align*}
$$

In [167]:
X = np.random.normal(1, 1, n)
T = np.random.binomial(1, np.power(1+np.exp(-X), -1))
Y = np.random.normal(X+2*T, 1, n)
df1 = pd.DataFrame({"X": X, "T": T, "Y": Y}, index=np.arange(0,n))
df1.head()

Unnamed: 0,X,T,Y
0,-0.057144,1,3.114281
1,0.896178,1,3.735207
2,1.706789,1,3.954915
3,1.564825,1,3.621778
4,2.493069,1,5.48123


In [168]:
bn1 = gum.BayesNet()

bn1.beginTopologyTransformation()

bn1.add(getDiscretizedVariable("X", -4,6, 100))
bn1.add(getDiscretizedVariable("Y", -4,6, 100))
bn1.add(gum.IntegerVariable("T", "T", [0,1]))

bn1.addArc("X", "T")
bn1.addArc("X", "Y")
bn1.addArc("T", "Y")

bn1.cpt("X").fillFromDistribution(norm, loc=1, scale=1)
bn1.cpt("T").fillFromDistribution(logistic, loc="X", scale=1)
bn1.cpt("Y").fillFromDistribution(norm, loc="X + 2*T", scale=1)

bn1.endTopologyTransformation()

cslbn1 = csl.CausalModel(bn1)

cslbn1

In [169]:
getATEfromCBN(cslbn1)

1.9655810046044917

In [170]:
po1 = csl.CausalEffectEstimation(df1, cslbn1)
po1.identifyAdjustmentSet(intervention="T", outcome="Y")

Backdoor adjustment found. 

Supported estimators include:
- CausalModelEstimator
- SLearner
- TLearner
- XLearner
- PStratification
- IPW


'Backdoor'

In [171]:
print(po1)

<pyAgrum.causal.causalEffectEstimation._CausalEffectEstimation.CausalEffectEstimation object at 0x74088aebdcc0>

 Dataframe	: <pandas.core.frame.DataFrame object at 0x740732b63910>
	- shape		: (1000000, 3)
	- columns	: Index(['X', 'T', 'Y'], dtype='object')
	- memory usage	: 32.0 MB
 Causal Model	: <pyAgrum.causal._CausalModel.CausalModel object at 0x74074904ba00>
	- names		: {0: 'X', 1: 'Y', 2: 'T'}
	- causal BN	: BN{nodes: 3, arcs: 3, domainSize: 20000, dim: 19999, mem: 158Ko 608o}
	- observ. BN	: BN{nodes: 3, arcs: 3, domainSize: 20000, dim: 19999, mem: 158Ko 608o}
 Adjustment	: Backdoor
 Intervention	: T
 Outcome	: Y
 Confounders	: {'X'}


In [172]:
po1.fitCausalBNEstimator()
po1.estimateCausalEffect()

1.99292702372421

In [173]:
po1.fitSLearner()
po1.estimateCausalEffect()

2.0003358621449654

# Non Linear Backdoor

$$
X \sim \mathcal{N}(1,1) \\
T \sim \mathcal{B}((1+e^{-X})^{-1}) \\
Y \sim \mathcal{N}(X(1+2T), 1)
$$

$$
\begin{align*}
\tau &= \mathbb{E}[Y \mid do(T=1)] - \mathbb{E}[Y \mid do(T=0)] \\
&= \mathbb{E}_X [\mathbb{E}[Y \mid T=1, X] - \mathbb{E}[Y \mid T=0, X]] \\
&= \mathbb{E}_X [X(1 + 2 \cdot 1) - X(1 + 2 \cdot 0)] \\
&= 2\mathbb{E}[X] \\
&= 2
\end{align*}
$$

In [174]:
X = np.random.normal(1, 1, n)
T = np.random.binomial(1, np.power(1+np.exp(-X), -1))
Y = np.random.normal(X+2*T*X, 1, n)
df2 = pd.DataFrame({"X": X, "T": T, "Y":Y}, index=np.arange(0,n))
df2.head()

Unnamed: 0,X,T,Y
0,-0.96915,0,-1.407017
1,1.776413,1,4.434231
2,1.591836,1,5.795717
3,-0.480775,0,0.258419
4,0.495358,1,2.363707


In [175]:
bn2 = gum.BayesNet()

bn2.beginTopologyTransformation()

bn2.add(getDiscretizedVariable("X", -4, 6, 3))
bn2.add(getDiscretizedVariable("Y", -4, 6, 100))
bn2.add(gum.IntegerVariable("T", "T", [0,1]))

bn2.addArc("X", "T")
bn2.addArc("X", "Y")
bn2.addArc("T", "Y")

bn2.cpt("X").fillFromDistribution(norm, loc=1, scale=1)
bn2.cpt("T").fillFromDistribution(logistic, loc="X", scale=1)
bn2.cpt("Y").fillFromDistribution(norm, loc="X*(1 + 2*T)", scale=1)

bn2.endTopologyTransformation()

cslbn2 = csl.CausalModel(bn2)

cslbn2

In [176]:
getATEfromCBN(cslbn2)

1.980811707300236

In [177]:
po2 = csl.CausalEffectEstimation(df2, cslbn2)
po2.identifyAdjustmentSet("T", "Y")

Backdoor adjustment found. 

Supported estimators include:
- CausalModelEstimator
- SLearner
- TLearner
- XLearner
- PStratification
- IPW


'Backdoor'

In [178]:
print(po2)

<pyAgrum.causal.causalEffectEstimation._CausalEffectEstimation.CausalEffectEstimation object at 0x7407321d9900>

 Dataframe	: <pandas.core.frame.DataFrame object at 0x7407321db7c0>
	- shape		: (1000000, 3)
	- columns	: Index(['X', 'T', 'Y'], dtype='object')
	- memory usage	: 32.0 MB
 Causal Model	: <pyAgrum.causal._CausalModel.CausalModel object at 0x7407321db310>
	- names		: {0: 'X', 1: 'Y', 2: 'T'}
	- causal BN	: BN{nodes: 3, arcs: 3, domainSize: 600, dim: 599, mem: 4Ko 776o}
	- observ. BN	: BN{nodes: 3, arcs: 3, domainSize: 600, dim: 599, mem: 4Ko 776o}
 Adjustment	: Backdoor
 Intervention	: T
 Outcome	: Y
 Confounders	: {'X'}


In [179]:
po2.useBackdoorAdjustment("T", "Y", {"X"})

In [180]:
po2.fitCausalBNEstimator()
po2.estimateCausalEffect()

2.509447214263444

In [181]:
po2.fitSLearner()
po2.estimateCausalEffect()

1.322625728019034

In [182]:
po2.fitSLearner(learner="XGBRegressor")
po2.estimateCausalEffect()

1.993271

# High Dimentional Non Linear Backdoor

$$
\boldsymbol{\mu} = 
\begin{pmatrix}
1\\
1\\
1\\
1\\
1
\end{pmatrix}

\boldsymbol{\Sigma} = 
\begin{pmatrix}
1 & 0.5 & -0.5 & -0.5 & -0.5\\
0.5 & 1 & 0.5 & 1 & -0.5 \\
-0.5 & 0.5 & 1 & 0.5 & 0.5\\
-0.5 & 1 & 0.5 & 1 & -0.5 \\
-0.5 & -0.5 & 0.5 & -0.5 & 1
\end{pmatrix}

\alpha =
\begin{pmatrix}
1\\
-1\\
2\\
2\\
-2
\end{pmatrix}

\beta =
\begin{pmatrix}
5\\
-3\\
-1\\
2\\
-2
\end{pmatrix}
$$

$$
\boldsymbol{X} \sim \mathcal{N}(\boldsymbol{\mu},\boldsymbol{\Sigma}) \\
T \sim \mathcal{B}((1+e^{-\boldsymbol{X}\alpha})^{-1}) \\
Y \sim \mathcal{N}(\boldsymbol{X}^T\beta \cdot (1+2T), 1)
$$

$$
\begin{align*}
\tau &= \mathbb{E}[Y \mid do(T=1)] - \mathbb{E}[Y \mid do(T=0)] \\
&= \mathbb{E}_{\boldsymbol{X}} [\mathbb{E}[Y \mid T=1, \boldsymbol{X}] - \mathbb{E}[Y \mid T=0, \boldsymbol{X}]] \\
&= \mathbb{E}_{\boldsymbol{X}} [\boldsymbol{X}^T\beta(1 + 2 \cdot 1) - \boldsymbol{X}^T\beta(1 + 2 \cdot 0)] \\
&= 2\mathbb{E}[\boldsymbol{X}^T\beta] \\
&= 2\mathbb{E}[5X_1 -3X_2 -X_3 +2X_4 -2X_5] \\
&= 2(5-3-1+2-2) \\
&= 2
\end{align*}
$$

In [183]:
mu = np.array([1,1,1,1,1])
sigma = np.array(
    [
        [1.0, 0.5, -0.5, -0.5, -0.5],
        [0.5, 1.0, 0.5, 1.0, -0.5],
        [-0.5, 0.5, 1.0, 0.5, 0.5],
        [-0.5, 1.0, 0.5, 1.0, -0.5],
        [-0.5, -0.5, 0.5, -0.5, 1.0],
    ]
)

X = np.random.multivariate_normal(mu, sigma, n)

X1 = X[:,0]
X2 = X[:,1]
X3 = X[:,2]
X4 = X[:,3]
X5 = X[:,4]

alpha = np.array([1,-1,2,2,-2])
beta = np.array([5,-3,-1,2,-2])

T = np.random.binomial(1, np.power(1+np.exp(- X @ alpha), -1))
Y = np.random.normal((X @ beta)*(1 + 2*T), 1, n)

df3 = pd.DataFrame(
    {"X1": X1, "X2": X2, "X3": X3, "X4": X4, "X5": X5, "T": T, "Y":Y},
    index=np.arange(0,n)
)
df3.head()

covariance is not symmetric positive-semidefinite.


Unnamed: 0,X1,X2,X3,X4,X5,T,Y
0,1.107175,1.904445,1.430844,1.426488,0.765378,1,-0.261765
1,0.524774,3.105881,1.66898,3.004682,-0.386301,1,-3.749738
2,1.469082,1.833303,1.626939,2.691975,0.3643,1,16.38699
3,0.376697,0.645956,0.57159,0.882415,0.807404,1,-1.036533
4,1.775607,-0.248942,0.817451,0.255278,1.814282,0,6.517313


In [184]:
bn3 = gum.BayesNet()

bn3.beginTopologyTransformation()

bn3.add(getDiscretizedVariable("X1", -4,6, 3))
bn3.add(getDiscretizedVariable("X2", -4,6, 3))
bn3.add(getDiscretizedVariable("X3", -4,6, 3))
bn3.add(getDiscretizedVariable("X4", -4,6, 3))
bn3.add(getDiscretizedVariable("X5", -4,6, 3))
bn3.add(getDiscretizedVariable("Y", -100,100, 100))
bn3.add(gum.IntegerVariable("T", "T", [0,1]))

bn3.addArc("X1", "T")
bn3.addArc("X2", "T")
bn3.addArc("X3", "T")
bn3.addArc("X4", "T")
bn3.addArc("X5", "T")
bn3.addArc("X1", "Y")
bn3.addArc("X2", "Y")
bn3.addArc("X3", "Y")
bn3.addArc("X4", "Y")
bn3.addArc("X5", "Y")
bn3.addArc("T", "Y")

bn3.cpt("X1").fillFromDistribution(norm, loc=1, scale=1)
bn3.cpt("X2").fillFromDistribution(norm, loc=1, scale=1)
bn3.cpt("X3").fillFromDistribution(norm, loc=1, scale=1)
bn3.cpt("X4").fillFromDistribution(norm, loc=1, scale=1)
bn3.cpt("X5").fillFromDistribution(norm, loc=1, scale=1)
bn3.cpt("T").fillFromDistribution(logistic, loc="X1 - X2 + 2*X3 + 2*X4 - 2*X5", scale=1)
bn3.cpt("Y").fillFromDistribution(norm, loc="(5*X1 - 3*X2 - X3 + 2*X4 - 2*X5) * (1 + 2*T)", scale=1)

bn3.endTopologyTransformation()

cslbn3 = csl.CausalModel(bn3)
cslbn3.addLatentVariable("U", ("X1", "X2", "X3", "X4", "X5"))

cslbn3

In [185]:
getATEfromCBN(cslbn3)

1.9999995506112345

In [186]:
po3 = csl.CausalEffectEstimation(df3, cslbn3)
po3.identifyAdjustmentSet(intervention="T", outcome="Y")

Backdoor adjustment found. 

Supported estimators include:
- CausalModelEstimator
- SLearner
- TLearner
- XLearner
- PStratification
- IPW


'Backdoor'

In [187]:
print(po3)

<pyAgrum.causal.causalEffectEstimation._CausalEffectEstimation.CausalEffectEstimation object at 0x7407321d9120>

 Dataframe	: <pandas.core.frame.DataFrame object at 0x74073179ae30>
	- shape		: (1000000, 7)
	- columns	: Index(['X1', 'X2', 'X3', 'X4', 'X5', 'T', 'Y'], dtype='object')
	- memory usage	: 64.0 MB
 Causal Model	: <pyAgrum.causal._CausalModel.CausalModel object at 0x7407320e9150>
	- names		: {0: 'X1', 1: 'X2', 2: 'X3', 3: 'X4', 4: 'X5', 5: 'Y', 6: 'T', 7: 'U'}
	- causal BN	: BN{nodes: 8, arcs: 16, domainSize: 97200, dim: 48378, mem: 383Ko 752o}
	- observ. BN	: BN{nodes: 7, arcs: 11, domainSize: 48600, dim: 48367, mem: 383Ko 616o}
 Adjustment	: Backdoor
 Intervention	: T
 Outcome	: Y
 Confounders	: {'X2', 'X1', 'X5', 'X3', 'X4'}


In [188]:
po3.fitCausalBNEstimator()
po3.estimateCausalEffect()

5.691838110160974

In [189]:
po3.fitSLearner()
po3.estimateCausalEffect()

-0.013035946102707648

In [190]:
po3.fitSLearner(learner="XGBRegressor")
po3.estimateCausalEffect()

1.7051575

In [191]:
po3.fitTLearner()
po3.estimateCausalEffect()

2.015867550691381

In [192]:
po3.fitXLearner()
po3.estimateCausalEffect()

2.015867550647617

In [193]:
po3.fitPStratification(num_strata=50)
po3.estimateCausalEffect()

2.0729018115415783

In [194]:
po3.fitIPW()
po3.estimateCausalEffect()

2.2258507842889723

# Linear Frontdoor

$$
U \sim \mathcal{N}(1,1) \\
T \sim \mathcal{B}((1+e^{-U})^{-1}) \\
M \sim \mathcal{N}(T,1) \\
Y \sim \mathcal{N}(U + 2M, 1)
$$

$$
\begin{align*}

\tau &= \mathbb{E}[Y \mid do(T=1)] - \mathbb{E}[Y \mid do(T=0)] \\

&= \int_{\Omega} Y(\omega)  d\mathbb{P}[\omega \mid do(T=1)] - \int_{\Omega} Y(\omega)  d\mathbb{P}[\omega \mid do(T=0)] \\

&= \int_{Y(\Omega)} y \left( f_{Y\mid do(T=1)}(y) - f_{Y\mid do(T=0)}(y) \right) dy \\

&= \int_{Y(\Omega)} y \left( \int_{M(\Omega)} \left( \left( f_{M \mid T=1}(m) - f_{M \mid T=0}(m) \right) \sum_{t'\in T(\Omega)} f_{Y \mid M=m, T=t'}(y) \mathbb{P}[T=t'] \right) dm \right) dy\\

&= \int_{M(\Omega)} \left( f_{M \mid T=1}(m) - f_{M \mid T=0}(m) \right) \sum_{t'\in T(\Omega)} \mathbb{P}[T=t'] \left( \int_{Y(\Omega)} y  f_{Y \mid M=m, T=t'}(y) dy \right) dm\\

&= \int_{M(\Omega)} \left( f_{M \mid T=1}(m) - f_{M \mid T=0}(m) \right) \underbrace{\mathbb{E}_{T} \left[ \mathbb{E}_{Y} [Y \mid T, M=m] \right]}_{(*)} dm

\end{align*}
$$

$$
\begin{align*}

(*) &= \mathbb{E}_{T} \left[ \mathbb{E}_{U} [U\mid T] \right]+2m \\

%&= \mathbb{P}[T=0]\mathbb{E}_{U} [U\mid T=0] + \mathbb{P}[T=1]\mathbb{E}_{U} [U\mid T=1] + 2m \\

%&= \int_{U(\Omega)} u \left( \mathbb{P}[T=0] f_{U\mid T=0}(u) + \mathbb{P}[T=1]f_{U\mid T=1}(u)\right) du  + 2m \\ 

%&= \int_{U(\Omega)} u \left( \mathbb{P}[T=0\mid U=u] f_{U}(u) + \mathbb{P}[T=1\mid U=u] f_{U}(u)\right) du  + 2m \\ 

&=  \mathbb{E}_{U} [U]+2m \\
&=  1+2m \\

\end{align*}
$$

$$
\begin{align*}
\tau &= \int_{M(\Omega)} \left( f_{M \mid T=1}(m) - f_{M \mid T=0}(m) \right) (1+2m) dm \\
&= 2\left( \mathbb{E}_M[M \mid T=1] - \mathbb{E}_M[M \mid T=0] \right) \\
&= 2(1-0) \\
&= 2

\end{align*}
$$

In [247]:
U = np.random.normal(1, 1, n)
T = np.random.binomial(1, np.power(1+np.exp(-U), -1))
M = np.random.normal(T, 1, n)
Y = np.random.normal(U + 2*M, 1, n)
df4 = pd.DataFrame({"M": M, "T": T, "Y": Y}, index=np.arange(0,n))
df4.head()

Unnamed: 0,M,T,Y
0,3.013356,1,6.42879
1,-0.625273,1,-1.769808
2,1.670314,1,6.414448
3,1.133898,1,3.316626
4,1.433072,1,2.056731


In [248]:
# Must declare the latent variables last

bn4 = gum.BayesNet()

bn4.beginTopologyTransformation()

bn4.add(getDiscretizedVariable("M", -4,6, 10))
bn4.add(getDiscretizedVariable("Y", -4,6, 100))
bn4.add(gum.IntegerVariable("T", "T", [0,1]))
bn4.add(getDiscretizedVariable("U", -4,6, 3))

bn4.addArc("U", "T")
bn4.addArc("U", "Y")
bn4.addArc("T", "M")
bn4.addArc("M", "Y")

bn4.cpt("U").fillFromDistribution(norm, loc=1, scale=1)
bn4.cpt("T").fillFromDistribution(logistic, loc="U", scale=1)
bn4.cpt("M").fillFromDistribution(norm, loc="T", scale=1)
bn4.cpt("Y").fillFromDistribution(norm, loc="U + 2*M", scale=1)

ie = gum.LazyPropagation(bn4)

cpt_T = ie.posterior("T")
cpt_Y = ie.evidenceImpact("Y", ["T", "M"])
ie = None

bn4.eraseArc("U", "T")
bn4.eraseArc("U", "Y")
bn4.addArc("T", "Y")
bn4.erase("U")

bn4.cpt("T").fillWith(cpt_T)
bn4.cpt("Y").fillWith(cpt_Y)

bn4.endTopologyTransformation()

cslbn4 = csl.CausalModel(bn4)

cslbn4.addLatentVariable("u", ("T", "Y"))

cslbn4

In [249]:
getATEfromCBN(cslbn4)

1.8297935560297274

In [250]:
po4 = csl.CausalEffectEstimation(df4, cslbn4)
po4.identifyAdjustmentSet(intervention="T", outcome="Y")

Generalized Frontdoor adjustment found. 

Supported estimators include:
- CausalModelEstimator
- SimplePlugIn
- GeneralizedPlugIn


'Generalized Frontdoor'

In [251]:
print(po4)

<pyAgrum.causal.causalEffectEstimation._CausalEffectEstimation.CausalEffectEstimation object at 0x740731b43d90>

 Dataframe	: <pandas.core.frame.DataFrame object at 0x740722fd2560>
	- shape		: (1000000, 3)
	- columns	: Index(['M', 'T', 'Y'], dtype='object')
	- memory usage	: 32.0 MB
 Causal Model	: <pyAgrum.causal._CausalModel.CausalModel object at 0x740731b410f0>
	- names		: {0: 'M', 1: 'Y', 2: 'T', 3: 'u'}
	- causal BN	: BN{nodes: 4, arcs: 4, domainSize: 4000, dim: 2001, mem: 15Ko 848o}
	- observ. BN	: BN{nodes: 3, arcs: 3, domainSize: 2000, dim: 1999, mem: 15Ko 816o}
 Adjustment	: Generalized Frontdoor
 Intervention	: T
 Outcome	: Y
 Confounders	: set()
 Mediators	: {'M'}


In [252]:
po4.fitCausalBNEstimator()
po4.estimateCausalEffect()

1.7220767832694983

In [253]:
po4.fitSimplePlugIn()
po4.estimateCausalEffect()

2.0020399594750926

# Linear Generalized Frontdoor

$$
U \sim \mathcal{N}(1,1) \\
X_i \sim \mathcal{N}(1,1) \text{, } i \in [\![1,3]\!]\\
T \sim \mathcal{B}((1+e^{-(U + X_1 - 5X_3)})^{-1}) \\
M \sim \mathcal{N}(T + 2X_1 - X_2,1) \\
Y \sim \mathcal{N}(U + 2M + 3X_2 + X_3, 1)
$$

$$
\boldsymbol{X} =
\begin{pmatrix}
X_1\\
X_2\\
X_3
\end{pmatrix}
\sim
\mathcal{N}(
\begin{pmatrix}
1\\
1\\
1
\end{pmatrix}
,
\begin{pmatrix}
1 & 0 & 0\\
0 & 1 & 0\\
0 & 0 & 1
\end{pmatrix}
)
$$

$$
\begin{align*}

\tau &= \mathbb{E}[Y \mid do(T=1)] - \mathbb{E}[Y \mid do(T=0)] \\

&= \mathbb{E}_{\boldsymbol{X}}[ \underbrace{ \mathbb{E}[Y \mid do(T=1), \boldsymbol{X}] - \mathbb{E}[Y \mid do(T=0), \boldsymbol{X}] }_{2} ] \\

&= \mathbb{E}_{\boldsymbol{X}}[2]\\

&= 2
\end{align*}
$$

In [254]:
U = np.random.normal(1, 1, n)
X1 = np.random.normal(1, 1, n)
X2 = np.random.normal(1, 1, n)
X3 = np.random.normal(1, 1, n)
T = np.random.binomial(1, np.power(1+np.exp(-(U + X1 - 5*X3)), -1))
M = np.random.normal(T + 2*X1 - X2, 1, n)
Y = np.random.normal(U + 2*M + 3*X2 + X3, 1, n)
df5 = pd.DataFrame({"U": U, "X1": X1, "X2": X2, "X3": X3, "M": M, "T": T, "Y":Y}, index=np.arange(0,n))
df5.head()

Unnamed: 0,U,X1,X2,X3,M,T,Y
0,-1.042895,1.827976,0.51893,1.163559,4.35804,0,10.373389
1,1.01156,0.980215,1.41585,-0.316938,0.553328,1,5.829744
2,-0.122977,0.780433,0.811936,0.331055,0.420179,0,3.147091
3,1.335029,0.619752,1.44623,2.194042,0.068502,0,5.287631
4,0.152861,-0.600183,1.445204,0.768437,-2.827581,0,0.089457


In [255]:
bn5 = gum.BayesNet()

bn5.beginTopologyTransformation()

bn5.add(getDiscretizedVariable("X1", -4,6, 3))
bn5.add(getDiscretizedVariable("X2", -4,6, 3))
bn5.add(getDiscretizedVariable("X3", -4,6, 3))
bn5.add(getDiscretizedVariable("M", -4,6, 10))
bn5.add(getDiscretizedVariable("Y", -4,6, 100))
bn5.add(gum.IntegerVariable("T", "T", [0,1]))
bn5.add(getDiscretizedVariable("U", -4,6, 3))

bn5.addArc("U", "T")
bn5.addArc("U", "Y")

bn5.addArc("X1", "T")
bn5.addArc("X1", "M")

bn5.addArc("X2", "M")
bn5.addArc("X2", "Y")

bn5.addArc("X3", "T")
bn5.addArc("X3", "Y")

bn5.addArc("T", "M")
bn5.addArc("M", "Y")

bn5.cpt("U").fillFromDistribution(norm, loc=1, scale=1)
bn5.cpt("X1").fillFromDistribution(norm, loc=1, scale=1)
bn5.cpt("X2").fillFromDistribution(norm, loc=1, scale=1)
bn5.cpt("X3").fillFromDistribution(norm, loc=1, scale=1)
bn5.cpt("T").fillFromDistribution(logistic, loc="U + X1 - 5*X3", scale=1)
bn5.cpt("M").fillFromDistribution(norm, loc="T + 2*X1 - X2", scale=1)
bn5.cpt("Y").fillFromDistribution(norm, loc="U + 2*M + 3*X2 + X3", scale=1)

ie = gum.LazyPropagation(bn5)

cpt_T = ie.evidenceImpact("T",["X1", "X3"])
cpt_Y = ie.evidenceImpact("Y", ["T", "M", "X2", "X3"])
ie = None

bn5.eraseArc("U", "T")
bn5.eraseArc("U", "Y")
bn5.addArc("T", "Y")
bn5.erase("U")

bn5.cpt("T").fillWith(cpt_T)
bn5.cpt("Y").fillWith(cpt_Y)

bn5.endTopologyTransformation()

cslbn5 = csl.CausalModel(bn5)

cslbn5.addLatentVariable("u", ("T", "Y"))

cslbn5

In [256]:
getATEfromCBN(cslbn5)

0.4114089805885899

In [257]:
po5 = csl.CausalEffectEstimation(df5, cslbn5)
po5.identifyAdjustmentSet(intervention="T", outcome="Y")

Generalized Frontdoor adjustment found. 

Supported estimators include:
- CausalModelEstimator
- SimplePlugIn
- GeneralizedPlugIn


'Generalized Frontdoor'

In [258]:
print(po5)

<pyAgrum.causal.causalEffectEstimation._CausalEffectEstimation.CausalEffectEstimation object at 0x7407317843d0>

 Dataframe	: <pandas.core.frame.DataFrame object at 0x740731b40fd0>
	- shape		: (1000000, 7)
	- columns	: Index(['U', 'X1', 'X2', 'X3', 'M', 'T', 'Y'], dtype='object')
	- memory usage	: 64.0 MB
 Causal Model	: <pyAgrum.causal._CausalModel.CausalModel object at 0x740731a6a620>
	- names		: {0: 'X1', 1: 'X2', 2: 'X3', 3: 'M', 4: 'Y', 5: 'T', 6: 'u'}
	- causal BN	: BN{nodes: 7, arcs: 10, domainSize: 108000, dim: 18007, mem: 142Ko 408o}
	- observ. BN	: BN{nodes: 6, arcs: 9, domainSize: 54000, dim: 17997, mem: 142Ko 248o}
 Adjustment	: Generalized Frontdoor
 Intervention	: T
 Outcome	: Y
 Confounders	: {'X2', 'X1'}
 Mediators	: {'M'}


In [259]:
po5.fitCausalBNEstimator()
po5.estimateCausalEffect()

0.7339056341354395

In [260]:
po5.fitSimplePlugIn()
po5.estimateCausalEffect()

2.442240024000153

In [261]:
po5.fitSimplePlugIn(learner="XGBRegressor")
po5.estimateCausalEffect()

2.4418838687932167

In [262]:
po5.fitGeneralizedPlugIn()
po5.estimateCausalEffect()

1.9939266227328598

# Linear Binary Instrumental Variable

In [263]:
U = np.random.normal(1, 1, n)
W = np.random.binomial(1, 0.3, n)
T = np.random.binomial(1, np.power(1+np.exp(- (U - 2*W)), -1))
Y = np.random.normal(U + 2*T, 1, n)
df6 = pd.DataFrame({"W": W, "T": T, "Y":Y}, index=np.arange(0,n))
df6.head()

Unnamed: 0,W,T,Y
0,1,1,3.017251
1,0,0,1.434015
2,1,0,-0.463321
3,0,1,2.354191
4,0,0,1.412879


In [264]:
bn6 = gum.BayesNet()

bn6.beginTopologyTransformation()

bn6.add(gum.IntegerVariable("W", "W", [0,1]))
bn6.add(getDiscretizedVariable("Y", -4,6, 100))
bn6.add(gum.IntegerVariable("T", "T", [0,1]))
bn6.add(getDiscretizedVariable("U", -4,6, 3))

bn6.addArc("U", "T")
bn6.addArc("U", "Y")
bn6.addArc("W", "T")
bn6.addArc("T", "Y")

bn6.cpt("U").fillFromDistribution(norm, loc=1, scale=1)
bn6.cpt("W")[:] = [0.7, 0.3]
bn6.cpt("T").fillFromDistribution(logistic, loc="U - 2*W", scale=1)
bn6.cpt("Y").fillFromDistribution(norm, loc="U + 2*T", scale=1)

ie = gum.LazyPropagation(bn6)

cpt_T = ie.evidenceImpact("T",["W"])
cpt_Y = ie.evidenceImpact("Y", ["T"])
ie = None

bn6.eraseArc("U", "T")
bn6.eraseArc("U", "Y")
bn6.erase("U")

bn6.cpt("T").fillWith(cpt_T)
bn6.cpt("Y").fillWith(cpt_Y)

bn6.endTopologyTransformation()

cslbn6 = csl.CausalModel(bn6)

cslbn6.addLatentVariable("u", ("T", "Y"))
cslbn6.addCausalArc("T", "Y")

cslbn6

The causal effect is not identifiable using do-Calculus.

In [265]:
po6 = csl.CausalEffectEstimation(df6, cslbn6)
po6.identifyAdjustmentSet(intervention="T", outcome="Y")

Generalized Instrumental Variable adjustment found. 

Supported estimators include:
- CausalModelEstimator
- Wald
- WaldIPW
- NormalizedWaldIPW
- TSLS


'Generalized Instrumental Variable'

In [266]:
print(po6)

<pyAgrum.causal.causalEffectEstimation._CausalEffectEstimation.CausalEffectEstimation object at 0x7407321e2e90>

 Dataframe	: <pandas.core.frame.DataFrame object at 0x740731b43250>
	- shape		: (1000000, 3)
	- columns	: Index(['W', 'T', 'Y'], dtype='object')
	- memory usage	: 32.0 MB
 Causal Model	: <pyAgrum.causal._CausalModel.CausalModel object at 0x7407321e0040>
	- names		: {0: 'W', 1: 'Y', 2: 'T', 3: 'u'}
	- causal BN	: BN{nodes: 4, arcs: 4, domainSize: 800, dim: 402, mem: 3Ko 224o}
	- observ. BN	: BN{nodes: 3, arcs: 2, domainSize: 400, dim: 201, mem: 1Ko 624o}
 Adjustment	: Generalized Instrumental Variable
 Intervention	: T
 Outcome	: Y
 Confounders	: set()
 Instrument	: W


In [229]:
po6.fitCausalBNEstimator()
po6.estimateCausalEffect()

2.7097575714427675

In [230]:
po6.fitWald()
po6.estimateCausalEffect()

1.9928717348619418

In [231]:
po6.fitTSLS()
po6.estimateCausalEffect()

1.9928717348619502

# Linear Continous Instrumental Variable

In [232]:
U = np.random.normal(1, 1, n)
W = np.random.normal(1, 1, n)
T = np.random.binomial(1, np.power(1+np.exp(- (U - 2*W)), -1))
Y = np.random.normal(U + 2*T, 1, n)
df7 = pd.DataFrame({"U": U, "W": W, "T": T, "Y":Y}, index=np.arange(0,n))
df7.head()

Unnamed: 0,U,W,T,Y
0,1.674083,0.808387,0,3.991843
1,2.21888,0.035708,1,4.590935
2,1.275041,0.593989,1,4.294753
3,2.098737,0.323299,1,5.875829
4,1.572824,1.249767,1,3.247934


In [267]:
bn7 = gum.BayesNet()

bn7.beginTopologyTransformation()

bn7.add(getDiscretizedVariable("W", -4,6, 10))
bn7.add(getDiscretizedVariable("Y", -4,6, 100))
bn7.add(gum.IntegerVariable("T", "T", [0,1]))
bn7.add(getDiscretizedVariable("U", -4,6, 3))

bn7.addArc("U", "T")
bn7.addArc("U", "Y")
bn7.addArc("W", "T")
bn7.addArc("T", "Y")

bn7.cpt("U").fillFromDistribution(norm, loc=1, scale=1)
bn7.cpt("W").fillFromDistribution(norm, loc=1, scale=1)
bn7.cpt("T").fillFromDistribution(logistic, loc="U - 2*W", scale=1)
bn7.cpt("Y").fillFromDistribution(norm, loc="U + 2*T", scale=1)

ie = gum.LazyPropagation(bn7)

cpt_T = ie.evidenceImpact("T",["W"])
cpt_Y = ie.evidenceImpact("Y", ["T"])
ie = None

bn7.eraseArc("U", "T")
bn7.eraseArc("U", "Y")
bn7.erase("U")

bn7.cpt("T").fillWith(cpt_T)
bn7.cpt("Y").fillWith(cpt_Y)

bn7.endTopologyTransformation()

cslbn7 = csl.CausalModel(bn7)

cslbn7.addLatentVariable("u", ("T", "Y"))
cslbn7.addCausalArc("T", "Y")

cslbn7

In [268]:
po7 = csl.CausalEffectEstimation(df7, cslbn7)
po7.identifyAdjustmentSet(intervention="T", outcome="Y")

Generalized Instrumental Variable adjustment found. 

Supported estimators include:
- CausalModelEstimator
- Wald
- WaldIPW
- NormalizedWaldIPW
- TSLS


'Generalized Instrumental Variable'

In [269]:
print(po7)

<pyAgrum.causal.causalEffectEstimation._CausalEffectEstimation.CausalEffectEstimation object at 0x74073240bd00>

 Dataframe	: <pandas.core.frame.DataFrame object at 0x74072bf81e70>
	- shape		: (1000000, 4)
	- columns	: Index(['U', 'W', 'T', 'Y'], dtype='object')
	- memory usage	: 40.0 MB
 Causal Model	: <pyAgrum.causal._CausalModel.CausalModel object at 0x74073240b0d0>
	- names		: {0: 'W', 1: 'Y', 2: 'T', 3: 'u'}
	- causal BN	: BN{nodes: 4, arcs: 4, domainSize: 4000, dim: 426, mem: 3Ko 544o}
	- observ. BN	: BN{nodes: 3, arcs: 2, domainSize: 2000, dim: 217, mem: 1Ko 816o}
 Adjustment	: Generalized Instrumental Variable
 Intervention	: T
 Outcome	: Y
 Confounders	: set()
 Instrument	: W


In [270]:
po7.fitCausalBNEstimator()
po7.estimateCausalEffect()

2.5592847029162487

In [271]:
po7.fitTSLS()
po7.estimateCausalEffect()

2.0054326275284566

# Linear Binary Instrumental Variable with Covariates

In [272]:
U = np.random.normal(1, 1, n)
X1 = np.random.normal(1, 1, n)
X2 = np.random.normal(1, 1, n)
X3 = np.random.normal(1, 1, n)
W = np.random.binomial(1, np.power(1+np.exp(-(5 - 4*X1 + X3)), -1))
T = np.random.binomial(1, np.power(1+np.exp(-(U - 2*W + 3*X1 + X2)), -1))
Y = np.random.normal(U + 2*T - 4*X3, 1, n)
df8 = pd.DataFrame({"W": W, "X1": X1, "X2": X2, "X3": X3, "T": T, "Y":Y}, index=np.arange(0,n))
df8.head()

Unnamed: 0,W,X1,X2,X3,T,Y
0,1,0.096666,0.417926,0.418575,0,-0.255187
1,0,2.273919,2.721355,0.88683,1,-1.152904
2,0,2.21851,1.731742,-0.37338,1,5.837515
3,1,-0.539854,1.414256,1.957047,0,-7.377389
4,0,0.924187,1.92022,0.700337,1,2.404231


In [273]:
bn8 = gum.BayesNet()

bn8.beginTopologyTransformation()

bn8.add(gum.IntegerVariable("W", "W", [0,1]))
bn8.add(getDiscretizedVariable("Y", -4,6, 100))
bn8.add(gum.IntegerVariable("T", "T", [0,1]))
bn8.add(getDiscretizedVariable("X1", -4,6, 3))
bn8.add(getDiscretizedVariable("X2", -4,6, 3))
bn8.add(getDiscretizedVariable("X3", -4,6, 3))
bn8.add(getDiscretizedVariable("U", -4,6, 3))

bn8.addArc("U", "T")
bn8.addArc("U", "Y")
bn8.addArc("W", "T")
bn8.addArc("T", "Y")

bn8.addArc("X1", "W")
bn8.addArc("X1", "T")
bn8.addArc("X2", "T")
bn8.addArc("X2", "Y")
bn8.addArc("X3", "W")
bn8.addArc("X3", "Y")

bn8.cpt("U").fillFromDistribution(norm, loc=1, scale=1)
bn8.cpt("X1").fillFromDistribution(norm, loc=1, scale=1)
bn8.cpt("X2").fillFromDistribution(norm, loc=1, scale=1)
bn8.cpt("X3").fillFromDistribution(norm, loc=1, scale=1)
bn8.cpt("W").fillFromDistribution(logistic, loc="5 - 4*X1 + X3", scale=1)
bn8.cpt("T").fillFromDistribution(logistic, loc="U - 2*W + 3*X1 + X2", scale=1)
bn8.cpt("Y").fillFromDistribution(norm, loc="U + 2*T - 4*X3 + X2", scale=1)

ie = gum.LazyPropagation(bn8)

cpt_T = ie.evidenceImpact("T", ["W", "X1", "X2"])
cpt_Y = ie.evidenceImpact("Y", ["T", "X3", "X2"])
ie = None

bn8.eraseArc("U", "T")
bn8.eraseArc("U", "Y")
bn8.erase("U")

bn8.cpt("T").fillWith(cpt_T)
bn8.cpt("Y").fillWith(cpt_Y)

bn8.endTopologyTransformation()

cslbn8 = csl.CausalModel(bn8)

cslbn8.addLatentVariable("u", ("T", "Y"))
cslbn8.addCausalArc("T", "Y")

cslbn8

In [274]:
po8 = csl.CausalEffectEstimation(df8, cslbn8)
po8.identifyAdjustmentSet(intervention="T", outcome="Y")

Generalized Instrumental Variable adjustment found. 

Supported estimators include:
- CausalModelEstimator
- Wald
- WaldIPW
- NormalizedWaldIPW
- TSLS


'Generalized Instrumental Variable'

In [275]:
print(po8)

<pyAgrum.causal.causalEffectEstimation._CausalEffectEstimation.CausalEffectEstimation object at 0x740732409ff0>

 Dataframe	: <pandas.core.frame.DataFrame object at 0x7407324092a0>
	- shape		: (1000000, 6)
	- columns	: Index(['W', 'X1', 'X2', 'X3', 'T', 'Y'], dtype='object')
	- memory usage	: 56.0 MB
 Causal Model	: <pyAgrum.causal._CausalModel.CausalModel object at 0x74073241c3a0>
	- names		: {0: 'W', 1: 'Y', 2: 'T', 3: 'X1', 4: 'X2', 5: 'X3', 6: 'u'}
	- causal BN	: BN{nodes: 7, arcs: 10, domainSize: 21600, dim: 3616, mem: 28Ko 936o}
	- observ. BN	: BN{nodes: 6, arcs: 8, domainSize: 10800, dim: 1815, mem: 14Ko 568o}
 Adjustment	: Generalized Instrumental Variable
 Intervention	: T
 Outcome	: Y
 Confounders	: {'X3'}
 Instrument	: W


In [276]:
po8.fitCausalBNEstimator()
po8.estimateCausalEffect()

2.731222804273997

In [277]:
po8.fitTSLS()
po8.estimateCausalEffect()

-1.0041099752935734

In [278]:
po8.fitWaldIPW()
po8.estimateCausalEffect()

2.0173544503239786

In [279]:
po8.fitNormalizedWaldIPW()
po8.estimateCausalEffect()

2.013237495868846

In [280]:
po8.fitNormalizedWaldIPW(iv_probability_learner="XGBClassifier")
po8.estimateCausalEffect()

1.9961405217093806