# DAGs: D-Separation and Conditional Independencies, Adjustment via Backdoor and SWIGs, Equivalence Classes, Falsifiability Tests.


In [None]:
!pip install pgmpy

In [None]:
import warnings
# Suppress all warnings from here on (presumably to keep the notebook output uncluttered).
warnings.simplefilter('ignore')

# Graph Generation and Plotting

The following DAG is due to Judea Pearl.

In [None]:
import networkx as nx

# Start from an empty directed graph and add the edges in one call.
# Each pair is (source, target); the insertion order is kept as-is because it
# determines the node order of the resulting graph.
digraph = nx.DiGraph()
digraph.add_edges_from(
    [
        ("Z1", "X1"),
        ("X1", "D"),
        ("Z1", "X2"),
        ("Z2", "X3"),
        ("X3", "Y"),
        ("Z2", "X2"),
        ("X2", "Y"),
        ("X2", "D"),
        ("M", "Y"),
        ("D", "M"),
    ]
)

In [None]:
from pgmpy.base.DAG import DAG

# Wrap the networkx digraph in pgmpy's DAG class; G is the object queried in the cells below.
G = DAG(digraph)

In [None]:
# Use the explicit pyplot interface: the legacy `pylab` namespace is
# discouraged by matplotlib, and only `plt.show()` is needed here.
import matplotlib.pyplot as plt

# Draw the DAG with a planar layout and node labels, then render the figure.
nx.draw_planar(G, with_labels=True)
plt.show()

In [None]:
print(list(G.predecessors("X2")))  # parents of X2 (nodes with an edge into X2)
print(list(G.successors("X2")))  # children of X2 (nodes X2 has an edge into)
print(list(nx.ancestors(G, "X2")))  # all ancestors of X2
print(list(nx.descendants(G, "X2")))  # all descendants of X2

# Find Paths Between D and Y




In [None]:
# Enumerate simple paths from D to Y on the undirected version of G, so that
# paths traversing edges against their direction are listed as well.
list(nx.all_simple_paths(G.to_undirected(), "D", "Y"))

# List All Testable Implications of the Model

Here we use D-separation to list all the conditional independence relations deduced from the DAG.

In [None]:
# This returns all conditional independencies, including those between two sets
# of variables conditional on a third set
dseps = G.get_independencies()

In [None]:
# We display only the assertions that correspond to pairs of singletons,
# i.e. those whose second component contains exactly one variable.
for dsep in dseps.get_assertions():
    if len(dsep.get_assertion()[1]) != 1:
        continue
    print(dsep)

# Backdoor and frontdoor adjustments between D and Y

In [None]:
# Import from the package-level namespaces rather than from individual module
# files: the module file names have changed between pgmpy releases, and the
# notebook installs pgmpy unpinned.
from pgmpy.inference import CausalInference
from pgmpy.models import BayesianNetwork

# Build the causal-inference helper on a Bayesian network constructed from the DAG G.
inference = CausalInference(BayesianNetwork(G))

In [None]:
# Query all backdoor adjustment sets for the pair (D, Y).
inference.get_all_backdoor_adjustment_sets('D', 'Y')

In [None]:
# Query all frontdoor adjustment sets for the pair (D, Y).
inference.get_all_frontdoor_adjustment_sets('D', 'Y')

In [None]:
# Query a minimal adjustment set for the pair (D, Y).
inference.get_minimal_adjustment_set('D', 'Y')

In [None]:
# Check whether X2 on its own is a valid backdoor adjustment set for (D, Y).
inference.is_valid_backdoor_adjustment_set('D', 'Y', 'X2')

# Testing DAG Validity by Checking Implied Conditional Independencies with DoWhy-GCM

We found all the implied conditional independence relations above. Can we test them? The answer is yes, and testing is particularly simple if the DAG is associated with a linear SEM.

To illustrate, we simulate data from a linear SEM associated with the DAG G, and test the conditional independence restrictions using a kernel-based test.

In [None]:
!apt install libgraphviz-dev
!pip install pygraphviz
!pip install dowhy

In [None]:
# generate data from the SCM
import numpy as np
import pandas as pd

def gen_data(n, seed=None):
    """Simulate ``n`` observations from a linear SEM with standard normal noise.

    Structural equations (every ``e`` is a fresh, independent N(0, 1) draw)::

        Z1 = e
        Z2 = e
        X1 = Z1 + e
        X2 = Z1 + Z2 + e
        X3 = Z2 + e
        D  = X1 + X2 + e
        M  = D + e
        Y  = M + X2 + X3 + e

    Parameters
    ----------
    n : int
        Number of rows to simulate.
    seed : int or None, optional
        If given, all draws come from a private ``np.random.RandomState(seed)``,
        which makes the sample reproducible without touching global state.
        If ``None`` (the default), NumPy's global generator is used, so a
        preceding ``np.random.seed(...)`` call is honoured.

    Returns
    -------
    pandas.DataFrame
        ``n`` rows with columns ``Z1, Z2, X1, X2, X3, D, M, Y``.
    """
    # Both the ``np.random`` module and ``RandomState`` expose ``normal``.
    rng = np.random if seed is None else np.random.RandomState(seed)

    def noise():
        # One vector of n independent N(0, 1) draws.
        return rng.normal(0, 1, size=n)

    # The draw order below is fixed (Z1, Z2, X1, X2, X3, D, M, Y); changing it
    # would change the sample obtained for a given seed.
    Z1 = noise()
    Z2 = noise()
    X1 = Z1 + noise()
    X2 = Z1 + Z2 + noise()
    X3 = Z2 + noise()
    D = X1 + X2 + noise()
    M = D + noise()
    Y = M + X2 + X3 + noise()
    return pd.DataFrame({'Z1': Z1, 'Z2': Z2, 'X1': X1, 'X2': X2, 'X3': X3, 'D': D, 'M': M, 'Y': Y})

In [None]:
# Draw 5000 observations from the simulated SEM.
data = gen_data(5000)

In [None]:
import dowhy.gcm as gcm
from dowhy.gcm.independence_test import kernel_based, regression_based

# Wrap the networkx digraph in a structural causal model; only its `.graph` is used below.
causal_model = gcm.StructuralCausalModel(digraph)
# Run the graph refutation on the simulated data with the kernel-based conditional independence test.
rej = gcm.refute_causal_structure(causal_model.graph, data, conditional_independence_test=kernel_based)

The rejection result contains two types of tests:
- local Markov tests: these test whether each node is independent of its non-descendants conditional on its parents (these also imply the global Markov conditions)
- edge dependence tests: these test whether the edges that were included in the graph carry a sufficient amount of correlation.

In our case, we focus only on the local Markov tests. We reject the graph if any of the local Markov tests fails, and we ignore the other tests.

In [None]:
# Show the refutation result as the cell output.
rej

Next we replace $D$ by $\bar D$ generated differently:
$$
\bar D= (D + Y)/2
$$
Basically, $\bar D$ is the average of $D$ and the $Y$ generated by $D$. We then test whether the resulting collection of random variables satisfies the conditional independence restrictions, exploiting linearity. We end up rejecting these restrictions, and therefore the validity of this model for data generated in this way. This makes sense, because the new data no longer obeys the previous DAG structure.



In [None]:
# Overwrite column D in place with the average of D and Y; the original D values are discarded.
data['D'] = (data['D'] + data['Y'])/2

In [None]:
# Re-run the same refutation (same graph, same kernel-based test) on the modified data.
rej = gcm.refute_causal_structure(causal_model.graph, data, conditional_independence_test=kernel_based)

In [None]:
# Show the refutation result for the modified data as the cell output.
rej

# Identification with Front-Door and DoWhy

In [None]:
# Draw a fresh sample of 5000 observations, replacing the previous `data`.
data = gen_data(5000)

In [None]:
import dowhy

# DOT-format description of the graph: eight nodes and the same ten directed
# edges as the networkx `digraph` defined earlier in the notebook.
causal_graph = """digraph {
Z1; Z2; X1; X2; X3; D; M; Y;
Z1 -> X1;
X1 -> D;
Z1 -> X2;
Z2 -> X3;
X3 -> Y;
Z2 -> X2;
X2 -> Y;
X2 -> D;
M -> Y;
D -> M;
}"""

In [None]:
# Tie the simulated data to the DOT graph, with D as the treatment and Y as the outcome.
cm = dowhy.CausalModel(
    data=data,
    treatment="D",
    outcome="Y",
    graph=causal_graph,
)

In [None]:
from PIL import Image

# Render the model graph under the base name 'dag', then load 'dag.png' so it is shown as the cell output.
# NOTE(review): assumes view_model writes dag.png into the working directory — confirm.
cm.view_model(file_name='dag')
Image.open('dag.png')

In [None]:
# NOTE: refute_graph does not seem to control the false discovery rate
# (no multiple-testing / joint-inference correction), so some tests might fail
# for that reason alone when k (the size of the conditioning set) is large.
print(cm.refute_graph(k=2))

In [None]:
# Identify the estimand for the model's treatment/outcome pair and print it.
# proceed_when_unidentifiable=True presumably lets identification continue
# without stopping when the effect is flagged as unidentifiable — confirm against DoWhy docs.
identified_estimand = cm.identify_effect(proceed_when_unidentifiable=True)
print(identified_estimand)

In [None]:
# Estimate the identified effect using the 'frontdoor.two_stage_regression' method.
estimate = cm.estimate_effect(identified_estimand, method_name='frontdoor.two_stage_regression')

In [None]:
# Print the estimation result.
print(estimate)