In [18]:
import dowhy
from dowhy import CausalModel
from IPython.display import Image, display
import pandas as pd

# Utility: flatten a .gml file into one single-line string.
def gml_to_string(file):
    """Read a GML file and return its contents as a single-line string.

    Every line is stripped of surrounding whitespace and the non-empty lines
    are joined with one space. The result therefore contains no newlines, and
    tokens that sit on adjacent lines never run together (``id 0`` followed by
    ``label "a"`` stays ``id 0 label "a"`` instead of ``id 0label "a"``).

    Parameters
    ----------
    file : str or path-like
        Path of the GML file to read.

    Returns
    -------
    str
        The file contents on a single line; ``''`` for an empty file.
    """
    # A separate name for the handle keeps the ``file`` argument un-shadowed.
    with open(file, 'r') as handle:
        stripped_lines = (line.strip() for line in handle)
        # join() builds the string once instead of re-allocating per line.
        return ' '.join(line for line in stripped_lines if line)


def dowhy_backdoor(file, data_path="data/close_college.dta",
                   treatment='educ', outcome='lwage'):
    """Build a DoWhy ``CausalModel`` for one GML graph and print its estimand.

    Parameters
    ----------
    file : str
        Path to the GML file describing the causal graph.
    data_path : str, optional
        Stata (``.dta``) file with the observational data. The default is the
        data set this notebook was written against.
    treatment : str, optional
        Name of the treatment column (default ``'educ'``).
    outcome : str, optional
        Name of the outcome column (default ``'lwage'``).

    Returns
    -------
    CausalModel
        The model built from the data and the graph.
    """
    # Load the observational data from disk.
    df = pd.read_stata(data_path)

    # The graph is handed to DoWhy as a single GML string.
    gml_graph = gml_to_string(file)
    model = CausalModel(
        data=df,
        treatment=treatment,
        outcome=outcome,
        graph=gml_graph,
    )

    # The estimand is printed for inspection only; callers receive the model.
    identified_estimand = model.identify_effect()
    print(identified_estimand)
    return model
# model = dowhy_backdoor('graph_files/dag1.gml')


In [19]:
import os
def prove_causality(file):
    """Identify and estimate the causal effect for the graph stored in ``file``.

    The model comes from ``dowhy_backdoor``; the effect is identified with
    ``proceed_when_unidentifiable=True`` and estimated with the
    ``backdoor.linear_regression`` method.

    Returns
    -------
    tuple
        ``(backdoor_variables, estimate_value)`` -- the backdoor variables of
        the identified estimand and the ``value`` of the causal estimate.
    """
    causal_model = dowhy_backdoor(file)
    estimand = causal_model.identify_effect(proceed_when_unidentifiable=True)

    # Read the backdoor variables before the estimand is passed to the estimator.
    backdoor_vars = estimand.get_backdoor_variables()

    estimate = causal_model.estimate_effect(
        estimand,
        method_name="backdoor.linear_regression",
    )
    return backdoor_vars, estimate.value


def get_causal_df_for_all_DAGs(folder, extension=".gml"):
    """Run ``prove_causality`` on every graph file in ``folder``.

    Parameters
    ----------
    folder : str
        Directory containing one graph file per candidate DAG.
    extension : str, optional
        Only regular files whose name ends with this suffix (compared
        case-insensitively) are processed. Defaults to ``".gml"``.

    Returns
    -------
    pandas.DataFrame
        One row per processed file with the columns ``DAG`` (file name),
        ``estimate`` and ``adjustment_set``. Rows are ordered by file name;
        the frame is empty (but keeps its columns) when nothing matches.
    """
    suffix = extension.lower()
    # os.listdir returns entries in arbitrary order, so sort for a reproducible
    # table, and skip anything that is not a graph file (e.g. the
    # .ipynb_checkpoints directory Jupyter may create inside the folder).
    graph_files = sorted(
        name for name in os.listdir(folder)
        if name.lower().endswith(suffix)
        and os.path.isfile(os.path.join(folder, name))
    )

    # Collect plain records and build the frame once instead of growing it
    # row by row.
    records = []
    for name in graph_files:
        adj_set, estimate = prove_causality(os.path.join(folder, name))
        records.append({'DAG': name, 'estimate': estimate, 'adjustment_set': adj_set})

    return pd.DataFrame(records, columns=['DAG', 'estimate', 'adjustment_set'])


#### PC with prior knowledge CPDAG

In [20]:
# Run the estimation over every graph file in this folder; the returned table is the cell output.
get_causal_df_for_all_DAGs("graph_files/PC_CPDAG_graphs")

Estimand type: EstimandType.NONPARAMETRIC_ATE

### Estimand : 1
Estimand name: backdoor
Estimand expression:
   d                                
───────(E[lwage|exper,nearc4,black])
d[educ]                             
Estimand assumption 1, Unconfoundedness: If U→{educ} and U→lwage then P(lwage|educ,exper,nearc4,black,U) = P(lwage|educ,exper,nearc4,black)

### Estimand : 2
Estimand name: iv
No such variable(s) found!

### Estimand : 3
Estimand name: frontdoor
No such variable(s) found!

Estimand type: EstimandType.NONPARAMETRIC_ATE

### Estimand : 1
Estimand name: backdoor
Estimand expression:
   d                                
───────(E[lwage|exper,nearc4,black])
d[educ]                             
Estimand assumption 1, Unconfoundedness: If U→{educ} and U→lwage then P(lwage|educ,exper,nearc4,black,U) = P(lwage|educ,exper,nearc4,black)

### Estimand : 2
Estimand name: iv
No such variable(s) found!

### Estimand : 3
Estimand name: frontdoor
No such variable(s) found!

Estimand typ

Unnamed: 0,DAG,estimate,adjustment_set
0,dag1.gml,0.078918,"[exper, nearc4, black]"
1,dag10.gml,0.074561,"[exper, nearc4, black]"
2,dag2.gml,0.078918,"[exper, nearc4, black]"
3,dag3.gml,0.078918,"[exper, nearc4, black]"
4,dag4.gml,0.078918,"[exper, nearc4, black]"
5,dag5.gml,0.078918,"[exper, nearc4, black]"
6,dag6.gml,0.07713,"[exper, nearc4, black]"
7,dag7.gml,0.075967,"[exper, nearc4, black]"
8,dag8.gml,0.075967,"[exper, nearc4, black]"
9,dag9.gml,0.075967,"[exper, nearc4, black]"


#### PC without prior knowledge CPDAG

In [21]:
# Run the estimation over every graph file in this folder; the returned table is the cell output.
get_causal_df_for_all_DAGs("graph_files/PC_withoutPK_CPDAG_graphs")

Estimand type: EstimandType.NONPARAMETRIC_ATE

### Estimand : 1
Estimand name: backdoor
Estimand expression:
   d                                
───────(E[lwage|exper,nearc4,black])
d[educ]                             
Estimand assumption 1, Unconfoundedness: If U→{educ} and U→lwage then P(lwage|educ,exper,nearc4,black,U) = P(lwage|educ,exper,nearc4,black)

### Estimand : 2
Estimand name: iv
No such variable(s) found!

### Estimand : 3
Estimand name: frontdoor
No such variable(s) found!

Estimand type: EstimandType.NONPARAMETRIC_ATE

### Estimand : 1
Estimand name: backdoor
Estimand expression:
   d                                
───────(E[lwage|exper,nearc4,black])
d[educ]                             
Estimand assumption 1, Unconfoundedness: If U→{educ} and U→lwage then P(lwage|educ,exper,nearc4,black,U) = P(lwage|educ,exper,nearc4,black)

### Estimand : 2
Estimand name: iv
No such variable(s) found!

### Estimand : 3
Estimand name: frontdoor
No such variable(s) found!

Estimand typ

Unnamed: 0,DAG,estimate,adjustment_set
0,dag1.gml,0.078918,"[exper, nearc4, black]"
1,dag10.gml,0.074561,"[exper, nearc4, black]"
2,dag2.gml,0.078918,"[exper, nearc4, black]"
3,dag3.gml,0.078918,"[exper, nearc4, black]"
4,dag4.gml,0.078918,"[exper, nearc4, black]"
5,dag5.gml,0.078918,"[exper, nearc4, black]"
6,dag6.gml,0.07713,"[exper, nearc4, black]"
7,dag7.gml,0.075967,"[exper, nearc4, black]"
8,dag8.gml,0.075967,"[exper, nearc4, black]"
9,dag9.gml,0.075967,"[exper, nearc4, black]"


#### GES

In [22]:
# Run the estimation over every graph file in this folder; the returned table is the cell output.
get_causal_df_for_all_DAGs("graph_files/GES_CPDAG_graphs")

No directed path from ['educ'] to ['lwage'] in the causal graph.
Causal effect is zero.
No directed path from ['educ'] to ['lwage'] in the causal graph.
Causal effect is zero.
No directed path from ['educ'] to ['lwage'] in the causal graph.
Causal effect is zero.
No directed path from ['educ'] to ['lwage'] in the causal graph.
Causal effect is zero.
No directed path from ['educ'] to ['lwage'] in the causal graph.
Causal effect is zero.
No directed path from ['educ'] to ['lwage'] in the causal graph.
Causal effect is zero.
No directed path from ['educ'] to ['lwage'] in the causal graph.
Causal effect is zero.
No directed path from ['educ'] to ['lwage'] in the causal graph.
Causal effect is zero.
No directed path from ['educ'] to ['lwage'] in the causal graph.
Causal effect is zero.
No directed path from ['educ'] to ['lwage'] in the causal graph.
Causal effect is zero.


Unnamed: 0,DAG,estimate,adjustment_set
0,dag1.gml,0,[]
1,dag10.gml,0,[]
2,dag2.gml,0,[]
3,dag3.gml,0,[]
4,dag4.gml,0,[]
5,dag5.gml,0,[]
6,dag6.gml,0,[]
7,dag7.gml,0,[]
8,dag8.gml,0,[]
9,dag9.gml,0,[]
