In [79]:
import numpy as np
from pgmpy.metrics import correlation_score
from pgmpy.base import DAG
from pgmpy.models import BayesianNetwork
import networkx as nx
from scipy.stats import chi2_contingency
import pandas as pd

import bnlearn as bn
import matplotlib.pyplot as plt

In [80]:
# Benchmark network names; each one is used below to build the paths
# "BIFs/<name>.bif", "dag_gnn_matrices/<name>_trueG" and "dag_gnn_matrices/<name>_predG".
datasets = ["asia", "cancer","child","insurance"]
# Magnitude cut-offs: predicted weights with |w| below the threshold are zeroed.
threshs = [0.1, 0.2, 0.3]

In [81]:
def SHD(g1, g2):
    """Return the sum of absolute element-wise differences between two matrices.

    For 0/1 adjacency matrices this is the number of entries that differ, so
    a missing or extra edge contributes 1 and a reversed edge contributes 2.

    Parameters
    ----------
    g1, g2 : numpy.ndarray
        Matrices of the same (or broadcast-compatible) shape.

    Returns
    -------
    numpy scalar
        ``sum(|g1 - g2|)`` over all entries.
    """
    mismatch = np.abs(g1 - g2)
    return np.sum(mismatch)

In [82]:
def format_adj(adj, thresh = 0.1):
    """Convert a weighted adjacency matrix into a binary, direction-resolved one.

    Entries whose magnitude is below ``thresh`` are treated as absent. For each
    pair of nodes ``(i, j)`` the direction with the larger remaining magnitude
    is kept as an edge (value 1). Ties, including the case where both
    directions are zero, yield no edge, so the diagonal is always zero.

    The input is never modified; thresholding is done on a private copy.
    Directions are compared by magnitude (consistent with the thresholding),
    so a strong negative weight is not beaten by a zero reverse weight.

    Parameters
    ----------
    adj : array-like
        Square ``(n, n)`` matrix of edge weights; ``adj[i][j]`` is the weight
        of the edge ``i -> j``.
    thresh : float, optional
        Minimum absolute weight for an entry to be considered an edge.

    Returns
    -------
    numpy.ndarray
        ``(n, n)`` array with the dtype of ``adj`` containing only 0 and 1.

    Raises
    ------
    ValueError
        If ``adj`` is not a square two-dimensional matrix.
    """
    # Work on a copy so the caller's matrix keeps its original weights.
    weights = np.array(adj, copy=True)
    if weights.ndim != 2 or weights.shape[0] != weights.shape[1]:
        raise ValueError("adj must be a square 2-D matrix")

    weights[np.abs(weights) < thresh] = 0

    # An edge i -> j survives only if it is strictly stronger than j -> i.
    strength = np.abs(weights)
    return (strength > strength.T).astype(weights.dtype)

In [83]:
def generate_df(file_name, n_samples=10000):
    """Sample a dataset from the network stored in ``BIFs/<file_name>.bif``.

    Parameters
    ----------
    file_name : str
        Base name of the BIF file, without directory or extension.
    n_samples : int, optional
        Number of rows to draw; defaults to 10000.

    Returns
    -------
    The object returned by ``bn.sampling`` (callers in this notebook use it
    as a pandas DataFrame with one column per network variable).
    """
    model = bn.import_DAG("BIFs/" + file_name + ".bif")
    return bn.sampling(model, n=n_samples)

In [84]:
seed = 914013600
# One row per (dataset, threshold): [dataset, threshold, correlation score, SHD].
data_metrics = []
for dataset in datasets:
    # Re-seed NumPy's global RNG before sampling each dataset so a dataset's
    # sample does not depend on which datasets were processed before it.
    np.random.seed(seed)
    data = generate_df(dataset)

    true_g = np.loadtxt("dag_gnn_matrices/" + dataset + "_trueG")
    pred_g = np.loadtxt("dag_gnn_matrices/" + dataset + "_predG")

    # Columns are re-labelled 0..n-1 so they line up with the integer node ids
    # of the graph built from the adjacency matrix (presumably row/column
    # order of the matrix matches data.columns; verify against the exporter).
    data_by_index = pd.DataFrame(data.to_numpy())

    for thresh in threshs:
        # Threshold a fresh copy: pred_g must keep its raw weights so each
        # threshold is applied independently of the order of `threshs`.
        weighted_adj = pred_g.copy()
        weighted_adj[np.abs(weighted_adj) < thresh] = 0

        corr_score = correlation_score(DAG(weighted_adj), data_by_index)
        print("cor_score = ", corr_score)

        # weighted_adj is private to this iteration, so it is safe to hand
        # over even if format_adj modifies its argument.
        frmt_adj = format_adj(weighted_adj, thresh)
        shd = SHD(frmt_adj, true_g)
        print("SHD = ", shd)

        data_metrics.append([dataset, thresh, corr_score, shd])

[bnlearn] >Import <BIFs/asia.bif>
[bnlearn] >Loading bif file <BIFs/asia.bif>
[bnlearn] >Check whether CPDs sum up to one.
cor_score =  0.0
SHD =  18.0
cor_score =  0.18181818181818182
SHD =  16.0
cor_score =  0.18181818181818182
SHD =  13.0
[bnlearn] >Import <BIFs/cancer.bif>
[bnlearn] >Loading bif file <BIFs/cancer.bif>
[bnlearn] >Check whether CPDs sum up to one.
cor_score =  0.8
SHD =  3.0
cor_score =  0.8
SHD =  3.0
cor_score =  0.75
SHD =  4.0
[bnlearn] >Import <BIFs/child.bif>
[bnlearn] >Loading bif file <BIFs/child.bif>




[bnlearn] >Check whether CPDs sum up to one.
[bnlearn] >CPD [Age] does not add up to 1 but is: [[1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]]
[bnlearn] >CPD [ChestXray] does not add up to 1 but is: [[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
[bnlearn] >CPD [HypoxiaInO2] does not add up to 1 but is: [[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
[bnlearn] >CPD [LowerBodyO2] does not add up to 1 but is: [[1. 1. 1.]
 [1. 1. 1.]]
[bnlearn] >CPD [RUQO2] does not add up to 1 but is: [1. 1. 1.]
[bnlearn] >CPD [XrayReport] does not add up to 1 but is: [1. 1. 1. 1. 1.]
cor_score =  0.0
SHD =  52.0
cor_score =  0.3252032520325203
SHD =  32.0
cor_score =  0.29411764705882354
SHD =  25.0
[bnlearn] >Import <BIFs/insurance.bif>
[bnlearn] >Loading bif file <BIFs/insurance.bif>




[bnlearn] >Check whether CPDs sum up to one.
[bnlearn] >CPD [Accident] does not add up to 1 but is: [[[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]]
[bnlearn] >CPD [CarValue] does not add up to 1 but is: [[[1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[1. 1. 1. 1.]
  [1. 1. 1. 1.]]]
[bnlearn] >CPD [DrivHist] does not add up to 1 but is: [[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[bnlearn] >CPD [DrivingSkill] does not add up to 1 but is: [[1. 1.]
 [1. 1.]
 [1. 1.]]
[bnlearn] >CPD [HomeBase] does not add up to 1 but is: [[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[bnlearn] >CPD [MedCost] does not add up to 1 but is: [[[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]]
[bnlearn]

In [85]:
# Collect the per-(dataset, threshold) rows gathered above into one table.
metrics = pd.DataFrame(data_metrics, columns=["Dataset","Thresholds", "Correlation Score","SHD"])

In [86]:
# Persist the table. With the default arguments the row index is written too,
# as a first column with an empty header.
metrics.to_csv("DAG_GNN_metrics.csv")

In [87]:
# Show the collected metrics table as the cell output.
metrics

Unnamed: 0,Dataset,Thresholds,Correlation Score,SHD
0,asia,0.1,0.0,18.0
1,asia,0.2,0.181818,16.0
2,asia,0.3,0.181818,13.0
3,cancer,0.1,0.8,3.0
4,cancer,0.2,0.8,3.0
5,cancer,0.3,0.75,4.0
6,child,0.1,0.0,52.0
7,child,0.2,0.325203,32.0
8,child,0.3,0.294118,25.0
9,insurance,0.1,0.0,106.0
