## Evaluating `pyagrum` inferences on CN

In [1]:
import sys
import os
sys.path.append(os.path.abspath("../src"))
from utils import *
from run import *
import random
import numpy as np
import pandas as pd
import math
import pyagrum as gum
import pyagrum.lib.notebook as gnb

In [2]:
# Set seeds
random.seed(42)
gum.initRandom(seed=42)

In [3]:
bn=gum.fastBN("X1[2]<-T[2]->X2[2]; T->X3[2]; T->X4[2]; T->X5[2]; T->X6[2]; T->X7[2]; T->X8[2]; T->X9[2]; T->X0[2];")
gnb.flow.row(bn, bn.cpt("T"), bn.cpt("X1"))

T,T
0,1
0.0332,0.9668

Unnamed: 0_level_0,X1,X1
T,0,1
0,0.5385,0.4615
1,0.4896,0.5104


In [4]:
# Generate data from BN
gpop_ss = 5000
ratio = 6
pool_ss = gpop_ss // ratio
rpop_ss = gpop_ss - pool_ss
g = gum.BNDatabaseGenerator(bn)
g.drawSamples(gpop_ss)
g.setDiscretizedLabelModeRandom()
gpop = g.to_pandas()
pool_idx = np.random.choice(gpop_ss, replace=False, size=pool_ss)
pool = gpop.iloc[pool_idx]
rpop = gpop.iloc[~ gpop.index.isin(pool_idx)]

In [5]:
# Estimate CN by local IDM from BN(theta_hat)
ess = 5
bn_copy = gum.BayesNet(bn)
add_counts_to_bn(bn_copy, pool)
cn = gum.CredalNet(bn_copy)
cn.idmLearning(ess)

In [6]:
# Evidence list
evid_vec = [random_product(*((0,1) for _ in range(bn.size() - 1))) for _ in range(100)]

In [7]:
bn_ie = gum.LazyPropagation(bn)
e = evid_vec[10]
cov = sorted([i for i in bn.names()])
cov.remove("T")
evid = {k:v for k,v in zip(cov, e)}
evid

{'X0': 1,
 'X1': 1,
 'X2': 1,
 'X3': 0,
 'X4': 0,
 'X5': 1,
 'X6': 1,
 'X7': 0,
 'X8': 1,
 'X9': 1}

In [8]:
# BN inferences
bn_mpes, bn_probs = run_inference_bn(bn, evid_vec)

# Pyagrum inferences
cn_mpes, cn_probs = run_inference_cn_pyagrum(cn, evid_vec)
cn_probs = np.array(cn_probs)

print(f"CN certainty (pyagrum): {sum(cn_probs > 0.5) * 100/ len(cn_probs):.1f} %")

# No-pyagrum inferences
my_cn_mpes, my_cn_log_probs = run_inference_cn(cn, evid_vec)
my_cn_probs = np.array([math.exp(p) for p in my_cn_log_probs])

print(f"CN certainty (no pyagrum): {sum(my_cn_probs > 0.5) * 100/ len(my_cn_probs):.1f} %")

CN certainty (pyagrum): 98.0 %
CN certainty (no pyagrum): 79.0 %


In [9]:
results = pd.DataFrame(
    {"bn_mpes": bn_mpes,
     "bn_probs": bn_probs,
     "py_mpes": cn_mpes, 
     "py_probs": cn_probs, 
     "my_mpes": my_cn_mpes,
     "my_probs": my_cn_probs}
)

results

Unnamed: 0,bn_mpes,bn_probs,py_mpes,py_probs,my_mpes,my_probs
0,1,0.999800,1,0.994443,1,0.994444
1,1,0.959044,1,0.624536,1,0.624538
2,1,0.949950,1,0.782433,1,0.782434
3,1,0.909082,1,0.783721,1,0.783722
4,1,0.987497,1,0.805699,1,0.805700
...,...,...,...,...,...,...
95,1,0.984942,1,0.933517,1,0.933518
96,1,0.995730,1,0.968381,1,0.968382
97,1,0.873905,0,0.665917,1,0.300374
98,1,0.998249,1,0.974472,1,0.974472


In [10]:
print("CN (pyagrum) accuracy: ", sum(results["py_mpes"] == results["bn_mpes"]) / len(results))
print("CN (no pyagrum) accuracy: ", sum(results["my_mpes"] == results["bn_mpes"]) / len(results))

CN (pyagrum) accuracy:  0.84
CN (no pyagrum) accuracy:  1.0
