## Local IDM learning of a Credal Network

In [8]:
import random
import numpy as np
import math
from statsmodels.distributions.empirical_distribution import ECDF
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from scipy.stats import norm

import pyagrum as gum
import pyagrum.lib.notebook as gnb

In [9]:
# Set seeds
random.seed(42)
gum.initRandom(seed=42)

In [10]:
# Init BN
bn=gum.fastBN("A[2]->B[2]<-C[2]")

# Generate data from BN
gpop_ss = 1000
g = gum.BNDatabaseGenerator(bn)
g.drawSamples(gpop_ss)
g.setDiscretizedLabelModeRandom()
gpop = g.to_pandas()

assert(gpop.shape[0]==gpop_ss)

In [11]:
# Add counts of events to BN
for node in bn.names():
    var = bn.variable(node)
    parents = bn.parents(node)
    parent_names = [bn.variable(p).name() for p in parents]

    shape = [bn.variable(p).domainSize() for p in parents] + [var.domainSize()]
    counts_array = np.zeros(shape, dtype=float)  # float, not int!

    for _, row in gpop.iterrows():
        try:
            key = tuple([int(row[p]) for p in parent_names] + [int(row[node])])
            counts_array[key] += 1.0
        except KeyError:
            continue

    bn.cpt(node).fillWith(counts_array.flatten().tolist())

In [12]:
# Plot BN
bn_tmp = bn
gnb.flow.row(bn, bn.cpt("A"), bn.cpt("B"),
             captions=["CN", "CPT (A) counts", "CPT (B) counts"])

# Quick check
c = gpop[gpop["A"]=="0"]
assert(bn.cpt("A")[0] == len(c))

A,A
0,1
197.0,803.0

Unnamed: 0_level_0,Unnamed: 1_level_0,B,B
C,A,0,1
0,0,38.0,72.0
0,1,37.0,50.0
1,0,158.0,237.0
1,1,392.0,16.0


In [13]:
# Convert BN to CN
cn = gum.CredalNet(bn)

# IDM Learning ('s' must be an integer)
cn.idmLearning(s=2)

# Print CN
print(cn)


A:Range([0,1])
<> : [[0.198603 , 0.801397] , [0.196607 , 0.803393]]

B:Range([0,1])
<A:0|C:0> : [[0.357143 , 0.642857] , [0.339286 , 0.660714]]
<A:1|C:0> : [[0.438202 , 0.561798] , [0.41573 , 0.58427]]
<A:0|C:1> : [[0.403023 , 0.596977] , [0.397985 , 0.602015]]
<A:1|C:1> : [[0.960976 , 0.0390244] , [0.956098 , 0.0439024]]

C:Range([0,1])
<> : [[0.505988 , 0.494012] , [0.503992 , 0.496008]]




In [14]:
# Save BN min & max
cn.saveBNsMinMax("./bn_min.bif", "./bn_max.bif")

# Load BN min & max
bn_min = gum.loadBN("./bn_min.bif")
bn_max = gum.loadBN("./bn_max.bif")

# Plot
gnb.flow.row(cn.current_bn(), 
             bn_min.cpt("A"), bn_min.cpt("B"), 
             bn_max.cpt("A"), bn_max.cpt("B"), 
             captions=["BN", "CPT (A) min ", "CPT (B) min", "CPT (A) max ", "CPT (B) max"])

A,A
0,1
0.1966,0.8014

Unnamed: 0_level_0,Unnamed: 1_level_0,B,B
C,A,0,1
0,0,0.3393,0.6429
0,1,0.4157,0.5618
1,0,0.398,0.597
1,1,0.9561,0.039

A,A
0,1
0.1986,0.8034

Unnamed: 0_level_0,Unnamed: 1_level_0,B,B
C,A,0,1
0,0,0.3571,0.6607
0,1,0.4382,0.5843
1,0,0.403,0.602
1,1,0.961,0.0439
