## IDM Learning of a Credal Network

In [1]:
import random
import numpy as np
import math
from statsmodels.distributions.empirical_distribution import ECDF
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from scipy.stats import norm

import pyagrum as gum
import pyagrum.lib.notebook as gnb

In [2]:
# Set seeds
random.seed(42)
gum.initRandom(seed=42)

In [3]:
# Init BN
bn=gum.fastBN("A[2]->B[2]<-C[3]")

# Generate data from BN
gpop_ss = 1000
g = gum.BNDatabaseGenerator(bn)
g.drawSamples(gpop_ss)
g.setDiscretizedLabelModeRandom()
gpop = g.to_pandas()

assert(gpop.shape[0]==gpop_ss)

In [4]:
# Add counts of events to BN
for node in bn.names():
    var = bn.variable(node)
    parents = bn.parents(node)
    parent_names = [bn.variable(p).name() for p in parents]

    shape = [bn.variable(p).domainSize() for p in parents] + [var.domainSize()]
    counts_array = np.zeros(shape, dtype=float)  # float, non int!

    for _, row in gpop.iterrows():
        try:
            key = tuple([int(row[p]) for p in parent_names] + [int(row[node])])
            counts_array[key] += 1.0
        except KeyError:
            continue

    bn.cpt(node).fillWith(counts_array.flatten().tolist())

In [5]:
# Plot BN
gnb.flow.row(bn, bn.cpt("A"), bn.cpt("B"), captions=["CN", "CPT (A)", "CPT (B | A)"])

# Quick check
c = gpop[gpop["A"]=="0"]
print(f"Counts of A=0: {len(c)}")

A,A
0,1
159.0,841.0

Unnamed: 0_level_0,Unnamed: 1_level_0,B,B
C,A,0,1
0,0,94.0,20.0
0,1,0.0,2.0
1,0,22.0,21.0
1,1,210.0,397.0
2,0,4.0,8.0
2,1,22.0,200.0


Counts of A=0: 159


In [6]:
# Convert BN to CN
cn = gum.CredalNet(bn)

# IDM Learning ('s' must be an integer)
cn.idmLearning(s=2)

# Print CN
print(cn)


A:Range([0,1])
<> : [[0.160679 , 0.839321] , [0.158683 , 0.841317]]

B:Range([0,1])
<A:0|C:0> : [[0.827586 , 0.172414] , [0.810345 , 0.189655]]
<A:1|C:0> : [[0.5 , 0.5] , [0 , 1]]
<A:0|C:1> : [[0.533333 , 0.466667] , [0.488889 , 0.511111]]
<A:1|C:1> : [[0.348112 , 0.651888] , [0.344828 , 0.655172]]
<A:0|C:2> : [[0.428571 , 0.571429] , [0.285714 , 0.714286]]
<A:1|C:2> : [[0.107143 , 0.892857] , [0.0982143 , 0.901786]]

C:Range([0,2])
<> : [[0.721557 , 0.0139721 , 0.264471] , [0.719561 , 0.0159681 , 0.264471] , [0.719561 , 0.0139721 , 0.266467]]




In [7]:
# Save BN min & max
cn.saveBNsMinMax("./bn_min.bif", "./bn_max.bif")

# Load BN min & max
bn_min = gum.loadBN("./bn_min.bif")
bn_max = gum.loadBN("./bn_max.bif")

# Plot
gnb.flow.row(cn.current_bn(), 
             bn_min.cpt("A"), bn_min.cpt("B"), 
             bn_max.cpt("A"), bn_max.cpt("B"), 
             captions=["BN", "CPT (A) min ", "CPT (B) min", "CPT (A) max ", "CPT (B) max"])

A,A
0,1
0.1587,0.8393

Unnamed: 0_level_0,Unnamed: 1_level_0,B,B
C,A,0,1
0,0,0.8103,0.1724
0,1,0.0,0.5
1,0,0.4889,0.4667
1,1,0.3448,0.6519
2,0,0.2857,0.5714
2,1,0.0982,0.8929

A,A
0,1
0.1607,0.8413

Unnamed: 0_level_0,Unnamed: 1_level_0,B,B
C,A,0,1
0,0,0.8276,0.1897
0,1,0.5,1.0
1,0,0.5333,0.5111
1,1,0.3481,0.6552
2,0,0.4286,0.7143
2,1,0.1071,0.9018
