In [None]:

import logging
import warnings
logging.getLogger('pgmpy').setLevel(logging.ERROR)
logging.getLogger('pgmpy').setLevel(logging.WARNING)
warnings.filterwarnings("ignore") 
warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")

import os
import random
import itertools
import pysnooper
import pandas as pd
import pyagrum as gum
import concurrent.futures
from functools import reduce
import numpy as np
from time import *
import networkx as nx
from Decom_Tree import *
from copy import deepcopy
os.environ["NUMEXPR_MAX_THREADS"] = "16"
from pgmpy.utils import get_example_model
from pgmpy.sampling import BayesianModelSampling
from pgmpy.factors import factor_product,factor_divide
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import DiscreteBayesianNetwork,JunctionTree
from pgmpy.factors.discrete import DiscreteFactor, TabularCPD
from pgmpy.inference import VariableElimination, BeliefPropagation

In [None]:
def generate_state_names(g):
    """Return a dict mapping every node of ``g`` to the state labels ``[0, 1]``.

    ``g`` only needs a ``nodes`` attribute that can be iterated. Every node
    gets its own fresh list, so editing the labels of one node does not
    affect any other node.
    """
    labels_by_node = {}
    for vertex in g.nodes:
        labels_by_node[vertex] = [0, 1]
    return labels_by_node

def get_random_cpds_with_labels(model, state_names, inplace=False, seed=None):
    """Draw one random ``TabularCPD`` for every node of ``model``.

    Parameters
    ----------
    model
        Object exposing ``nodes()``, ``predecessors(node)`` and, when
        ``inplace`` is true, ``add_cpds(*cpds)``.
    state_names : dict
        Maps node -> list of state labels. It is forwarded unchanged to
        ``TabularCPD.get_random``. The cardinality of a node is the number of
        labels listed for it; a node without an entry is treated as binary.
    inplace : bool, default False
        If true, the CPDs are attached to ``model`` and nothing is returned.
    seed
        Forwarded as-is to every ``TabularCPD.get_random`` call, i.e. the
        same seed value is reused for each node.

    Returns
    -------
    list or None
        The CPDs in ``model.nodes()`` order when ``inplace`` is false,
        otherwise ``None``.
    """
    nodes = list(model.nodes())
    labels = state_names or {}

    # Built once for the whole model instead of once per node, and derived
    # from the labels so cardinalities always agree with ``state_names``.
    cardinality = {
        var: len(labels[var]) if var in labels else 2
        for var in nodes
    }

    cpds = [
        TabularCPD.get_random(
            variable=node,
            evidence=list(model.predecessors(node)),
            cardinality=cardinality,
            state_names=state_names,
            seed=seed,
        )
        for node in nodes
    ]

    if inplace:
        model.add_cpds(*cpds)
        return None
    return cpds
    
def mar_pro(t,r):
    """Collapse the clique tree ``t`` into a single factor over the variables in ``r``.

    The tree is consumed clique by clique: the clique of minimum degree is
    removed and its factor is absorbed into the factor of its only neighbour
    (product of the two factors divided by the marginal over their shared
    variables). Before absorption, clique variables that are neither shared
    with the neighbour nor listed in ``r`` are summed out. For the last
    remaining clique, every variable not in ``r`` is summed out.

    Parameters
    ----------
    t
        Clique tree exposing ``factors``, ``nodes()``, ``number_of_nodes()``,
        ``degree``, ``adj`` and ``remove_node``. Each clique (an iterable of
        variable names) must have a factor in ``t.factors`` whose variable
        set equals the clique. **All nodes are removed from ``t``**; the
        factors stored on ``t`` are left untouched because the function
        works on a deep copy of them.
    r
        Iterable of variables to keep in the result.

    Returns
    -------
    The working-copy factor of the clique that was removed last, restricted
    to variables in ``r``.

    Raises
    ------
    ValueError
        If ``t`` has no nodes, if a clique has no factor with a matching
        variable set, or if the minimum-degree clique of a multi-clique
        graph does not have exactly one neighbour (``t`` is not a tree).
    """
    if t.number_of_nodes() == 0:
        raise ValueError("mar_pro needs a tree with at least one clique")

    # The original code read the global ``R`` here; the argument is the
    # intended source of the variables to keep.
    keep = set(r)

    factor_copy = deepcopy(t.factors)
    # Scope of every factor, frozen up front: cliques are looked up by their
    # own variable set even after their factor has absorbed kept variables.
    factors_order = [set(factor.variables) for factor in factor_copy]

    self_index = None
    while t.number_of_nodes() > 0:
        min_degree_node = min(t.nodes(), key=t.degree)
        clique_vars = set(min_degree_node)
        self_index = factors_order.index(clique_vars)

        if t.number_of_nodes() > 1:
            neighbours = list(t.adj[min_degree_node])
            if len(neighbours) != 1:
                raise ValueError(
                    "minimum-degree clique %r has %d neighbours; expected a tree leaf"
                    % (min_degree_node, len(neighbours))
                )
            neighbour_vars = set(neighbours[0])
            neighbour_index = factors_order.index(neighbour_vars)

            # Marginal of the leaf factor over the variables shared with its neighbour.
            separator = clique_vars & neighbour_vars
            separator_factor = deepcopy(factor_copy[self_index])
            separator_factor.marginalize(set(factor_copy[self_index].variables) - separator)

            # Sum out what is needed neither by the neighbour nor by the caller.
            marginal_nodes = clique_vars - (separator | keep)
            if marginal_nodes:
                factor_copy[self_index].marginalize(marginal_nodes)

            # Absorb the leaf into its neighbour: P(A)P(B)/P(S).
            absorbed = factor_product(factor_copy[self_index], factor_copy[neighbour_index])
            factor_copy[neighbour_index] = factor_divide(absorbed, separator_factor)
        else:
            # Last clique: keep only the requested variables.
            marginal_nodes = set(factor_copy[self_index].variables) - keep
            if marginal_nodes:
                factor_copy[self_index].marginalize(marginal_nodes)

        t.remove_node(min_degree_node)

    return factor_copy[self_index]

In [None]:
# Load the 'child' example network and copy its structure (nodes first, then
# edges) into a plain networkx DiGraph.
model = get_example_model('child')
G = nx.DiGraph()
G.add_nodes_from(model.nodes())
G.add_edges_from(model.edges())

# Binary state labels for every node of G.
state_names = generate_state_names(G)

# Decomposition helper built on G; Graph_Decom is not defined in this
# notebook (presumably it comes from the Decom_Tree star-import -- confirm).
decom = Graph_Decom(G)
atoms = decom.Decom()

In [None]:
#Compute P('HypDistrib' = 0|'HypoxiaInO2' = 0) In-cluster
# The query variable and the evidence variable both belong to the node list C,
# so the estimate is read from a network fitted on the columns in C only and
# compared with the answer of the randomly parameterised reference network.
logging.getLogger('pgmpy').setLevel(logging.ERROR)

# Experiment settings, named once so that the loop bounds and the averaging
# divisors below cannot drift apart.
N_REPS = 100
SAMPLE_SIZES = [100,500,1000,2500,5000,7500,10000]
QUERY_VAR = 'HypDistrib'
EVIDENCE = {'HypoxiaInO2':0}
C = ['HypDistrib', 'DuctFlow', 'HypoxiaInO2', 'LowerBodyO2', 'CardiacMixing', 'Disease', 'LungParench']

# Reference network: structure of G, CPDs redrawn at random in every repetition.
bn = DiscreteBayesianNetwork()
bn.add_nodes_from(list(G.nodes))
bn.add_edges_from(list(G.edges))

# Full network refitted from the sampled data; only its run time is recorded.
learn_bn = DiscreteBayesianNetwork()
learn_bn.add_nodes_from(list(G.nodes))
learn_bn.add_edges_from(list(G.edges))

sum_sub_T,sum_full_T,sum_Bias,sum_RMSE = [],[],[],[]
for sample_size in SAMPLE_SIZES:
    Bias,RMSE = 0,0
    sub_T,full_T = 0,0
    for _rep in range(N_REPS):
        # Ground truth from freshly drawn CPDs, plus a data set sampled from them.
        bn.cpds = []
        get_random_cpds_with_labels(bn, state_names, inplace=True)
        ori_infer = BeliefPropagation(bn)
        ori_query = ori_infer.query(variables=[QUERY_VAR], evidence=dict(EVIDENCE), show_progress=False).values[0]
        df = BayesianModelSampling(bn).forward_sample(size=sample_size, show_progress=False)

        # --- Sub-network: fit and query on the columns in C only (timed) ---
        start = time()
        # T is not used in this cell; the call stays inside the timed region
        # so that sub_T keeps measuring the same work as before.
        T = decom.spann_tree()
        sub_df = df[C]
        sub_model = DiscreteBayesianNetwork(list(G.subgraph(C).edges))
        # Make sure every variable of C is present even if it has no edge inside C.
        sub_model.add_nodes_from(C)
        sub_model.cpds = []
        sub_model.fit(sub_df,estimator=MaximumLikelihoodEstimator)
        sub_infer = VariableElimination(sub_model)
        sub_query = sub_infer.query(variables=[QUERY_VAR], evidence=dict(EVIDENCE), show_progress=False).values[0]
        sub_T += time()-start
        Bias += abs(sub_query-ori_query)      # accumulated absolute error
        RMSE += (sub_query-ori_query)**2      # accumulated squared error

        # --- Full network: fit and query on all columns (timed, result not scored) ---
        start = time()
        learn_bn.cpds = []
        learn_bn.fit(df,estimator=MaximumLikelihoodEstimator)
        full_infer = BeliefPropagation(learn_bn)
        full_query = full_infer.query(variables=[QUERY_VAR], evidence=dict(EVIDENCE), show_progress=False).values[0]
        full_T += time()-start

    # Per sample size: total seconds over all repetitions, mean absolute
    # error and root mean squared error.
    sum_sub_T.append(sub_T)
    sum_full_T.append(full_T)
    sum_Bias.append(Bias/N_REPS)
    sum_RMSE.append((RMSE/N_REPS)** 0.5)
print(sum_sub_T,sum_full_T,sum_Bias,sum_RMSE)

In [None]:
#Compute P('CO2Report' = 0|'Grunting' = 0) Cross-cluster
# The joint over R is assembled from per-clique networks fitted on the
# cliques of decom.PPDD(R) and collapsed with mar_pro; dividing by the
# estimated P(Grunting = 0) gives the conditional that is scored.
logging.getLogger('pgmpy').setLevel(logging.ERROR)

# Experiment settings, named once so that the loop bounds and the averaging
# divisors below cannot drift apart.
N_REPS = 100
SAMPLE_SIZES = [100,500,1000,2500,5000,7500,10000]
QUERY_VAR = 'CO2Report'
EVIDENCE_VAR = 'Grunting'
EVIDENCE = {EVIDENCE_VAR:0}

R = {EVIDENCE_VAR,QUERY_VAR}

# Reference network: structure of G, CPDs redrawn at random in every repetition.
bn = DiscreteBayesianNetwork()
bn.add_nodes_from(list(G.nodes))
bn.add_edges_from(list(G.edges))

# Full network refitted from the sampled data; only its run time is recorded.
learn_bn = DiscreteBayesianNetwork()
learn_bn.add_nodes_from(list(G.nodes))
learn_bn.add_edges_from(list(G.edges))

sum_sub_T,sum_full_T,sum_Bias,sum_RMSE = [],[],[],[]
for sample_size in SAMPLE_SIZES:
    Bias,RMSE = 0,0
    sub_T,full_T = 0,0
    for _rep in range(N_REPS):

        # Ground truth from freshly drawn CPDs, plus a data set sampled from them.
        bn.cpds = []
        get_random_cpds_with_labels(bn, state_names, inplace=True)
        ori_infer = BeliefPropagation(bn)
        ori_query = ori_infer.query(variables=[QUERY_VAR], evidence=dict(EVIDENCE), show_progress=False).values[0]
        df = BayesianModelSampling(bn).forward_sample(size=sample_size, show_progress=False)

        # --- Decomposed estimate: one fitted factor per clique (timed) ---
        start = time()
        T = decom.PPDD(R)
        clique_trees = JunctionTree(T.edges())
        # None (not 0) so that a missing evidence clique is reported instead
        # of silently producing inf/nan in the division below.
        evi_0 = None
        # The loop variable is named ``clique`` so it no longer shadows the
        # repetition counter of the enclosing loop.
        for clique in list(T.nodes):
            clique_vars = list(clique)
            sub_model = DiscreteBayesianNetwork(list(G.subgraph(clique_vars).edges))
            # As in the in-cluster cell: keep clique variables that have no
            # edge inside the clique, so the factor scope equals the clique
            # (mar_pro looks factors up by exactly that scope).
            sub_model.add_nodes_from(clique_vars)
            sub_model.cpds = []
            sub_model.fit(data=df[clique_vars],estimator=MaximumLikelihoodEstimator)
            # Joint factor of the clique = product of all fitted CPDs.
            L = [cpd.to_factor() for cpd in sub_model.cpds]
            factors = reduce(factor_product, L)
            if EVIDENCE_VAR in clique:
                sub_infer = VariableElimination(sub_model)
                evi_0 = sub_infer.query(variables=[EVIDENCE_VAR], evidence={}, show_progress=False).values[0]
            clique_trees.add_factors(factors)

        if evi_0 is None:
            raise ValueError("no clique of decom.PPDD(R) contains %r" % EVIDENCE_VAR)

        # Entry [0][0] of the joint over R is the cell where both variables
        # take their first state; dividing by P(evidence) conditions on it.
        OUR = (mar_pro(clique_trees,R).values[0][0])/evi_0
        sub_T += (time()-start)
        Bias += abs(OUR-ori_query)      # accumulated absolute error
        RMSE += (OUR-ori_query)**2      # accumulated squared error

        # --- Full network: fit and query on all columns (timed, result not scored) ---
        start = time()
        learn_bn.cpds = []
        learn_bn.fit(df,estimator=MaximumLikelihoodEstimator)
        full_infer = BeliefPropagation(learn_bn)
        full_query = full_infer.query(variables=[QUERY_VAR], evidence=dict(EVIDENCE), show_progress=False).values[0]
        full_T += time()-start

    # Per sample size: total seconds over all repetitions, mean absolute
    # error and root mean squared error.
    sum_sub_T.append(sub_T)
    sum_full_T.append(full_T)
    sum_Bias.append(Bias/N_REPS)
    sum_RMSE.append((RMSE/N_REPS)** 0.5)
print(sum_sub_T,sum_full_T,sum_Bias,sum_RMSE)