## Implementation of Variable Elimination to compute exact probability queries on a Bayesian Network.

In [1]:
import numpy as np
from calculations_helper import disect_trees, handle_dag_variable_elimination, join_distributions

In [3]:
def estimate_variable_elimination(queries: list[int], evidence: dict[int, bool], entire_network: dict) -> tuple[list[int], np.ndarray]:
    """Given a Bayesian network and query and evidence variables, return the probability distribution over all possible values of the query variables

    The network is first split with ``disect_trees``. If that yields a single
    graph, the whole network is handed to ``handle_dag_variable_elimination``
    with the queries in the caller's order. Otherwise each graph is solved
    separately, with its own query variables sorted ascending, and the
    per-graph distributions are merged with ``join_distributions``.

    Args:
        queries (list[int]): variables whose value probabilities we want to query
        evidence (dict[int, bool]): observed variables mapped to their observed values, which affect the query probabilities
        entire_network (dict): underlying network which reveals probabilities of each node given its parents' values

    Returns:
        tuple[list[int], np.ndarray]: the query variables in the order the distribution refers to them, and the probability of each combination of their values (2^{#query variables} entries, 0 is all false and 2^{#query variables}-1 is all true)
    """
    # if the network is really several disconnected graphs, break it up
    dag_map = disect_trees(entire_network)

    if len(dag_map) == 1:
        # only one directed acyclic graph - nothing to split or join
        return queries, handle_dag_variable_elimination(entire_network, queries, evidence)

    # For every graph gather (graph, its query variables, its evidence).
    # Membership is tested with str(v); presumably the node keys are strings
    # because the network is loaded from JSON - TODO confirm.
    # The query variables are sorted, which affects the value order in the
    # soon-to-be-calculated probability distributions.
    components = [
        (
            dag,
            sorted(v for v in queries if str(v) in dag),
            {v: value for v, value in evidence.items() if str(v) in dag},
        )
        for dag in dag_map.values()
    ]

    # Disconnected graphs are independent of each other, so a graph holding no
    # query variable cannot influence the answer; skip it rather than running
    # the elimination with an empty query list. If no graph holds a query
    # variable at all, keep every graph so the helpers see the same calls as
    # before.
    with_queries = [component for component in components if component[1]]
    if with_queries:
        components = with_queries

    # each remaining graph outputs a probability distribution - we must join
    # them all and keep track of the variables present, in the same order
    reordered_queries = []
    prob_distributions = []
    for dag, these_queries, this_evidence in components:
        reordered_queries.extend(these_queries)
        prob_distributions.append(handle_dag_variable_elimination(dag, these_queries, this_evidence))
    return reordered_queries, join_distributions(prob_distributions)

In [4]:
import json

# Query variable 0 given that variables 3 and 4 are both observed True.
evidence = {3: True, 4: True}
queries = [0]

# Load the network description, then run the query once the file is closed.
with open('bn_test_1.json') as network_file:
    bayesian_network = json.load(network_file)
print(estimate_variable_elimination(queries, evidence, bayesian_network))

([0], array([0.71582816, 0.28417184]))


In [5]:
import json

# Joint query over variables 0 and 3 given that variable 2 is observed True.
evidence = {2: True}
queries = [0, 3]

# Load the network description, then run the query once the file is closed.
with open('bn_test_2.json') as network_file:
    bayesian_network = json.load(network_file)
print(estimate_variable_elimination(queries, evidence, bayesian_network))

([0, 3], array([0.34953879, 0.056898  , 0.10097538, 0.49258784]))


In [6]:
import json

# Query variable 1 given that variable 2 is observed False.
evidence = {2: False}
queries = [1]

# Load the network description, then run the query once the file is closed.
with open('bn_test_3.json') as network_file:
    bayesian_network = json.load(network_file)
print(estimate_variable_elimination(queries, evidence, bayesian_network))

([1], array([0.87746479, 0.12253521]))


In [7]:
import json

# Joint query over variables 1 and 3 with variable 2 observed False and
# variable 5 observed True, on the small multi-graph network.
# NOTE(review): the recorded output of this cell is a KeyError.
evidence = {2: False, 5: True}
queries = [1, 3]

# Load the network description, then run the query once the file is closed.
with open('small_polytree.json') as network_file:
    bayesian_network = json.load(network_file)
print(estimate_variable_elimination(queries, evidence, bayesian_network))

KeyError: 1

In [None]:
import json

# Same query and evidence as the small polytree run, on the larger network.
# NOTE(review): the recorded output of this cell is an IndexError.
evidence = {2: False, 5: True}
queries = [1, 3]

# Load the network description, then run the query once the file is closed.
with open('big_polytree.json') as network_file:
    bayesian_network = json.load(network_file)
print(estimate_variable_elimination(queries, evidence, bayesian_network))

IndexError: list index out of range