## Implementation of Variable Elimination to compute exact answers to probability queries on a Bayesian Network.

In [9]:
import numpy as np
from disjoint_set import DisjointSet
from calculations_helper import create_factors, handle_vars

In [None]:
def estimate_variable_elimination(queries: list[int], evidence: dict[int, bool], network: dict) -> np.ndarray:
    """Return the normalized probability distribution over the query variables given the evidence.

    Factors are built from the network and the evidence, every hidden variable
    (neither queried nor observed) is eliminated, and the factor that remains
    over the query variables is normalized so that it sums to 1.

    Args:
        queries (list[int]): variables whose joint value probabilities we want to query
        evidence (dict[int, bool]): maps each observed variable to its observed value; these
            observations condition the query probabilities
        network (dict): underlying network which reveals probabilities of each node given its
            parents' values; its length is used as the number of variables, which are
            assumed to be numbered 0..len(network)-1

    Returns:
        np.ndarray: probability distribution over the value combinations of the query variables
            (2^{#query variables} entries; index 0 is all false and 2^{#query variables}-1 is all true)
    """
    # grab the list of factors; each factor has its own probability distribution, which
    # will depend on its parents should they exist
    factor_index_to_factor, var_to_factor_indices = create_factors(network, evidence)

    # factors get merged during elimination, so several factor indices end up referring to
    # the same merged factor; the disjoint set lets us avoid treating them as distinct
    # when collecting the factors relevant to a variable
    factor_tracker = DisjointSet()
    for i in range(len(factor_index_to_factor)):
        factor_tracker.add_element(id=i)

    # hidden variables are those that are neither queried nor observed
    query_set = set(queries)
    hidden_vars = [i for i in range(len(network)) if i not in query_set and i not in evidence]
    # order the hidden variables by their number of relevant factors, largest first
    # NOTE(review): this eliminates the most-connected variables first; confirm that this
    # is the ordering handle_vars expects (min-factor heuristics usually go ascending)
    hidden_vars.sort(key=lambda var: len(var_to_factor_indices[var]), reverse=True)

    # eliminate every hidden variable, then combine what is left over the query variables
    handle_vars(
        vars=hidden_vars,
        eliminate=True,
        factor_index_to_factor=factor_index_to_factor,
        factor_tracker=factor_tracker,
        var_to_factor_indices=var_to_factor_indices,
    )
    # with eliminate=False the helper's return value is the factor over the queries
    result = handle_vars(
        vars=queries,
        eliminate=False,
        factor_index_to_factor=factor_index_to_factor,
        factor_tracker=factor_tracker,
        var_to_factor_indices=var_to_factor_indices,
    )
    # normalize so that the entries form a probability distribution
    return result / np.sum(result)

In [None]:
import json

# Query variable 0 given that variables 3 and 4 are observed to be true.
queries = [0]
evidence = {3: True, 4: True}

# JSON is read with an explicit UTF-8 encoding so the result does not depend on the
# platform's default locale; the file is closed before inference starts.
with open('bn_test_1.json', encoding='utf-8') as f:
    bayesian_network = json.load(f)

print(estimate_variable_elimination(queries, evidence, bayesian_network))

[0.71582816 0.28417184]


In [None]:
import json

# Query the joint distribution of variables 0 and 3 given that variable 2 is observed to be true.
queries = [0, 3]
evidence = {2: True}

# JSON is read with an explicit UTF-8 encoding so the result does not depend on the
# platform's default locale; the file is closed before inference starts.
with open('bn_test_2.json', encoding='utf-8') as f:
    bayesian_network = json.load(f)

print(estimate_variable_elimination(queries, evidence, bayesian_network))

[0.34953879 0.056898   0.10097538 0.49258784]


In [None]:
import json

# Query variable 1 given that variable 2 is observed to be false.
queries = [1]
evidence = {2: False}

# JSON is read with an explicit UTF-8 encoding so the result does not depend on the
# platform's default locale; the file is closed before inference starts.
with open('bn_test_3.json', encoding='utf-8') as f:
    bayesian_network = json.load(f)

print(estimate_variable_elimination(queries, evidence, bayesian_network))

[0.87746479 0.12253521]
