## Implementation of Variable Elimination to compute exact probability queries on a Bayesian Network.

In [1]:
import numpy as np
from calculations_helper import break_up_polytree, handle_dag_variable_elimination, join_distributions

def estimate_variable_elimination(queries: list[int], evidence: dict[int, bool], entire_network: dict) -> tuple[list[int], np.ndarray]:
    """Return the probability distribution over the query variables of a Bayesian network.

    The network is first split into its separate graphs with ``break_up_polytree``.
    If there is a single graph, variable elimination runs once on the whole network.
    Otherwise it runs once per graph, using only the queries and evidence assigned
    to that graph, and the per-graph distributions are combined with
    ``join_distributions``.

    Args:
        queries (list[int]): variables whose value probabilities we want to query
        evidence (dict[int, bool]): observed variables mapped to their observed values; these affect the query probabilities
        entire_network (dict): underlying network which reveals probabilities of each node given its parents' values

    Returns:
        tuple[list[int], np.ndarray]: the query variables in the order that matches the
        returned distribution, and the probability distribution over the possible
        combinations of their values (intended layout: 2^{#query variables} entries,
        index 0 is all false and index 2^{#query variables}-1 is all true).
        With several graphs the variable order follows the iteration order of the
        graphs, so it may differ from the order of ``queries``.
    """
    dag_map, query_collections, evidence_collections = break_up_polytree(entire_network, queries, evidence)

    if len(dag_map) == 1:
        # Only one directed acyclic graph: run on the whole network with the
        # caller's queries and evidence unchanged.
        return queries, handle_dag_variable_elimination(entire_network, queries, evidence)

    # Each directed acyclic graph outputs its own probability distribution - we
    # must join them all and keep track of the order of the variables present.
    reordered_queries = []
    prob_distributions = []
    for i, dag in dag_map.items():
        these_queries = query_collections[i]
        reordered_queries.extend(these_queries)
        # Restrict the evidence to the variables that belong to this graph.
        this_evidence = {v: evidence[v] for v in evidence_collections[i]}
        prob_distributions.append(handle_dag_variable_elimination(dag, these_queries, this_evidence))
    return reordered_queries, join_distributions(prob_distributions)

In [None]:
import json
import time

# Benchmark queries for the small polytree; each query is paired
# position-wise with the evidence assignment at the same index.
query_list = [[1, 3], [4, 5, 7], [2, 6, 10, 12]]
evidence_list = [
    {2: False, 5: True},
    {2: True, 3: False, 6: False},
    {3: False, 4: False, 5: True, 15: True},
]

times = []
results = []

for queries, evidence in zip(query_list, evidence_list):
    # Parse the network again for every query so each run starts from a
    # freshly loaded structure; the file is closed before inference begins.
    with open('small_polytree.json') as f:
        bayesian_network = json.load(f)

    # Only the inference call is timed, not the JSON parsing.
    start_time = time.perf_counter()
    results.append(estimate_variable_elimination(queries, evidence, bayesian_network))
    end_time = time.perf_counter()
    times.append(end_time - start_time)

# Report the variables, distribution and wall-clock runtime of each query.
for res, t in zip(results, times):
    print(f"Variables={res[0]}\nProbabilities={res[1]}\nRuntime={t}\n\n")

In [None]:
import json
import time

# Benchmark queries for the big polytree; each query is paired
# position-wise with the evidence assignment at the same index.
query_list = [[1, 3], [4, 5, 7], [2, 6, 10, 12]]
evidence_list = [
    {2: False, 5: True},
    {2: True, 3: False, 6: False},
    {3: False, 4: False, 5: True, 15: True},
]

times = []
results = []

for queries, evidence in zip(query_list, evidence_list):
    # Parse the network again for every query so each run starts from a
    # freshly loaded structure; the file is closed before inference begins.
    with open('big_polytree.json') as f:
        bayesian_network = json.load(f)

    # Only the inference call is timed, not the JSON parsing.
    start_time = time.perf_counter()
    results.append(estimate_variable_elimination(queries, evidence, bayesian_network))
    end_time = time.perf_counter()
    times.append(end_time - start_time)

# Report the variables, distribution and wall-clock runtime of each query.
for res, t in zip(results, times):
    print(f"Variables={res[0]}\nProbabilities={res[1]}\nRuntime={t}\n\n")

Variables=[1, 3]
Probabilities=[9.27595609e-01 6.15892885e-03 6.58085148e-02 4.36946829e-04]
Runtime=15.704179208012647


Variables=[4, 5, 7]
Probabilities=[0.60774379 0.39225621]
Runtime=0.1250058340083342


Variables=[2, 6, 10, 12]
Probabilities=[0.87277758 0.12722242]
Runtime=0.003926417004549876




In [None]:
import json
import time

# Benchmark queries for the giant polytree; each query is paired
# position-wise with the evidence assignment at the same index.
query_list = [[1, 3], [4, 5, 7], [2, 6, 10, 12]]
evidence_list = [
    {2: False, 5: True},
    {2: True, 3: False, 6: False},
    {3: False, 4: False, 5: True, 15: True},
]

times = []
results = []

for queries, evidence in zip(query_list, evidence_list):
    # Parse the network again for every query so each run starts from a
    # freshly loaded structure; the file is closed before inference begins.
    with open('giant_polytree.json') as f:
        bayesian_network = json.load(f)

    # Only the inference call is timed, not the JSON parsing.
    start_time = time.perf_counter()
    results.append(estimate_variable_elimination(queries, evidence, bayesian_network))
    end_time = time.perf_counter()
    times.append(end_time - start_time)

# Report the variables, distribution and wall-clock runtime of each query.
for res, t in zip(results, times):
    print(f"Variables={res[0]}\nProbabilities={res[1]}\nRuntime={t}\n\n")

Variables=[1, 3]
Probabilities=[1. 0. 0. 0.]
Runtime=937.5399549590074


Variables=[4, 5, 7]
Probabilities=[0.99128646 0.00871354]
Runtime=25.300940750006703


Variables=[2, 6, 10, 12]
Probabilities=[9.99999039e-01 9.61066056e-07 0.00000000e+00 0.00000000e+00]
Runtime=0.1896886659960728


