## Implementation of variable elimination for computing probability queries on a Bayesian network.

In [1]:
import numpy as np
from calculations_helper import break_up_polytree, handle_dag_variable_elimination, join_distributions

def estimate_variable_elimination(queries: list[int], evidence: dict[int,bool], entire_network: dict) -> tuple[list[int], np.ndarray]:
    """Given a Bayesian network, query variables and evidence, return the probability distribution over all value combinations of the query variables.

    The network is first split with ``break_up_polytree``. If that yields a single
    component, the whole network is passed to ``handle_dag_variable_elimination``.
    Otherwise each component is solved with only its own queries and evidence, and
    the per-component distributions are combined with ``join_distributions``.

    Args:
        queries (list[int]): variables whose value probabilities we want to query
        evidence (dict[int,bool]): maps each observed variable to its observed value; these affect the query probabilities
        entire_network (dict): underlying network which reveals probabilities of each node given its parents' values

    Returns:
        tuple[list[int],np.ndarray]: the query variables in the order that matches the distribution, and the
        probability distribution over every combination of their values (2^{#query variables} entries, 0 is all
        false and 2^{#query variables}-1 is all true). With several components the variables are listed grouped
        by component, so their order may differ from ``queries``.
    """
    dag_map, query_collections, evidence_collections = break_up_polytree(entire_network, queries, evidence)

    if len(dag_map) == 1:
        # only one directed acyclic graph: solve on the full network, caller's query order is kept
        return queries, handle_dag_variable_elimination(entire_network, queries, evidence)

    # each directed acyclic graph will output a probability distribution - we must join them all and keep track of the variables present
    reordered_queries = []
    prob_distributions = []
    for i, dag in dag_map.items():
        these_queries = query_collections[i]
        reordered_queries.extend(these_queries)
        # restrict the evidence to the variables assigned to this component
        this_evidence = {v: evidence[v] for v in evidence_collections[i]}
        prob_distributions.append(handle_dag_variable_elimination(dag, these_queries, this_evidence))
    return reordered_queries, join_distributions(prob_distributions)

In [2]:
import json
import time

# Benchmark cases: the i-th list of query variables is paired with the i-th evidence assignment.
query_list = [[1, 3], [4, 5, 7], [2, 6, 10, 12]]
evidence_list = [{2: False, 5: True}, {2: True, 3: False, 6: False}, {3: False, 4: False, 5: True, 15: True}]

times = []
results = []

for queries, evidence in zip(query_list, evidence_list):
    # The network is re-read for every query so each run gets a freshly parsed copy.
    # JSON text is UTF-8, so state it instead of relying on the platform's locale encoding.
    with open('small_polytree.json', encoding='utf-8') as f:
        bayesian_network = json.load(f)
    # The file is closed before inference starts; only the inference call is timed.
    start_time = time.perf_counter()
    results.append(estimate_variable_elimination(queries, evidence, bayesian_network))
    end_time = time.perf_counter()
    times.append(end_time - start_time)

# Report the variable order, distribution and wall-clock runtime of each query.
for res, t in zip(results, times):
    print(f"Variables={res[0]}\nProbabilities={res[1]}\nRuntime={t}\n\n")

Variables=[1, 3]
Probabilities=[0.00331494 0.50896417 0.18047446 0.30724643]
Runtime=0.0021042080043116584


Variables=[4, 5, 7]
Probabilities=[0.06153927 0.06153927 0.00106582 0.00106582 0.42994845 0.42994845
 0.00744646 0.00744646]
Runtime=0.0007107499986886978


Variables=[2, 6, 10, 12]
Probabilities=[0.02629805 0.00179495 0.01383838 0.00094452 0.01002877 0.0006845
 0.00770981 0.00052623 0.3990633  0.02723767 0.20999231 0.01433282
 0.15218283 0.01038709 0.11699351 0.00798528]
Runtime=0.0016997500060824677




In [3]:
import json
import time

# Benchmark cases: the i-th list of query variables is paired with the i-th evidence assignment.
query_list = [[1, 3], [4, 5, 7], [2, 6, 10, 12]]
evidence_list = [{2: False, 5: True}, {2: True, 3: False, 6: False}, {3: False, 4: False, 5: True, 15: True}]

times = []
results = []

for queries, evidence in zip(query_list, evidence_list):
    # The network is re-read for every query so each run gets a freshly parsed copy.
    # JSON text is UTF-8, so state it instead of relying on the platform's locale encoding.
    with open('big_polytree.json', encoding='utf-8') as f:
        bayesian_network = json.load(f)
    # The file is closed before inference starts; only the inference call is timed.
    start_time = time.perf_counter()
    results.append(estimate_variable_elimination(queries, evidence, bayesian_network))
    end_time = time.perf_counter()
    times.append(end_time - start_time)

# Report the variable order, distribution and wall-clock runtime of each query.
for res, t in zip(results, times):
    print(f"Variables={res[0]}\nProbabilities={res[1]}\nRuntime={t}\n\n")

Variables=[1, 3]
Probabilities=[0.83778059 0.0993041  0.05624809 0.00666722]
Runtime=0.04892166699573863


Variables=[4, 5, 7]
Probabilities=[0.31336413 0.17118614 0.01793244 0.00979623 0.29834193 0.16297973
 0.01707279 0.00932661]
Runtime=0.05232854199130088


Variables=[2, 6, 10, 12]
Probabilities=[1.28535221e-02 9.66295800e-04 4.94529378e-01 3.71774877e-02
 3.85230147e-03 2.89606436e-04 1.48214336e-01 1.11423849e-02
 2.36452524e-03 1.77759122e-04 9.09732899e-02 6.83914550e-03
 4.49132563e-03 3.37646682e-04 1.72800299e-01 1.29906964e-02]
Runtime=0.001534499999252148




In [None]:
import json
import time

# Benchmark cases: the i-th list of query variables is paired with the i-th evidence assignment.
query_list = [[1, 3], [4, 5, 7], [2, 6, 10, 12]]
evidence_list = [{2: False, 5: True}, {2: True, 3: False, 6: False}, {3: False, 4: False, 5: True, 15: True}]

times = []
results = []

for queries, evidence in zip(query_list, evidence_list):
    # The network is re-read for every query so each run gets a freshly parsed copy.
    # JSON text is UTF-8, so state it instead of relying on the platform's locale encoding.
    with open('giant_polytree.json', encoding='utf-8') as f:
        bayesian_network = json.load(f)
    # The file is closed before inference starts; only the inference call is timed.
    start_time = time.perf_counter()
    results.append(estimate_variable_elimination(queries, evidence, bayesian_network))
    end_time = time.perf_counter()
    times.append(end_time - start_time)

# Report the variable order, distribution and wall-clock runtime of each query.
for res, t in zip(results, times):
    print(f"Variables={res[0]}\nProbabilities={res[1]}\nRuntime={t}\n\n")