## Implementation of Gibbs Sampling to approximate probability queries from a Bayesian Network.

In [1]:
import numpy as np
from calculations_helper import break_up_polytree, handle_dag_gibbs_sampling, join_distributions

def estimate_gibbs(iterations: int, network: dict, queries: list[int], evidence: dict[int, bool]) -> tuple[list[int], np.ndarray]:
    """Estimate the joint distribution of the query variables given the evidence.

    The network is first handed to ``break_up_polytree``. When that yields a
    single graph, the whole network is sampled directly; otherwise every graph
    is sampled on its own with the queries/evidence assigned to it, and the
    per-graph distributions are merged with ``join_distributions``.

    Args:
        iterations (int): number of samples to take before we go with the estimate
        network (dict): underlying bayesian network
        queries (list[int]): list of query variables
        evidence (dict[int,bool]): evidence variables mapped to their observed values

    Returns:
        tuple[list[int], np.ndarray]: a pair ``(variables, distribution)``.
        ``variables`` is the list of query variables that accompanies
        ``distribution``: the caller's own ``queries`` list in the single-graph
        case, otherwise the queries regrouped graph by graph (so the order may
        differ from the input). ``distribution`` is the estimated probability
        distribution over the combinations the query variables can take on.
    """
    dag_map, query_collections, evidence_collections = break_up_polytree(network, queries, evidence)

    if len(dag_map) == 1:
        # only one directed acyclic graph: sample the network as-is, query order is untouched
        return queries, handle_dag_gibbs_sampling(iterations, network, queries, evidence)

    # each directed acyclic graph will output a probability distribution - we must
    # join them all and keep track of the order in which the variables appear
    reordered_queries = []
    prob_distributions = []
    for i, dag in dag_map.items():
        these_queries = query_collections[i]
        reordered_queries.extend(these_queries)
        # restrict the evidence to the variables assigned to this graph
        this_evidence = {v: evidence[v] for v in evidence_collections[i]}
        prob_distributions.append(handle_dag_gibbs_sampling(iterations, dag, these_queries, this_evidence))
    return reordered_queries, join_distributions(prob_distributions)

In [2]:
import json
import time

# Benchmark of estimate_gibbs on 'small_polytree.json'.
# The i-th query list is paired with the i-th evidence assignment.
query_list = [[1, 3], [4, 5, 7], [2,6,10,12]]
evidence_list = [{2:False, 5:True}, {2:True, 3:False, 6:False}, {3:False, 4:False, 5:True, 15:True}]

times = []
results = []

for queries, evidence in zip(query_list, evidence_list):
    # The network is parsed again for every query so each run starts from a fresh dict.
    # Only the load sits inside the `with`, so the file is closed before sampling starts.
    with open('small_polytree.json') as f:
        bayesian_network = json.load(f)

    # Time only the estimation itself (10000 iterations), not the file parsing.
    start_time = time.perf_counter()
    results.append(estimate_gibbs(10000, bayesian_network, queries, evidence))
    end_time = time.perf_counter()
    times.append(end_time - start_time)

# Each result is a (variables, probabilities) pair; report it with its runtime in seconds.
for res, t in zip(results, times):
    print(f"Variables={res[0]}\nProbabilities={res[1]}\nRuntime={t}\n\n")

Variables=[1, 3]
Probabilities=[1.000e-04 9.494e-01 5.900e-03 4.460e-02]
Runtime=0.5411861250031507


Variables=[4, 5, 7]
Probabilities=[0.0812 0.0411 0.0025 0.0015 0.5153 0.3278 0.0194 0.0112]
Runtime=0.5305484999989858


Variables=[2, 6, 10, 12]
Probabilities=[0.01087731 0.00622269 0.02563483 0.01466517 0.01284922 0.00735078
 0.0260801  0.0149199  0.07741337 0.04428663 0.25221365 0.14428635
 0.05400489 0.03089511 0.17702663 0.10127337]
Runtime=0.7587917920027394




In [3]:
import json
import time

# Benchmark of estimate_gibbs on 'big_polytree.json'.
# The i-th query list is paired with the i-th evidence assignment.
query_list = [[1, 3], [4, 5, 7], [2,6,10,12]]
evidence_list = [{2:False, 5:True}, {2:True, 3:False, 6:False}, {3:False, 4:False, 5:True, 15:True}]

times = []
results = []

for queries, evidence in zip(query_list, evidence_list):
    # The network is parsed again for every query so each run starts from a fresh dict.
    # Only the load sits inside the `with`, so the file is closed before sampling starts.
    with open('big_polytree.json') as f:
        bayesian_network = json.load(f)

    # Time only the estimation itself (10000 iterations), not the file parsing.
    start_time = time.perf_counter()
    results.append(estimate_gibbs(10000, bayesian_network, queries, evidence))
    end_time = time.perf_counter()
    times.append(end_time - start_time)

# Each result is a (variables, probabilities) pair; report it with its runtime in seconds.
for res, t in zip(results, times):
    print(f"Variables={res[0]}\nProbabilities={res[1]}\nRuntime={t}\n\n")

Variables=[1, 3]
Probabilities=[0.7564 0.014  0.2142 0.0154]
Runtime=1.7315806249971502


Variables=[4, 5, 7]
Probabilities=[0.4291 0.3506 0.052  0.0245 0.0916 0.0355 0.01   0.0067]
Runtime=1.6774743749992922


Variables=[2, 6, 10, 12]
Probabilities=[1.460e-02 3.000e-04 1.944e-01 8.770e-02 9.000e-03 2.700e-03 4.644e-01
 1.122e-01 3.900e-03 0.000e+00 1.920e-02 1.320e-02 1.500e-03 0.000e+00
 6.820e-02 8.700e-03]
Runtime=1.7536008749884786




In [4]:
import json
import time

# Benchmark of estimate_gibbs on 'giant_polytree.json'.
# The i-th query list is paired with the i-th evidence assignment.
query_list = [[1, 3], [4, 5, 7], [2,6,10,12]]
evidence_list = [{2:False, 5:True}, {2:True, 3:False, 6:False}, {3:False, 4:False, 5:True, 15:True}]

times = []
results = []

for queries, evidence in zip(query_list, evidence_list):
    # The network is parsed again for every query so each run starts from a fresh dict.
    # Only the load sits inside the `with`, so the file is closed before sampling starts.
    with open('giant_polytree.json') as f:
        bayesian_network = json.load(f)

    # Time only the estimation itself (10000 iterations), not the file parsing.
    start_time = time.perf_counter()
    results.append(estimate_gibbs(10000, bayesian_network, queries, evidence))
    end_time = time.perf_counter()
    times.append(end_time - start_time)

# Each result is a (variables, probabilities) pair; report it with its runtime in seconds.
for res, t in zip(results, times):
    print(f"Variables={res[0]}\nProbabilities={res[1]}\nRuntime={t}\n\n")

Variables=[1, 3]
Probabilities=[0.1278 0.7523 0.069  0.0509]
Runtime=3.353126333997352


Variables=[4, 5, 7]
Probabilities=[0.1    0.0082 0.3992 0.0087 0.144  0.0117 0.3248 0.0034]
Runtime=3.300671583012445


Variables=[2, 6, 10, 12]
Probabilities=[0.039  0.0011 0.0114 0.0027 0.1936 0.0152 0.092  0.0409 0.0728 0.0117
 0.1395 0.0139 0.1958 0.022  0.1334 0.015 ]
Runtime=3.4004718749929452


