## Implementation of Gibbs Sampling to approximate probability queries from a Bayesian Network.

In [14]:
import random
import numpy as np
from calculations_helper import find_corresponding_rows, calculate_probability

def estimate_gibbs(iterations: int, network: dict, queries: list[int], evidence: dict[int,bool]) -> np.ndarray:
    """Estimate the distribution of the query variables given the evidence using Gibbs sampling.

    Every non-evidence variable starts at a uniformly random truth value. On
    each iteration one non-evidence variable is picked at random, resampled
    from the probability returned by ``calculate_probability`` given the
    current values of all other variables, and the resulting combination of
    query values is tallied. Every iteration is counted (there is no burn-in
    period).

    Args:
        iterations (int): number of samples to take; must be at least 1
        network (dict): underlying bayesian network; its keys must be
            convertible to ``int`` variable ids
        queries (list[int]): list of query variables
        evidence (dict[int,bool]): mapping of evidence variables to their
            observed values; it is not modified

    Returns:
        np.ndarray: array of length ``2 ** len(queries)`` holding the fraction
            of samples that fell on each combination of query values. The
            index of a combination is whatever ``find_corresponding_rows``
            reports for it.

    Raises:
        ValueError: if ``iterations`` is smaller than 1
    """
    if iterations < 1:
        # Dividing the tallies by zero (or a negative count) would yield NaNs or nonsense.
        raise ValueError(f"iterations must be at least 1, got {iterations}")

    prob_distribution = np.zeros(shape=1 << len(queries))

    # create list of non-evidence variables
    non_evidence_variables = [int(i) for i in network if int(i) not in evidence]

    # Sample on a private copy so the caller's evidence dict is never polluted
    # with the sampled values of the non-evidence variables.
    current_state = dict(evidence)
    # randomly initialize all the variables
    for v in non_evidence_variables:
        current_state[v] = (random.random() < 0.5)

    for _ in range(iterations):
        # With no free variables the state is fully fixed by the evidence, so
        # there is nothing to resample; the fixed state is still tallied below.
        if non_evidence_variables:
            var_to_change = random.choice(non_evidence_variables)
            # Remove the variable so the helper only sees the values of the others.
            del current_state[var_to_change]
            # calculate_probability is expected to return the probability of
            # var_to_change being True given everything else.
            p = calculate_probability(var_to_change, current_state, network)
            current_state[var_to_change] = (random.random() < p)
        # see how our queries showed up
        query_values = [1 if current_state[q] else 0 for q in queries]
        # find the entry in our probability distribution that corresponds with this combination of query values
        posn = find_corresponding_rows(query_values, queries, queries)[0]
        # update the distribution accordingly
        prob_distribution[posn] += 1.0

    return prob_distribution / iterations # normalization into a probability

In [15]:
import json

# Estimate the distribution of variable 0 given that variables 3 and 4 are True.
queries = [0]
evidence = {3: True, 4: True}

# Read the network with an explicit encoding (JSON is UTF-8) and release the
# file handle before starting the sampling run.
with open('bn_test_1.json', encoding='utf-8') as f:
    bayesian_network = json.load(f)

print(estimate_gibbs(10000, bayesian_network, queries, evidence))

[0.6969 0.3031]


In [16]:
import json

# Estimate the joint distribution of variables 0 and 3 given that variable 2 is True.
queries = [0, 3]
evidence = {2: True}

# Read the network with an explicit encoding (JSON is UTF-8) and release the
# file handle before starting the sampling run.
with open('bn_test_2.json', encoding='utf-8') as f:
    bayesian_network = json.load(f)

print(estimate_gibbs(10000, bayesian_network, queries, evidence))

[0.3259 0.0563 0.1023 0.5155]


In [17]:
import json

# Estimate the distribution of variable 1 given that variable 2 is False.
queries = [1]
evidence = {2: False}

# Read the network with an explicit encoding (JSON is UTF-8) and release the
# file handle before starting the sampling run.
with open('bn_test_3.json', encoding='utf-8') as f:
    bayesian_network = json.load(f)

print(estimate_gibbs(10000, bayesian_network, queries, evidence))

[0.8801 0.1199]
