## Implementation of Metropolis-Hastings sampling to approximate probability queries on a Bayesian network.

In [17]:
import numpy as np
from calculations_helper import break_up_polytree, handle_dag_metropolis_hastings, join_distributions

def estimate_metropolis_hastings(p: float, iterations: int, network: dict, queries: list[int], evidence: dict[int,bool]) -> tuple[list[int], np.ndarray]:
    """Estimate the probability distribution of the query variables given the evidence.

    The network is first split with ``break_up_polytree``. If the split yields a
    single graph, the whole network is sampled in one go; otherwise each graph is
    sampled separately with only its own queries and evidence, and the resulting
    distributions are combined with ``join_distributions``.

    Args:
        p (float): determines probability of generating the next state via either Gibbs Sampling or Likelihood Weighting
        iterations (int): number of samples to take before we go with the estimate
        network (dict): underlying bayesian network
        queries (list[int]): list of query variables
        evidence (dict[int,bool]): mapping from each evidence variable to its observed value

    Returns:
        tuple[list[int], np.ndarray]: a pair ``(query_order, distribution)``.
            ``query_order`` lists the query variables in the order they were
            collected: the input ``queries`` object itself when there is a single
            graph, otherwise the queries of each graph concatenated in
            ``dag_map`` iteration order. ``distribution`` is the estimated
            probability distribution over the combinations of values of those
            variables; interpret it against ``query_order``, which may differ
            from the order of the ``queries`` argument.
    """
    dag_map, query_collections, evidence_collections = break_up_polytree(network, queries, evidence)

    if len(dag_map) == 1:
        # only one directed acyclic graph - sample the full network directly
        return queries, handle_dag_metropolis_hastings(iterations, p, network, queries, evidence)

    # each directed acyclic graph will output a probability distribution - we must
    # join them all and keep track of the order in which the variables were added
    reordered_queries = []
    prob_distributions = []
    for i, dag in dag_map.items():
        these_queries = query_collections[i]
        reordered_queries.extend(these_queries)
        # restrict the evidence to the variables that belong to this graph
        this_evidence = {v: evidence[v] for v in evidence_collections[i]}
        prob_distributions.append(handle_dag_metropolis_hastings(iterations, p, dag, these_queries, this_evidence))
    return reordered_queries, join_distributions(prob_distributions)

In [18]:
import json

queries = [0]
evidence = {3:True,4:True}

# Load the network up front so the file is closed before the sampling runs start.
# JSON is UTF-8 by specification, so do not depend on the platform default encoding.
with open('bn_test_1.json', encoding='utf-8') as f:
    bayesian_network = json.load(f)

# Run the same query for several values of p to compare the resulting estimates.
for p in (0.75, 0.85, 0.95):
    print(estimate_metropolis_hastings(p=p, network=bayesian_network, queries=queries, evidence=evidence, iterations=10000))

([0], array([0.63498007, 0.36501993]))
([0], array([0.62399139, 0.37600861]))
([0], array([0.6261972, 0.3738028]))


In [19]:
import json

queries = [0, 3]
evidence = {2:True}

# Load the network up front so the file is closed before the sampling runs start.
# JSON is UTF-8 by specification, so do not depend on the platform default encoding.
with open('bn_test_2.json', encoding='utf-8') as f:
    bayesian_network = json.load(f)

# Run the same query for several values of p to compare the resulting estimates.
for p in (0.75, 0.85, 0.95):
    print(estimate_metropolis_hastings(p=p, network=bayesian_network, queries=queries, evidence=evidence, iterations=10000))

([0, 3], array([0.35424193, 0.20473601, 0.13632263, 0.30469943]))
([0, 3], array([0.3582112 , 0.21026425, 0.14109589, 0.29042866]))
([0, 3], array([0.3491526 , 0.21747754, 0.14070791, 0.29266195]))


In [20]:
import json

queries = [1]
evidence = {2:False}

# Load the network up front so the file is closed before the sampling runs start.
# JSON is UTF-8 by specification, so do not depend on the platform default encoding.
with open('bn_test_3.json', encoding='utf-8') as f:
    bayesian_network = json.load(f)

# Run the same query for several values of p to compare the resulting estimates.
for p in (0.75, 0.85, 0.95):
    print(estimate_metropolis_hastings(p=p, network=bayesian_network, queries=queries, evidence=evidence, iterations=10000))

([1], array([0.68271642, 0.31728358]))
([1], array([0.65775061, 0.34224939]))
([1], array([0.64013862, 0.35986138]))


In [21]:
import json

queries = [1, 3]
evidence = {2:False, 5:True}

# Load the network up front so the file is closed before the sampling runs start.
# JSON is UTF-8 by specification, so do not depend on the platform default encoding.
with open('small_polytree.json', encoding='utf-8') as f:
    bayesian_network = json.load(f)

# Run the same query for several values of p to compare the resulting estimates.
for p in (0.75, 0.85, 0.95):
    print(estimate_metropolis_hastings(p=p, network=bayesian_network, queries=queries, evidence=evidence, iterations=10000))

([1, 3], array([0.1973437 , 0.32385414, 0.23585324, 0.24294891]))
([1, 3], array([0.19916366, 0.31220531, 0.25466806, 0.23396297]))
([1, 3], array([0.20207879, 0.30299126, 0.2428159 , 0.25211404]))


In [22]:
import json

queries = [1, 3]
evidence = {2:False, 5:True}

# Load the network up front so the file is closed before the sampling runs start.
# JSON is UTF-8 by specification, so do not depend on the platform default encoding.
with open('big_polytree.json', encoding='utf-8') as f:
    bayesian_network = json.load(f)

# Run the same query for several values of p to compare the resulting estimates.
for p in (0.75, 0.85, 0.95):
    print(estimate_metropolis_hastings(p=p, network=bayesian_network, queries=queries, evidence=evidence, iterations=10000))

([1, 3], array([0.27625956, 0.24145218, 0.24485337, 0.2374349 ]))
([1, 3], array([0.27474401, 0.23484631, 0.24821169, 0.24219799]))
([1, 3], array([0.25674414, 0.24618258, 0.24983827, 0.24723501]))
