## Metropolis-Hastings implementation for approximating probability queries on a Bayesian network.

In [2]:
import numpy as np
from calculations_helper import break_up_polytree, handle_dag_metropolis_hastings, join_distributions

def estimate_metropolis_hastings(p: float, iterations: int, network: dict, queries: list[int], evidence: dict[int,bool]) -> tuple[list[int], float, np.ndarray]:
    """Estimate the probability distribution of the query variables given the evidence.

    The network is first split with ``break_up_polytree``. Each resulting
    component is sampled independently with ``handle_dag_metropolis_hastings``
    and, when there is more than one component, the per-component
    distributions are combined with ``join_distributions``.

    Args:
        p (float): determines probability of generating the next state via either Gibbs Sampling or Likelihood Weighting
        iterations (int): number of samples to take before we go with the estimate
        network (dict): underlying bayesian network
        queries (list[int]): list of query variables
        evidence (dict[int,bool]): mapping of evidence variables to their observed values

    Returns:
        tuple[list[int], float, np.ndarray]: the query variables in the order
        that matches the returned distribution, the ``p`` value that was used,
        and the resulting probability distribution. Both branches return this
        same three-element shape.
    """
    dag_map, query_collections, evidence_collections = break_up_polytree(network, queries, evidence)

    if len(dag_map) == 1:
        # Only one directed acyclic graph: sample the network as given.
        # `p` is included so the result has the same (variables, p, distribution)
        # layout as the multi-component case; callers index res[1] / res[2].
        # NOTE(review): this passes `network` rather than the single entry of
        # dag_map, as the original did - confirm the two are interchangeable.
        return queries, p, handle_dag_metropolis_hastings(iterations, p, network, queries, evidence)

    # Each directed acyclic graph yields its own probability distribution - we
    # must join them all and keep track of the order of the variables present.
    reordered_queries = []
    prob_distributions = []
    for i, dag in dag_map.items():
        these_queries = query_collections[i]
        reordered_queries.extend(these_queries)
        # Restrict the evidence to the variables that belong to this component.
        this_evidence = {v: evidence[v] for v in evidence_collections[i]}
        prob_distributions.append(handle_dag_metropolis_hastings(iterations, p, dag, these_queries, this_evidence))
    return reordered_queries, p, join_distributions(prob_distributions)

In [8]:
import json
import time

# Benchmark: three (query, evidence) pairs on the small polytree, each run with
# three values of p and 10000 iterations; results and wall-clock times are
# collected in parallel lists and printed at the end.
query_list = [[1, 3], [4, 5, 7], [2,6,10,12]]
evidence_list = [{2:False, 5:True}, {2:True, 3:False, 6:False}, {3:False, 4:False, 5:True, 15:True}]

times = []
results = []

for queries, evidence in zip(query_list, evidence_list):
    # Keep the `with` body to the load only, so the file handle is released
    # before the (multi-second) sampling runs start.
    # The network is still reloaded once per query, as in the original.
    with open('small_polytree.json', encoding='utf-8') as f:
        bayesian_network = json.load(f)
    for p in [0.75, 0.85, 0.95]:
        start_time = time.perf_counter()
        results.append(estimate_metropolis_hastings(p, 10000, bayesian_network, queries, evidence))
        end_time = time.perf_counter()
        times.append(end_time - start_time)
# Each result is indexed as (variables, p, distribution).
for res, t in zip(results, times):
    print(f"Variables={res[0]}\np-value={res[1]}\nProbabilities={res[2]}\nRuntime={t}\n\n")

Variables=[1, 3]
p-value=0.75
Probabilities=[0.18931772 0.32757266 0.24118717 0.24192246]
Runtime=1.6600964999961434


Variables=[1, 3]
p-value=0.85
Probabilities=[0.20630713 0.30986505 0.24308437 0.24074346]
Runtime=1.7457819999981439


Variables=[1, 3]
p-value=0.95
Probabilities=[0.20072885 0.30910001 0.24681479 0.24335635]
Runtime=1.834407583999564


Variables=[4, 5, 7]
p-value=0.75
Probabilities=[0.13831591 0.11672678 0.10422099 0.09986396 0.16920242 0.14722573
 0.11889206 0.10555216]
Runtime=1.7616162079939386


Variables=[4, 5, 7]
p-value=0.85
Probabilities=[0.14967966 0.11939538 0.10319598 0.10199432 0.16479181 0.14480508
 0.11182282 0.10431496]
Runtime=1.8516859999945154


Variables=[4, 5, 7]
p-value=0.95
Probabilities=[0.1448357  0.12126825 0.1125773  0.10473036 0.15405813 0.13531639
 0.11570923 0.11150464]
Runtime=1.9257819159975043


Variables=[2, 6, 10, 12]
p-value=0.75
Probabilities=[0.05855723 0.04426494 0.07344582 0.05551962 0.06516425 0.04925936
 0.0745101  0.05632414 0

In [7]:
import json
import time

# Benchmark: three (query, evidence) pairs on the big polytree, each run with
# three values of p and 10000 iterations; results and wall-clock times are
# collected in parallel lists and printed at the end.
query_list = [[1, 3], [4, 5, 7], [2,6,10,12]]
evidence_list = [{2:False, 5:True}, {2:True, 3:False, 6:False}, {3:False, 4:False, 5:True, 15:True}]

times = []
results = []

for queries, evidence in zip(query_list, evidence_list):
    # Keep the `with` body to the load only, so the file handle is released
    # before the (multi-second) sampling runs start.
    # The network is still reloaded once per query, as in the original.
    with open('big_polytree.json', encoding='utf-8') as f:
        bayesian_network = json.load(f)
    for p in [0.75, 0.85, 0.95]:
        start_time = time.perf_counter()
        results.append(estimate_metropolis_hastings(p, 10000, bayesian_network, queries, evidence))
        end_time = time.perf_counter()
        times.append(end_time - start_time)
# Each result is indexed as (variables, p, distribution).
for res, t in zip(results, times):
    print(f"Variables={res[0]}\np-value={res[1]}\nProbabilities={res[2]}\nRuntime={t}\n\n")

Variables=[1, 3]
p-value=0.75
Probabilities=[0.26917267 0.24336986 0.25103105 0.23642642]
Runtime=4.837213082995731


Variables=[1, 3]
p-value=0.85
Probabilities=[0.26996416 0.24636012 0.24653633 0.23713939]
Runtime=4.918236666999292


Variables=[1, 3]
p-value=0.95
Probabilities=[0.25586532 0.24890219 0.25182235 0.24341015]
Runtime=5.114544458003365


Variables=[4, 5, 7]
p-value=0.75
Probabilities=[0.13618174 0.13051462 0.12444543 0.12143157 0.11916144 0.11994307
 0.11932365 0.12899848]
Runtime=4.847838083995157


Variables=[4, 5, 7]
p-value=0.85
Probabilities=[0.12705907 0.12732679 0.12756214 0.12397261 0.1216634  0.11877846
 0.12430801 0.12932951]
Runtime=5.055886332993396


Variables=[4, 5, 7]
p-value=0.95
Probabilities=[0.1294216  0.12831433 0.12310165 0.12356419 0.12659919 0.12111535
 0.12462939 0.1232543 ]
Runtime=5.213163124994026


Variables=[2, 6, 10, 12]
p-value=0.75
Probabilities=[0.05821356 0.06096582 0.06358119 0.06558649 0.05983482 0.06345476
 0.06884646 0.06481816 0.0601

In [5]:
import json
import time

# Benchmark: three (query, evidence) pairs on the giant polytree, each run with
# three values of p and 10000 iterations; results and wall-clock times are
# collected in parallel lists and printed at the end.
query_list = [[1, 3], [4, 5, 7], [2,6,10,12]]
evidence_list = [{2:False, 5:True}, {2:True, 3:False, 6:False}, {3:False, 4:False, 5:True, 15:True}]

times = []
results = []

for queries, evidence in zip(query_list, evidence_list):
    # Keep the `with` body to the load only, so the file handle is released
    # before the (multi-second) sampling runs start.
    # The network is still reloaded once per query, as in the original.
    with open('giant_polytree.json', encoding='utf-8') as f:
        bayesian_network = json.load(f)
    for p in [0.75, 0.85, 0.95]:
        start_time = time.perf_counter()
        results.append(estimate_metropolis_hastings(p, 10000, bayesian_network, queries, evidence))
        end_time = time.perf_counter()
        times.append(end_time - start_time)
# Each result is indexed as (variables, p, distribution).
for res, t in zip(results, times):
    print(f"Variables={res[0]}\np-value={res[1]}\nProbabilities={res[2]}\nRuntime={t}\n\n")

Variables=[1, 3]
p-value=0.75
Probabilities=[0.24639044 0.2999422  0.23218211 0.22148525]
Runtime=9.197118417010643


Variables=[1, 3]
p-value=0.85
Probabilities=[0.2466703  0.28246182 0.23661104 0.23425684]
Runtime=9.421233959001256


Variables=[1, 3]
p-value=0.95
Probabilities=[0.24460401 0.26347447 0.24530656 0.24661496]
Runtime=9.745251499989536


Variables=[4, 5, 7]
p-value=0.75
Probabilities=[0.12532927 0.12405012 0.12357531 0.12063595 0.12567895 0.1211324
 0.12925349 0.13034451]
Runtime=9.310136083004181


Variables=[4, 5, 7]
p-value=0.85
Probabilities=[0.1244595  0.12570517 0.12707016 0.13019385 0.11906085 0.12477376
 0.12032198 0.12841473]
Runtime=9.979918416996952


Variables=[4, 5, 7]
p-value=0.95
Probabilities=[0.13099254 0.12148546 0.12348017 0.12987099 0.1262263  0.12400699
 0.12034985 0.12358771]
Runtime=10.544327499999781


Variables=[2, 6, 10, 12]
p-value=0.75
Probabilities=[0.06376424 0.06242175 0.06270001 0.06176003 0.06565444 0.06295465
 0.06604153 0.05898606 0.0592