## Implementation of Variable Elimination to compute probability queries on a Bayesian Network.

In [11]:
# Given an arbitrary Bayesian Network, an arbitrary set of query variables, and arbitrary evidence, we want to calculate the probability distribution of the query variables given that evidence.
import numpy as np

def join(first: np.ndarray, second: np.ndarray, row_pairings: dict[int,list[int]]) -> np.ndarray:
    """Multiply rows of one distribution into the rows of another it is paired with.

    Args:
        first (np.ndarray): distribution supplying the multipliers (smaller or same size)
        second (np.ndarray): distribution being scaled (larger or same size)
        row_pairings (dict[int,list[int]]): for each row index of ``first``, the row
            indices of ``second`` that it multiplies

    Returns:
        np.ndarray: array shaped like ``second``; every row named in ``row_pairings``
            holds the accumulated products, every other row stays 0
    """
    product_table = np.zeros(second.shape)
    # Lazily enumerate (target row, product) in the same order the pairings list them.
    row_products = (
        (large_row, first[small_row] * second[large_row])
        for small_row, large_rows in row_pairings.items()
        for large_row in large_rows
    )
    for large_row, product in row_products:
        # Accumulate so a row paired more than once sums its contributions.
        product_table[large_row] += product
    return product_table

def eliminate(distribution: np.ndarray, pairings: list[tuple[int,int]]) -> np.ndarray:
    """Eliminate a variable by summing the distribution at the specified index pairs.

    Args:
        distribution (np.ndarray): distribution whose entries are summed pairwise
        pairings (list[tuple[int,int]]): pairs of indices into ``distribution`` whose
            entries are added together; output entry ``i`` comes from ``pairings[i]``

    Returns:
        np.ndarray: float array with one entry per pair (half the size of
            ``distribution`` when every entry appears in exactly one pair)
    """
    # Build the result directly from the sums rather than filling an
    # uninitialised np.ndarray(shape=...) buffer.
    return np.array(
        [distribution[left] + distribution[right] for left, right in pairings],
        dtype=float,
    )

In [4]:
distr_1 = np.array([0.000999, 0.00029, 0.93906, 0.00095])
distr_2 = np.array([0.95, 0.05, 0.55, 0.45, 0.17, 0.83, 0.23, 0.77])
# Entry i of distr_1 multiplies entries 2i and 2i+1 of distr_2.
joined = join(
    distr_1,
    distr_2,
    {row: [2 * row, 2 * row + 1] for row in range(4)},
)

In [12]:
# Sum the joined entries in pairs, collapsing its 8 entries down to 4.
eliminate(
    joined,
    pairings=[(0, 2), (1, 3), (4, 6), (5, 7)],
)

array([1.108550e-03, 1.804500e-04, 1.598587e-01, 7.801513e-01])

In [None]:
def return_query_probabilities(queries: list[int], evidence: list[tuple[int,bool]], network: dict) -> np.ndarray:
    """Given a Bayesian network and a list of query and evidence variables, return the probability distribution for all possible values of query variables

    Args:
        queries (list[int]): list of variables specified whose value probabilities we want to query
        evidence (list[tuple[int,bool]]): list of variables whose values are specified and hence affect query probabilities
        network (dict): underlying network which reveals probabilities of each node given its parents' values.
            Each value is read as a mapping with a "parents" entry (iterable of node keys) and a
            "prob" entry (iterable of pairs whose second element is a probability, presumably
            that of the node being true for one parent assignment - confirm against the caller).

    Returns:
        np.ndarray: probability distribution of possible combination values of each of the query variables (2^{#query variables}, 0 is all false and 2^{#query variables}-1 is all true)
    """
    # NOTE(review): the code visible here only builds the per-variable factor lists;
    # `queries` and `evidence` are not used yet and nothing is returned.

    # we need a map of variables to their respective factors (some of which will be shared)
    factors = {node: [] for node in network}
    # each factor has its own probability distribution - which may or may not depend on its parents
    for node, info in network.items():
        # np.array copies the listed values; np.ndarray(...) would interpret the list as a shape.
        listed_probs = np.array([pair[1] for pair in info["prob"]], dtype=float)
        # double the table size by putting the complement probability in front of each listed probability
        table = np.empty(2 * len(listed_probs))
        table[0::2] = 1 - listed_probs
        table[1::2] = listed_probs
        # the same table object is registered under the node and under each of its parents
        factors[node].append(table)
        for parent in info["parents"]:
            factors[parent].append(table)