In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.insert(0,'../../modules')

In [45]:
import numpy as np
import factors

# Inference
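In inference problems we want the distribution over a query variable $Y$ given the variables whose values we have observed ($X_\text{known}$), while accounting for the remaining variables whose values we have not observed ($X_\text{unknown}$). We want: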

$$
\begin{aligned}
  P(Y|X_\text{known}) &= \sum_{X_\text{unknown}}P(Y,X_\text{unknown}|X_\text{known}) \\
\end{aligned}
$$
So we condition on known variables and then marginalize out the unknown variables we are not interested in. <br>
In Bayesian networks we can do exact inference with the factor representation.<br>
If we can represent a joint distribution with a factor, then conditioning and marginalizing are very simple, just using the formula above. Say you are interested in knowing whether you will need to get tea from the shop on the way home, given who is at home (using 0 for False and 1 for True):

In [3]:
# Joint distribution over three binary variables (0 = False, 1 = True).
tea_factor = factors.Factor(["out of tea","mums home","dads home"],[2,2,2])
# One (assignment, probability) row per joint setting, with the assignment
# given in the order (out of tea, mums home, dads home).
joint_table = [
    ([0,0,0],0.15),
    ([0,0,1],0.05),
    ([0,1,0],0.1),
    ([0,1,1],0.35),
    ([1,0,0],0.0),
    ([1,0,1],0.1),
    ([1,1,0],0.05),
    ([1,1,1],0.2),
]
for assignment, probability in joint_table:
    tea_factor.set(assignment,probability)
print(tea_factor)

out of tea  mums home  dads home  Values (10 dp)
0           0          0          0.15
0           0          1          0.05
0           1          0          0.1
0           1          1          0.35
1           0          0          0.0
1           0          1          0.1
1           1          0          0.05
1           1          1          0.2



And you know that dad isn't home, but you aren't sure about mum.

In [4]:
# Evidence: dad is not at home, so fix "dads home" to 0 and drop it from the factor.
dads_home_fixed = factors.drop_variables(tea_factor,["dads home"],[0])
# Condition on that evidence, then sum out "mums home", whose value is unknown.
posterior_given_dad_out = factors.condition(dads_home_fixed)
out_of_tea_posterior = factors.marginalize(posterior_given_dad_out,["mums home"])
print(out_of_tea_posterior)

out of tea  Values (10 dp)
0           0.8333333333
1           0.1666666667



This can be checked very easily with rejection sampling (explained in the next notebook): draw samples from the joint distribution, discard those that disagree with the evidence (here, every sample in which dad is home), and count the outcomes among the samples that remain.

In [5]:
# Rejection-sampling check of P(out of tea | dads home = 0): draw samples from
# the joint table and keep only those that agree with the evidence.
N_SAMPLES = 10000
rng = np.random.RandomState(0)  # fixed seed so the estimate is reproducible on re-run
joint_probs = tea_factor.array.reshape(-1)
indexes = tea_factor.indexes
# Draw every row index in a single call rather than one call per sample; the
# number of rows is taken from the table instead of being hard-coded.
sampled_rows = rng.choice(len(joint_probs),size=N_SAMPLES,p=joint_probs)
total_not_out_of_tea = 0
total_out_of_tea = 0
for row in sampled_rows:
    setting = indexes[row]
    if setting[2] != 0:
        continue  # rejected: this sample has "dads home" != 0
    if setting[0] == 0:
        total_not_out_of_tea += 1
    else:
        total_out_of_tea += 1
total_accepted = total_not_out_of_tea + total_out_of_tea
if total_accepted == 0:
    # Avoid dividing by zero when no sample agrees with the evidence.
    print("no samples matched the evidence; cannot estimate the conditional")
else:
    print("estimated not out of tea prob:",total_not_out_of_tea/total_accepted)
    print("estimated out of tea prob    :",total_out_of_tea/total_accepted)

estimated not out of tea prob: 0.8241721854304636
estimated out of tea prob    : 0.17582781456953642


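So, exact inference on a factor is easy. However, creating the full joint table is very expensive when there are many variables, because its size grows exponentially with the number of variables (a joint over $n$ binary variables has $2^n$ entries). Fortunately, it is possible to condition on and marginalize out variables directly on the individual factors of the distribution, without ever building the full joint table: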

# The Sum-Product Algorithm
Say we have a joint probability which can be written in terms of conditionals, e.g.: <br>
$$P(A,B,C,D)=P(D|B)P(B|A,C)P(A)P(C)$$
which in turn can be written in terms of factors:
$$P(A,B,C,D)=\phi_1(D,B)\phi_2(B,A,C)\phi_3(A)\phi_4(C)$$
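Then we can condition on each variable we know and eliminate the unknown variables one at a time: for each unknown variable, multiply together only the factors that mention it and marginalize it out of the product. For example, summing out $A$ only involves $\phi_2(B,A,C)\phi_3(A)$. <br>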

In [288]:
# Factors of P(A,B,C,D) = phi1(D,B) * phi2(B,A,C) * phi3(A) * phi4(C).
# Each spec is (variable names, cardinalities, flat list of values).
# NOTE(review): the values look like they are listed with the last-named
# variable varying fastest; confirm against factors.Factor.set_all.
factor_specs = [
    (["D","B"],[2,2],[0.6,0.3,0.4,0.7]),
    (["B","A","C"],[2,2,2],[0.2,0.6,0.1,0.5,0.8,0.4,0.9,0.5]),
    (["A"],2,[0.25,0.75]),
    (["C"],2,[0.65,0.35]),
]
all_factors = []
for spec_names, spec_cardinalities, spec_values in factor_specs:
    built_factor = factors.Factor(spec_names,spec_cardinalities)
    built_factor.set_all(spec_values)
    all_factors.append(built_factor)
factor1, factor2, factor3, factor4 = all_factors

# Query: D is observed to be 1, A and C are summed out.
evidence_names = ["D"]
evidence_vals = [1]
unknown_names = ["A","C"]

In [289]:
def sum_product_variable_elimination(all_factors,known_vars,evidence,unknown_vars):
    """Run sum-product variable elimination over a list of factors.

    The evidence is applied to every factor first. Each unknown variable is
    then eliminated in turn: only the factors that mention it are multiplied
    together and the variable is marginalized out of that product. Whatever
    factors remain are multiplied into one and passed to ``factors.condition``.

    Args:
        all_factors: iterable of factor objects; each must expose ``names``.
        known_vars: names of the observed variables.
        evidence: observed values, passed to ``factors.drop_variables``
            together with ``known_vars``.
        unknown_vars: names of the variables to sum out, in elimination order.

    Returns:
        The result of ``factors.condition`` on the single remaining factor.

    Raises:
        ValueError: if no factor is left once the evidence has been applied
            and the unknown variables have been eliminated.
    """
    # Apply the evidence; factors for which drop_variables returns None are
    # discarded.
    new_factors = []
    for f in all_factors:
        reduced = factors.drop_variables(f,known_vars,evidence)
        # Identity test on purpose: `!= None` would dispatch to the factor's
        # own comparison method instead of checking for None.
        if reduced is not None:
            new_factors.append(reduced)

    for unknown_var in unknown_vars:
        factors_to_combine = [f for f in new_factors if unknown_var in f.names]
        if not factors_to_combine:
            continue  # no remaining factor mentions this variable
        new_factors = [f for f in new_factors if unknown_var not in f.names]
        combined_factor = factors.multiple_factor_product(factors_to_combine)
        combined_factor = factors.marginalize(combined_factor,[unknown_var])
        # A plain number is not a factor and is not carried forward.
        if not isinstance(combined_factor,(int,float)):
            new_factors.append(combined_factor)

    if not new_factors:
        raise ValueError(
            "no factors remain after applying the evidence and eliminating "
            "the unknown variables; nothing is left to query")
    if len(new_factors)>1:
        new_factors = [factors.multiple_factor_product(new_factors)]
    return factors.condition(new_factors[0])

In [291]:
# Query with the evidence defined alongside the factors (evidence_names /
# evidence_vals), summing out the variables in unknown_names.
print(sum_product_variable_elimination(all_factors,evidence_names,evidence_vals,unknown_names))

B  Values (10 dp)
0  0.1708299758
1  0.8291700242



### We can check this gives the same answer with the full joint factor:

In [294]:
def full_joint_elimination(all_factors,known_vars,evidence,unknown_vars):
    """Answer the same query by brute force on the full joint factor.

    All factors are multiplied into one joint factor, the evidence is applied
    with ``factors.drop_variables``, the unknown variables are marginalized
    out, and the result is passed to ``factors.condition``.

    Args:
        all_factors: the factors whose product is the joint distribution.
        known_vars: names of the observed variables.
        evidence: observed values, passed to ``factors.drop_variables``
            together with ``known_vars``.
        unknown_vars: names of the variables to marginalize out.

    Returns:
        ``factors.condition`` applied to the marginalized factor, or to the
        evidence-reduced joint when ``factors.marginalize`` returns None.
    """
    full_joint_factor = factors.multiple_factor_product(all_factors)
    set_vars = factors.drop_variables(full_joint_factor,known_vars,evidence)
    marginalized = factors.marginalize(set_vars,unknown_vars)
    # Identity test on purpose: `!= None` would dispatch to the factor's own
    # comparison method instead of checking for None.
    if marginalized is None:
        return factors.condition(set_vars)
    return factors.condition(marginalized)

In [299]:
# Same query as the sum-product cell, answered from the full joint table;
# the evidence values are held in evidence_vals.
print(full_joint_elimination(all_factors,evidence_names,evidence_vals,unknown_names))

B  Values (10 dp)
0  0.1708299758
1  0.8291700242



Code to run these algorithms is in the exact_inference.py file:

In [300]:
import exact_inference

In [308]:
# Query with A observed as 0 and B, C summed out, using the sum-product
# implementation from exact_inference.py.
query_evidence_names, query_evidence_vals, query_unknowns = ["A"], [0], ["B","C"]
f_sumP = exact_inference.sum_product_variable_elimination(
    all_factors,query_evidence_names,query_evidence_vals,query_unknowns)
print(f_sumP)

D  Values (10 dp)
0  0.402
1  0.598



In [309]:
# Same query (A observed as 0; B and C summed out), answered from the full
# joint table by exact_inference.py.
query_evidence_names, query_evidence_vals, query_unknowns = ["A"], [0], ["B","C"]
f_full_table = exact_inference.full_joint_elimination(
    all_factors,query_evidence_names,query_evidence_vals,query_unknowns)
print(f_full_table)

D  Values (10 dp)
0  0.402
1  0.598



I am fairly sure the sum-product code is correct, but there might be some edge-case issues.
Notebook 2 in this chapter discusses approximations with sampling. 