In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.insert(0,'../../modules')

In [2]:
import numpy as np
import factors
import exact_inference

# Sampling top down
When sampling a PGM, one option is to build the full joint distribution and sample from that. This requires constructing one large factor, which might not be viable. Another option is to sample the top (root) nodes first, then sample every remaining node conditioned on the already-sampled values of its parents. This way you can sample from the full joint without needing to merge all the factors. <br>
Example: build a graph in which $A$ and $B$ (independent) jointly cause $C$, which in turn causes $D$ and $E$ (independent given $C$).

In [190]:
# Build one randomly-parameterised table per node of the graph A, B -> C -> D, E.
# Each factor lists its own variable first, followed by that variable's parents.
A = factors.Factor(["A"], [2])
B = factors.Factor(["B"], [2])
C = factors.Factor(["C", "A", "B"], [2, 2, 2])
D = factors.Factor(["D", "C"], [2, 2])
E = factors.Factor(["E", "C"], [2, 2])

all_factors = []
for f in (A, B, C, D, E):
    # One uniform random entry per cell of the factor's table.
    n_entries = np.prod(f.array.shape)
    f.set_all(np.random.rand(n_entries))
    # Condition on the parents (every name after the first); judging by the
    # printed tables this normalises over the first variable for each parent setting.
    parent_names = list(f.names[1:])
    cond_f = factors.condition(f, parent_names)
    all_factors.append(cond_f)

for f in all_factors:
    print(f)

A  Values (10 dp)
0  0.3485497205
1  0.6514502795

B  Values (10 dp)
0  0.0794785344
1  0.9205214656

C  A  B  Values (10 dp)
0  0  0  0.2523884772
0  0  1  0.3357885935
0  1  0  0.225960047
0  1  1  0.4307946056
1  0  0  0.7476115228
1  0  1  0.6642114065
1  1  0  0.774039953
1  1  1  0.5692053944

D  C  Values (10 dp)
0  0  0.5741510951
0  1  0.2652084461
1  0  0.4258489049
1  1  0.7347915539

E  C  Values (10 dp)
0  0  0.9837327189
0  1  0.401983203
1  0  0.0162672811
1  1  0.598016797



In [191]:
# assumes the first variable in each factor is dependent on all others in the factor
def joint_sample_top_down(all_factors):
    """Draw one sample of every variable by sampling parents before children.

    Each factor is read as a conditional table whose first name is the child
    and whose remaining names are its parents. The factors are swept
    repeatedly; on each sweep, every factor whose parents were all assigned on
    an *earlier* sweep has its child sampled.

    Parameters
    ----------
    all_factors : sequence of factors
        One factor per variable, with the child's name first in ``names``.

    Returns
    -------
    (list, list)
        The variable names in the order they were sampled, and the sampled
        value for each name at the matching position.

    Raises
    ------
    ValueError
        If a full sweep assigns nothing while factors are still waiting, i.e.
        some parent has no factor of its own or the dependencies are cyclic.
        (Previously this case looped forever.)
    """
    assigned_variable_names = []
    variable_assignments = []
    remaining_factors = list(all_factors)

    while remaining_factors:
        # Work on copies so that readiness within this sweep is judged only
        # against assignments completed in previous sweeps.
        new_assigned_variable_names = list(assigned_variable_names)
        new_variable_assignments = list(variable_assignments)
        new_remaining_factors = []
        for f in remaining_factors:
            # A root factor has no parents, so all() over an empty list is True.
            if all(parent in assigned_variable_names for parent in f.names[1:]):
                # drop_variables receives every assignment made so far; presumably it
                # fixes the ones that appear in f and ignores the rest (TODO confirm).
                conditioned_factor = factors.drop_variables(f, assigned_variable_names, variable_assignments)
                new_variable_assignments.append(factors.sample(conditioned_factor, 1)[0][0])
                new_assigned_variable_names.append(f.names[0])
            else:
                new_remaining_factors.append(f)

        if len(new_remaining_factors) == len(remaining_factors):
            # Nothing became ready this sweep, so nothing ever will.
            stuck = [f.names[0] for f in new_remaining_factors]
            raise ValueError(
                "cannot sample variables %r: their parents are never assigned "
                "(missing parent factor or cyclic dependency)" % (stuck,)
            )

        assigned_variable_names = new_assigned_variable_names
        variable_assignments = new_variable_assignments
        remaining_factors = new_remaining_factors
    return assigned_variable_names, variable_assignments

**Checking the results:**

In [192]:
# Draw 5000 joint samples, one row per sample.
all_samples = []
for n in range(5000):
    names, assignments = joint_sample_top_down(all_factors)
    all_samples.append(assignments)
all_samples = np.array(all_samples)

# Estimate P(C) from empirical frequencies. Column 2 is C because the sampler
# assigns variables in factor order, which here is A, B, C, D, E.
c_column = all_samples[:, 2]
n_samples = all_samples.shape[0]
prob_C_sampled = factors.Factor(["C"], [2])
for c_value in (0, 1):
    prob_C_sampled.set([c_value], np.sum(c_column == c_value) / n_samples)
print(prob_C_sampled)

C  Values (10 dp)
0  0.3876
1  0.6124



In [193]:
# Exact reference value for the sampled estimate of P(C) above.
# NOTE(review): the arguments look like (factors, evidence names, evidence values,
# variables to sum out) — confirm against exact_inference.
prob_C_exact = exact_inference.sum_product_variable_elimination(all_factors,[],[],["A","B","D","E"])
print(prob_C_exact)

C  Values (10 dp)
0  0.3847643398
1  0.6152356602



# Direct Sampling, Likelihood weighted sampling, Gibbs Sampling
One problem with the above is that you can only sample the full joint this way, but often we need to sample from a conditional distribution instead. This is difficult when the observed variables are the bottom nodes of a PGM, because it means sampling all of their parent nodes backwards. If the root nodes are the observed ones, it is easy, as above. There are a few options to deal with this.

### Direct Sampling
With direct sampling you sample the full joint as above and simply throw away all samples which don't match the observed variables. This is basically rejection sampling. Say we want to know the distribution of $C$ as above, but only when $A$ is 0, i.e. $P(C \mid A=0)$ (using the same samples as before).

In [194]:
# Rejection step: keep only the joint samples in which A (column 0) came out as 0.
a_is_zero = all_samples[:, 0] == 0
samples_where_A0 = all_samples[a_is_zero]

# Empirical frequencies of C (column 2) among the kept samples.
n_kept = samples_where_A0.shape[0]
c_kept = samples_where_A0[:, 2]
prob_C_given_A_sampled = factors.Factor(["C"], [2])
for c_value in (0, 1):
    prob_C_given_A_sampled.set([c_value], np.sum(c_kept == c_value) / n_kept)
print(prob_C_given_A_sampled)

C  Values (10 dp)
0  0.3337209302
1  0.6662790698



**checking the approximation is close to the exact value**

In [195]:
# Exact reference value for the rejection-sampling estimate: A is passed as the
# observed variable with value 0, and B, D, E are the remaining non-query variables.
# NOTE(review): argument roles are inferred from this call pattern — confirm against exact_inference.
prob_C_given_A_exact = exact_inference.sum_product_variable_elimination(all_factors,["A"],[0],["B","D","E"])
print(prob_C_given_A_exact)

C  Values (10 dp)
0  0.3291600744
1  0.6708399256



### Likelihood weighting
With likelihood weighting you sample all unknown variables as normal, but set the known variables to their observed values. This means all children are correctly sampled based on their parents' values, but the parents are not sampled based on their children. So you are sampling plausible-looking variables, but from a slightly incorrect distribution. This is importance sampling applied to factors.
$$P(X=x_n) = \int \mathbb{1}(x=x_n)p(x) dx \approx \frac{1}{N} \sum_{i=1}^N \mathbb{1}(x_i=x_n) $$
Becomes:
$$\int \mathbb{1}(x=x_n)p(x) dx \approx \frac{1}{\sum_{i=1}^N \frac{p(x_i)}{q(x_i)}} \sum_{i=1}^N \mathbb{1}(x_i=x_n)\frac{p(x_i)}{q(x_i)}, \qquad x_i \sim q $$
This uses the formula for normalized importance sampling. The distribution we sample from, $q(x)$, is constructed as above by setting the observed values. The true probability $p(x)$ is the product of all of the normalized conditional probabilities, regardless of whether a value was observed or not. $q(x)$ is the same product, but with a factor of $1$ wherever the value was assigned by evidence. Therefore $\frac{p(x)}{q(x)}$ (the weight) is just the product of the conditional probabilities of the observed variables.

In [196]:
# assumes the first variable in each factor is dependent on all others in the factor
def likelihood_weighting_top_down(all_factors,known_vars,evidence):
    """Draw one likelihood-weighted sample with the evidence variables clamped.

    Works like top-down joint sampling (parents before children, one sweep at
    a time), except that a variable listed in ``known_vars`` is not sampled:
    it is set to its evidence value and the sample's weight is multiplied by
    the value its conditioned factor holds at that evidence entry.

    Parameters
    ----------
    all_factors : sequence of factors
        One factor per variable, with the child's name first in ``names``.
    known_vars : list
        Names of the observed variables.
    evidence : sequence
        Observed value for each entry of ``known_vars``, at the same position.

    Returns
    -------
    (list, list, number)
        Variable names in assignment order, the value assigned to each, and
        the accumulated weight (1 when no evidence variable was encountered).

    Raises
    ------
    ValueError
        If a full sweep assigns nothing while factors are still waiting, i.e.
        some parent has no factor of its own or the dependencies are cyclic.
        (Previously this case looped forever.)
    """
    assigned_variable_names = []
    variable_assignments = []
    weight = 1
    remaining_factors = list(all_factors)

    while remaining_factors:
        # Work on copies so that readiness within this sweep is judged only
        # against assignments completed in previous sweeps.
        new_assigned_variable_names = list(assigned_variable_names)
        new_variable_assignments = list(variable_assignments)
        new_remaining_factors = []
        for f in remaining_factors:
            # A root factor has no parents, so all() over an empty list is True.
            if not all(parent in assigned_variable_names for parent in f.names[1:]):
                new_remaining_factors.append(f)
                continue

            child = f.names[0]
            var_dropped_factor = factors.drop_variables(f, assigned_variable_names, variable_assignments)
            # presumably re-normalises the remaining single-variable table — TODO confirm
            conditioned_factor = factors.condition(var_dropped_factor)
            if child in known_vars:
                # Evidence: clamp the value and fold its conditional probability into the weight.
                evid = evidence[known_vars.index(child)]
                new_variable_assignments.append(evid)
                weight *= conditioned_factor.get([evid])
            else:
                new_variable_assignments.append(factors.sample(conditioned_factor, 1)[0][0])
            new_assigned_variable_names.append(child)

        if len(new_remaining_factors) == len(remaining_factors):
            # Nothing became ready this sweep, so nothing ever will.
            stuck = [f.names[0] for f in new_remaining_factors]
            raise ValueError(
                "cannot sample variables %r: their parents are never assigned "
                "(missing parent factor or cyclic dependency)" % (stuck,)
            )

        assigned_variable_names = new_assigned_variable_names
        variable_assignments = new_variable_assignments
        remaining_factors = new_remaining_factors
    return assigned_variable_names, variable_assignments, weight

**Checking results again:**

In [197]:
# Collect 1000 likelihood-weighted samples with A clamped to 0.
all_LW_samples = []
all_LW_weights = []
for n in range(1000):
    names, assignments, weight = likelihood_weighting_top_down(all_factors, ["A"], [0])
    all_LW_samples.append(assignments)
    all_LW_weights.append(weight)
all_LW_samples = np.array(all_LW_samples)
all_LW_weights = np.array(all_LW_weights)

# Self-normalised estimate: weighted count of each value of C (column 2)
# divided by the total weight of all samples.
total_weight = np.sum(all_LW_weights)
prob_C_given_A_LW_sampled = factors.Factor(["C"], [2])
for c_value in (0, 1):
    c_matches = all_LW_samples[:, 2] == c_value
    prob_C_given_A_LW_sampled.set([c_value], np.sum(all_LW_weights * c_matches) / total_weight)
print(prob_C_given_A_LW_sampled)

C  Values (10 dp)
0  0.352
1  0.648



### Gibbs sampling
An alternative way to do inference is to generate samples from the conditional distribution using Gibbs sampling. Gibbs sampling starts with a random set of variable values and proceeds by resampling each variable in turn, conditioned on the current values of all variables within its Markov blanket. This makes it tractable. It is a variant of the Markov Chain Monte Carlo (MCMC) technique for generating samples.

In [550]:
def GibbsStep(all_variable_markov_blankets,fixed_variables,all_variable_names,current_state_values):
    """Run one Gibbs sweep, resampling every non-fixed variable in turn.

    For each free variable, every other variable of its Markov-blanket factor
    is fixed to its current value, the remaining 1-D slice of the factor's
    table is normalised, and a new value is drawn from it. Later variables in
    the sweep see the values already redrawn earlier in the same sweep.

    Parameters
    ----------
    all_variable_markov_blankets : sequence
        Entry ``i`` is the factor used to resample ``all_variable_names[i]``.
        Each needs ``names`` and an ``array`` with one axis per name.
    fixed_variables : container
        Names that are skipped (evidence variables).
    all_variable_names : list
        Name of the variable stored at each position of the state.
    current_state_values : mutable sequence of int
        Current value of every variable. It is UPDATED IN PLACE.

    Returns
    -------
    The same ``current_state_values`` object, after the sweep.

    Raises
    ------
    ValueError
        If the slice for a variable has no positive mass, so no sampling
        distribution can be formed.
    """
    for index, var_name in enumerate(all_variable_names):
        if var_name in fixed_variables:
            continue

        joint = all_variable_markov_blankets[index]
        joint_names = list(joint.names)

        # Integer-index every other axis at its current value and keep the
        # resampled variable's axis whole. This always yields a 1-D vector,
        # even for a variable with a single state, where squeezing a
        # length-1 slice would collapse it to 0-d.
        selector = tuple(
            slice(None) if name == var_name
            else int(current_state_values[all_variable_names.index(name)])
            for name in joint_names
        )
        weights = np.asarray(joint.array[selector], dtype=float).reshape(-1)

        total = weights.sum()
        if not total > 0:
            raise ValueError(
                "cannot resample %r: its Markov-blanket factor has no positive "
                "mass for the current state" % (var_name,)
            )

        # Draw a scalar rather than a length-1 array: storing a 1-element array
        # into a scalar slot is deprecated in recent NumPy.
        current_state_values[index] = np.random.choice(weights.shape[0], p=weights / total)
    return current_state_values

def GibbsSampling(all_factors,known_vars,evidence,N):
    """Generate N successive Gibbs-sampler states with the evidence held fixed.

    Parameters
    ----------
    all_factors : sequence of factors
        Every factor of the model; each exposes ``names`` and ``array``.
    known_vars : list
        Names of the observed variables.
    evidence : sequence
        Observed value for each entry of ``known_vars``, at the same position.
    N : int
        Number of Gibbs sweeps to run.

    Returns
    -------
    numpy array holding the state after each sweep, one row per sweep. Columns
    follow the sorted unique variable names. No burn-in or thinning is applied.
    """
    # Sorted, de-duplicated list of every variable mentioned by any factor.
    name_pool = []
    for factor in all_factors:
        name_pool.extend(list(factor.names))
    all_names = list(np.unique(name_pool))

    # Random initial value per variable; its number of states is read off the
    # first factor that mentions it.
    current_state = np.zeros(len(all_names)).astype(int)
    for position, name in enumerate(all_names):
        owner = next((factor for factor in all_factors if name in factor.names), None)
        if owner is not None:
            n_states = owner.array.shape[list(owner.names).index(name)]
            current_state[position] = np.random.randint(0, n_states)

    # Evidence overrides the random initialisation.
    for k, known_name in enumerate(known_vars):
        current_state[all_names.index(known_name)] = evidence[k]

    # For each variable: the product of every factor that mentions it.
    all_variable_markov_blankets = [
        factors.multiple_factor_product(
            [factor for factor in all_factors if var_name in factor.names]
        )
        for var_name in all_names
    ]

    # GibbsStep updates the state in place, so snapshot a copy after each sweep.
    all_visited_states = []
    for _ in range(N):
        current_state = GibbsStep(all_variable_markov_blankets, known_vars, all_names, current_state)
        all_visited_states.append(current_state.copy())
    return np.array(all_visited_states)

In [551]:
# Run 10000 Gibbs sweeps with A observed to be 0.
samples = GibbsSampling(all_factors, known_vars=["A"], evidence=[0], N=10000)

In [554]:
# Skip the first 100 sweeps and keep every 10th state after that, then estimate
# P(C=0 | A=0) as the fraction of kept states whose C entry (column 2 of the
# sorted names A, B, C, D, E) is 0.
kept_states = samples[100::10]
average_C_given_A = np.mean(kept_states[:, 2] == 0)
print(average_C_given_A)

0.3414141414141414


As expected, this value matches the exact value closely

In [555]:
# Exact reference for the Gibbs estimate: same variable-elimination call as used
# for the direct-sampling check, with A passed as observed (value 0).
print(exact_inference.sum_product_variable_elimination(all_factors,["A"],[0],["B","D","E"]))

C  Values (10 dp)
0  0.3291600744
1  0.6708399256

