In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.insert(0,'../../modules')

In [2]:
import numpy as np
import factors
import exact_inference

# Sampling top down
When sampling a PGM, one option is to build the full joint distribution and sample from that. This requires making one large factor, which might not be viable. Another option is to sample the top (root) nodes first, then sample every other node conditioned on the already-sampled values of its parent nodes. This means you can sample the full joint without needing to merge all factors. <br>
Example: Making the graph $A$ and $B$ (independent) cause $C$ which then causes $D$ and $E$ (independent given $C$)

In [15]:
# Build the example network with random tables.
# Every factor lists its child variable first, followed by that child's parents.
A = factors.Factor(["A"], [2])
B = factors.Factor(["B"], [2])
C = factors.Factor(["C", "A", "B"], [2, 2, 2])
D = factors.Factor(["D", "C"], [2, 2])
E = factors.Factor(["E", "C"], [2, 2])

# Fill each table with uniform random values (one entry per cell of the table).
raw_factors = [A, B, C, D, E]
for raw in raw_factors:
    n_entries = np.prod(raw.array.shape)
    raw.set_all(np.random.rand(n_entries))

# Condition each factor on every variable after the first (the parents),
# then show the resulting tables.
all_factors = [factors.condition(raw, list(raw.names[1:])) for raw in raw_factors]
for conditional in all_factors:
    print(conditional)

A  Values (10 dp)
0  0.1479563948
1  0.8520436052

B  Values (10 dp)
0  0.97984558
1  0.02015442

C  A  B  Values (10 dp)
0  0  0  0.4746630658
0  0  1  0.6171417913
0  1  0  0.1049336214
0  1  1  0.5606639895
1  0  0  0.5253369342
1  0  1  0.3828582087
1  1  0  0.8950663786
1  1  1  0.4393360105

D  C  Values (10 dp)
0  0  0.4578824282
0  1  0.0989491596
1  0  0.5421175718
1  1  0.9010508404

E  C  Values (10 dp)
0  0  0.428876864
0  1  0.8124597115
1  0  0.571123136
1  1  0.1875402885



In [16]:
# assumes the first variable in each factor is dependent on all others in the factor
def joint_sample_top_down(all_factors):
    """Draw one joint sample by sampling each variable after its parents.

    Each factor is treated as the distribution of its first variable (the
    child) given the remaining variables (its parents). The factors are
    processed in passes: a factor is sampled once all of its parents were
    assigned in an *earlier* pass, so variables sampled in the current pass
    only unlock their children in the next pass.

    Parameters
    ----------
    all_factors : sequence
        Factor objects exposing ``names``. The sequence is not modified.

    Returns
    -------
    tuple of (list, list)
        The child variable names in the order they were sampled, and the
        sampled value for each name (index-aligned).

    Raises
    ------
    ValueError
        If a full pass assigns no variable while factors remain, i.e. some
        parent is never assigned (missing factor or cyclic dependency).
        Without this check the loop would never terminate.
    """
    assigned_variable_names = []
    variable_assignments = []
    remaining_factors = list(all_factors)

    while remaining_factors:
        new_assigned_variable_names = assigned_variable_names.copy()
        new_variable_assignments = variable_assignments.copy()
        new_remaining_factors = []
        for f in remaining_factors:
            # all() over an empty parent list is True, so root factors are ready immediately.
            if all(parent in assigned_variable_names for parent in f.names[1:]):
                # Fix the already-sampled values, leaving a table over the child only.
                conditioned_factor = factors.drop_variables(f, assigned_variable_names, variable_assignments)
                new_variable_assignments.append(factors.sample(conditioned_factor, 1)[0][0])
                new_assigned_variable_names.append(f.names[0])
            else:
                new_remaining_factors.append(f)

        # No factor became ready in this pass: the remaining ones can never be sampled.
        if len(new_remaining_factors) == len(remaining_factors):
            stuck = [f.names[0] for f in new_remaining_factors]
            raise ValueError(
                "Cannot sample variables " + str(stuck)
                + ": their parents are never assigned (missing factor or cyclic dependency)"
            )

        assigned_variable_names = new_assigned_variable_names
        variable_assignments = new_variable_assignments
        remaining_factors = new_remaining_factors
    return assigned_variable_names, variable_assignments

**Checking the results:**

In [17]:
# Draw joint samples; each row holds one assignment in the order returned by the sampler.
n_joint_samples = 5000
joint_rows = []
for _ in range(n_joint_samples):
    names, assignments = joint_sample_top_down(all_factors)
    joint_rows.append(assignments)
all_samples = np.array(joint_rows)

# Estimate P(C) as the fraction of samples taking each value (C is column 2).
c_column = all_samples[:, 2]
prob_C_sampled = factors.Factor(["C"], [2])
for c_value in (0, 1):
    prob_C_sampled.set([c_value], np.sum(c_column == c_value) / all_samples.shape[0])
print(prob_C_sampled)

C  Values (10 dp)
0  0.1604
1  0.8396



In [18]:
# Exact marginal of C for comparison: empty evidence lists, and every other
# variable named in the last argument (argument roles inferred from how the
# calls in this notebook are used -- confirm against exact_inference).
other_variables = ["A", "B", "D", "E"]
prob_C_exact = exact_inference.sum_product_variable_elimination(all_factors, [], [], other_variables)
print(prob_C_exact)

C  Values (10 dp)
0  0.1678883296
1  0.8321116704



# Direct Sampling, Likelihood weighted sampling, Gibbs Sampling
One problem with the above is that you can only sample the full joint this way. But often we need to sample a conditional distribution instead. It is difficult to sample when the observed variables are the bottom nodes in a PGM, as it means sampling all parent nodes backwards. Whereas if the root nodes are known then it is easy, as above. There are a few options to deal with this.

### Direct Sampling
With direct sampling you sample the full joint like above and simply throw away all samples which don't match the observed variables. This is basically rejection sampling. Say we want to know $P(C \mid A=0)$, i.e. $P(C)$ as above but only for the samples where $A$ is 0 (using the same samples as before).

In [19]:
# Rejection step: keep only the joint samples whose first column (A) is 0.
A_is_0 = all_samples[:, 0] == 0
samples_where_A0 = all_samples[A_is_0]
n_kept = samples_where_A0.shape[0]

# Estimate the distribution of C (column 2) from the surviving samples.
prob_C_given_A_sampled = factors.Factor(["C"], [2])
for c_value in (0, 1):
    prob_C_given_A_sampled.set([c_value], np.sum(samples_where_A0[:, 2] == c_value) / n_kept)
print(prob_C_given_A_sampled)

C  Values (10 dp)
0  0.4706703911
1  0.5293296089



**checking the approximation is close to the exact value**

In [24]:
# Exact counterpart of the sampled estimate: A is passed with value 0 and the
# remaining non-C variables are named in the last argument (argument roles
# inferred from how the calls in this notebook are used -- confirm against exact_inference).
observed_names = ["A"]
observed_values = [0]
other_variables = ["B", "D", "E"]
prob_C_given_A_exact = exact_inference.sum_product_variable_elimination(
    all_factors, observed_names, observed_values, other_variables
)
print(prob_C_given_A_exact)

C  Values (10 dp)
0  0.4775346419
1  0.5224653581



### Likelihood weighting
With Likelihood weighting you sample all unknown variables as normal, but set the known variables to their value. This means all children are correctly sampled based on the parent values, but the parents are not sampled based on the children. So, you are sampling correct looking variables, but with a slightly incorrect distribution. This is Importance Sampling applied to factors.
$$P(X=x_n) = \int \mathbb{1}(x=x_n)p(x) dx \approx \frac{1}{N} \sum_{i=1}^N \mathbb{1}(x_i=x_n) $$
Becomes:
$$\int \mathbb{1}(x=x_n)p(x) dx \approx \frac{1}{\sum_{i=1}^N \frac{p(x_i)}{q(x_i)}} \sum_{i=1}^N \mathbb{1}(x_i=x_n)\frac{p(x_i)}{q(x_i)} $$
Using the formula for normalized importance sampling. The distribution we sample from, $q(x)$, is constructed as above by setting observed values. The true probability $p(x)$ is the multiplication of all of the normalized conditional probabilities regardless of whether the observed value was set or not. $q(x)$ is the same product, except that the term for each variable assigned by evidence is replaced by $1$ (its value is set, not sampled). Therefore, $\frac{p(x)}{q(x)}$ (the weight) is just the product of the probabilities at the observed variables.

In [21]:
# assumes the first variable in each factor is dependent on all others in the factor
def likelihood_weighting_top_down(all_factors,known_vars,evidence):
    """Draw one likelihood-weighted sample, clamping the evidence variables.

    Each factor is treated as the distribution of its first variable (the
    child) given the remaining variables (its parents). Factors are processed
    in passes: a factor is handled once all of its parents were assigned in an
    *earlier* pass. An evidence variable is set to its observed value and the
    weight is multiplied by the conditioned factor's entry at that value; any
    other variable is sampled from the conditioned factor.

    Parameters
    ----------
    all_factors : sequence
        Factor objects exposing ``names``. The sequence is not modified.
    known_vars : list
        Names of the observed variables.
    evidence : list
        Observed values, index-aligned with ``known_vars``.

    Returns
    -------
    tuple of (list, list, weight)
        The child variable names in the order they were assigned, the value
        assigned to each name (index-aligned), and the sample weight, which
        is ``1`` when no evidence variable was encountered.

    Raises
    ------
    ValueError
        If a full pass assigns no variable while factors remain, i.e. some
        parent is never assigned (missing factor or cyclic dependency).
        Without this check the loop would never terminate.
    """
    assigned_variable_names = []
    variable_assignments = []
    weight = 1
    remaining_factors = list(all_factors)

    while remaining_factors:
        new_assigned_variable_names = assigned_variable_names.copy()
        new_variable_assignments = variable_assignments.copy()
        new_remaining_factors = []
        for f in remaining_factors:
            # all() over an empty parent list is True, so root factors are ready immediately.
            if all(parent in assigned_variable_names for parent in f.names[1:]):
                child = f.names[0]
                # Fix the already-assigned values, then condition the reduced factor.
                var_dropped_factor = factors.drop_variables(f, assigned_variable_names, variable_assignments)
                conditioned_factor = factors.condition(var_dropped_factor)
                if child in known_vars:
                    # Evidence: clamp the value and fold its probability into the weight.
                    evid = evidence[known_vars.index(child)]
                    new_variable_assignments.append(evid)
                    weight *= conditioned_factor.get([evid])
                else:
                    sample = factors.sample(conditioned_factor, 1)[0][0]
                    new_variable_assignments.append(sample)

                new_assigned_variable_names.append(child)
            else:
                new_remaining_factors.append(f)

        # No factor became ready in this pass: the remaining ones can never be handled.
        if len(new_remaining_factors) == len(remaining_factors):
            stuck = [f.names[0] for f in new_remaining_factors]
            raise ValueError(
                "Cannot sample variables " + str(stuck)
                + ": their parents are never assigned (missing factor or cyclic dependency)"
            )

        assigned_variable_names = new_assigned_variable_names
        variable_assignments = new_variable_assignments
        remaining_factors = new_remaining_factors
    return assigned_variable_names, variable_assignments, weight

**Checking results again:**

In [25]:
# Draw weighted samples with A clamped to 0; keep each assignment with its weight.
n_LW_samples = 1000
LW_rows = []
LW_row_weights = []
for _ in range(n_LW_samples):
    names, assignments, weight = likelihood_weighting_top_down(all_factors, ["A"], [0])
    LW_rows.append(assignments)
    LW_row_weights.append(weight)
all_LW_samples = np.array(LW_rows)
all_LW_weights = np.array(LW_row_weights)

# Weighted estimate for C (column 2): weight of matching samples over total weight.
prob_C_given_A_LW_sampled = factors.Factor(["C"], [2])
for c_value in (0, 1):
    matching_weight = np.sum(all_LW_weights * (all_LW_samples[:, 2] == c_value))
    prob_C_given_A_LW_sampled.set([c_value], matching_weight / np.sum(all_LW_weights))
print(prob_C_given_A_LW_sampled)

C  Values (10 dp)
0  0.469
1  0.531



### Gibbs sampling