# Problem 1

We use the provided package. However, its message passing methods are numerically unstable, so we edit those.

PS 3 - Question 2 <br>
Inference and Representation<br>
NYU Center for Data Science<br>
October 3, 2017

It is a Python adaptation of the Matlab code provided in Brown University CS242 Homework 1:
http://cs.brown.edu/courses/cs242/assignments/
The factor graph library (fglib) is a Python 3 package to simulate message passing on factor graphs: https://github.com/danbar/fglib

# We use a modified version of the `belief_propagation` method from the inference package; it obtains numerically stable estimates through a custom `spa` method and returns all marginal distributions

In [1]:
import numpy as np 
import networkx as nx
from fglib import graphs, nodes, rv, inference

class LoggedDiscrete(rv.Discrete):
    """Discrete random variable whose ``pmf`` array stores log-probabilities.

    The ``unity``, ``__mul__``, ``normalize``, ``marginalize`` and ``argmax``
    methods are overridden so that they operate in log space.
    Notably NOT overridden is the ``maximize`` method, which therefore still
    runs whatever the base class implements.
    """

    def __init__(self, raw_pmf, *args):
        """Create the random variable.

        Args:
            raw_pmf: Array-like of log-probabilities.
            *args: The variable nodes that label the dimensions of
                ``raw_pmf``, in axis order.
        """
        super().__init__(raw_pmf, *args)

    @staticmethod
    def _logsumexp(log_pmf, axis=None):
        """Return ``log(sum(exp(log_pmf)))`` over ``axis`` without underflow.

        Args:
            log_pmf: Numpy array of log-values.
            axis: Axis or tuple of axes to reduce; ``None`` reduces all axes
                and an empty tuple reduces none.
        Returns:
            The reduced array (or scalar) of log-values.
        """
        peak = np.max(log_pmf)
        if not np.isfinite(peak):
            # All mass is zero (or the input is degenerate): shifting by an
            # infinite peak would produce NaN, so do not shift at all.
            peak = 0.0
        return np.log(np.sum(np.exp(log_pmf - peak), axis)) + peak

    @classmethod
    def unity(cls, *args):
        """Initialize unit element of a discrete random variable.

        In log space the multiplicative identity is ``log(1) == 0``.

        Args:
            *args: Instances of the class VNode representing the variables of
                the probability mass function. The number of the positional
                arguments must match the number of dimensions of the Numpy
                array.
        Raises:
            ParameterException: An error occurred initializing with invalid
                parameters.
        """
        n = len(args)
        return cls(np.zeros((1,) * n), *args)

    def __mul__(self, other):
        """Multiply other with self and return the result.

        Multiplying probabilities corresponds to adding their logarithms.

        Args:
            other: Multiplier for the discrete random variable.
        Returns:
            A new discrete random variable representing the multiplication.
        """
        # Bring the operand with fewer dimensions up to the other's
        # dimensions. NOTE(review): ``_expand`` is inherited and is called for
        # its effect on the operand itself -- confirm it mutates in place.
        if len(self.dim) < len(other.dim):
            self._expand(other.dim, other.pmf.shape)
        elif len(self.dim) > len(other.dim):
            other._expand(self.dim, self.pmf.shape)

        pmf = self.pmf + other.pmf

        return LoggedDiscrete(pmf, *self.dim)

    def normalize(self):
        """Normalize probability mass function.

        Returns:
            A new random variable whose exponentiated entries sum to one.
        """
        # Subtracting the log-normaliser directly keeps very small entries
        # finite instead of round-tripping them through exp() and log().
        pmf = self.pmf - self._logsumexp(self.pmf)
        return LoggedDiscrete(pmf, *self.dim)

    def marginalize(self, *dims, normalize=True):
        """Return the marginal for given dimensions.

        The probability mass function of the discrete random variable
        is marginalized along the given dimensions.

        Args:
            *dims: Instances of discrete random variables, which should be
                marginalized out.
            normalize: Boolean flag if probability mass function should be
                normalized after marginalization.
        Returns:
            A new discrete random variable representing the marginal.
        """
        axis = tuple(idx for idx, d in enumerate(self.dim) if d in dims)

        # Sum the probabilities over the requested axes, staying in log space.
        pmf = self._logsumexp(self.pmf, axis)

        if normalize:
            # Only the log-normaliser is subtracted; adding the maximum back
            # afterwards would leave the result unnormalized.
            pmf = pmf - self._logsumexp(pmf)

        new_dims = tuple(d for d in self.dim if d not in dims)
        return LoggedDiscrete(pmf, *new_dims)

    def argmax(self, dim=None):
        """Return the dimension index of the maximum.

        Args:
            dim: An optional discrete random variable along a marginalization
                should be performed and the maximum is searched over the
                remaining dimensions. In the case of None, the maximum is
                search along all dimensions.
        Returns:
            An integer representing the dimension of the maximum.
        """
        if dim is None:
            return np.unravel_index(self.pmf.argmax(), self.pmf.shape)
        m = self.marginalize(dim)
        # log is monotonic, so the largest log-probability marks the most
        # probable state.
        return np.argmax(m.pmf)

    @classmethod
    def from_discrete(cls, discrete):
        """Convert a discrete RV object to a LoggedDiscrete object.

        Args:
            discrete: Random variable exposing ``pmf`` (probabilities in
                linear space) and ``dim`` (its variable nodes).
        Returns:
            A new instance over the same dimensions holding ``log(pmf)``.
        """
        # Zero probabilities legitimately map to -inf; silence the warning.
        with np.errstate(divide='ignore'):
            log_pmf = np.log(discrete.pmf)
        return cls(log_pmf, *discrete.dim)

In [2]:
class LoggedFNode(nodes.FNode):
    """Factor node whose sum-product message uses this file's ``spa``."""

    def __init__(self, label, factor=None):
        """Create a factor node."""
        super().__init__(label, factor)

    def spa(self, tnode):
        """Return message of the sum-product algorithm.

        Args:
            tnode: The node the message is being computed for; it is passed
                to ``self.neighbors`` when collecting the other neighbours.
        Returns:
            The local factor combined with the messages stored on the edges
            to those neighbours, with each of those neighbours marginalized
            out (no normalization).
        """
        # Initialize with local factor
        msg = self.factor

        # Product over incoming messages; each stored message is fetched
        # exactly once.
        for n in self.neighbors(tnode):
            msg *= self.graph[n][self]['object'].get_message(n, self)

        # Integration/Summation over incoming variables
        for n in self.neighbors(tnode):
            msg = msg.marginalize(n, normalize=False)

        return msg
    
class LoggedVNode(nodes.VNode):
    """Variable node with belief and sum-product message methods redefined."""

    def __init__(self, label, rv_type, observed=False):
        """Create a variable node."""
        super().__init__(label, rv_type, observed)

    def belief(self, normalize=True):
        """Return belief of the variable node.

        Args:
            normalize: Boolean flag if belief should be normalized.
        Returns:
            The combination of the messages stored on every edge into this
            node, normalized on request.
        """
        iterator = self.graph.neighbors(self)

        # Pick first node
        n = next(iterator)

        # Product over all incoming messages
        belief = self.graph[n][self]['object'].get_message(n, self)
        for n in iterator:
            belief *= self.graph[n][self]['object'].get_message(n, self)

        if normalize:
            belief = belief.normalize()

        return belief

    def maximum(self, normalize=True):
        """Return the maximum of the belief of the variable node.

        Args:
            normalize: Boolean flag if belief should be normalized.
        Returns:
            The largest entry of the belief's ``pmf`` array.
            NOTE(review): with a log-space ``rv_type`` this is the log of the
            maximum probability -- confirm callers expect that.
        """
        b = self.belief(normalize)
        return np.amax(b.pmf)

    def argmax(self):
        """Return the argument for maximum probability of the variable node."""
        # In case of multiple occurrences of the maximum values,
        # the indices corresponding to the first occurrence are returned.
        b = self.belief()
        # NOTE(review): LoggedDiscrete.argmax(dim) marginalizes `dim` out
        # before searching; confirm this gives the intended index for a
        # belief over this single variable.
        return b.argmax(self)

    def spa(self, tnode):
        """Return message of the sum-product algorithm.

        Args:
            tnode: The node the message is being computed for; it is passed
                to ``self.neighbors`` when collecting the other neighbours.
        Returns:
            ``self.init`` for an observed node; otherwise ``self.init``
            combined with the messages stored on the edges to those
            neighbours.
        """
        if self.observed:
            return self.init

        # Initial message
        msg = self.init

        # Product over incoming messages
        for n in self.neighbors(tnode):
            msg *= self.graph[n][self]['object'].get_message(n, self)
        return msg
        

In [3]:
def get_beliefs(fg):
    """Run two-pass belief propagation and return every variable's belief.

    Intended for exact inference on tree structured factor graphs. A root
    variable node is drawn at random; messages are first sent along the
    reversed depth-first edges and then along the depth-first edges.

    Args:
        fg: Factor graph whose edges carry message objects under the
            ``'object'`` key.
    Returns:
        A list with the belief of each node in ``fg.get_vnodes()``, in that
        order.
    """
    import random

    # Any variable node can act as the root.
    root = random.choice(fg.get_vnodes())

    # Depth-first edges, each given as (parent, child) as seen from the root.
    outward_edges = list(nx.dfs_edges(fg, root))

    # First sweep: walk the edges backwards, sending child -> parent.
    for parent, child in reversed(outward_edges):
        message = child.spa(parent)
        fg[child][parent]['object'].set_message(child, parent, message)

    # Second sweep: walk the edges in DFS order, sending parent -> child.
    for parent, child in outward_edges:
        message = parent.spa(child)
        fg[parent][child]['object'].set_message(parent, child, message)

    # Collect the marginal distribution of every variable node.
    return [vnode.belief() for vnode in fg.get_vnodes()]

In [4]:
# Build the factor graph. Every table below is stored as log-probabilities.
fg = graphs.FactorGraph()

# Variable nodes
x1, x2, x3, x4 = (
    LoggedVNode(label, LoggedDiscrete) for label in ("x1", "x2", "x3", "x4")
)

# Factor nodes
f12, f234, f3, f4 = (
    LoggedFNode(label) for label in ("f12", "f234", "f3", "f4")
)

# Register the nodes: variables first, then factors.
fg.set_nodes([x1, x2, x3, x4])
fg.set_nodes([f12, f234, f3, f4])

# Connect the nodes.
for edge in (
    (x1, f12),
    (f12, x2),
    (x2, f234),
    (f234, x3),
    (f234, x4),
    (x3, f3),
    (x4, f4),
):
    fg.set_edge(*edge)

# Potential for f_3: p(x3)
dist_f3 = np.log([0.5, 0.5]).tolist()
f3.factor = LoggedDiscrete(dist_f3, x3)

# Potential for f_4: p(x4)
dist_f4 = np.log([0.4, 0.6]).tolist()
f4.factor = LoggedDiscrete(dist_f4, x4)

# Potential for f_{234}: p(x2, x3, x4) = p(x2 | x3, x4) p(x3, x4), with axes
# ordered (x3, x4, x2). p(x3, x4) is the outer product p(x3) p(x4); the
# trailing length-1 axis lets it broadcast over x2.
# NOTE(review): f3 and f4 already carry p(x3) and p(x4) as separate factors,
# and f12 below carries p(x2) again -- confirm this repetition is intended.
px3x4 = np.log(np.outer(np.exp(dist_f3), np.exp(dist_f4)))
px3x4 = px3x4[:, :, np.newaxis]
px2_conditioned_x3x4 = np.log([
    [[0.2, 0.8], [0.25, 0.75]],
    [[0.7, 0.3], [0.3, 0.7]],
]).tolist()

dist_f234 = px3x4 + px2_conditioned_x3x4  # product of probabilities
f234.factor = LoggedDiscrete(dist_f234, x3, x4, x2)

# Potential for f_{12}: p(x1, x2) = p(x1 | x2) p(x2), with axes (x2, x1).
px1_conditioned_x2 = np.log([
    [0.5, 0.5],
    [0.7, 0.3],
]).tolist()
# p(x2) is obtained by summing x3 and x4 out of f_{234}; log-sum-exp keeps
# the sum in log space.
m = np.max(dist_f234)
px2 = np.log(np.sum(np.exp(dist_f234 - m), axis=(0, 1))) + m
dist_f12 = px2[:, np.newaxis] + px1_conditioned_x2  # product of probabilities
f12.factor = LoggedDiscrete(dist_f12, x2, x1)

In [5]:
# Run belief propagation on the factor graph and collect one belief per
# variable node.
beliefs = get_beliefs(fg)
# Print belief of variable nodes
print("Belief of variable nodes ")
for belief in beliefs:
    # pmf holds log-probabilities, so exponentiate before printing
    print(np.exp(belief.pmf))

Belief of variable nodes 
[0.65897284 0.34102716]
[0.20513578 0.79486422]
[0.52640912 0.47359088]
[0.28679718 0.71320282]


In [66]:
# Show px3x4 in probability space (it is stored as logarithms).
np.exp(px3x4)

array([[[0.2],
        [0.3]],

       [[0.2],
        [0.3]]])

In [41]:
# Show the conditional table p(x2 | x3, x4) in probability space (it is
# stored as logarithms).
np.exp(px2_conditioned_x3x4)

array([[[0.2 , 0.8 ],
        [0.25, 0.75]],

       [[0.7 , 0.3 ],
        [0.3 , 0.7 ]]])

In [67]:
# Re-run belief propagation and print the beliefs.
# NOTE(review): the output recorded under this cell contains "Starting
# message is ..." traces, i.e. it appears to come from a run in which the
# now commented-out debug prints in LoggedFNode.spa were still active.
beliefs = get_beliefs(fg)
# Print belief of variable nodes
print("Belief of variable nodes ")
for belief in beliefs:
    # pmf holds log-probabilities, so exponentiate before printing
    print(np.exp(belief.pmf))

Starting message is
[[[0.04  0.16 ]
  [0.075 0.225]]

 [[0.14  0.06 ]
  [0.09  0.21 ]]]
Added message is
[-0.69314718 -0.69314718]
[0.5 0.5]
Final message is 
[[[0.02   0.08  ]
  [0.0375 0.1125]]

 [[0.07   0.03  ]
  [0.045  0.105 ]]]
Next loop now 

Starting message is
[[[0.02   0.08  ]
  [0.0375 0.1125]]

 [[0.07   0.03  ]
  [0.045  0.105 ]]]
Added message is
[-0.91629073 -0.51082562]
[0.4 0.6]
Final message is 
[[[0.008  0.032 ]
  [0.0225 0.0675]]

 [[0.028  0.012 ]
  [0.027  0.063 ]]]
Next loop now 

Starting message is
[[0.1725 0.1725]
 [0.4585 0.1965]]
Added message is
[[0.]
 [0.]]
[[1.]
 [1.]]
Final message is 
[[0.1725 0.1725]
 [0.4585 0.1965]]
Next loop now 

Starting message is
[[0.1725 0.1725]
 [0.4585 0.1965]]
Added message is
[-2.4592389  -1.74583054]
[0.0855 0.1745]
Final message is 
[[0.01474875 0.01474875]
 [0.08000825 0.03428925]]
Next loop now 

Starting message is
[[[0.04  0.16 ]
  [0.075 0.225]]

 [[0.14  0.06 ]
  [0.09  0.21 ]]]
Added message is
[-1.06421086 -0.423

In [49]:
# Unfinished sketch of a hand-written, dictionary-based message-passing sweep
# in log space.
# NOTE(review): this cell is not runnable as written; the individual problems
# are flagged next to the lines concerned.

# initialize factor to variable data structure
# keys are factors, variables
f2v = {}
for f in fg.get_fnodes():
    vs = f.neighbors()
    for v in vs:
        # NOTE(review): f2v[f] is never created before this nested
        # assignment, so it raises KeyError; presumably an inner dict per
        # factor was intended.
        f2v[f][v] = 0

# initialize variable to factor data structure
# keys are variables, factors
v2f = {}
for v in fg.get_vnodes():
    fs = v.neighbors()
    for f in fs:
        # NOTE(review): same missing inner dict as for f2v. The start value
        # is 1 here but 0 for f2v -- confirm which one is meant for
        # log-messages.
        v2f[v][f] = 1

# variable to factor pass
for f in fg.get_fnodes():
    vs = f.neighbors()
    for v in vs:
        s = 0 # log-message sum
        # sum up the logged factor to variable messages for v
        for f_ in v.neighbors(f):
            s = s + f2v[f_][v]
        v2f[v][f] = s
        
# factor to variable pass
# NOTE(review): `df` is not defined anywhere in the visible notebook;
# presumably `fg` was intended.
for v in df.get_vnodes():
    fs = v.neighbors()
    ss = {}
    # get sum of log-messages
    for f in fs:
        s = 0 # log-message sum
        # sum up the logged variable to factor messages for v
        for v_ in f.neighbors(v):
            s = s + v2f[v_][f]
        ss[f] = s
    # subtract the max to ensure numerical stability upon exponentiation
    m = max(ss.values())
    for f in fs:
        s2p = 0
        # NOTE(review): the loop header below is placeholder pseudo-code, not
        # valid Python; the iteration over the factor's pmf was never written.
        for POSSIBLE SLICE OF PMF:
            # NOTE(review): `s` still holds the value left over from the loop
            # above; presumably ss[f] was intended.
            s2p = s2p + f.marginalize(v) * np.exp(s - m)
        # transform back to log space
        f2v[f][v] = np.log(s2p)
        
    # TODO: the remaining steps were never written (the original comment
    # here stops mid-sentence: "operate the").

f12 x1
f12 x2
f234 x2
f234 x3
f234 x4
f3 x3
f4 x4


In [5]:
# Debug setup: rebuild the message schedule by hand, rooted at the first
# variable node instead of a randomly chosen one.
query_node = fg.get_vnodes()[0]

# Depth First Search to determine edges
dfs = nx.dfs_edges(fg, query_node)

# Materialize the DFS edges as a list; the forward path is a one-shot
# iterator over that list in reverse order.
backward_path = list(dfs)
forward_path = reversed(backward_path)    

In [6]:
# Take the next edge from the forward path for manual inspection; this
# consumes one element of the iterator.
a = next(forward_path)

In [7]:
# Compute the sum-product message of node a[0] for target a[1] and show it in
# probability space.
np.exp(a[0].spa(a[1]).pmf)

AttributeError: 'NoneType' object has no attribute 'dim'

(<__main__.LoggedVNode at 0x7f545fc88f28>,
 <__main__.LoggedFNode at 0x7f545fca90b8>)

In [12]:
# Look up the message stored on the edge between n and self. `self` and `n`
# are notebook globals assigned in other scratch cells (the cell numbers show
# they were run out of order).
# NOTE(review): presumably this is the n -> self direction -- confirm against
# the edge object's get_message signature.
self.graph[n][self]['object'].get_message(n, self)

In [14]:
# Inspect the pmf array of the node's `init` value, which spa uses as its
# starting message.
self.init.pmf

array([0.])

In [9]:
# Manual replay of LoggedVNode.spa for the edge held in `a`, for debugging.
# `self`, `tnode`, `msg` and `n` are deliberately left as notebook globals so
# that the other scratch cells can inspect them.
self = a[0]
tnode = a[1]

# Observed and unobserved nodes both start from the initial message; only an
# unobserved node combines it with incoming messages.
msg = self.init
if not self.observed:
    # Product over incoming messages
    for n in self.neighbors(tnode):
        # Use `*=`, as LoggedVNode.spa does: `*` is the operator that
        # LoggedDiscrete overrides, whereas `+` is left to the base class.
        msg *= self.graph[n][self]['object'].get_message(n, self)

msg.pmf

AttributeError: 'NoneType' object has no attribute 'dim'

In [97]:
# Inspect the pmf array of the node's `init` value, which spa uses as its
# starting message.
self.init.pmf

array([0.])

In [110]:
# Take the first node returned by self.neighbors(tnode).
# NOTE(review): presumably neighbors(tnode) leaves tnode out -- confirm.
n = list(self.neighbors(tnode))[0]

In [114]:
# Inspect the raw pmf array of the message stored on the edge between n and
# self; no exp() is applied, so these are the stored (log-space) values.
self.graph[n][self]['object'].get_message(n, self).pmf

array([-24.44677742, -22.66631196])

In [109]:
# Show, in probability space, the factor table of the first node returned by
# self.neighbors(tnode).
np.exp(list(self.neighbors(tnode))[0].factor.pmf)

array([[[0.04 , 0.16 ],
        [0.075, 0.225]],

       [[0.14 , 0.06 ],
        [0.09 , 0.21 ]]])