In [2]:
import numpy as np
from tqdm import tqdm

### Exact Inference

- Edges are potential functions -> p(A,B) propto phi(A,B)
- Joint probability distribution factorises into cliques potentials
- 

In [3]:
N = 10
lattice = np.reshape(np.arange(1, N*N+1), (N, N))

In [4]:
def potential_grid(beta):
    return np.exp(beta * np.eye(2))

In [5]:
def get_nearest_neighbors(i, j, N):
    # only right and below neighbours to avoid double counting
    neighbors = []
    if i > 0:
        neighbors.append((i-1, j))
    if i < N-1:
        neighbors.append((i+1, j))
    if j > 0:
        neighbors.append((i, j-1))
    if j < N-1:
        neighbors.append((i, j+1))
        
    return neighbors

In [26]:
def ising_exact_inference(temp):
    N = 10

    # Form the potentials for each pair of neighboring variables
    potentials = potential_grid(temp)

    # Create a grid of variables
    S = np.arange(1, N*N+1).reshape((N, N))

    # store the potentials in a dictionary - keyed by the index of the factor
    phi = {}
    # Create a graph of indices for the pairwise potentials - dimensions (x1, x2, y1, y2)
    factor_graph = np.zeros((N, N, N, N), dtype=int)

    c = 0
    # Compute pairwise potentials and factor graph
    for s1 in range(1, N*N+1):
        i1, j1 = np.argwhere(S == s1)[0]
        for s2 in range(s1+1, N*N+1):
            i2, j2 = np.argwhere(S == s2)[0]
            # If the variables are neighbors store potentials in phi and index in factor graph
            if (j1 == j2 and np.abs(i1 - i2) == 1) or (i1 == i2 and np.abs(j1 - j2) == 1):
                # increment factor index and store potential
                c += 1
                phi[c] = potentials
                # build factor graph - undirected graph therefore add both directions
                factor_graph[i1, i2, j1, j2] = c
                factor_graph[i2, i1, j2, j1] = c

    # Create row potentials - product of all pairwise potentials in the row
    rowphi = [np.ones((2, 2))]
    # traverse the rows - top to bottom
    for i in range(1, N-1):
        # initialise row potential as identity matrix
        rowphi_i = np.ones((2, 2))
        # traverse the columns - left to right
        for j in range(1, N-1):
            # multiply by below neighbour potentials
            rowphi_i = np.multiply(rowphi_i, phi[factor_graph[i, i, j, j+1]])
            # multiply by right neighbour potentials
            rowphi_i = np.multiply(rowphi_i, phi[factor_graph[i, i+1, j, j]])

        # multiply by below neighbour potentials for last column 
        # (no right neighbour since last column)
        rowphi_i = np.multiply(rowphi_i, phi[factor_graph[i, i+1, N-1, N-1]])
        rowphi.append(rowphi_i)

    # Compute the row potential for the last row
    # (no below neighbour since last row)
    rowphi_N = np.ones((2, 2))
    for j in range(N-1):
        rowphi_N = np.multiply(rowphi_N, phi[factor_graph[N-1, N-1, j, j+1]])
    rowphi.append(rowphi_N)

    # Belief propagation on collapsed graph
    # Initialise messages
    messages = [np.ones((2, 2))]
    for i in range(1, N):
        messages.append(np.matmul(rowphi[i], messages[i-1]))

    # Compute the marginals
    marginals = []
    for i in range(N):
        marginals.append(np.multiply(messages[i], rowphi[i]))
        marginals[i] = marginals[i] / np.sum(marginals[i])

    # normalise the marginals
    marginals = np.array(marginals)
    
    return marginals, logZ


# Call the function
marginals, logZ = ising_exact_inference(1)

In [31]:
joint = marginals[0]*marginals[9]
joint = joint / np.sum(joint)
joint

array([[4.99938303e-01, 6.16972880e-05],
       [6.16972880e-05, 4.99938303e-01]])

### Gibbs Sampling

Algorithm
- sample state, x
- sample from proposal, x' (symmetric)
- compute acceptance ratio, a
- with probability a x = x'
- save x

In [7]:
def initial_state(N):
    return np.random.choice([-1, 1], size=(N, N))

In [33]:
import random
def gibbs_sampling(max_iter=10000, burn_in=1000, beta=1.0, N=10):

    # initialise the state
    state = initial_state(N)
    initial = state.copy()

    potentials = potential_grid(beta)

    samples = []
    for _ in range(max_iter):
        flip_count = 0
        for i in range(N):
            for j in range(N):
                # compute the conditional probability
                # p(x_ij = 1 | x_-ij) proportional to product of NN potentials
                nn_potentials = []
                for n in get_nearest_neighbors(i, j, N):
                    nn_potentials.append(potentials[state[i, j], state[n[0], n[1]]])
                p_tilde = np.prod(nn_potentials)
                # normalise
                p = p_tilde / (p_tilde + 1)
                # sample from the conditional distribution
                new_state = np.random.choice([-1, 1], p=[1-p, p])
                # if state flips update the state and flip count
                if new_state != state[i, j]:
                    flip_count += 1
                    state[i, j] = new_state
        # store the state
        samples.append(state.copy())
        # if no state flips in the last iteration break - fully converged
        if len(samples) > burn_in and flip_count == 0:
            break

    return samples[burn_in:], initial

In [40]:
samples, initial = gibbs_sampling(max_iter=1000, burn_in=100, beta=0.1, N=10)

In [41]:
def prob_distribution(samples, N=10):
    # compute the empirical distribution by counting the number of times each state is positive
    p = np.zeros((N, N))
    for sample in samples:
        for i in range(N):
            for j in range(N):
                if sample[i, j] == 1:
                    p[i, j] += 1
    # normalise
    p /= len(samples)

    return p

In [49]:
marginals = prob_distribution(samples)
joint_table = np.zeros([2,2])

joint_table[0,0] = marginals[0,9]*marginals[9,9]
joint_table[0,1] = marginals[0,9]*(1-marginals[9,9])
joint_table[1,0] = (1-marginals[0,9])*marginals[9,9]
joint_table[1,1] = (1-marginals[0,9])*(1-marginals[9,9])

joint_table

array([[0.30493827, 0.24395062],
       [0.25061728, 0.20049383]])

### Mean-Field Approximation

Process
- Determine factorisable distribution q
- ELBO -> KL Divergence


In [12]:
def proposal_distribution(alpha):
    q = np.exp(alpha) / (np.exp(alpha) + np.exp(-alpha))
    return q

In [13]:
def compute_elbo(alpha, beta, N):

    binary_entropy = 0
    energy = 0
    for i in range(N):
        for j in range(N):
            binary_entropy += np.log(np.exp(alpha[i,j]) + np.exp(-alpha[i,j])) - alpha[i,j] * np.tanh(alpha[i,j])
            for n in get_nearest_neighbors(i, j, N):
                energy += beta * np.tanh(alpha[i,j]) * np.tanh(alpha[n[0],n[1]])

    return binary_entropy + energy

In [14]:
def update_alpha(alpha, beta, N):

    for i in range(N):
        for j in range(N):
            for n in get_nearest_neighbors(i, j, N):
                    alpha[i,j] += beta * np.tanh(alpha[n[0],n[1]])

    return alpha

In [75]:
def coordinate_ascent(max_iter, beta, N, tolerance):
 
    state_0 = initial_state(N)
    alpha = np.random.uniform(size=(N, N))
    state = state_0.copy()

    # Placeholder for storing previous ELBO value
    prev_elbo = -np.inf

    iter = 0
    for _ in tqdm(range(max_iter)):
        iter += 1

        for i in range(N):
            for j in range(N):
                u = np.random.uniform()
                q = proposal_distribution(alpha[i,j])
                if u < q:
                    state[i,j] = 1
                else:
                    state[i,j] = -1
        # Compute ELBO
        elbo = compute_elbo(alpha, beta, N)
        
        # Check for convergence
        if np.abs(elbo - prev_elbo) == tolerance or elbo < prev_elbo:
            break
        
        alpha = update_alpha(alpha, beta, N)
        
        # Update previous ELBO value for convergence check
        prev_elbo = elbo

    return alpha, iter

In [79]:
alpha, iter = coordinate_ascent(max_iter=10000, beta=0.01, N=10, tolerance=1e-10)

  0%|          | 1/10000 [00:00<00:30, 323.53it/s]


In [80]:
alpha, iter

(array([[0.13140281, 0.72132764, 0.73518608, 0.78468446, 0.82251973,
         0.90454701, 0.09555338, 0.02281779, 0.11798546, 0.7884537 ],
        [0.6629321 , 0.69301719, 0.55863662, 0.15471899, 0.55757706,
         0.5412679 , 0.32427463, 0.46268601, 0.7794854 , 0.13457903],
        [0.13845465, 0.13638283, 0.71304976, 0.88536987, 0.94727623,
         0.7207491 , 0.93352044, 1.00054533, 0.80659246, 0.82893212],
        [0.06493832, 0.21994288, 0.88116516, 0.61229848, 0.74344839,
         0.55680402, 0.73720125, 0.81049918, 0.25390939, 0.11916099],
        [0.82992592, 0.63642282, 0.44532464, 0.46208465, 0.89259539,
         0.97898143, 0.73863817, 0.03656629, 0.7317334 , 0.41734226],
        [0.98774019, 0.7910785 , 0.12855522, 0.57970764, 0.17680485,
         0.49443661, 0.34259555, 0.80775986, 0.80361764, 0.73428685],
        [0.56513787, 0.03846341, 0.57194786, 0.02920397, 0.54696884,
         0.5453011 , 0.03718055, 0.48915341, 1.00947621, 0.37162815],
        [0.88669831, 0.6087

In [81]:
marginals = proposal_distribution(alpha)

joint_table = np.zeros([2,2])

joint_table[0,0] = marginals[0,9]*marginals[9,9]
joint_table[0,1] = marginals[0,9]*(1-marginals[9,9])
joint_table[1,0] = (1-marginals[0,9])*marginals[9,9]
joint_table[1,1] = (1-marginals[0,9])*(1-marginals[9,9])

joint_table

array([[0.69497813, 0.13378795],
       [0.14359157, 0.02764234]])