In [15]:
import numpy as np
import theano
from theano import shared, function, scan, tensor as T

In [9]:
# Problem sizes for the toy model.
num_vis, num_hid = 15, 10  # number of visible / hidden units
batch_size = 20  # number of examples per mini-batch

## (Bernoulli) Restricted Boltzmann Machine (RBM)

* binary valued with $v \in \{0,1\}$

* wake (positive) phase - visible units clamped
* sleep (negative) phase - unclamped units

In [10]:
# Model parameters: each is a Theano shared variable initialised with
# uniform random values in [0, 1) from np.random.rand.
W = shared(
    np.random.rand(num_vis, num_hid),  # weights, shape (num_vis, num_hid)
    allow_downcast=True,
)
b_v = shared(np.random.rand(num_vis), allow_downcast=True)  # visible bias, shape (num_vis,)
b_h = shared(np.random.rand(num_hid), allow_downcast=True)  # hidden bias, shape (num_hid,)

In [156]:
# Symbolic float32 matrices for the unit states:
#   v is intended to hold (batch_size, num_vis), h to hold (batch_size, num_hid).
v, h = T.fmatrices('v', 'h')

In [90]:
def sample_h_given_v(self, v):
    """
    Bottom-up pass: hidden pre-activation, firing probability and a binary draw.

    v ... visible units, expected shape (batch_size, num_vis)
    returns [h_pre, h_mean, h_samp], expected shape (batch_size, num_hid) each
        h_pre  = v W + b_h
        h_mean = sigm(h_pre), i.e. p(h_i=1|v)
        h_samp = one Bernoulli trial per entry with success probability h_mean,
                 drawn from self.srng
    """
    activation = T.dot(v, self.W) + self.b_h
    firing_prob = T.nnet.sigmoid(activation)
    draw = self.srng.binomial(
        size=firing_prob.shape,
        n=1,
        p=firing_prob,
        dtype=theano.config.floatX,
    )
    return [activation, firing_prob, draw]

def sample_v_given_s_h(self, s, h):
    """
    Top-down pass: visible pre-activation, activation probability and a binary draw.

    s ... not used by this function; kept in the signature so existing
          (s, h) call sites keep working
    h ... hidden units of shape (batch_size, num_hid)
    returns [v_pre, v_mean, v_samp], each of shape (batch_size, num_vis)

    sampling v ~ p(v_j=1|h) = sigm(W_{j} h^T + b^v_j)
    """
    Wh = T.dot(h, self.W.T) # (batch_size, num_vis)
    v_pre = Wh + self.b_v
    v_mean = T.nnet.sigmoid(v_pre)
    # Draw from the *visible* probabilities. This call used to reference
    # `h_mean`, a name that does not exist in this function.
    v_samp = self.srng.binomial(size=v_mean.shape, n=1, p=v_mean,
                                dtype=theano.config.floatX)
    return [v_pre, v_mean, v_samp] # 3x(batch_size, num_vis)

Energy function:

$$E(v,h) = - v^T W h - b_h^T h - b_v^T v$$

$$p(v, h) \propto \exp\left(- E(v, h)\right)$$  

$$ p(v) = \sum_h p(v,h) = \sum_h\frac{\exp(-E(v,h))}{Z}$$

Free energy: 

$$ F(v) = - \log(p(v)) \\  
= - \sum_{i=1}^N \log \left (1 + \exp\left(v^T W_i + {b_h}_i \right) \right) - b^T_v v + \log Z\\
= - \sum_{i=1}^N \operatorname{softplus} \left (v^T W_i + {b_h}_i \right) - b^T_v v + \log Z $$

In [2]:
def free_energy_spike_n_slab(self, v):
    """
    Per-example free energy with a linear visible term (no log Z term):

        F(v) = - v . b_v - sum_i softplus(v W_i + b_h_i)

    v ... visible units, expected shape (batch_size, num_vis)
    returns one value per row of v, i.e. shape (batch_size,)
    """
    hidden_input = T.dot(v, self.W) + self.b_h  # (batch_size, num_hid)
    visible_part = T.dot(v, self.b_v)  # (batch_size,)
    hidden_part = T.nnet.softplus(hidden_input).sum(axis=1)  # (batch_size,)
    return -visible_part - hidden_part

## Truncated Exponential RBM

* real-valued with $v \in [0, K], K < \infty$

In [4]:
def sample_h_given_v(self, v):
    """
    Compute hidden pre-activations and probabilities from v, then draw binary hiddens.

    v ... visible units, expected shape (batch_size, num_vis)
    returns [h_pre, h_mean, h_samp], expected shape (batch_size, num_hid) each,
            where h_mean = sigm(v W + b_h) = p(h_i=1|v) and h_samp is a
            single-trial binomial draw from self.srng with probability h_mean
    """
    pre_sigmoid = T.dot(v, self.W) + self.b_h
    p_on = T.nnet.sigmoid(pre_sigmoid)
    binomial_kwargs = dict(size=p_on.shape, n=1, p=p_on,
                           dtype=theano.config.floatX)
    return [pre_sigmoid, p_on, self.srng.binomial(**binomial_kwargs)]

def sample_v_given_s_h(self, s, h):
    """
    Affine top-down pass from hidden to visible units.

    s ... not referenced in the body
    h ... hidden units; the product with W^T expects shape (batch_size, num_hid)
    returns [v_pre, v_mean, v_samp] where all three entries are the same
            expression h W^T + b_v, expected shape (batch_size, num_vis)

    NOTE(review): no squashing, truncation to [0, K] or random draw happens
    here even though this cell sits under the truncated-exponential heading;
    confirm whether that is intended.
    """
    v_pre = T.dot(h, self.W.T) + self.b_v
    return [v_pre, v_pre, v_pre]

In [None]:
def free_energy_spike_n_slab(self, v):
    """
    Per-example free energy with a quadratic visible term:

        F(v) = 0.5 * ||v - b_v||^2 - sum_i softplus(v W_i + b_h_i)

    v ... visible units, expected shape (batch_size, num_vis)
    returns one value per row of v, i.e. shape (batch_size,)

    NOTE(review): this body is identical to the one in the Gaussian RBM
    section; confirm it is the intended form for the truncated-exponential model.
    """
    centred = v - self.b_v
    quadratic = 0.5 * T.sum(centred**2, axis=1)  # (batch_size,)
    hidden_input = T.dot(v, self.W) + self.b_h  # (batch_size, num_hid)
    return quadratic - T.nnet.softplus(hidden_input).sum(axis=1)

## Gaussian RBM (GRBM)

* real-valued with $v \in \mathbb{R}$

In [6]:
def sample_h_given_v(self, v):
    """
    Hidden-unit sampler: pre-activation, sigmoid probability, binary sample.

    v ... visible units, expected shape (batch_size, num_vis)
    returns [h_pre, h_mean, h_samp], expected shape (batch_size, num_hid) each
        h_pre  = v W + b_h
        h_mean = sigm(h_pre) = p(h_i=1|v)
        h_samp = self.srng.binomial draw (n=1) with probability h_mean
    """
    outputs = [T.dot(v, self.W) + self.b_h]  # h_pre
    outputs.append(T.nnet.sigmoid(outputs[0]))  # h_mean
    outputs.append(
        self.srng.binomial(size=outputs[1].shape, n=1, p=outputs[1],
                           dtype=theano.config.floatX)
    )  # h_samp
    return outputs

def sample_v_given_h(self, h):
    """
    Affine top-down pass from hidden to visible units.

    h ... hidden units; the product with W^T expects shape (batch_size, num_hid)
    returns [v_pre, v_mean, v_samp] where all three entries are the same
            expression h W^T + b_v, expected shape (batch_size, num_vis)

    No noise is added: the returned "sample" is just the linear mean.
    """
    linear_mean = T.dot(h, self.W.T) + self.b_v
    return [linear_mean] * 3

In [5]:
def free_energy_spike_n_slab(self, v):
    """
    Per-example free energy with a quadratic visible term:

        F(v) = 0.5 * ||v - b_v||^2 - sum_i softplus(v W_i + b_h_i)

    v ... visible units, expected shape (batch_size, num_vis)
    returns one value per row of v, i.e. shape (batch_size,)
    """
    squared_dist = T.sum((v - self.b_v)**2, axis=1)  # (batch_size,)
    softplus_sum = T.nnet.softplus(
        T.dot(v, self.W) + self.b_h  # (batch_size, num_hid)
    ).sum(axis=1)
    return 0.5 * squared_dist - softplus_sum