# Posterior Sampling for a Gaussian Mixture Model with a CRP Prior Using a Gibbs Sampler

Reference: https://pdfs.semanticscholar.org/9ece/0336316d78837076ef048f3d07e953e38072.pdf

In [4]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

In [5]:
# Synthetic data: three 2-D Gaussian blobs, each with covariance
# diag(0.5, 0.5). Each pair below is (value used for both mean coordinates,
# number of points drawn); the draws happen in the listed order.
X1, X2, X3 = (
    np.random.multivariate_normal([centre, centre], np.diag([0.5, 0.5]), size=count)
    for centre, count in ((5, 20), (8, 20), (20, 10))
)

# Stack the blobs row-wise into one data matrix.
X = np.vstack((X1, X2, X3))

X

array([[  5.69495732,   4.94284443],
       [  5.68782589,   4.47679027],
       [  5.07670341,   5.27024669],
       [  5.01264355,   4.85634392],
       [  4.76901253,   5.33982979],
       [  6.13146084,   4.80160574],
       [  5.09411255,   5.71230429],
       [  5.45426402,   5.82271327],
       [  5.41291749,   6.47048614],
       [  4.46493235,   5.39304167],
       [  4.93414211,   5.7132852 ],
       [  5.008842  ,   4.38361214],
       [  5.78992476,   4.36240139],
       [  4.74251203,   5.94154687],
       [  6.25891103,   5.09371976],
       [  5.53060006,   6.80101313],
       [  5.90391398,   4.0444675 ],
       [  3.8510053 ,   5.42636835],
       [  3.15902075,   4.53601873],
       [  5.04865555,   5.52438314],
       [  7.94303999,   8.78235396],
       [  6.88585705,   7.04106302],
       [  7.45988939,   7.86791135],
       [  7.04769455,   7.29522411],
       [  8.7342623 ,   7.42250037],
       [  7.79284563,   9.05105669],
       [  7.10191592,   7.51952475],
 

In [6]:
# N = number of rows of X (data points), D = number of columns (dimensions).
N, D = np.shape(X)

In [7]:
# GMM parameters and sampler state.

# Fixed D x D identity matrix and its inverse.
sigma = np.eye(D)
precision = np.linalg.inv(sigma)

# Per-cluster bookkeeping; all start empty and gain one entry per cluster.
mus = list()  # mean vector of each Gaussian component
Ns = list()   # number of points in each cluster
C = list()    # cluster membership (original note: binary K x M matrix)

# One integer per data point, initialised to 0.
zs = np.zeros(N, dtype=int)

In [10]:
sigma

array([[ 1.,  0.],
       [ 0.,  1.]])

In [11]:
precision

array([[ 1.,  0.],
       [ 0.,  1.]])

In [12]:
zs

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0])

In [13]:
# CRP prior
# NOTE(review): alpha is presumably the CRP concentration parameter (larger
# values would favour opening new clusters); the code that consumes it is not
# visible in this part of the notebook -- confirm against the sampler loop.
alpha = 200

In [14]:
# Base distribution prior over cluster means: N(mu_theta, sigma_theta)
mu_theta = np.ones(D)
sigma_theta = np.eye(D)
# Derive the precision and the frozen distribution from sigma_theta itself
# (rather than from separate identity literals) so the three stay in
# agreement if the prior covariance is ever changed.
precision_theta = np.linalg.inv(sigma_theta)
G_theta = stats.multivariate_normal(mean=mu_theta, cov=sigma_theta)

In [15]:
# Begin with a single cluster (index 0) that owns every data point.
zs.fill(0)
Ns.append(N)
C.append(np.ones(N, dtype=int))  # all-ones membership vector of length N
mus.append(G_theta.rvs())        # its mean is one draw from the base prior

# Current number of clusters, plus a short alias for the MVN distribution.
K = 1
mvn = stats.multivariate_normal

In [None]:
# Gibbs sampler
for it in range(20):
    # here we sample from full conditional of assignment from CRP prior
    # z ~ GEM(alpha)
    # Now, for each data point, draw a cluster assignment
    for i in range(N):
        