# Pattern Recognition homework

## EM Algorithm - (a)

In [1]:
import numpy as np
from scipy.stats import multivariate_normal

### Generate the dataset

In [2]:
def generate_dataset(n_samples=1000):
    """Draw points from a fixed mixture of three 2-D Gaussians.

    The components have means (1, 1), (3, 3) and (2, 6) and isotropic
    covariances 0.1*I, 0.2*I and 0.3*I.  For every point a uniform integer
    in 0..9 selects the component: 0-3 picks the first, 4-7 the second and
    8-9 the third, i.e. mixing weights 0.4 / 0.4 / 0.2.

    Randomness comes from NumPy's global random state, so call
    ``np.random.seed`` beforehand for reproducible output.

    Args:
        n_samples: Number of points to draw (defaults to 1000).

    Returns:
        Array of shape (2, n_samples); each column is one data point.
    """
    m1 = np.array([1, 1])
    m2 = np.array([3, 3])
    m3 = np.array([2, 6])
    # Plain ndarrays instead of np.matrix, which NumPy no longer recommends.
    s1 = 0.1 * np.identity(2)
    s2 = 0.2 * np.identity(2)
    s3 = 0.3 * np.identity(2)

    norm1 = multivariate_normal(mean=m1, cov=s1)
    norm2 = multivariate_normal(mean=m2, cov=s2)
    norm3 = multivariate_normal(mean=m3, cov=s3)

    dataset = np.zeros((2, n_samples))  # each column corresponds to a data point

    for i in range(n_samples):
        # The component index is drawn first, then one sample from it, so
        # the order in which random numbers are consumed is unchanged.
        decide_pdf = np.random.randint(0, 10)
        if 0 <= decide_pdf <= 3:
            dataset[:, i] = norm1.rvs()
        elif 4 <= decide_pdf <= 7:
            dataset[:, i] = norm2.rvs()
        else:
            dataset[:, i] = norm3.rvs()

    return dataset

In [3]:
dataset = generate_dataset()

### Define functions and variables of EM algorithm

In [4]:
# Zero-initialised EM state for a 3-component mixture of 2-D Gaussians:
#   alpha  (3,)       one mixing weight per model
#   mu     (2, 3)     each column is the mean of one model
#   cov    (2, 2, 3)  cov[:, :, l] is the covariance of model l
#   expect (3, 1000)  each column holds the per-model values for one data point
alpha, mu, cov, expect = (
    np.zeros(shape) for shape in (3, (2, 3), (2, 2, 3), (3, 1000))
)

In [5]:
def pdf(X, mu_l, cov_l):
    """Evaluate a multivariate normal density at X.

    Args:
        X: Point(s) at which to evaluate the density, as accepted by
            ``scipy.stats.multivariate_normal.pdf``.
        mu_l: Mean vector of the distribution.
        cov_l: Covariance matrix of the distribution; singular matrices
            are accepted (``allow_singular=True``).

    Returns:
        The density value(s) at X.
    """
    return multivariate_normal.pdf(
        X, mean=mu_l, cov=cov_l, allow_singular=True
    )


def e_step(dataset, alpha, mu, cov, expect, J):
    """E-step: compute each model's responsibility for every data point.

    For model l and data point x the responsibility is
    ``alpha[l] * pdf(x | mu_l, cov_l)`` divided by the sum of that same
    quantity over all J models.

    Args:
        dataset: Data array; each column is one data point.
        alpha: Mixing weights, one per model.
        mu: Means; column l is the mean of model l.
        cov: Covariances; ``cov[:, :, l]`` belongs to model l.
        expect: Output array; rows 0..J-1 are overwritten in place.
        J: Number of models.

    Returns:
        The same ``expect`` array that was passed in, after the update.
    """
    points = np.transpose(dataset)

    # Evaluate every weighted density exactly once.  The normaliser is the
    # same for all models, so it is computed once from these rows instead of
    # being rebuilt inside the per-model loop.
    weighted = np.empty((J, dataset.shape[1]))
    for l in range(J):
        weighted[l, :] = alpha[l] * pdf(points, mu[:, l], cov[:, :, l])

    # NOTE: a point whose weighted densities are all zero gives 0/0 here.
    expect[:J, :] = weighted / np.sum(weighted, axis=0)

    return expect
            
        
def m_step(dataset, alpha, mu, cov, expect, J):
    """M-step: re-estimate mixing weights, means and covariances.

    Works for data of any dimension d, as long as ``mu`` has d rows and
    ``cov`` has shape (d, d, ...).

    Args:
        dataset: Data array of shape (d, N); each column is one data point.
        alpha: Ignored on input (kept for a signature parallel to
            ``e_step``); a freshly computed array is returned instead.
        mu: Means; column l is overwritten in place with the new mean of
            model l.
        cov: Covariances; ``cov[:, :, l]`` is overwritten in place.
        expect: Responsibilities; ``expect[l, i]`` weights data point i
            for model l.
        J: Number of models to update.

    Returns:
        Tuple ``(alpha, mu, cov)``: the new mixing weights (row sums of
        ``expect`` divided by N) and the updated ``mu`` and ``cov`` arrays.
    """
    n_points = dataset.shape[1]

    # calculate new alpha
    alpha = np.sum(expect, axis=1) / n_points

    for l in range(J):
        weights = expect[l]
        total_weight = np.sum(weights)

        # new mu: responsibility-weighted average of the data points
        mu[:, l] = dataset @ weights / total_weight

        # new cov: weighted sum of outer products of the centred points,
        # written as one matrix product instead of a loop over the points
        centred = dataset - mu[:, l][:, np.newaxis]
        cov[:, :, l] = (centred * weights) @ centred.T / total_weight

    return alpha, mu, cov

### Initialize the variables and run EM algorithm

#### (a) - (1)

In [6]:
# Run (a)-(1): three models with equal starting weights.
J = 3

alpha.fill(1/3)

# One row per model below; transposed so that column l of mu is model l.
mu[:] = np.transpose([[0, 2], [5, 2], [5, 5]])

# Isotropic starting covariances: cov[:, :, l] = variance_l * I.
cov[:] = np.eye(2)[:, :, np.newaxis] * np.array([0.15, 0.27, 0.4])

# 200 alternating E- and M-steps.
for i in range(200):
    expect = e_step(dataset, alpha, mu, cov, expect, J)
    alpha, mu, cov = m_step(dataset, alpha, mu, cov, expect, J)

# Report only the final estimate; i still holds the last index (199) here.
print('em iteration i =', i)
for k in range(J):
    print('alpha', k, '=', alpha[k])
for k in range(J):
    print('mu', k, '=', mu[:, k])
for k in range(J):
    print('cov', k, '=\n', cov[:, :, k])

em iteration i = 199
alpha 0 = 0.4109588768253148
alpha 1 = 0.41503857865526234
alpha 2 = 0.17400254451942282
mu 0 = [1.0116826  0.99781589]
mu 1 = [3.01182016 2.98883536]
mu 2 = [2.03959909 5.98347388]
cov 0 =
 [[ 0.09675528 -0.0017855 ]
 [-0.0017855   0.10538203]]
cov 1 =
 [[0.18589021 0.00947531]
 [0.00947531 0.20210651]]
cov 2 =
 [[ 0.25975244 -0.01719009]
 [-0.01719009  0.2662268 ]]


#### (a) - (2)

In [7]:
# Run (a)-(2): three models whose starting means lie close to one another.
J = 3

alpha[:] = (0.2, 0.4, 0.4)

# One row per model below; transposed so that column l of mu is model l.
mu[:] = np.transpose([[1.6, 1.4], [1.4, 1.6], [1.3, 1.5]])

# Isotropic starting covariances: cov[:, :, l] = variance_l * I.
cov[:] = np.eye(2)[:, :, np.newaxis] * np.array([0.2, 0.4, 0.3])

# 200 alternating E- and M-steps.
for i in range(200):
    expect = e_step(dataset, alpha, mu, cov, expect, J)
    alpha, mu, cov = m_step(dataset, alpha, mu, cov, expect, J)

# Report only the final estimate; i still holds the last index (199) here.
print('em iteration i =', i)
for k in range(J):
    print('alpha', k, '=', alpha[k])
for k in range(J):
    print('mu', k, '=', mu[:, k])
for k in range(J):
    print('cov', k, '=\n', cov[:, :, k])

em iteration i = 199
alpha 0 = 0.18107452213827144
alpha 1 = 0.5894317582205445
alpha 2 = 0.22949371964118406
mu 0 = [0.9921593  1.02795673]
mu 1 = [2.72417729 3.871655  ]
mu 2 = [1.0253246  0.97375114]
cov 0 =
 [[ 0.10599418 -0.05088895]
 [-0.05088895  0.10294813]]
cov 1 =
 [[ 0.40448068 -0.60276652]
 [-0.60276652  2.09103996]]
cov 2 =
 [[0.08731931 0.03744372]
 [0.03744372 0.10625254]]


#### (a) - (3)

In [9]:
# Run (a)-(3): fit only two models, so the EM state is re-allocated with
# two-model shapes:
#   alpha (2,), mu (2, 2), cov (2, 2, 2), expect (2, 1000)
J = 2

alpha, mu, cov, expect = (
    np.zeros(shape) for shape in (2, (2, 2), (2, 2, 2), (2, 1000))
)

alpha.fill(0.5)

# One row per model below; transposed so that column l of mu is model l.
mu[:] = np.transpose([[1.6, 1.4], [1.4, 1.6]])

# Isotropic starting covariances: cov[:, :, l] = variance_l * I.
cov[:] = np.eye(2)[:, :, np.newaxis] * np.array([0.2, 0.4])

# 200 alternating E- and M-steps.
for i in range(200):
    expect = e_step(dataset, alpha, mu, cov, expect, J)
    alpha, mu, cov = m_step(dataset, alpha, mu, cov, expect, J)

# Report only the final estimate; i still holds the last index (199) here.
print('em iteration i =', i)
for k in range(J):
    print('alpha', k, '=', alpha[k])
for k in range(J):
    print('mu', k, '=', mu[:, k])
for k in range(J):
    print('cov', k, '=\n', cov[:, :, k])

em iteration i = 199
alpha 0 = 0.4106782649540512
alpha 1 = 0.5893217350459488
mu 0 = [1.01098326 0.99764757]
mu 1 = [2.7242981  3.87219858]
cov 0 =
 [[ 0.09610188 -0.00195991]
 [-0.00195991  0.10539676]]
cov 1 =
 [[ 0.4044806  -0.60323608]
 [-0.60323608  2.08991349]]


#### (a) - (4): Discuss the results
* Initialization affects the estimation results a lot.
    * The result of (1) is very close to the parameter values used to generate the dataset.
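    * The result of (2) is not close to the parameter values used to generate the dataset, although the initialization doesn't look unreasonable: two of the estimated models (0 and 2) both settle near [1, 1] and split the first cluster between them (alpha ≈ 0.18 + 0.23), while model 1 (alpha ≈ 0.59) spreads over the second and third clusters.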
* In (3), the first estimated model is close to the first model used to generate the dataset, while the second estimated model, judging by its alpha and mu, looks like a combination of the second and third generating models.