# EM Algorithm for Mikture of k Gaussians

## Setup

In [37]:
import numpy as np
from scipy.stats import norm

In [221]:
FILEPATH = 'em_data.txt'
data = []

with open(FILEPATH) as f:
    for line in f:
        data.append(float(line))
        
data = np.array(data)
data = np.reshape(data, (data.shape[0],1))

k = 5
num_iters = 5

params = list(zip(
    list(np.random.uniform(low=np.min(data), high=np.max(data), size=k)), 
    list(np.random.uniform(high=np.std(data, ddof=1), size=k))
))

weights = np.random.uniform(size=k)
weights /= np.sum(weights)

for _ in range(num_iters):
    probs = []
    for mean, std in params:
        probs.append(norm.pdf(data, loc=mean, scale=std))

    probs = np.hstack(probs)

    preds = probs * weights
    preds /= np.sum(preds, axis=1, keepdims=True)

    n = np.sum(preds, axis=0) 
    weights = n / data.shape[0]

    means = np.sum(preds * data, axis=0) / n
    stdevs = np.sum(preds * ((np.repeat(data, means.shape[0], axis=1) - means) ** 2), axis=0) / n
    params = list(zip(
        list(means),
        list(stdevs)
    ))

In [222]:
params

[(6.583928378811735, 2.5107226733434536e-06),
 (15.324708817035, 66.75407453748434),
 (15.324710473450004, 66.75405764812999),
 (26.315136512667028, 0.0028654796536896096),
 (15.32469780176283, 66.75406363345678)]