# Pen-and-paper

In [1]:
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_samples

### Data

In [2]:
X = pd.DataFrame(np.array([[1, 2], [-1, 1], [1, 0]]))
mean = [[2,2],[0,0]]
cov = [[[2,1],[1,2]],[[2,0],[0,2]]]
coef = [0.5,0.5]

## E-step

In [3]:
def expansion(mean, cov, coef, print_gaussian=False):
    results = np.zeros((3, 2))
    for i, row in X.iterrows():
        for j in range(2):
            var = multivariate_normal(mean=mean[j], cov=cov[j])
            results[i][j] = var.pdf([row[0], row[1]])
    if print_gaussian:
        print(results)
    
    weights = np.zeros((3, 2))
    for i in range(3):
        sum = coef[0] * results[i][0] + coef[1] * results[i][1]
        for j in range(2):
            weights[i][j] = results[i][j] * coef[j] / sum
    
    return weights

In [4]:
weights = expansion(mean, cov, coef, True)
print(weights)

[[0.06584074 0.02279933]
 [0.00891057 0.04826618]
 [0.03380376 0.061975  ]]
[[0.74278756 0.25721244]
 [0.15584262 0.84415738]
 [0.35293589 0.64706411]]


## M-step

### N

In [5]:
N = weights.sum(axis=0)

print(N)

[1.25156606 1.74843394]


### ùúá

In [6]:
ùúá = np.matmul(weights.transpose(), X.values) / N

print(ùúá)

[[0.75096381 0.93879311]
 [0.04803516 0.77702808]]


### Œ£

In [7]:
Œ£ = np.zeros((2, 2, 2))

for i in range(2):
    sum = 0
    for j, row in X.iterrows():
        dif = np.array([[row[0] - ùúá[i][0]],[row[1] - ùúá[i][1]]])
        sum += weights[j][i] * np.matmul(dif, dif.transpose())
    sum *= 1 / N[i]
    Œ£[i] = sum

print(Œ£)

[[[ 0.43605335  0.07757255]
  [ 0.07757255  0.91735899]]

 [[ 0.99900404 -0.21530512]
  [-0.21530512  0.46747582]]]


### ùúã

In [8]:
ùúã = N / 3

print(ùúã)

[0.41718869 0.58281131]


## Hard assignment

In [9]:
print(expansion(ùúá, Œ£, ùúã))

[[0.87330603 0.12669397]
 [0.0340858  0.9659142 ]
 [0.48359766 0.51640234]]


## Silhouette score

In [20]:
silhouettes = silhouette_samples(X, [0, 1, 1])
print('Silhouette 2:', silhouettes[1])
print('Silhouette 3:', round(silhouettes[2], 4))
silhouette_c = (silhouettes[1] + silhouettes[2]) / 2
print('Silhouette C:', round(silhouette_c, 4))

Silhouette 2: 0.0
Silhouette 3: -0.1056
Silhouette C: -0.0528
