# Gaussian Mixture Models

In [1]:
from sklearn.datasets import make_moons
from sklearn.mixture import GaussianMixture

In [2]:
X, y = make_moons(n_samples=1000, noise= 0.05)

In [3]:
X.shape

(1000, 2)

In [4]:
gm = GaussianMixture(n_components=3, n_init=10)
gm.fit(X)

GaussianMixture(covariance_type='full', init_params='kmeans', max_iter=100,
                means_init=None, n_components=3, n_init=10,
                precisions_init=None, random_state=None, reg_covar=1e-06,
                tol=0.001, verbose=0, verbose_interval=10, warm_start=False,
                weights_init=None)

In [5]:
# Expectation Maximization

gm.weights_

array([0.20483823, 0.20860144, 0.58656033])

In [6]:
gm.means_

array([[-0.74651296,  0.5584375 ],
       [ 1.73966713, -0.07172641],
       [ 0.49559662,  0.25453085]])

In [7]:
gm.covariances_

array([[[ 0.05235359,  0.06228406],
        [ 0.06228406,  0.08855114]],

       [[ 0.05624059,  0.0632219 ],
        [ 0.0632219 ,  0.08599995]],

       [[ 0.16472065, -0.09815285],
        [-0.09815285,  0.28887593]]])

In [8]:
gm.converged_

True

In [9]:
gm.n_iter_

13

In [10]:
gm.predict(X)

array([1, 0, 0, 2, 2, 2, 1, 2, 2, 1, 2, 0, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2,
       2, 2, 0, 2, 2, 1, 2, 0, 2, 1, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 1, 2,
       1, 2, 2, 2, 0, 2, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0, 1, 2, 0, 1,
       2, 1, 0, 1, 2, 2, 0, 2, 2, 1, 2, 2, 0, 2, 1, 0, 0, 0, 2, 2, 1, 2,
       2, 2, 1, 2, 2, 2, 0, 1, 0, 1, 2, 2, 2, 2, 0, 0, 2, 2, 1, 2, 2, 2,
       0, 2, 1, 0, 2, 2, 2, 2, 2, 2, 0, 1, 1, 2, 2, 1, 0, 0, 2, 1, 2, 2,
       2, 1, 1, 2, 2, 0, 0, 2, 2, 0, 2, 2, 2, 2, 1, 2, 1, 1, 0, 2, 0, 2,
       0, 1, 0, 0, 2, 2, 0, 2, 2, 0, 0, 2, 2, 0, 1, 0, 0, 1, 2, 0, 2, 2,
       0, 2, 2, 2, 2, 2, 1, 0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 1, 2, 2, 2, 0,
       2, 2, 2, 0, 2, 2, 2, 0, 1, 2, 2, 2, 2, 2, 0, 1, 2, 0, 2, 1, 0, 2,
       2, 2, 0, 0, 0, 2, 1, 0, 0, 1, 2, 1, 2, 0, 0, 2, 2, 1, 1, 2, 2, 2,
       1, 0, 1, 2, 0, 1, 2, 2, 2, 2, 1, 0, 2, 2, 1, 0, 1, 1, 2, 2, 1, 2,
       2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 2,
       0, 0, 2, 1, 2, 2, 2, 1, 2, 0, 2, 2, 1, 2, 1,

In [11]:
gm.predict_proba(X)

array([[7.34075006e-240, 9.98334133e-001, 1.66586691e-003],
       [7.40433036e-001, 7.15319053e-174, 2.59566964e-001],
       [9.50585980e-001, 3.40289467e-195, 4.94140203e-002],
       ...,
       [9.87626464e-001, 2.19001141e-212, 1.23735356e-002],
       [8.23429175e-025, 9.74740612e-089, 1.00000000e+000],
       [5.16663729e-094, 2.96281749e-023, 1.00000000e+000]])

In [12]:
y

array([1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1,
       0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0,
       1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0,
       1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
       0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,

In [13]:
X_new, y_new = gm.sample(6)

In [14]:
X_new

array([[-0.27566438,  1.16692679],
       [ 1.54156763, -0.31411112],
       [ 1.23454954, -0.73875722],
       [ 0.37252093,  0.35478856],
       [ 0.25257757,  0.09515037],
       [ 0.40199027,  0.70942219]])

In [15]:
y_new

array([0, 1, 1, 2, 2, 2])

In [16]:
gm.score_samples(X)

array([-7.88959867e-01, -1.74903111e+00, -5.64689021e-01, -1.32053627e+00,
       -1.62952814e+00, -2.13989810e+00, -1.28747275e+00, -1.39107086e+00,
       -1.77901951e+00, -1.04311539e+00, -1.78002333e+00,  3.45774043e-02,
       -1.76864133e+00, -2.69645679e-01, -1.38387487e+00, -1.54746692e+00,
       -1.87415167e+00, -2.21679180e+00, -4.93587746e-02, -2.25930734e+00,
       -1.68716934e+00, -1.27129162e+00, -1.70204607e+00, -1.61976210e+00,
       -5.25077711e-01, -1.51878140e+00, -1.65969765e+00, -1.81426240e+00,
       -2.29338239e+00, -1.09517735e+00, -1.68912829e+00, -1.45535809e+00,
       -1.37160724e+00, -1.96592094e+00, -1.51973760e+00, -2.62683657e-01,
       -1.30356029e+00,  8.45709294e-02, -2.03878145e+00, -1.90290533e+00,
       -1.54232326e+00, -5.63631987e-01, -3.99819058e-01, -1.79577556e+00,
       -5.62789169e-01, -2.07166564e+00, -1.68506264e+00, -1.77702624e+00,
       -2.04941024e-01, -1.30674374e+00, -2.64078417e-01, -1.49852203e+00,
       -5.99641963e+00,  

In [17]:
import numpy as np

densities = gm.score_samples(X)
density_threshold = np.percentile(densities, 4) # 4% percentile 
anomalies = X[densities < density_threshold]

In [18]:
anomalies

array([[ 1.17819152, -0.42172296],
       [-0.24157349,  0.94226506],
       [ 1.87710676,  0.50652354],
       [-1.02865028, -0.01195397],
       [-0.91898703,  0.071734  ],
       [ 1.22735846, -0.42170073],
       [ 1.18682935, -0.45402597],
       [ 1.9658843 ,  0.43281098],
       [-0.89673877,  0.08773921],
       [ 1.16746036, -0.45716629],
       [-0.19076709,  0.98478729],
       [-0.94516545,  0.02931951],
       [-0.9732092 , -0.03256418],
       [-0.21709222,  0.94839724],
       [-1.0078327 ,  0.01265301],
       [ 1.99177564,  0.50859232],
       [-0.1692408 ,  0.96794953],
       [-0.1929176 ,  0.98244497],
       [ 1.84631534,  0.4300941 ],
       [ 1.99165559,  0.47039984],
       [-0.26055484,  0.89613844],
       [-0.93305769, -0.01194031],
       [-0.1646824 ,  1.1057804 ],
       [ 1.96557319,  0.46741667],
       [-0.19394178,  1.02191761],
       [-1.03835112, -0.06019431],
       [ 1.94354935,  0.43308753],
       [ 1.26150663, -0.38790968],
       [-0.26826512,

In [20]:
gm.bic(X)

2733.964344124223

In [21]:
gm.aic(X)

2650.5325043815265

In [22]:
# Bayesian Gaussian Mixture Models

from sklearn.mixture import BayesianGaussianMixture

In [23]:
bgm = BayesianGaussianMixture(n_components=10, n_init = 10)
bgm.fit(X)

np.round(bgm.weights_, 2)

array([0.2 , 0.14, 0.16, 0.  , 0.14, 0.12, 0.14, 0.  , 0.1 , 0.  ])

In [24]:
bgm = BayesianGaussianMixture(n_components=3, n_init = 10)
bgm.fit(X)

np.round(bgm.weights_, 2)

array([0.53, 0.23, 0.24])