In [93]:
import numpy as np
import torch
from scipy.stats import multivariate_normal
from OGMM import OGMM, init_cov_and_T_split
from sklearn.mixture import GaussianMixture

In [94]:
dataset = np.load('MGR.npy')
domain_names = ['Noiseless', "buccaneer2", "destroyerengine", "f16", "factory2"]

In [95]:
data_noiseless = dataset[:1000, :]
# feature column 17 is only zeros
data_noiseless = np.delete(data_noiseless, 17, axis=1)

# separation between training and validation data
data_noiseless = np.random.permutation(data_noiseless)
prop_train = 0.8
data_train = data_noiseless[:int(prop_train * len(data_noiseless)), :]
data_test = data_noiseless[int(prop_train * len(data_noiseless)):, :]


# Features of the noiseless data
X_train = data_train[:, :-2]
X_train_t = torch.from_numpy(X_train)
X_test = data_test[:, :-2]
X_test_t = torch.from_numpy(X_test)

# Labels of the noiseless data
y_train = data_train[:, -2]
y_test = data_test[:, -2]

Log likelihood function

In [96]:
GMM_sklearn = GaussianMixture(n_components=10, random_state=0).fit(X_train)

In [97]:
np.sum(GMM_sklearn.score_samples(X_test))

-49368.33201802708

In [98]:
def normal_pdf(x, mean, cov, bib='numpy'):
    if bib == 'numpy':
        return (2*np.pi)**(-len(x)/2) * np.linalg.det(cov)**(-1/2) * np.exp(-1/2 * (x - mean).T @ np.linalg.inv(cov) @ (x - mean))
    else:
        return (2*torch.pi)**(-len(x)/2) * torch.linalg.det(cov)**(-1/2) * torch.exp(-1/2 * (x - mean).T @ torch.inverse(cov) @ (x - mean))

In [99]:
def log_likelihood(mixture, X, bib='numpy'):
    pdf = np.zeros(X.shape[0])
    for i in range(X.shape[0]):
        for gauss in mixture:
            pdf[i] += gauss[0] * normal_pdf(X[i], gauss[2], gauss[3], bib)
        if pdf[i] != 0:
            pdf[i] = np.log(pdf[i])
    return np.sum(pdf)

In [100]:
means = GMM_sklearn.means_
weights = GMM_sklearn.weights_
cov = GMM_sklearn.covariances_
GMM_sklearn_list = [[weights[i], 1, means[i], cov[i]] for i in range(len(weights))]

In [101]:
ll_sklearn = log_likelihood(GMM_sklearn_list, X_test)
ll_sklearn

-45479.13859892194

In [102]:
range_data = np.mean(np.max(X_train, axis=0) - np.min(X_train, axis=0))

K_max = 10
cov_init, T_split = init_cov_and_T_split(K_max, range_data, X_train.shape[1])
GMM_online = OGMM(X_train_t, K_max, cov_init, 0.1, T_split)

  r = r.astype(result_t, copy=False)


In [103]:
GMM_online

[[tensor(0.1508),
  tensor(87.),
  tensor([ 3.9402e-01,  8.6990e-02,  1.3310e-01,  3.3272e-03,  2.2514e+03,
           4.7373e+05,  2.3629e+03,  1.5258e+05,  4.7577e+03,  1.9866e+06,
           9.8355e-02,  2.9469e-03,  5.0388e-06,  1.1032e-02, -1.5307e-04,
           6.1092e-03,  1.0693e+02, -1.2430e+02,  3.7505e+03,  9.7021e+01,
           7.0989e+02, -5.7617e+00,  4.5066e+02,  3.0709e+01,  2.3840e+02,
           1.4573e+00,  1.9673e+02,  1.2610e+01,  1.3901e+02, -4.6771e+00,
           1.2801e+02,  8.7704e+00,  8.7156e+01, -6.3522e+00,  8.8759e+01,
           6.5937e+00,  8.4416e+01, -5.7156e+00,  7.2533e+01,  3.7058e+00,
           5.9212e+01, -4.7520e+00,  6.2013e+01,  1.1632e+00,  5.4355e+01,
          -4.4549e+00,  5.6226e+01, -1.2802e+00,  5.6235e+01, -3.7215e+00,
           5.9137e+01,  6.1333e-01,  5.7111e+01, -1.3020e+00,  5.9918e+01,
          -8.9456e-01,  5.8473e+01], dtype=torch.float64),
  tensor([[ 1.9991e-02, -1.2793e-04,  1.4975e-03,  ..., -1.1405e+00,
            1.

In [104]:
ll_online = log_likelihood(GMM_online, X_test_t, 'torch')
ll_online

-64765.36445507992

In [105]:
(ll_online - ll_sklearn)/ll_sklearn

0.424067527449061