In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Load Data

In [2]:
from pathlib import Path
from opensynth.data_modules.lcl_data_module import LCLDataModule
import pytorch_lightning as pl

import matplotlib.pyplot as plt

# Seed the global RNGs before any data is sampled so that a fresh
# "Restart & Run All" draws the same data each time (assuming the data
# module's subsampling/shuffling uses the global RNGs - TODO confirm).
# The value matches the random_state=0 passed to the sklearn baseline
# further down.
SEED = 0
pl.seed_everything(SEED)

# Inputs produced by the preprocessing step, relative to this notebook.
data_path = Path("../../data/processed/historical/train/lcl_data.csv")
stats_path = Path("../../data/processed/historical/train/mean_std.csv")
# NOTE(review): outlier_path is not referenced by any other cell shown in
# this notebook; kept in case it is used elsewhere.
outlier_path = Path("../../data/processed/historical/train/outliers.csv")

# Build the data module and run its setup step so train_dataloader() can be
# used by the following cells.
dm = LCLDataModule(
    data_path=data_path,
    stats_path=stats_path,
    batch_size=25000,
    n_samples=50000,
)
dm.setup()

In [3]:
import torch
from opensynth.models.faraday import FaradayVAE
# Load the previously saved VAE object from disk.
# weights_only is passed explicitly instead of relying on the library
# default: with weights_only=True a fully pickled model object would be
# rejected, and relying on the implicit default triggers a warning on
# recent torch versions.
# SECURITY: weights_only=False unpickles arbitrary Python objects - only
# load checkpoints from a trusted source.
# NOTE(review): presumably the FaradayVAE import above is what makes the
# pickled class resolvable - confirm before removing it as "unused".
vae_model = torch.load("vae_model.pt", weights_only=False)

  vae_model = torch.load("vae_model.pt")


In [4]:
from opensynth.models.faraday.gaussian_mixture.prepare_gmm_input import encode_data_for_gmm

# Take the first batch from the training dataloader and encode it with the
# VAE to obtain the GMM input.
next_batch = next(iter(dm.train_dataloader()))
input_tensor = encode_data_for_gmm(vae_module=vae_model, data=next_batch)

# NumPy copy of the same data for the numpy-based consumers below
# (initialisation helpers and the sklearn baseline).
input_data = input_tensor.detach().numpy()

# Number of rows in the encoded batch.
n_samples = input_tensor.shape[0]

In [24]:
# Number of mixture components; shared by the torch GMM cells and the
# sklearn GaussianMixture baseline below.
N_COMPONENTS = 50

# Init GMM

In [288]:
from opensynth.models.faraday.new_gmm import gmm_utils

# Compute the initial component assignment for the encoded data.
labels_, means_, responsibilities_ = gmm_utils.initialise_centroids(
    X=input_data,
    n_components=N_COMPONENTS,
)

# Show the dtypes returned by the helper.
print(labels_.dtype, responsibilities_.dtype, means_.dtype)

# Cast to float64. In the recorded output everything is already
# torch.float64, so these casts only act as a guard.
means_ = means_.double()
responsibilities_ = responsibilities_.double()

torch.float64 torch.float64 torch.float64


In [326]:
from opensynth.models.faraday.new_gmm.train_gmm import initialise_gmm_params, training_loop
from opensynth.models.faraday.new_gmm.new_gmm_model import GaussianMixtureModel

# Covariance regularisation, defined once so the initial parameters and the
# model cannot drift apart. The sklearn baseline further down is fit with
# the same value (1e-4).
REG_COVAR = 1e-4

# Initial GMM parameters computed from the encoded data.
gmm_init_params = initialise_gmm_params(
    X=input_data,
    n_components=N_COMPONENTS,
    reg_covar=REG_COVAR,
)

# Torch GMM with one feature per column of the encoded data.
torch_gmm = GaussianMixtureModel(
    num_components=N_COMPONENTS,
    num_features=input_data.shape[1],
    reg_covar=REG_COVAR,
)

# Load the initial parameters into the model before training.
torch_gmm.initialise(gmm_init_params)

In [327]:
# Run the torch training loop on the float64 view of the encoded batch.
# max_iter matches the value given to the sklearn baseline below.
trained_model = training_loop(
    model=torch_gmm,
    data=input_tensor.double(),
    max_iter=10,
)

100%|██████████| 10/10 [00:02<00:00,  4.58it/s]


In [328]:
# Mean vector of component 0 after the torch training loop.
trained_model.means[0]

tensor([-44.6996,   9.7845,  27.4608,  25.7868,   3.7500,  23.0785,   6.0063,
         62.9924, -15.2910,  57.9771, -48.0307,  32.3790, -26.4540,  -9.3495,
         -3.1272,  30.9442,   3.6154,   4.0000], dtype=torch.float64,
       grad_fn=<SelectBackward0>)

# scikit-learn GMM baseline (single batch, max_iter=10)

In [330]:
from sklearn.mixture import GaussianMixture
# Reference implementation: fit sklearn's GaussianMixture on the same
# encoded batch with the same component count, iteration cap and
# covariance regularisation as the torch model. fit() returns the
# estimator itself, so the call is chained.
skgmm = GaussianMixture(
    n_components=N_COMPONENTS,
    covariance_type='full',
    max_iter=10,
    random_state=0,
    reg_covar=1e-4,
).fit(input_data)

# Component index assigned to every row of the encoded batch.
skgmm_pred = skgmm.predict(input_data)



In [331]:
# numpy is not imported by any earlier cell, so without this import the
# cell raises NameError on a fresh kernel (Restart & Run All).
import numpy as np

# Mean vector of sklearn component 0, rounded to 4 decimals so it can be
# compared by eye with the torch tensor repr.
np.round(skgmm.means_[0], 4)

array([-44.6996,   9.7845,  27.4608,  25.7868,   3.75  ,  23.0785,
         6.0063,  62.9924, -15.291 ,  57.9771, -48.0307,  32.379 ,
       -26.454 ,  -9.3495,  -3.1272,  30.9442,   3.6154,   4.    ])

In [332]:
# Torch mean of component 0 again, shown next to the sklearn value above for
# a side-by-side comparison.
trained_model.means[0]

tensor([-44.6996,   9.7845,  27.4608,  25.7868,   3.7500,  23.0785,   6.0063,
         62.9924, -15.2910,  57.9771, -48.0307,  32.3790, -26.4540,  -9.3495,
         -3.1272,  30.9442,   3.6154,   4.0000], dtype=torch.float64,
       grad_fn=<SelectBackward0>)

In [333]:
# Mixture weights fitted by sklearn (one entry per component).
skgmm.weights_

array([5.20000000e-04, 2.95234078e-02, 2.24275406e-02, 2.13311945e-02,
       2.57137565e-02, 3.09102879e-02, 1.52641088e-01, 1.76296845e-03,
       2.00000000e-04, 5.20000000e-04, 2.15207532e-03, 2.41805456e-02,
       6.39622568e-03, 1.11753034e-02, 4.00000000e-04, 4.00000000e-04,
       2.39475997e-03, 6.37067725e-03, 6.37277244e-02, 4.89965103e-02,
       2.31499730e-02, 3.55521442e-03, 1.39012071e-02, 5.99733962e-02,
       1.18566139e-02, 1.64457649e-03, 1.35874997e-03, 1.50235387e-02,
       7.89798908e-02, 5.80084828e-03, 2.02634295e-02, 6.00000000e-04,
       1.08265738e-02, 8.00000000e-05, 5.73050819e-02, 1.93279274e-02,
       1.24429833e-02, 5.11003527e-02, 2.98709893e-03, 2.75129942e-02,
       3.34490400e-02, 2.40000000e-04, 7.20000000e-04, 1.76446485e-02,
       2.61844449e-02, 4.21163276e-03, 8.93370391e-04, 1.12025250e-03,
       4.43562535e-03, 4.16664688e-02])

In [334]:
# Mixture weights of the torch model, to compare with skgmm.weights_ above.
trained_model.weights

tensor([5.2000e-04, 3.6866e-02, 1.5985e-02, 2.1693e-02, 2.7932e-02, 3.6130e-02,
        1.3222e-01, 1.7175e-03, 2.0000e-04, 5.2000e-04, 2.2235e-03, 2.4940e-02,
        6.5245e-03, 8.3505e-03, 4.0000e-04, 4.0000e-04, 2.0152e-03, 3.9902e-03,
        5.5802e-02, 4.5161e-02, 1.2055e-02, 4.2699e-03, 9.6797e-03, 1.3082e-01,
        8.0029e-03, 1.3146e-03, 1.1705e-03, 1.1891e-02, 8.3878e-02, 5.3266e-03,
        1.5553e-02, 6.0000e-04, 1.5537e-02, 8.0000e-05, 6.4761e-02, 1.8728e-02,
        7.9390e-03, 4.2152e-02, 1.6587e-03, 2.2138e-02, 4.7755e-02, 2.4000e-04,
        7.2000e-04, 1.6641e-02, 2.0641e-02, 3.5518e-03, 9.5651e-04, 1.1201e-03,
        3.2688e-03, 2.3965e-02], dtype=torch.float64, grad_fn=<DivBackward0>)

In [335]:
# First row of component 0's precision Cholesky factor as fitted by sklearn.
skgmm.precisions_cholesky_[0][0]

array([  0.10493682,   0.17882734,   0.30549423,   0.20290598,
         0.4126827 ,   0.23054097,   0.25236835,   0.51472395,
        -2.31794102,   3.01093835,  -8.74960783,   1.13780447,
        66.16276724,   3.66385032,  -5.79759895,   7.43396918,
       -11.58390644, -35.57066572])

In [336]:
# Same slice from the torch model, to compare with the sklearn row above.
# Note the attribute name differs from sklearn's: precision_cholesky, with
# no plural and no trailing underscore.
trained_model.precision_cholesky[0][0]

tensor([  0.1049,   0.1788,   0.3055,   0.2029,   0.4125,   0.2305,   0.2521,
          0.5148,  -2.3180,   3.0113,  -8.7476,   1.1410,  63.6677, -13.2915,
         -4.4993,   2.4293,  -8.2627, -15.2198], dtype=torch.float64)

In [337]:
# Component index sklearn assigned to each row of input_data.
skgmm_pred

array([19, 17, 40, ..., 32,  3, 22])

In [338]:
# Component assignments from the torch model on the same batch; the input is
# cast to float64, the same dtype that was passed to training_loop.
trained_model.predict(input_tensor.double())

tensor([23, 21, 40,  ..., 32,  3, 22])