In [1]:
%load_ext autoreload
%autoreload 2

# Load Data

In [2]:
from pathlib import Path
from opensynth.data_modules.lcl_data_module import LCLDataModule
import pytorch_lightning as pl

import matplotlib.pyplot as plt

data_path = Path("../../data/processed/historical/train/lcl_data.csv")
stats_path = Path("../../data/processed/historical/train/mean_std.csv")
outlier_path = Path("../../data/processed/historical/train/outliers.csv")

dm = LCLDataModule(data_path=data_path, stats_path=stats_path, batch_size=25000, n_samples=50000)
dm.setup()

In [3]:
import torch
from opensynth.models.faraday import FaradayVAE
vae_model = torch.load("vae_model.pt")

  vae_model = torch.load("vae_model.pt")


In [4]:
from opensynth.models.faraday.gaussian_mixture.prepare_gmm_input import encode_data_for_gmm

next_batch = next(iter(dm.train_dataloader()))
input_tensor = encode_data_for_gmm(data=next_batch, vae_module=vae_model)
input_data = input_tensor.detach().numpy()
n_samples = len(input_tensor)

In [5]:
N_COMPONENTS = 250
REG_COVAR = 1e-4
EPOCHS = 10

# Init GMM

In [6]:
from opensynth.models.faraday.new_gmm import gmm_utils

labels_, means_, responsibilities_ = gmm_utils.initialise_centroids(
        X=input_data, n_components=N_COMPONENTS
    )
print(labels_.dtype, responsibilities_.dtype, means_.dtype)
responsibilities_ = responsibilities_.double()
means_ = means_.double()

torch.float64 torch.float64 torch.float64


In [7]:
from opensynth.models.faraday.new_gmm.train_gmm import initialise_gmm_params, training_loop
from opensynth.models.faraday.new_gmm.new_gmm_model import GaussianMixtureModel


gmm_init_params = initialise_gmm_params(
    X=input_data,
    n_components = N_COMPONENTS,
    reg_covar=REG_COVAR,
)
torch_gmm = GaussianMixtureModel(
    num_components=N_COMPONENTS,
    num_features = input_data.shape[1],
    reg_covar=REG_COVAR
)
torch_gmm.initialise(gmm_init_params)
trained_model = training_loop(model=torch_gmm, data=input_tensor.double(), max_iter=EPOCHS)

100%|██████████| 10/10 [00:12<00:00,  1.25s/it]


In [9]:
trained_model.means[0]

tensor([-36.6833,  -0.3893,  30.1310,  21.9777,   8.5242,  18.1993,  -0.5867,
         64.1840, -21.1583,  49.8895, -43.8030,  30.8422, -30.8649,  -5.9629,
         -5.2127,  28.9076,   2.0000,   4.3333], dtype=torch.float64,
       grad_fn=<SelectBackward0>)

In [10]:
gmm_init_params["means"][0]

tensor([-36.6833,  -0.3893,  30.1310,  21.9777,   8.5242,  18.1993,  -0.5867,
         64.1840, -21.1583,  49.8895, -43.8030,  30.8422, -30.8649,  -5.9629,
         -5.2127,  28.9076,   2.0000,   4.3333], dtype=torch.float64)

# SK Learn GMM 1 Epoch

In [11]:
from sklearn.mixture import GaussianMixture

init_weights = gmm_init_params["weights"].detach().numpy()
init_means = gmm_init_params["means"].detach().numpy()

skgmm = GaussianMixture(n_components=N_COMPONENTS, covariance_type='full', max_iter=EPOCHS, random_state=0)
skgmm.fit(input_data)
skgmm_pred = skgmm.predict(input_data)



In [12]:
skgmm.means_[0]

array([-36.68334929,  -0.38932436,  30.13103358,  21.97765541,
         8.52422716,  18.19925419,  -0.58672021,  64.18397903,
       -21.15834347,  49.88953972, -43.803044  ,  30.84217676,
       -30.86488851,  -5.9629341 ,  -5.21265117,  28.90758769,
         2.        ,   4.33333333])

In [13]:
trained_model.means[0]

tensor([-36.6833,  -0.3893,  30.1310,  21.9777,   8.5242,  18.1993,  -0.5867,
         64.1840, -21.1583,  49.8895, -43.8030,  30.8422, -30.8649,  -5.9629,
         -5.2127,  28.9076,   2.0000,   4.3333], dtype=torch.float64,
       grad_fn=<SelectBackward0>)

In [15]:
skgmm.weights_[:10]

array([0.00024   , 0.01170063, 0.008613  , 0.00116115, 0.0030314 ,
       0.01001826, 0.00012   , 0.00349853, 0.00068   , 0.00016   ])

In [16]:
trained_model.weights[:10]

tensor([0.0002, 0.0104, 0.0054, 0.0012, 0.0029, 0.0095, 0.0001, 0.0053, 0.0007,
        0.0002], dtype=torch.float64, grad_fn=<SliceBackward0>)

In [37]:
skgmm.precisions_cholesky_[0][0][:10]

array([ 2.52977988e-01,  3.30976580e-01, -6.85523046e-03,  4.51098180e-01,
        1.08074007e+00,  1.86940425e+02,  3.61819347e+02,  4.59106724e+02,
       -1.37150187e+02,  3.64090669e+02])

In [38]:
trained_model.precision_cholesky[0][0][:10]

tensor([ 2.5298e-01,  3.3096e-01, -6.8877e-03,  4.5079e-01,  1.0764e+00,
         1.5836e+01,  1.0149e+01,  2.7259e+01, -1.5315e+01,  1.9870e+01],
       dtype=torch.float64)