# Variational model


In [6]:
from plot import training_log

# Show the training logs of the two runs being compared.
# NOTE(review): the numbers presumably refer to lightning_logs/version_<N>
# directories -- confirm against plot.training_log.
versions_to_compare = [8, 11]
training_log(version=versions_to_compare)

In [1]:
import os

import torch
import yaml
from models.lit_model import LitModel
from numpy import random
from data.dataset import MCSims
from plot.plot import plot_field_xy_from_tensor
import holoviews as hv

hv.extension("bokeh")
device = "cpu"

# Training run to inspect; its hyper-parameters and weights are read from the
# matching lightning_logs/version_<N>/ directory.
version = 11
path = f"lightning_logs/version_{version}/"

with open(os.path.join(path, "hparams.yaml"), "r") as f:
    config = yaml.safe_load(f)

# Rebuild the Lightning module from the hyper-parameters stored with the run.
litmodel = LitModel(
    model_type=config["model_type"],
    config=config["config"],
    learning_rate=config["learning_rate"],
)

# map_location remaps tensors that were saved from another device (e.g. CUDA)
# onto `device`, so the checkpoint also loads on a CPU-only machine.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you
# trust (consider weights_only=True if the installed torch supports it).
state_dict = torch.load(os.path.join(path, "model.pth"), map_location=device)
litmodel.model.load_state_dict(state_dict)

dataset = MCSims(augment=False, preprocess=False)
n = 1071  # index of the dataset sample to reconstruct

original = dataset[n].float()  # Convert to float
# Keep the model on the same device as the input it is fed below.
model = litmodel.model.to(device)
model.eval()

# Inference only: no_grad avoids building an autograd graph for the forward
# pass, so the result needs no detach() before plotting.
with torch.no_grad():
    # A batch dimension is added for the forward pass and removed afterwards.
    # The first element of the model output is used as the reconstruction
    # (presumably the remaining elements are latent statistics -- verify
    # against the model definition).
    reconstruct = model(original.unsqueeze(0).to(device))[0].squeeze(0).cpu()

# Last expression of the cell: original and reconstruction side by side.
plot_field_xy_from_tensor(original) + plot_field_xy_from_tensor(reconstruct)


BokehModel(combine_events=True, render_bundle={'docs_json': {'ac5af75f-1a2f-4680-8a71-ef4d280690a1': {'version…

In [1]:

import torch
import hdbscan
#from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
from plot.plot import plot_H_vs_T_with_hover

# Load the pre-computed encodings saved for run 11. map_location="cpu" lets
# the file load on a CPU-only machine even if it was saved from a GPU.
encoded_data = (
    torch.load("lightning_logs/version_11/encoded_data.pth", map_location="cpu")
    .cpu()
    .numpy()
)

# Cluster the encodings. The seed is fixed because GaussianMixture is randomly
# initialised: without it the labels (and every metric derived from them)
# change from one run to the next.
# Alternative clusterers that were tried are kept below for reference.
# labels = hdbscan.HDBSCAN(min_cluster_size=15, cluster_selection_epsilon=0.06).fit_predict(encoded_data)
labels = GaussianMixture(
    n_components=6, covariance_type='diag', random_state=0
).fit_predict(encoded_data)
# labels = BayesianGaussianMixture(n_components=6, covariance_type='spherical').fit_predict(encoded_data)
plot_H_vs_T_with_hover(labels=labels)

FigureWidget({
    'data': [{'customdata': {'bdata': ('AAABAAIAAwAEAAUABgAHAAgACQAKAA' ... 'nrCewJ7QnuCe8J8AnxCfIJ8wn0CfUJ'),
                             'dtype': 'i2'},
              'hovertemplate': 'T: %{x}<br>H: %{y}<extra></extra>',
              'marker': {'color': '#2E91E5', 'size': 10},
              'mode': 'markers',
              'name': 'Cluster 1',
              'type': 'scatter',
              'uid': '6142fd5b-7c1b-4891-af10-880f6a6b670c',
              'x': {'bdata': ('AAAAAADQoUAAAAAAANChQAAAAAAA0K' ... 'AAAAAYpUAAAAAAABilQAAAAAAAGKVA'),
                    'dtype': 'f8'},
              'y': {'bdata': ('AAAAAAAAAADEVz1FvxW/QMZXPUW/Fc' ... 'Huc09QF0Ey9wJxpswXQZLsF279SBhB'),
                    'dtype': 'f8'}},
             {'customdata': {'bdata': ('GgAbABwAHQAeAB8AIAAhACIAIwAkAC' ... 'm4CbkJugm7CbwJvQm+Cb8JwAnBCcIJ'),
                             'dtype': 'i2'},
              'hovertemplate': 'T: %{x}<br>H: %{y}<extra></extra>',
              'marker': {'color': '#E15F

In [5]:
import pandas as pd
import torch
from sklearn.mixture import GaussianMixture

# measure the Adjusted Rand Index for a subset of the data
from sklearn.metrics import adjusted_rand_score
import numpy as np
from sklearn.metrics.cluster import contingency_matrix

true_df = pd.read_csv("data/dataset.csv")

true_labels = true_df["label"].values

# Keep only the predicted labels of the samples that have a ground-truth
# label: ground-truth row i is paired with clustered sample LABEL_STRIDE * i.
# The number of pairs follows the CSV instead of being hard-coded, so the two
# label sequences always have the same length.
# NOTE(review): the stride of 10 is taken from the original indexing --
# confirm it matches how data/dataset.csv was sampled.
LABEL_STRIDE = 10
predicted_labels = [labels[LABEL_STRIDE * i] for i in range(len(true_labels))]


# calculate the Adjusted Rand Index
ari = adjusted_rand_score(true_labels, predicted_labels)
print(f"Adjusted Rand Index: {ari:.4f}")

def purity_score(y_true, y_pred):
    """Return the purity of the labelling ``y_pred`` with respect to ``y_true``.

    The contingency matrix of the two labellings is built, the largest count
    in each column is taken, and the sum of those maxima is divided by the
    total number of counted samples.

    Args:
        y_true: reference labels, one per sample.
        y_pred: predicted cluster labels, one per sample.

    Returns:
        The purity as a floating-point fraction of the total count.
    """
    counts = contingency_matrix(y_true, y_pred)
    # Largest entry of every column of the contingency matrix.
    column_maxima = counts.max(axis=0)
    return column_maxima.sum() / counts.sum()
# Purity of the predicted clusters against the ground-truth labels.
purity = purity_score(y_true=true_labels, y_pred=predicted_labels)
print(f"Purity Score: {purity:.4f}")


Adjusted Rand Index: 0.7415
Purity Score: 0.9195
