In [45]:
import json
import  pickle
import joblib
import optuna
import os
import time
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy  as np
import pandas as pd

from sklearn.cluster import KMeans
from eval_utils import *
from models_gaussian import GaussianDenseHMM, HMMLoggingMonitor, DenseHMMLoggingMonitor

In [5]:
# The results directory is stamped with the current local date
# (year-month-day, month and day not zero-padded).
t = time.localtime()
RESULT_DIR = 'gaussian_dense_hmm_benchmark/eval-cooc-{}-{}-{}'.format(
    t.tm_year, t.tm_mon, t.tm_mday
)

We have examined the efficiency of co-occurrence based learning methods for Gaussian HMM. This notebook contains a comparison of:

- standard Gaussian Hidden Markov Model implementation,
- co-occurrence based learning for Gaussian Hidden Markov Model,
- EM learning for GaussianDenseHMM,
- co-occurrence based learning for GaussianDenseHMM.

The dense representation was examined with fixed and tuned embedding length.

In [49]:
# Notebook-wide settings: the sizes n, s, T and the simple_model switch.
n, s, T = 8, 100, 100
simple_model = True

In [50]:
# Provide data: index the files stored in RESULT_DIR.
all_files = os.listdir(RESULT_DIR)

# File kinds are told apart by the number of "_"-separated tokens in the name.
results = [fname for fname in all_files if len(fname.split("_")) == 6]
studies = [fname for fname in all_files if len(fname.split("_")) == 7]

# A case id is a study file name without its first and last token. The ids are
# de-duplicated and sorted so that every run walks the cases in the same order
# (iteration order of a plain set of strings differs between interpreter runs).
cases = sorted({"_".join(fname.split("_")[1:-1]) for fname in studies})

In [15]:
# Container for everything read from disk, addressed as
# experiment["studies" | "summaries"]["l_fixed" | "l_tuned"][case].
experiment = {
    kind: {variant: {} for variant in ("l_fixed", "l_tuned")}
    for kind in ("studies", "summaries")
}

In [None]:
# For every case, read the two optuna studies and the two best-result summaries
# (one of each for the "lTrue" and "lFalse" file variants) into `experiment`.
# NOTE: joblib.load unpickles arbitrary objects - only point it at trusted files.
for c in cases:
    sources = (
        ("studies", "l_fixed", f"{RESULT_DIR}/optuna_{c}_lTrue.pkl", "rb", joblib.load),
        ("studies", "l_tuned", f"{RESULT_DIR}/optuna_{c}_lFalse.pkl", "rb", joblib.load),
        ("summaries", "l_fixed", f"{RESULT_DIR}/best_result_{c}_lTrue.json", "r", json.load),
        ("summaries", "l_tuned", f"{RESULT_DIR}/best_result_{c}_lFalse.json", "r", json.load),
    )
    for section, variant, path, mode, loader in sources:
        with open(path, mode) as f:
            experiment[section][variant][c] = loader(f)

## Compare tuning methods

In [16]:
# Variable importance: one hyperparameter-importance figure per case and per
# study variant (l fixed / l tuned), computed for the first objective value.
for c in cases:
    for variant, label in (("l_fixed", "Fixed"), ("l_tuned", "Tuned")):
        fig = optuna.visualization.plot_param_importances(
            experiment["studies"][variant][c],
            target=lambda trial: trial.values[0],
            # The first objective value is labelled "log-likelihood" in the
            # violin plots of this notebook; the label here now agrees with it.
            target_name="log-likelihood",
        )
        # plot_param_importances accepts no `title` argument, so the title is
        # set on the returned plotly figure instead.
        fig.update_layout(title_text=f"{label} ({c})")
        display(fig)

In [40]:
# Fixed l versus the hyperparameter-tuning results as a function of l:
# one violin plot of the first objective value per value of `l_param`, per case.
# NOTE(review): trials are taken from the "l_fixed" studies - confirm that this,
# and not "l_tuned", is the study meant to be plotted against l.
for c in cases:
    # Trials without objective values (e.g. not completed) cannot be plotted.
    scored_trials = [
        trial for trial in experiment["studies"]["l_fixed"][c].trials
        if trial.values is not None
    ]
    x = [trial.params['l_param'] for trial in scored_trials]
    y = [trial.values[0] for trial in scored_trials]
    # The plot is named after the first "_"-separated token of the case id.
    name = c.split("_")[0]
    fig, ax = plt.subplots()
    # x and y must be passed by keyword: recent seaborn releases reject them
    # as positional arguments.
    sns.violinplot(x=x, y=y, ax=ax)
    ax.set_xlabel("l")
    ax.set_ylabel("log-likelihood")
    # NOTE(review): the 8 is hard-coded; presumably it is the number of states n - confirm.
    ax.set_title(f"{name} (when fixed l={int(np.ceil(8 / 3))})")
    plt.show()
    plt.close(fig)

In [17]:
# s, T, n, simple_model = 100, 40, 4, True
#
# with open(f"gaussian_dense_hmm_benchmark/fit_coocs_ll_mini-2022-8-12/optuna_s{s}_T{T}_n{n}_simple_model{simple_model}.pkl",  "rb") as f:
#     study = joblib.load(f)

## Compare  results of all methods

In [46]:
# with open(f"gaussian_dense_hmm_benchmark/fit_coocs_ll_mini-2022-8-12/best_result_s{s}_T{T}_n{n}_simple_model{simple_model}.json",  "r") as f:
#     best_results = json.load(f)

In [48]:
# Show, per case, a heading followed by the best-result summaries of the
# fixed-l and tuned-l runs as captioned tables.
for c in cases:
    heading = f'\n\n### {" ".join(c.split("_"))}'
    display(heading)
    for variant, caption in (("l_fixed", "l fixed"), ("l_tuned", "l tuned")):
        summary = pd.DataFrame(experiment["summaries"][variant][c])
        display(summary.style.set_caption(caption))


## Train co-oc + EM

In [3]:
# Use the same parameters and provide time benchmarks

In [None]:
# Initialise the experiment with dsize=(100, 100, 8) and simple_model=True and
# unpack the fifteen values it returns.
(
    s, T, n,
    pi, A, mu, sigma,
    result, true_values, wandb_params,
    X_true, Y_true, lengths, data,
    em_scheduler,
) = init_experiment(dsize=(100, 100, 8), simple_model=True)


def to_discrete(X, m):
    """Discretise continuous observations into ``m`` ordered bins.

    K-means with ``m`` clusters is fitted to ``X``. The cluster centres are
    sorted and the midpoints between consecutive centres, followed by a final
    ``+inf``, are used as bin boundaries.

    Parameters
    ----------
    X : array-like
        Observations to discretise. The boundaries are concatenated with a
        ``(1, 1)`` array, so ``X`` has to hold a single feature per row,
        i.e. shape ``(n_samples, 1)``.
    m : int
        Number of K-means clusters, and therefore of bins.

    Returns
    -------
    labels : numpy.ndarray
        Column array of shape ``(n_samples, 1)``; each entry is the number of
        boundaries strictly below the corresponding observation.
    nodes : numpy.ndarray
        Flat array with the ``m`` boundaries; the last one is ``inf``.
    """
    # Fit on the data that was passed in (not on a notebook-level global).
    kmeans = KMeans(n_clusters=m, random_state=0).fit(X)
    centers = np.sort(kmeans.cluster_centers_, axis=0)
    midpoints = (centers[1:] + centers[:-1]) / 2
    # np.inf instead of the np.infty alias, which NumPy 2.0 removed.
    nodes = np.concatenate([midpoints, np.array([[np.inf]])])
    # Broadcasting (n_samples, 1) against (1, m) compares every observation
    # with every boundary; the row sum is the index of the bin it falls into.
    labels = (X > nodes.reshape(1, -1)).sum(axis=-1).reshape(-1, 1)
    return labels, nodes.reshape(-1)


# Discretise the observations into n bins; keep the labels and the bin boundaries.
Y_disc, nodes = to_discrete(X=Y_true, m=n)

In [None]:
# Train GaussianDenseHMM ten times with a co-occurrence fit followed by a
# regular fit, timing each repetition.
fit_times = []  # wall-clock seconds of every repetition (fit_coocs + fit)
for _ in range(10):
    # TODO: read parameters from dense cooc and dense em
    # NOTE(review): `c` is not set in this cell - it is whatever the last
    # `for c in cases` loop left behind. Pick the case explicitly.
    cooc_params = experiment["studies"]["l_fixed"][c].best_params  # TODO: c
    em_params   = experiment["studies"]["l_fixed"][c].best_params  # TODO: c
    # NOTE(review): the "em_params" key holds a learning rate and both em_*
    # entries are read from the cooc_* parameters - confirm the intended
    # key name and source once the TODO above is resolved.
    mstep_cofig = {"cooc_lr": cooc_params['cooc_lr_param'],
                   "cooc_epochs": cooc_params['cooc_epochs_param'],
                   "l_uz": cooc_params['l_param'],
                   "em_params":  em_params['cooc_lr_param'],
                   "em_epochs": em_params['cooc_epochs_param'],
                   "loss_type": 'square',
                   "scheduler": em_scheduler}
    wandb_params["init"].update({"name": "dense-cooc+em"})
    hmm_monitor = HMMLoggingMonitor(tol=TOLERANCE, n_iter=0, verbose=True,
                                    wandb_log=True, wandb_params=wandb_params, true_vals=true_values,
                                    log_config={'metrics_after_convergence': True})
    # np.inf instead of the np.infty alias, which NumPy 2.0 removed.
    densehmm = GaussianDenseHMM(n, mstep_config=mstep_cofig,
                                covariance_type='diag', opt_schemes={"cooc"},
                                nodes=np.concatenate([np.array([-np.inf]), nodes]),
                                discrete_observables=n, em_iter=20,
                                logging_monitor=hmm_monitor,
                                init_params="", params="stmc", early_stopping=True)
    # init_params is empty, so the means are assigned by hand before fitting.
    densehmm.means_ = mu.reshape(-1, 1)
    start = time.perf_counter()
    densehmm.fit_coocs(Y_true, lengths)
    densehmm.fit(Y_true, lengths)
    time_tmp = time.perf_counter() - start
    # Keep every timing; time_tmp alone would only hold the last repetition.
    fit_times.append(time_tmp)

# Results are logged to wandb