In [65]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [66]:
import numpy as np
import pandas as pd
import ssm
import matplotlib.pyplot as plt
import plotly.express as px
import pickle
import scipy
import scipy.io
import tsfresh
import dtale
import os
px.set_mapbox_access_token("pk.eyJ1IjoibWlrb2xhanNsdXBpbnNraSIsImEiOiJjazJ5dDI0MWEwOTA3M2hxanRwbmo4NDRuIn0.NSbj3B1Bm6fC5QUP3s-BjQ")
import plotly.io as pio
pio.renderers.default = "iframe"
import dask
from lib.huawei import load_data, load_motion, load_ambient, load_battery, load_api, load_label
import tables
from ssm.util import random_rotation, find_permutation
import logging
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
logging.basicConfig(filename='huawei_hmm.log', level=logging.DEBUG)

In [67]:
MODELS_PATH = "../Models/hmm_parallel/"

In [68]:
with open("../Data/huawei-competition/selected_features.pickle", "rb") as ifile:
    selected_features = pickle.load(ifile)

In [69]:
data = []
for selected_feature in selected_features:
#     file_path = os.path.join("../Data/2012.01.17-huawei/splitted/", f"1.3a-stationary_50ms__{selected_feature}.csv")
#     df = pd.read_csv(file_path, parse_dates = ["DATETIME_UTC"], index_col="DATETIME_UTC")
    a = np.load(os.path.join("../", selected_feature))
    data.append(a)
#     print(df.shape)
data = np.stack(data, axis=1)

In [70]:
colnames = [feature.split("/")[-1][15:-4] for feature in selected_features]

In [71]:
transportation_modes = np.array(["still", "walk", "run", "bike", "car", "bus", "train", "subway"])

In [72]:
labels = np.load("../Data/huawei-competition/resampled/Label.npy")[:,-1].astype(int) - 1

In [73]:
order = np.loadtxt("../Data/huawei-competition/train_order.txt").astype(int)

In [74]:
data_ordered = np.zeros(data.shape)

In [75]:
data_ordered[order - 1, :] = data

In [76]:
labels_ordered = np.zeros(labels.shape)
labels_ordered[order - 1] = labels

In [77]:
n = int(data_ordered.shape[0]*0.25)
m = int(data_ordered.shape[0]*0.5)
X_train = data_ordered[m:]
y_train = labels_ordered[m:]
X_val = data_ordered[:n]
y_val = labels_ordered[:n]
X_test = data_ordered[n:m]
y_test = labels_ordered[n:m]

In [78]:
def load_model(path):
    with open(path, "rb") as ifile:
        model_dic = pickle.load(ifile)
    return model_dic

In [79]:
models_dic = {}
for f in os.listdir(MODELS_PATH):
    models_dic[f] = load_model(os.path.join(MODELS_PATH, f))

In [80]:
list(models_dic.keys())[0][:-7]

'huawei_hmm_standard_diagonal_gaussian_10000_10000_random'

In [81]:
def _plot_switches(i, states, data, limit):
    m = data.shape[0]
    if limit is None:
        xs = np.arange(m)
        ys = data[:, i]
        color = transportation_modes[states.astype(int)]
    else:
        lim = np.minimum(limit, m)
        xs = np.arange(lim)
        ys = data[:lim, i]
        color = transportation_modes[states.astype(int)][:lim]
    fig = px.scatter(x= xs, y = ys, color = color, title = f"{i}: {colnames[i]}, limit {limit}")
    return fig

In [82]:
def plot_switches(prefix, model_dic, X, y, limit = None):
    n = int(X.shape[0]*0.25)
    m = int(X.shape[0]*0.5)
    X_train = X[m:]
    y_train = y[m:]
    X_val = X[:n]
    y_val = y[:n]
    X_test = X[n:m]
    y_test = y[n:m]
    model = model_dic["arhmm"]
    lls = model_dic["hmm_lls"]
    most_likely = model_dic["most_likely"]
    for i in range(len(colnames)):
        fig = _plot_switches(i, most_likely, X_train, limit)
    #     fig.show()
        fig.write_image(f"../Plots/hmm/features/{prefix}_{i}_{colnames[i]}_{limit}.png")
    most_likely = model.most_likely_states(X_val)
    for i in range(len(colnames)):
        fig = _plot_switches(i, most_likely, X_val, limit)
    #     fig.show()
        fig.write_image(f"../Plots/hmm/features/{prefix}_val_{i}_{colnames[i]}_{limit}.png")
        

In [83]:
def plot_switches_binary(prefix, model_dic, X, y, label0, label1, limit = None):
    n = int(X.shape[0]*0.25)
    m = int(X.shape[0]*0.5)
    X_train = X[m:]
    y_train = y[m:]
    X_val = X[:n]
    y_val = y[:n]
    X_test = X[n:m]
    y_test = y[n:m]
    model = model_dic["arhmm"]
    lls = model_dic["hmm_lls"]
    most_likely = model_dic["most_likely"]
    indices = np.isin(most_likely, [label0, label1])
    for i in range(len(colnames)):
        fig = _plot_switches(i, most_likely[indices], X_train[indices,:], limit)
    #     fig.show()
        fig.write_image(f"../Plots/hmm/features/{prefix}_{i}_{colnames[i]}_{limit}_{transportation_modes[label0]}_{transportation_modes[label1]}.png")
    most_likely = model.most_likely_states(X_val)
    indices = np.isin(most_likely, [label0, label1])
    for i in range(len(colnames)):
        fig = _plot_switches(i, most_likely[indices], X_val[indices,:], limit)
    #     fig.show()
        fig.write_image(f"../Plots/hmm/features/{prefix}_val_{i}_{colnames[i]}_{limit}_{transportation_modes[label0]}_{transportation_modes[label1]}.png")

In [84]:
# def plot_states(data_z, z_est, label, prefix):
#     titles = ["True", label]
#     states_list = [data_z, z_est]
#     fig, axs = plt.subplots(2,1, figsize=(6,4))
#     for (i, ax, states) in zip(range(len(axs)), axs, states_list):
#         ax.imshow(states[None,:], aspect="auto")
#         ax.set_yticks([])
#         ax.set_title(titles[i])
#         if i < (len(axs) - 1):
#             ax.set_xticks([])

#     plt.suptitle(f"{titles[0]} and {titles[1]}", va="baseline")
#     plt.tight_layout()
#     plt.savefig(f"../Plots/hmm/{prefix}_{titles[0]}_{titles[1]}.pdf")
#     plt.show()

In [86]:
for f_name, model_dic in models_dic.items():
    prefix = f_name[:-7]
    print(prefix)
    plot_switches(prefix, model_dic, data_ordered, labels_ordered)
    plot_switches(prefix, model_dic, data_ordered, labels_ordered, 1000)
    plot_switches(prefix, model_dic, data_ordered, labels_ordered, 6000)

huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_kmeans


In [None]:
for f_name, model_dic in models_dic.items():
    for i in range(len(transportation_modes)):
        for j in range(i+1, len(transportation_modes)):
            prefix = f_name[:-7]
            print(prefix)
            plot_switches_binary(prefix, model_dic, data_ordered, labels_ordered, i, j)
            plot_switches_binary(prefix, model_dic, data_ordered, labels_ordered, i, j, 1000)
            plot_switches_binary(prefix, model_dic, data_ordered, labels_ordered, i, j, 6000)

huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_ga

In [None]:
plot_states(y_train, models[0]["most_likely"], "estimated", 0 )

In [None]:
accuracy_score(y_train, models[1]["most_likely"])

In [None]:
confusion_matrix(y_train, models[1]["most_likely"])

In [None]:
posterior_x = posterior.mean_continuous_states[0]
most_likely = rslds.most_likely_states(posterior_x, X_train)

In [None]:
y_train

In [None]:
# try:
perm = find_permutation(y_train.astype(int), most_likely)
rslds.permute(perm)
# except:
#     logging.error("Prediction exception")

In [None]:
z_est = rslds.most_likely_states(posterior_x, X_train)
plot_states(y_train, z_est, "Predicted", 0)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
accuracy_score(y_train, z_est)

In [None]:
confusion_matrix(y_train, z_est)

In [None]:
print(models[0]['hmm_lls'])

In [None]:
plt.plot(models[1]['hmm_lls'][1000:])

In [None]:
"test"