In [15]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [32]:
import numpy as np
import pandas as pd
import ssm
import matplotlib.pyplot as plt
import plotly.express as px
import pickle
import scipy
import scipy.io
import tsfresh
import dtale
import os
px.set_mapbox_access_token("pk.eyJ1IjoibWlrb2xhanNsdXBpbnNraSIsImEiOiJjazJ5dDI0MWEwOTA3M2hxanRwbmo4NDRuIn0.NSbj3B1Bm6fC5QUP3s-BjQ")
import plotly.io as pio
pio.renderers.default = "iframe"
import dask
from lib.huawei import load_data, load_motion, load_ambient, load_battery, load_api, load_label
import tables
from ssm.util import random_rotation, find_permutation
import logging
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.feature_selection import mutual_info_classif
logging.basicConfig(filename='huawei_hmm.log', level=logging.DEBUG)

In [17]:
MODELS_PATH = "../Models/hmm/"

In [18]:
with open("../Data/huawei-competition/selected_features.pickle", "rb") as ifile:
    selected_features = pickle.load(ifile)

In [42]:
transportation_modes = np.array(["still", "walk", "run", "bike", "car", "bus", "train", "subway"])

In [19]:
data = []
for selected_feature in selected_features:
#     file_path = os.path.join("../Data/2012.01.17-huawei/splitted/", f"1.3a-stationary_50ms__{selected_feature}.csv")
#     df = pd.read_csv(file_path, parse_dates = ["DATETIME_UTC"], index_col="DATETIME_UTC")
    a = np.load(os.path.join("../", selected_feature))
    data.append(a)
#     print(df.shape)
data = np.stack(data, axis=1)

In [20]:
labels = np.load("../Data/huawei-competition/resampled/Label.npy")[:,-1].astype(int) - 1

In [21]:
order = np.loadtxt("../Data/huawei-competition/train_order.txt").astype(int)

In [22]:
data_ordered = np.zeros(data.shape)

In [23]:
data_ordered[order - 1, :] = data

In [24]:
labels_ordered = np.zeros(labels.shape)
labels_ordered[order - 1] = labels

In [25]:
n = int(data_ordered.shape[0]*0.25)
m = int(data_ordered.shape[0]*0.5)
X_train = data_ordered[m:]
y_train = labels_ordered[m:]
X_val = data_ordered[:n]
y_val = labels_ordered[:n]
X_test = data_ordered[n:m]
y_test = labels_ordered[n:m]

In [26]:
def load_model(path):
    with open(path, "rb") as ifile:
        model_dic = pickle.load(ifile)
    return model_dic

In [27]:
models_dic = {}
for f in os.listdir(MODELS_PATH):
    models_dic[f] = load_model(os.path.join(MODELS_PATH, f))

In [28]:
for f in os.listdir("../Models/hmm_parallel/"):
    models_dic[f] = load_model(os.path.join("../Models/hmm_parallel/", f))

In [29]:
np.unique(y_train, return_counts=True)

(array([0., 1., 2., 3., 4., 5., 6., 7.]),
 array([1176,  949,  338, 1110, 1195,  993, 1455,  939]))

In [30]:
def plot_states(data_z, z_est, label, prefix):
    titles = ["True", label]
    states_list = [data_z, z_est]
    fig, axs = plt.subplots(2,1, figsize=(6,4))
    for (i, ax, states) in zip(range(len(axs)), axs, states_list):
        ax.imshow(states[None,:], aspect="auto")
        ax.set_yticks([])
        ax.set_title(titles[i])
        if i < (len(axs) - 1):
            ax.set_xticks([])

    plt.suptitle(f"{titles[0]} and {titles[1]}", va="baseline")
    plt.tight_layout()
    plt.savefig(f"../Plots/hmm/{prefix}_{titles[0]}_{titles[1]}.pdf")
    plt.show()

In [34]:
mutual_informations = {}

In [35]:
for f_name, model_dic in models_dic.items():
    prefix = f_name[:-7]
    print(prefix)
    model = model_dic["arhmm"]
    lls = model_dic["hmm_lls"]
    most_likely = model_dic["most_likely"]
    mutual_info = mutual_info_classif(X_train, most_likely)
    mutual_informations[f_name+"_validation"] = mutual_info
#     plt.plot(lls)
#     plt.show()
#     l = len(lls) // 10
#     plt.plot(lls[l:])
#     plt.show()
#     plot_states(y_train, most_likely, "Estimated", prefix)
#     print(accuracy_score(y_train, most_likely))
#     disp = ConfusionMatrixDisplay(confusion_matrix(y_train, most_likely))
#     disp.plot()
#     plt.show()
#     y_predicted = model.most_likely_states(X_val)
#     plot_states(y_val, y_predicted, "Predicted", prefix)
#     print(accuracy_score(y_val, y_predicted))
#     disp = ConfusionMatrixDisplay(confusion_matrix(y_val, y_predicted))
#     disp.plot()
#     plt.show()

huawei_hmm_recurrent_diagonal_gaussian_10_10000_random
huawei_hmm_recurrent_diagonal_gaussian_1000_10000_random
huawei_hmm_recurrent_diagonal_gaussian_1000_10000_kmeans
huawei_hmm_recurrent_diagonal_gaussian_10000_10000_random
huawei_hmm_recurrent_diagonal_gaussian_10000_10000_kmeans
huawei_hmm_recurrent_diagonal_gaussian_100000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_hmm_standard_diagonal_gaussian_10000_10000_kmeans
huawei_reduced_hmm_recurrent_diagonal_gaussian_10000_10000_random
huawei_reduced_hmm_recurrent_gaussian_10000_10000_random
huawei_reduced_hmm_recurrent_no_input_ar_10000_10000_random
huawei_reduced_hmm_recurrent_robust_ar_10000_10000_random
huawei_reduced_hmm_recurrent_diagonal_gaussian_10000_10000_kmeans
huawei_reduced_hmm_recurrent_gaussian_10000_10000_kmeans
huawei_reduced_hmm_standard_diagonal_gaussian_10000_10000_random
huawei_reduced_hmm_standard_diagonal_gaussian_10000_10000_kmeans
huawei_reduced_hmm_standard_autoregressive_10000

In [47]:
feature_names = []
for i, feature in enumerate(selected_features):
    feature_name = feature.split("-")[-1].split(".")[0]
    feature_names.append(f"{i}_{feature_name}")

In [48]:
feature_names

['0_LAcc_magnitude_q2',
 '1_Acc_magnitude_hist_0',
 '2_Acc_magnitude_max_signal1',
 '3_LAcc_y_q1',
 '4_LAcc_y_hist_0',
 '5_Acc_y_Acc_magnitude_cov',
 '6_LAcc_y_hist_9',
 '7_Acc_magnitude_hist_9',
 '8_Acc_magnitude_entropy',
 '9_Acc_magnitude_Acc_derotated_z_cov',
 '10_LAcc_z_hist_0',
 '11_LAcc_x_hist_0',
 '12_Acc_y_hist_9',
 '13_LAcc_x_max_signal1',
 '14_Acc_magnitude_Gyr_magnitude_cov',
 '15_Acc_y_LAcc_y_cov',
 '16_Acc_magnitude_LAcc_magnitude_cov',
 '17_Acc_magnitude_hist_1',
 '18_Acc_derotated_z_max_signal2',
 '19_LAcc_z_hist_9',
 '20_LAcc_y_Acc_magnitude_cov',
 '21_Acc_z_Acc_magnitude_cov',
 '22_Acc_magnitude_Gra_magnitude_cov',
 '23_LAcc_x_hist_9',
 '24_Acc_magnitude_hist_2',
 '25_Gyr_y_hist_0',
 '26_LAcc_z_Acc_magnitude_cov',
 '27_LAcc_magnitude_hist_9',
 '28_Gyr_x_Acc_magnitude_cov',
 '29_LAcc_z_pitch_cov',
 '30_Acc_z_LAcc_z_cov',
 '31_LAcc_y_Gyr_magnitude_cov',
 '32_Gyr_y_max_signal1',
 '33_LAcc_z_Acc_y_cov',
 '34_Gyr_y_LAcc_y_cov',
 '35_LAcc_x_Acc_y_cov',
 '36_LAcc_y_Acc_derot

In [53]:
for key, mi in mutual_informations.items():
    print(key)
    print("="*100)
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
        print(pd.Series(data = mi, index = feature_names).sort_values(ascending=False))
    print("-"*100)
    print("")

huawei_hmm_recurrent_diagonal_gaussian_10_10000_random.pickle_validation
11_LAcc_x_hist_0                        1.133813
10_LAcc_z_hist_0                        1.132985
4_LAcc_y_hist_0                         1.123134
27_LAcc_magnitude_hist_9                1.111529
1_Acc_magnitude_hist_0                  1.110682
2_Acc_magnitude_max_signal1             1.097501
13_LAcc_x_max_signal1                   1.089889
5_Acc_y_Acc_magnitude_cov               1.087844
18_Acc_derotated_z_max_signal2          1.066815
25_Gyr_y_hist_0                         1.061401
0_LAcc_magnitude_q2                     1.048535
9_Acc_magnitude_Acc_derotated_z_cov     1.044770
16_Acc_magnitude_LAcc_magnitude_cov     1.042455
8_Acc_magnitude_entropy                 1.014231
7_Acc_magnitude_hist_9                  1.012460
14_Acc_magnitude_Gyr_magnitude_cov      1.008994
6_LAcc_y_hist_9                         1.003378
32_Gyr_y_max_signal1                    0.999696
19_LAcc_z_hist_9                        0.993

In [None]:
plot_states(y_train, models[0]["most_likely"], "estimated", 0 )

In [None]:
accuracy_score(y_train, models[1]["most_likely"])

In [None]:
confusion_matrix(y_train, models[1]["most_likely"])

In [None]:
posterior_x = posterior.mean_continuous_states[0]
most_likely = rslds.most_likely_states(posterior_x, X_train)

In [None]:
y_train

In [None]:
# try:
perm = find_permutation(y_train.astype(int), most_likely)
rslds.permute(perm)
# except:
#     logging.error("Prediction exception")

In [None]:
z_est = rslds.most_likely_states(posterior_x, X_train)
plot_states(y_train, z_est, "Predicted", 0)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
accuracy_score(y_train, z_est)

In [None]:
confusion_matrix(y_train, z_est)

In [None]:
print(models[0]['hmm_lls'])

In [None]:
plt.plot(models[1]['hmm_lls'][1000:])

In [None]:
"test"