In [1]:
import numpy as np
from munkres import Munkres, print_matrix

In [3]:
def compute_cost(zt, zt_real):
    assert type(zt) == np.ndarray, 'input must be np.array'
    cost_mat = []  # np.zeros((len(np.unique(zt_real)), len(np.unique(zt))));
    K_use = max(len(np.unique(zt_real)), len(np.unique(zt)))
    for ii in range(K_use):  ## real
        cost_mat.append([])
        for jj in range(K_use):
            cost_mat[ii].append((np.abs((zt_real == ii) * 1 - (zt == jj) * 1)).sum())
    # print_matrix(cost_mat);

    m = Munkres()
    indexes = m.compute(cost_mat)

    total = 0
    for row, column in indexes:
        value = cost_mat[row][column]
        total += value
    #     print(f'({row}, {column}) -> {value}')
    # print(f'total cost: {total}')
    return total, indexes

In [4]:
num_seq = 500
num_state = 10
num_obs = 500
max_L = 20

In [5]:
# the original simulated data
ori_data = np.load(
    f"../data/hmm_syn_dataset(norefine_state-{num_state}_obs-{num_obs}_size-{num_seq}_maxL-{max_L}).npz",
    allow_pickle=True)
hidden_states = list(ori_data['real_hidden'])
hidden_states

[[6, 1, 3, 7, 10, 9, 8, 8, 10, 8, 8, 8, 10, 5, 3, 5, 7],
 [2, 6, 7, 4, 4, 6, 7, 3, 8, 9, 5, 7, 9, 1, 4, 7, 3, 6, 1],
 [7, 7, 8, 8, 6, 10, 8, 8, 4, 7, 3, 4, 6, 4, 6, 7, 4, 2, 4],
 [6, 4, 7, 3, 3, 9, 2, 4, 9, 7, 8, 1, 6, 1],
 [2, 9, 7, 7, 3, 5, 3, 9, 4, 7, 7, 8, 3, 9],
 [6, 9, 8, 9, 7, 7, 9, 9, 6, 10, 9, 5, 3],
 [10, 1, 3, 8, 8, 9, 7, 8, 9, 1, 2, 6],
 [6, 7, 4, 8, 5, 3, 1, 2, 5, 2, 10, 9, 1, 3, 4, 8, 1, 8, 3],
 [8, 9, 4, 7, 7, 4, 5, 2, 10, 5, 10, 9, 5, 7, 7, 3],
 [2, 9, 7, 1, 10, 9, 4, 8, 5, 6, 9, 7, 10, 9, 7, 3, 10, 8, 9, 5],
 [5, 7, 3, 4, 8, 5, 3, 9, 2, 4, 8, 5, 10],
 [4, 9, 6, 8, 10, 9, 6, 1, 2, 10, 8, 4, 7, 1, 10, 9, 8, 3, 6, 4],
 [9, 5, 10, 8, 8, 5, 2, 5, 10, 5, 8, 9, 9, 7, 10, 9, 4, 10],
 [9, 4, 10, 8, 9, 7, 3, 5, 10, 9, 6, 7, 3],
 [6, 9, 3, 7, 7, 10, 8, 5, 7, 8, 2],
 [1, 3, 9, 7, 3, 10, 3, 8, 5, 10, 6, 9, 4, 8, 8, 5, 7, 3],
 [5, 3, 5, 9, 9, 8, 2, 6, 7, 8],
 [10, 5, 7, 4, 8, 4, 5, 2, 4, 8, 5, 7, 3, 8, 5, 3, 6, 1],
 [10, 8, 8, 5, 2, 4, 7, 3, 8, 8, 3, 10, 9],
 [3, 6, 1, 2, 3, 5, 2, 4

In [6]:
hidden_states_flatten = [i for ls in hidden_states for i in ls]

In [7]:
hidden_len = [len(s) for s in hidden_states]

In [8]:
def step_loss(it: int):
    VI_result = np.load(f"../data/VI3-{it}-state-{num_state}_obs-{num_obs}_size-{num_seq}_maxL-{max_L}.npz",
                        allow_pickle=True)
    u_z = VI_result['u_z']
    D, L, N = u_z.shape
    T = 100

    # sample 100 times
    z = list()
    for _ in range(T):
        for d in range(D):
            z_d = list()
            for j in range(hidden_len[d]):
                z_dj = np.random.choice(N, p=u_z[d][j])
                z_d.append(z_dj)
            assert len(z_d) == hidden_len[d]
            z += z_d

    hidden_states_flatten_arr = np.array(hidden_states_flatten * T)
    _, indexes = compute_cost(zt=np.array(z), zt_real=hidden_states_flatten_arr)
    dic = dict((v, k) for k, v in indexes)

    tmp = np.array([dic.get(z[t], 0) for t in range(len(z))])
    zero_one_loss = np.sum(tmp != hidden_states_flatten_arr)
    zero_one_loss /= hidden_states_flatten_arr.size

    print(f"In step {it}, Zero one loss rate is : {round(zero_one_loss, 4)}%")
    return zero_one_loss

In [10]:
loss = list()
for step in range(0, 661, 40):
    loss_t = step_loss(it=step)
    loss.append(loss_t)

In step 0, Zero one loss rate is : 0.9426%
In step 40, Zero one loss rate is : 0.8593%
In step 80, Zero one loss rate is : 0.8597%
In step 120, Zero one loss rate is : 0.8595%
In step 160, Zero one loss rate is : 0.8608%
In step 200, Zero one loss rate is : 0.8605%
In step 240, Zero one loss rate is : 0.8599%
In step 280, Zero one loss rate is : 0.8596%
In step 320, Zero one loss rate is : 0.86%
In step 360, Zero one loss rate is : 0.8614%
In step 400, Zero one loss rate is : 0.8611%
In step 440, Zero one loss rate is : 0.861%
In step 480, Zero one loss rate is : 0.8609%
In step 520, Zero one loss rate is : 0.862%
In step 560, Zero one loss rate is : 0.8612%
In step 600, Zero one loss rate is : 0.8601%
In step 640, Zero one loss rate is : 0.8607%
