In [49]:
import numpy as np
from hmmlearn import hmm
from models import StandardHMM, DenseHMM

In [50]:
import ssm
from ssm.util import find_permutation

In [51]:
# Experiment configuration shared by every cell below.
n = 4  # number of hidden states (A is built as n x n, B has n rows)
n_features = 20  # number of distinct observation symbols (columns of B)
T = 10000  # length of the sampled sequence
# Seeds NumPy's legacy global RNG, which the np.random.uniform / np.random.choice
# calls in the following cells draw from.
np.random.seed(2022)

In [52]:
def _random_stochastic_rows(shape):
    """Draw exp(U(0, 5)) weights of the given 2-D shape and normalise each row to sum to 1."""
    weights = np.exp(np.random.uniform(0, 5, size=shape))
    return weights / weights.sum(axis=1, keepdims=True)


# Initial-state distribution: uniform draws rescaled to sum to one.
startprob = np.random.uniform(size=n)
startprob = startprob / startprob.sum()

# Disabled alternative: make every row of A equal to startprob.
# A = np.repeat(startprob[np.newaxis,:], n, axis=0)

# Ground-truth transition matrix (n x n); each row is a probability vector.
A = _random_stochastic_rows((n, n))
print(A)

# Ground-truth emission matrix (n x n_features); each row is a probability vector.
B = _random_stochastic_rows((n, n_features))
print(B)

[[0.19654409 0.07287817 0.56800767 0.16257008]
 [0.35182895 0.14605805 0.25343227 0.24868073]
 [0.3189469  0.59131471 0.03110439 0.05863401]
 [0.033737   0.13675546 0.81950126 0.01000628]]
[[0.06002488 0.00627282 0.00646954 0.06255808 0.00159528 0.01993377
  0.10949811 0.01015476 0.00419833 0.01563687 0.02446125 0.00833843
  0.19119066 0.00175563 0.07709793 0.00869842 0.06116333 0.14065
  0.00936557 0.18093636]
 [0.0033018  0.00271772 0.00969413 0.01354555 0.02550351 0.06218624
  0.06372731 0.03839945 0.00328873 0.19621213 0.00266308 0.10978371
  0.00264011 0.00836001 0.00613285 0.00802059 0.23290171 0.00228911
  0.01652552 0.19210673]
 [0.0037348  0.0015001  0.04392505 0.00349651 0.0206727  0.02746123
  0.18878684 0.00213588 0.00185537 0.18799384 0.01289951 0.09617844
  0.0100129  0.00937158 0.11202101 0.03504082 0.06540799 0.00217776
  0.16858827 0.00673939]
 [0.00899809 0.06936065 0.01246044 0.0045815  0.00216479 0.13178157
  0.04762684 0.1621241  0.04749894 0.00399227 0.01995184 0.

In [53]:
def my_hmm_sampler(pi, A, B, T):
    """Sample one sequence of length T from a discrete-emission HMM.

    The number of hidden states and of observation symbols is taken from the
    shapes of the arguments, so the function does not depend on notebook-level
    globals.

    Parameters
    ----------
    pi : array-like, shape (n_states,)
        Initial-state probabilities.
    A : array-like, shape (n_states, n_states)
        Transition matrix; row i is the distribution of the next state given
        the current state i.
    B : array-like, shape (n_states, n_symbols)
        Emission matrix; row i is the distribution over symbols in state i.
    T : int
        Number of time steps to sample (must be >= 1).

    Returns
    -------
    X : list of T arrays, each of shape (1,)
        Hidden state index at every time step.
    Y : ndarray, shape (T, 1)
        Emitted symbol index at every time step.

    Raises
    ------
    ValueError
        If T < 1 or the shapes of pi, A and B are inconsistent.
    """
    pi = np.asarray(pi)
    A = np.asarray(A)
    B = np.asarray(B)

    if T < 1:
        raise ValueError("T must be at least 1, got %r" % (T,))
    if pi.ndim != 1:
        raise ValueError("pi must be one-dimensional")
    n_states = pi.shape[0]
    if A.shape != (n_states, n_states):
        raise ValueError("A must have shape (n_states, n_states)")
    if B.ndim != 2 or B.shape[0] != n_states:
        raise ValueError("B must have shape (n_states, n_symbols)")

    state_ids = np.arange(n_states)
    symbol_ids = np.arange(B.shape[1])

    # All hidden states are drawn first and the emissions afterwards; keeping
    # this order keeps the consumption of the global RNG stream unchanged.
    X = [np.random.choice(state_ids, 1, replace=True, p=pi)]
    for t in range(T - 1):
        X.append(np.random.choice(state_ids, 1, replace=True, p=A[X[t][0], :]))

    Y = np.concatenate(
        [np.random.choice(symbol_ids, 1, replace=True, p=B[s[0], :]) for s in X]
    ).reshape(-1, 1)
    return X, Y

# my_hmm_sampler returns (hidden states, observations). Note the naming flip
# relative to the sampler's internals: from here on Y_true holds the hidden
# states (a list of length-1 arrays) and X_true holds the observed symbols
# (an array of shape (T, 1)).
Y_true, X_true = my_hmm_sampler(startprob, A, B, T)

In [54]:
# import  pandas  as pd
# pd.DataFrame({"Y":Y_true, "X":X_true[:,0]}).value_counts().sort_index()

In [55]:
# print(Y_true, X_true )

In [56]:
# Project-local models (imported from `models`) that are compared below.
# n is passed as the first positional argument — presumably the number of
# hidden states; the meaning of init_params=None and em_iter=1000 is defined
# in models.py (TODO confirm there).
standardhmm = StandardHMM(n, init_params=None, em_iter=1000)
densehmm = DenseHMM(n)

In [57]:
# Baseline: hmmlearn model with n components, fitted on the sampled observations
# (X_true has shape (T, 1)); n_iter caps the number of EM iterations.
# NOTE(review): newer hmmlearn releases moved the categorical-symbol model to
# CategoricalHMM — confirm MultinomialHMM is the intended class for the
# installed version.
hmml = hmm.MultinomialHMM(n, n_iter=1000)
%time hmml.fit(X_true)

CPU times: user 13.9 s, sys: 4.93 ms, total: 13.9 s
Wall time: 13.9 s


In [None]:
# Fit both project models on the sampled observations and time each fit.
# [T] is passed as the second argument — presumably the list of sequence
# lengths (a single sequence of length T); verify against models.py.
%time standardhmm.fit(X_true, [T])
%time densehmm.fit_coocs(X_true, [T])

In [None]:
# Fitted StandardHMM parameters, rounded to two decimals:
# transition matrix first, emission matrix second.
print(
    standardhmm.transmat_.round(2),
    standardhmm.emissionprob_.round(2),
    sep="\n",
)

In [None]:
# Flatten the sampled hidden states into a 1-D array of length T.
# np.asarray(...).reshape(-1) gives the same result as np.concatenate on the
# list of length-1 arrays returned by the sampler, but unlike np.concatenate
# it is idempotent: re-running this cell on the already flattened array does
# not raise.
Y_true = np.asarray(Y_true).reshape(-1)

In [None]:
# State-decoding accuracy of StandardHMM after aligning its (arbitrary) state
# labels with the ground truth.
# NOTE(review): predict receives X_true.transpose() although fit was given
# X_true — presumably the layout StandardHMM.predict expects; confirm in models.py.
standard_states = standardhmm.predict(X_true.transpose())  # decode once, reuse below
perm = find_permutation(standard_states, Y_true)  # perm[k] = true label matched to predicted label k
(Y_true == perm[standard_states]).mean()

In [None]:
# Fitted DenseHMM parameters, rounded to two decimals.
print(densehmm.transmat_.round(2))
print(densehmm.emissionprob_.round(2))

# State-decoding accuracy after aligning DenseHMM's state labels with the ground truth.
# NOTE(review): predict receives X_true.transpose() although fit_coocs was given
# X_true — presumably the layout DenseHMM.predict expects; confirm in models.py.
dense_states = densehmm.predict(X_true.transpose())  # decode once, reuse below
perm = find_permutation(dense_states, Y_true)  # perm[k] = true label matched to predicted label k
(Y_true == perm[dense_states]).mean()

In [None]:
# Fitted hmmlearn parameters, rounded to two decimals.
print(hmml.transmat_.round(2))
print(hmml.emissionprob_.round(2))

# State-decoding accuracy after aligning hmmlearn's state labels with the ground truth.
# hmml was fitted on X_true (shape (T, 1)), so predict must receive the same
# (n_samples, n_features) layout rather than the transpose.
hmml_states = hmml.predict(X_true)  # decode once, reuse below
perm = find_permutation(hmml_states, Y_true)  # perm[k] = true label matched to predicted label k
(Y_true == perm[hmml_states]).mean()

In [None]:
# Recompute the label alignment between DenseHMM's decoded states and the
# ground-truth states, and display it as the cell output.
dense_decoded = densehmm.predict(X_true.transpose())
perm = find_permutation(dense_decoded, Y_true)
perm

In [None]:
# Ground-truth transition matrix, rounded to two decimals, for comparison
# with the fitted matrices above.
print(np.round(A, 2))

In [None]:
# Categorical-emission HMM from ssm: n hidden states, one observation
# dimension, n_features categories. The category count is passed through
# observation_kwargs instead of re-invoking __init__ on the already
# constructed observation object.
ssm_hmm = ssm.HMM(
    n,
    1,
    observations="categorical",
    observation_kwargs=dict(C=n_features),
)
ssm_hmm.fit(X_true)

# Reorder the fitted states so that their labels line up with the ground
# truth, then show the (permuted) transition matrix rounded to two decimals.
most_likely_states = ssm_hmm.most_likely_states(X_true)
ssm_hmm.permute(find_permutation(Y_true, most_likely_states))
print(ssm_hmm.transitions.transition_matrix.round(2))

In [None]:
# Fraction of time steps where the ssm model's most likely state equals the
# ground-truth state (the model's states were permuted to match in the cell above).
np.mean(ssm_hmm.most_likely_states(X_true) == Y_true)

In [None]:
# from hmmlearn import _hmmc as _hmmcmod
# stats = standardhmm._initialize_sufficient_statistics(1, T)
# X=X_true
# total_logprob = 0
#
# # Iterate over all sequences
#
#
# stats['nobs'] = 0
#
# # Compute posteriors by forward-backward algorithm
# framelogprob = standardhmm._compute_log_likelihood(X[0:T].transpose())
# logprob, fwdlattice = standardhmm._do_forward_log_pass(framelogprob)
# fwdlattice = _hmmcmod.forward_log(standardhmm.startprob_, standardhmm.transmat_, framelogprob)
# # print(logprob)
# # print(fwdlattice)
# stats['all_logprobs'][0] = logprob # logprob = probability of X[i:j]
#
# total_logprob += logprob
# bwdlattice = _hmmcmod.backward_log(standardhmm.startprob_, standardhmm.transmat_, framelogprob)
# posteriors = standardhmm._compute_posteriors_log(fwdlattice, bwdlattice)
#
# # Pad posteriors with zeros such that its length equals max_seqlen
# posteriors = 1
#
# n_samples, n_components = framelogprob.shape
# # when the sample is of length 1, it contains no transitions
# # so there is no reason to update our trans. matrix estimate
#
#
# # Compute pairwise gammas and log_xi_sum
# cur_gamma_pairwise = np.zeros_like(stats['bar_gamma_pairwise'])
# log_xi_sum = np.full((n_components, n_components), -np.inf)
# log_xi_sum = _hmmcmod.compute_log_xi_sum(fwdlattice,
#                           standardhmm.transmat_,
#                           bwdlattice, framelogprob)

In [None]:
# log_xi_sum