In [1]:
%load_ext cython

In [2]:
import os
# Switch the working directory to the project root before the project imports that follow
# (presumably so that the `utils` and `models` packages resolve — confirm).
# Set the HMM_PROJECT_ROOT environment variable to run this notebook on another machine;
# the original developer path stays as the fallback so the existing setup keeps working.
PROJECT_ROOT = os.environ.get("HMM_PROJECT_ROOT") or "C:\\Users\\chris\\PycharmProjects\\hmm-master"
os.chdir(PROJECT_ROOT)
os.getcwd()

'C:\\Users\\chris\\PycharmProjects\\hmm-master'

In [3]:
from utils.simulate_returns import simulate_2state_gaussian
from models.hmm_cython import _log_forward_probs
from models.hmm_gaussian_em import EMHiddenMarkov

# hmmlearn speed

In [4]:
from utils.simulate_returns import simulate_2state_gaussian

from hmmlearn import hmm
from hmmlearn import _hmmc


# Simulate returns from two normal distributions, then turn the 1-D series into a
# single-column 2-D array before handing it to hmmlearn
returns, true_regimes = simulate_2state_gaussian(plotting=False)
returns = returns.reshape(-1, 1)

# Two-component Gaussian HMM from hmmlearn, fitted on the simulated returns
model = hmm.GaussianHMM(n_components=2)
model.fit(returns)

GaussianHMM(n_components=2)

In [5]:
%%timeit -n100 -r10
# Timed body: hmmlearn's log-likelihood computation followed by its forward pass.
# NOTE(review): both calls are underscore-prefixed (private) hmmlearn methods, so their
# names may not be stable across hmmlearn releases — confirm against the installed version.
framelogprob = model._compute_log_likelihood(returns)
logprob, fwdlattice = model._do_forward_pass(framelogprob)

179 µs ± 15.1 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


# Init data and variables

In [8]:
# Number of hidden states, defined once and reused by the model and the forward-pass calls
n_states = 2

# Simulate a fresh returns series from two normal distributions
returns, true_regimes = simulate_2state_gaussian(plotting=False)

# Fit the EM-based hidden Markov model with a fixed seed
model = EMHiddenMarkov(n_states=n_states, init="random", random_state=42)
model.fit(returns)

# Fitted quantities that the forward-pass implementations take as inputs
emission_probs = model.emission_probs_
delta = model.delta
TPM = model.T

# Quick look at the inputs (first ten emission rows only)
print("emissions",emission_probs[:10])
print('init dist: ', delta)
print("TPM : ", TPM)

emissions [[3.61951205 1.25613906]
 [4.09890793 1.61612958]
 [3.29657387 1.1658601 ]
 [1.18219783 0.67703196]
 [4.02197665 1.66433172]
 [4.0219931  1.66432367]
 [1.07613723 0.64960506]
 [3.00809264 1.09449702]
 [3.68147914 1.77165268]
 [3.52737752 1.22874097]]
init dist:  [1.00000000e+00 9.80619485e-21]
TPM :  [[0.99656988 0.00343012]
 [0.00528727 0.99471273]]


# Pure Python function to compare speeds

In [9]:
import numpy as np

def _log_forward_proba(n_states, X, emission_probs, delta, TPM):  # TODO not working yet
    T = len(X)
    log_alphas = np.zeros((T, n_states))  # initialize matrix with zeros

    # a0, compute first forward as dot product of initial dist and state-dependent dist
    # Each element is scaled to sum to 1 in order to handle numerical underflow
    alpha_t = delta * emission_probs[0, :]
    sum_alpha_t = np.sum(alpha_t)
    alpha_t_scaled = alpha_t / sum_alpha_t
    llk = np.log(sum_alpha_t)  # Scalar to store the log likelihood
    log_alphas[0, :] = llk + np.log(alpha_t_scaled)

    # a1 to at, compute recursively
    for t in range(1, T):
        alpha_t = (alpha_t_scaled @ TPM) * emission_probs[t, :]  # Dot product of previous forward_prob, transition matrix and emmission probablitites
        sum_alpha_t = np.sum(alpha_t)

        alpha_t_scaled = alpha_t / sum_alpha_t  # Scale forward_probs to sum to 1
        llk = llk + np.log(sum_alpha_t)  # Scalar to store likelihoods
        log_alphas[t, :] = llk + np.log(alpha_t_scaled)

    return log_alphas

#_log_forward_proba(n_states, returns, emission_probs, delta, TPM)

In [10]:
%%timeit -n100 -r10
# Timed body: the pure-Python/NumPy forward pass, fed with the quantities read from the fitted model
_log_forward_proba(n_states, returns, emission_probs, delta, TPM)

10.3 ms ± 220 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


# Cython versions

In [11]:
%%cython

import numpy as np

def log_forward_proba_c(n_states, X, emission_probs, delta, TPM):
    """Untyped forward pass, compiled by Cython without any static declarations.

    Computes the log forward probabilities with per-step rescaling: each forward
    vector is divided by its sum to avoid underflow, and the log of that sum is
    accumulated into a running log-likelihood that is added back on storage.

    TODO(review): the original author flagged this function as "not working yet";
    verify the output against a reference implementation.

    Parameters
    ----------
    n_states : int
        Number of columns of the returned array.
    X : sequence
        Observations; only ``len(X)`` is used.
    emission_probs : ndarray, 2-D
        Row ``t`` weights the forward vector at step ``t``.
    delta : ndarray, 1-D
        Vector multiplied with the first emission row to start the recursion.
    TPM : ndarray, 2-D
        Matrix applied to the scaled forward vector via ``np.dot`` at each step.

    Returns
    -------
    ndarray of shape ``(len(X), n_states)`` holding the log forward probabilities.
    """
    n_steps = len(X)
    log_alphas = np.zeros((n_steps, n_states))

    # Initial step: starting vector times the first emission row, then rescale
    raw = delta * emission_probs[0, :]
    norm = np.sum(raw)
    scaled = raw / norm
    llk = np.log(norm)
    log_alphas[0, :] = llk + np.log(scaled)

    # Recursion over the remaining steps
    for step in range(1, n_steps):
        raw = np.dot(scaled, TPM) * emission_probs[step, :]
        norm = np.sum(raw)

        scaled = raw / norm  # forward vector rescaled to sum to 1
        llk = llk + np.log(norm)  # accumulate the log of the scaling constant
        log_alphas[step, :] = llk + np.log(scaled)

    return log_alphas

In [12]:
%%timeit -n100 -r10
# Timed body: the untyped log_forward_proba_c compiled in the preceding %%cython cell
log_forward_proba_c(n_states, returns, emission_probs, delta, TPM)

9.83 ms ± 126 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [13]:
%%cython

import numpy as np
cimport numpy as np



def log_forward_proba_c(int n_states, np.ndarray[double, ndim=1] X, np.ndarray[double, ndim=2] emission_probs,
                        np.ndarray[double, ndim=1] delta, np.ndarray[double, ndim=2] TPM):
    """Statically typed forward pass returning log forward probabilities.

    Each forward vector is rescaled to sum to 1 to avoid numerical underflow; the
    logs of the scaling constants are accumulated in ``llk`` and added back when a
    row of the result is stored.

    NOTE: only the declarations are typed. Every arithmetic step still goes through
    NumPy calls (``np.dot``, ``np.sum``, ``np.log``) and slice assignment.

    TODO(review): the original author flagged this function as "not working yet";
    verify the output against a reference implementation.

    Parameters
    ----------
    n_states : int
        Number of columns of the returned array.
    X : 1-D double ndarray
        Observations; only ``len(X)`` is used, to set the number of time steps.
    emission_probs : 2-D double ndarray
        Row ``t`` is multiplied element-wise into the forward vector at step ``t``.
    delta : 1-D double ndarray
        Vector multiplied with ``emission_probs[0, :]`` to start the recursion.
    TPM : 2-D double ndarray
        Matrix applied to the scaled forward vector at each step.

    Returns
    -------
    2-D double ndarray of shape ``(len(X), n_states)`` with the log forward probabilities.
    """
    cdef int T = len(X)
    # np.float64 is used instead of the `np.float` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24 (AttributeError on current NumPy).
    cdef np.ndarray[double, ndim=2] log_alphas = np.zeros((T, n_states), dtype=np.float64)  # initialize matrix with zeros

    # a0: element-wise product of the starting vector and the first emission row,
    # scaled to sum to 1 in order to handle numerical underflow
    cdef np.ndarray[double, ndim=1] alpha_t = delta * emission_probs[0, :]
    cdef double sum_alpha_t = np.sum(alpha_t)
    cdef np.ndarray[double, ndim=1] alpha_t_scaled = alpha_t / sum_alpha_t
    cdef double llk = np.log(sum_alpha_t)  # Scalar accumulating the log likelihood
    log_alphas[0, :] = llk + np.log(alpha_t_scaled)

    # a1 to aT-1, computed recursively
    cdef int t

    for t in range(1, T):
        # Previous scaled forward vector times the transition matrix, weighted by the emission row
        alpha_t = np.dot(alpha_t_scaled, TPM) * emission_probs[t, :]
        sum_alpha_t = np.sum(alpha_t)

        alpha_t_scaled = alpha_t / sum_alpha_t  # Scale forward vector to sum to 1
        llk = llk + np.log(sum_alpha_t)  # Accumulate the log of the scaling constant
        log_alphas[t, :] = llk + np.log(alpha_t_scaled)

    return log_alphas

In [14]:
%%timeit -n100 -r10
# Timed body: the statically typed log_forward_proba_c from the preceding %%cython cell,
# which rebinds the name previously bound to the untyped version
log_forward_proba_c(n_states, returns, emission_probs, delta, TPM)

10.3 ms ± 149 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
