In [75]:
from pomegranate import *
import numpy as np

In [76]:
# Hidden Markov model for tree rings.
#   Hidden states X:    H (hot year) or C (cold year)
#   Observable states:  tree ring width, one of the symbols S, M, L
#
# A pomegranate HMM is specified through its states (each one carrying an
# emission distribution) and the transition probabilities between them.
model = HiddenMarkovModel(name="Tree ring model")

# ---- Emission distributions (the rows of the emission matrix) ----
# Ring-width probabilities in hot years (hidden state H) ...
H_em = DiscreteDistribution({"S": 0.1, "M": 0.4, "L": 0.5})
# ... and in cold years (hidden state C).
C_em = DiscreteDistribution({"S": 0.7, "M": 0.2, "L": 0.1})

# ---- Hidden states, each wrapping its emission distribution ----
H_st = State(H_em, name="H")
C_st = State(C_em, name="C")
model.add_states(H_st, C_st)

# ---- Initial distribution and transition matrix ----
# Every entry is (source state, target state, probability). The two edges
# leaving model.start form the initial distribution; the remaining four are
# the rows of the H/C transition matrix (each row sums to 1).
for _source, _target, _probability in (
    (model.start, H_st, 0.5),
    (model.start, C_st, 0.5),
    (H_st, H_st, 0.7),
    (H_st, C_st, 0.3),
    (C_st, H_st, 0.4),
    (C_st, C_st, 0.6),
):
    model.add_transition(_source, _target, _probability)

# ---- Once all ingredients are mixed in, finalize ('bake') the model ----
model.bake()

# Instead of adding the transition probabilities one by one, as above,
# you can also pass in a whole transition matrix at once. See the first example here:
# https://pomegranate.readthedocs.io/en/latest/HiddenMarkovModel.html
# For large transition matrices whose entries may have to be computed one by one, the approach above
# is more convenient. In our small example both are fine.

In [77]:
# Show the model's transition matrix. The call is the last expression in the
# cell, so the notebook renders the returned array as the cell output.
model.dense_transition_matrix()
# The result is a 4x4 matrix (not 2x2) because pomegranate automatically adds
# two more states, model.start and model.end.
# NOTE(review): judging from the printed output, rows/columns appear to be
# ordered C, H, start, end (row 0 matches the C transitions 0.6/0.4) - confirm.

array([[0.6, 0.4, 0. , 0. ],
       [0.3, 0.7, 0. , 0. ],
       [0.5, 0.5, 0. , 0. ],
       [0. , 0. , 0. , 0. ]])

In [74]:
# An example sequence of tree ring widths that we might have observed.
seq = ["L", "L", "M", "S", "S"]

# pomegranate returns the *log* probability of a sequence of observable states
# (presumably because products of many small probabilities are numerically
# safer to handle in log space). To get the probability itself, exponentiate.
prob = np.exp(model.log_probability(seq))

# Report the probability as a percentage with two decimals.
print(
    'The probability that the sequence',
    *seq,
    'appears is {:.2f}%.'.format(100 * prob)
)

The probability that the sequence M S M S S M L L M L appears is 0.001833%.
The probability that the sequence L L M S S appears is 0.48%.


In [73]:
# Viterbi decoding: compute the single most likely sequence of hidden states
# for the given sequence of observable states `seq`.
log_likelihood, path = model.viterbi(seq)

# Each path entry is used as an (index, state) pair; path[0] is skipped
# (presumably the model's start state, which emits nothing - confirm).
# NOTE(review): exp(log_likelihood) looks like the joint probability of this
# hidden path together with the observations, not the probability of the path
# given the observations - confirm against the pomegranate documentation.
print(
    "The most likely underlying series of H/C states is {} at {:.3f}%.".format(
        [state.name for _, state in path[1:]],
        100 * np.exp(log_likelihood),
    )
)

The most likely underlying series of H/C states is ['H', 'H', 'H', 'C', 'C'] at 0.216%.
