In [185]:
# Author: Yilin ZHENG
import numpy as np
import random

1. Viterbi Algorithm

In [214]:
# Reference: https://en.wikipedia.org/wiki/Viterbi_algorithm
def viterbi(observations, states, init_state, trans_mat, emission_mat, print_parameters=False, iterations=0):
    """Return the most probable hidden-state path for ``observations``.

    Args:
        observations: Sequence of observed symbols (must support ``len`` and
            indexing). Every symbol must be a key of ``emission_mat[state]``.
        states: Sequence of hidden-state labels. When several states are
            equally likely, the one appearing first in ``states`` wins.
        init_state: Mapping ``state -> probability`` of starting in that state.
        trans_mat: Nested mapping; ``trans_mat[prev][cur]`` is the probability
            of moving from ``prev`` to ``cur``.
        emission_mat: Nested mapping; ``emission_mat[state][symbol]`` is the
            probability that ``state`` emits ``symbol``.
        print_parameters: When true, print the table yielded by
            ``get_parameters`` once, right after step ``iterations`` is filled.
        iterations: Step index (1-based into the trellis) at which the table is
            printed. ``0`` (the default) or any value that is never reached by
            the step counter means nothing is printed.

    Returns:
        A tuple ``(path, prob)``: ``path`` is a list with one state per
        observation and ``prob`` is the joint probability of that path and the
        observations. For an empty ``observations`` the result is ``([], 1.0)``.
    """
    if len(observations) == 0:
        # Nothing to decode: the empty path explains the empty sequence with
        # probability 1 (the empty product). Without this guard the
        # initialisation below fails with an IndexError.
        return [], 1.0

    # V[t][s] stores, for the best path that ends in state s at step t, its
    # probability ("prob") and the state it came from ("prev").
    V = [{s: {"prob": init_state[s] * emission_mat[s][observations[0]], "prev": None}
          for s in states}]

    for t in range(1, len(observations)):
        prev_column = V[t - 1]
        V.append({})
        for s in states:
            # max() keeps the first maximal candidate, so ties resolve to the
            # earliest entry of `states`.
            best_prev = max(states, key=lambda p: prev_column[p]["prob"] * trans_mat[p][s])
            best_trans_prob = prev_column[best_prev]["prob"] * trans_mat[best_prev][s]
            V[t][s] = {"prob": best_trans_prob * emission_mat[s][observations[t]],
                       "prev": best_prev}
        if print_parameters and iterations != 0 and t == iterations:
            for line in get_parameters(V):
                print(line)

    # Most likely final state (first one in insertion order on ties).
    last_column = V[-1]
    best_last = max(last_column, key=lambda s: last_column[s]["prob"])
    max_prob = last_column[best_last]["prob"]

    # Follow the "prev" pointers backwards; collect in reverse and flip once
    # instead of inserting at the front of the list on every step.
    opt = [best_last]
    for t in range(len(V) - 1, 0, -1):
        opt.append(V[t][opt[-1]]["prev"])
    opt.reverse()
    return opt, max_prob

def get_parameters(V):
    """Yield the text lines of a table showing the trellis ``V``.

    The first line lists the step indices ``0 .. len(V) - 1``, each
    right-aligned in nine characters. Every following line belongs to one
    state of ``V[0]``: the state name cut to at most seven characters, a
    colon, and then one probability per step, formatted with ``%f`` and cut
    to its first seven characters.
    """
    header_cells = []
    for step in range(len(V)):
        header_cells.append("%9d" % step)
    yield " ".join(header_cells)

    for name in V[0]:
        row_cells = []
        for column in V:
            as_text = "%f" % column[name]["prob"]
            # Truncates (does not round) the fixed-point text to 7 characters.
            row_cells.append("%.7s" % as_text)
        yield "%.7s: " % name + " ".join(row_cells)

In [215]:
# All eight possible outcomes of three consecutive coin tosses.
seq0 = ('Heads', 'Heads', 'Heads')
seq1 = ('Heads', 'Heads', 'Tails')
seq2 = ('Heads', 'Tails', 'Heads')
seq3 = ('Heads', 'Tails', 'Tails')
seq4 = ('Tails', 'Heads', 'Heads')
seq5 = ('Tails', 'Heads', 'Tails')
seq6 = ('Tails', 'Tails', 'Heads')
seq7 = ('Tails', 'Tails', 'Tails')

all_seqs = [seq0, seq1, seq2, seq3, seq4, seq5, seq6, seq7]

# Pick 5 of the 8 sequences without replacement. A dedicated generator with a
# fixed seed makes the selection (and everything computed from it) identical
# on every Restart & Run All, and leaves the global `random` state untouched.
# NOTE: outputs saved in the notebook from earlier, unseeded runs will differ
# from what this cell produces until the notebook is re-executed.
RANDOM_SEED = 42
observation_seq = random.Random(RANDOM_SEED).sample(all_seqs, 5)

# Flatten the five 3-toss sequences into one list of 15 observations.
observations = []
for seq in observation_seq:
    observations.extend(seq)
print(observations)

# Hidden states and the candidate initial distributions compared in part 1.
states = ("Coin1", "Coin2", "Coin3")
init_state_1 = {"Coin1": 0.3, "Coin2": 0.4, "Coin3": 0.3}
init_state_2 = {"Coin1": 0.34, "Coin2": 0.33, "Coin3": 0.33}
init_state_3 = {"Coin1": 0.2, "Coin2": 0.4, "Coin3": 0.4}
init_state_4 = {"Coin1": 0.4, "Coin2": 0.5, "Coin3": 0.1}
init_states = [init_state_1, init_state_2, init_state_3, init_state_4]

# trans_mat[prev][cur]: probability of switching from coin `prev` to `cur`.
trans_mat = {"Coin1": {"Coin1": 0.6, "Coin2": 0.2, "Coin3": 0.2},
             "Coin2": {"Coin1": 0.3, "Coin2": 0.5, "Coin3": 0.2},
             "Coin3": {"Coin1": 0.5, "Coin2": 0.2, "Coin3": 0.3}}
# emission_mat[coin][face]: probability that `coin` shows `face`.
emission_mat = {"Coin1": {"Heads": 0.7, "Tails": 0.3},
                "Coin2": {"Heads": 0.4, "Tails": 0.6},
                "Coin3": {"Heads": 0.5, "Tails": 0.5}}

['Heads', 'Heads', 'Heads', 'Tails', 'Heads', 'Tails', 'Heads', 'Tails', 'Tails', 'Tails', 'Tails', 'Heads', 'Tails', 'Tails', 'Tails']


In [216]:
# Decode the same observation list once per candidate initial distribution
# and report the best path together with its probability.
for init_state in init_states:
    result_seq, prob = viterbi(observations, states, init_state, trans_mat, emission_mat)
    print(
        "The sequences are " + " -> ".join(result_seq),
        "The highest probability is %s" % prob,
        sep="\n",
    )

The sequences are Coin1 -> Coin1 -> Coin1 -> Coin1 -> Coin1 -> Coin1 -> Coin1 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2
The highest probability is 3.704249548016638e-09
The sequences are Coin1 -> Coin1 -> Coin1 -> Coin1 -> Coin1 -> Coin1 -> Coin1 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2
The highest probability is 4.198149487752189e-09
The sequences are Coin3 -> Coin1 -> Coin1 -> Coin1 -> Coin1 -> Coin1 -> Coin1 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2
The highest probability is 2.939880593663998e-09
The sequences are Coin1 -> Coin1 -> Coin1 -> Coin1 -> Coin1 -> Coin1 -> Coin1 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2 -> Coin2
The highest probability is 4.938999397355515e-09


2. Calculate Parameters

In [217]:
# Model used for part 2. These assignments rebind emission_mat, trans_mat and
# init_state, so the values from part 1 are no longer available afterwards.

# emission_mat[coin][face]: probability that `coin` shows `face`.
emission_mat = {
    "Coin1": {"Heads": 0.2, "Tails": 0.8},
    "Coin2": {"Heads": 0.4, "Tails": 0.6},
    "Coin3": {"Heads": 0.5, "Tails": 0.5},
}

# trans_mat[prev][cur]: probability of switching from coin `prev` to `cur`.
trans_mat = {
    "Coin1": {"Coin1": 0.1, "Coin2": 0.3, "Coin3": 0.6},
    "Coin2": {"Coin1": 0.5, "Coin2": 0.3, "Coin3": 0.2},
    "Coin3": {"Coin1": 0.4, "Coin2": 0.3, "Coin3": 0.3},
}

# Probability of starting with each coin.
init_state = {
    "Coin1": 0.34,
    "Coin2": 0.33,
    "Coin3": 0.33,
}


In [219]:
# Run the decoder with the part-2 model and have it print the probability
# table once step 2 has been filled in (columns 0, 1 and 2).
result_seq, prob = viterbi(
    observations,
    states,
    init_state,
    trans_mat,
    emission_mat,
    print_parameters=True,
    iterations=2,
)
# The decoded path and its probability stay available in result_seq / prob;
# they are not printed in this cell.

        0         1         2
Coin1: 0.06800 0.01320 0.00198
Coin2: 0.13200 0.01980 0.00297
Coin3: 0.16500 0.02475 0.00396
