In [364]:
# package imports
import numpy as np
from itertools import islice
from scipy.linalg import eig
from scipy.stats import norm
from sklearn.preprocessing import normalize

In [365]:
# get stationary distribution of transition matrix
# from stack overflow
def get_stationary_distibution(state_transition_matrix: np.ndarray) -> np.ndarray:
    """Return the stationary distribution of a row-stochastic transition matrix.

    The stationary distribution is the left eigenvector of the matrix for
    eigenvalue 1 (computed as a right eigenvector of the transpose), rescaled
    so that its entries sum to 1.

    Parameters
    ----------
    state_transition_matrix : np.ndarray
        Square transition matrix; entry [i, j] is used as the probability of
        moving from state i to state j.

    Returns
    -------
    np.ndarray
        Real-valued 1-D array with one entry per state, summing to 1.

    Raises
    ------
    IndexError
        If no eigenvalue lies within 1e-8 of 1.
    """
    eigenvalues, eigenvectors = eig(np.asarray(state_transition_matrix).T)
    # First eigenvalue numerically equal to 1; indexing [0][0] raises
    # IndexError when there is none.
    unit_index = np.where(np.abs(eigenvalues - 1.) < 1e-8)[0][0]
    # eig() returns complex eigenvectors as soon as *any* eigenvalue of the
    # matrix is complex. The eigenvector belonging to the real eigenvalue 1
    # only carries a zero imaginary part, so keep the real part to hand back
    # genuine probabilities instead of complex numbers.
    stationary = np.real(eigenvectors[:, unit_index]).astype(float)
    return stationary / np.sum(stationary)

In [366]:
def estimate_sequence(state_transition_matrix:np.ndarray, gaussian_params: np.ndarray, initial_state_probability:np.ndarray, observations:np.ndarray, state_count: int, state_converter:dict):
    """Decode the most likely hidden-state sequence (Viterbi, in log space).

    Parameters
    ----------
    state_transition_matrix : np.ndarray
        (state_count, state_count) matrix; entry [i, j] is the probability of
        moving from state i to state j.
    gaussian_params : np.ndarray
        Row 0 is passed as ``loc`` and row 1 as ``scale`` of
        ``scipy.stats.norm``, one column per state.
        NOTE(review): ``scale`` is a standard deviation; if the parameter file
        stores variances the caller must pass their square roots - confirm.
    initial_state_probability : np.ndarray
        Probability of each state at the first observation.
    observations : np.ndarray
        Observed values, either shape (n, 1) or (n,).
    state_count : int
        Number of hidden states.
    state_converter : dict
        Maps a state index (int) to the label placed in the decoded path.

    Returns
    -------
    tuple
        ``(path, state_probability_matrix, emission_matrix, out_path)`` where
        ``path`` is the (state_count, n - 1) integer back-pointer table,
        ``state_probability_matrix`` the (state_count, n) table of best
        log-scores, ``emission_matrix`` the (state_count, n) emission
        densities rescaled so each column has unit L2 norm, and ``out_path``
        the list of n decoded labels.

    Raises
    ------
    ValueError
        If ``observations`` is empty.
    """
    observations = np.asarray(observations, dtype=float).reshape(-1, 1)
    observation_count = observations.shape[0]
    if observation_count == 0:
        raise ValueError("observations must contain at least one value")

    means = np.asarray(gaussian_params[0, :], dtype=float)
    scales = np.asarray(gaussian_params[1, :], dtype=float)

    # Work with log-densities directly: taking log(pdf) underflows to -inf for
    # observations far from every mean, which wipes out all later scores.
    log_emission = norm.logpdf(observations, loc=means, scale=scales).T

    # Rescale every column (one observation) to unit L2 norm, done in log
    # space. This shifts all states by the same amount per observation, so it
    # does not change which path is best.
    log_column_norm = 0.5 * np.logaddexp.reduce(2.0 * log_emission, axis=0)
    # A column whose densities are all exactly zero is left untouched.
    log_column_norm = np.where(np.isfinite(log_column_norm), log_column_norm, 0.0)
    log_emission = log_emission - log_column_norm
    emission_matrix = np.exp(log_emission)

    # log(0) = -inf is the intended score of an impossible transition/start,
    # so silence the divide-by-zero warning instead of failing.
    with np.errstate(divide='ignore'):
        log_transition = np.log(np.asarray(state_transition_matrix, dtype=float))
        log_initial = np.log(np.asarray(np.real(initial_state_probability), dtype=float))

    state_probability_matrix = np.zeros((state_count, observation_count))
    path = np.zeros((state_count, observation_count - 1), dtype=int)
    state_probability_matrix[:, 0] = log_initial + log_emission[:, 0]

    for t in range(1, observation_count):
        # candidate[i, j]: best score ending in state i at t-1, then i -> j.
        candidate = state_probability_matrix[:, t - 1][:, np.newaxis] + log_transition
        path[:, t - 1] = np.argmax(candidate, axis=0)
        state_probability_matrix[:, t] = np.max(candidate, axis=0) + log_emission[:, t]

    # Backtrack from the best final state through the back-pointer table.
    current_state = int(np.argmax(state_probability_matrix[:, -1]))
    decoded_states = [current_state]
    for t in range(observation_count - 2, -1, -1):
        current_state = int(path[current_state, t])
        decoded_states.append(current_state)
    decoded_states.reverse()
    out_path = [state_converter[state] for state in decoded_states]

    return path, state_probability_matrix, emission_matrix, out_path

In [367]:
# Viterbi algorithm
def viterbi():
    """Placeholder for the Viterbi algorithm; not implemented, returns None."""
    return None

In [368]:
# Baum-Welch Learning
def baum_welch():
    """Placeholder for Baum-Welch learning; not implemented, returns None."""
    return None

In [369]:
# read data
# Load the observations and arrange them as a single-column 2-D array.
observed_states = np.reshape(np.loadtxt('./Input/data.txt', dtype=float), (-1, 1))
observed_states.shape

(1000, 1)

In [370]:
# read parameters
# Read the file once. Line 1 holds the number of states, the next
# `no_of_states` lines hold the transition matrix rows, and the following
# `no_of_states` lines are taken as the Gaussian parameters.
with open('./Input/parameters.txt.txt', 'r') as f:
    parameter_lines = f.read().splitlines()

no_of_states = int(parameter_lines[0])

transition_matrix = np.genfromtxt(parameter_lines[1:1 + no_of_states])

# Parse as float (genfromtxt's default): forcing dtype=int cannot represent
# non-integer means or scales.
gaussian_parameters = np.genfromtxt(parameter_lines[1 + no_of_states:1 + 2 * no_of_states])

In [371]:
# Start the chain from the stationary distribution of the transition matrix.
initial_distribution = get_stationary_distibution(state_transition_matrix=transition_matrix)
# State index -> label used in the decoded path.
index_state_map = dict(enumerate(('\"El Nino\"', '\"La Nina\"')))

In [372]:
# a: back-pointer table, b: log-score table, c: normalized emission matrix,
# hidden_path: decoded state labels.
a, b, c, hidden_path = estimate_sequence(
    state_transition_matrix=transition_matrix,
    gaussian_params=gaussian_parameters,
    initial_state_probability=initial_distribution,
    observations=observed_states,
    state_count=no_of_states,
    state_converter=index_state_map,
)

In [373]:
# Scratch check kept for reference (note it indexes the parameters by column):
# norm.pdf(104.524317662043, loc=gaussian_parameters[:,0], scale=gaussian_parameters[:,1])
# Display the back-pointer table returned by estimate_sequence
# (one row per state, one column per transition between observations).
a

array([[1, 1, 1, ..., 0, 0, 1],
       [1, 1, 1, ..., 1, 1, 1]])

In [374]:
# Display the log-score table returned by estimate_sequence
# (one row per state, one column per observation).
b

array([[-4.68619767e+01, -5.42948762e+01, -5.14943271e+01, ...,
        -5.36454548e+02, -5.90702136e+02, -5.89241290e+02],
       [-2.87682072e-01, -3.93042588e-01, -4.98403104e-01, ...,
        -5.35555976e+02, -5.35661337e+02, -5.35766697e+02]])

In [375]:
# Display the normalized emission matrix returned by estimate_sequence
# (one row per state, one column per observation).
c

array([[1.77894327e-20, 3.50730551e-23, 6.41200887e-22, ...,
        3.14837047e-01, 3.93979520e-24, 5.37677679e-23],
       [1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
        9.49145739e-01, 1.00000000e+00, 1.00000000e+00]])

In [376]:
# Load the reference labels, one per line, stripping only the trailing newline.
with open('./Output/states_Viterbi_wo_learning.txt', 'r') as f:
    viterbi_output = [line.rstrip('\n') for line in f]

# Count the positions where the reference and the decoded path agree
# (zip stops at the shorter of the two sequences).
match = sum(
    1
    for expected, decoded in zip(viterbi_output, hidden_path)
    if expected == decoded
)

print(match)

857
