In [131]:
import numpy as np 
import itertools as iter    
from collections import defaultdict
import statistics as st 

with open('../../NLP_Resources/kasyno10K.txt') as f:
    kasyno = np.array([int(x) for x in f.read() if x.isnumeric()])

def countABG(data, a , b , pi):

    alfa = np.zeros((2, data.size), dtype=np.float128)

    alfa[:, 0] = pi

    for t in range(1, data.size):
        for j in range(2):
            for i in range(2):
                alfa[j,t] += alfa[i, t-1] * a[i,j] * b[i, j, data[t-1]-1]
            alfa[j,t] *= 2

    beta = np.zeros((2, data.size), dtype=np.float128)
    beta[:, data.size-1] = 1

    for t in range(data.size-2, -1, -1):
        for j in range(2):
            for i in range(2):
                beta[j,t] += beta[i, t+1] * a[i, j] * b[i, j, data[t]-1]
            beta[j,t] *= 2
    

    gamma = alfa * beta / (alfa * beta).sum()

    return alfa, beta, gamma



In [132]:
def countKSI(data, alfa, a, b, beta):
    ksi = np.zeros((data.size - 1, 2, 2 ), dtype=np.float128)
    for t in range(data.size - 1):
        for i in range(2):
            for j in range(2):
                ksi[t, i, j] = (alfa[i, t] * a[i, j] * b[i, j, (data[t+1] - 1)] * beta[j, t+1])
    return ksi

In [148]:
def baumWelch(data, a, b, pi, iter):
    a = a.copy()
    b = b.copy()
    for i in range(iter):
        alfa, beta, gamma = countABG(data, a, b, pi)
        ksi = countKSI(data, alfa, a, b, beta)

        for i in range(2):
            for j in range(2):
                a[i, j] = ksi[:, i, j].sum() / ksi[:,i,:].sum()
        for i in range(2):
            for j in range(6):
                b[i, :, j] = gamma[i, (data-1) == j].sum() / gamma[i, :].sum()

    return a, b[:, 0]

In [280]:
for i in range(2):
    probabilities[i] = np.random.rand(6)
    probabilities[i] /= probabilities[i].sum()

switchProbability = np.array([0.1, 0.1])

print(switchProbability)
print(probabilities)

[0.1 0.1]
[[0.12665771 0.20371786 0.08156093 0.22302538 0.24144162 0.12359649]
 [0.10913301 0.03903804 0.19059934 0.25837262 0.27015455 0.13270244]]


In [288]:


a = np.array([
        [1-switchProbability[0], switchProbability[0]],
        [switchProbability[1], 1-switchProbability[1]]
    ])
b = np.array([
        [probabilities[0], probabilities[0]],
        [probabilities[1], probabilities[1]]
    ])
pi = np.array([1,0])

t = baumWelch(kasyno[:9000], a, b, pi, 4)

In [289]:
print(t[0], t[1], sep = '\n')
# print("{:.17g}".format(t[0][0][0]))
# print("{:.17g}".format(t[0][0][1]))
# print("{:.17g}".format(t[0][1][0]))
# print("{:.17g}".format(t[0][1][1]))

[[0.898312   0.101688  ]
 [0.06190488 0.93809512]]
[[0.16354834 0.18448644 0.19905868 0.09176529 0.14143634 0.21970491]
 [0.11834548 0.09843239 0.17283486 0.09664115 0.1804542  0.33329191]]
