In [129]:
import numpy as np
import math
from statistics import mean 

"""
Creates randomly generated normalized user data for the 
KNN to determine the states for the Hidden Markov Process

Data that is randomly generated:
- preference values for the sample users - prob they vote for each sector
- values for the emission probabilities - seeing each observation based on the state
- state transition created based off random state array generated
"""

#generates one row of the emission probability matrix
def genOne(size=5):
    """Generate one random probability row.

    Draws `size` integers with np.random.randint(100, ...) (i.e. from
    0..99) and rescales them with sum_array_to_prob so that they can be
    read as a probability distribution.

    Parameters:
        size: number of entries in the row. Defaults to 5, the width used
            by the rest of this notebook.

    Returns:
        A plain Python list of `size` floats.

    Note: the result is random and uses numpy's global random state; seed
    it (np.random.seed) before calling if reproducible output is needed.
    If every drawn integer happens to be 0, the result is whatever
    sum_array_to_prob returns for a zero-sum input.
    """
    genArray = np.random.randint(100, size=size)
    return sum_array_to_prob(genArray).tolist()

#converts the input data over the total sum into probabilities
def sum_array_to_prob(genArray):
    """Normalise a 1-D sequence of non-negative weights so it sums to 1.

    Parameters:
        genArray: list or numpy array of counts/weights.

    Returns:
        A numpy float array of the same length holding genArray / total.
        - Empty input returns an empty array.
        - If the total is 0 (for example an all-zero count row) there is
          nothing to scale by, so a uniform distribution is returned
          instead of the NaN values that 0/0 would produce.
    """
    genArray = np.asarray(genArray, dtype=float)
    if genArray.size == 0:
        return genArray

    total = genArray.sum()
    if total == 0:
        # No information at all: fall back to equal probability per entry.
        return np.full(genArray.shape, 1.0 / genArray.size)
    return genArray / total

#generates the entire emission probability matrix, qnty is the number of samples it averages over
def genAll(qnty, num_state_count=5):
    """Build the emission probability matrix from averaged random rows.

    For each state, genOne() is called `qnty` times and the rows are
    averaged element by element. Averaging rows that each sum to 1 gives
    a row that also sums to 1 (up to floating-point error).

    Parameters:
        qnty: number of random rows averaged per state; must be >= 1.
        num_state_count: number of states (rows of the result).
            Defaults to 5, the value used throughout this notebook.

    Returns:
        A list of `num_state_count` lists of floats. The row width is
        whatever genOne() returns.

    Raises:
        ValueError: if qnty is smaller than 1.
    """
    if qnty < 1:
        raise ValueError("qnty must be at least 1")

    returnArr = []
    for _ in range(num_state_count):
        # Start the running total from the first sample so the row width
        # follows genOne() instead of a hard-coded list of five zeros.
        totals = genOne()
        for _ in range(qnty - 1):
            totals = [total + sample for total, sample in zip(totals, genOne())]
        returnArr.append([total / qnty for total in totals])
    return returnArr

#takes the user data and overall emissions probability to classify the user into the bucket
def classifyUser(userData, emissionsProbs):
    """Return the index of the emission row closest to the user's data.

    Closeness is the Euclidean distance between `userData` and each row
    of `emissionsProbs`.

    Parameters:
        userData: sequence of numbers, at least as long as each row of
            emissionsProbs (a shorter sequence raises IndexError).
        emissionsProbs: sequence of rows (list of lists or 2-D array).

    Returns:
        The index of the nearest row. On a tie the lowest index wins.
        Returns -1 when emissionsProbs is empty or no distance compares
        smaller than infinity (e.g. every distance is NaN).
    """
    bestIndex = -1
    # Start from infinity rather than a magic number so that inputs on
    # any scale are classified, not only those closer than 99999.
    curBest = math.inf
    for index, stateProbs in enumerate(emissionsProbs):
        squaredDist = sum(
            (stateProbs[y] - userData[y]) ** 2 for y in range(len(stateProbs))
        )
        curDist = math.sqrt(squaredDist)
        # Strict comparison keeps the first row when distances are equal.
        if curDist < curBest:
            curBest = curDist
            bestIndex = index
    return bestIndex
            
#builds the emission matrix used by the later cells: genAll(10) averages 10 random rows per state
#NOTE: no random seed is set, so the values differ on every run
emissionProbs = np.array(genAll(10))
print(emissionProbs)


[[0.19315951 0.20274011 0.18560085 0.20699978 0.21149975]
 [0.19316312 0.28471168 0.19348393 0.15105487 0.17758641]
 [0.21186442 0.16998969 0.22405349 0.20692099 0.1871714 ]
 [0.22948645 0.21233816 0.20073757 0.12471203 0.23272579]
 [0.17377348 0.20004883 0.25674211 0.16423194 0.20520364]]


In [139]:
"""
Creates the transition matrix used in the HMM model
"""
#classifies 100 randomly generated users; the order of the list is treated as a state sequence
userClassifications = []
for x in range(100):
    userClassifications.append(classifyUser(genOne(), emissionProbs))

#one row and one column per state, i.e. per row of the emission matrix
rows = cols = len(emissionProbs)
transitionMatrix = [[0 for i in range(cols)] for j in range(rows)] 

#populates the transition matrix with the raw transition counts
for x in range(len(userClassifications) - 1):
    curState = userClassifications[x]
    nextState = userClassifications[x+1]
    transitionMatrix[curState][nextState] += 1
    
#converts the raw counts into row-wise probabilities, rounded to 2 decimals
#(because of the rounding a row may not sum to exactly 1)
for row_n in range (rows):
    if sum(transitionMatrix[row_n]) == 0:
        #this state never appeared as a source state, so there is nothing to
        #normalise; use a uniform row instead of dividing by zero (NaN)
        transitionMatrix[row_n] = [1 / cols] * cols
    else:
        transitionMatrix[row_n] = sum_array_to_prob(transitionMatrix[row_n]).tolist()
    transitionMatrix[row_n] = [round(val, 2) for val in transitionMatrix[row_n]]

print(transitionMatrix)    

[[0.0, 0.25, 0.25, 0.17, 0.33], [0.07, 0.33, 0.3, 0.19, 0.11], [0.18, 0.29, 0.29, 0.07, 0.18], [0.2, 0.2, 0.27, 0.13, 0.2], [0.18, 0.18, 0.29, 0.24, 0.12]]


In [150]:
"""
Implements Viterbi's algorithm to determine the ideal state sequence

Additional assumptions:
- probability of each state from the start
- sequence of assets that the user liked - last 7
"""

def Viterbi(ObsSequence=None, startProbs=None, transition=None, emission=None):
    """Find the most likely hidden-state sequence for an observation sequence.

    Model: an initial state is drawn from `startProbs`; then, for every
    observation, the chain moves to a new state according to `transition`
    and that new state emits the observation according to `emission`.

    This is the dynamic-programming Viterbi recursion. For every step and
    every state it keeps the probability of the best path ending in that
    state plus a back-pointer to the previous state, and finally
    backtracks from the best end state. Greedily extending each start
    state with its locally best next step is not enough: a step that looks
    worse now can lead to a more probable overall path.

    Parameters (all optional; calling Viterbi() uses the notebook values):
        ObsSequence: list of observation indices. Defaults to
            [1, 0, 2, 3, 3, 0, 4].
        startProbs: probability of each initial state. Defaults to a
            uniform distribution over 5 states.
        transition: transition[i][j] = probability of moving from state i
            to state j. Defaults to the module-level `transitionMatrix`.
        emission: emission[s][o] = probability that state s emits
            observation o. Defaults to the module-level `emissionProbs`.

    Returns:
        A list of len(ObsSequence) + 1 state indices: the initial state
        followed by one state per observation. Ties are resolved in favour
        of the lowest state index.

    Note: probabilities are multiplied directly, which is fine for short
    sequences but can underflow to 0 for very long ones.
    """
    if ObsSequence is None:
        ObsSequence = [1, 0, 2, 3, 3, 0, 4] #sequence that the user liked
    if startProbs is None:
        #considers the overall user base that is in each state
        startProbs = [0.2, 0.2, 0.2, 0.2, 0.2]
    if transition is None:
        transition = transitionMatrix
    if emission is None:
        emission = emissionProbs

    states = len(startProbs)
    #bestProb[s]: probability of the best path found so far that ends in s
    bestProb = list(startProbs)
    #backPointers[t][s]: state preceding s on that best path at step t
    backPointers = []

    for obs in ObsSequence:
        stepProb = []
        stepPrev = []
        for state in range(states):
            candidates = [
                bestProb[prev] * transition[prev][state] * emission[state][obs]
                for prev in range(states)
            ]
            top = max(candidates)
            stepProb.append(top)
            stepPrev.append(candidates.index(top))
        bestProb = stepProb
        backPointers.append(stepPrev)

    #walk the back-pointers from the most probable final state to the start
    path = [bestProb.index(max(bestProb))]
    for stepPrev in reversed(backPointers):
        path.append(stepPrev[path[-1]])
    path.reverse()
    return path

In [151]:
#runs Viterbi() with no arguments and prints the state sequence it returns
print(Viterbi())

[1, 1, 1, 2, 2, 2, 2, 2]
