In [13]:
class HMM():
    '''
    Hidden Markov Model (HMM) whose transition and emission probabilities
    are estimated from a known hidden path and the symbols emitted along it.

    Attributes:
        symbols: list of possible emissions
        alphabet: list of possible states
        emitMatrix: nested dictionary representing the emission matrix
            key:value --> state: RowDictionary
            where RowDictionary maps symbol --> emission pr
        transMatrix: nested dictionary representing the transition matrix
            key:value --> state: RowDictionary
            where RowDictionary maps state --> transition pr
    '''
    def __init__(self,symbols,alphabet):
        '''
        Creates the model with every matrix entry initialised to 0.

        :param symbols: list of possible emissions
        :param alphabet: list of possible states
        '''
        self.symbols = symbols
        self.alphabet = alphabet
        self.emitMatrix = {
            letter: {symbol: 0 for symbol in symbols} for letter in alphabet
        }
        self.transMatrix = {
            letter1: {letter2: 0 for letter2 in alphabet} for letter1 in alphabet
        }

    @staticmethod
    def _storeProbabilities(counts,matrix):
        '''
        Normalises every row of counts and writes the result into matrix.

        :param counts: nested dictionary row --> column --> observed count
        :param matrix: nested dictionary with the same keys, updated in place
        A row with no observations becomes a uniform distribution over that
        row's columns, so every non-empty row sums to 1.
        '''
        for rowKey,rowCounts in counts.items():
            total = sum(rowCounts.values())
            for colKey,count in rowCounts.items():
                if total:
                    matrix[rowKey][colKey] = count/total
                else:
                    matrix[rowKey][colKey] = 1/len(rowCounts)

    def fillTransMatrix(self,path):
        '''
        Estimates the transition probabilities from a hidden path.

        :param path: string representing the path of the HMM
        :return: transMatrix with the value of each RowDictionary filled with
                 the value that maximizes Pr(path)
        :raises KeyError: if path contains a state that is not in alphabet
        '''
        # Count into a fresh table: the stored matrix may already hold
        # probabilities from an earlier call, and adding counts on top of
        # those would corrupt the estimate.
        counts = {
            letter1: {letter2: 0 for letter2 in self.alphabet}
            for letter1 in self.alphabet
        }
        for i in range(len(path)-1): # one transition per adjacent pair of states
            counts[path[i]][path[i+1]] += 1

        self._storeProbabilities(counts,self.transMatrix)
        return self.transMatrix

    def fillEmitMatrix(self,emissions,path):
        '''
        Estimates the emission probabilities from emissions and their path.

        :param emissions: string representing the emissions
        :param path: string representing the path of the HMM
        :return: emitMatrix with the value of each RowDictionary filled with
                 the value that maximizes Pr(path,emissions)
        :raises KeyError: if a state or symbol is not in alphabet / symbols
        :raises IndexError: if path is shorter than emissions
        '''
        # Fresh count table for the same reason as in fillTransMatrix.
        counts = {
            letter: {symbol: 0 for symbol in self.symbols}
            for letter in self.alphabet
        }
        for i,symbol in enumerate(emissions): # emission i was produced by state path[i]
            counts[path[i]][symbol] += 1

        self._storeProbabilities(counts,self.emitMatrix)
        return self.emitMatrix


def printMatrix(matrix):
    '''
    Used to print transMatrix and emitMatrix as a tab-separated table.

    The first line holds the column labels, taken from the first row, each
    preceded by a tab. Every following line holds a row label and that row's
    values rounded to 3 decimal places.

    :param matrix: nested dictionary row label --> column label --> probability
    :return: None; an empty matrix prints nothing
    '''
    if not matrix: # no rows means there is no first row to take the header from
        return

    firstRow = next(iter(matrix.values()))
    print(''.join('\t'+str(column) for column in firstRow))

    for rowLabel,row in matrix.items():
        cells = [str(rowLabel)]
        cells.extend(str(round(pr,3)) for pr in row.values())
        print('\t'.join(cells))



def main(fName=''):
    '''
    Estimates HMM parameters from an input file and prints both matrices.

    The file is read line by line: line 1 holds the emitted string, line 3
    the whitespace-separated symbols, line 5 the hidden path and line 7 the
    whitespace-separated states. The transition matrix is printed first,
    then a line of dashes, then the emission matrix.

    :param fName: name of the file
    '''
    with open(fName) as handle:
        rows = handle.readlines()

    observed = rows[0].strip()
    symbolList = rows[2].split()
    statePath = rows[4].strip()
    stateList = rows[6].split()

    model = HMM(symbolList,stateList)
    model.fillEmitMatrix(observed,statePath)
    model.fillTransMatrix(statePath)

    printMatrix(model.transMatrix)
    print('--------')
    printMatrix(model.emitMatrix)


# Run the estimation on the bundled problem input only when executed as a script.
if __name__ == '__main__':
    main('problem23in.txt')

	A	B
A	0.489	0.511
B	0.462	0.538
--------
	x	y	z
A	0.375	0.271	0.354
B	0.404	0.25	0.346


Given: A sequence of emitted symbols $x = x_1 \ldots x_n$ over an alphabet $\Sigma$ and a path $\pi = \pi_1 \ldots \pi_n$ generated by a $k$-state HMM with unknown transition and emission probabilities.

Return: A matrix of transition probabilities Transition and a matrix of emission probabilities Emission that maximize Pr(x,π) over all possible matrices of transition and emission probabilities.