### Problems

[19] https://rosalind.info/problems/ba10a/  <br>
[20] https://rosalind.info/problems/ba10b/  <br>
[21] https://rosalind.info/problems/ba10c/  <br>
[22] https://rosalind.info/problems/ba10d/  <br>

# Problem 19

In [1]:
from collections import defaultdict

In [2]:
class HiddenPathProb:
    """
    Probability of one hidden path through an HMM.

    Parameters:
        states: Collection of state labels (only its size is used)
        pi: Hidden path, an indexable sequence of state labels
        transition: Nested mapping; transition[a][b] is the a -> b probability

    Outputs:
        Probability of given hidden path
    """

    def __init__(self, states, pi, transition):
        # remember the path and the matrix, and cache both lengths
        self.pi = pi
        self.n = len(pi)
        self.m = len(states)
        self.transition = transition

    def compute_probability(self):
        path, table = self.pi, self.transition
        # the first state is drawn uniformly from the m states
        likelihood = 1.0 / self.m
        # multiply in one transition per consecutive pair of path positions
        for step in range(1, self.n):
            likelihood *= table[path[step - 1]][path[step]]
        return likelihood

# Driver for problem 19: read a hidden path and a transition matrix from a
# user-supplied file and write the path probability to "<filename>.out".
filename = input("Enter Filename: ")
with open(filename, "r") as f:
    # layout: hidden path / separator / state labels / separator /
    #         matrix header / one matrix row per state
    pi = f.readline().strip()
    f.readline()  # separator line, content not needed
    states = f.readline().strip().split()
    f.readline()  # separator line, content not needed
    f.readline()  # matrix header; columns are taken in the order of `states`

    transition = defaultdict(dict)
    for line in f:
        fields = line.split()
        # a blank line (e.g. at the end of the file) carries no row label;
        # skip it instead of failing on fields[0]
        if not fields:
            continue
        # first field is the row label, the rest are that row's probabilities
        transition[fields[0]] = dict(zip(states, map(float, fields[1:])))

# create object of class
pr_pi = HiddenPathProb(states, pi, transition)
# write output to file
with open(filename + ".out", "w") as ofile:
    print(pr_pi.compute_probability(), file=ofile)

Enter Filename: rosalind_ba10a.txt


# Problem 20

In [3]:
class OutcomeProb:
    """
    Probability of an emitted string conditioned on a known hidden path.

    Parameters:
        string: Outcome string
        alphabet: Emitted symbols (stored only)
        pi: Hidden path, one state per emitted symbol
        states: State labels (stored only)
        emission: Nested mapping; emission[state][letter] is the probability
            that `state` emits `letter`

    Outputs:
        Conditional Probability of given string
    """

    def __init__(self, string, alphabet, pi, states, emission):
        # observed data
        self.string = string
        self.alphabet = alphabet
        # model description
        self.pi = pi
        self.states = states
        self.emission = emission

    def compute_outcome_prob(self):
        # one emission factor per aligned (state, letter) position; zip stops
        # at the shorter of the two sequences
        factors = (
            self.emission[hidden][symbol]
            for hidden, symbol in zip(self.pi, self.string)
        )
        likelihood = 1.0
        for factor in factors:
            likelihood *= factor
        return likelihood


# Driver for problem 20: read an emitted string, a hidden path and an emission
# matrix from a user-supplied file and write the conditional probability of
# the string to "<filename>.out".
filename = input("Enter Filename: ")
with open(filename, "r") as f:
    # layout: string / separator / alphabet / separator / hidden path /
    #         separator / state labels / separator / matrix header /
    #         one matrix row per state
    string = f.readline().strip()
    f.readline()  # separator line, content not needed
    alphabet = f.readline().strip().split()
    f.readline()  # separator line, content not needed
    pi = f.readline().strip()
    f.readline()  # separator line, content not needed
    states = f.readline().strip().split()
    f.readline()  # separator line, content not needed
    f.readline()  # matrix header; columns are taken in the order of `alphabet`

    emission = defaultdict(dict)
    for line in f:
        fields = line.split()
        # a blank line (e.g. at the end of the file) carries no row label;
        # skip it instead of failing on fields[0]
        if not fields:
            continue
        # first field is the state label, the rest are its emission probabilities
        emission[fields[0]] = dict(zip(alphabet, map(float, fields[1:])))

# create object of class
pr_out = OutcomeProb(string, alphabet, pi, states, emission)
# write output to file
with open(filename + ".out", "w") as ofile:
    print(pr_out.compute_outcome_prob(), file=ofile)

Enter Filename: rosalind_ba10b.txt


# Problem 21

In [4]:
class Viterbi:
    """
    Most probable hidden path of an HMM for an emitted string.

    Parameters:
        string: Outcome string
        alphabet: Emitted symbols (stored only)
        states: Sequence of state labels (strings)
        transition: Nested mapping; transition[a][b] is the a -> b probability
        emission: Nested mapping; emission[a][x] is the probability that
            state a emits symbol x

    Outputs:
        Hidden path (concatenated state labels) that maximizes the joint
        probability of path and string, assuming a uniform initial state
    """

    def __init__(self, string, alphabet, states, transition, emission):
        # initialize class with parameters
        self.string = string
        self.alphabet = alphabet
        self.states = states
        self.transition = transition
        self.emission = emission

    def implement_viterbi(self):
        """
        Run the Viterbi recurrence and backtrack the best path.

        Returns:
            The maximizing path as a string; "" when the outcome string is
            empty. Ties are resolved in favour of the state that appears
            first in `states`.

        Raises:
            KeyError: if a state or symbol is missing from a matrix.
            ZeroDivisionError: if `states` is empty and the string is not.
        """
        # use the instance's own states so the class does not depend on a
        # module-level variable of the same name
        states = self.states
        m, n = len(states), len(self.string)
        # nothing was emitted, so there is no path to recover
        if n == 0:
            return ""
        initial_prob = 1.0 / m

        # m x n matrices: best score of a path ending in state j at column i,
        # and the row index of the predecessor that achieved it
        scoreM = [[0] * n for _ in range(m)]
        backtrackM = [[0] * n for _ in range(m)]

        for i, x in enumerate(self.string):
            for j, state1 in enumerate(states):
                if i == 0:
                    scoreM[j][i] = initial_prob * self.emission[state1][x]
                    continue
                # score of reaching state1 from every possible predecessor
                candidates = [
                    scoreM[k][i - 1] * self.transition[state2][state1]
                    for k, state2 in enumerate(states)
                ]
                # max() keeps the first maximum, i.e. the earliest state
                best = max(range(m), key=candidates.__getitem__)
                backtrackM[j][i] = best
                scoreM[j][i] = self.emission[state1][x] * candidates[best]

        # start from the best-scoring state in the last column
        index = max(range(m), key=lambda row: scoreM[row][-1])
        # walk the backtrack pointers right to left, then reverse
        reversed_path = []
        for i in range(n - 1, -1, -1):
            reversed_path.append(states[index])
            index = backtrackM[index][i]

        # return the maximized path
        return "".join(reversed(reversed_path))


# Driver for problem 21: read an emitted string plus transition and emission
# matrices from a user-supplied file and write the Viterbi path to
# "<filename>.out".
filename = input("Enter Filename: ")
with open(filename, "r") as f:
    # layout: string / separator / alphabet / separator / state labels /
    #         separator / transition header / transition rows /
    #         separator / emission header / emission rows
    string = f.readline().strip()
    f.readline()  # separator line, content not needed
    alphabet = f.readline().strip().split()
    f.readline()  # separator line, content not needed
    states = f.readline().strip().split()

    # tokenize the rest, dropping blank lines (e.g. at the end of the file)
    # so that every remaining row has a label in position 0
    rows = [line.split() for line in f if line.strip()]
    # skip the separator and the transition header
    rows = rows[2:]
    # transition rows come first; the emission rows follow after another
    # separator + header pair
    tr_rows, emit_rows = rows[:len(states)], rows[len(states) + 2:]

    # matrix columns are taken in the order of `states` / `alphabet`
    transition = defaultdict(dict)
    for label, *nums in tr_rows:
        transition[label] = dict(zip(states, map(float, nums)))

    emission = defaultdict(dict)
    for label, *nums in emit_rows:
        emission[label] = dict(zip(alphabet, map(float, nums)))

# create object of class
viterbi = Viterbi(string, alphabet, states, transition, emission)
# write output to file
with open(filename + ".out", "w") as ofile:
    print(viterbi.implement_viterbi(), file=ofile)

Enter Filename: rosalind_ba10c.txt


# Problem 22

In [5]:
class StringProb:
    """
    Parameters:
        string: Outcome string
        alphabet: String containing letters
        states: States
        transition: Transition matrix
        emission: Emission matrix
        
    Outputs:
        Probability of emitting given string
    """
    
    def __init__(self, string, alphabet, states, transition, emission):
        # initialize class with parameters
        self.string = string
        self.alphabet = alphabet
        self.states = states
        self.transition = transition
        self.emission = emission
        
    def implement_forward(self):
        # find out value of length of states and string and initial_prob
        m, n = len(self.states), len(self.string)
        initial_prob = 1.0/m
        # initialize forward matrix.
        # mxn matrix
        scoreM = [[0] * n for i in range(m)]

        # fill the Forward matrix
        for i, x in enumerate(self.string):
            for j, state1 in enumerate(self.states):
                if i == 0:
                    scoreM[j][i] = initial_prob * self.emission[state1][x]
                else:
                    scoreM[j][i] = self.emission[state1][x] * sum(scoreM[k][i-1] * self.transition[state2][state1] for k, state2 in enumerate(self.states))

        # return probability
        return sum(scoreM[i][-1] for i in range(m))

# Driver for problem 22: read an emitted string plus transition and emission
# matrices from a user-supplied file and write the probability of emitting
# the string to "<filename>.out".
filename = input("Enter Filename: ")
with open(filename, "r") as f:
    # layout: string / separator / alphabet / separator / state labels /
    #         separator / transition header / transition rows /
    #         separator / emission header / emission rows
    string = f.readline().strip()
    f.readline()  # separator line, content not needed
    alphabet = f.readline().strip().split()
    f.readline()  # separator line, content not needed
    states = f.readline().strip().split()

    # tokenize the rest, dropping blank lines (e.g. at the end of the file)
    # so that every remaining row has a label in position 0
    rows = [line.split() for line in f if line.strip()]
    # skip the separator and the transition header
    rows = rows[2:]
    # transition rows come first; the emission rows follow after another
    # separator + header pair
    tr_rows, emit_rows = rows[:len(states)], rows[len(states) + 2:]

    # matrix columns are taken in the order of `states` / `alphabet`
    transition = defaultdict(dict)
    for label, *nums in tr_rows:
        transition[label] = dict(zip(states, map(float, nums)))

    emission = defaultdict(dict)
    for label, *nums in emit_rows:
        emission[label] = dict(zip(alphabet, map(float, nums)))

# create object of class
forward = StringProb(string, alphabet, states, transition, emission)
# write output to file
with open(filename + ".out", "w") as ofile:
    print(forward.implement_forward(), file=ofile)

Enter Filename: rosalind_ba10d.txt
