In [11]:
from math import log

# Log-space helper: natural log of a probability, mapping non-positive input to -inf
def L(p):
    """Return ``log(p)`` when ``p`` is positive, otherwise negative infinity."""
    if p > 0:
        return log(p)
    return float('-inf')


# Hidden states of the model, in the order they are iterated
S = ['E', '5', 'I']

# Transition log probabilities, keyed as T[from_state][to_state].
# 'Start' and 'End' appear only here; they are not members of S.
# NOTE(review): the outgoing probabilities of 'E' add up to 1.01
# (0.9 + 0.1 + 0.01) rather than 1 -- confirm the intended values.
T = {
    source: {target: L(prob) for target, prob in row.items()}
    for source, row in {
        'Start': {'E': 1.0},
        'E': {'E': 0.9, '5': 0.1, 'End': 0.01},  # Added transition from 'E' to 'End'
        '5': {'I': 1.0},
        'I': {'I': 0.9, 'End': 0.1},
    }.items()
}

# Emission log probabilities, keyed as E[state][symbol].
# A probability of 0 is stored as -inf by L.
E = {
    state: {symbol: L(prob) for symbol, prob in row.items()}
    for state, row in {
        'E': {'A': .25, 'C': .25, 'G': .25, 'T': .25},
        '5': {'A': .05, 'C': 0.0, 'G': .95, 'T': 0.0},
        'I': {'A': .4, 'C': .1, 'G': .1, 'T': .4},
    }.items()
}

# Function to compute log probability of a given path and sequence
def get_log_prob_of_a_given_path(h, q, transitions=None, emissions=None):
    """Return the joint log probability of state path ``h`` and sequence ``q``.

    The score is the sum of the Start transition into ``h[0]``, one emission
    term per position, one transition term between consecutive states, and
    the transition from the last state to ``'End'``.  Any transition or
    emission that is missing from the tables counts as impossible (``-inf``),
    which is the same convention ``viterbi`` uses.

    Args:
        h: State path, one state per position (string or sequence of states).
        q: Observed sequence, one symbol per position.
        transitions: Optional ``{from_state: {to_state: log_prob}}`` table
            including the ``'Start'`` row and ``'End'`` targets.  Defaults to
            the module-level ``T``.
        emissions: Optional ``{state: {symbol: log_prob}}`` table.  Defaults
            to the module-level ``E``.

    Returns:
        The log probability as a float; ``float('-inf')`` when the path
        cannot produce the sequence under the model.

    Raises:
        ValueError: If ``h`` and ``q`` have different lengths.
    """
    if transitions is None:
        transitions = T
    if emissions is None:
        emissions = E

    if len(h) != len(q):
        raise ValueError(
            f"path length ({len(h)}) does not match sequence length ({len(q)})"
        )

    neg_inf = float('-inf')
    start_row = transitions.get('Start', {})

    # An empty path is only possible if the model allows Start -> End.
    if not q:
        return start_row.get('End', neg_inf)

    # A start state the model does not allow is impossible, not an error.
    total = start_row.get(h[0], neg_inf)

    last = len(q) - 1
    for i, (state, symbol) in enumerate(zip(h, q)):
        # Emission of the observed symbol from the current state
        total += emissions.get(state, {}).get(symbol, neg_inf)

        # Transition to the next state, or to 'End' after the final position.
        # A final state with no 'End' transition makes the whole path impossible.
        target = h[i + 1] if i < last else 'End'
        total += transitions.get(state, {}).get(target, neg_inf)
    return total

# Function to compute the most likely path using the Viterbi algorithm
def viterbi(q, states=None, transitions=None, emissions=None):
    """Return the most likely state path for sequence ``q`` and its log score.

    The score includes the Start transition, every emission, every
    state-to-state transition and the final transition into ``'End'``.
    Missing transitions or emissions count as impossible (``-inf``).

    Args:
        q: Observed sequence, one symbol per position.
        states: Optional list of hidden states.  Defaults to the module-level
            ``S``.
        transitions: Optional ``{from_state: {to_state: log_prob}}`` table
            including the ``'Start'`` row and ``'End'`` targets.  Defaults to
            the module-level ``T``.
        emissions: Optional ``{state: {symbol: log_prob}}`` table.  Defaults
            to the module-level ``E``.

    Returns:
        A ``(path, log_prob)`` tuple where ``path`` is a list of states, one
        per symbol of ``q``.  For an empty ``q`` the path is ``[]`` and the
        score is the Start -> End log probability (``-inf`` if absent).
        When no path can reach ``'End'`` the score is ``-inf`` and the
        returned path is an arbitrary tie-break winner.
    """
    if states is None:
        states = S
    if transitions is None:
        transitions = T
    if emissions is None:
        emissions = E

    neg_inf = float('-inf')
    start_row = transitions.get('Start', {})

    # Nothing to decode: the only candidate is going straight from Start to End.
    if not q:
        return [], start_row.get('End', neg_inf)

    # Best log score of any path ending in each state at the first position
    scores = {
        s: start_row.get(s, neg_inf) + emissions.get(s, {}).get(q[0], neg_inf)
        for s in states
    }

    # backpointers[k][s] is the best predecessor of state s at position k + 1.
    # Keeping pointers instead of whole paths avoids copying a path per state
    # at every step and guarantees every state always has a traceable entry.
    backpointers = []

    for symbol in q[1:]:
        step_scores = {}
        step_pointers = {}
        for cur in states:
            emit = emissions.get(cur, {}).get(symbol, neg_inf)

            # Only states with an explicit transition into `cur` compete.
            # If there are none, `cur` is unreachable; fall back to all states
            # (every score is -inf) so a back-pointer still exists.
            predecessors = [p for p in states if cur in transitions.get(p, {})]
            if not predecessors:
                predecessors = states

            # Ties on score are broken by the state name, via tuple comparison.
            step_scores[cur], step_pointers[cur] = max(
                (scores[p] + transitions.get(p, {}).get(cur, neg_inf) + emit, p)
                for p in predecessors
            )
        scores = step_scores
        backpointers.append(step_pointers)

    # Add the transition into 'End' and pick the best final state
    final = {
        s: scores[s] + transitions.get(s, {}).get('End', neg_inf)
        for s in states
    }
    best_last = max(final, key=final.get)

    # Walk the back-pointers from the last position to the first
    path = [best_last]
    for pointers in reversed(backpointers):
        path.append(pointers[path[-1]])
    path.reverse()

    return path, final[best_last]

# Example observation sequence together with its annotated state path
sequence = "CTTCATGAGTAAAAAGACGTAATGCA"
known_path = "EEEEEEEEEEEEEEE5IIIIIIIIII"

# Decode the sequence with Viterbi (best path and its log probability)
most_likely_path, log_prob_viterbi = viterbi(sequence)

# Score the annotated path against the same sequence
log_prob_known_path = get_log_prob_of_a_given_path(known_path, sequence)

# Report both results, one item per line
print(
    f"Example Sequence: {sequence}",
    f"Example Path: {known_path}",
    f"Log probability of given path: {log_prob_known_path:.2f}",
    f"Best path from Viterbi: {''.join(most_likely_path)}",
    f"Log probability of best path: {log_prob_viterbi:.2f}",
    sep="\n",
)

Example Sequence: CTTCATGAGTAAAAAGACGTAATGCA
Example Path: EEEEEEEEEEEEEEE5IIIIIIIIII
Log probability of given path: -42.58
Best path from Viterbi: EEEEEEEE5IIIIIIIIIIIIIIIII
Log probability of best path: -40.68
