In [1]:
import numpy as np
#from numba import jit
import traceback

sequence = input().split() # seq1 seq2 match mismatch gap


# Encode bases as numbers and vice versa
bases_to_numbers = {'_':0, 'A':1, 'T':2, 'G':3, 'C':4, 'U':5}
numbers_to_bases = {0:'_', 1:'A', 2:'T', 3:'G', 4:'C', 5:'U'}

match = float(sequence[2]) # match score
mu = float(sequence[3])    # mismatch penalty
delta = float(sequence[4]) # gap penalty

# Transform sequence into numeric form
def btn(s):
    return np.array(list(map(lambda x: bases_to_numbers[x], list(s))))

def ntb(s):
    return np.array(list(map(lambda x: numbers_to_bases[x], list(s))))

weight_matrix_standard = np.array([[-1, delta, delta, delta, delta, delta],   #    gap A T G C U
                                  [delta, match, mu, mu, mu, mu],             # gap
                                  [delta, mu, match, mu, mu, mu],             # A 
                                  [delta, mu, mu, match, mu, mu],             # T
                                  [delta, mu, mu, mu, match, mu],             # G
                                  [delta, mu, mu, mu, mu, match]])            # C
                                                                              # U
#---------------------------------------------------------------------------------------------------------------
# Get optimal alignment from optimal path
#@jit(nopython=True)
def path_to_alignment(seq1, seq2, path):
    try:
        seqA = list(reversed(list(seq1[path[-1][0]:path[0][0]]))) if len(path) > 1 else [seq1[path[0][0]]] # We move from the beginning of the sequence to the end
        assert len(seqA) > 0
    except BaseException:
        return traceback.format_exc()
    
    try:
        seqB = list(reversed(list(seq2[path[-1][1]:path[0][1]]))) if len(path) > 1 else [seq2[path[0][1]]] # and .pop() gives us the last element, so we reverse the order
        assert len(seqB) > 0
    except BaseException:
        return traceback.format_exc()

    grenzenA = (path[0][0], path[-1][0])
    grenzenB = (path[0][1], path[-1][1])
    path = path[::-1]       # path is generated from the end to the beginning, so we reverse it too
    aligned_seq1, aligned_seq2  = [], []
    
    try:
        leftA = ''.join(ntb(seq1[0:grenzenA[1]])).lower()
        rightA = ''.join(ntb(seq1[grenzenA[0]::])).lower()
        leftB = ''.join(ntb(seq2[0:grenzenB[1]])).lower()
        rightB = ''.join(ntb(seq2[grenzenB[0]::])).lower()
    except BaseException:
        return 'FUCK', 'YOMAMA'
    
    def diff(i): # displacement vector at each step. Tells us what pair of symbols to choose
        return np.array(path[i+1]) - np.array(path[i])
    
    diff_path = list(map(diff, range(len(path)-1))) # rewrite the path as a sequence of displacements
    
    try:
        for i in range(len(diff_path)): # if (1, 1), append two nucleotides, 
                                        # if (0, 1) or (1, 0), append gap+nucl or vice versa
            A = numbers_to_bases[seqA.pop()] if diff_path[i][0] == 1 and seqA else '_' 
            B = numbers_to_bases[seqB.pop()] if diff_path[i][1] == 1 and seqB else '_'
            aligned_seq1.append(A)
            aligned_seq2.append(B)
            
    except BaseException as exep:
        return traceback.format_exc()
    
    return leftA + ''.join(aligned_seq1) + rightA, leftB + ''.join(aligned_seq2) + rightB

#------------------------------------------------------------------------------------------------------------

#@jit(nopython=True)
def smith_watermann(seq1, seq2, weight_matrix):
    len1, len2 = len(seq1), len(seq2)
    
    paths = {} # For each element of the path scores matrix we will determine and remember 
               # the optimal precedent element
               # at the same time as we determine the value of the element itslef. paths{} is a
               # dictionary of links: path : (i, j) -> optimal_precedent((i, j))
                
    current_max = (0, (0, 0)) # current maximal value in the matrix and its position 
    path_scores = np.zeros([len1+1, len2+1]) #Path scores matrix initialization
    path_scores[0, 0] = 0
    path_scores[1:, 0] = [max(delta*i, 0) for i in range(1, len1+1)]
    path_scores[0, 1:] = [max(delta*j, 0) for j in range(1, len2+1)]
    
#-----------------------------------------------------------------------------------------------------------
    
    for i in range(1, len1+1): # Optimal precedent elements on the boundary are determined uniquely
                               # double-check if boundary elements are not zero
                               # we only need links between nonzero elements
        if path_scores[(i, 0)] != 0:
            paths[(i, 0)] = (i-1, 0)
            
    for j in range(1, len2+1):
        if path_scores[(0, j)] != 0:
            paths[(0, j)] = (0, j-1)
    
#-----------------------------------------------------------------------------------------------------------
    
    for i in range(1, len1+1):
        for j in range(1, len2+1):
            prev_pos = ((i-1, j-1), (i, j-1), (i-1, j)) # Possible precedent elements
            # Scores of paths that come from the set of optimal precedent elements
            prev_scores = (path_scores[i-1, j-1] + weight_matrix[seq1[i-1], seq2[j-1]],
                                    path_scores[i, j-1] + delta, 
                                    path_scores[i-1, j] + delta)
                                    
            path_scores[i, j] = max(*prev_scores, 0)
            current_max = max(current_max, (path_scores[i, j], (i, j)), key = lambda x: x[0])
            # Add the link to the optimal precedent element into paths{}
            
            if path_scores[i, j] != 0: # if the score of the current element is not zero 
                                       # add the link to the optimal precedent element into paths{}
                    paths[(i, j)] = max(zip(prev_pos, prev_scores), key = lambda x: x[1])[0]
                    
            # So paths{} only contains links to nonzero elements that start at nonzero elements
        
    optimal_path = [current_max[1]] # optimal path must contain the element with the highest score
    optimal_score = current_max[0]  # the highest score in the table is the score of the alignment
    
    while optimal_path[-1] in paths: # Follow the links to the optimal elements backwards 
                                     # until we reach the element (0, 0)
        optimal_path.append(paths[optimal_path[-1]])
    
    return seq1, seq2, path_scores, optimal_path,\
            path_to_alignment(seq1, seq2, optimal_path), int(optimal_score)

#-----------------------------------------------------------------------------------------------------------

print(smith_watermann(btn(sequence[0]), btn(sequence[1]), weight_matrix_standard)[5],\
      *smith_watermann(btn(sequence[0]), btn(sequence[1]), weight_matrix_standard)[4])

fff ff


IndexError: list index out of range

In [30]:
try:
    assert 1 == 0
except AssertionError:
    print("AAA")

AAA


In [10]:
def path_to_alignment_2(seq1, seq2, path):
    seq1_aligned = ntb([seq1[i-1] for i in [P[0] for P in path]])
    seq2_aligned = ntb([seq2[j-1] for j in [P[1] for P in path]])
    return seq1_aligned, seq2_aligned

In [43]:
a = np.array([1])
a[0:0]


array([], dtype=int32)

4 tgttacTTA_CGt ggttgacTTGACTg


In [90]:
a = [1, 2, 3, 4, 5] 
a[3:0:-1]
c = 'mama'
d = 'papa'
c + d

'mamapapa'