In [1]:
from typing import Dict, Iterable, List, Optional, Tuple

import numpy as np

In [169]:
def GlobalAlignment_backtrack(match_reward: int, mismatch_penalty: int, indel_penalty: int, s: str, t: str) -> Tuple[List[List[int]], List[List[Optional[str]]]]:
    """Fill the global-alignment score matrix and backtrack pointers for ``s`` vs ``t``.

    Both penalties are given as positive magnitudes and are subtracted from
    the running score; ``match_reward`` is added.

    Args:
        match_reward: Score added when ``s[i - 1] == t[j - 1]``.
        mismatch_penalty: Amount subtracted for a diagonal step on unequal symbols.
        indel_penalty: Amount subtracted for every vertical or horizontal step.
        s: Sequence indexed by matrix rows.
        t: Sequence indexed by matrix columns.

    Returns:
        ``(m, backtrack)``, both of size ``(len(s) + 1) x (len(t) + 1)``:

        * ``m[i][j]`` is the best score for aligning ``s[:i]`` with ``t[:j]``.
        * ``backtrack[i][j]`` is ``'up'``, ``'left'`` or ``'diag'`` for the
          step that produced ``m[i][j]``. Row 0 and column 0 are never
          written and stay ``None``.

    Note:
        Both matrices are stored in full, so memory use is proportional to
        ``len(s) * len(t)``.
    """
    rows, cols = len(s) + 1, len(t) + 1
    m: List[List[int]] = [[0] * cols for _ in range(rows)]
    backtrack: List[List[Optional[str]]] = [[None] * cols for _ in range(rows)]

    # Borders: reaching (i, 0) or (0, j) takes i (resp. j) consecutive indels.
    for i in range(rows):
        m[i][0] = i * -indel_penalty
    for j in range(cols):
        m[0][j] = j * -indel_penalty

    for i in range(1, rows):
        for j in range(1, cols):
            if s[i - 1] == t[j - 1]:
                match_score = m[i - 1][j - 1] + match_reward
            else:
                match_score = m[i - 1][j - 1] - mismatch_penalty

            delete_score = m[i - 1][j] - indel_penalty
            insert_score = m[i][j - 1] - indel_penalty

            best = max(match_score, delete_score, insert_score)
            m[i][j] = best

            # Ties are resolved in the order up, left, diag. Callers read
            # these pointers directly, so the order must stay fixed.
            if best == delete_score:
                backtrack[i][j] = 'up'
            elif best == insert_score:
                backtrack[i][j] = 'left'
            else:
                backtrack[i][j] = 'diag'

    return m, backtrack

In [168]:
def middle_col_score(match_reward,mismatch_penalty,indel_penalty,v,w):
    """Score every node of the middle column of the ``v`` x ``w`` alignment grid.

    ``w`` is split at ``len(w) // 2``. The first half is aligned against ``v``
    and the last column of that score matrix is kept. The second half is
    aligned with both strings reversed; the last column of that matrix (scores
    and backtrack pointers) is kept and flipped so its row order matches the
    first half's.

    Returns:
        ``(score, mid_backtrack)``: ``score[i]`` is the sum of the two
        last-column values at row ``i``, and ``mid_backtrack[i]`` is the
        pointer recorded at that node in the reversed alignment.
    """
    split = len(w) // 2
    scoring = (match_reward, mismatch_penalty, indel_penalty)

    # Prefix half: the matrix is consumed inline so it is not kept alive
    # while the suffix half is being computed.
    from_source = [
        row[-1]
        for row in GlobalAlignment_backtrack(*scoring, v, w[:split])[0]
    ]

    # Suffix half, computed on the reversed strings.
    rev_scores, rev_pointers = GlobalAlignment_backtrack(
        *scoring, v[::-1], w[split:][::-1]
    )
    to_sink = [row[-1] for row in reversed(rev_scores)]
    mid_backtrack = [row[-1] for row in reversed(rev_pointers)]

    score = [a + b for a, b in zip(from_source, to_sink)]
    return score, mid_backtrack

In [167]:
def middle_edge(match_reward:int,mismatch_penalty:int,indel_penalty:int,v:str,w:str):
    """Pick the best-scoring node in the middle column and its recorded pointer.

    Returns:
        ``(row, direction)``: ``row`` is the first index holding the maximum
        of the middle-column scores, and ``direction`` is the matching entry
        of the backtrack list returned by ``middle_col_score``.
    """
    column_scores, column_moves = middle_col_score(
        match_reward, mismatch_penalty, indel_penalty, v, w
    )
    # max() keeps the earliest of equal maxima, so the lowest row wins ties.
    best_row, _ = max(enumerate(column_scores), key=lambda pair: pair[1])
    return best_row, column_moves[best_row]

In [166]:
def alignment_score(match_reward,mismatch_penalty,indel_penalty,v,w):
    """Return the largest value among the middle-column scores of ``v`` vs ``w``."""
    column_scores, _ = middle_col_score(
        match_reward, mismatch_penalty, indel_penalty, v, w
    )
    return max(column_scores)

In [173]:
def linear_space_alignment(match_reward:int,mismatch_penalty:int,indel_penalty:int,v:str,w:str,top:int,bottom:int,left:int,right:int):
    """Recursively align ``v[top:bottom]`` with ``w[left:right]`` by splitting on the middle edge.

    Args:
        match_reward, mismatch_penalty, indel_penalty: Scoring parameters,
            passed through unchanged to ``middle_edge``.
        v, w: The full sequences; the sub-problem is selected by the bounds.
        top, bottom: Half-open slice bounds into ``v``.
        left, right: Half-open slice bounds into ``w``.

    Returns:
        ``(align_v, align_w)``: the two aligned strings, with ``'-'`` marking gaps.
    """
    # Base cases: one side is empty, so the other is aligned entirely to gaps.
    if left == right:
        return v[top:bottom], '-' * (bottom - top)
    if top == bottom:
        return '-' * (right - left), w[left:right]

    scoring = (match_reward, mismatch_penalty, indel_penalty)
    col = (left + right) // 2
    row_offset, move = middle_edge(*scoring, v[top:bottom], w[left:right])
    row = top + row_offset  # middle_edge reports a row relative to the slice

    # Everything before the middle node.
    head_v, head_w = linear_space_alignment(*scoring, v, w, top, row, left, col)

    # Translate the middle edge into one alignment column and step past it.
    uses_v = move in ('diag', 'up')
    uses_w = move in ('diag', 'left')
    if uses_v or uses_w:
        edge_v = v[row] if uses_v else '-'
        edge_w = w[col] if uses_w else '-'
    else:
        # Any other value (e.g. None): emit nothing and do not move.
        edge_v = edge_w = ''
    if uses_w:
        col += 1
    if uses_v:
        row += 1

    # Everything after the middle edge.
    tail_v, tail_w = linear_space_alignment(*scoring, v, w, row, bottom, col, right)
    return head_v + edge_v + tail_v, head_w + edge_w + tail_w

In [164]:
def linear_space_alignment_call(match_reward,mismatch_penalty,indel_penalty,v,w):
    """Align all of ``v`` against all of ``w`` and return ``(score, align_v, align_w)``."""
    scoring = (match_reward, mismatch_penalty, indel_penalty)
    aligned_v, aligned_w = linear_space_alignment(
        *scoring, v, w, 0, len(v), 0, len(w)
    )
    return alignment_score(*scoring, v, w), aligned_v, aligned_w

In [187]:
# NOTE(review): `linear_space_alignment_row` is not defined in any cell visible
# here — presumably a variant defined elsewhere in the notebook. Confirm it
# exists before relying on Restart & Run All.
linear_space_alignment_row(1,1,2,'GAGA','GAT',0,4,0,3)

('GAGA', 'GA-T')

In [183]:
# Spot check: middle edge for v='GAT', w='GAGA' (match 1, mismatch 1, indel 2).
middle_edge(1,1,2,'GAT','GAGA')

(2, 'left')

In [182]:
# NOTE(review): `middle_row_score` and `middle_row_edge` are not defined in any
# cell visible here — presumably defined elsewhere in the notebook. Confirm
# before relying on Restart & Run All.
print(middle_row_score(1,1,2,'GAGA','GAT'))

middle_row_edge(1,1,2,'GAGA','GAT')


([-4, -3, -1, -4], ['diag', 'diag', 'up', None])


(2, 'up')

In [174]:
linear_space_alignment_call(1,1,5,'TCCTGTTCTGCTAGGGGTGGGGCTTCGGGAATACTTACCGGATTGCCTTTTAGTATACGCAAACTCACCACGATCGATGTCTGTAATAGTACAGCCAGTTTCACATCACTCTCTGGGAGCTTTGGATATCCCTCTCGTAACGATGCGACAGCTTACCTTGGAACCTGGGCGATATAGGGTCAAGCGTTATTGTGCTTAGGACATCTCCTACAGGTAACTATCAGTTCTCATTCGGTCCTATGCTCCCTGCAACCTGAAGCAAGTATGTAAGTCCACGGTCATTGGTGCCGCATGCACGGACGTGCTTAAGTTACCACGTTCTACTTTGTATGACCCAGGAGTGCAAGCCTACGGTCATTAGTATGGCACCGAGTGAGTAGTGTTGCTCTCCGTGAGCTTTCGATGACACTACACCTCGTGCGTGATTTCGTTGGCCCCTGGTCCGTACGGTACCCGTTAAACTATGGCCACTTTATGCGCTGCCGAAGTGGTCTCACTACGTGGTTTAGTTGCATGAGGGACAGGCAATTGGCTTCGTTAATAGGGAATGCATACTAGAGTGCTTCCTCGCCACGAGGCACATTCCAAGGGAGAGCCCAGTAGCGGACAGAATTGCTAATTCCTGCACGCTACGACTCTCGCGGCCTGGGATGCTTAGCTATCGGCCCGTGGACTGATTACTCGCTGCTTAGCGGCCAGCGGATAGTCTCCGCGCAGTAGACCGCGAGATCCAGGCAGTAGTTTGCATTAGAATTTTGCGTAAGGATTGGATTAAGAGCCGAATCATCCCATATCCCGGGTATCAGAACATGTTGCTCGCACGACGCATTCGCGCAAATTCTTGTCACCGAGGCCATGGGATGTTTCAAATTACCTTTCATGTCTCTGAAGCGGGATGCATTACATACGTTCCAAAAACACGACTTCCGACTCGCCGCGCCGCACCGCTAATGGGTGATGGTCAAGTCCGCCGAATGTAGTCCTCATGCGGCAACCCACACTACCCCGTGCATTCGTTCCACGTCCAATTCGGAACCCATCCTAAGGGTGGTAACCTCACCTGTGGTGTCCAGCGGCACGCCCAGAACCTGGGCACCAATATAAACTGCAGACCATATTAAGTCCCCTGTTCAGCCTGAACCTGATCTTGGTAGACTCGGAGGTCAACGGCTATGTTCAATGCGCTATCTTAGTCTCCTATGGCGCGTTGCTGGCCAATCTAGCGAAAAAGGGGTACGGGATGGCTGATATTTATGTGGGTCTTGGCAGGTGTGACCGAGGTTACCTCTCCGGATGTACAAGCAAATATCTGCTTGGAAAAGGCTTTAGAGCCGTTATCTATTGCTGTCCGCACCAGGGCGCGTTTCATAGCGGATGCAGTTGTTACGTCAACCACTGGCGTAACTTAGTGTTGGACCAATGCGCATTGAACATGTTCCCGGCCCAATATGACTTACCTCCCACGGAGTCTTCCAATTCAGGGTCCCTAGCTCCTACATTTCCGCGTAAGCCGGTGCAGGATCCGTACTTTTCCCGTGTGAGCCTATGACAACCATAAATCATCAACGCGCGGCAGTGTGTGGTAGTAGTTGAACTTGCGTGGCGATACCGGAAAAGGGAGCATGGTATAATACTACAGTACACCGAGTAGTTTTCAGAGATGCCTTAGATAAAACTCACACGATCCCCCGCGGACCGTGCTGGTCGTGGTGTCGTGTCTGTGGCAACAACTCTCGCGCTGTGCGCGGGTTGCTAGGTTACATCCTCCAGTGGGGTGGTGTAGTCGTTTGCTCGCTCAGT','TCCCATTCTGCTATAGCTTACTTACCGGACTGGCTTTTAGACAGACATCACGAGCCTGTAATAGTTTCACATCACGCTCAGGGAGAGTCGACATTTTATATCCCTCTCATGACAATGCAACAGCTGGAGCTGCAGAACACCTTCTGCGATCTGGGCGATATA
GGGTCAAGCGTTATTGTGGACATCTACCTACAGGTAACCATCTTCTATGATCCCTGCAACCGCAAGTAGGAATGGTCGTATCAACTCGTCCACGGTCATTGGGCCTCATGCTAGCGTAAGTTACGAGGTACGTTCTGACTAAACGCCAACTACGGCAGGGAGTACATGCCTACATTCTTCTTAGTATGGCACCGAGGGAGTCGTTTTGAGCCTTCGATGACAACCTCGTGCGTGGATTCGCTAGACGACACAGGCGTGTGGTCCGTACGGTACCCGTTAAAACTTGTAAACAATAACTATGCGCTGCCGAAGTAGTTTGCCACTACGTGGATTAGTAGCATCAGGCAATTTCAAAGGCTTCGTTAATGCATACTATAGTGCTTCCTCGACGCACGAGGCACAAGGGGTAGCGGTCAGAATTGATCTAATTCCTGCACGCTTCGGCTGCCTGGGATGCGAATCACGCTCATGCACAAGGGAGACGGCCCGTGGACTGGTGCACTGGCTCACTGAGGATAGACAGTTTAGCAGTAGACAGCTAGATCCAGGCAGTACATTAGAATTTTGCGTAAGGATTTTATTATAATAATCCCATATCCATCAGAATATCTTGCTCGCCAACTTGTCGGACGCATGCGCTCAAGTTCTTGTCACCAAAGGTCCACGGTCATCTACGTAATGGATCGTGAGTGTTTCAAATTACCGCAGGTCTTGAAGCGGGATGCTTATCATACGGGCGAAAAACATAACACGACCTAACGACTCGCCGCGCCGCACCGGCTACTGGTCAAGTCCGCCGAATGTAGTCCTCATGCGGCAACACTACTCGCTGCATTCGTTCATTAAGTACGCCGGCCTTCAATTCGGGCCAGAACCACCTAAACTACGGGTGGTTACCTCACCTGTGGTGTCCATCGTTCGCGTCACGCGATACGCACCAATATAAACGGCAGACCCCAACTTATTAAGTCCCCTGTTCAGTGAACTTGATCTTGGTAGACTCGGAGGTCAACGACTTTTGCGCTATCCAATTAATTTAGTCTCCCATGGCGCCAATCTAGCGCAAAAGGGGTACGAGACGGCTGTATTTCTATCCAGGGAGACCGAAATTGGCGCACCGACTCTCCGGATGTGCGCGGCTCAAGCAAATACATAGATTGGAAAGGAGGCTTCAAGCACTAGACGCCGTTATCCTCTATTCCAATTCTACTGTCCGCACCAGGGCGCAAACTCTTTTGATACGCCAGCGGATGCAGGAGTGTATTGTTACGTCAACCACTTAGTGTTGGAGTGAACAAGTTGCATTGTGTGTAAATCCCGGCCCTTGTCGACGCCGAACCTACGGAATACAGGGTCACTAGCTCCTACATGTAATAGGTAACCATCCGGGGGTGCAGCATCCGTACTTTTCCATAACAACTCATCAGATCACGGCAGTGTGTGGTAGTAGTTGACGCCTTGCCCGGATTCTTACGCTGTGCCGTCCTGCCGCGCAATACTACAGTACAAGCCCAGTATTTTTCAGAGTTGCCTTAGATAAATGCTGCGACATTAACACCAAGCTCCCCCCCCAGACCGTGTTGGTCGTCTCTGAGTGTGATTTACGTGCTGAAGCAAGAATTACTACTCTCGCTGTTCGCGGTTGAGTCGTATCCAGTGGGTTGGTGTAGTCGGAGTGGTGCCTCGCTCAGATTTT')


(-310,
 'TCCTGTTCTGCTAGGGGTGGGGCTTCGGGAATACTTACCGGATTGCCTTTTAGTATACGCAAACTCACCACGATCGATGTCTGTAATAGTACAGCCAGTTTCACATCACTCTCTGGGAGCTTTGGATATCCCTCTCGTAACGATGCGACAGCTTACCTTGGAACCTGGGCGATATAGGGTCAAGCGTTATTGTGCTTAGGACATCTCCTACAGGTAACTATCAGTTCTCATTCGGTCCTATGCTCCCTGCAACCTGAAGCAAGTATGTAAGTCCACGGTCATTGGTGCCGCATGC-A-CGGACGTGCTTAAGTTACCACGTTCT-ACTTTGTATGAC-CCA-GGAGTGCAAGCCTACGGTC--ATTAGTATGGCACCGAGTGAGTAGTGTTGCTCTCCGTGAGCTTTCGATGACACTACACCTCGTGCGTGATTTCGTTGGCCCCTGGTCCGTACGGTACCCGTT-AAAC-TATGGCCACT--TTATGCGCTGCCGAAGTGGTCT--CACTACGTGGTTTAGTTGCATGAGGGACAGGCAATTGGCTTCGTTAATAGGGAATGCATACTAGAGTGCTTCCTCGCCACGAGGCACATTCCAAGGGAGAGCCCAGTAGCGGACAGAATTGCTAATTCCTGCACGCTACGACTC-TCGCGGCCTGGGATGCTTAGCTATCGGCCCGTGGACTGAT-TACTCGCT-GCTTAGCGGCCAGCGGATAGTCTCCGCGCAGTAGACCGCGAGATCCAGGCAGTAGTTTGCATTAGAATTTT-GCGTAAGGATTGGATTAAGAGCCGAATCATCCCATATCCCGGGTATCAGA-ACATG-TTGCTCG--CACGAC-GCATTCGCGCAAATTCTTGTCACCGAGGCCATGGGA-TGTTTCAAATTACCTTTCATGTCTCTGAAGCGGGATGCATTACATACGTTCCAAAAACACGAC-TTCCGACTCGCCGCGCCGCACCGCTAATGGGTGATGGTCAAGT