In [212]:
from random import randint
from collections import Counter
from numpy import unravel_index, zeros
import csv
import numpy as np
from pprint import pprint

In [None]:
def global_alignment(v, w, scoring_matrix, sigma):
    S = [[0 for repeat_j in range(len(w)+1)] for repeat_i in range(len(v)+1)]
    backtrack = [[0 for repeat_j in range(len(w)+1)] for repeat_i in range(len(v)+1)]
    for i in range(1, len(v)+1):
        S[i][0] = -i*sigma
    for j in range(1, len(w)+1):
        S[0][j] = -j*sigma
    for i in range(1, len(v)+1):
        for j in range(1, len(w)+1):
            scores = [S[i-1][j] - sigma, S[i][j-1] - sigma, S[i-1][j-1] + scoring_matrix[v[i-1], w[j-1]]]
            S[i][j] = max(scores)
            backtrack[i][j] = scores.index(S[i][j])
    insert_indel = lambda word, i: word[:i] + '-' + word[i:]
    v_aligned, w_aligned = v, w
    i, j = len(v), len(w)
    max_score = str(S[i][j])
    while i*j != 0:
        if backtrack[i][j] == 0:
            i -= 1
            w_aligned = insert_indel(w_aligned, j)
        elif backtrack[i][j] == 1:
            j -= 1
            v_aligned = insert_indel(v_aligned, i)
        else:
            i -= 1
            j -= 1
    for repeat in range(i):
        w_aligned = insert_indel(w_aligned, 0)
    for repeat in range(j):
        v_aligned = insert_indel(v_aligned, 0)
    return max_score, v_aligned, w_aligned


In [2]:

def local_alignment(v, w, scoring_matrix, sigma):
    S = zeros((len(v)+1, len(w)+1), dtype=int)
    backtrack = zeros((len(v)+1, len(w)+1), dtype=int)
    for i in range(1, len(v)+1):
        for j in range(1, len(w)+1):
            scores = [S[i-1][j] - sigma, S[i][j-1] - sigma, S[i-1][j-1] + scoring_matrix[v[i-1], w[j-1]], 0]
            S[i][j] = max(scores)
            backtrack[i][j] = scores.index(S[i][j])
    insert_indel = lambda word, i: word[:i] + '-' + word[i:]
    i,j = unravel_index(S.argmax(), S.shape)
    max_score = str(S[i][j])
    v_aligned, w_aligned = v[:i], w[:j]
    while backtrack[i][j] != 3 and i*j != 0:
        if backtrack[i][j] == 0:
            i -= 1
            w_aligned = insert_indel(w_aligned, j)
        elif backtrack[i][j] == 1:
            j -= 1
            v_aligned = insert_indel(v_aligned, i)
        elif backtrack[i][j] == 2:
            i -= 1
            j -= 1
    v_aligned = v_aligned[i:]
    w_aligned = w_aligned[j:]

    return max_score, v_aligned, w_aligned


scoring_matrix = {}
with open('BLOSUM62.txt') as input_data:
    items = [line.strip().split() for line in input_data.readlines()]
    scoring_matrix = {(item[0], item[1]):int(item[2]) for item in items}

scoring_matrix


{('A', 'A'): 4,
 ('A', 'C'): 0,
 ('A', 'D'): -2,
 ('A', 'E'): -1,
 ('A', 'F'): -2,
 ('A', 'G'): 0,
 ('A', 'H'): -2,
 ('A', 'I'): -1,
 ('A', 'K'): -1,
 ('A', 'L'): -1,
 ('A', 'M'): -1,
 ('A', 'N'): -2,
 ('A', 'P'): -1,
 ('A', 'Q'): -1,
 ('A', 'R'): -1,
 ('A', 'S'): 1,
 ('A', 'T'): 0,
 ('A', 'V'): 0,
 ('A', 'W'): -3,
 ('A', 'Y'): -2,
 ('C', 'A'): 0,
 ('C', 'C'): 9,
 ('C', 'D'): -3,
 ('C', 'E'): -4,
 ('C', 'F'): -2,
 ('C', 'G'): -3,
 ('C', 'H'): -3,
 ('C', 'I'): -1,
 ('C', 'K'): -3,
 ('C', 'L'): -1,
 ('C', 'M'): -1,
 ('C', 'N'): -3,
 ('C', 'P'): -3,
 ('C', 'Q'): -3,
 ('C', 'R'): -3,
 ('C', 'S'): -1,
 ('C', 'T'): -1,
 ('C', 'V'): -1,
 ('C', 'W'): -2,
 ('C', 'Y'): -2,
 ('D', 'A'): -2,
 ('D', 'C'): -3,
 ('D', 'D'): 6,
 ('D', 'E'): 2,
 ('D', 'F'): -3,
 ('D', 'G'): -1,
 ('D', 'H'): -1,
 ('D', 'I'): -3,
 ('D', 'K'): -1,
 ('D', 'L'): -4,
 ('D', 'M'): -3,
 ('D', 'N'): 1,
 ('D', 'P'): -1,
 ('D', 'Q'): 0,
 ('D', 'R'): -2,
 ('D', 'S'): 0,
 ('D', 'T'): -1,
 ('D', 'V'): -3,
 ('D', 'W'): -4,
 ('D', 'Y'

In [3]:
word1 = 'RPEVEEADNSFSVQHAVERHRFFPAHNIAAKSHSQKSSEEVYITQWMTCGDRCMNHLLKTGPIGDKDGATMSSSRWHVQSRAKPTWKVWAMEYRCMEHFIDRKEFAKMHLYKITIWTMQHMHECPGLIRPHHNIFLWTERQENQWQAMYICDMHSMYRARLDMHCGHEQCNGNYEHLFALDKLVWRLKGQVMEPPIWAMKHQTKSHPVGVMHAINNCLEMDFLVRQARFLRIIVTLCGQGSWFRDFGAPFMSYRKEEMCFIKNYNSHVDEIFYRGWREHWFTGDPHTHEDQWQFIRWCHPRPTYPKNYRPKYFGYWGKFDGRLSFLRQWVHIPYHWGSVDIAKRAWPKNWHFTYLYGFYNEMPELYDSPVGCIRQCQKTGPTHLRQRLISHFSHVCVHIINQRMFLSPPMIASYVTLFATKNQGPMYQTRKFAHACTACHYFYFMNAIVYPQKNFRWIMEGVDIHCVSIYWVHGMCHCQWLAIHKIYWTHINQVTEDEMYVFQCGYCSEGHRTCVLTIVFFFELCPYEYQFPWKYSCCHHSMLGVHKWEIDVQQQETDAMETNVGICAYIIHSAMALYAWCICMDYGYLPGMYRVKTTPWCIRHPFDDTLFRAGCDWCIGILYRKTGSWSEPGKLQGCWRHQIGVTIVWALGWMEDEFRNTGDIAEMDGRFNMCEPIENNGQYICTSVSSYPCYPFMKDYIRQWSLGPQMCLWFMNAKRWTWQLETQTDVMAMAEIQMCIICAHWFGWEWYCHCVPHLPMMGCEKPIRSPHEGQHQSCKAADHGQWSSSTSRENQIHMHDDLYGFVNIYYRRTFGAISCALFRCVLANFTDEMKVSCSWTGCWRVDIRAVFTVMHVCMPDQFEGFYCALDWDVCMDHDCEWNWQTAYAYKVEQKAITGNNMEANSWPYKRQIAAYNIYNYDGAIWKFWIEFQYFSDMQYGQGTNRWENIWLTTETYQEHLYNYGKRYCKQETIRQPFPRQNFPNDSQIWRRMHGCTLTKYRIDYEIQANMCLWVWGCKIDPFDCFMGTTVGIGYLCMKAGWSGSVHCMYWTRDWVCMHIKPYEMNWMFAWNYVNELKTKATFITNTYCSFWFLSFNSSCDGWGTPRWIFAREQVWGVYQIRTIFADDSFGMTVYTPFEKFTRWKFMEHQGNHQIYPDTQGGPPGKPASWPMVFQGHKTLIHCQKRRMAQTRVMRVGSGQQINAHIHWFCSWIWYTKKWWPYVSSPRTEKTAVGQQPRVYLYMPDNAMGSVHVQDWHLVRDDDRPFNCGSLEDLRQTSRNLIHQIGSTEGFHIKQSLQVQWEPWQKLEGDQMSCGCIPDRWNGECMGSNHQLVVQEMSMMTSRIYGTRPWYGLFFFLFRGEREPYVTTDWGDFARRLHMERFCLGCVTRFWDFLCVYARLAEEKIWPICGFKAKFTQIQPHRGMNFNVFRVHFKNVKTHFHNDKIQQRNAPADPTRVPCFMEGFIISAVHCMEKHSTKPGMSYDEPKADVNMEHKWVFIWPSDQCINLKWIPQIDPFGGYGYEGGTKCETEQFYMVQRYRWNNWSHERCMVYEMEQNGTLNPSGKTWEFQTININWCNKIPHFADFIYAFFWWKMTFSHCLTKVKGLCCTVPYHYMKLSNAYSPASAHDKAINNCSALRNHTTKSRDNGFIIKQTEQETWSFGRYDGIHGHPTHRLRFLQIHDTWEEGGHSMNELLGYDEQIYMRMQAQVIQEGQPCCRAYLDIFAWMAVDENMYFWNHKNCFFLHGDAILLVQWWHEARCLRNNYPFYPNNNGAFPCRYYDFEDLYKGWYQRAFQYPKVVHVEGKEAATFECTGMQDAKEHKKDDYPCHDIITKYFPHYDSRKPVGKCTYIDKFSERTHHMPQRTIMGDHCCEQCYPHSRNYFKLEKNCNMFAYWCWPMCEYRDCKRRPINWFIIQQTEDRFNQQPFEIERKLIFEHNNGTFQPLQKRHIDMMMHAEYCARFNRYWEMSISHGGGWNNCPAFQHSVYTPEPGWDFNQEYECEDLMLQRPCMEITVAYPQFFYKSHKDYNTMQEYEFKLTIFYSAYNTQEGRWATMFYNSRAVGPKKGMCGGRDPFACPGQDNLEPYVDYGHWVVMTGMFMPTTDIRESVAITQPRTTDTHNQCHESNKYWSSMDHAFHLFNLETFPHGDACVFGEDPAEVADNGGRHNVFYKHCQTERWGEHYNPAMVGIKVPYYFWNHERFLKSAGIEWHDSFYKPGQRKWQVHSKATMPDVDWIDADTWVMLLQEDCEEYPSRFSEGMIRYSGPKNTHPAAIRWCHWLACFQEIWETGCGCISHVKTCMVWQTRFEMMQTGRARNQEVSQKGGGMLFHIRDLKSIHVGNNMGGIPTLARFSYQWSMLYWVDYMWCDKHMMCILTPACVDFKMVSHSTFDFWQAGTFGNQWYKCLMDMEFDKAWSTIAPINLNSCTEQTLCETPFSVMHKATFTNLSDCPFGMIMRFEMKEEAGRGAEHAMLVEGFAVAISHTAFIWEMHQMYDTAVQHMGRAAVQSMNVECPDKYNNDFLCLFHHAGKKTARTFIIEQWGHWPFFMADNHSMLFPYCCTKDPTGLTDHQCMECRSRPCTYVFTRKFKLKHKRWGGMASQPLQVPPLDNKQCSRECEKGFDYDTTFVPSSERMSMYKFPTKSERKGWYAHCLVWERLMHTRREYHQRIGWDEITRSDWAGFVMNGWVTIFNSPCMWELWFQFNLYDQQCPCYIIENDIEEQRQHWRWRSQYVEGQHLVWRDCMLCGIVNNTNVYSDMNQDPHCPTQQVVTDFYWKSILWYSSIAECLGSHDSIGQFASKRFICNPQLYLGNMKSVMSNCMAPYKCVPVFYDYQHNPMDITLKTEFMGQGDFHYGWEWQHHACSVVEGRMWGSREGQTPGPGCDKTYMGDTAFIPTCWWVVHHFQYEHTKDPLHFAKEYVGVVFFYVIFNCCYNLEYDWHVQHQTSDWMTWEFLRKKMAYGVNWQCLAVREQTACADDIFFSKWNSLGHENWHFLFRVIHMQAVIRYCPLVFDAVWLPVDAPQTFEARAGKCWMWLVSVFASALCNSWGVWDGQNIPFMCNWCPRDNWGFQMAFYCVWWESYTFWNTNQNTMLWQSEWVYQNTIPQGTACRDMKMETTISFERFPAMAKHFNNGNMLANNCWPAAYREHKYKMPHEKFTWICYEALCTTNACYHWVAGSQKCDMGFRISCWNYAKRWQDDLSYRFSHNGDHQQYRLLCVDLVIMTPWQRVDQHKYPWAIWPIEYPNDYIPYRFFPERVLEARQFGISPSLQGFECWQESDAWYMRRYQFFMQIICINGVHMNISGFCHSQGISFPWNYKNDQLVHMHRYIHFPYMTVHMIKCAWNKTGVRPSFQGTAREMWFDTVPIARGNAEIITYSIPTMLKHPVDDRNEVHPNDEEAPLQYWAIADINAMQGEMTDVFAFPYHGLGWAYPDQFVLIYWVWDIRHMHLGRMYHEVVFCAKQTVVSIMRCEMGNDIFMRFYWCRGTYIQHVCFPVKTQSFKDCEISASGRYVHREDHKCFCISCRAKPMHPVIVYHKNIMSQCLIIPQSKWKNTPTPCYIDVREVQRTPPTRSTELNTQYDVYESPHADKHYDYVAFQVMEVCWMSQPYIHEKYFVEEVNFPTKWMSHRHGNTRWPFWHAFHSMTAMRDYGDWMMLTAIMWGYRKIDQFPRAARNRLLRPPAVFFRK'
word2 = 'RHYNLEVEIADNSFSPPLYYVEVQHAVERHRFFPAHAAKSHSQKSSEEVYITYWMTGYMQWGDRRKMNHLLKTDITNVPYIFGATMSASHWHVQSRRKPMEYRQIDRKEFKADWKPKKMHLYKITMWTMQHMYGKDDPSVEDKWGHMKNISCHHNIFLWTERQENQWFCKVLCMYNCVMHMYLDMHCGEQCNGNYEHLPALDKLVWRLKGQMEPSIWAMKHQMKSHPVGHMHANNCLEMDFLVTDYISYQAYEFLRITLGQGEVNDYLWFFMTDGDAPFPVAAVGQYRKEEMCQIKNYNVDEIFQRSKFHWFTGRWWTKGYHNYAPKYFYVLVIMRQYWGMFDGFLSFLRFWVHIRTHWGSVDIDQEEYRNKRAWPKNWHFALSVYLYGFPELYDSPAIFALLNVTQNVSQCHQYYVQWPKYNNKSHFTHVTVKVKWKPEKVFATNNQGPMYHKKRAHACTACHHSPDAQTQFFMNAIVYPQKNFRWIICWVFCMCRFYNCTLQMQWLAIEKIYWTHINCVNEDFQCGYCQMFNTIIDEGSRTCVLTYNNWCEIQFPWKYSCCHLMKVYETVSMLGVHKWEIDVQQQEGETNVGICAYIIHSARALYGYLPGWCEDQMKTTPWCIRHPFDDTLARACIICINILYRKTGSWSHLRYKQGKLQGCWRHQIGETIVWALGWYFRLTVMEDEFRNTGDIAEMDGRFNLDCEPIENNGQYIVSSAGEWTRLCPCYPFAFIDCVREDYQMCLWFMNAKRWTWQLETMAQFWFGWEWQCHCVPHLRFCQHVLHMQEQIYGCDRSPHEGQSCKAADHIQYQNQWDSSTSRENQIVNIYYRCSQTFISCAKYSQHFTFRCVLAIFTDEMQYFFLTNRVSCSWTGCWRVDIRAVFVCMPDQFEGRSAVSWYCALDWDVCMDHDCEWNWQTAYAYRVEQKAQTGNNMEANSWPYKPVCHQYMFQIAAKNIYGQHTIWGHPLFFLVFLDMVWENIHGEHLYNYGKRYMEKQHTRRQPFPRQNFPNPSQIWRRMHGMTLKKYLIDNEIQANDWFLEYPCKGHTKIDPFACFCGSNLHDRCTVGIGYKHCMKAGWSGSVHCMYYTATMPNWETMNWMFKHSWNTVNFMMELTGNRNRSKNAKHITNTYCSFWFLSWGCPREFVAHKHQIFAREQVVYYCDSGTGMTVYTPFEKYTRWKFMEHKGNHQIHALGDYSWYFKFQCENEKPPSAARNPLTELKRFPKMCWQGHKTLIHCQWFSWSRRMASTRVGYYQQIDAAWRNLAIEWIWYTKGMRLIYEWINMNEDMKPYVICKYKNFWVGQQPRVFLYNNNSGKPDEAMGSSNFNVHVQDWHLFRDDDRYFNCGSLMIKHAQQANRQTSRNLIHEGFHIKQCTPTKKRWHEEIYWKLEGVEQGCQMSCGHNPDRWNGECMGHQKIENMSMKVAENRYGLFFMNCIVDKQKYAHQGEREPYVTTRLTQKWCLFCVTRFWDFLPYRLADQEKIWPICGFKAKFTQASWLKFRHLNIQAPVKQYIFRVIFKNTRGTTQKLMTFDDKIQQRYAPCISAVLCMEKHSTFHSQCYPPGMSYDEHKAFWHKWVFIGDQCINLKWIPQIDPFGEGGTKCETMQFYMPQRRKKFQANQTRNWHLFCINSAGAWRCRGCERCPKANDRQNGTLNPSGKTMEFQTININWCNKIANAYFFWWQLTKVNHEGLCCIVPYHYMRMTFFAWAFDCASAEMVRDKAINNCIAMHGFIIKQVTTECETWSFGDGIHLKFLQIHDTWEEGGHNMWIVCDSIYMVGCRMKNQAFVGRMVWQQEGVNGGQPCCRAYLDIFMAVNENMYFWCGSVHKNCVQQWAEKQARLWHEACLRNNTSFGDIDFEMMNNGAFPCRYYDFEDLYAFQAPKVVHVEGKEAATGPQVHCTDTRHMQDAKEHKKDDYPCHDIITKYFPVYDSRKPVWIDKFSERKHHMLQRTIMGEQWYPRSRNYLKLEKNCNPPDFAYWCWPMCEYRDCKRCQQMKCSLEPIFQTEDRFNRQPRKNNGTQQPLQKMHIDFQTVTGIVMMHMLPDAEYCARFNRMSFAPGHWEMSISSMCFQHGYMPVQAEYGGWNNCQHSVYTWDEPGWDFNQEYECEDLMLLQAQQIIHVSASMPIHYPQRYYKSPKRYNTQRHLDDFVQEYKHNLTIFYSAYNTQEGRWAMMFYSRAVRDPWAVPGQDNKEPYVDYGHWVVMTIVWITQPRTTDFHNQCIESNEYWSSMDHAFHLFNLETFPHGDACKFEDPTEVAGLIIVPNGPTRESHQMTHSQNHDSYQSYYWGVMPYYFWNPRERELKSADTINNPGQRKQVHSKATMFYANDADTWVMLCQEDCEEYMYIWAHVGASRFSEGMIRMSSSGPKNTHPAAILFFLGPHWCHWLACFPNLITMHVIWECGMGCISHVKTIMVWITRFEFMQTGRARNQGVGGGMLFHIRDLKSIHVGNMNLGSYPEFMGGIPVLARFSPAESYVPSARWCDKHMMLTPVSHSMFDFWTCSVCAGTFGNQWEEKCLMDMEFDKAWSTIAPINLSVMHKATFTSLSDCPFGMFMRFEMKEEAGRGAEHAMLVEGCAVLDFFLDYSHTAFILLRVMHQMYDRAAVQSMNVAAENNDFLCLKTAYTFIIEQWGDNHCMLFPYCCTKDPLFLTDSQCMECRSRWCTYVFTYKFKMGFNDDIYEVQPLQVPNSMMPHCRMEYNKQCSRECEKMQYDGPGMHTFVPSWKSERMSMVFYFDIVEQKGWYAHDTASHGCWRWERLMHTRMFAHFQEYHQRIGWCEITRSWAGWVTIFNSKCPPELWFQFSIFRLYMPNDIEEQRQNAYDVRWRSQYVWRDCMLCGIVNNTQLVPGLMMYWKWILWYSSIADCLMASHDMIGQFASKRFICNPLGNMKYKCVPVFITTKTEFMGQGDFHAGWEWQHMWGSREGQTMICSEGSLRVDCNVAKLPGCDKTYMGDGHAWIKAFIPTLWWVPKYEHTKDALHFAKEYVGVVFQYSIFNRCYNLSYDWHVQHQTCEPQWGVNWKCLAVREQTACADDIFFSKWNSLGHENWHFLFFVIHMQAVVYHIMVRYMCLMGKPLVLYQPVSDAVFMSMVPLIVFAPQTFEAYAGKCPMNYEGHMCKGGCDVAKAFCNSWGVWDGWQNIPFMCWWCPFQMAFYCVWWESYWETNQNTVLWQSEWVDHVRQNTIPQGTACRDKMETTISFERFPAMKHDVCAQKWFNNGNMLVNNCWPAAHREHTYKMPFTTHTHCFGAALCTTNAKSIPQNWKCDMGFRISCWNYLSYRFHKTHADLVIMTPWQRVDQHKYPWARWCPYYFFPERVPEARQFGFSPEPQSFECWCESDAWYVRRYMQIICINGVITLRFHNFMNQEGFCHSQGGLFPWNYCNDQLVHWHPYMTVHMIKCAWNKTGVRPSFQGTAREMWPGTRDTVPIRRGNFIQPITFSIPTMDDRNEVHQNDEEDPLQDCTRNNSNAMQGEMTDVFAFPYHDQPARQSLIWAYPDQFVLIYNVWDIPTNTYDSFEEWPWWAAYQVFCAKQTVVSIMICEMGNDIFYWARFTIFPMLATRRGPYDMYVCFVHVVKTQSFKDVEISASGRKTTAFFTFCISCRAKPMWPVIVYSKVIMSQCLIIPQSWKNTPTPCYIDVISKQENMTDLERQCSTELNTQYDVYESPHVRSHYEDKHYDYVLFQMEVMSQPYIHEFTTKCKYFVEEVNFPTKWMSHPHGSTIYWMRDISMVYVYTAGDWMMLTASMQFFAGNKNWGYRKIDQFPAAAVFSSIHTRSVFFRK'

# Get the alignment.
alignment = global_alignment(word1, word2, scoring_matrix, 5)
#print(scoring_matrix)
print(alignment)

# Get the alignment.
alignment = local_alignment(word1, word2, scoring_matrix, 11)
#print(scoring_matrix)
print(alignment)
    

('9099', 'RP-EVE-E-ADNSFS-----V--QHAVERHRFFPAHNIAAKSHSQKSSEEVYITQWMT--C--GDRC-MNHLLKTG-P-IGDKDGATMSSSRWHVQSRAKPTWKVWAMEYRCMEHF-IDRKEFAKMHLYKITIWTMQHMH--ECP------G-L--IRPHHNIFLWTERQENQW--QA--MYICDMHSMYRARLDMHCGHEQCNGNYEHLFALDKLVWRLKGQVMEPPIWAMKHQTKSHPVGVMHAINNCLEMDFLVR-----QA-RFLRIIVTLCGQG--S---WF-R-DFG-APF-MS----YRKEEMCFIKNYNSHVDEIFYRGWREHWFTGDPHTHEDQWQFIRWCHPRPTYPKNYRPKYFGYWGKFDGRLSFLRQWVHIPYHWGSVDI-A-----KRAWPKNWHFTY-LYGFYNEMPELYDSP-V-GCIRQCQKTGPTH-LR-Q-RLISHFSHVCVHIINQRMFLSPPMIASYVTLFATKNQGPMYQTRKFAHACTACHYF------YFMNAIVYPQKNFRWIMEGVDIHCVSIYWVHGMCHCQWLAIHKIYWTHINQVTEDEMYVFQCGYC---S----EGHRTCVLTIVFFFELCPYEYQFPWKYSCCH----H---SMLGVHKWEIDVQQQETDAMETNVGICAYIIHSAMALYAWCICMDYGYLPGMYRVKTTPWCIRHPFDDTLFRAGCDWCIGILYRKTGSWS----EPGKLQGCWRHQIGVTIVWALGW------MEDEFRNTGDIAEMDGRFNM-CEPIENNGQYICTSVSSY----PCYPF-MKDYIRQWSLGPQMCLWFMNAKRWTWQLETQTDVMAMAEIQMCIICAHWFGWEWYCHCVPHL---P-MMGCEKPI----RSPHEGQHQSCKAADH----GQWSSSTSRENQIHMHDDLYGFVNIYYR--RTF-G-A-ISCAL-FRCVLANFTDEM-------KVSCSWTGCWRVDIRAVFTVMHVCMPDQFEG-

In [4]:
scoring_matrix = {}
with open('BLOSUM62.txt') as input_data:
    items = [line.strip().split() for line in input_data.readlines()]
    scoring_matrix = {(item[0], item[1]):int(item[2]) for item in items}


word1 = 'MAYITKAALSLYDARPFHTAHQWCGWGRECYIMVLACWSLRLCLNNYSEDHVWERQFMNHWQKNVNYPRSCYMNQPMCDYTDRGHADDSEDMGYAICNWEFIQPKGVGVWSLFVKWVHSSITWYGHHGNDGHGMGYLWKGWKRCQQALVEQFFAHVMVWSLWVNMMWADQEYVRLMWFYQQRPVFPPFICMVWCLQQTCEHIKHWEAYAAWCFNMSSATAGWVQSNCTINKMWQAWAINVGTFWLWNAYFRHHNQLYQHRDKTHHEMNFYKSRLGRPDISHMSCLWFLSNWYCRYQTFHLFRGAQKWDQRDIHEDCYFEPILVLAVGEQRTGTDVHSRMMLSAEAYVDNEHDMTDSDPTLDMTQCTNGGWTEMQSCWQKEMGCNNDTQQCALWMHKRRACENTFFILPMHQAHMMNVFQNKEKGFSYQYFFEESNYNQSLLNRLKGLISSKGNGWSKFHNDTMIGPWDAQQCYKLAVEYQCKRVNNWKWLDWNVESSENWCNLWVYHMYKHMGRTDRCQGDEHQADETEMDNHPSGVTRMDTCDPWYMESCVVDVISTGDYRTDDMQQITRNIICQLVKFKRDCYAEKRIVWVGSHRVELTYINQYSGMANTTANCYDKPMTETYALDRISAYFYFKQSKKPLQWVQHYDGAKQFYIYHCFSYENKNFSNEDNTHDSIVSWHIMRWIKLACCLLYTCIQHIGKAVWNLEAYAYVAGWRMIEFRSDGMWKRLNVFYVHIQREWIPLVVFYAIDTYEIFDGWRDPIWPGSKPPTEEWAGSGYQLFIAVKTYTGNYEGCSVVYSFVPHHMKCQKGFAPCCADVQQEMSCQLYQMTVNPYPTCYMGDQKHFCRCWIELTSHYPTFTSVKKAQRQGSTQLIVEIVHEERRNFFDRFDEVSCAMCARTNNWSGLYINRYWQEMICRQTLRCEMRMVNLRRNMTCWKENGYIVQWVPGWSNCINWRPPQWTFCIQCIECQENDWKHRQLHYWCDQNRITTHTMKEPPQIYFAQAYMCMKKVWEEHNVDRSRGMNSCMYSAKSQSFYPCQWLIAREEQMCWNDNNKAEHTEHRHFTMMQLQIHWPHIRCTWSGLGYWADMRRRMCQWESQQQGELHHVVFHDKFRQADIMYCVQKAAYNVCVQNASDESLSNTYMAWRTIWDLELLVEMREYHYKHYQAVRFMASGYRRGVMETSGQFYIWESYCYTFPCSNNEGIRWRGVYKFKTMFEHACYIESLVYFAKEHSVLREGDPWSDDVAMMMEWSAIYMAKLKENQVCNLDCNYIFMLSGRYGDDEFPKFSRLPSFSMIIQEILHDMNRTGNQYYVYYRYGMTSRDNDKGMWENAQGKPHVCMRRFKDGQMYRRLCHDKSYICGWHHESRVADGSVKWVFESVWCIFKDLCDGNCHAETDERRDQGHDGADAVFMWYWDSANIQDLQWGEKDHVTETCTGCMCLSREAWNQKVSSFMHRWISNVYCKKYHWCETDERPHGACMLQEGGTPINWMGVHCIQSANAFEMSAEMLRVYMQAHKHYWNQQGRPLCSHFKTRMVDISSRHATVMVQDWEDGTSLNMDWGFFFQTFDYTFFLNNLSEKHSCLENQGGQKVSEGNYEAAWVADNDPRLWEFALRFLCTQITCVVLTGSEAVWVQYHNHFSFESSDACIDVIEDIRYIQISAGVPFHSTWIEHAYRHYYTVGMQGFVCTNHCDRMDYGLSERDPREQVMKTYIPTSICRHVAFSQSSIKLEDMDCSLHMFEGLSQDRYGMMKPVNLWNRRSKHDECRQCQHQWQRRSTQTNPSSMYTSFLEISPNAILNLPLMYEGWAQIRAIHTYSEISQVIEECMNNHCQINSYWKAERGLYIAWIFDVPMLQRLHVHYLLLHQCPQKNNDKKPGGKLPHRAVVLSFVTAQIFNSKGRGRHPFKRNTKKRWAICYTDAYMCTISWNFKPSCNIQMIWSWVCQFLGMGDEFEVGCFHGYWHTYLNKFKVEKKWLNNCRFEQRAKTSAYRGHSETFMSIKVSIEDPKAQDHCMAAMALWRQWLMFMMCFYRNVHIWKTQGPNFSIWASNAPDTNHIHYWPPINDWLMDNVLSKYIPASINRPTINAQHRNVEFAKHAFFLKWFWVYKFNVPNFIRFVEHCLILGHHWPISETTAYECSRHHPSRLSCHQKEDVANAWAWNSGATYWDKLSMMRGCYMQAQARYEWWICCHRSQVRMIYEPNRAENHYHCHNYGYTSRRSNRDEWCTVRSYLCVDDEACCTGTRSMALHTQSACQIHIGMPKYIMLIGKRVSRAADNNKKGQDPPCCAKKTERPNIVEATWRIVSKGPMTCCSASMFSKHNVQAYCPWPWQWQSTTVTEFGTEEETKPFFIRSNYTQVVNREFVSNPCQFSLTDMWKRSTIPYHPAYKKQFFHWSTTEKYLWEGMGTKKSYQEDDTHDMQCFFCPVQCHAMPPMHYDIFVCWCVKTWLEKSRHQHGHWYSPECDLTTMSHKPWSWGAMQDHDWIKILCGWIFTMINNCRGFRQCPSMKRKFNCKYIVSHRFWQMFQMFYTPDLSNQCECLRWNVNWPDIPADDDLDSENKYIALCVYDHYLTLVFYGSQGVECKMKQPRHVMGKMHNAHEMTEPDGTRNINQSEYLMNVRQTGYSCQLHQPMDDKSECVEFKMTWGWPWMAIPQYRDTNMMNIYYHCHTCNEYNRHTWWYCECYPPWNTQDNDHKCMTIHPSKMHNFKNKHMYAIDTDYMGRVGWKLMTKYYCAVRMNTESQEQVLWSYKWGVFVRQSTKMKFHTDKVKCVMTIGPALYVRHCQGGCKTGAVSNGCVNMMFFFRLTEDPAWVIASASICPYEFSDAPKKERYAGELNYCIPCYWLNDGVILEKMGWPLQHMVDYSLWHQSCSDLNTLHRGKWHHYGWGNHFEYMVKLHMVRNGDTQKQHDHMLWPPDGFYIDPNLVDEVLAYPVEQPDMKVVLFPNGSTLNMKKAWWRKIIQKHDNIHMDLICISWLPGWKDKMDGHLVTSWYKDREEPLPMHQSRGSVCLHKVHVYVMFFSWGKSIMIIPHGGDPITVFRGQVEPHMYGDCYYMAACIIGDQPGVQPATTDGTRYTYRQLGYTEPGKRFFYCLMKKTHEDVHGWSKEYGQPVIPDTEECTHNTGYFCVEWSVLEMHALKILLEIRRQCTDTEEGECAYDCIQLYAFWVNDRVVHWILRQRRYIHNFSTTFNMNSTIWGKNEKDDDAMTFEFKTGPRACVVGHSQKRCDMLSTMDCGLTFNDTTCREFETYVGAEGPLYHERHPHSVQVYTAHYLGACSSMFGYNYACWQKQNMDPPAYGRQMWSNNPCLAIEMIDMIPLSCVEQASITNYQMSCQFTWVHVDFFVHFTEHNDQGKEAERMDAFCSYYNQFEWVLMMPFTEEARAGLCGSFVNNLRRLEDMYWTGFCEVWYAPHLIQSLRQKRDTPDYCVYQIMKDWYYVTGYFHDDLFVRLLEDKLPNNHANTRISPILFYPMMAEQELHEDVQMDGENIFNKLNTYHLVAWMEFVMNQHRCMDWEQEMHDPITHSWTMWPTSMEYYGGRGVDTVDGKPCNGLDYNSGPGDLKMYMSMIAIHCYTEVYWWTYYKYGFHWPFNYANGDATTRCDYLQHDIYQAVCWKMFWPCDIPRCCRDRRWAFGEWPTRRVLLAKHLIAINNHNSPIYQRMIRCDENCSWINASKDMPAAIIFAVTGARSHTIMSMWRNHCAEHRYWLHFGKGNNDHAFPTWSWQYIDLYWDTDTPMCQTVAKCIAEQREVHKGDSYGQGSGDWSPTKRFYPFCYCATDLYATLKSFFEQCLEFSGVCRIFCKEDDWINDVYNLFFEQCYEHEEQQDDADSGEENPANETARSPSGCINHQMTCGKDMQDICRFDWSQNVRCYFYWTARWAIPIVGCHEEPFWDIQIHLCTHCVPNTFDGIKEKYLYEGSVQMDWVWSYRGNERDRDQAFDDHYAFMIGPWSIPRKMSTPHDFKGPMGPYHKWHIYTLVCRYLMYKVSSQNYCANQQQAPMNNGAWKMCYGCVYYNTEIWAEASVRDQKDDMFGNAIEMASSTYASVFMSISGGWINIPKRKKMPWGQWEKWRFWSFEKEWQVGYPDCDPFSRKINQQAFNAFVNFTFSIYTFKVKYEDGGYLGIKRIMHVIGCKRDINILHLLPMLVQHWHYHNNSAKMLELRETLSFNWQAHNVPRQGWIFCGFFLLRIALIPDNLDCRFIYVMAHVPKDPWIDFWTDGPWCQLMLACKMVSDALRRTPMVYFKYGLKQRCWMQTMKLIYRNPFTPYEQAMPETDCQSYVHSLVRKLWKYMWERLTCLGFSKELFMTPDDIYKAYRYRDRNQRCVAPCIFAYNRLQQMYCAIWGTMIMHKTTQVQQLCTGDPSCGWCSPSIVKRMFREPGQTKHEMERDICTQRVKQEADFHEMQCNQCAPAASKMSEWKHNEKYANAEKRVFTECFHCKHPHEWWPTWMYDCMHCMSTLYTETMLCNWHCSPFPEKINLYHAVSSTYWDFWGSCELSFEQYCCHPFRTGCLAQWYMHAYEYITSSHNEFIQRGCWGWKQHMGMVQQCHSDCLWFTYFIGNMQKIPNNDRSAYSRCFFIMACVLWAGQGYMHGYEDICPFDENRMMMNRYQVNQHQKGMGDSLTIVSHMYYRLFHQSNARSKIPRNGEMCTCPCYWNARGVVQPQHHPNARYQIQTRCPDQFANQTWHAGAVYNAVHPKIYNANWMRNVLHMQLTTELYNMSCVNWYWFKASMLWTINDKWFGICPSGVIMAIPTLKDESFRCMWQQWCLHKKCLIAYNMNKQCEWASVPMDHRCAHVTAERNRFSMTYVDYILNYRTHDRPKNCYEWPPDKKVGWVYCECWGCNSSIRGTCMLQFGTLQHKEDTRPCKYALWKQFCSHHLRGTEHFITREQITWPQAYEFWDPYDAFWRCTTCTEYCVVICAQVNWPHEIPKEGLNIVTLKCYASRHHVTHKQFNLMPAPNKHVSHCYCPDCGVNRCEHKFNFIWHRHMLLYEGFSQYQIHYARDENHLEQFLQWPSTYEMHGDFARWMPRTLWMTFRVGPMGPPQSIGGWFGHQGDYWQRDMREIWDIYIVRKCMCIYMWGTRAHIMNLNTLPPAYGICMEKLIMWLPTAYPDVQQSMHIYPHYPGFNTECMWDYMPGSFSDNQPFLFDKRCADCKYCTGECNVLSAVVLARLENIDKTWQWELAQLQMSFEMVCWIMNVPGTEWVMLENCHKYYDRGPGRSVGHEMYWSIFNRDWFYQVMQLMVVFESLCLWLAHFCMMHCYVDPHPTQPESELHDEADDKIYENPNRVKWRVSAKPGCWHVVSKMISCTMVLSRYVQVTNGNQCWFSCSDIYVDPICGAKWMYKEKMLGHAQTDAGAGNHAVRPFEKIRNFRGRRYTIMARKFDDPCQADMMDKCDDGLYITGFDPMFENVWFDCFLDSSQELSHVQTFPGFESLDQLAHQPAWGPHIKHILAKVDPDSGTCFSPMHWTRGNFKQWYERLRLWIRAINQVVIEDNGYHDKNRNTQTRMRFIRQCLNDGTQVREGNMELTMYYLFDCKANHSYIYRCADVNQYWDYKYANADEFKKFQISPMYFCYAAKNKLNNTALRDPWDGVICTGLCGPLLSWDTVNKWQVDFVKGKNDQYWLGIQIITEHHQTGYHDAMQKFMGGYGMQNGEWPDFDYSCREEGHPRMCACLAPYLEAEDQDIPFGMVGQPVPHCNSTKTNKGHCCRMNQPWHGSTHHHPGSYDPYNQPYQISELGKDYDIQYQIWVRIDLINKNVEHHAFSKFPICQERKYFTDYGYTVLRTKQANLTMQHSEYKLCPMSRNYDNPMNLMTEWYSYNNPQLSVAEDRLDEKCNNAAEKGEYWRDKKWNWHDDMWHLAQWSPRPCVAPSQAFIMKSEKVVVTGTSEFGYQNMAYPAIITYTAGWLVETDHIHATSNVQICLFIKRENCCKGKGQHKELEKLPLPVKQHDGCHQPDHFLHSAMELRTTQPHPPGEDSETDMLKWEIFHEPFFRNPHATGETQGTVFWHTSLDLMATFFALIWHPHDGFPYRNWHVKYVYWFNISILQMMRSGFMNMFHGHAGIMVCVVGYSQYETWLRHCCEIFERDRVHYIEESLFLKKMHATLCDYIKITYMKAFKENRGPKWVWVCAWKIRDCWVLQMQGPRGHDNGDYEYIMNAHEESTNPCCYINAVNTGIPMMMIWTKLFFGGYCWRYYSTRGNQTMTPLWMFSRTALEINSRNFHWVTRDKEDNQEHKDRAHPYAVIVKCTNRNPQVWDPWSGEMAPSKCFMWFDIALHMWGECPGCPWPQLTSKPTRICGPHENGDGPDVPIVDTPCDIVMMQVYTCMNNNSYVCDLSRWRMRHSNHAKMFKNHFCCHINLLRQDSNNCFWKVCIAECTQTYQSMPWKPSCSYHYHAVHQQPRMGMNCYWNRGCTNICWPMMASQGRFAVGRMKYPFKPLQWQMSSDCDPIPRHWSARTRAAHFTYTPETEESPSTPYSYNMGWVNTDLYICDGTEQPIYECDVLGGKTCKINSNISQDIADYEVHEHNQEAQTIQYVQICLWGNGWEWSGWTGDDAKMKRAKIEARYQPYEARDKICIECCEDIDRQYRNYEMNMSHATYCRYVLCKTRGLNWPDMNWNCTWLFRRFTQMPGDWRVCCYGHRKVFAAKAGFAGSNQFMVQTPCHIVEVWEWDALNNYYDSNRDNLEFFVAVTTNWQPEVLIMRALTWWFSSNKRFWDKQCGVSEGTFLGAKWIHHKNIKDQSSCKMQDSMHRMKHLIYPIEPWAKTWTDTLPYMSMPVLYCRNRVAMFTSHWMCTCNCWKFISAIGPRPLAMCMWDEQNMPGSGFMLMHHYIPIMFCDKDGTTGDLSPPTRFKQDKTNGFVQRCGSNIASSEAIYSVHPKESSMRYQGMFRHYRNAKYVIKIHCWHFIQDIANIHKHGTCIVLALNFPGWKAEENMYDDVVCQQKPRKVMNFHTWHVYCLVVKPGFITALMHQGECPRGNMKPAQYWYGSQVGTHMVWRQVLGDCPVIIQMSEISNQNDKTGEESDGRTHGALMGDLFLMHCAVRAKEPSTFNQCMAQSKDKFRCNQEMTYVYRTCEYHWTALDMLAKKVYGAFTYRLVFSVVWKLDACMPYVEDRKNQSIENEAYFIQAGMNAVSKSPWHCVWLHTQDYLKRYKLKYGDCQAWKHDCNNPKNIRMCFCKVIHYYFGYGTVMLDCVVLSHSTSWKGIGWVKSSVGWYTDYWVEPYWDVLGMIGDFSWMQLDDLADPSASFTECQDIQPHNRHFIVVFQEHTFQEEMATLEDLEANEYLFRWYLSWGQTSERVKLAKFKIMHWDFIITALEIVIKIMCPYVTAVYCTALFHTWERMRMKRMMEATTACCIYEIRHTFNSNDCCHAHLSSDNLNNMYVETNYRHMPECNVICTKAWLFQFEMERFMSGGDAMFVRDCYCICSHFGGFCERAEFYKTDENRIYFFIGASTEIQMERWPRHNTVLDSRLYAQLHATGAWYRYFLNRHQYHGQRVEYHNAAVYKFVVVWHERRACMASGHDWVIPDYRQFKGSDNPQAMYMRCATRTYFGCCWGLCAIYPGHDEGFLPPYHEEQRYSHDPIMDADALRAAAWSDWTDNQYSYMISGEQEDVLDTNNARSPHNWGVKFLDQSAMWFTMQNILYVI'
word2 = 'PVHECEYITKAAGYVVKERCKLYDARPCHTWYHQWWGWGDECYIMLACRLCLNLHVDQHWQQLMVLKGHNVNYPRSCYMNQPNCDYTDRGLGEADDSEAMGYAICNWEFIQPKGVGWTQIFDWMPWSLFVKHVYGYLPGWKRCQQALMFRADPLEQFFAHVMVWSGSGDALHTVMVAMMWADRDVHYVRLMTSYQQRPVFPPFMERIMHWECYAHWCFNMSSATAGWVQMERNTIAINAGTFNPPIYFRHHNQLYQHRDKTHHEMNFYDSRLGRPDISHMSCLGDTTHCEGNFRGAQKWPILVLAVGEQRTGEDVHSRMMLSAPVHPNCMWVDNEHDMTDSDPTLDMTQCTNGGWTHMQGGWQEEMPCNNDTQGCALWMHKRRLKENTIFILPQAHMMNVFACCCAKHNKEKGNAGEFSYQYVFEESNYNQSLLNRHILLWSNKGLQKDKHWNKKSSKGNGWSKFMHTDKNNDTMIGPWIHGKTWVEYFCKRVNNQYFQVNCKWLDWNVESSENWCNWVYVGDFTDRCFCAMGDTLEFAIAMQANECYETEMTGAMTPYSPCEKQLLRFFHGPDPWYMESCVVDVISTGDMQQITRNIICQLNLQMDVPMIAYQLHATGDRAKRGSHRVELNVPEVSALPHFGMANVTCNCYNQAWPTSKPMTETYALDRIVMYFYFKQSKKYGQQHLIGAHYDGAKQNYIYIDNKGMLWNCRCPFLQGCNEDNTLDYIVSWHNVMRWMKLACCLLYTCQQHIGKAVWNLKCNTKRHRLAYTWRMLHEFRWGCDDGMWKRLNVFYVHIQPEPLTYRPIEEWNGDEAFALLMIGYKGFMTAVKKGVKLFMLYYWLFGGEMPGKYDVVGSFVPHHMCCQKGFAPCCADVQEMSCILYQMTTNPYPQCYMGEQKHFCNCWVECHWFLTSHYPTFAYHVKKAQLIVEIDFTNFFDRFDEVSCAMCARCCVDNNYMWFNSGLIRRYWMICLCQEMIQTLRCDVFMTLSVWKENGYIVQWRIGGRIPQWTFCIQCIECQCEKHDDWKHNRITTITMKTPIQIYFAQAYMVAMKKVWVGIRGEHNVDRSRGMNSCMYSKSYSCTDMTSGDPYHNFIAWRDCSWEIAREEQAQWHMQKAVKEHKNYPNIHFTIETYHAMTMQLQIHWPHIRCTWSGLGYWADMRRRMCQWESQQQDKFRQADIMYAVQKAAYNHNASDELSNTYMAFRTIWDLECHWEMREYHQAVRRTYCCTSGQFYIWESYCYTEGIRWRYILGMRTMFEHPCCIESLVYFAKEHSVLREGDPWSDDVAMMMFWSAIKCQTMAKLEENQVCACCKLFLDCNYIFYLTCQACVTKIQKVPCGDDEFGKFSQKILQASQLPSIQQFVWLMWRVIPYHFDYGNHDMNYYRYGMTSRDNDKGMWENPNMLPKHMHPQVWEGEQTPKTKDVYHHVFKDGNRSTLWYCLESKPWCEFICGTHHEHSIRVADGSVKWVRESWCILLMIPIDYSERRDQGKQMKQPETPWHCASLDNWYWDSANIQKLQWKHKDHVTEGCAGCLSREAWFMHKRHEYDWISFIEDHANYWCETDERPHGACMHQEGGTTINWGQVVATCIQSANAFEMSDLQKHTEDTEMLRSTNDSPYMIDLQAHPLCSHFKTRMVDISSRLATVMVQCWEDGTTFDYTFFLANANFKCEGNLLENQTKLYYLGQKVSEGAPQYEAAWVADNDPRLKEFTCVVLTGSEAVWVQYMEPTNHFSWEMVIEDIRYIQISAGVMFHSTWTVGMQGTNHCDRMDGFSTRDPREQVMKTYWPTSICRRYGMVKPVNAWNFSKHDECRPCQHQWQRRSTQTNPSGLTVCMEISPNAESRLPYMYEGWAQIRAEIVSAMCTMHVIPYPMTIGNECMNNHCQINFPWKAERGLYIAWIFDVYWLKRKHVHYLLLHQCPSKNNDKAPGGVLPHRAVVLSFVSHKLTQAQLFNSKRHPFKRSHSPAGNWFYTDAYCCTISWNFKPSCNGGEWYAVEFQMIVNPQYCHSWVCMFLGVHGDEFVFIGCFHGYRHFNCYGKYLNKFKVEKKWLNNWRFEQRAKTSGYHMSIDWDQDMAVMWNDEQINWYKDGKQWCMFMMCFRRNVHIWKTQGTVNFASNYPDTNVIHYWPPIDFIDNGYILTRTKYTPDSINRTINAQWYEYPSGKLNVEFAKHHFFLKWFMVYKFNVPPFKGHKGITACRLVEHCLILGSCCLNVCHWPISETTAYECSRYIRHPSRLSCHSLFECLIDEDVANAWAWNSGATYWDKLSMMRKCYMQAQARYENWTLLPTKNGSQVRMISNKMYNHYHEHNGSIDQGHTSEWGPHQKSTVRDTLHYKGTIPMFITWSMALHTQSACGFECDRPEQIHIGMMKYIMVIGKRFSRAADNCKKGQDPPCCERPFMLWAVSKGPMTCASASVFYKHNVQAYHANQQPWPWQSQEFGTEEEVSEAPIVIGEFVSNPCQFSLTDMVPFYEALESKRFTIANNLGVNKCDVHQFFKRIKISWCWCTTPKYIYGCIIDVGDTSDMQCFELHDVACPPMHYEKFVCWCVKTCHVIDTLCKSRHQHGHWYSPEWDLTTIMIFYDRMSHKNWSWGAMQIFTMINRVSLSTIRHCPSFKRKFNSKYISSHRFWQMFQTKKDGLRWNVNWPDIPADDDLWHLHGFQPRVFYGSQGKFKTNLPRHVMGKMHPAQEMTEPDGTINHLWLSEYIRVNLVRQTGYSCQLHQPRDDKSECVERKWPWRAIPQYRDTNMMNIYYHCHVWSGMSYTCCNQFYNRWTWWYCECYPPWNTQDNDHPCMTIHPSKMHNFKNMRDTDGWHMGRVMKKCAVRMNTESQEQVLWSYKWFVFVRQHTDKVKCVMVSAVDPIGPALYVRCKTGAVSNGCVFRLTEQPASSCKKSYVIASNSICPYEGSDAVENTYKKERSHSMQWGECGDWNYCTCYVGWPLQMMVDYSLWHQVPCLCWLNLHRTKWTRYGYGNHTMQNMVWLHMVRQHQHFLWPPDGEYIDCNLLEALAYPGDSQPKIVEQPDMKVVLNGSDGAAPGTSQQYFWMKVAWWRKIIQKHDNIHMDLICISWLPGWKKMDGVRKYHTRLVTSGYVDRLPMHQSRGSVCLHGANTWLGNSHVYVMFFSWGKSIMIIPAGGSNCVFLIPETVFRGQNYLMFADSDCPEWYTVYMYGDCYYMAACHIGDQPGVQPGTTDGTRRKRSPIAPYCPYKARQLGYTEPGKRFFLTKKTHIDVHGWSKEYGQPVIYDPRLMQSKTTEECTHCTAYKYFCVEWSVLEIHYGTPDHALKGYTHVPTFLLDFNITIRRTCTDTEEVGIAKTTNQERAYDLIQLYAFIRLPWNDMVVHWWDTMFFLRQRIYIHNSSVMTFNMNSRIWGKNETKEGLDDDAMTFEFKTGFRACCDMLSTMDCGLTFNDTTCREFETYVDAERPLKHNDHPHSVQFYTAHYLGACSSMFGFYSFNDNYACWKMQMFYNKQDYYMDVPAYSRCLAIEMIDPLSCVEQASITNYQMSCHVDFFVHFTWVDHLELTHNDQLKEAGRMDAFCSYWYKQVRFFNQKFEWVLMTEECRANNLRLQKITLFEDMYNNWCMPNKWGFCENWYAPHLHQRDTPDYCVYQIMKDFQYHPAMQRYDLFKNWHRLLEDKLPNNHANTRISEINFYPMMAEQELHDGENIFNKLNAYHCNQHRCEQYTDLEQEYVLIVKFDWQPVLKRDPITHVQIIFGCCMNVQLGPTSMEYAWNYCPGDADHNCHQGVDTMDGPQPCNGLDYNSGPGDLKMYMSMIAIYSLLAVLQLCWTETPLKFWEYWWTYYKGFHWPNYATLCYYYHTGDEGNTTRCTLQNNKYVAVCDRMFWPCDIPRCCRDRRWAFGEWWTGRVLLATHLIAINNHNSPIYQRMRWCDENMPAAIIFAVTGARAPPYIPDATFMHRVDTYHCAEHRYWLHFGKGNNDHAQPQWSWQYIDLYWDTDTGCVHFIMCRNLWPDMCNVAHEQPRKYVWKWEGHSDHIHKGDSYGQGSGDWFCYCATDLYATGKSFFEQCLEFSGVCRIFCEDDWMNFFEQCYDDADSWKHRTARSPSGCEHGMTCGEDMQDICRFDNKSWTISYYWKEGARWAIPIVGCMEEPFWDIQIHLCRHCVPNTFDGIKLKYLYEGSVQMDWVWSYNFGIGHIRDRDQAFDDHHPRDDFKGPMGPYEKEHIVRTFHRCQPKYWAMCRLMSSQNNCAPWTLPDMGPGCVYYVTEIWAEADITRGVRSAPQKDDMFGNAIEMQCTGAWMSISGGWINIPKRMKHGMVEHPWKQWEKQRFASFEKEPFSGACSLCERCKIQWFVNFTFSISTFKVKYKGTDGGYLIGHKGQRIMHVIGHGWYNCKRDINVQHWNNFDECSMMTQMTFDSFHYSHGSRNYQAHNVPVHCKEQGWCFCGFFIALIPDNLDCRFIYPMAHAGHFYLVPWDPWIDTFIAPWDQLGQLACKMESDDLRRTPMVCFKYGLKQRYGTPYEGFSAMPETDCPKNYKESSYVHKLVLKHHLPTLIVRWKYMWERLTCLGFSVKIGKIYKAYRCVAPCIFAYNRLWQMYCAIWGTMIMHQQLCWGPTSCGWCSPSITFITDNLKKRMFREPGQTCGWFRYHEMNRFFQTQRVVWMKRGTMMQCNQCAPAANAEKRVFTECFHCKHRNHEWWPTSMYDCMYTETMLMRGQVGNWHCRRFPEKINLYHAVDSTYWDFWGSCELSFEQYCCRTGCLAQWYMHAYEYITSSHNEFIQRGCWCWKQHMGMVAVPGPTYWAIGNMQKIPNNDRSAYSFCFFIMACVLWAPRCAGEKVDHDCPFDEHNRMMMNRYQVNQHQTGQCDAGKVGQTIVSSMYYRHHAQNWFHQSNARMKIPRNGMMCTVICSQSQPNPCSGWNARGVVQWLYRHQNQHHPNARQTRCMIPQEIHAWDDFAAQTWHAGAVYNAVLPKIYNANWMRATIFCVLHCQLETEIYNMSCMNWYQKGPFKASMLWTINWTYYVCWFGICPSGVIMAIPTQKDESFRCEWDSPQSFKKCVKLHFYIAYNMNNQEPCINQSENPCVCISVPMDHRCAHAERNRFSMTYVDYILNYRTHDRPYNCQEWSFKDWLPDKRMLGAYAVGYCECPAGGCNSSIRGRCHLQFFTLQHSEDTRPCKYALWKTFKFELNEHTITREQITWPMAYEMWDMTSVLTRAEKIWPHEIPKEGKHENCPFKCYAILTQRRHHHKQFNLMPAPNKHVSNRCEHKFNFIWHREGFYARDENHLEQFLQWPSTYRVYIMPGDFARWMPWTLMGPPQSIGGWFGHQGDYWQRDMREIWDCYIVRKCMCGYMWGTRAHNMESQTRPMNLNTLPCAYTCVKIFVIICMEKLIMWLPTAYWDVQQSMHIYPHYQWEWLGFNTEHSLMWDYMKMRNPYFAPGSFSDNQPFLFDKRCADCKSETGEVLARLQMHVSEDETKCQCHQCTYMTWQWELIQLQMMNVPEVVKLENRTTYWYHKKTTQGLYDRGPGRMFFLIFNVMELTVVFCLARWYYLSLCLWLAHHQFCMMHCYVDPHPTQPTSELHMHTNEADDKIYENYNRVKWRVSPTPWWGARLFMMSFFEWHVMVLSRYVQVTNGDDIYVDPICPVWAQIYKEEMLGHWQTDAGAGNHAVRPPRVLMWATHEKIRNFRYRRYTIIVVLIKARKFDDKCQADMIIEDYAHKYQLGLYIAGFDPTFENVCFDCFLDSSQPDQHEQWGTAHQCPCAQGPDMDSYTCFTPMHWTRGNYERLRIWIRAINQVVIEDNWYCTCTMYHDKNRNTQTRMRFWLQMLNVGTQVRIGNAELTMYYLFDHKANHSYIDYHILNWVNDYACNPTHCCYDKRFKKDKAKDRYCPMYFCYARKIWTCGYKSKGTQGTNNTTLAFRDPWDGVICTGLCGDTVLGIQIITEHHDTGYHDMGGYGMQSHQIICSWGEWPDPDYRMCQEPPPKQCLEDQMIFGMEGQPVPHCNSAKTNKGHCGRMNQPWHHQPGSYDPYNQTIEIKDYDIQYQIWVRITLFHHAFSKFPICQTRFTDYGYTVLRTKQANRKQTMQHSEYKLCPMTHNYDNPAMNLEDAEKGEYWPDKKWNWHDDMWHLAAASPRCCVANSQAFIMKSPVRWLQTGTSEFGYYNMAYITYTAGWLVETDHFHATVERERNMYIIKRENCCKGKGKMYHKETEKLPPVILKCVKQHDGCHRPDHFLRTTQPHPPGEDSETKMLYWEIFHRFLPATGHGRQCSRLDLVATFFGFPYRIWWVKYTGTQHPFNISILQMMRSKWICKRGFMNMFHFHCCEIFEYIPGYEKMHATLCDYIKIAYMKAFKENNGPKWVWVCAWKIRDCWVTQMQSRGREYMMNAHETNPCCYQNAVNTIPMMMIWTKLFFDGYCWRQIWWLSWLWICRENQTMTDLWMFSRTALEIRYKTCSRNFHHDKEDGQEHKDRENESIQAHQTNRNPQVRFSMNNFDCLQIWSSEMAPSKCFHNENSRWFDIALHMWGECPGCNWEFQLTQKPTRLLNQAQSCKCGPHENGDGPVKGCDICLDAFPLYLMLYTCFLYVNMYRMGNHAKMFKNHINLLRQRIEKVRYCFWPVSAIAECTQTYQSMPWKPSLSYEKMPRVRCPVHQQPRMGMNCYWNRLCTNICWPMMANLVATINCQGRRAVCMRRIIGWQMSSDPRHWSASTRAAHFEPCNFYNMGWVHTQLYICDGKMNMTTIPEGVPIDVLGGKDSYEHVCPNKTSQAIADYEVHEHNQEAQGMKHSYQVIHEDHDFDNMKLQWWEWSGKMAKIRAGTMTNADDKSCIECDEDHDRQYRNEEPNMGFHMIHHATYCRYVDCFFAGLNWWNCTDMNVYRYFLFRYFTWRCMCCYGHRKVFAEGDSAKEGFAGSNQFMVQQPCHMCTRSGHHVEVQEWNHFLDWAILSNYYDDGRTSHNRFELEFFVAVTTNWRPEVLTPWFNLCNKEFWDKQCGVSEGGAKWINHKDQMHRMKHLIYPIYPWTWTDHLPYMSMPVLYCGKGCMYNRVAMFTSHWMCTCNCLKFIIKFMCMWDEQNHHYIPITGKGSNGFVIISHDIRCGSNIASSEAIYSVHPKESSMFRHMCWVDNTWIRNAKYVIHETFVTSFPNAHKHGNCIVLALMFPGWKAEENNKMYDDVVCQTKPRKHDCPDYRIMMHQNFHTWHVQMQCLYVKPHIDSFIPARSWIPMHMGECPLGNMKPAQYWYGSQVGTHMVWRQVLGDCTVGTCIQMSEIQNELMGDLTCLMSHCAVRAKHPSTFNQCMAQSKLKFRNQEMTYVYRTCDYHWLDMLAKKVYYRLVFSVVWKLGKSEQIIYGACMPFVWPKDRQRKIENENSKWIAYGMNATSKSPWHCVWLHHQDYLKEYWKHDCDDMKIKCFAYTNLGTNPMTWIANLVFLIHYYFGYNTVMLDCVVLSHSLMEITVMLRCTDAVDQDVACVLLDDLADSITQDIQPHNRHFIVVFQEEMATLEDLEANEYLFRAYLSWGQTSERVKIMHWDFIINAVIKIRCPYVTAVYMTAEFHTWERMFCWWATMNRMMELTTACIYAIRHTFNYNDCCHAHNMYVEQNYTHMPTCSSGVINLVQVICTKKWLFQFEMERPMDKGPFSMCVWRSICGGDAMFVRDCYCICSHFEGFCEFYKTDENDTRVGASTATVSKRWLIQMERWPRHNTVLDNKRLTAQLHATGAWYRYFLQRWHTKKFVVVWHERRICMASGHDWVEPDQHYGHYKGSCMRCATRTYFGCCWGLCAITEGFRDKLPYHEEQRYSHDPIPAAAWSDWTDNMISLARSPNDGVKFLQSAMWFTMQNILYVI'
# Get the alignment.
#alignment = global_alignment(word1, word2, scoring_matrix, 5)
#print(scoring_matrix)
#print(alignment)

# Get the alignment.
alignment = local_alignment(word1, word2, scoring_matrix, 11)
#print(scoring_matrix)
print(alignment)

('9910', 'AYITKAALSLYDARPFHT-AHQWCGWGRECYIMVLACWSLRLCLNNYSEDHVWERQFMNHWQKNVNYPRSCYMNQPMCDYTDRG--HADDSEDMGYAICNWEFIQPKGVGVWSLFVKWVHSSITWYGHHGNDGHGMGYLWKGWKRCQQALVEQFFAHVMVWSLWVNMMWADQEYVRLMWFYQQ-RPV-FPPFICMVWCLQQTCEHIKHWEAYAAWCFNMSSATAGWVQSNCTINKMWQAWAINVGTFWLWNAYFRHHNQLYQHRDKTHHEMNFYKSRLGRPDISHMSCLWFLSNWYCRYQTFHLFRGAQKWDQRDIHEDCYFEPILVLAVGEQRTGTDVHSRMMLSA--E--A-YVDNEHDMTDSDPTLDMTQCTNGGWTEMQSCWQKEMGCNNDTQQCALWMHKRRACENTFFILPM-HQAHMMNVF--Q-NKEKG----FSYQYFFEESNYNQSLLNR--L----KGLI--------SSKGNGWSKF-HNDTMIGPWDAQQCY-KLAVEYQCKRVNN-W-----KWLDWNVESSENWCN-LWV--Y--HMYKHMGRT-D-R-C-QGDE-HQADET-EMDNH-P-SG-VTRM-DTCDPWYMESCVVDVISTGDYRTDDMQQITR-NIICQLVKFKRDCYAEKRIVWVGSHRVELTY--INQ--YSGMANTTANCYD------KPMTETYALDRISAYFYFKQSKK-PLQW-V-QHYDGAKQFYIY--HCFSYEN-K-NF-S--NEDNTHDSIVSWH-IMRWIKLACCLLYTCIQHIGKAVWNLEAYA--Y-VA-GWRMI-EFR-S-D-GMWKRLNVFYVHIQREWIPLVVFYAIDTYEIFDGWRDPIWPGSKPPTEEWAGSGYQLFIAVK-TYTGNYEG-CSVVYSFVPHHMKCQKGFAPCCADVQQEMSCQLYQMTVNPYPTCYMGDQKHFCRCWIELTSHYPTFTSVKKAQRQGSTQLIVEIVHEERRNFFDRFDEVSCAMCAR--T

In [191]:


def make_Bwm(t):
    s = t * 2
    t_array = [ s[i:i+len(t)] for i in range(len(t)) ]
    t_array = sorted(t_array)
    return ''.join(map(lambda x: x[-1], t_array))

def rank_Bwt(s):
    d = dict()
    ranks = []
    for c in s:
        if c not in d:
            d[c] = 0
        ranks.append(d[c])
        d[c] += 1
    return ranks, d


def reverse_Bwt(bw):
    ranks, d = rank_Bwt(bw)
    first = {}
    dic = 0
    for c, count in sorted(d.items()):
        first[c] = (dic, dic + count)
        dic += count
    rowi = 0 
    #print(dic)
    t = '$' 
    while bw[rowi] != '$':
        c = bw[rowi]
        t = c + t 
        rowi = first[c][0] + ranks[rowi]
    return t

m = 'TTCCTAACG$A'

print("String")
print(m)
print("\nBWT String")
print(make_Bwm(m))
print("\nReversed String")
print(reverse_Bwt(make_Bwm(m)))
#print(rankBwt('panamabananas$'))


String
TTCCTAACG$A

BWT String
GTA$TACCCTA

Reversed String
11
ATTCCTAACG$


In [179]:
def suffixArray(s):
    ''' Given T return suffix array SA(T).  Uses "sorted"
        function for simplicity, which is probably very slow. '''
    satups = sorted([(s[i:], i) for i in range(len(s))])
    return list(map(lambda x: x[1], satups)) # extract, return just offsets

def bwtFromSa(t, sa=None):
    ''' Given T, returns BWT(T) by way of the suffix array. '''
    bw = []
    dollarRow = None
    if sa is None:
        sa = suffixArray(t)
    for si in sa:
        if si == 0:
            dollarRow = len(bw)
            bw.append('$')
        else:
            bw.append(t[si-1])
    return ''.join(bw), dollarRow # return string-ized version of list bw

class FmCheckpoints(object):
    ''' Manages rank checkpoints and handles rank queries, which are
        O(1) time, with the checkpoints taking O(m) space, where m is
        length of text. '''
    
    def __init__(self, bw, cpIval=4):
        ''' Scan BWT, creating periodic checkpoints as we go '''
        self.cps = {}        # checkpoints
        self.cpIval = cpIval # spacing between checkpoints
        tally = {}           # tally so far
        # Create an entry in tally dictionary and checkpoint map for
        # each distinct character in text
        for c in bw:
            if c not in tally:
                tally[c] = 0
                self.cps[c] = []
        # Now build the checkpoints
        for i, c in enumerate(bw):
            tally[c] += 1 # up to *and including*
            if i % cpIval == 0:
                for c in tally.keys():
                    self.cps[c].append(tally[c])
    
    def rank(self, bw, c, row):
        ''' Return # c's there are in bw up to and including row '''
        if row < 0 or c not in self.cps:
            return 0
        i, nocc = row, 0
        # Always walk to left (up) when calculating rank
        while (i % self.cpIval) != 0:
            if bw[i] == c:
                nocc += 1
            i -= 1
        return self.cps[c][i // self.cpIval] + nocc
    
s = "smnpbnnaaaaa$a"
cps = FmCheckpoints(s)
cps.rank(s, 'an', 2)




0

In [9]:
rankBwt("smnpbnnaaaaa$a")


([0, 0, 0, 0, 0, 1, 2, 0, 1, 2, 3, 4, 0, 5],
 {'s': 1, 'm': 1, 'n': 3, 'p': 1, 'b': 1, 'a': 6, '$': 1})

In [123]:

d = [i for i in "T$GACCA"]
s = [i for i in range(len(d))]
#print(d,s)
n = len(d)
for i in range(n):
    for j in range(0, n - i - 1):
        if d[j] > d[j + 1]:
            d[j], d[j + 1] = d[j + 1], d[j]
            s[j], s[j + 1] = s[j + 1], s[j]
#print(d,s)
for i in range(len(s)):
    if s[i] == 3:
        print(i)
        break


1


[('$a', 12), ('a', 13), ('a$a', 11), ('aa$a', 10), ('aaa$a', 9), ('aaaa$a', 8), ('aaaaa$a', 7), ('bnnaaaaa$a', 4), ('mnpbnnaaaaa$a', 1), ('naaaaa$a', 6), ('nnaaaaa$a', 5), ('npbnnaaaaa$a', 2), ('pbnnaaaaa$a', 3), ('smnpbnnaaaaa$a', 0)]


[12, 13, 11, 10, 9, 8, 7, 4, 1, 6, 5, 2, 3, 0]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
['$', 'a', 'a', 'a', 'a', 'a', 'a', 'b', 'm', 'n', 'n', 'n', 'p', 's']
['s', 'm', 'n', 'p', 'b', 'n', 'n', 'a', 'a', 'a', 'a', 'a', '$', 'a']
[13, 8, 9, 12, 7, 10, 11, 1, 2, 3, 4, 5, 0, 6]


In [180]:
string = "CCT CAC GAG CAG ATC"
last = "TCCTCTATGAGATCCTATTCTATGAAACCTTCA$GACCAAAATTCTCCGGC"
last = list(last)
first = sorted(last)
nums = [i for i in range(len(list(first)))]
x = []
for i in range(len(first)):
    for j in range(len(first)):
        if last[i] == first[j]:
            first[j] = '.'
            x.append(j)
            break
first = sorted(last)

print(nums)
print(first)
print(last)
print(x)

string = string.split()
z = []
for t in string:
    #c = len(first)
    top, end = 0, len(first)
    c = len(nums)
    i = len(t)
    print()
    for i in reversed(range(0,len(t))):
        s = 0
        print(t[i],top,end,c)
        for j in range(len(last)):
            if t[i] == first[j]:
                top = nums[x[j]]
                s = 1
                break
        for j in reversed(range(len(last))):
            if t[i] == first[j]:
                end = nums[x[j]] 
                s = 2
                break
        if s != 2:
            top, end = -1, -1
        
        if top == -1:
            c = 0
        elif c > end - top + 1:
            c = end - top + 1
        print(t[i],top,end,c)
    z.append(c)
print(z)
    

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]
['$', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'G', 'G', 'G', 'G', 'G', 'G', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T']
['T', 'C', 'C', 'T', 'C', 'T', 'A', 'T', 'G', 'A', 'G', 'A', 'T', 'C', 'C', 'T', 'A', 'T', 'T', 'C', 'T', 'A', 'T', 'G', 'A', 'A', 'A', 'C', 'C', 'T', 'T', 'C', 'A', '$', 'G', 'A', 'C', 'C', 'A', 'A', 'A', 'A', 'T', 'T', 'C', 'T', 'C', 'C', 'G', 'G', 'C']
[36, 15, 16, 37, 17, 38, 1, 39, 30, 2, 31, 3, 40, 18, 19, 41, 4, 42, 43, 20, 44, 5, 45, 32, 6, 7, 8, 21, 22, 46, 47, 23, 9, 0, 33, 10, 24, 25, 11, 12, 13, 14, 48, 49, 26, 50, 27, 28, 34, 35, 29]

T 0 51 51
T 24 29 6
C 24 29 6
C 41 46 6
C 41 46 6
C 41 46 6

C 0 51 51
C 41 46 6
A 41 46 6
A 15 1

In [239]:
import numpy as np

def creat_counter(last):
    d={}
    count_matrix_head = []
    for char in sorted(last):
        if char not in count_matrix_head:
            count_matrix_head.append(char)
        else:
            continue
    #print(count_matrix_head)
    ret_mat = []
    for char in last:
        if char not in d:
            d[char] = 0
        d[char] += 1
        ret_mat.append([d[c] if c in d else 0 for c in count_matrix_head])
    return(ret_mat)
     

def BWM_matching(first, last, pattern_r, LtoF, Count):
    top, bottom = 0, len(last) - 1
    letters = list(set(last))
    letters.sort()
    while top <= bottom:
        if pattern_r:
            s = pattern_r[0]
            pattern_r = pattern_r[1:]
            s_i = letters.index(s)
            top_i, bottom_i = top, bottom
            #print(np.shape(Count))
            for i in range(len(Count)):
                if Count[s_i][i] != 0:
                    top_i = Count[s_i][i]
                    break
            bottom_i = Count[s_i][bottom]
           
            top = last.index(s) + Count[s_i][top_i]
            bottom = last.index(s) + Count[s_i][bottom_i]
#             if s in last[top:bottom+1]:
#                 lastColumn_top2bottom = last[top:bottom+1]
#                 topIndex = lastColumn_top2bottom.index(s)
#                 joined_lastColumn_top2bottom = ''.join(lastColumn_top2bottom)
#                 bottomIndex = joined_lastColumn_top2bottom.rfind(s)
#                 print(topIndex,bottomIndex)
#                 top = LtoF[topIndex]
#                 bottom = LtoF[bottomIndex]
#             else:
#                 print("p-correct")
#                 return 0
        else:
            #print("correct")
            return bottom - top + 1
    #print("incorrect")
    return 0

def length_of_patterns(string,last):
    last = list(last)
    first = sorted(last)
    string = string.split()
    nums = [i for i in range(len(list(first)))]
    x = []
    for i in range(len(first)):
        for j in range(len(first)):
            if last[i] == first[j]:
                first[j] = '.'
                x.append(j)
                break
    #print(nums)
    first = sorted(last)
    #print(first)
    #print(last)
    #print(x)
    #print()
    Count_matrix = creat_counter(last)
    Count_matrix = np.transpose(Count_matrix)
    patterns_length = []
    for word in string:
        word = list(word)
        word.reverse()
        #print(np.shape(Count_matrix))
        patterns_length.append(BWM_matching(first,last,word,x,Count_matrix))
    print(patterns_length)
    
string = "CGAGTTGGGC GAGTCGGGCC CGTAACGCGG TTTTGATCGT GTGTAGCATT GACATCTCGA AGGTCTGGGG TGAACAGGTC CAGCGGATGT TCCGGCATTG GAATCTCTAC GCCAATTCGG AGACGAGATG GATAGGACAC GACGAGGCAC CTAACCTGGA ATTGCTCGAC ACACCTAGTT GTAATCTGCG ATCCCCAACC GCAAATAGCC AGCCATAGCA ACACTTCTTC CCAGCCGCTG TTAAAAATAT GGTGGTGGGG CTAGTGCCAC ACAGAGATTT ACAAGGATCT GAAGGGTCAC ATTAAATGCT TCAAGTATCT GGTTACATAC TGCCCTACCC GGGAAGGTAG CTGTGGGGAT CTCCCCGATG TACACCGCCA AACAACGCTG TGCCGCAGAA TCAAGCCCTC CGTTTGATGT TGATAGGACA CGGTCTCGCT ATCGCTTTCG CATGGGGGCA ATCTTCCTGA CTACGATTGA TCATAAGTCT GTAATATGCT CAAAAGATTG CAAGCTCCAA TACTGCGCCC AATGTAATCC AACCGGTCGT CCCTTCTTAA CCCTTCTTAC TGTGTGGACG TCGTCTACAG CCTATACTGA TGCACTTCGC GCTGTTGGAG ATATCCAGAT CGCCGTCTGG CGAATGACCA TGACTTCCTT ACATATGGCT CATGCCAGGA CCATCCCGCT GGGGATCGGG GCACGACATC TAAGATAACA GCCCTATGCG AAGCGTAGTT CTGACAGTAA CTCGCACAGT AGTAGTTCTT TCATGTAACA AATGTGAGGC GTACTAACGC TCAAGACCAG AAGCGGAGCT GGGTGGCTTT TTTCGGAACA AAAGGGGAAG TCTGACGGTC TGTAAGTTAT TTCGAACTCA CTAAATAACA TATGTGGCGA TTGATGTGCC AGGGGAATTG ATAGACTAGG GTAAATGATG CACGGCTTAT TGATTGTCCG ACAGCTTCTA AAGACTCATT GGTACATCCC CCGGTCGCGT AACCATCACC TGGTGTTTCA TTACGAGATA CCGCCAGGCA TAGAATTTGC CTGAATACTC TGGGAACATT CCGCATCACT AGGCCTTATC TACATCCCTG ATTAAGGCTA ATCCCGGCCT ATTTCGGTAG AGGAGTCCGA TCTTACTGAG GCTCTTCGTT CACACGAACG ACTGCCTGTC GGTAAAACCT GTTTGACGTG TGTAGCTTTA AACCCGCCCC GTACGGCGCG GGTTTGTCAA CGAGACGTCT TAGCCCGGAA ACCCGAGTGG ACGATTAAAG CTGGCCGTTA GCGTCCGCTG ATGTTTGTAA CAAGTCACGA CTTTTTAATA CCTACACTTC AACTAAAAAG CGCTAGCGCT CTGGAGCTTA TCGCTTAGAA CATTTCGGCA GCTTTATCAA ATAAGTTCCA CTTGGAGAGC ATGGACAAAC TTCCTGATGT GGCAGCGTCA ATTACGAGGA CCCCGTTTAC GTACTGCCCT TGCCGAGCTA AGCCGACCAG ATAAACCGAG GCTATACCTT GCAATTGACA GGTCGCGTCC GTGTTAATAG TTGAATCCGC CGCCAAACTC GAGCTGAAGG AGCGCGCGGA CGAGTCCGTG ATGGTTGTAA TGGTTCGCTC AAATCACCTT GACCGATGAT AGATTTCAAT GTTTTACTGA TAGAGCTGAT TAGTCAACGG ATGTCCCACA TGAGGGAAAG AGCGTTATTT CTTAGACGTG TGCCGAGTAT TCTGGTGTCT GGAAATCCCC CTCACCATAC CAAGCATGTA TACTAAACCT CCCTCTGACG TGTTCGAGAA CTCCTCAATG CGTGATAAAG GGCTGAGCGG CTCTGATGTC TTACAGCGTG GTACTTAGTA ACGTCCGATA TGTAATTCTC GTCAAACTAA TAAGAGAGGA TTCTCTCAAT GCTGAATTAC CGAGAGCGCT CCTTGGTGGC TTTAGGCCCC CCTTGATGAA AAGGACATTT CTGTTGGTCG CTCTGGCGAT AAGACGTCGC CTACGTGGGC AATACTGGCA GTACACGGTC CGTGCACGTT CCGTAATGAG AGCAGTTCCC GTGACACCGC TTGTGCGCAT GTCAACCGTA ATTTGCTTCC TCCCCATTGA TCACATATGT CAAAAGGTCA CTTGAAGGAC GGTCCGCCAG ACGCCATCAC CCTCATACGT ACTTTGCAAG TCCGGAAGAC CACGATGTTC CTAACTCGTC CACGGCATGA TAACCCTGAT CAAAATGAAA GTCTCGGTCT AGTTCATGGT TAGGACTAAA AAGGACGTGC ATTAGTTTTT GTCGAGTATC TTAGAACCTG CCTTCACTGT ACGCCACCTA GATACAGCTT CGTAGGCTAG ATCCCTCATG CCCCAACACA AAAACTGCAC AGCGTCATTA CGTAAATCTC GTAACCGTCG ATTCTACCCT GAGAGGTTAC TAAGGGGGGA TTCCGGTCTG GCATGTACTG ACATCTAAGA AGTTCCCCTG GTCCTCGCTG AGAGTAGGTA TGCCGGCAGC AAAGAGGGGT ATACACAATA CGTATGCGGC ACCCTTGCTC CATGAGCGAG AGGCTAGAAC TTGATGTGTT TGTCGCTGGT TCAGTACGAA TTTTATTATA AATGTCACAT GACCCTGTCG GCAAGCTGAT TGGTCGTGGC ACCGCCCGTA GTTTGTCGCT TACATCACGC GGTGGTACAT TGACGTCTTG GGGTGTGAGA CTTTTATCTC CACCTTTTTC AACCTGGAAG CTGGAGTTTG CTCATTTCAA AAAGGCAGGG GAGGTCGTTC CTACGTTTCG AATTTTATCC TACCGGCAGT GGTAAGCGTT GCGGGAGCCA AGCTTAACTG CCTCTATCAA AGACGTGCGA AACCATAAAG GGAGCACTGT ACTGCCAATA TGCCGGAGCC GTAAACAGTT TAGCTCCGCA GACTCATTTT ATTGGCTTAG CTCAGCTGAC TCGCCCCCAT AAGACAACCT AATTAAATCG TTTGTTCGCG AGAAACAACC TAGTCGCTTG AGGGTCTGTG CGAATGCTTA ACACACTTGG CCAAACTCCA TCGCTCCCAT TATCGCACCG CCGTTCCCGT CCACGATTGG CGCACTCGTC CGGGGTAACA TGGCCGTGAG GGCGACCCGC ACTCAAACGC TTGCACATGG TCTCAGTGGG TAATCTCTCC GTTGCGCCGG AGCCGGCAGG TGGCGGGACG CCAACTCTCC AGCTCGCATC GATTTGGATA CTTGTCGACG GCGCGAGACC CAATACTAGC GCAAATTCTT CGTGAGAAGG CTACGGATTA TTCTTCCCGG CGTAGCTTTG CGTACACCCA ACTTTGACGG AGAGTGGGAA TGGTTTGGGT GCACAGTTAC CGATCGGCGG AAGCACGACA CAGAGGAAAA TTCAGCAATG GTCGATGGAG GTACGACTCT CGTCAGTTTC AGCCTACACT CCGTATCGAA TCTCGTAGTC CGAACGGATA CATTCACTGC AAGCGATTAC TTGCAAGAGC AAACTGTTCG TCCGCAGGCT CACTTCATGC AGCTGTTTAG CCCACGATTT TGTCCGTGTT TTGGAGGCCT GGTTTTACAA ATCAAATTCA TGGAAACAGT CCCTGCCCCC CCTTCTTCCA CGCGTCATTC TCTTACCGTG TGCCAAATTC GAGTGTCCGA TTAAACTCCG TAGCTGCCGG TCCGTGCGCA GGGCTGAGCG AGTCTTGTGC GAACCGCTGG CTCCCGCCAG GTGACATCTT GGCGCGGCTT TGTCCTCTCA AGTCCGGCAT TCCTGAAACT CTACAGGCCA CGAGGGCCCA CCGAATAGCG AATCTACATA GGTGTCTTGG CCGGTACACC ACAATCGGAT ACCTGGAAGT AGGATGCCTA CGAGACCAAT TGATAACAAA CCAGGTTAAC CCCACTTCAA GAGCTAGACC GGTTCACTAT CTGGTTAGGC TCGTGAAGTT CCTTCCTTGC GGGTGTATCA TTATTAGCGA GCAGAAACAA CGGTACCTAA CTCTGCACTT TCGAGACGAT GATTACATCC TCTCAGGTCG TCCATTGCGC TGTACATTAA AGAACGTGGT CTCGGAAGTC TATTTAGTCT CTGTTTCTCC TCCGGCTGCA ATTCCATAAT GTCTGGTAGG CTTGTTGCGC CTTTAGAGAT TCGTGACGAG TGCCTTATTA AATCACGACT ATCCTGATAG GCGGCCCAGA AGTTTGTTCG AACCGGCCGC AGAGCTGATT CCATGTGGAA ACGTCGAGGG GTAATCTCTC GAAGTCGTCT CGGACGTCAC AGACTGGGAT TAGACTCAGC CCAAGAGGGC AACTGACTAT TGCACGCGGG CACCCTAGGA CCGCAGTTAA GACTTACTAC ATCACTTTGA CCACATCGTC ATAAACCGCG CCTCTTATAA CCATAAAGTT ATCGCATATG CGGCTATGAT CACGACTTAC CCTTACTGAC CGATCGCATA GGGCCGGGTG GATGGGCAAA GTCCCGCTCT CAATCTCGCC TGCAGGTCAC TAAACTTACA TGTGCTTACA TGCGCTCTAG CTGCTACACA GGTCTATGGC TACAAGGCTG TAGTTCATCT CGTTCTAGAT TTTTGTTCTC CGGAGAAAGC CAAGCCGGTT ACTACCAGCC GAGAAAAAGC TGGGGATTGT CAGTGCTTCC GTGTCTGCCA GGATACCGGA AAACGACCGG CAAAGCTGGA CAATTCTGAT ATAGGCTCGC TCTTGGCCGA ACAGACGGTT GTGTGCTCCC TGGGCTACAC GTAGAAAGCG GGGCGGGGTA TGACCGTAAA TTTGGCCTCG CATAGACGTG TAAGGCCTTG TTCGCGTAGC TCCGCAACAG CTGCTTCAAC ATCATGCAGG AAGCCCCTCG GGCAGGGCGA GATTGAATTG CAATCCCTCC AATTCGGTCA TAGGGAGCTC TTTAGATGCA GTGAGCCATA TCGGGATGCG GAGCAATCTT GGACTCCCCG GTAGCCGGCA GCCCCTATCA CATATAGGCT TAGAAGGTGC CTCAGAGTAT ATTTACTAAC GTATTCGGGA TTATCAATAG CAAGGTGTAA TGCAGTATCT AGTAACAACG CGGAATCTTT CCGCGTTAGT CGCCTTAGAA AGTAATCTCT CTGTACTGCC GCTTGTTGCG TGAGGCGTAG AGCTATATGT TAAACCTTAC TTTTGAAGTG TGATTGTATA AAATACGTCA CGTGCCCAAT AAAAAGCACG ACCGGTGTCC TCCACTGTCC CGATGCGCAC GAACGATGCA GGACCCTACT ACCAGGTGGC GGATGGCACG AAATGCCGCC TGTTGCGCCG GACCAAGAAG GCAATCTCGC CCTCCACGCA GGCTCGCTCT CTCAGCTGCG TGCATTGTGC ATTCTACGAA CCCTATATAT GAGGAGTCAG TAAGTCTGCT CTCTTTGCCC TAAAGTGTTA GGTTTAGATG CACCGTTGCG GACTTCCATG TTCTAGGATC ATGGTGTGCC AAAGCTGTTG TGTGACAGGC GATGTCACGC ATTAAGCAGC TATGAGCAAA TCTACCTAAG TCAAAGATAT TCGGGCTCAG TACCGTTCCG AGCAATCTCG AACATATAGG GTGCGCACAT TTCCCGGTGT GGCTTGTGAT ACATCGGGCA TGGCGGAGCC CATTGCTTTT TTGCGCCGGC CCCCCTTATC CAGTCCTCAT TATAGGCTCG CACCGATACC TCACTGCACC CTAGAGTAAG ACGAATTTTG GTATCAACAT GCCGGGCTGC GTTCATGGTA CTAAAGATCT AGTTCCCGTA TGCAGTCAGG TAAAAAAGTA TGACTGCTCA CTGAAACTTA TCTGCCATAC CCCTACCACG AGTGATTACA GTTCCGTTCC AGCCCTCTAT TCGGCCACAC GAGGTGCGAG GGGCCGTAAT TGCCCAATAT GCCGGAGCAG AGGTCCCAAG CCTGGGTTTT GCTCCGATCT GTGCTTACAT AGTTTGTCTA GGACGTCGAT TCGACCGGCG TATGGATTCT GGACATTAAC GCCCTGCATA TAATCGGCAG TGGACAGTCT CACTCCTTTG CAATCTTCCT TCCTGTATGT ACGCTGCGTT GCCCCACATT CGTTTTATCC GTGTACGAAA CTTTTCAGTC GACGTCTTGA ATGAAGCAGC GCGACTGACC CCCTCCCGAG CAGCTCAACA TTAACAGAAA ACTTTAAGTC CAAGGTGCGA CTGGGGACGA ATCTGCACAA ACCCTCCCTG CAATGGGACG CGCATCACTC GCTAACTTAG GAGCTCAGCA TGTATTAAAC TAGGAACCGA AGAACAACCC TAGCTATACG CGCTGCAAGG TCCCTAGTGC GTGTAACTTG CCTAGGTATT ACGACCACAG CCGTGATAAA AACACTGTGG ACAAGCGTAG AAGCAGTTCC AGGTTTTACA TTGCCCCAGA CTTTGGAGCC TTCGGTCAAT TTCGTCTCAG TTCGCTTAGA TCCTTGTACG CCGCGCTTCA CATGCAGGCA AACTAGGGGC GCAGAGATTT CAACCGAATG ATAGCCAGGA GATAGCGTCT TTCTGCCCGT TTGGAGAGCT GCTGAGTGAA GCTGCACCGA CTATTAACTA ATGAAAGCTG CCCCTTGTAA CCGTAGCCGA AAGACCTTTT CGGCTGCCGG GATCGCTCCC CCACGATTTT CAGTTTAGGT CAAATTCTTC CAGTCCCGCT ACGTGGCGGG ATAATCACGA CTCGAGTCCG TGCTTCCGGG CACTTTGAAT GGTTCTCGTG TAAAAAGCCA AGGCTTATCA CGGGAATATT CAGCAGATAG ACGTGATCTT GAACTTGTCG CCAGGCACAC GAACTTGTCA CGGGAATATA TGAATACGAC CTTGCAAGCG TCATTCTCCG AGAAGTCGCG TCCTAATTTA GGGCAGGACC ACTACCGCTG TGGGCTTCAT AATTGTTAAT ATTGTATGTT GGCATCTGTA TCCTGATGAG AATCATTAAC AAGACCGTGG CGGCATTGCA ACCAGTCTCT AAATTCTGAT CACAGGGGTG AGGTAAACTC CACTAGATAC GCTGCATTGG GAAACCACCT TGCAATCAGA AAGAGGGGTG GATCTCTCAC GAGGTATTGC CAGCGATAGA ATAAGGCGCC ATCTGCGAAC TATCGTGGTT GGCAAAAACA TTAAGTAGGG TGGTGCCGGC AGCTCACCAA ACTAGCTAGT AAAACCATGT CGTCACACAG GGAGCGAAAG CTTTACCTTC GGCGGAAGTG TTCGATATTT CGATCCGATC TCTAGCGGGC CCTTTAAATC AGCTTAATGT AAGTAACCTC CCCTGTCCTA TACGGAAGTA GAGAAAAGCT TAACTATGGA ATTGGTAACT CAATGAAATG TCTATGGTCC TCCGGCTGTT TTAAGTTGCG CCGTAAGTCC CTAACCTATC TAGCACTGAA CATGCGATGT GGCCTAGTCT CGGCAGTATA TCAAAGGCCA TACCTTTGAC TAGGATCGCT TGTGAACCGG CATACAGGTG GTAAGACGAG CACTGTATAC AAGAGGGCTG ATCGTTGTCT GTAAGGGATG AATACCTATT CTTCTAGCGA GTACAGGTTC TCTACAGTTC CTTTGCTGGC CCTAACCTAT TGCGAGGGTC ACGGCGAGTC GCTGCGGGTA CATAGAGAGG CAAAGCATAG TTCCGTGGCG ACGTGCCCTG TAATTGATTC GGGGCTTAGG GCTACGACGT CCGGGGCCGT GGCCCACTTC GTTTCAGACA AACGGAGATA ATGAGTTAAG AATGTTCGCC ATAAAGGACG TGAAACTCCC TTTCTGTTTC GCCCTTTAGA CTTTTGGCTT TCACAGGCAG GTCGTGCGAT TAGCCGTAGG GGTGCGAAGT AACCTTCTAG CTCCCCACTT CAACTGGAGG AGGGCTTTAT AGTGGCTGTC TGGTCATAGT GTCTACTGAG TCCTGCGATT TATGATCGTC TCACTTTGAC CGTATTACGA CTTTCGGCTT TTCCGCCATA TTTCTCCTTA CAATGGGTCG CGAATGGACA GGGAACATTC TGGGCAGCTT CGTGTAAGCA CGTACTTTTC CGCTTTTTGT GTCTATGGCG GTGACTGCCT GTCATTTTTG TATGCCCAAG ATTCTGGCGG CCAGGGATTA AATAGGCAAT GCCGAGTATC TAACCGGAAG ATCTCTGCTC GAGCCAGCAT TAACCGGAAT CATCTCGACC CTACCAGGTC TCTTTTATCT ACCATCCGCT ATCGTATGAG AGTCTGAAAG AACGCCTGGG AGCCACCGAG TTAGTAGCGT GGTGCTACTC AGGCCATTCC GGCTGCCTTT CTACACGTGT TGGAATCAAA CTCAACTGGT GGAGCACTCC CTGTCGCAGC CGCGTTAGTG CCTGTGTGAT GGCGCTAATT ACAACCTGAT ATAAAGTTGT TTCCCATCGC ACCATAATTC AACTTCGCGG ACGCGCGGAA GGGTAGGCTC CCGGGGCAGC GGGTGTCTGG GAGTGACATC GCAGAGTAGG AGTTGTCGGC TAACCGTCGA CAACCCGTGC AAACATACGG GGCCATACAT GACTTTGGGA GCTCATCCCG GAGCAATGAG TCTAATCACG CGAAATTTAA TTTTGTGGCG ATCGGCCCTG TAGGGGAGGC ACCCAGAGGC CATCTGGTGG AGTCTCTTTT TCGAAAGACC TTCTGATCGT CAGAGGCCAT TTGGTCTATG TATTGCCAAT GCCTGTTACT ACGGAGCCTG AGATACTTAT TTACAAATTG TAGTGCGGCC CGAGGGTGCC CTACAGCACC CAATCCCGCA CAGCGTCTCG TGGAAGCAAG TCTGTTAGTC GAACGTACAC GACAGGACAG GTAAGTCTGA GTATGTGTCT AGGTTAATGT GACGCAGCCT TCACTCATGC AGGTGCGAAA GTTCGCGTTT CAGGTGGTAC GGAGAGTTGT GCACACGTAC ACACGGCTTG GCACTGAACG TTTAAGTTTT CGTTTTATAC TGTATTGTGG CAGAAGGACG CCGAACTATA GGATTACCGG CTGTCTAGAT CATGTGTCCA TGATGAACCT TGCCGAGCTT GGAAAACGTC AATCCGCAGG GATACTGCAA AAGCTATATA CACCACCTAT GGGGCTGCAA GAGGCAGTGA GTCCTCTGCA CGACAAAGCG TACTTGCACG TTACAGTACA TCTGCGCCTG GGACGCAATC GATGATGGTG TTCTAATTTG ATTTCATGTC TGATGTGGGT GGACCTACTT GATGGCCAGA TCTGCATTCC AATGGAAGGC AAGCACTCCC GATCCGATTG CTCCATGGGG ACCGATGATG AACGTGGTCC CTAAACATCA CTCCCTGCGC GAGAAACTCA GGTCGGGTCC CCCTCTGTAG ACCCACGATT AGGCAATTGA TTTGGCCTCT GCGCAAACGA ACCGCTACCT GTGTTCGTTA TATCGCCGTG CTGTCCTCTC GGTAGTTTGA ATTTTTTATT TTCGAACAAC AGCCGTCCAG TCGGCAGAGT TGTTTATATC ATCCATTCTG GTCTCAAGTT TTGTCGGCAC TCGTGGCAAA GTGAGTCGGG ATATGGTGGT CTGTCAATGG GGGAGTGGTC ATTTTTGAGA TATGCGCCTC AGAACATGTG GGCTTGACCG TATGACCTTT CTAAAGTAAT TACTGCAGTC ACAAATTGCA GCATGGTTTC AAGGCCTTGA GGCAAGACAG TAACAGCTTC AAGACCGCAT TAAGTTATTC TTTATCCGGC TCAGGAACTG CATTATGGTT GCAACTCTCA CACTCTGAAT CAGTTAGTTC ACCACGACAA TGAATTGGAA TCCTACGCTC CTAAGGTGCC GACGTTTGAG AACGGCGGTC TGGAAAGCGT CGTAAGAAGC GAAATCAGCA CTGATAGGCG CTGTCCGTGC CCACTACCCT GATCCATTCT GTTGTAATTC GCGCTCCTAG ACGTACACTG CGTCAGAGAC CCTTCTGAAG GCGCTCTTTC TCGAGACCAC AGCGGATGGT ATCTTACCGT GGATTCTGAA ATCCCATCCC CGGAGCCGAT CTTACCCTAG TCCTACCGGC GACGACGAGC ATACTCCGAT CTGTGATTCC CGAAAAAAGG GCCGCCAAAC TGCTCAGCTG ATCAACCGCA AAATGACAGG GGACGTTTTC GCCGCTGACG CCCAGATTAG TACAGGACAC CTTCATTTAG GGAGGAACCT TGGCTTAGAC AGCGACAATG CCTCAGTGAA TGAAACCGAA GTTGCAGTAC CGGGATATTT ATCTAAACCG AATGGGCAGC ACGTGTAAGC AGAGAATGGA GGATACTGCA TGGAGAGCTA CGTATACCTA AAGACAGCCG TCCCGCCAGA ACCCTCCCGA GGAGCGTACG GATACATACA ACCGTCAATA TACATCCCAA GGGCTCAGGG GTCCGTGGGC CATATATTAA GATGGCCGTG GATTCCTCAT AATCGGTCTT ACACGGATTC CCCTAGGATC AGGACCAGTT ACCACCATGT GCATCAATAG ATTATCATCG TGTGCGAGAA CGCAGGATAA TATCCAGAGC AAACGTAGGC GGTAGTAACG AATCGTAACT AAGGGTCGGG ATTCCTTGAG AATACGTTAT GAAATTGACT ACTCGTTATG TAAATGAGGT GTTATCGTCA GGACAAAGCT AACTGCACAA AGCACTCGAT GTCTCGCTAG AAATTAAGGT ACAACAGACT AGCATGTAAA TTCGACCTCA AGACCTGAAA TCCTAGTGGA AGGAGGGCTA GTGAGTGGCA TAAGTACACA AAGACACGAG TGCCGTACCT ATTGAAACCC TAGAGCAAAT CTCCGTCCCA CAGCTCCAGT CTTCTGAAGA CAGGAACTGA TACTAATGTG GCTACGCGCC AAAGACTCCC TCCACGTAGC CTGAGCAACG CCGCAGGCTT GGGTGGCCCT TGACGGACTT AATAGGCAGA CGTTGACTAG GAGAGTGACA TAGCGTTGGA CTCTCCGCAG GTGTATAAGG CGCCATAAAG TTTTGTTGAC ATTCCTAATC TGGAGGTTGG ATTCTGATCG TGCGTTGCGA TCCATGTATG CCAAATGTTG AAGGCCCTGT GGCACACTGC TTCCTTGAAC AGTTCAATTC CTATTACAAC CTCAAGACTT CATCGAGAGG GTCGAACGCA CGCATCCTCT GCACATCAAA TACAGGTTTA ATGGCGTCAC CTCCAAGTTA CAATGCCGCA GCATTGCACC GCAAGAATTT ATCGGGAGAT GAATCACGAC CAGGAGAGTT GCGAGAAAAG TAGTACGTCG GGTGAGACTA CGTTAGTGAT TCCCGTTTGA TACGGGAACA CTATACCATG TTCCCAAATT GGAAAAGGGT TCTGCACTTC GACTTGCTCA CATATGTTGA CAAGCGTAGT GGACTTGTCG TTCATCATGC GGCCACTCCC AAGTCGGGCT TTGCCTGGGA GCTCGCTCTG GGACCAGTCG GTCTAATAGT AGAGTTGTGG TGAACGTGGA GCCCAATTTG TCTCTGAATG CAGGCAACCG AACATTTCGA TCGCACTCGT GGTTAGTTAG CAGGCTTATC TGGGACTGCA GTCCGTGCGC ATTTAGACAA ACACGGAACA TTGATATCAT ATACAGGACG GTGTCGTGGT ACCCGGTCGA GGGCCACCGA ATTGATGTGT ACGACAATAG CGTCTCAGTG GTTACCCTGC CTCGCATCAA AGGAACTGAC AGGAACTGAG GAGTTGTGAA AGGTATTGCT TCTCCGCCTC GTTGTAACTG ACGAGTTGGG AGATCGCTCA CCCGTCTAAG TGGCAAGGTG TTACGTAAGC TATGCCAGCT TAGAAAGCGC CGGGAAGGAC GAAGGACGTG CAATAGACTC AATAAAACTC ACGTACCGGA AACGACAAGC CAGACCATCC GAAGGCATGA TAGTGTCGTA CAAGTGGGAA GTCGAAAGAC CGATGGGTTC TTTATACGTA GTCAGTTGGA GCTGACCAGT GGGTCATGGC TTAGAGTTAA CACTATGTTA AAGGCCTGGG TTATCGTCCC TTAACTAGAA TCAAACATTT TCCTTGTAGT CCCAGGGGGT AATGGGGTGG TCGGATACGC TGATAAAGGA GAGAGTTCAG GTCATGTAAA TACGCTCTGA AGTAACGACG CTAGTAGGGG GTTAGTGATT GTAGCTCCCT GTCTGGAACG TGCCTTCTAT GATCATACCG CCCACTACCA TAGAAGCAGT AGTGTTGTAC TATAGATTCA CACCCTCCCG TCCATTCTGG AAATGTCATG AGTTCAAAGC ATTAGCTGCC TGAGCTTGGA GCCTCCGGCT ATGCTACGAG GTAACATGGC TCTACTAGAC TGAGCTCTCG GAGCTAGGGG AATAGATAAG CACTATATTG GCTGCGATTA GCTCCTGTAC GATGTACGCG CGACCCGTCA TGTAGACAGT CCCTCAGTTT AGACTCCCGC TAAAGGTTTG GACACACGGT AACCCATGCG AGACCGGTAC CTTCGAGTAA AGACGCCCAG ACAGATAATC TTGACGTCGG AAAAGTACTA TCTAAACTCC CCATGGGGAC GGGGTGTGAG GCGGAGAGCT TAACACGATG TTCCACGTTG AAGGAAAAGT AATCTCTCCG GAACTTCGGC CACTTTGTCC GGAATAAGTG TTGAAAAGGA AAGATCTGCT GACGAGTACA TATAGCGCCC AGTTCCCTAC CGGACAAATT GATGTTTGTA GCCCGATTCC GGCGATAGTC AATGGAGTGG GCGAAGGCTG AGCCGCAAGG TTTTCAGTAA ACGATGTCAG GCAAATTCCT AATAATTAGC TTAGTTAGCA ATCAATAGAT GTACTATTGA GTGATCGGTA TGCATATGTC CCGTTGATAG TGCACAGAAT CTAACGAGCA TCATGTTCGC ACCAATGAAA CGGCTCTGGG TGCGCTGTTG AGAGGCACAC GGTGCTGCCT GCGCGCAAGT TCATTTTTGC TAGTAGCTGT TAGAACATCA CAGTTCAGAA TGCGGCATGG CCCTTGCGTA CTTGCGAATC ATAGATGCAG ATGAAGTAGG TCTATGGTGT ATATATCAAG TCGGGTCCAA TTAATGATAA TATATTAGAG ACCCCTGTTC GCACTCGTCA TAGCTAGTAA CATTTGTTTG CGATCAACCG CCCTTGGAAT ATGATTATTA CTGCAAATAG AGGGGCGGCG GGGTTTGTAC ACGGGAGAAA CGGCTCAGCG TTATTCAACG ACTTATAGGA CGAAAGGGGG TGATTAACTG ATAGCCCGGA TTAACTGCTA TTACCTAACA GGGAAAACAC GGTTAGTAGC CAGAAACAAC ACGCGCGCAA CCGACGCTAA CTGAGGAGTC GTATGTTTAG GGTTCGCCCG CAATTCGAGG GGTGTCGGAT AATGAAATGA GAATCTTCTT ATCACTGACA GGCTTAGACG GGTAGGCGAT TATAGTCACA GGTGCTAACT TTACACAGCA TCGTCGTGCC GAGCCCTAGC TTCGACCGCT CAATCTTCAG CTTGCCGACT GCAACCGGGT TCTTATGACT GGCATAACGT GTGTTTCTCC AGTTCTACCG AGAAAGCGCA CACGGACCTA CATTAGAAGG TACGGGCAAA ATGCGAATAC CTAATACGTT AACTATGATT GACCATCTTC CATTTTTGCC GAAACTCATA AAAGGGGGGT TCTTCGTAAC AGAAGGACGT CCTTACGTGT AACACCCTCC CAGCTTGTTC TTGGCATGCA GACTCGAGGC AGACAGCCGG TGAGCGAGTT AAAGCTCTAC CATGTAAATA CAACGGTCTG GACAAGCGTA CTCAGTGTCA CCCTGCGTCC GCAGGCTTAT TGGGCCGGCA TTCTATCGAA GAGACCCAAT ACCCGGTTGG TCATAAACCG CCGGAGGTAT TATAGCAACG GGACGTGCGC ATGTGGCTCT GAAGTTCACT ACCTCTCCGC TTCGTGGTGG CTCCCGCGTC TCCCACAGAA TTAAGTGCGC ATGGTCATGG CAAGCCAGTC AAGTTATTCC CTAAGCGTTA GAGACAGGCC GGGGACTCTC ATCGAGACAC ATATGTAACC GAAACCTGGA TTGATGGGTG AATGGGCCCC CCTAACTCAG GTTTTTGACA ACGTTGATTG GCGATGCCAT TCATACGATC TTGAATGGCG TGCGCACGCG CGGTGAACAA CTTCCGCTCA CCTCTATCCT TCATTGGATC GATGGTCTGC TTACGGGGTC AGCTCTCGCA TCGATTCCGT GGAGATTGAA GTAAAAGGAG GGACTCTCGA ATATGAAGAG AGACAGGTTG CAGTCATCCA AGAACAGACG GGAGCGGAAC ATGACACGAC TGTGTCATTT TTTTGTGAGA CGCCGACCAA CGGTAAAACC TTTTGATCAC TTAGCAAATA GTTATGGAAT CTCAGATTCA GAAGGACCAG CGTGAGAAAG TTGATAAATG ACGAGCATTG GCGCAATCGT TGTTAGTATG GGCTAATTTT TCTCATCAAG GGATGCTACC GTGACTATGC GCTCGTAGTT TTACTTGGTC GTAACTGCAC TATTTAGCCG ACCATTGAGC CCTCATCGAA CCCTCTCGGA CGACATCTCG GGCATTGTAC GATATCCCGC AAGGGTATCG ACACAGAGGC CCTGAAACTG CGGGGCCGTA GCTACCTTTG TAAGTTGTCT AGCCCAATGC TTAACAATAA ACGACAAGAA TCGCCTTCAT CGCCAAATAC GGGCAGGGAC ACCCTCGGAG CCCGACGGGA TGAGAAACTC GTATGACGCA GCTAGTTAGT GAGGTCACTT GGGCGCCGCC CGATAGTCTA GGACTAATAC AGTACGGTCA TTCTCGTCAC CTCGTCCCAC CTCATGGTAG TGGGATGGCT CGCGCGGTTC CTTAACCTCC AAATCGAGGG ACGTCAGCTG CCAGTCCGTC ATCAGGCGAC CGATGTTTCT TCGCCGTGAT TTGTCGTTGC CATGGCCTTG TGGCTACATT ATAACAAATT AACGTCTTTC CGCAGGGATC CCAAGTCACG AGGTCGGGTA TACCAATCTT TATTAGAGAT GTAGTTCGCG CGTTGGGTCT GCCTTAATAT GAGAGTAAGG TACCTCATTC CCTAGTACCG CTTCCACCAT CCCACGCAAT AGCCCGATTG ATGTAGACAT TGGGGATCGG CGAGGTCAGA TGATTTTAGG CACTCCCAGC CCTTTTGTTT AGGGGTGGCT CCACGAGCTT GCTTAGCGAA GAGGCACACG AACTGAATAA TCAGAGGCAC TGCCCGGGGC AACGCCTGTT CCCACCGCCC CGGAATCGAT TACTACGTCG GCGCGACCTG CCACTCCCAG TGGTGCCCCG TGAGTGCCAC ATGTGTCGTA AGCCCTGCGG TTTGTCCTCC AAACTGGTAA GATAGTGGGC CGATGTGGTG GTGCGAAGTA TCAGCAGTAG TATACGGCTC CGAAACGCGC CGAATTTGTT TAGTCAGGAA TTTGAGTGCC TGCCCAACTG TCGTATAGCC CTCTTATTGA CATCTAGTTA AGGGGAACGC TTCTTCATCA GCCAATCCGC CGAGTTCCCT ACTGCAATCA CGTTATGCCA CCAGTTACGC CTTCGATCGC CGTAATGCAC CGGGCAAATT CTACCAGCCA GAGCAAATCG GGGTCATAAG ACCACCTCCG GAGCAAAAAG CAGTTACGCC GCTTTACCTT GGAGGGCAGG GATAGCCCGG CTGTCTACTG CGACCACTTG CCATGATCGC AGTTTGTAAT TTTGACACGC CCAGTCCCGC CTTGGGTTTT GACAAACGTG CACGCTTGAC CACACTTGGA TCGATATGGA GACGGACTCT CACAGAAGGA GTTACATACA AAAGCGCAGC TCACAACGGG CTTGAACTTA GCCGGAAGGA CTTGTCGCTG AAGGACCAGG TTAGTATTCT AAATATAGTC ATTTCGCTTC AAGCCATCCA GTAAATCGTC TGTATAAACT CTAACAAAAC GCTAGTCCGC TAGAGCCGTC GTCGACAAGC TTATGGCGTC AGCGCGCAGC CTGATTGTAT CCGCCGTCTG CTTATCTGAA ATGACGTAGA AGTTTAGGTG ATATGCTCCG CTGACTTCCT CGCCTTTCCT ATAGTTAGCG GGTGCGAAAT CGCGTATGCA GCTTCGTTGT TGTCAAAGGG CTCAAAGAGA CCTGCCGCGA CTCAAGTAGC AATCTTTTCT AGGGTTCAGT AGTTCGCTGG TCCCATCCCG AAGCCTGCGA GGTCAATACT GCTATATACC ATTAGGGGCT CATCGCGGCC GTCGAGACCA AACCTTATTA AGCTACCGAG AAGCCCTCCC GCCGATTCCG CATCCGTTAA GATACCTCGA GTTCCGTGGC AGCAAGTCAA GATGTGTGAC GATGTGTGAA GGCTGGCCAC TCCTAAAGTC ACATGAGGGT TCCCTCCCGT ATGTCCCACC ATGTTCGGGA CATCACTTTG GGTTAACCGG AAGTTTTCGT CATTCACGAA TTGGTAAAGA CGGCCCTGCC TACTTCATAA GTAGGGCCAA TTTCTACACG CTTGCAGAAA GACGACGGAC CGATTACCTC GGTGAGGGGA CTACAGTTCA AGTTATATAG CACCGCGCCG ATGTAAATAC TTGCCCGTAA AGTTGCGGTA AAGCTACACG TGAACTTTAA CCGTGAGCTT ACCACTCCTT CCCTGCTCAA TGTCTCGTAG TCTTATTTAG AGGAGAGGGA GCTATGAAGC CGCCAGAGTA ACCGTCGAAA TCGGTTGTGT GGGTGCCGAG CTGTGTGAGT GCTGCAAAGT CGGCTGTTTA TACTTTGCAA AAGCAGCGTC GATAATGATT ACACGCTTTA GTCTGAAAGC GTCGCGGCCG CAGGCTAATC GTCGTGGAAA GTCAAGCTCC AGGCGTAGTC CGCGGCCGAT AGGCTTCGAC TTACAGTCGA TCCCACGGCA CCGAATCTCT GGTGACATGA GAGACGGCAC TAGAATAGAC TAGAGTACTA ACTACGAGAT AGCCAGTCCC GTTACGCCTT TCCTACGGAA CTGATGCAAC AGTATCAGCC TCGGTCTAAA CTTCATTACA CCCCCAGAGG TTAACCGCTC GCGAACCTCT CTTCCTGATG TGTTACGCCG CCCAGCTTTA GTGCCTCCAG ATGGGCATCC ATGGCCCATC GTGTCACTGC AATAGCCATA AAACCCAAGT CCGCCAGAGT GTAAGCTCCA GTTAGTAGCG AGCAAATAGC TTATAGAACA TTATACGTAG TATGGCGAAC GACAGGACTT GTCGTGGTAC TCATGTAAAC CCTCCCGCGT GAGTTGACGA GGTCCAAATT TTATGTCCCA AGCTCGGGGT ATGGGGGCAA TCATATACGG ATGGCATGTG CGGTCCTTCG TCGTATTTAG CAATAGATAA TATCCCATCC AGGAGAGTTG ACGTAGGTGG AAAGGTTGAG TAAGGCTAGA TAACCACAAC GCCACGGAAG CCCCAGCGAA TAACTTACCA GGGGTGTCTC TGAAAGCTGT GAATATTCAA GTCCTTACGC ACAGCGGAGC GACGGACTTC TCAGTTTAGG CCCAATAGGG CCGCGCTACT CGCTCCTAGG TTTTCCGTAT GAGTGCCGCA ATTGGAGTTA GATTTTGTGA AAGGCGAGTC GGACGTAACC CATACATTCG TGACATATAT CCTTAGTACA ACTGCGGATG GAGGTCGTCA TGGAATATGC GGTGCGTTGA AGAGGAATAC CACATCTTCC TCGTGAGGGG ATTCTAAAAT TACCACTGTC GGGACCAGCT AGGAAGACCG ACCCCAGCAG GGACCAAATA CCCACCCGCT GAGTTACCGA TTGGCTGCCC GAAATTTGGA GACCCTTCTG TGCGCAGCAA TGTAAATACC GGCATGACGT ACTTAGTTCC GAGGAAGAGT GCGTGTGAAA AGAGCGCTAC TGTGTGAATG ACCCTTCTCA CAACACGTGG TGCCCATACG TTTGCCCGGA CTCTCACTTT CAGTCTGAAA TACTCAATAG GGTTGGCAAG TCAACACTCT CAAACAGCCG CTGCCTAAGC CCCGCTGTCT GAGTCAAGAT CGGGCGCCTT TTAAACTGTA GCTTGACTGG CGGCACGCAC GAAAGTTTCA ATCGTATATG CGTGGAAGTT TGCACGGACT CCACACCGTA TATCTGAAAC CTATAATGGT GAGCAAAAGC CGCGCGTAGT ATTTGGTAAA AGTCGTCCTC TGGTACATCC ACGCCTAGGC TCGGCTCGGC AAAATTTCTA TAAGGCCTTT ACTAACGTTG AGACCCTAGT GACGAGCTCT CCTCATGGAC GTGTGAATGA GCAATCGTAT AGTAGAAGGT AATTTGGCGT GGCACGACAT GATCTAAGAG GAAAAGGGTC GAAAAGGGTA GTGGGTTCTG AGGGGTGTCT ATGGCCTTGA CCTCAGATCC CTACCAGGAC ATTTAAGGGG CGATGGGGGT TGGAGGACCA CCGGACATTA GTTACATCCC TTGACACGCT ATAGGAGCCC TTCTTACCTG TCGACATCAA CTGCGTACAA AGGCGCCTTA CGAGACGATG CTAGTCTGTG CACCCTCGCA AGTGGACCCC TAGGCTGAAC TCGAGAGCAA TTCACGAAAG GCTCCACCCT ATCAGGCGTC TTAAGGCTAG GCCTTGAACG CGTTCGGGTT TCCTTCAGTA CTACCGGCAG TCCATTTGTT CGTGGCGTAC TCGGGATCAT GGACAACGTT TTCGCCGGCG AGTATTGGGT TACGATGGGC CATGTAACTC AAGGTGCGAA TGCACTACAC AGTGCCAGCG GTGCGGCTCA AATGTCGGGC CATGACGTCA ACAGTCCATC GCCACCAAAG TGCGCGCAGA CATCTCGTCC TGTGAGCGTT GCGAACGGTA GACTGCGCAG TTTTATCTCA CATTATTGGA ACGGGAGTTG CGAGGTTCGC CTTATGATCG GCCGGACATT CCGCACGCGC TTGCATCGGC ATTTCTCTAA CCAATATAGC CGGCAGGCTA TCGCTTCAGA CTGCTTATGA CTGTCCACTG AGGCTAGAGC GCCACCACGC AAGCCAGTCC ATTCCGTCCG AAATAGCCAT TCTGAAAGCC TCGCTAGCGC GTCGATCTCA GCGTCTCCAT CCGGGGCAAC GCGCATGCGG CATGGCTGGC AAATCACTTT CGTCTCCTGC CGCAGAGGCC AGGGCTCGCC CGTAGTCACT GTGCGGCGTC ACTCCTCTGG ATTGTGCGCA GCACCCGTTC ATGATTTAGC"
last = "GGCCGCGATGGTCGGGATCTAGAGTTTACGAATTAGTACTGCGTCGGGGAATTTACGGAGTTGGAATTGTTCCCATAGTGTAGGTACACCGTCATTAGCCCTTCAAAAATCGGTGGTAAGCGAAGACATTGCTGTAGCTAAGGGCTGCTGTCGGATAGGTTATTTAGAAACGATTTTTATGAGCTGGACGGATGCATCCAGAGAAATGGTGCACCTCGAACCTTACATAGGAGCGAGCTTTGTTAAAGTGCGGTCCCGGTTACAATCATCATCGAGCCAACTATGATATGCTGTCATGCGAGTCAACGTTATCATGAACGCGGCGCTATTCCACCTCCTACACACGGTGTACTGCATAATTGGTGATGGCTTAATTGTGATCATTCACGCATCCACCGGGGCGCCACTGAATCGCCTAAATATAACCGCGATTGGGAAGGCTTAGTAAGAGGGAGGGTCATGGGGGACACGCGGCGCGCACGGCAACGTGCCTCTTCTTCTCAGCGAGCTCAAGTATAGATGTGAACTGTTGTGAAGCTTAAAATGATTTAATCTGCCTCACTACCTGCGCTTGTCGGACTGCCCGGTGTATCCAACTCCGGACCTACTAAGCGGCTTTAGGGGACAGGATAGGACGCTTTACCGTTTGGTCCCCCAGTCAACCCATTGAAAGACAGACCTAGCCCATTCTATCAGAGCGAACCCCACTCCAAGGGAAAGGACTCATGCTATCCCCGCCGTATCTCTTTCTATCTTTCATTGGGCGTAAAAGTGAGTGTTAATCCTATAGTCTGCCCCAACCCGACGGAACTGGTACCCGGAGTGCGGCAGGTGTGTCTGGATTGTCAAAAAACACGCCATGTTTGAAGCGTCTGCGATCAGTGTATCTCGGGTACATCCCCTTAACGTATAGCGGCGCGTTCCCACACGCGCAGGTTGAAGGGAATCCTAATGCCGCTTGCTGGCAGAAGGTAGTAGCACGTTACTAGCTTCGGGGGGAACCGACAGCGTTGCGCGTGTAAAATCCTTGCGCAGCAACTTATTGGTGATATACCGCCTGCCACGTCAATGTTGTCGCGTAACGCAGGCCCTCACACGTTGCTTAGCGAGGCCCCTTGTCGTGGCAGGGTCTGGTCCAGTGCCCATCGGGTGCTAAAGGTGGAAGGAACTCAGCCGCGAAGTCTGCATGCACATAACAAATCTCACGCCCTGACTCCAAACTTCAAGTACCAGTTCTCCACGATCTTGCTCGCCTCGAGTGCTGTGAGGTAGGGAGGGGATACCACAGAGTGAGGCCGCCGGATAATATCTCCACTGAGTGGGCAAGCGTGTCCATCAGCTATGTTTGTTCAACGAGCTGAAGTGCCCCTAGATACACGGGACGGGATTGTGACTGACGGTCTGCGGATTCTCGAGGGTCGAC$AAGCGAACAGGACTCGATATCTTCCATTAGGAATACCGCTGCTCGAATCGCGCGCGCTACTACGTAGGAGATTTAGTGTGAGGACTGCGCATGTAGAGGGTGCCACTTTACGTGGCATAGCACGCCCGTCTTGTGTACCAAATTACGGGGTGCAATGCCCGCGTGGCGCCTCGAATTGGTTCCTACGTATGACTAAGAAGAGCCTGTCTACCGAGTTAAATCGCGAGAATTATGGATATTCCTCTAGCCGTTTGCCCATTCCAGAGAGGAGAGAGAGGTTTGCAGTCCGTTCGGTGGGATGGGGTCAATGAGACGACAAGTGACAGCAGATAGTTCGTAGACCAACAGGTTAAGTTAAAAGGGGTATACTACGCCATTCATCGCAGGACTCCGTCACTCGTCCATATAATCAATCCGAACCCAAACATTGCGGGCTTGGACATGCCGCTCATCGGCCAATCCCATGAATACCCGATCCATGAGGGACAGGGGCAGCTTATCCTTTACATACTATTAAAGTTAAGATCGCAATATGATACCCGTTGAGTCTTTTTACCTTTAGGGAGTGGGTAATTAAGAAAAGAATGGTAGAGTAGTCCGCATGGAGAAAGGCCTGGAGATTAGTGGTTTCAAGCTTAGCTACACACCTACGGTCGAATTCGCGGGGAGATAGTGTGTACGAAGCACTACACCGGGGTCCGTTAAAGGGGACTTATTCTACCAATGCGTGGCCGACACCCTCCCGTCCTACCACCTACCAACTTCATATTTGTATTATCCAGAGCAGATGTGTCAAAGAGTCGAAATGTATAAATATAGGAGGGTACGGAGCATACTACCCTTAAGCACGGAATGGTGTCTCACCCTACTTTTCAACCTTAGTTAGAATCGTCGACTCCCAGTAGTAATTCTAACACGTTGTGTTCAAGCCCGCTGTCCCTCCCCCCGGGTGGTTACCTACGTATGTTCCCTGATTGTCCTATTTTCTCGATCCGCTCGATCTTTCATTCCCCACCACAATAATGCGTCTTGACTTCCCGCAGCGGGCTACAGCAGTAGCTCGTCCTGCGTCGCTGCTCCACCTTATATGATCCGGGCCAATTCTCCCCCGAGCCGCGAAGTGCTACAGAGATATAGTCAGGCACACGCATGGGGCATTTACATCCACCCTAACACGACGACTGAGAGGGGGCACCGACATTGAAGCGACATCGGCAACGCTTAAGTCTACCGACTTTCAATCGTGTAAGCCTCCCACCGACCTTGAAATGCCCCAGCCCGGGACAGACATGTCAGTCCTAGGAAGGTCTTACGTTACTATTAAGCACAGGTTCATTGGTATCTCTGCTGTGCGCCTTATTTTATCATCAGGGCTCGGTAGCTTATTTTACTGGGAGAGTTCTCGCCCGTGAACGTAGAAGTGAGATACCGGAATTGACTACGATTCTTCTGTGATACGATCACTATGAAAAAGTGAGTGTGCTTCCCTGGCGAACAAGACGTAAGGGGGCGTTCACGATCAATAGGCCCCATCTAGCTAAGGACACCAGGGCGGGCATGTGACAATCGTACCGCTGCGCAGGAGTAATTTGCTATCCTAATGCAACGGGGGTACTGGGTCTCTAGATCGTGGGTTGACGCATGCGAGCGATACGGCTGCTTATAAGTGCCCTAGTTCAGAGTAGCACTAGCGCAACGCGGGACTACTCGCGGCAACGATACGTCAAATGTACGTACTAAGTTTATGATGCGTAAAGGTGGTCTGTCTGCCCGCGCGCTGGCCAAAAAGGTATTAGGTCGGGGGTTTGCACGTATAACGGCGGATCACAACTATGACTGTATCCGCCCGAACAGCCATCCGATCCACATCCTAAAAGATGACAGCGAGATCATACGGCCACCCAGCTGAGTGTTGTGCGCAATTTCGTTACCGCGCTTTTGACCTTTCGCTGCCTAGCCTTAGAAATTTATCTTACATACTGAAGTCATCGATCAGCGGGCCATTTGTACTTAGCTATTTCAACTCGCAAGGCGGATGACGCTGCGGACGTTGTCCAATGGTACCCACCGGGGAGACGCTTAGTAGCTCAACGAAGTTGATGGATTATTTTACCAAAGTCACTCTAGATTCCGACTTCTACCAGATCGATCGCCAGTATAATCTCCCTGCATTGTCTGGCGGTGAAGGGTATTATTCCGCTGAGACAGGAATTCACGCCGGCGTGGCCGACAGAGGATTAACAAAGAATACCAACTCTACACTGGGCGAAACAAAATTGGCCTCGACGAAGGCGCCCCAGGACTACACACAGTCACTGAATCCGGGTACGTCGTAGAGGAGGCGCCTCGCATTGGGGGGTGTGATACAAGTCCTGTCGTATAACATAAATGGACGACAAGGCCCCTTATACGTTGACCCACGCTATAACTTTAACTAGGAACGTATTTTCATCTAGTAAACCTACCCGTGCGACCGATACCCACATACATTCCTGTGATTATGTACTGACCTGTGGCGTAGTTATAAGATATACGATCTACGATCCATGTATCTTTTGCCGCCCAGCTACCGGCTACGCAACTTGTCATAAAGTGCGGTAAAGCATAACGTGTACACATAAGTAGCTGTCGGACGGTCTTAGTAGCTCTTCATCCCTCGCACACACGGATAGGGCGTCTTCGGCCTAAAGGAAGGGTCTGGAAGTGGTCAAACCCGGGCTATCATGGAGTCTGGTTGCGTCTATGGTCTGCGTTGAATATACCTAACGAGCGGTAGCTGCTCGTTGAGCCAACGCACTCCTCGTTTACGCAGTCCGCACTGACCATCGAGACTGCACCAGATCTGATTACTCCATCTGCGATACCATCTATCGGCCAATTTGCTGTATTGGCATGGCCCTGCTAGCCGAGGAACCCTTTTAGCGTTGTCCCGCTCGGCTCCTATCAGCGAACATGATCTGGCGACTAGTATGCCGACGCTCGTGGCACCATTAGGCCCATGTTGCCGCTGTGATGTGGGCGAAATAAAGTCGACTAAACGGGAACCGAACAACCTGTTGTATAACTTACCTATTTACGCAAAGCGTATAGACCATACCTTAGGTAGCAGGCCGTGAACACGGCTAACAGATGAAAGTGCGGTTGCGTCAACTAGCATGGGCGCAGGCCGTCATGCCCAGGGCCCGGAGGAGGGGTGTAGGCCTAGTTATGGAGCCCCTCTCGTAATCCATCCCTACGCGTTAGAATTATTACGCGCTCTATTAGGTAGTTACGGTCACAATACGCCCTAGGACAATCTTAGGACATAGTCCTAATTAGACGTAGACGGACACCTAGCCAGTATCACCCAAGACCGTCGATGTCTCGTTAGGAGTATCTTTCGGATGCTTTATTGAAACTCAGTATTTCAGTCCGTGCATTACAGTCCACTGTATACCGATAGACGTAGATCAGTGGTATGCAGATCAAATTACGAATGACGAGGTGGTGTACAGGTAAGTAGTAACTTTGTCAGACCCGCCAGCGTCCGCGCCGCCGCCACACTGCTAGCGGGCGTAAGTTGCTTCTAAAAGGGCACGTTAACACCCGTTGTTTTAGCACGCCAGTAAGCGGAATTACCTGTGTGGCCCTTGGATAATTTACTAATCAGTGGTCTGTATATAGCTGACTTGTTTGCCAGTACCTTAACGCAGTTTTTATCACCGATTAAACCGCGAGTTAGGGATAGTTCGTCACACTAACGTGGTGTCAGGCATTTGTGCGTACGAGTACCGCCTGTACCAGGGACTAAACAAGTTCCTATTGCTAAGACTGCGAACGGGTGATTGTATCGATAGCGAAACCCAGAGTTCGTTCACCTGACTTTTCGTTTGGTATCCAGCTCCCCGGACTGGCGAGTCGGGATAGGCCGAGGCAGGTCCGTCAGTCCGGACAGAGTCATCCACGCCATGTTCGATTCCTCTGGGCGTCTCTGTAGGACCATGGCGCGGGCGGAGACGTAGCGCCCCACCATCTGCTGTATCTAACCAAGGGATATACTAACTGTTGTATAACAGAGAAGCGCGCGATCGGATCGCTCGGGAATGGGGATATATAGGTATGGAGCCTAAACTAACAAAAACTATGGCCTTTGTCGCTTTCGCTAAAGCGACTCCTCTGTAAGGCGCGTCGTGAGTACGACTTCTACAATAACCTAGCGACATTAGTTCATCGATAAAGTGAAATATCGTCGCGGACGACACGTACGGGCCTCCACCCACTGATAACTACGGGAGCGAGTGCTTTCATTCTCCGGCTCGACGTCTAGTACGGCGTATGCGCAGCCGGCGGGTGGATAGGAATCCTGCCCAGAATTCTGCGGCGCGCGAGTTATATCCTCCATACCTATTCATAAGATGCACTCCCATAATTGCCGTGATTCTACTCCAAAGATAGTGGAGTAATCGCTCAATTATGACATACCTGTAGTGCATAACGGTCAGTGTTCCCTTCAGTCAGGGATGCTATACTGGTCGCGTAGACACCTGGATGACGCCTGACCTACAGATAGGGGTTGGTAGCGCATCATATAATCGCAC"
length_of_patterns(string,last)

[0, 2, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 1, 3, 0, 0, 0, 0, 0, 0, 1, 0, 3, 3, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 2, 1, 1, 0, 3, 1, 0, 0, 0, 2, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 3, 0, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 3, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 

In [219]:
letters = list(set(last))
letters.sort()
print(letters)
count_forEachIndex = [0] * len(letters)
Count = []
for i in range(len(last)):
    for j in range(len(letters)):
        if letters[j] is last[i]:
            count_forEachIndex[j] += 1
            break
    #print(count_forEachIndex)
    Count.append(count_forEachIndex)
    print(Count)
pprint(Count)

['$', 'a', 'b', 'n']
[[0, 1, 0, 0]]
[[0, 1, 0, 1], [0, 1, 0, 1]]
[[0, 1, 0, 2], [0, 1, 0, 2], [0, 1, 0, 2]]
[[0, 1, 1, 2], [0, 1, 1, 2], [0, 1, 1, 2], [0, 1, 1, 2]]
[[1, 1, 1, 2], [1, 1, 1, 2], [1, 1, 1, 2], [1, 1, 1, 2], [1, 1, 1, 2]]
[[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2]]
[[1, 3, 1, 2], [1, 3, 1, 2], [1, 3, 1, 2], [1, 3, 1, 2], [1, 3, 1, 2], [1, 3, 1, 2], [1, 3, 1, 2]]
[[1, 3, 1, 2],
 [1, 3, 1, 2],
 [1, 3, 1, 2],
 [1, 3, 1, 2],
 [1, 3, 1, 2],
 [1, 3, 1, 2],
 [1, 3, 1, 2]]


In [241]:
string = "an"
last = "annb$ana"
length_of_patterns(string,last)

[1]
