In [4]:
import math
import re

In [14]:
def counting_nukleodits(s):
    assert re.match('^[AGTC]+$', s), 'String contains some others symbols'
    d = {'A': 0, 'T': 0, 'C': 0, 'G': 0}
    for c in s:
        d[c] += 1
    return d

In [15]:
def transcribing_DNA_into_RNA(s):
    assert re.match('^[AGTC]+$', s), 'String contains some others symbols'
    return s.replace('T', 'U')

In [20]:
def complementing_DNA(s):
    assert re.match('^[AGTC]+$', s), 'String contains some others symbols'
    complement = { 'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    res = ''
    for c in s[::-1]:
        res += complement[c]
    return res

In [2]:
def hamming_disstance(s1, s2):
    assert len(s1) == len(s2), 'Diffrent length: s1 - {}, s2 - {}'.format(len(s1), len(s2))
    return sum(c1 != c2 for c1, c2 in zip(s1, s2))

In [89]:
def finding_Motif_in_DNA(s, subs):
    return [n for n in xrange(len(s)) if s.find(subs, n) == n]

In [54]:
def fasta_to_dict(data):
    d = {}
    last_line = ''
    for line in data.split("\n"):
        if (re.findall(r'Rosalind', line)):
            d[line] = ''
            last_line = line
        else:
            d[last_line] += line
    return d

In [80]:
def fasta_to_list(data):
    res = []
    for line in data.split("\n"):
        if (re.findall(r'Rosalind', line)):
            res.append("")
        else:
            res[len(res)-1] += line
    return res

In [69]:
def cg_content(s):
    d = {'A':0,'C':0, 'G':0, 'T':0}
    for nuc in s:
        d[nuc] += 1
    return float(d['C'] + d['G'])/len(s) * 100.

In [83]:
def overlap(a, b):
    best = 0
    for i in range(1, min(len(a), len(b))+1):
        if b.startswith(a[-i:]):
            best = i
    return best

def merge(first_word, second_word, overlap):
    return first_word + second_word[overlap:]

def is_first_word(word, words):
    for j in range(len(words)):
        if word != words[j]:
            over = overlap(words[j], word)
            if over > len(word)/2 and over > len(words[j])/2:
                return False
    return True

def get_first_word(words):
    for i in range(len(words)):
        if is_first_word(words[i], words):
            return words[i]
        
def get_next_word(word, words):
    for j in range(len(words)):
        over = overlap(word, words[j])
        if over > len(word)/2 and over > len(words[j])/2:

            return words[j]

def min_super_string(words):
    first = get_first_word(words)
    super_string = first
    words.remove(first)
    last_added = first
    while len(words) > 0:
        next_word = get_next_word(last_added, words)
        words.remove(next_word)
        super_string = merge(super_string, next_word, overlap(last_added, next_word))
        last_added = next_word

    return super_string

In [151]:
def overlap_graph(s):
    d = fasta_to_dict(s)
    res = []
    for i in d.keys():
        for j in d.keys():
            if i != j:
                if d[i][-3:] == d[j][:3]:
                    res.append((i, j))
                elif d[i][:3] == d[j][-3:]:
                    res.append((j, i))
    return list(set(res))

In [156]:
rna_to_protein_dict = {"UUU": "F",    "CUU": "L", "AUU": "I", "GUU": "V",
                       "UUC": "F",    "CUC": "L", "AUC": "I", "GUC": "V",
                       "UUA": "L",    "CUA": "L", "AUA": "I", "GUA": "V",
                       "UUG": "L",    "CUG": "L", "AUG": "M", "GUG": "V",
                       "UCU": "S",    "CCU": "P", "ACU": "T", "GCU": "A",
                       "UCC": "S",    "CCC": "P", "ACC": "T", "GCC": "A",
                       "UCA": "S",    "CCA": "P", "ACA": "T", "GCA": "A",
                       "UCG": "S",    "CCG": "P", "ACG": "T", "GCG": "A",
                       "UAU": "Y",    "CAU": "H", "AAU": "N", "GAU": "D",
                       "UAC": "Y",    "CAC": "H", "AAC": "N", "GAC": "D",
                       "UAA": "Stop", "CAA": "Q", "AAA": "K", "GAA": "E",
                       "UAG": "Stop", "CAG": "Q", "AAG": "K", "GAG": "E",
                       "UGU": "C",    "CGU": "R", "AGU": "S", "GGU": "G",
                       "UGC": "C",    "CGC": "R", "AGC": "S", "GGC": "G",
                       "UGA": "Stop", "CGA": "R", "AGA": "R", "GGA": "G",
                       "UGG": "W", "CGG": "R", "AGG": "R", "GGG": "G"}

In [167]:
def get_all_k_mers(string, k):
    k_mers = []
    for i in range(len(string) - k + 1):
        k_mers.append(string[i:i+k])
    return set(k_mers)

In [175]:
#     MaximalNonBranchingPaths(Graph)
#         Paths ← empty list
#         for each node v in Graph
#             if v is not a 1-in-1-out node
#                 if out(v) > 0
#                     for each outgoing edge (v, w) from v
#                         NonBranchingPath ← the path consisting of the single edge (v, w)
#                         while w is a 1-in-1-out node
#                             extend NonBranchingPath by the outgoing edge (w, u) from w 
#                             w ← u
#                         add NonBranchingPath to the set Paths
#         for each isolated cycle Cycle in Graph
#             add Cycle to Paths
#         return Paths 
def parse_graph(g):
    res = {}
    for edge in g.split('\n'):
        f, t = edge.split(' -> ')
        if len(t) > 1:
            res[f] = t.split(',')
        else:
            res[f] = [t]
    return res
    
def maximal_non_branching_paths(g):
    paths = []

a = '''1 -> 2
2 -> 3
3 -> 4,5
6 -> 7
7 -> 6'''
    
parse_graph(a)
    

{'1': ['2'], '2': ['3'], '3': ['4', '5'], '6': ['7'], '7': ['6']}

In [172]:
k = 50
s = 'GATTCTCCATTGGCCTGCCGGGGTAGACACAGCACTTTGTCATAAGGTTAATACGTAACTAGCGGAGCTGAACACTGTCAAAGGCCTCCCCAGGTTTTTATTCCAGCGACCCAATAAGTTTCCAGAGTATTTCAGGCGGAACGGCAGGACCGGCATGTCTTAGACAGCTTACTGCCCGGGGAGCAATTCTTGTAAGCGTTGCATCGCCTGACCGGCAGAACTTTCACTCCAGGGGCCCCTTCATTCCCCACATGCGCAAATTTACAGGCCCGTACTTTCGTGTTGCAACGAAGCTATTCTTCGAGCCTACGCGAATATGTTCGCACGTATTAGGACTATTTTCTTGGTTCAACTTGCTAGTCTCACGCCTTTGGTTTAATGAGATGCATAGATTGAGGTGGTGCCCACAAAGCTAAATCGGGGAAACATGTCCAGGCGGATCCTTAGTGGTTTCTGCCGCTTAGAAACGTGCACGTTATAATCGCAAGTGGATACTTACCGGTACGTATAAATCATTTAGGTAAAGCTGACACAGGCGTAACAATCCTTATATCTTCGGATCGTAGTACCCGAAATCTTCTCCCGTATAAAGACGTTGGAACGCTCTCTGGCTGTCTACAAGTGAAACACCATTCGATCTCGAAGCACCCGAGCAGACGTGAGGTACCGCCTAGCACTAATCTTACCAACCCTATTTCCTTATGCATACCTAAACCGGCGGCGGAGTAGCCAAAAATCCAACGGCGAACTAGGAATTGTTCTGCTGTCGAATGAGACAGATAGGAAGACGCGCACCATGGCACCTTGGGTTGAGGACGACTGAAAGCCAAGCTAACTGAACACGCCGATAGCATAGAAGAGTGTACCATCACAAATGACATCGCTATTACTATCGGTTGTTTAACGTTATTTGCACATGGCGCGCGATGTGGAATTGCCTGTGTCGAAGAGAGTGAGTGGGTCGACTCATCGGAGTAAAAAAAGGCGGTCGTTTCAGGTA'
for i in get_all_k_mers(s,k):
    print i

CAGCTTACTGCCCGGGGAGCAATTCTTGTAAGCGTTGCATCGCCTGACCG
AGGAATTGTTCTGCTGTCGAATGAGACAGATAGGAAGACGCGCACCATGG
TGCCGCTTAGAAACGTGCACGTTATAATCGCAAGTGGATACTTACCGGTA
AGCACCCGAGCAGACGTGAGGTACCGCCTAGCACTAATCTTACCAACCCT
GGCCTCCCCAGGTTTTTATTCCAGCGACCCAATAAGTTTCCAGAGTATTT
CGACCCAATAAGTTTCCAGAGTATTTCAGGCGGAACGGCAGGACCGGCAT
ATTGAGGTGGTGCCCACAAAGCTAAATCGGGGAAACATGTCCAGGCGGAT
TTAGGTAAAGCTGACACAGGCGTAACAATCCTTATATCTTCGGATCGTAG
TTTCCAGAGTATTTCAGGCGGAACGGCAGGACCGGCATGTCTTAGACAGC
ACACGCCGATAGCATAGAAGAGTGTACCATCACAAATGACATCGCTATTA
CAGGCGGATCCTTAGTGGTTTCTGCCGCTTAGAAACGTGCACGTTATAAT
GAGTGAGTGGGTCGACTCATCGGAGTAAAAAAAGGCGGTCGTTTCAGGTA
CCGGTACGTATAAATCATTTAGGTAAAGCTGACACAGGCGTAACAATCCT
TTGAGGACGACTGAAAGCCAAGCTAACTGAACACGCCGATAGCATAGAAG
TGGATACTTACCGGTACGTATAAATCATTTAGGTAAAGCTGACACAGGCG
GAATTGTTCTGCTGTCGAATGAGACAGATAGGAAGACGCGCACCATGGCA
AGCTAAATCGGGGAAACATGTCCAGGCGGATCCTTAGTGGTTTCTGCCGC
ATCGCCTGACCGGCAGAACTTTCACTCCAGGGGCCCCTTCATTCCCCACA
AGGCGGAACGGCAGGACCGGCATGTCTTAGACAGCTTACTGCCCGGGGAG
GGAATTGCCTGTGTCGAAGAGAGTGAGTGGG

In [168]:
s = '''>Rosalind_6026
GACACCGGTCAGCGAACCTTGATCTATAGAAGAAACGCCGGTGGAATCAGGGAGGAGCCG
GAGAGGTGGGCGGTTATGCACGCGTAGGTCAATTATAAGACAATAAGAAACGAAGGTTGG
TAAATAGAGCCCCTCTTACAGTCCGGGGAAATCCACTACTTGCTTGTCGAAAATCGCCCT
TCACCCGAAATACGCTAAGTTGTGATCAGCATGATGACGCAACCTACCGCTACTTTTGTT
AACCATCATGACGAAAACTAGGGCTAATCTCGTTCCCTTCCCCCCTAAACGGGTCGGGCC
CGTCGATAGACCGTTAGCATCCCCATACGAGTGAAGAGTTAAGGGACTGGTACTCTCCTT
ACTCTATTGAAAGACACCCAGCCGAGTTTGTCAGGGATCTCAATCTGATCTGGTACGCTC
TTGGCGCGTCTTTGCCGCCCTATGTGGCCAGTCCAACCCTTAAAACCCACTCTGTGTCAA
GATTTCGCCCAGGAAGCTTCAAATAGGGCGACAGGACTCTTCAGACCCGTTCTTAGGAAG
GCCATGCGCTGAATGCATAAAGTTTCCCGATATGTACATTGGGAGCATGGAATGGTGCTA
CAGATCGCACCAGACGTCACTGCACTCTCCGAAGGCTCGTGTCGCGTTTAAGGTGCCGAC
GTATATTACATCATGGATTTCATCGGAGCTATTATATCGGCCCCCGTGTCTTGGAGGGTG
GAGTGCCTTAATCTTAGTATGGTTATCAGAGTAAGCGTGTGCGGCCTGCGGTGAAAGTGC
GCAAGCTTTCAAGACTACTAGATAATCAAAAGCTCGCTCCGTGCGATTAGACTTTCCGAC
GCAACTGCTTAACTTTTCCTAGAAGTTGCGAGCTCGTGCTATCAAGGACTGCGCGTCATC
ACCACTGCTCGCGAGTAACCTGCATCGCAGCGTAGGGGCGCGATAGCGACGGCCCCGACT
AGAGCCATACGTTTCTAACAAAAGGCTAATTG
>Rosalind_7451
GCAGTAAAACGCTATTTGTAAAAGGGTGGCCCTACGGTCGAGACCGGGTGCAGGTCAAAC
ATGCTCCTGGAGGGCCCGCTCAGATCGGTACTAAGACTTCCCTGACGATAGTCTCCCTTC
AATGGAACCTGGCTTGTGGATAGATACTAAGGAGTATAGTATTCGTAGTGGGTGACTGGG
ACGTGACGAGGTAATCTCGTCGCACATCGACGGAGTATATAAGCCGATGACAACGCCACC
AGTTTAACCGAGAACCTCGATCGGGGCGGACCCATGTATAAAACGAGACCATGAAAACCC
TCACCGCGTCAGCATGCCTAGGAACCTGCCCACCTGAGCGTTAGTTTTATTCACCGTGAC
GACACCAGTCACACCCGCGTATGTCGAAAAGGTCTTCACTAGCAGTCTTTCATGATGCAG
GTCTCGAGCTAACCGTGATCGTATTCATGTTAAAGCAGCCGAACGATCTTTGCTAAGGGG
ACGAGGGTGGCCATAATGGTGTTTCGTATGAATTGACTCGCAGCTTGCGCCGCCGACACC
TTGCACACCCAAGGCAGTAGGCCAGGTTTAACGGTAACCTAGCCTTCATTCTTCGAGTAA
TAACAGGCTGGGCGCTCGGTATCAGCGTCCCCTTTAAAGCAGCTCTCTCTTAAAAGGGTA
AACGCTGTGCCGGCTTTCGCTGAGGATCTAAAATCTTGTTAATACTTCATAGCATAAAGT
GTCCCCCCCCTTAGGTAATCGCCTTAGTTAGTTCACGTATACCACTAGATTGAAATATTG
CCGGGCTAAGCGAGTTTTTTTTGAGACGCAGCACGGCACTGCGAACACCTCGGGCTCATA
GCCTTTCAGACTATTCGTCACTATGCACGTAATGGCTAGTATTTCCTGTTGACCATAGAA
TGGTATCTGTTTGCAGCACGATACGGGCAGTGGCCAAGAATGAGTCTGTGATCGCTTTTA
TGCGGTCAGTGAGCCCGTAAT
>Rosalind_8732
AATGGCTACTGGTCATGGGGGAACTGAGGTGTCCGCCCGCAGACCAGAGGGAGGCCCGTG
GTAAATTATGAGAATATTACGACAGCAATTATGCTGCGTTCCAATTGACTGTTTTTGAAA
CAGCGACCGCAAAACTATCTGTTATGGCACCGTTTGACTCCTACATGCCTAAGTCGGGCA
GGATCCTTGAGTCCAATTCAGATGTTCGAACCAAAACTCCTAGGCTAATTAATAATGATG
CGTGCCTTACGTAACGGCTTGCATTCTTCGTACCGGGCCTGTATAGGGAATCGCCGCGAT
TGTTGGCAGTAAAACGCTATTTGTAAAAGGGTGGCCCTACGGTCGAGACCGGGTGCAGGT
CAAACATGCTCCTGGAGGGCCCGCTCAGATCGGTACTAAGACTTCCCTGACGATAGTCTC
CCTTCAATGGAACCTGGCTTGTGGATAGATACTAAGGAGTATAGTATTCGTAGTGGGTGA
CTGGGACGTGACGAGGTAATCTCGTCGCACATCGACGGAGTATATAAGCCGATGACAACG
CCACCAGTTTAACCGAGAACCTCGATCGGGGCGGACCCATGTATAAAACGAGACCATGAA
AACCCTCACCGCGTCAGCATGCCTAGGAACCTGCCCACCTGAGCGTTAGTTTTATTCACC
GTGACGACACCAGTCACACCCGCGTATGTCGAAAAGGTCTTCACTAGCAGTCTTTCATGA
TGCAGGTCTCGAGCTAACCGTGATCGTATTCATGTTAAAGCAGCCGAACGATCTTTGCTA
AGGGGACGAGGGTGGCCATAATGGTGTTTCGTATGAATTGACTCGCAGCTTGCGCCGCCG
ACACCTTGCACACCCAAGGCAGTAGGCCAGGTTTAACGGTAACCTAGCCTTCATTCTTCG
AGTAATAACAGGCTGGGCGCTCGGTATCAGCGTCCCCTTTAAAGCAGCTCTCTCTTAAAA
GGGTAAACGCTGTGCCGGCTTTCGCTGAGGAT
>Rosalind_6094
TAAGAGAGCGGTGCAGATCAAACTTCCGGAGACAGTGCGTCTAGCTATTCCCGTGTATTT
CTCAACGAGTCCTGCAAATGATAAGGACACACCTATAGACGTAGGGGAGCAATGAGGGAC
GTACTCAGCACGAGTCTGCTCTGTACCCGAGCTCCGCTCCAGCTCAAGATGATCAAGTAA
TGAAACATTCCCGGGCGTTCAGGTATTATACCTGTAAGTGGTCTAGCAACAGCTCCCGTA
TGATGTCGCGCAAACTAAGATATCTAGGACCAGGCTGCAAACTTCCGGTTGTAGCGACTC
CGGATGTCATGGTAGTTGAATAAGTGGCGGTCTATGCCCCTATCGTCCTGTCCGGATGCC
TGGCCGGGGAAGACGGCCCAATTAAATACGATACCAAACAAACTACTTGAAGATATGGTG
TCTCAACTAGTTGTCAGTGATATCGAACGAGTAATTATTGTAAGATCGTACAATGGACAT
AATTCAGAGAGTGGGGCGGGCAATTACGTTGCTTAGAACCAATGGAAATTGCTCGTTATA
TAACCGCTTTGGCGTCAACCACCTAAAGGGTTTAGAATCGTGTTCACTCTTTCGTATAGT
TAAATCTGTTCTGCGCGAAGACATAAGGGGTGGGGTAAATCGTGACTGCCTCTACCCCCC
ACTGTCGACGTAATATGGACAGCAGCACAAGGATTTGTAACTAGGAGGCGTTCTCTCCAC
AGGAAAGGAAGGCGTTACAGTGCCACACTGCCCGGGGAGCCTGGTCCGACTGTCTGACTT
AGACCCTCCCTAATCATACCACCGTTTCCACCTTTTTATGAAGAACCGCTACAGACGAGG
ATACTACATGAAAAAATTCGTCTCTTGTGACACTAAGTGGGTGTTACCACGACTTGGGTG
GGCGGAGGCGTGGTCCTCGTCGCTCGTAGCCCCTTACTCAGAGTGACACCCTACCTACAG
TAGAACTTACGGCGCACCCACTTGAACTTCCAGG
>Rosalind_8503
TCGTTCCCTTCCCCCCTAAACGGGTCGGGCCCGTCGATAGACCGTTAGCATCCCCATACG
AGTGAAGAGTTAAGGGACTGGTACTCTCCTTACTCTATTGAAAGACACCCAGCCGAGTTT
GTCAGGGATCTCAATCTGATCTGGTACGCTCTTGGCGCGTCTTTGCCGCCCTATGTGGCC
AGTCCAACCCTTAAAACCCACTCTGTGTCAAGATTTCGCCCAGGAAGCTTCAAATAGGGC
GACAGGACTCTTCAGACCCGTTCTTAGGAAGGCCATGCGCTGAATGCATAAAGTTTCCCG
ATATGTACATTGGGAGCATGGAATGGTGCTACAGATCGCACCAGACGTCACTGCACTCTC
CGAAGGCTCGTGTCGCGTTTAAGGTGCCGACGTATATTACATCATGGATTTCATCGGAGC
TATTATATCGGCCCCCGTGTCTTGGAGGGTGGAGTGCCTTAATCTTAGTATGGTTATCAG
AGTAAGCGTGTGCGGCCTGCGGTGAAAGTGCGCAAGCTTTCAAGACTACTAGATAATCAA
AAGCTCGCTCCGTGCGATTAGACTTTCCGACGCAACTGCTTAACTTTTCCTAGAAGTTGC
GAGCTCGTGCTATCAAGGACTGCGCGTCATCACCACTGCTCGCGAGTAACCTGCATCGCA
GCGTAGGGGCGCGATAGCGACGGCCCCGACTAGAGCCATACGTTTCTAACAAAAGGCTAA
TTGCTTCCAGCCACGTCCGTGGCTACTGGCTTGGTGTCAGTAGTAGTAGACGGCAGTCAT
GTCACAGGTAATCTAAGGAGTAACCCTCTATTTACTCATGGATACTTTATTCTAATCACG
TAAACATGTCCTGATGTCGAAAAGTAACAGATTAACGCTTGTTTATTCCCGGGCTACCTG
TCCAGTTGTCCCCTGTTTCAAATCTGCCGTCCGGCAAGAGCCGAAATCGGCTTCTGTCCT
CCAATGTCACCGGGCGGGCAGTGACGCGTT
>Rosalind_5740
TACGCCCCGGCCCAACTCCTTGTGACTAAGCGATGATGCAATTTCTCCCGAACCTGGCGG
CCAGTGAGGGCTCTGGTTTTAACACCTACGAGTTCGACCACGGGCTCGCCTAGGCGGATC
TATGAATTAGGTTTCATGGAGTGGTCACTAAGTGAGTGCTATGAAATGCTCTGAGTTGTT
CCCTACTCTGGGGCACGCATGTTATGCGTTCGGCGGTTCGCACAAAGGGACGGGCACGCT
AGGATGCCATAACTCGCGCCCGAATCTTGAGTAAATAACGACTAGGCCTACTCTGCGTAA
CCCATCTATTTTTTATCACCATCCGGCGGCATGCACATGCAACATGCGAAACCGTGAGAG
CCCATCAGGATTATAAATTATCGGAAAGCGGAAAGGGCCTGGGAGTCGCGCTACGAACCT
GCGCGAGTGCCACAACTGCTATCTCATGGCTTCCACGAGCGACACGTCTAATAAGATTGC
CACTCAATCTTGCCGCCCCTCCCAGTACACCACTCCCGCTACGGTTCTGAGATCCGCTGG
GAAGACTTGTCCGAAACGTTGGGCAACTTACGACCTATATGTAGCCTGAGAACAAGACCT
CATAGAAAGGTATCTAATACGGGCAGGGATCAGGGGATGTTTGGCTTTTGCAGTACAGAA
AGAATGTTACCGCGCATCTCTACATAACAAAACCGATCGTTAGCATAGAAGTACAGAAGA
TAGCTGCTTGATAAATTGAGTCCCAGACATAATCAATCTGTGCGCCCACCCCTGTTATAT
GAGTAGAGACCATATTTAATAGGAGACGTTTCACGAGCTATTGCCACCATGTGTATTCAG
GTGTGTCACTTGACATTGTGCTCCCTAGAAACACCTTGGGGTAAGGACGTTGAAGGAATA
CCTATTTATATGGTCGACCATACAACCCGATCGACAGATGAGCTGTAAACTATGTCCATG
CGCCCGAACGCCATCAAAAGTGGACGTCGAAA
>Rosalind_3741
CTGGTCGAGAGTCACCAGTTCATATTAATAAAAGTTGTGTTAGTCACTGAGTGGCGCTGC
CGCAGCAATCGAGAAGTGCGGCCAATCGTGCCTTCATAAGTTGCATCGGCAGGACTCCGT
TGTGTAGATACCCCCTGCGTCTTACCTCCAGATATACCAGAGATTTAAACGGTCTCTTCC
GTGACGAGTAGATATCCATTTTACCGCAATCAGTTTTAATTGGCCGACCGCTATCCCTGT
GAACGCGCCATGTAGGGATCGCGCACAGATCTCTCGGCGGTGACCTCCCAGATCGAGACG
TGCTCAAACTGGTCCTCCTCTCGTCTGAATTCCCCCCGAACACTCGTGTCATCCACAACC
CCGCGCTCATAAGCCGACACCACTGACTTCAGTCAAATTTACCATTAAGTTAGTCCTTGA
GGCCCATATCTTGAACTCTTTCGGCAGATGCAGTTCGGGAGATAGAGATTCATTACCTTA
CTTAAGTGTCAGCGCGGCAGAGATACCGGCAGCCCGCCGCACGATAGGTATGTCATTGGA
GATGTAATCCGTAAACGTATTGGCCTGTCGACGATACGTGGACTCCAAGAAGTAATTTCA
AAGGCCCCGACGCAGACCATAGGTTCGTCCAATCATAGGTTGTTCTCCGCCGGGGTACGG
TAGCCTGCGTTCGAGTGTTAACTACTCACAGATCGTATTTCTAACGAAAGATTGGGCAAT
GGGAGAGTGTTACATAAGGACCAGAACATTTGGCTTGAGCATTGGTCACACTCGAGAGCT
CCCCTTCAGCGCATGAGCCAGCGTAACGCTGCCTATCAGTACAATATTCAAGCCTTGTAT
TCCACACATGGCGTTCCAATGCCGTGAGGAAGCGAACAGGCCAGAATTCAGCTGGACGTC
CAAAGTCGGCTTTATCCTCCCCTGAGATCGAAGAGAGTTCGTACCCTTCTTCAAGAAGTT
TGGCATATACTGTGGTGCATGAACTCAACAAGGGATT
>Rosalind_1530
CTCATGGCTTCCACGAGCGACACGTCTAATAAGATTGCCACTCAATCTTGCCGCCCCTCC
CAGTACACCACTCCCGCTACGGTTCTGAGATCCGCTGGGAAGACTTGTCCGAAACGTTGG
GCAACTTACGACCTATATGTAGCCTGAGAACAAGACCTCATAGAAAGGTATCTAATACGG
GCAGGGATCAGGGGATGTTTGGCTTTTGCAGTACAGAAAGAATGTTACCGCGCATCTCTA
CATAACAAAACCGATCGTTAGCATAGAAGTACAGAAGATAGCTGCTTGATAAATTGAGTC
CCAGACATAATCAATCTGTGCGCCCACCCCTGTTATATGAGTAGAGACCATATTTAATAG
GAGACGTTTCACGAGCTATTGCCACCATGTGTATTCAGGTGTGTCACTTGACATTGTGCT
CCCTAGAAACACCTTGGGGTAAGGACGTTGAAGGAATACCTATTTATATGGTCGACCATA
CAACCCGATCGACAGATGAGCTGTAAACTATGTCCATGCGCCCGAACGCCATCAAAAGTG
GACGTCGAAATGGTGCAGAGAGGGTCTCATGCTATTTGAAGCACCCTATGCTAGTCTCGA
GACTTCGTTGCACATAAGCTGGTAACCTTACGCGTAGCCAGAGTCAAAAGTAGTTCGATT
ATGTGAAAAACGGACTGCTAATCCCTCTATGTCTAGCCCGGCATCCTGTAGAGACTGCAT
CTCGTGCAAACTCGCCGATTTATTGATCTGTGGAGAAGAATCGCCCAGTTTACCAGATTA
GCGCTTGGACAGTTTATGTACCACGGACGCGGAGGAGATAGCGAAACAAGTGCGGACACT
GGGAGCTGCGCGTTGCAGTGTAGGGGCTTTCCATATTACCAAGTACCCAAGTGGTCAACA
GTGTTCCATTTGGATATGAGAATGTAAATGCAGGGAGGATCACGGCGCTTCATTGCTGGT
CGTGTGTTACCATTGAGCAAAAATCCTCTCGGT
>Rosalind_2872
GAGGAAATTTCAAGTGCCTTAACCGTTCAGGTGATTCATGAGGTATTCTCAATTCACCCC
GAATAACGTTCTTACCCACCATGGTAGACAGACCAGTAACTAGGGGTGGATATAGTCAGA
CCCGTTACTCCCATTCCTCATAATAGAACACCAACATCACAGTAATCCGGACTTAGAAAT
TTCAGTGTGCTAACTCAGACGACATTGAACAGGGTACAACGCCTGGAATAGGTCTTTGTT
GCGCGCTTTTGTATATAAATCTCTTACACGGAACGGCGACGTCCATATGCTTTTCTAGAA
ACCGTAGGGAACACGTTGATAACATCTTGATGGTTTAAGAGGGGATTCGCCTCGCCAACC
CCCCAGGAGGCCTAGAGAAGCAGCGAGGTAACACGCCTAATGTACAAGGAACTAATGAAA
GCACGTCCGGTAAAAGCAACCTGAATGGTATCAGAGCCGTTGCTACACCTAAGAGAGCGG
TGCAGATCAAACTTCCGGAGACAGTGCGTCTAGCTATTCCCGTGTATTTCTCAACGAGTC
CTGCAAATGATAAGGACACACCTATAGACGTAGGGGAGCAATGAGGGACGTACTCAGCAC
GAGTCTGCTCTGTACCCGAGCTCCGCTCCAGCTCAAGATGATCAAGTAATGAAACATTCC
CGGGCGTTCAGGTATTATACCTGTAAGTGGTCTAGCAACAGCTCCCGTATGATGTCGCGC
AAACTAAGATATCTAGGACCAGGCTGCAAACTTCCGGTTGTAGCGACTCCGGATGTCATG
GTAGTTGAATAAGTGGCGGTCTATGCCCCTATCGTCCTGTCCGGATGCCTGGCCGGGGAA
GACGGCCCAATTAAATACGATACCAAACAAACTACTTGAAGATATGGTGTCTCAACTAGT
TGTCAGTGATATCGAACGAGTAATTATTGTAAGATCGTACAATGGACATAATTCAGAGAG
TGGGGCGGGCAATTACGTTGCTTAGAACCAATGGA
>Rosalind_6491
GGCAAGCCTGTGAATCCCCTCGCGTTAACCTTTTAACCGCCGGACAACCGACCTCGCGTT
CATTACTAACTGAAGAGGCACTTTGATCGCGTTAAATCATGCTGAACCGCGGGGAAGGTA
ACGATGCTCGTCTCCGCAACCTTGAGTCCCGAATTGATTAAGTTTGTTCCACAAAGCCGG
ACACCCCATGGCGATACTTGCCTCGTCGCTGAATGGCGGCATAGTGCGAATTTGGTTTAG
GAGATCATATGCCCTGACCAGTATCTTAGAAGCCTCTAGGTTGACTACGCCAGCTGCAAC
ATCCAGGATGTCACCAGTCATGTATGAGCTTGTCGACCTGCCTATAGATTTTACCGCAGT
TTCGCCGGTACGCTAAGTGGTTCGATCTGGACCCGGGAGCGGTACTGCAAGCAGTTCTTT
ATTGTTCTTTTGGAACCAGGCCACTTAGCATGGATATGGTCTCCACTTACATCAAGGAGA
GTGGGTTGGAGTGCTCCCAAGTCACTCGCACGCACGAAGGGATGTATTCCACAACCGTGT
CTCTCCTGCGATGATTCAGGTCGTTGACGTACCACTAAGTGTCGATCGCTGACCTGAAAC
AAACGGCTCTAATCGCAACTTGGGGTTTGTCGGAACGACAATGAATGCTGCATTGATGCA
TGCCCCGTATCTCGTGAAACACCACGGCCTCCACACAATAGTGTACAGGTTAGTTCTGAT
TCGAATAGATTCCAAACGTCAATAATGGCGCTAGGTATTTAGCCAGTCACACGGACTTCT
AGCTGGCTCGAGTCTGTCTTTCACGGTAGTTTCAGAAGACATGTGGCCTTCCGCCTCAAG
CCGCTAGACTACCGGTCGAATATCGCACCGTAACCCACGAGGCCACCACATGCGTACCCA
CAGCTACAGCCTGGCAGGCACCTGTCTCTTTGCATAACCCCGCCGTCGGACTGCCACTAA
AGAGTTGCTCCGGACGCAGT
>Rosalind_9989
AGCTGAAATTTATAGCACCTAAGGCGGCTGTCATGTTTACCCGTCGCGGCAAATGACGTG
GCCCGGCTGACCCCAAAAAAGTATACTACGCCCTCCGGATCACGCGCCACCGGGGTCACG
AACCGAAGTCATAGGTCTCTTGGGCACCCCCCACACTGCTACGTACGAGGACGGATGCCT
CGGAGATTATGGCCAGGAGCAGTTCGGATGTCAACACCTTGTCTTTCTGTATCCCCTTTA
TCGTAACAGGAACGTTCGCAAACAGCAGCATGTGTCAACATACATCTAAACACCGACTTA
CGGGTCGGCGCATCCAAAAAGGCTTACGATCAGTCAAACATACACAATTTTCCCTAGATG
TAGGTCTCTTGCTTGCAGCATCCTTCGGGTTTAAAGACATTCCCATGCTGGTAGAAACGT
AAACTATCCTATTAAAAGTATTTCGTTGTGGTTTTTGCTCATAGAATGAGGTGCTGCGGG
TGCAGGCGCAAGTGGACTTGATGGAGCTCCCTCCACAAGGGCCTACATCAAGCACATCGC
GCCCAGCGAAATCACAGTTTCTCCATTTATGCCGCGTCAATTGGAACAGACTGGAGTTCT
GTTTAGTTCGCATTACTAAGAGGGGGTTCGATAGCTTTAGTTTCGCGTACAGATTGGGGA
TTAAGGTCCGAGGGGTCCCACAGTGGCCTTGCGCGGACCCCCGTCCAAGGAGCCGCTTCC
AAGCACGTTCTTCGTACTGTTATGCGCCGGGCAGCCAAATGCAAGGATAATGTCATGATT
TACGCTAGTCAATCTTGGCCCGATGACTGCTATTGTTTAGAGGTACTCCCACCATCGGTT
ATCGGTCTACCCGCGCAATGATCTCCGATTCGAGTGAGGCCAAGAGCGAGACTTATGAGG
TTTGGGCCGGTCGCACGGGCGCTTAACAATAATCTTTGGCCATATTCATCCATGTGAGGC
AAGGGCGCCTTACGTGAACATGTGGTGCCAACTCT
>Rosalind_8563
GTACAACGAGCGATTATACGTACTATCGCAGGGCGACTTTCCTCAAGTAACTTTCGGTAC
GCCAAAGTCCTGCTCCATTCGTTATAACTGCATGACTATCCGACGATTGTGCAAGTAACA
CGACAATTGCCATTACGTCGACTATTTGTCCGCAGGGGCTACTTCGCTGTATCCACTTTG
ACTGAGGCTAAACGCGAAACCTGTGGGTTAGAAGTCCCTACCGTACGATGGATATCTGCT
AAACTAGTTCCGATCACTATGTTTTAACCTACTCGATAAGTGGAGATACATTTTCAGTCG
CCGGCTAATGGCAAATTATGACTCTCCAAAAAAGACTCTACACCCTGAGCTCATGGCTTG
ACTTTGACAGTACCACCGCTAGAAACTAAGTCTTTGACACACGTTAGTTCTAAGAACGAT
CCGCAAAGGGGGAAGGCGGATAATTACTGTCATCGAAAATACTTAGTAGACGTAAACCCA
ATTGAATCCTACCTCGCCTTAGTATAATTCTGGGGTGGCTTGTGCTCGCCCCCTTCATAT
CGTTGCCAAATGGTAAGTTGCAACGATCGCTATACTTCCCAACATGGCCGCATGGGAAAT
ATTCACGGAAATCAGTATTCAAGCGTCCTCTTTTTCCTCCGAGCGCCCCCCTATTTACGT
CTCGGCTCGTCAAAAGGGCTGGGTCGGGCGACGCATAAGATCTCTACCCTCGTATGTCTT
AGGATGACTCTCACTCTACTACAAGATCTTACACAGCGGAGAGTAATTCGCTTGAACTAA
ACCCTCCTACAAAACAAAATGCCAGCATGCGTGAGCGGGCTTGCATAAAACTACAAGTCT
CCCATGTCGTTTTGACGATATTACCTACGGGCCTAGTACTGACGGGTATCCTATCGCGAA
CGCGCCCGGGGCTAGGCATTACAAACGCAACAGGAATCGAAGCCTAACCTTAAAAGTTCC
GGCCATAAGAATACTGCTCAAAAATGT
>Rosalind_7208
GCCCGTGTTGGAATGAGTAAGCGCTCGAGAAGTGAGTTTAGGAGTTTTATAGGTGACCCC
CGGGCTCTCGAATTGAGATAGCTGAATTATGGAGTTAAGCCAACTGCTGTAACTCGCCTA
GAGAGCTAATAGTGGCATGTAACGCTAAGTCTCAACGTGATCGGCCTTTGACCGCCCTTG
TACCGATAGAGTGAATTGTTTTAAGCCGATTATTCGTTGGGGCAGTGCGCTTATCGAGGT
AGTTAGTCTGGGAATTCGAAATGGCTACTGGTCATGGGGGAACTGAGGTGTCCGCCCGCA
GACCAGAGGGAGGCCCGTGGTAAATTATGAGAATATTACGACAGCAATTATGCTGCGTTC
CAATTGACTGTTTTTGAAACAGCGACCGCAAAACTATCTGTTATGGCACCGTTTGACTCC
TACATGCCTAAGTCGGGCAGGATCCTTGAGTCCAATTCAGATGTTCGAACCAAAACTCCT
AGGCTAATTAATAATGATGCGTGCCTTACGTAACGGCTTGCATTCTTCGTACCGGGCCTG
TATAGGGAATCGCCGCGATTGTTGGCAGTAAAACGCTATTTGTAAAAGGGTGGCCCTACG
GTCGAGACCGGGTGCAGGTCAAACATGCTCCTGGAGGGCCCGCTCAGATCGGTACTAAGA
CTTCCCTGACGATAGTCTCCCTTCAATGGAACCTGGCTTGTGGATAGATACTAAGGAGTA
TAGTATTCGTAGTGGGTGACTGGGACGTGACGAGGTAATCTCGTCGCACATCGACGGAGT
ATATAAGCCGATGACAACGCCACCAGTTTAACCGAGAACCTCGATCGGGGCGGACCCATG
TATAAAACGAGACCATGAAAACCCTCACCGCGTCAGCATGCCTAGGAACCTGCCCACCTG
AGCGTTAGTTTTATTCACCGTGACGACACCAGTCACACCCGCGTATGTCGAAAAGGTCTT
CACTAGCAGTCTTTCATGATGCAGGTCTCGAGCTAAC
>Rosalind_3636
AACTTTGGTGTGTATACACTAACGTTCGGCACTTACATAGAACAATCATTCGATAACGAG
GCGGTAATGTCTGTATATCCGCAATTTTAAATCGCGTCCCAAGCAGTTAACTCTAAAAGC
ACTAGCGTCCAGGTCACACTGGGGGAAGACTAACCGGACGCCTTGAAGTCCAATTATTCA
GTAAGGAAAGCGCATCTTGCTGTGGATGATCGCGCTTGTGCGACGACGTGAGAACACACT
ACTGTATCAAAAGCGGACTGCACGGTGCCCGCCCATTCCCAGGGAAATCGATGACGAGGA
CGGAATTAGGAAAGAATCTGGTCGGCCCGTACGCCTGGGTCATTATTGTGTTGGCTAAAG
TCCGTGACTATGAGCGGTCCAGGGATATCAGAGTTGCGATAGTGCTGACACTTAATGAAC
ACACTCGCCCGATGCTGCATGCGGTGCACACCGAACTCATGTGGAAATGGTCTGGTTCAA
TCGTTTGTGATGTACGGGGTCTTGCTCCATAAGACTAGTGACTAGTGTGGTCAGGGCTGC
GCCCTGTCAAGAGTGGGCTGAACCTGCAAGATCTGCTGATGCTGCACGTCTCCCAGAGAT
AAGGGTGCTGTCCTCGCATCCGCAACTGGTTCATACCTCCGGACTCGAAACACCCCTGGC
CGCTTTGCAAAGATCTAGACGAGGAATGACGGGCAACCTTATTAAGGACCCGCCAGTGTC
ATGGCCAAGTCATGGCGCCCCCGTGGCAAGGCGGATTTATTACTCAGTTGCTTCAGGTAA
CAGTCTTTACCTGGAGTGCCGGAAACTTCGGGGATCTCGTATCGACCCATAGCAGCTGCT
ACGTAGAGCGCTCGAAAGAGTTCTAGGAAGCCCGCAGAAGACTACTAGTAATGGACGGCG
TAAAGGCTGCGGCGCATGTTCCACGCATGTCCAAAGCTTCTCTTAGAGTCGTGTGGGCAC
ACCGGCGTTAATTTAAGGAGTAC
>Rosalind_2194
TAACCGTTTCTAGACCGTCAGTGGCCCTGGATCGCCGCTGCTTGGATAATGAATCCACGC
TGCTCAAACCTCCACGCTAATGGCTGCCAACGGCACGATGGGATGATTCTACTCCATCGA
GCCTCAAAAGTCTGGCAGCCTGAATATGTCGTATTGGGGTTGTGATCTAATCCAGTTGCA
AGTCGTCCGCTTCTTCATGGACTCTGTTCGGACTCTCAAGCATAAGCCAAAGTCTCATGC
CCGGGGCAAATATACAGCCGCTTTTAGGGCATAGAGAGTGGTATCTACGAACGCGTCACT
TGAGCCGTCCTGGGTCCGCCTAGGGAGCGGGAAAACGCAGGCAATGATTTAATATAGAAC
CATGTCACAGCACAGGCTATATTCAATCGAGTTGGGCGTTGCGGTGGCTCGGCACTCGTA
TGGGAGGCGCCTCAGTTGAGGCAGACATGTTAGGAAGACGATTGAGGGTACATTCATCTG
CTTAGTCCTAAATGGCCCAGTATACCTGCAGTGACTTTTAGGAGTGGGTTATGATGCCCA
GGAAAAATTTCGTACTCCGGAGGCGCCGGCGAACGACCCGATGGGTTTTCCTAGTATTCC
CAGGAAAAACATTCGAACAACTCCAGCCAAGGGGACGACATGGCTTCAGCAATCCTTATA
ACGAGCAAAAGAGAGGTCTCGGGCCACAACGAATCAACTCAGCTGGGAGCAAATCCGACT
TATGATTAAGATGGGCCACGTTCCATCAGAACCCTCTTTATTTGCTACTCATCACCACGA
TGTGGCCTAGGATGGGATCACACATTCAGCAGTGACAAACTGTGTTCAGAATGTACTATA
CTGCGGGTTAAGTTGGAACCCACCATTTACACCCAAATAAATATACGCGAATGTGCAACG
TCACTGGCTACCCATCCAGTTGAGAACAAAAAGTCGAAACTTTTAGCAATATTACTTCGT
TTAATGTGCAAACTGACGTGCACTTAGCACGGTG
>Rosalind_2048
CGTGGTCGGGGGCGGCTAAACGAGATGACGGCCACGTTGTTTATTGGCTCTATGGGCAGT
AAGAGAAGAGACGGAAGGTCGCTATGTATCCCTTTATGATCAATCTGCTGTCCGTACACT
GAAATAGTTTGACAATCCACGCTTGGCCTGTGCTACTTTGGTTGCATCAGTATCCAAACC
AAATCGTTTGGGATGTAGCCAGCACAGACTATGCAGCGGCAGTCGCTGTCTCGGGAATAC
TCCCGCGGTTTATGTCGAGATGGTGCACTGTGGGACAACCTTCAGCGGTTATATCATGCT
GCGATACGTCCTCAATAATTATCGGGGCAAGTCTGGTTTTCCTCAACTATCTAGTATGAA
CCCTGTGCACGCCCTCCATATGCTCCCCTTAGGGTCGTGCTGCCGTGGTTAAGCCCCACC
CGACTTGCAATAAGTGTATTATGAAACGTGGGTCAAAAAGAGTCGCTTTTGGTACGCCAC
AACGAGTTAATAGCTAGTCAATCTGTGGGTCCGTGTGGCGTTCAAAGTGGGGATCCACGG
GAGGACCTGTCACTCGTTCCCTACGCGACAATAACTTCCCAGCGGTCCATTGGTATGTCT
GGACGCGGGTACGAGATCCGTCACAGTCTACCTTGATGGCGAATCAGTCACCCTAGCATT
TCCTAGAGGTCCTGCCGTTTACAACAAGTTACAGATAAACCACGTATGTGTTACTAGAAC
GCGCACTTGAGTACGAGTGCCCATGCATTCAACTACTGTTCGTTTAGTATACAGAACCTA
CGAGGGTGCAGATAAATCGGCTCCTGAGTACGGTGATATTTTAACCCTGTTTATGGGAAT
AAGTACGGTGCGTGTACGGCATAGTTAACATCAAGTGCGCCGTCGCATGCGTGTGAACCG
TGAATTTTTTATCGCCGGTAGGCACGTGAGCGCATAGTAGGAGTAGGCCCGAAGTACGTC
GTATTTATCACATTCCTCTAGAAAGGGTCACCT
>Rosalind_8059
AAAGGAAGGCGTTACAGTGCCACACTGCCCGGGGAGCCTGGTCCGACTGTCTGACTTAGA
CCCTCCCTAATCATACCACCGTTTCCACCTTTTTATGAAGAACCGCTACAGACGAGGATA
CTACATGAAAAAATTCGTCTCTTGTGACACTAAGTGGGTGTTACCACGACTTGGGTGGGC
GGAGGCGTGGTCCTCGTCGCTCGTAGCCCCTTACTCAGAGTGACACCCTACCTACAGTAG
AACTTACGGCGCACCCACTTGAACTTCCAGGATCTAGGTACGGCGATCGTTGCATACCGC
TTACACATCACTCGAGGGGGGGACTGCCCAACTTGTTTGACCTACACGACTGAATCCACG
GTACGCTTAACGGGTTTCGTGATAAAGAGTAGGTTCTGTCGGCTCTCCGTTAGCTCAACA
CGTTGCTTGAGTGTAACTATCCTCAAGTACGTGGTCCGTGGCCTCTGTCGGCAAGCACAT
TGTGTTCAAGTGGTGTCTGGGCAACCTTGATTGTAGGATGACTCCGATTTATAGCCGATT
ATTAGGTTATAAGTCTCTCCTTCTTGAGATCGATGTCCAAAGTCACGTTTATGGTTAGGT
CTTCGGTATGTAAATGAGGTCACCCCTCGAGCAAACAACTCCAAAGTTGACAGGCATCAC
TCACCAGACGCACTTGTCCCTTTTTCCGGACTATACGCGAGCACACTGCCTGCCGCGCAT
GTCCGTGACCCCGATGACAAACTCGAACGCGTTGTCATTAGTGCCAAGGCGGGGATGTTT
TCCGCAACTTTGGTGTGTATACACTAACGTTCGGCACTTACATAGAACAATCATTCGATA
ACGAGGCGGTAATGTCTGTATATCCGCAATTTTAAATCGCGTCCCAAGCAGTTAACTCTA
AAAGCACTAGCGTCCAGGTCACACTGGGGGAAGACTAACCGGACGCCTTGAAGTCCAATT
ATTCAGTAAGGAAAGCGCATCTTGCTG
>Rosalind_5924
AGGAATGACGGGCAACCTTATTAAGGACCCGCCAGTGTCATGGCCAAGTCATGGCGCCCC
CGTGGCAAGGCGGATTTATTACTCAGTTGCTTCAGGTAACAGTCTTTACCTGGAGTGCCG
GAAACTTCGGGGATCTCGTATCGACCCATAGCAGCTGCTACGTAGAGCGCTCGAAAGAGT
TCTAGGAAGCCCGCAGAAGACTACTAGTAATGGACGGCGTAAAGGCTGCGGCGCATGTTC
CACGCATGTCCAAAGCTTCTCTTAGAGTCGTGTGGGCACACCGGCGTTAATTTAAGGAGT
ACTGATACGTACGACGAGCCCACCTTGCAGGGTTGCAATGATACTGTCTAATGTCACTAT
AGTGAGTGGTGACCTCCTTTGATAGCCGTATTGCTGACACACTTGGTCAAATCTCTACTG
AGATGTAACCTACTGGTGGCTCGGAGTGCCTTGGCTCCTATGCTAGCAGAACCGAGCCCC
AGATCACCGACATGTAGACGGGGCCAGTTGATATTACTTACCGGCCCAGCCCCAGAATCG
TCTCTTCTTGGCGTGCAAGCAGATCTCAGCACTATAAGAATGTGAAGAGCTCTAACCCTA
GACTCTTCAATTTCCCCGTCCGCCGTAAAGACACCGGTCAGCGAACCTTGATCTATAGAA
GAAACGCCGGTGGAATCAGGGAGGAGCCGGAGAGGTGGGCGGTTATGCACGCGTAGGTCA
ATTATAAGACAATAAGAAACGAAGGTTGGTAAATAGAGCCCCTCTTACAGTCCGGGGAAA
TCCACTACTTGCTTGTCGAAAATCGCCCTTCACCCGAAATACGCTAAGTTGTGATCAGCA
TGATGACGCAACCTACCGCTACTTTTGTTAACCATCATGACGAAAACTAGGGCTAATCTC
GTTCCCTTCCCCCCTAAACGGGTCGGGCCCGTCGATAGACCGTTAGCATCCCCATACGAG
TGAAGAGTTAAGGGACTGGTACTCTCCTTACT
>Rosalind_8598
TAGCGACTCCGGATGTCATGGTAGTTGAATAAGTGGCGGTCTATGCCCCTATCGTCCTGT
CCGGATGCCTGGCCGGGGAAGACGGCCCAATTAAATACGATACCAAACAAACTACTTGAA
GATATGGTGTCTCAACTAGTTGTCAGTGATATCGAACGAGTAATTATTGTAAGATCGTAC
AATGGACATAATTCAGAGAGTGGGGCGGGCAATTACGTTGCTTAGAACCAATGGAAATTG
CTCGTTATATAACCGCTTTGGCGTCAACCACCTAAAGGGTTTAGAATCGTGTTCACTCTT
TCGTATAGTTAAATCTGTTCTGCGCGAAGACATAAGGGGTGGGGTAAATCGTGACTGCCT
CTACCCCCCACTGTCGACGTAATATGGACAGCAGCACAAGGATTTGTAACTAGGAGGCGT
TCTCTCCACAGGAAAGGAAGGCGTTACAGTGCCACACTGCCCGGGGAGCCTGGTCCGACT
GTCTGACTTAGACCCTCCCTAATCATACCACCGTTTCCACCTTTTTATGAAGAACCGCTA
CAGACGAGGATACTACATGAAAAAATTCGTCTCTTGTGACACTAAGTGGGTGTTACCACG
ACTTGGGTGGGCGGAGGCGTGGTCCTCGTCGCTCGTAGCCCCTTACTCAGAGTGACACCC
TACCTACAGTAGAACTTACGGCGCACCCACTTGAACTTCCAGGATCTAGGTACGGCGATC
GTTGCATACCGCTTACACATCACTCGAGGGGGGGACTGCCCAACTTGTTTGACCTACACG
ACTGAATCCACGGTACGCTTAACGGGTTTCGTGATAAAGAGTAGGTTCTGTCGGCTCTCC
GTTAGCTCAACACGTTGCTTGAGTGTAACTATCCTCAAGTACGTGGTCCGTGGCCTCTGT
CGGCAAGCACATTGTGTTCAAGTGGTGTCTGGGCAACCTTGATTGTAGGATGACTCCGAT
TTATAGCCGATTATTAGGTTATAAGTCTCTCCTTCT
>Rosalind_4304
ATTCGAGTGAGGCCAAGAGCGAGACTTATGAGGTTTGGGCCGGTCGCACGGGCGCTTAAC
AATAATCTTTGGCCATATTCATCCATGTGAGGCAAGGGCGCCTTACGTGAACATGTGGTG
CCAACTCTTGCCACGCCGGTGTGCTCCGGTGCCGCGGTAGGATATTAACGCTCCGGGATA
TTAGTGTGTTGTTGGCAGAATAATCAACGTAGACAGACACTTTACCTACTATCCTGGTGA
TTCGGAACATCGGTAGCGGCTCGCTTATCCGACGTTCAGCCTAGGGAGTGCACGACGGAA
GCTACGAGCCAAGTTAAATGTTGATGAGGAGCACCACTACAGAGCGGGATACAGTGTTGC
GCTCGCTCTTCTTGAACGTGCAGGTCCTGCTTATGGACCAACTATTAGCACTAGATCGCC
CATACAGCTCGACAGGGAATAGTGGATCCTCATATGAGAGTACACACAAGGGTCCAAGGA
AACCTTGAAATTTGGTGTTAGCCACGGGCGAGTATATACGAATCTTACGTTTTTGACCGC
ATGTACCTAATCACATGTATGAGGCAACAAACGGTAGGATATGAGATTGATAAACAGTTA
CACGGGGGTCACGGGCAACCTCGCGGGCTGTCTTCCTATCCAACTGTTACTCTTTCATAG
ACCTTAAGATCGGCTTGCTACGCAGCCGATCCAGACCTTTAAGAACGTCATTACAGCTGA
AGACAGTGCCTAAACACGGGTGGGAAAGGTCTTCTCTCACACCTATCCGAGACCGAGTAC
TGTGCACCGAGGCTTAAGTAAAGGGAAAGAGAAATGGCCATACGGGGTCGTTCCCCAAAT
ATTAACTGACCAGAGCAGACCAGGTGTATGCTGAAGCACCGATCGTTAGGTCCGAATTTT
GACAAGGACCAGCTTCGGCAACTCAGGACTGCATTCGTATCGGTACGCCGCATCAGATCG
GGTGAAAGGGCCGAATCTACGGC
>Rosalind_6378
GCGTTTAAGGTGCCGACGTATATTACATCATGGATTTCATCGGAGCTATTATATCGGCCC
CCGTGTCTTGGAGGGTGGAGTGCCTTAATCTTAGTATGGTTATCAGAGTAAGCGTGTGCG
GCCTGCGGTGAAAGTGCGCAAGCTTTCAAGACTACTAGATAATCAAAAGCTCGCTCCGTG
CGATTAGACTTTCCGACGCAACTGCTTAACTTTTCCTAGAAGTTGCGAGCTCGTGCTATC
AAGGACTGCGCGTCATCACCACTGCTCGCGAGTAACCTGCATCGCAGCGTAGGGGCGCGA
TAGCGACGGCCCCGACTAGAGCCATACGTTTCTAACAAAAGGCTAATTGCTTCCAGCCAC
GTCCGTGGCTACTGGCTTGGTGTCAGTAGTAGTAGACGGCAGTCATGTCACAGGTAATCT
AAGGAGTAACCCTCTATTTACTCATGGATACTTTATTCTAATCACGTAAACATGTCCTGA
TGTCGAAAAGTAACAGATTAACGCTTGTTTATTCCCGGGCTACCTGTCCAGTTGTCCCCT
GTTTCAAATCTGCCGTCCGGCAAGAGCCGAAATCGGCTTCTGTCCTCCAATGTCACCGGG
CGGGCAGTGACGCGTTGATACGATATGGCAGTCGTTCTTGTCGTGCAATGACGGTACCCA
AGGGTCCCAATGTCGATGTAAAGCAAAATTTTAGCACAATTGATCGCACATGAGATAGTA
TCGCCGATTGCTCAGCACCCAATCCGTGACAGAAGGCAGTAAAGGACGTCTTAAGGCGGT
CTGGTCGAGAGTCACCAGTTCATATTAATAAAAGTTGTGTTAGTCACTGAGTGGCGCTGC
CGCAGCAATCGAGAAGTGCGGCCAATCGTGCCTTCATAAGTTGCATCGGCAGGACTCCGT
TGTGTAGATACCCCCTGCGTCTTACCTCCAGATATACCAGAGATTTAAACGGTCTCTTCC
GTGACGAGTAGATATCCATTTTACCGCAATCA
>Rosalind_2508
GTCCGTGGCCTCTGTCGGCAAGCACATTGTGTTCAAGTGGTGTCTGGGCAACCTTGATTG
TAGGATGACTCCGATTTATAGCCGATTATTAGGTTATAAGTCTCTCCTTCTTGAGATCGA
TGTCCAAAGTCACGTTTATGGTTAGGTCTTCGGTATGTAAATGAGGTCACCCCTCGAGCA
AACAACTCCAAAGTTGACAGGCATCACTCACCAGACGCACTTGTCCCTTTTTCCGGACTA
TACGCGAGCACACTGCCTGCCGCGCATGTCCGTGACCCCGATGACAAACTCGAACGCGTT
GTCATTAGTGCCAAGGCGGGGATGTTTTCCGCAACTTTGGTGTGTATACACTAACGTTCG
GCACTTACATAGAACAATCATTCGATAACGAGGCGGTAATGTCTGTATATCCGCAATTTT
AAATCGCGTCCCAAGCAGTTAACTCTAAAAGCACTAGCGTCCAGGTCACACTGGGGGAAG
ACTAACCGGACGCCTTGAAGTCCAATTATTCAGTAAGGAAAGCGCATCTTGCTGTGGATG
ATCGCGCTTGTGCGACGACGTGAGAACACACTACTGTATCAAAAGCGGACTGCACGGTGC
CCGCCCATTCCCAGGGAAATCGATGACGAGGACGGAATTAGGAAAGAATCTGGTCGGCCC
GTACGCCTGGGTCATTATTGTGTTGGCTAAAGTCCGTGACTATGAGCGGTCCAGGGATAT
CAGAGTTGCGATAGTGCTGACACTTAATGAACACACTCGCCCGATGCTGCATGCGGTGCA
CACCGAACTCATGTGGAAATGGTCTGGTTCAATCGTTTGTGATGTACGGGGTCTTGCTCC
ATAAGACTAGTGACTAGTGTGGTCAGGGCTGCGCCCTGTCAAGAGTGGGCTGAACCTGCA
AGATCTGCTGATGCTGCACGTCTCCCAGAGATAAGGGTGCTGTCCTCGCATCCGCAACTG
GTTCATACCTCCGGACTCGAAACACCC
>Rosalind_2911
TATATCATGCTGCGATACGTCCTCAATAATTATCGGGGCAAGTCTGGTTTTCCTCAACTA
TCTAGTATGAACCCTGTGCACGCCCTCCATATGCTCCCCTTAGGGTCGTGCTGCCGTGGT
TAAGCCCCACCCGACTTGCAATAAGTGTATTATGAAACGTGGGTCAAAAAGAGTCGCTTT
TGGTACGCCACAACGAGTTAATAGCTAGTCAATCTGTGGGTCCGTGTGGCGTTCAAAGTG
GGGATCCACGGGAGGACCTGTCACTCGTTCCCTACGCGACAATAACTTCCCAGCGGTCCA
TTGGTATGTCTGGACGCGGGTACGAGATCCGTCACAGTCTACCTTGATGGCGAATCAGTC
ACCCTAGCATTTCCTAGAGGTCCTGCCGTTTACAACAAGTTACAGATAAACCACGTATGT
GTTACTAGAACGCGCACTTGAGTACGAGTGCCCATGCATTCAACTACTGTTCGTTTAGTA
TACAGAACCTACGAGGGTGCAGATAAATCGGCTCCTGAGTACGGTGATATTTTAACCCTG
TTTATGGGAATAAGTACGGTGCGTGTACGGCATAGTTAACATCAAGTGCGCCGTCGCATG
CGTGTGAACCGTGAATTTTTTATCGCCGGTAGGCACGTGAGCGCATAGTAGGAGTAGGCC
CGAAGTACGTCGTATTTATCACATTCCTCTAGAAAGGGTCACCTAGCGTGAGTTCTGGCG
CGGGGACGGTACGATAGATAATGGGGTGCTCACTTCCAAGCGTGTTGGCTGGCAGCCCAA
CAACCATTTATGATAACCGTTTCTAGACCGTCAGTGGCCCTGGATCGCCGCTGCTTGGAT
AATGAATCCACGCTGCTCAAACCTCCACGCTAATGGCTGCCAACGGCACGATGGGATGAT
TCTACTCCATCGAGCCTCAAAAGTCTGGCAGCCTGAATATGTCGTATTGGGGTTGTGATC
TAATCCAGTTGCAAGTCGTCC
>Rosalind_4102
TAATCGCAACTTGGGGTTTGTCGGAACGACAATGAATGCTGCATTGATGCATGCCCCGTA
TCTCGTGAAACACCACGGCCTCCACACAATAGTGTACAGGTTAGTTCTGATTCGAATAGA
TTCCAAACGTCAATAATGGCGCTAGGTATTTAGCCAGTCACACGGACTTCTAGCTGGCTC
GAGTCTGTCTTTCACGGTAGTTTCAGAAGACATGTGGCCTTCCGCCTCAAGCCGCTAGAC
TACCGGTCGAATATCGCACCGTAACCCACGAGGCCACCACATGCGTACCCACAGCTACAG
CCTGGCAGGCACCTGTCTCTTTGCATAACCCCGCCGTCGGACTGCCACTAAAGAGTTGCT
CCGGACGCAGTTTTGGTGTGATCTCTAACGTGTGTGTAGCGTAAAAATCGCTAAGCGTGG
GTGACCATCCACGCGGACGTCAACGATTTGAAGGCTATTGAACATACTTATCCGAAGAGT
CCGAGGGGGACGGTTATGGAAACTATGGAATAAATGTAGATGCGTATTGGGTATGACGGA
CCGAGAATGAGTGACAAGGAGACTGAGAGCCACGTCCTGATACGATCCTGCGAGCTACTA
CGTTCTGAACTACACTTCATAGCACAGAGCTAGTGCTCCTGTTCGGTACGGGGGGATCGC
ATGGCAATTATAATCGAACGACCGAATAAAACACCGCATCTCGAAGAACAGGATGAGAGA
GGGTCGAAAAGATGTGCGCCGGCAAAGAATGAACCCGGAATCACGTTGACTCACATGACA
GACCAAGTTAATACTACGTGTAGCGCATGTTGACCGAAGATTTCCATTGCCCTTCCAGAA
CTCGGTGAACGCAGGTGTGCGTTACGGTACGTACATGACTAGAGGCGTCCCGAGTGTAAT
GGGAACCCCAGGAGCCTTAAGTAACCGTTTCCCTAAAGGGGGTTGAAGATCGGTCCGCCG
ATTGGCAAAGCGGGTGTCCCGGAAGAGCT
>Rosalind_6302
CAACCCCGCGCTCATAAGCCGACACCACTGACTTCAGTCAAATTTACCATTAAGTTAGTC
CTTGAGGCCCATATCTTGAACTCTTTCGGCAGATGCAGTTCGGGAGATAGAGATTCATTA
CCTTACTTAAGTGTCAGCGCGGCAGAGATACCGGCAGCCCGCCGCACGATAGGTATGTCA
TTGGAGATGTAATCCGTAAACGTATTGGCCTGTCGACGATACGTGGACTCCAAGAAGTAA
TTTCAAAGGCCCCGACGCAGACCATAGGTTCGTCCAATCATAGGTTGTTCTCCGCCGGGG
TACGGTAGCCTGCGTTCGAGTGTTAACTACTCACAGATCGTATTTCTAACGAAAGATTGG
GCAATGGGAGAGTGTTACATAAGGACCAGAACATTTGGCTTGAGCATTGGTCACACTCGA
GAGCTCCCCTTCAGCGCATGAGCCAGCGTAACGCTGCCTATCAGTACAATATTCAAGCCT
TGTATTCCACACATGGCGTTCCAATGCCGTGAGGAAGCGAACAGGCCAGAATTCAGCTGG
ACGTCCAAAGTCGGCTTTATCCTCCCCTGAGATCGAAGAGAGTTCGTACCCTTCTTCAAG
AAGTTTGGCATATACTGTGGTGCATGAACTCAACAAGGGATTAATGTCGGTTTATGAAGT
AGAACAACCCGAGGCACCTACTCAGTTAATATACTCGGCAGAAGCTACGCAAGACACGGG
GGACGCGGAACTGCCGTGAAACGCGTAACTTTTTGCTCGTTCAGGGGTGTAAGGCTTTAG
CAGCTTCTGCGCTCACTCATTTTTGTAGGTGGCGTCAGGGAACGCCCCTGGATATACCGG
CAGGGGGAGACTGCGAACACGATTTCCCTCCCCCCAGCAATCGAGACGCGCCGGGAGTTT
AAAGAAAGTAATCATTACTTTTACTCACACGCGGCAGTACTCAGCCCTTCCATGACCACA
CACCGTGGACCCTCCCGACGCCTCCCATTCCA
>Rosalind_1268
GTTTTTGCTCATAGAATGAGGTGCTGCGGGTGCAGGCGCAAGTGGACTTGATGGAGCTCC
CTCCACAAGGGCCTACATCAAGCACATCGCGCCCAGCGAAATCACAGTTTCTCCATTTAT
GCCGCGTCAATTGGAACAGACTGGAGTTCTGTTTAGTTCGCATTACTAAGAGGGGGTTCG
ATAGCTTTAGTTTCGCGTACAGATTGGGGATTAAGGTCCGAGGGGTCCCACAGTGGCCTT
GCGCGGACCCCCGTCCAAGGAGCCGCTTCCAAGCACGTTCTTCGTACTGTTATGCGCCGG
GCAGCCAAATGCAAGGATAATGTCATGATTTACGCTAGTCAATCTTGGCCCGATGACTGC
TATTGTTTAGAGGTACTCCCACCATCGGTTATCGGTCTACCCGCGCAATGATCTCCGATT
CGAGTGAGGCCAAGAGCGAGACTTATGAGGTTTGGGCCGGTCGCACGGGCGCTTAACAAT
AATCTTTGGCCATATTCATCCATGTGAGGCAAGGGCGCCTTACGTGAACATGTGGTGCCA
ACTCTTGCCACGCCGGTGTGCTCCGGTGCCGCGGTAGGATATTAACGCTCCGGGATATTA
GTGTGTTGTTGGCAGAATAATCAACGTAGACAGACACTTTACCTACTATCCTGGTGATTC
GGAACATCGGTAGCGGCTCGCTTATCCGACGTTCAGCCTAGGGAGTGCACGACGGAAGCT
ACGAGCCAAGTTAAATGTTGATGAGGAGCACCACTACAGAGCGGGATACAGTGTTGCGCT
CGCTCTTCTTGAACGTGCAGGTCCTGCTTATGGACCAACTATTAGCACTAGATCGCCCAT
ACAGCTCGACAGGGAATAGTGGATCCTCATATGAGAGTACACACAAGGGTCCAAGGAAAC
CTTGAAATTTGGTGTTAGCCACGGGCGAGTATATACGAATCTTACGTTTTTGACCGCATG
TACCTAATCACATGTATGAGGCAA
>Rosalind_2689
GCGGTGGCTCGGCACTCGTATGGGAGGCGCCTCAGTTGAGGCAGACATGTTAGGAAGACG
ATTGAGGGTACATTCATCTGCTTAGTCCTAAATGGCCCAGTATACCTGCAGTGACTTTTA
GGAGTGGGTTATGATGCCCAGGAAAAATTTCGTACTCCGGAGGCGCCGGCGAACGACCCG
ATGGGTTTTCCTAGTATTCCCAGGAAAAACATTCGAACAACTCCAGCCAAGGGGACGACA
TGGCTTCAGCAATCCTTATAACGAGCAAAAGAGAGGTCTCGGGCCACAACGAATCAACTC
AGCTGGGAGCAAATCCGACTTATGATTAAGATGGGCCACGTTCCATCAGAACCCTCTTTA
TTTGCTACTCATCACCACGATGTGGCCTAGGATGGGATCACACATTCAGCAGTGACAAAC
TGTGTTCAGAATGTACTATACTGCGGGTTAAGTTGGAACCCACCATTTACACCCAAATAA
ATATACGCGAATGTGCAACGTCACTGGCTACCCATCCAGTTGAGAACAAAAAGTCGAAAC
TTTTAGCAATATTACTTCGTTTAATGTGCAAACTGACGTGCACTTAGCACGGTGCGCAAT
TGGATCAAACCGAAACATGTGGTAAACTTGACAGATGGTGTACCCGATGCACGTATTGCT
CATCCCCACAGTCAATCTGACAGACTCAAAGATACATTGACTCTACAACCCTCTAAGGGT
TGCTGGACCGTGGGCAGCTTCCAGCTTACAAAGACGTTCTCTTGAATCTGTCGAGACCTG
CGATGTCCAGGGTCTAAACGCCGCGTGCCTCATGGATCGGCTTATACTATGATAAGCATT
ACGGCCGGCCGTTATACCGTCTCGTAGTCTCGGATGCATACGTTGCTGTTTGCGTTCAAT
GCTGAAATAGGTCGAGGTTCTTCTTAGAGCGGAAGCGGGCGACGAAACGAAGGCCTCCTC
TTCGGATCGAAACCATACCGGCTGTTGTAAA
>Rosalind_1825
CCTGTTCGGTACGGGGGGATCGCATGGCAATTATAATCGAACGACCGAATAAAACACCGC
ATCTCGAAGAACAGGATGAGAGAGGGTCGAAAAGATGTGCGCCGGCAAAGAATGAACCCG
GAATCACGTTGACTCACATGACAGACCAAGTTAATACTACGTGTAGCGCATGTTGACCGA
AGATTTCCATTGCCCTTCCAGAACTCGGTGAACGCAGGTGTGCGTTACGGTACGTACATG
ACTAGAGGCGTCCCGAGTGTAATGGGAACCCCAGGAGCCTTAAGTAACCGTTTCCCTAAA
GGGGGTTGAAGATCGGTCCGCCGATTGGCAAAGCGGGTGTCCCGGAAGAGCTAGATAGGC
CTCGCGGGCACATAATGCGCGGTCCAAATTTATTAGTTTGGTTGCAAGTCACGCGCACCA
CTGTTCCGCAGGCTACGCCCCGGCCCAACTCCTTGTGACTAAGCGATGATGCAATTTCTC
CCGAACCTGGCGGCCAGTGAGGGCTCTGGTTTTAACACCTACGAGTTCGACCACGGGCTC
GCCTAGGCGGATCTATGAATTAGGTTTCATGGAGTGGTCACTAAGTGAGTGCTATGAAAT
GCTCTGAGTTGTTCCCTACTCTGGGGCACGCATGTTATGCGTTCGGCGGTTCGCACAAAG
GGACGGGCACGCTAGGATGCCATAACTCGCGCCCGAATCTTGAGTAAATAACGACTAGGC
CTACTCTGCGTAACCCATCTATTTTTTATCACCATCCGGCGGCATGCACATGCAACATGC
GAAACCGTGAGAGCCCATCAGGATTATAAATTATCGGAAAGCGGAAAGGGCCTGGGAGTC
GCGCTACGAACCTGCGCGAGTGCCACAACTGCTATCTCATGGCTTCCACGAGCGACACGT
CTAATAAGATTGCCACTCAATCTTGCCGCCCCTCCCAGTACACCACTCCCGCTACGGTTC
TGAGATCCGCTGGGAAGACTTGTCCGAAAC
>Rosalind_9318
ACCCACAGCTACAGCCTGGCAGGCACCTGTCTCTTTGCATAACCCCGCCGTCGGACTGCC
ACTAAAGAGTTGCTCCGGACGCAGTTTTGGTGTGATCTCTAACGTGTGTGTAGCGTAAAA
ATCGCTAAGCGTGGGTGACCATCCACGCGGACGTCAACGATTTGAAGGCTATTGAACATA
CTTATCCGAAGAGTCCGAGGGGGACGGTTATGGAAACTATGGAATAAATGTAGATGCGTA
TTGGGTATGACGGACCGAGAATGAGTGACAAGGAGACTGAGAGCCACGTCCTGATACGAT
CCTGCGAGCTACTACGTTCTGAACTACACTTCATAGCACAGAGCTAGTGCTCCTGTTCGG
TACGGGGGGATCGCATGGCAATTATAATCGAACGACCGAATAAAACACCGCATCTCGAAG
AACAGGATGAGAGAGGGTCGAAAAGATGTGCGCCGGCAAAGAATGAACCCGGAATCACGT
TGACTCACATGACAGACCAAGTTAATACTACGTGTAGCGCATGTTGACCGAAGATTTCCA
TTGCCCTTCCAGAACTCGGTGAACGCAGGTGTGCGTTACGGTACGTACATGACTAGAGGC
GTCCCGAGTGTAATGGGAACCCCAGGAGCCTTAAGTAACCGTTTCCCTAAAGGGGGTTGA
AGATCGGTCCGCCGATTGGCAAAGCGGGTGTCCCGGAAGAGCTAGATAGGCCTCGCGGGC
ACATAATGCGCGGTCCAAATTTATTAGTTTGGTTGCAAGTCACGCGCACCACTGTTCCGC
AGGCTACGCCCCGGCCCAACTCCTTGTGACTAAGCGATGATGCAATTTCTCCCGAACCTG
GCGGCCAGTGAGGGCTCTGGTTTTAACACCTACGAGTTCGACCACGGGCTCGCCTAGGCG
GATCTATGAATTAGGTTTCATGGAGTGGTCACTAAGTGAGTGCTATGAAATGCTCTGAGT
TGTTCCCTACTCTGGGGCACGCATGTTA
>Rosalind_6280
TAGTGCTGACACTTAATGAACACACTCGCCCGATGCTGCATGCGGTGCACACCGAACTCA
TGTGGAAATGGTCTGGTTCAATCGTTTGTGATGTACGGGGTCTTGCTCCATAAGACTAGT
GACTAGTGTGGTCAGGGCTGCGCCCTGTCAAGAGTGGGCTGAACCTGCAAGATCTGCTGA
TGCTGCACGTCTCCCAGAGATAAGGGTGCTGTCCTCGCATCCGCAACTGGTTCATACCTC
CGGACTCGAAACACCCCTGGCCGCTTTGCAAAGATCTAGACGAGGAATGACGGGCAACCT
TATTAAGGACCCGCCAGTGTCATGGCCAAGTCATGGCGCCCCCGTGGCAAGGCGGATTTA
TTACTCAGTTGCTTCAGGTAACAGTCTTTACCTGGAGTGCCGGAAACTTCGGGGATCTCG
TATCGACCCATAGCAGCTGCTACGTAGAGCGCTCGAAAGAGTTCTAGGAAGCCCGCAGAA
GACTACTAGTAATGGACGGCGTAAAGGCTGCGGCGCATGTTCCACGCATGTCCAAAGCTT
CTCTTAGAGTCGTGTGGGCACACCGGCGTTAATTTAAGGAGTACTGATACGTACGACGAG
CCCACCTTGCAGGGTTGCAATGATACTGTCTAATGTCACTATAGTGAGTGGTGACCTCCT
TTGATAGCCGTATTGCTGACACACTTGGTCAAATCTCTACTGAGATGTAACCTACTGGTG
GCTCGGAGTGCCTTGGCTCCTATGCTAGCAGAACCGAGCCCCAGATCACCGACATGTAGA
CGGGGCCAGTTGATATTACTTACCGGCCCAGCCCCAGAATCGTCTCTTCTTGGCGTGCAA
GCAGATCTCAGCACTATAAGAATGTGAAGAGCTCTAACCCTAGACTCTTCAATTTCCCCG
TCCGCCGTAAAGACACCGGTCAGCGAACCTTGATCTATAGAAGAAACGCCGGTGGAATCA
GGGAGGAGCCGGAGAGGTGGGCGGTTATGCACG
>Rosalind_5582
AGCTCTGCTCATGACCGGCGACAAAAGATATCCTGGTCCGTCTGCTACCGCTGCCATTCT
TGATATGTGATTAGTGGGCATCGCAATAGTCCAAACATGACCTTGCCTAACTTTACACTT
GACCCGGCACGAAAAACATGTGCTGCTGGGAGGGATTAACCATCACTACGTACAAGCTGT
CCAACAGGTTCCAACGACAGTCTGGGAAACGCAATTGTAGGGACTGCGTTGCGGTTCCAT
ATTACTTCCAATCTACTCGAGTTTGGAGGGGTTATAATCACAGACACTGAGTAATTCTCC
GTGTTCCTGGTAGTAGGCTCACACTGCACGGGTGTAGCCTGAAAAACTTTTCCGTCGAGG
AAATTTCAAGTGCCTTAACCGTTCAGGTGATTCATGAGGTATTCTCAATTCACCCCGAAT
AACGTTCTTACCCACCATGGTAGACAGACCAGTAACTAGGGGTGGATATAGTCAGACCCG
TTACTCCCATTCCTCATAATAGAACACCAACATCACAGTAATCCGGACTTAGAAATTTCA
GTGTGCTAACTCAGACGACATTGAACAGGGTACAACGCCTGGAATAGGTCTTTGTTGCGC
GCTTTTGTATATAAATCTCTTACACGGAACGGCGACGTCCATATGCTTTTCTAGAAACCG
TAGGGAACACGTTGATAACATCTTGATGGTTTAAGAGGGGATTCGCCTCGCCAACCCCCC
AGGAGGCCTAGAGAAGCAGCGAGGTAACACGCCTAATGTACAAGGAACTAATGAAAGCAC
GTCCGGTAAAAGCAACCTGAATGGTATCAGAGCCGTTGCTACACCTAAGAGAGCGGTGCA
GATCAAACTTCCGGAGACAGTGCGTCTAGCTATTCCCGTGTATTTCTCAACGAGTCCTGC
AAATGATAAGGACACACCTATAGACGTAGGGGAGCAATGAGGGACGTACTCAGCACGAGT
CTGCTCTGTACCCGAGCTCCGCTCCAGCTCAAGATGAT
>Rosalind_1350
CAGCTGAAGACAGTGCCTAAACACGGGTGGGAAAGGTCTTCTCTCACACCTATCCGAGAC
CGAGTACTGTGCACCGAGGCTTAAGTAAAGGGAAAGAGAAATGGCCATACGGGGTCGTTC
CCCAAATATTAACTGACCAGAGCAGACCAGGTGTATGCTGAAGCACCGATCGTTAGGTCC
GAATTTTGACAAGGACCAGCTTCGGCAACTCAGGACTGCATTCGTATCGGTACGCCGCAT
CAGATCGGGTGAAAGGGCCGAATCTACGGCCCATGATTCTTCAACAAGCGCCCCCGACCT
CATTTCTGGTACCGGTTACTGTGCATGGGCCATGACGAAACGGCGCTACCTCAGCAAAAA
GTCGATTACTTCTTCTACGCCCTCTGATCGAGTCCAGATTCTAGACACCACCCATTACTT
TAGCGGCAGTTTGCGGTTAGTCCCTGCGACAGCTGACATTGCGTCTGCGCGGCGATATGG
CTGACAGTCGCGGCACCAGATCTCGTGCTGAAACGGAATATTCAAATGGGTATCTAATTC
CGACGTCACACAACAAGCCAGTAAAGCATACCGTGGTAGGCCGTAAATTACAAATGTATA
CTTAATGTAGACCATACCAAGGGTTGACCTCAAAAACTGAATAAACTAGCCACCCGTCTT
ATTAGCACTCGGCTAGTGGAAGTCGCGTCAAAACGGTGACTAAGGCTTTTACGCGCCCCT
TTCACCGCTGGCCGGGCTGTAGGGACCCTAAAGTTATTCATCGAGGGCTCAACGGGCAAT
TGTGGGTCTAAACGATCACGCAGAATCTCGGTCGCTCCTAGCTTGTTAGGTAGTGAAATG
GTTCGGTAATCTGAAGAACCACAGGGACTTGGCCCGTGTTGGAATGAGTAAGCGCTCGAG
AAGTGAGTTTAGGAGTTTTATAGGTGACCCCCGGGCTCTCGAATTGAGATAGCTGAATTA
TGGAGTTAAGCCAACTGCTGTAACTCGCCTA
>Rosalind_4014
ACGAGCCCACCTTGCAGGGTTGCAATGATACTGTCTAATGTCACTATAGTGAGTGGTGAC
CTCCTTTGATAGCCGTATTGCTGACACACTTGGTCAAATCTCTACTGAGATGTAACCTAC
TGGTGGCTCGGAGTGCCTTGGCTCCTATGCTAGCAGAACCGAGCCCCAGATCACCGACAT
GTAGACGGGGCCAGTTGATATTACTTACCGGCCCAGCCCCAGAATCGTCTCTTCTTGGCG
TGCAAGCAGATCTCAGCACTATAAGAATGTGAAGAGCTCTAACCCTAGACTCTTCAATTT
CCCCGTCCGCCGTAAAGACACCGGTCAGCGAACCTTGATCTATAGAAGAAACGCCGGTGG
AATCAGGGAGGAGCCGGAGAGGTGGGCGGTTATGCACGCGTAGGTCAATTATAAGACAAT
AAGAAACGAAGGTTGGTAAATAGAGCCCCTCTTACAGTCCGGGGAAATCCACTACTTGCT
TGTCGAAAATCGCCCTTCACCCGAAATACGCTAAGTTGTGATCAGCATGATGACGCAACC
TACCGCTACTTTTGTTAACCATCATGACGAAAACTAGGGCTAATCTCGTTCCCTTCCCCC
CTAAACGGGTCGGGCCCGTCGATAGACCGTTAGCATCCCCATACGAGTGAAGAGTTAAGG
GACTGGTACTCTCCTTACTCTATTGAAAGACACCCAGCCGAGTTTGTCAGGGATCTCAAT
CTGATCTGGTACGCTCTTGGCGCGTCTTTGCCGCCCTATGTGGCCAGTCCAACCCTTAAA
ACCCACTCTGTGTCAAGATTTCGCCCAGGAAGCTTCAAATAGGGCGACAGGACTCTTCAG
ACCCGTTCTTAGGAAGGCCATGCGCTGAATGCATAAAGTTTCCCGATATGTACATTGGGA
GCATGGAATGGTGCTACAGATCGCACCAGACGTCACTGCACTCTCCGAAGGCTCGTGTCG
CGTTTAAGGTGCCGACGTATATTACATCATG
>Rosalind_9624
AGACGTAAACCCAATTGAATCCTACCTCGCCTTAGTATAATTCTGGGGTGGCTTGTGCTC
GCCCCCTTCATATCGTTGCCAAATGGTAAGTTGCAACGATCGCTATACTTCCCAACATGG
CCGCATGGGAAATATTCACGGAAATCAGTATTCAAGCGTCCTCTTTTTCCTCCGAGCGCC
CCCCTATTTACGTCTCGGCTCGTCAAAAGGGCTGGGTCGGGCGACGCATAAGATCTCTAC
CCTCGTATGTCTTAGGATGACTCTCACTCTACTACAAGATCTTACACAGCGGAGAGTAAT
TCGCTTGAACTAAACCCTCCTACAAAACAAAATGCCAGCATGCGTGAGCGGGCTTGCATA
AAACTACAAGTCTCCCATGTCGTTTTGACGATATTACCTACGGGCCTAGTACTGACGGGT
ATCCTATCGCGAACGCGCCCGGGGCTAGGCATTACAAACGCAACAGGAATCGAAGCCTAA
CCTTAAAAGTTCCGGCCATAAGAATACTGCTCAAAAATGTTGCACTACGATACATGCCCT
CCATAAACGGGGGCATGGTACTCTGACTGTGGTCTATCCCTTGATACTGCCATTTATCCT
ATTAGGTCACTCGCAGATACATGTTGATGCTATTGCCGGGAGACGGGCAGACATTCAAAA
TTTCAGAGGGGGGCCCTCGTTCTTCTTGCGCCGGTGCTTGGTGTCTGAAGTTGCACTTGC
ATGCATCGTTTGAGGATGACTGACTTAAAAGACTGATTAGGCTTACGCTAGTTTATAAAT
GTGTTGTCTGATTCGGACATGGAGCTGGTTGGTGCGCTTTTGATGTAGAGCATGTCTCGC
ACTACCGCGACCATGTCACATGTAGGATGGCACTAAGTAAGGGAAACGCTAGTCGGCACA
GAATATGAAGATCGGCGTGTGTGTGGCTGGTCGGAGTGTGGGTGGTGTTAATGGCCTTAG
AGGTGCGGAAGAGCGGACTAATA
>Rosalind_3021
TCACACATTCAGCAGTGACAAACTGTGTTCAGAATGTACTATACTGCGGGTTAAGTTGGA
ACCCACCATTTACACCCAAATAAATATACGCGAATGTGCAACGTCACTGGCTACCCATCC
AGTTGAGAACAAAAAGTCGAAACTTTTAGCAATATTACTTCGTTTAATGTGCAAACTGAC
GTGCACTTAGCACGGTGCGCAATTGGATCAAACCGAAACATGTGGTAAACTTGACAGATG
GTGTACCCGATGCACGTATTGCTCATCCCCACAGTCAATCTGACAGACTCAAAGATACAT
TGACTCTACAACCCTCTAAGGGTTGCTGGACCGTGGGCAGCTTCCAGCTTACAAAGACGT
TCTCTTGAATCTGTCGAGACCTGCGATGTCCAGGGTCTAAACGCCGCGTGCCTCATGGAT
CGGCTTATACTATGATAAGCATTACGGCCGGCCGTTATACCGTCTCGTAGTCTCGGATGC
ATACGTTGCTGTTTGCGTTCAATGCTGAAATAGGTCGAGGTTCTTCTTAGAGCGGAAGCG
GGCGACGAAACGAAGGCCTCCTCTTCGGATCGAAACCATACCGGCTGTTGTAAAGCTGCG
GGTGTGCCCTCCGTATCTTATACTTCGGTAAATTTATGTTGGAGCTACACAGGATTAAAG
CCGATACGCTGGCTAAAGGCGTATGGCCAATGGTATATTCTGCGCGAATTTTCTCGTTTG
ATCCCAGAGGACGCCCTCAACAAAACGCGCCATTCACCGGGTCTCTCCGGGCGGGGTGTA
CAACGAGCGATTATACGTACTATCGCAGGGCGACTTTCCTCAAGTAACTTTCGGTACGCC
AAAGTCCTGCTCCATTCGTTATAACTGCATGACTATCCGACGATTGTGCAAGTAACACGA
CAATTGCCATTACGTCGACTATTTGTCCGCAGGGGCTACTTCGCTGTATCCACTTTGACT
GAGGCTAAACGCGAAACCTGTGGG
>Rosalind_2535
GAACGATCTTTGCTAAGGGGACGAGGGTGGCCATAATGGTGTTTCGTATGAATTGACTCG
CAGCTTGCGCCGCCGACACCTTGCACACCCAAGGCAGTAGGCCAGGTTTAACGGTAACCT
AGCCTTCATTCTTCGAGTAATAACAGGCTGGGCGCTCGGTATCAGCGTCCCCTTTAAAGC
AGCTCTCTCTTAAAAGGGTAAACGCTGTGCCGGCTTTCGCTGAGGATCTAAAATCTTGTT
AATACTTCATAGCATAAAGTGTCCCCCCCCTTAGGTAATCGCCTTAGTTAGTTCACGTAT
ACCACTAGATTGAAATATTGCCGGGCTAAGCGAGTTTTTTTTGAGACGCAGCACGGCACT
GCGAACACCTCGGGCTCATAGCCTTTCAGACTATTCGTCACTATGCACGTAATGGCTAGT
ATTTCCTGTTGACCATAGAATGGTATCTGTTTGCAGCACGATACGGGCAGTGGCCAAGAA
TGAGTCTGTGATCGCTTTTATGCGGTCAGTGAGCCCGTAATTTACAGCACGACGATGGTT
GCATAGGTGGGTCCGACCTCTCCCTGGGACATGTCCTAACAGGCGAGTTTCGGCATTACG
CACAGTATAGGGCATCATCGCCGTTGCTCGGTGTAGGCCGTATGGTGAGTCGTATGTATC
GTGTTGCATGACACGTCACTGCAGCGTGCAGGAAATAATATGAACCAGTACATAGATAGG
AGGCGTCGAAAGACATGCTTAGAGCTCTCTTGTAAATCGTGGTCGGGGGCGGCTAAACGA
GATGACGGCCACGTTGTTTATTGGCTCTATGGGCAGTAAGAGAAGAGACGGAAGGTCGCT
ATGTATCCCTTTATGATCAATCTGCTGTCCGTACACTGAAATAGTTTGACAATCCACGCT
TGGCCTGTGCTACTTTGGTTGCATCAGTATCCAAACCAAATCGTTTGGGATGTAGCCAGC
ACAGACTATGCAGCGGCAGTCGCTGTCTCGG
>Rosalind_8954
TGCGGACACTGGGAGCTGCGCGTTGCAGTGTAGGGGCTTTCCATATTACCAAGTACCCAA
GTGGTCAACAGTGTTCCATTTGGATATGAGAATGTAAATGCAGGGAGGATCACGGCGCTT
CATTGCTGGTCGTGTGTTACCATTGAGCAAAAATCCTCTCGGTAGATACTTCACTTTCTG
TGGTCTTACTATGGGCCCTCCAGGAACGTACAGTGAGCGTACAGAGCGGCTATAGTAGGG
GACATAGGATCATGATCCTCGATGTGGATCGTCTGCAAGACTGCCAGCTCTCATGTCAGG
CCATTTTTACTGATGGTCTTAAGCATTGGGCGAGCTACCAACACTTCTGTGTTGGCCCCG
AAGGAGCCCACCCAGGTCCATCAGGGATTTGACATTATGCGTTTGCTCCTGCGTCGGAAC
ACTTCCCGCATGTACGCGTGCATTCTGACATAGACATATGGCTCGTAGCTCTGCTCATGA
CCGGCGACAAAAGATATCCTGGTCCGTCTGCTACCGCTGCCATTCTTGATATGTGATTAG
TGGGCATCGCAATAGTCCAAACATGACCTTGCCTAACTTTACACTTGACCCGGCACGAAA
AACATGTGCTGCTGGGAGGGATTAACCATCACTACGTACAAGCTGTCCAACAGGTTCCAA
CGACAGTCTGGGAAACGCAATTGTAGGGACTGCGTTGCGGTTCCATATTACTTCCAATCT
ACTCGAGTTTGGAGGGGTTATAATCACAGACACTGAGTAATTCTCCGTGTTCCTGGTAGT
AGGCTCACACTGCACGGGTGTAGCCTGAAAAACTTTTCCGTCGAGGAAATTTCAAGTGCC
TTAACCGTTCAGGTGATTCATGAGGTATTCTCAATTCACCCCGAATAACGTTCTTACCCA
CCATGGTAGACAGACCAGTAACTAGGGGTGGATATAGTCAGACCCGTTACTCCCATTCCT
CATAATAGAACACCAACATCACAGTAAT
>Rosalind_7938
CGACCGCATTATGACTAGGCCTATCGATGGTCACCAACGCACCGTTGATGGATGATGCCA
TGAAACCAACTTCGCACTTGTCAAACCGGCGCACTTTTTGGATGCTCGCCAAGACCTCAA
GCATATGAAATTACCGTGCCGGTGACCCTACGGATCGTGCAGTGTCTATAGTATCCGCAA
CCATCACCAGTTCGCCACTGGACCAATTATAGGGTTATGGCGTCCGGTGCCACGCCCGAG
TAGATGAAGGCTTTTCAAGGCAAACGTCATATCGCCTGGTGAAACATCTCCATTCCGGCG
CCTAGCTGGGTGTCAGGAGCCGGGACCAAGGCAGGAGAACAAGTACTATTGGTATTTTTC
GATGACAGGTCTCCCATTACTGGTCGCAAATCCGGCAGGAGGTTATGCCCTGAGTCCTGG
CGAGGTGCGGGACACCTGCCTGCAGCTGAAATTTATAGCACCTAAGGCGGCTGTCATGTT
TACCCGTCGCGGCAAATGACGTGGCCCGGCTGACCCCAAAAAAGTATACTACGCCCTCCG
GATCACGCGCCACCGGGGTCACGAACCGAAGTCATAGGTCTCTTGGGCACCCCCCACACT
GCTACGTACGAGGACGGATGCCTCGGAGATTATGGCCAGGAGCAGTTCGGATGTCAACAC
CTTGTCTTTCTGTATCCCCTTTATCGTAACAGGAACGTTCGCAAACAGCAGCATGTGTCA
ACATACATCTAAACACCGACTTACGGGTCGGCGCATCCAAAAAGGCTTACGATCAGTCAA
ACATACACAATTTTCCCTAGATGTAGGTCTCTTGCTTGCAGCATCCTTCGGGTTTAAAGA
CATTCCCATGCTGGTAGAAACGTAAACTATCCTATTAAAAGTATTTCGTTGTGGTTTTTG
CTCATAGAATGAGGTGCTGCGGGTGCAGGCGCAAGTGGACTTGATGGAGCTCCCTCCACA
AGGGCCTACATCAAGCACATCGCGCCCAGCGAAAT
>Rosalind_0049
TGATGTAGAGCATGTCTCGCACTACCGCGACCATGTCACATGTAGGATGGCACTAAGTAA
GGGAAACGCTAGTCGGCACAGAATATGAAGATCGGCGTGTGTGTGGCTGGTCGGAGTGTG
GGTGGTGTTAATGGCCTTAGAGGTGCGGAAGAGCGGACTAATAGCCTCTTTATACCGGTG
TTTGCAACAAACAGATACGTTAATCGAAGGTGGTAATGGATAGAGGTACCCGCTACTGGG
TCAATGCTAGACTGATCGCAGACAATCTGGACTGAGAAGCAACGCCGACTTTGAGGGTGG
ACTGGACTCGCGCCCAACCGACCCAACTCTGCCGAGAGGTCTTATTTCTTACAATAGGCC
GCGGAAAAATTTATGGCTTTAGCCCCTGCGGTCGACTACGCGCTGTTCACGATGTGTATT
CACAAAAGTCAGCCCTCTGTTGCTAGAGCCATGTTCGCTGATATCTATCTTCCCCCTGGT
GTAGATGAAGAACCGCATACGAGTTTAGGGGAAGAACCCACCCTTTTACTGATCTGCGTG
GCAGTGAACCGGGACGAAGATCTACGCATGGAGACTCTTAGAGACTCTGTAGCTTGGGCA
AGGCGCAGATTACCCGTGAGCTTACGCGGGATAGTCGAGTTGACGCTAGGCAAGCCTGTG
AATCCCCTCGCGTTAACCTTTTAACCGCCGGACAACCGACCTCGCGTTCATTACTAACTG
AAGAGGCACTTTGATCGCGTTAAATCATGCTGAACCGCGGGGAAGGTAACGATGCTCGTC
TCCGCAACCTTGAGTCCCGAATTGATTAAGTTTGTTCCACAAAGCCGGACACCCCATGGC
GATACTTGCCTCGTCGCTGAATGGCGGCATAGTGCGAATTTGGTTTAGGAGATCATATGC
CCTGACCAGTATCTTAGAAGCCTCTAGGTTGACTACGCCAGCTGCAACATCCAGGATGTC
ACCAGTCATGTATGAGCTTGTC
>Rosalind_1048
AGAAGCCTCTAGGTTGACTACGCCAGCTGCAACATCCAGGATGTCACCAGTCATGTATGA
GCTTGTCGACCTGCCTATAGATTTTACCGCAGTTTCGCCGGTACGCTAAGTGGTTCGATC
TGGACCCGGGAGCGGTACTGCAAGCAGTTCTTTATTGTTCTTTTGGAACCAGGCCACTTA
GCATGGATATGGTCTCCACTTACATCAAGGAGAGTGGGTTGGAGTGCTCCCAAGTCACTC
GCACGCACGAAGGGATGTATTCCACAACCGTGTCTCTCCTGCGATGATTCAGGTCGTTGA
CGTACCACTAAGTGTCGATCGCTGACCTGAAACAAACGGCTCTAATCGCAACTTGGGGTT
TGTCGGAACGACAATGAATGCTGCATTGATGCATGCCCCGTATCTCGTGAAACACCACGG
CCTCCACACAATAGTGTACAGGTTAGTTCTGATTCGAATAGATTCCAAACGTCAATAATG
GCGCTAGGTATTTAGCCAGTCACACGGACTTCTAGCTGGCTCGAGTCTGTCTTTCACGGT
AGTTTCAGAAGACATGTGGCCTTCCGCCTCAAGCCGCTAGACTACCGGTCGAATATCGCA
CCGTAACCCACGAGGCCACCACATGCGTACCCACAGCTACAGCCTGGCAGGCACCTGTCT
CTTTGCATAACCCCGCCGTCGGACTGCCACTAAAGAGTTGCTCCGGACGCAGTTTTGGTG
TGATCTCTAACGTGTGTGTAGCGTAAAAATCGCTAAGCGTGGGTGACCATCCACGCGGAC
GTCAACGATTTGAAGGCTATTGAACATACTTATCCGAAGAGTCCGAGGGGGACGGTTATG
GAAACTATGGAATAAATGTAGATGCGTATTGGGTATGACGGACCGAGAATGAGTGACAAG
GAGACTGAGAGCCACGTCCTGATACGATCCTGCGAGCTACTACGTTCTGAACTACACTTC
ATAGCACAGAGCTAGTGCTCCTGTTCG
>Rosalind_8687
TCTCGTAGTCTCGGATGCATACGTTGCTGTTTGCGTTCAATGCTGAAATAGGTCGAGGTT
CTTCTTAGAGCGGAAGCGGGCGACGAAACGAAGGCCTCCTCTTCGGATCGAAACCATACC
GGCTGTTGTAAAGCTGCGGGTGTGCCCTCCGTATCTTATACTTCGGTAAATTTATGTTGG
AGCTACACAGGATTAAAGCCGATACGCTGGCTAAAGGCGTATGGCCAATGGTATATTCTG
CGCGAATTTTCTCGTTTGATCCCAGAGGACGCCCTCAACAAAACGCGCCATTCACCGGGT
CTCTCCGGGCGGGGTGTACAACGAGCGATTATACGTACTATCGCAGGGCGACTTTCCTCA
AGTAACTTTCGGTACGCCAAAGTCCTGCTCCATTCGTTATAACTGCATGACTATCCGACG
ATTGTGCAAGTAACACGACAATTGCCATTACGTCGACTATTTGTCCGCAGGGGCTACTTC
GCTGTATCCACTTTGACTGAGGCTAAACGCGAAACCTGTGGGTTAGAAGTCCCTACCGTA
CGATGGATATCTGCTAAACTAGTTCCGATCACTATGTTTTAACCTACTCGATAAGTGGAG
ATACATTTTCAGTCGCCGGCTAATGGCAAATTATGACTCTCCAAAAAAGACTCTACACCC
TGAGCTCATGGCTTGACTTTGACAGTACCACCGCTAGAAACTAAGTCTTTGACACACGTT
AGTTCTAAGAACGATCCGCAAAGGGGGAAGGCGGATAATTACTGTCATCGAAAATACTTA
GTAGACGTAAACCCAATTGAATCCTACCTCGCCTTAGTATAATTCTGGGGTGGCTTGTGC
TCGCCCCCTTCATATCGTTGCCAAATGGTAAGTTGCAACGATCGCTATACTTCCCAACAT
GGCCGCATGGGAAATATTCACGGAAATCAGTATTCAAGCGTCCTCTTTTTCCTCCGAGCG
CCCCCCTATTTACGTCTCGGCTCGTCAAAAGGGCTGGGTC
>Rosalind_0002
CTGCCGAGAGGTCTTATTTCTTACAATAGGCCGCGGAAAAATTTATGGCTTTAGCCCCTG
CGGTCGACTACGCGCTGTTCACGATGTGTATTCACAAAAGTCAGCCCTCTGTTGCTAGAG
CCATGTTCGCTGATATCTATCTTCCCCCTGGTGTAGATGAAGAACCGCATACGAGTTTAG
GGGAAGAACCCACCCTTTTACTGATCTGCGTGGCAGTGAACCGGGACGAAGATCTACGCA
TGGAGACTCTTAGAGACTCTGTAGCTTGGGCAAGGCGCAGATTACCCGTGAGCTTACGCG
GGATAGTCGAGTTGACGCTAGGCAAGCCTGTGAATCCCCTCGCGTTAACCTTTTAACCGC
CGGACAACCGACCTCGCGTTCATTACTAACTGAAGAGGCACTTTGATCGCGTTAAATCAT
GCTGAACCGCGGGGAAGGTAACGATGCTCGTCTCCGCAACCTTGAGTCCCGAATTGATTA
AGTTTGTTCCACAAAGCCGGACACCCCATGGCGATACTTGCCTCGTCGCTGAATGGCGGC
ATAGTGCGAATTTGGTTTAGGAGATCATATGCCCTGACCAGTATCTTAGAAGCCTCTAGG
TTGACTACGCCAGCTGCAACATCCAGGATGTCACCAGTCATGTATGAGCTTGTCGACCTG
CCTATAGATTTTACCGCAGTTTCGCCGGTACGCTAAGTGGTTCGATCTGGACCCGGGAGC
GGTACTGCAAGCAGTTCTTTATTGTTCTTTTGGAACCAGGCCACTTAGCATGGATATGGT
CTCCACTTACATCAAGGAGAGTGGGTTGGAGTGCTCCCAAGTCACTCGCACGCACGAAGG
GATGTATTCCACAACCGTGTCTCTCCTGCGATGATTCAGGTCGTTGACGTACCACTAAGT
GTCGATCGCTGACCTGAAACAAACGGCTCTAATCGCAACTTGGGGTTTGTCGGAACGACA
ATGAATGCTGCATTGATGCATGCCCCGTATCT
>Rosalind_3341
TTAATGTAGACCATACCAAGGGTTGACCTCAAAAACTGAATAAACTAGCCACCCGTCTTA
TTAGCACTCGGCTAGTGGAAGTCGCGTCAAAACGGTGACTAAGGCTTTTACGCGCCCCTT
TCACCGCTGGCCGGGCTGTAGGGACCCTAAAGTTATTCATCGAGGGCTCAACGGGCAATT
GTGGGTCTAAACGATCACGCAGAATCTCGGTCGCTCCTAGCTTGTTAGGTAGTGAAATGG
TTCGGTAATCTGAAGAACCACAGGGACTTGGCCCGTGTTGGAATGAGTAAGCGCTCGAGA
AGTGAGTTTAGGAGTTTTATAGGTGACCCCCGGGCTCTCGAATTGAGATAGCTGAATTAT
GGAGTTAAGCCAACTGCTGTAACTCGCCTAGAGAGCTAATAGTGGCATGTAACGCTAAGT
CTCAACGTGATCGGCCTTTGACCGCCCTTGTACCGATAGAGTGAATTGTTTTAAGCCGAT
TATTCGTTGGGGCAGTGCGCTTATCGAGGTAGTTAGTCTGGGAATTCGAAATGGCTACTG
GTCATGGGGGAACTGAGGTGTCCGCCCGCAGACCAGAGGGAGGCCCGTGGTAAATTATGA
GAATATTACGACAGCAATTATGCTGCGTTCCAATTGACTGTTTTTGAAACAGCGACCGCA
AAACTATCTGTTATGGCACCGTTTGACTCCTACATGCCTAAGTCGGGCAGGATCCTTGAG
TCCAATTCAGATGTTCGAACCAAAACTCCTAGGCTAATTAATAATGATGCGTGCCTTACG
TAACGGCTTGCATTCTTCGTACCGGGCCTGTATAGGGAATCGCCGCGATTGTTGGCAGTA
AAACGCTATTTGTAAAAGGGTGGCCCTACGGTCGAGACCGGGTGCAGGTCAAACATGCTC
CTGGAGGGCCCGCTCAGATCGGTACTAAGACTTCCCTGACGATAGTCTCCCTTCAATGGA
ACCTGGCTTGTGGATAGATACTAAGGAGTATAGTATTCG
>Rosalind_5344
AGCACTAGATCGCCCATACAGCTCGACAGGGAATAGTGGATCCTCATATGAGAGTACACA
CAAGGGTCCAAGGAAACCTTGAAATTTGGTGTTAGCCACGGGCGAGTATATACGAATCTT
ACGTTTTTGACCGCATGTACCTAATCACATGTATGAGGCAACAAACGGTAGGATATGAGA
TTGATAAACAGTTACACGGGGGTCACGGGCAACCTCGCGGGCTGTCTTCCTATCCAACTG
TTACTCTTTCATAGACCTTAAGATCGGCTTGCTACGCAGCCGATCCAGACCTTTAAGAAC
GTCATTACAGCTGAAGACAGTGCCTAAACACGGGTGGGAAAGGTCTTCTCTCACACCTAT
CCGAGACCGAGTACTGTGCACCGAGGCTTAAGTAAAGGGAAAGAGAAATGGCCATACGGG
GTCGTTCCCCAAATATTAACTGACCAGAGCAGACCAGGTGTATGCTGAAGCACCGATCGT
TAGGTCCGAATTTTGACAAGGACCAGCTTCGGCAACTCAGGACTGCATTCGTATCGGTAC
GCCGCATCAGATCGGGTGAAAGGGCCGAATCTACGGCCCATGATTCTTCAACAAGCGCCC
CCGACCTCATTTCTGGTACCGGTTACTGTGCATGGGCCATGACGAAACGGCGCTACCTCA
GCAAAAAGTCGATTACTTCTTCTACGCCCTCTGATCGAGTCCAGATTCTAGACACCACCC
ATTACTTTAGCGGCAGTTTGCGGTTAGTCCCTGCGACAGCTGACATTGCGTCTGCGCGGC
GATATGGCTGACAGTCGCGGCACCAGATCTCGTGCTGAAACGGAATATTCAAATGGGTAT
CTAATTCCGACGTCACACAACAAGCCAGTAAAGCATACCGTGGTAGGCCGTAAATTACAA
ATGTATACTTAATGTAGACCATACCAAGGGTTGACCTCAAAAACTGAATAAACTAGCCAC
CCGTCTTATTAGCACTCGGCTAGTGGA
>Rosalind_2320
ATGGCTAGTATTTCCTGTTGACCATAGAATGGTATCTGTTTGCAGCACGATACGGGCAGT
GGCCAAGAATGAGTCTGTGATCGCTTTTATGCGGTCAGTGAGCCCGTAATTTACAGCACG
ACGATGGTTGCATAGGTGGGTCCGACCTCTCCCTGGGACATGTCCTAACAGGCGAGTTTC
GGCATTACGCACAGTATAGGGCATCATCGCCGTTGCTCGGTGTAGGCCGTATGGTGAGTC
GTATGTATCGTGTTGCATGACACGTCACTGCAGCGTGCAGGAAATAATATGAACCAGTAC
ATAGATAGGAGGCGTCGAAAGACATGCTTAGAGCTCTCTTGTAAATCGTGGTCGGGGGCG
GCTAAACGAGATGACGGCCACGTTGTTTATTGGCTCTATGGGCAGTAAGAGAAGAGACGG
AAGGTCGCTATGTATCCCTTTATGATCAATCTGCTGTCCGTACACTGAAATAGTTTGACA
ATCCACGCTTGGCCTGTGCTACTTTGGTTGCATCAGTATCCAAACCAAATCGTTTGGGAT
GTAGCCAGCACAGACTATGCAGCGGCAGTCGCTGTCTCGGGAATACTCCCGCGGTTTATG
TCGAGATGGTGCACTGTGGGACAACCTTCAGCGGTTATATCATGCTGCGATACGTCCTCA
ATAATTATCGGGGCAAGTCTGGTTTTCCTCAACTATCTAGTATGAACCCTGTGCACGCCC
TCCATATGCTCCCCTTAGGGTCGTGCTGCCGTGGTTAAGCCCCACCCGACTTGCAATAAG
TGTATTATGAAACGTGGGTCAAAAAGAGTCGCTTTTGGTACGCCACAACGAGTTAATAGC
TAGTCAATCTGTGGGTCCGTGTGGCGTTCAAAGTGGGGATCCACGGGAGGACCTGTCACT
CGTTCCCTACGCGACAATAACTTCCCAGCGGTCCATTGGTATGTCTGGACGCGGGTACGA
GATCCGTCACAGTCTACCTTGATGGCGAATCAGTCA
>Rosalind_4409
GTCACTTGACATTGTGCTCCCTAGAAACACCTTGGGGTAAGGACGTTGAAGGAATACCTA
TTTATATGGTCGACCATACAACCCGATCGACAGATGAGCTGTAAACTATGTCCATGCGCC
CGAACGCCATCAAAAGTGGACGTCGAAATGGTGCAGAGAGGGTCTCATGCTATTTGAAGC
ACCCTATGCTAGTCTCGAGACTTCGTTGCACATAAGCTGGTAACCTTACGCGTAGCCAGA
GTCAAAAGTAGTTCGATTATGTGAAAAACGGACTGCTAATCCCTCTATGTCTAGCCCGGC
ATCCTGTAGAGACTGCATCTCGTGCAAACTCGCCGATTTATTGATCTGTGGAGAAGAATC
GCCCAGTTTACCAGATTAGCGCTTGGACAGTTTATGTACCACGGACGCGGAGGAGATAGC
GAAACAAGTGCGGACACTGGGAGCTGCGCGTTGCAGTGTAGGGGCTTTCCATATTACCAA
GTACCCAAGTGGTCAACAGTGTTCCATTTGGATATGAGAATGTAAATGCAGGGAGGATCA
CGGCGCTTCATTGCTGGTCGTGTGTTACCATTGAGCAAAAATCCTCTCGGTAGATACTTC
ACTTTCTGTGGTCTTACTATGGGCCCTCCAGGAACGTACAGTGAGCGTACAGAGCGGCTA
TAGTAGGGGACATAGGATCATGATCCTCGATGTGGATCGTCTGCAAGACTGCCAGCTCTC
ATGTCAGGCCATTTTTACTGATGGTCTTAAGCATTGGGCGAGCTACCAACACTTCTGTGT
TGGCCCCGAAGGAGCCCACCCAGGTCCATCAGGGATTTGACATTATGCGTTTGCTCCTGC
GTCGGAACACTTCCCGCATGTACGCGTGCATTCTGACATAGACATATGGCTCGTAGCTCT
GCTCATGACCGGCGACAAAAGATATCCTGGTCCGTCTGCTACCGCTGCCATTCTTGATAT
GTGATTAGTGGGCATCGCAATAGTCCA
>Rosalind_9571
TAATCACGTAAACATGTCCTGATGTCGAAAAGTAACAGATTAACGCTTGTTTATTCCCGG
GCTACCTGTCCAGTTGTCCCCTGTTTCAAATCTGCCGTCCGGCAAGAGCCGAAATCGGCT
TCTGTCCTCCAATGTCACCGGGCGGGCAGTGACGCGTTGATACGATATGGCAGTCGTTCT
TGTCGTGCAATGACGGTACCCAAGGGTCCCAATGTCGATGTAAAGCAAAATTTTAGCACA
ATTGATCGCACATGAGATAGTATCGCCGATTGCTCAGCACCCAATCCGTGACAGAAGGCA
GTAAAGGACGTCTTAAGGCGGTCTGGTCGAGAGTCACCAGTTCATATTAATAAAAGTTGT
GTTAGTCACTGAGTGGCGCTGCCGCAGCAATCGAGAAGTGCGGCCAATCGTGCCTTCATA
AGTTGCATCGGCAGGACTCCGTTGTGTAGATACCCCCTGCGTCTTACCTCCAGATATACC
AGAGATTTAAACGGTCTCTTCCGTGACGAGTAGATATCCATTTTACCGCAATCAGTTTTA
ATTGGCCGACCGCTATCCCTGTGAACGCGCCATGTAGGGATCGCGCACAGATCTCTCGGC
GGTGACCTCCCAGATCGAGACGTGCTCAAACTGGTCCTCCTCTCGTCTGAATTCCCCCCG
AACACTCGTGTCATCCACAACCCCGCGCTCATAAGCCGACACCACTGACTTCAGTCAAAT
TTACCATTAAGTTAGTCCTTGAGGCCCATATCTTGAACTCTTTCGGCAGATGCAGTTCGG
GAGATAGAGATTCATTACCTTACTTAAGTGTCAGCGCGGCAGAGATACCGGCAGCCCGCC
GCACGATAGGTATGTCATTGGAGATGTAATCCGTAAACGTATTGGCCTGTCGACGATACG
TGGACTCCAAGAAGTAATTTCAAAGGCCCCGACGCAGACCATAGGTTCGTCCAATCATAG
GTTGTTCTCCGCCGGGGTACGGTAGCCT
>Rosalind_4046
TTGCATAAAACTACAAGTCTCCCATGTCGTTTTGACGATATTACCTACGGGCCTAGTACT
GACGGGTATCCTATCGCGAACGCGCCCGGGGCTAGGCATTACAAACGCAACAGGAATCGA
AGCCTAACCTTAAAAGTTCCGGCCATAAGAATACTGCTCAAAAATGTTGCACTACGATAC
ATGCCCTCCATAAACGGGGGCATGGTACTCTGACTGTGGTCTATCCCTTGATACTGCCAT
TTATCCTATTAGGTCACTCGCAGATACATGTTGATGCTATTGCCGGGAGACGGGCAGACA
TTCAAAATTTCAGAGGGGGGCCCTCGTTCTTCTTGCGCCGGTGCTTGGTGTCTGAAGTTG
CACTTGCATGCATCGTTTGAGGATGACTGACTTAAAAGACTGATTAGGCTTACGCTAGTT
TATAAATGTGTTGTCTGATTCGGACATGGAGCTGGTTGGTGCGCTTTTGATGTAGAGCAT
GTCTCGCACTACCGCGACCATGTCACATGTAGGATGGCACTAAGTAAGGGAAACGCTAGT
CGGCACAGAATATGAAGATCGGCGTGTGTGTGGCTGGTCGGAGTGTGGGTGGTGTTAATG
GCCTTAGAGGTGCGGAAGAGCGGACTAATAGCCTCTTTATACCGGTGTTTGCAACAAACA
GATACGTTAATCGAAGGTGGTAATGGATAGAGGTACCCGCTACTGGGTCAATGCTAGACT
GATCGCAGACAATCTGGACTGAGAAGCAACGCCGACTTTGAGGGTGGACTGGACTCGCGC
CCAACCGACCCAACTCTGCCGAGAGGTCTTATTTCTTACAATAGGCCGCGGAAAAATTTA
TGGCTTTAGCCCCTGCGGTCGACTACGCGCTGTTCACGATGTGTATTCACAAAAGTCAGC
CCTCTGTTGCTAGAGCCATGTTCGCTGATATCTATCTTCCCCCTGGTGTAGATGAAGAAC
CGCATACGAGTTTAGGGGAAGA
>Rosalind_2835
CTACCTCAGCAAAAAGTCGATTACTTCTTCTACGCCCTCTGATCGAGTCCAGATTCTAGA
CACCACCCATTACTTTAGCGGCAGTTTGCGGTTAGTCCCTGCGACAGCTGACATTGCGTC
TGCGCGGCGATATGGCTGACAGTCGCGGCACCAGATCTCGTGCTGAAACGGAATATTCAA
ATGGGTATCTAATTCCGACGTCACACAACAAGCCAGTAAAGCATACCGTGGTAGGCCGTA
AATTACAAATGTATACTTAATGTAGACCATACCAAGGGTTGACCTCAAAAACTGAATAAA
CTAGCCACCCGTCTTATTAGCACTCGGCTAGTGGAAGTCGCGTCAAAACGGTGACTAAGG
CTTTTACGCGCCCCTTTCACCGCTGGCCGGGCTGTAGGGACCCTAAAGTTATTCATCGAG
GGCTCAACGGGCAATTGTGGGTCTAAACGATCACGCAGAATCTCGGTCGCTCCTAGCTTG
TTAGGTAGTGAAATGGTTCGGTAATCTGAAGAACCACAGGGACTTGGCCCGTGTTGGAAT
GAGTAAGCGCTCGAGAAGTGAGTTTAGGAGTTTTATAGGTGACCCCCGGGCTCTCGAATT
GAGATAGCTGAATTATGGAGTTAAGCCAACTGCTGTAACTCGCCTAGAGAGCTAATAGTG
GCATGTAACGCTAAGTCTCAACGTGATCGGCCTTTGACCGCCCTTGTACCGATAGAGTGA
ATTGTTTTAAGCCGATTATTCGTTGGGGCAGTGCGCTTATCGAGGTAGTTAGTCTGGGAA
TTCGAAATGGCTACTGGTCATGGGGGAACTGAGGTGTCCGCCCGCAGACCAGAGGGAGGC
CCGTGGTAAATTATGAGAATATTACGACAGCAATTATGCTGCGTTCCAATTGACTGTTTT
TGAAACAGCGACCGCAAAACTATCTGTTATGGCACCGTTTGACTCCTACATGCCTAAGTC
GGGCAGGATCCTTGAGTCCAATTCAGAT
>Rosalind_9824
TGGCGAATCAGTCACCCTAGCATTTCCTAGAGGTCCTGCCGTTTACAACAAGTTACAGAT
AAACCACGTATGTGTTACTAGAACGCGCACTTGAGTACGAGTGCCCATGCATTCAACTAC
TGTTCGTTTAGTATACAGAACCTACGAGGGTGCAGATAAATCGGCTCCTGAGTACGGTGA
TATTTTAACCCTGTTTATGGGAATAAGTACGGTGCGTGTACGGCATAGTTAACATCAAGT
GCGCCGTCGCATGCGTGTGAACCGTGAATTTTTTATCGCCGGTAGGCACGTGAGCGCATA
GTAGGAGTAGGCCCGAAGTACGTCGTATTTATCACATTCCTCTAGAAAGGGTCACCTAGC
GTGAGTTCTGGCGCGGGGACGGTACGATAGATAATGGGGTGCTCACTTCCAAGCGTGTTG
GCTGGCAGCCCAACAACCATTTATGATAACCGTTTCTAGACCGTCAGTGGCCCTGGATCG
CCGCTGCTTGGATAATGAATCCACGCTGCTCAAACCTCCACGCTAATGGCTGCCAACGGC
ACGATGGGATGATTCTACTCCATCGAGCCTCAAAAGTCTGGCAGCCTGAATATGTCGTAT
TGGGGTTGTGATCTAATCCAGTTGCAAGTCGTCCGCTTCTTCATGGACTCTGTTCGGACT
CTCAAGCATAAGCCAAAGTCTCATGCCCGGGGCAAATATACAGCCGCTTTTAGGGCATAG
AGAGTGGTATCTACGAACGCGTCACTTGAGCCGTCCTGGGTCCGCCTAGGGAGCGGGAAA
ACGCAGGCAATGATTTAATATAGAACCATGTCACAGCACAGGCTATATTCAATCGAGTTG
GGCGTTGCGGTGGCTCGGCACTCGTATGGGAGGCGCCTCAGTTGAGGCAGACATGTTAGG
AAGACGATTGAGGGTACATTCATCTGCTTAGTCCTAAATGGCCCAGTATACCTGCAGTGA
CTTTTAGGAGTGGGTTATGATGCCCAG'''
min_super_string(fasta_to_list(s))

'CGACCGCATTATGACTAGGCCTATCGATGGTCACCAACGCACCGTTGATGGATGATGCCATGAAACCAACTTCGCACTTGTCAAACCGGCGCACTTTTTGGATGCTCGCCAAGACCTCAAGCATATGAAATTACCGTGCCGGTGACCCTACGGATCGTGCAGTGTCTATAGTATCCGCAACCATCACCAGTTCGCCACTGGACCAATTATAGGGTTATGGCGTCCGGTGCCACGCCCGAGTAGATGAAGGCTTTTCAAGGCAAACGTCATATCGCCTGGTGAAACATCTCCATTCCGGCGCCTAGCTGGGTGTCAGGAGCCGGGACCAAGGCAGGAGAACAAGTACTATTGGTATTTTTCGATGACAGGTCTCCCATTACTGGTCGCAAATCCGGCAGGAGGTTATGCCCTGAGTCCTGGCGAGGTGCGGGACACCTGCCTGCAGCTGAAATTTATAGCACCTAAGGCGGCTGTCATGTTTACCCGTCGCGGCAAATGACGTGGCCCGGCTGACCCCAAAAAAGTATACTACGCCCTCCGGATCACGCGCCACCGGGGTCACGAACCGAAGTCATAGGTCTCTTGGGCACCCCCCACACTGCTACGTACGAGGACGGATGCCTCGGAGATTATGGCCAGGAGCAGTTCGGATGTCAACACCTTGTCTTTCTGTATCCCCTTTATCGTAACAGGAACGTTCGCAAACAGCAGCATGTGTCAACATACATCTAAACACCGACTTACGGGTCGGCGCATCCAAAAAGGCTTACGATCAGTCAAACATACACAATTTTCCCTAGATGTAGGTCTCTTGCTTGCAGCATCCTTCGGGTTTAAAGACATTCCCATGCTGGTAGAAACGTAAACTATCCTATTAAAAGTATTTCGTTGTGGTTTTTGCTCATAGAATGAGGTGCTGCGGGTGCAGGCGCAAGTGGACTTGATGGAGCTCCCTCCACAAGGGCCTACATCAAGCACATCGCGCCCAGCGAAATCACA

In [162]:
def chunks(s, n):
    """Produce `n`-character chunks from `s`."""
    for start in range(0, len(s), n):
        yield s[start:start+n]

def rna_to_protein(s):
    res = ''
    for i in chunks(s,3):
        if rna_to_protein_dict[i] == 'Stop':
            return res
        else:
            res += rna_to_protein_dict[i]
    return res        
rna_to_protein('AUGAAUGCAUCGCGUGAUAGGUCCACCACAGAAGCCGAGCGUAUCGUUGUUGGGUUGUGUUACGGACCAACCACAUGGAGCCUUUACCGGGGGGAGCAACCUCGUAUGGUCGUUACGAACCCCACUUAUAGGUCACUCUUCGGAAUGACCGCUCGAAGACAGGCGGUCACUUACGAAUAUAUGACCCCCUCGUAUGAUCCGGGAAGGGGCGCCUAUGCGUUUAGCACUCAGAUCUCCAGCACGUAUACUAGAAAGUGCUCGGGCCCAAGACCAGACUCCAGCGCUUAUCGGAGUGAGAUGCCGGGAGGGUAUGUAUGUUUACUCCCCCGAGCUGACUCCUGCCCUCUGGGACGGAUGAAUCCGCCGCCGGGACCACUGGGUCACCAUCUUCCACGCAAAAAUGCGGGCCACGUUCGCUCCCUUACAGAGGUGUUGUCGUGUUCAGCGGGCAGGGCGCAUCCACUCAGUGUGAAGCCGGGGCGCGUUUCCUCCGUUAAGAGGGAGCUAUCAUUGAUCUCGUCUUACAGUGAGGCUUUCUUCCACGUCCCCGCGGGAUGCCUUUUCACAACUCAGGGUGACCUCGACGCCAUGAGCACGUCAGGCAAGAGUAAUAGCCCCGUUGUGUACUGGGGAUACAAAGCUCGGUUGCGCGUUGCGGCCUACGACGUGCUCUUACUGGCCUGUGCGGCAUGUCGCCUUCUGGGUGUUUAUCAAGCGGGCACCACGGGAGUGCGUUGUCAACCGCCGGGUCCUGGCCAUGAUGAGAUGGGGGUUUGCCCUGUUUACGUCACUGAAACACAAAGUGGCUGUAGCGUUUCCUUCAGAGUAGUGAACUCCUGGCCGCGACCAUCCUCUGCUGGCUUUGGCAACGUCGCACAAGGUAUGCGACGGGGGCAACUUAGUAAACUCACCAUCCUAACGAGAGUCAUCUAUGAGUAUCGCCAUUCAUGGAGCAACAAUCCUGAUCAGACAACAUCCCGUGCCUGGCCACCCGAGGGCCUUUACUUACGUUCCAAUCGGCAACCUCCGUCUUACCCAUAUAAGCGUCUCAGUUGGCUAAGGCGCCGCGGUCUGACUACAUGGUCCAGGUCACGGGGCCUGGCCGAGGGCAUGCAGCCCUGGUUCAGUAGUUUUGCCUCGUCAAAAUUUUUAGCAUAUAAUUGGCACAAGAUCGCUUCGUUUGAGGCCCGAACGCCAGUGUCUCGUUUAUGUUUACUUACCCAACUAAAAAAGUUACACCUGUCCGGAGGCAAACUCUUUUUAUUUGCAGAGCAUAUGUAUGUUCCGUCCAGUUGUAGAAUUCCUAGCCGGUUCCUCAUUUGGCAAACCCCUGUUCGCAAAAAGGCGCAUCAAGUAUUUCCCAUUGAAUAUCACCGAUUACCGUCCCGGAACGAGCGACACGGUUGGUGCAUGCGGUUGCGGCAGGGCAUCCACGACGCCAGUUUACGCAUAGCCAAGAUCACCUGUGGUCUAGUUAAGCCGGUGCGAUCUUUAAUGCACAUCAUCAUAAUUUAUAAGACCAAUGGCUGCUCAGCCGCUAGUCUCGUUACUAUUUUUGUUAUAAGCAGGUGUAGUUCUACAUACGUGCCCUCCUUGGAACGGUUCGCAGUAUUAUUUUCGGGAGCGACCCUUCCGCCUAGAAGCGAACCAAGCCUAAUACCAUGCAGAGAAUUUAUCGAGGUAUCGCUACAUAGAUUAGUUACUCAAAUACGCCGCACAGCAGACACCGGGUUAGAUCGCGUCAUCCCUCUCAACCGGUGCGGUAAAUAUUGCCCGGAUUUGAGCCCCCAAGGUCCAUUCACCGCAUAUACUAGGACACUACCUUGUCAUCCCCCAACGGCUACCAGGAGCAUUGUCAAUAGGACGAAAGGAGCCCCACAUGAAUUCCUCGGCAUAACAUCCUUGUAUCCAGCUGAAGAUGAGACUAACUGGAAGACUUCGGAGAACCUGCAAUUUGCCUCCAAUGACAGGCAAGCGGCCAAUUCUCGAUGGUUCGCGGUCUUUAGUUGCAGACUUGUGCACUCACCAAGGGUCCGUUUGGAUAUGACUGCACUAGUUGAUGGUUGUCCCUGUGGUAAUGAGGCCGGUUGUGUCCUUGAAUACCGGGUCAGCUCGGUAACACAUAUGCACCCGGAUUGGGUGGACUUUCGUUCACAAACGGUGCAUGCGCCUACAAUGUCUAGCGAUAUGGACAGUAACGAGAAUCAGAGAAUUUACACAAGCACUAGUGCUGCACCUAAUCGUACUAAAGAUGAGUGUUGCGAGUUGCUGCCUUUUCCAUCAACCUUCCUGCUAACCACCUACGUGAAAAUCAUUGAUGAGCUCGGACCGCUGGAACCUUGUGAUAGCCCUUAUAAACAUUCCUUUCGCGUAUCAAUGAAAUCGUCUCCGCAGGUCAUUAACAAAGAAUAUAGUACCCGAUCAGAGCUAUGCUACACUAGAUCCGGAAUGUCCCCUCGAAGGCCGACACGCCAUUAUACAGAUACCGCUGCAGGACAGAUAGAAUUGUCAUUCCAUGCGUGGGGUUUAUGCGGUCACGCCACGCUUAUAGACUUCGUUCCGGAUAGCUCGAGGUUUCCCGCGAUCUACAGUACAAGGUCCGAAGUGUGCAGCGCCGCCCCGGGGGUUUCCCCACCGCCAGGAGGAACCUACUUACCGCAUUUCAUAUACCUUCACCGAAGUUCAAAGACAGCACACCGAUCACUUAGCCACAUCUCCCUAUUGUGGUUAACAGCACUGGCUCGAUCACCUCCCGGCGCGGAGGAUAAGCGGCAAAACAGCCGUGAUGCUGAAAAUUCUGAAUCGACUGCGGCGCCUAUACGGCCAGCAUCUUACGACUUUCCUAGAACGAGGGUAGACCCGUGUACUGAACUGCGCUUUCAAGUACUCACUGGAGCGCCUUAUCAUAGCGGUCUUCAGGCAAUGUCACCAACUCCAUCGACCGAUUGCUUUCAUUUCUCCAUACAAUUUCCUAAGACAGUCUCCUUGUACGCAACGAAGGCUUGGAUACCUGUAUGUGCGGCUCUCUGGCAUAGCGUAGAAAUUGGGUGGCCGGAAACCGACAGAAUGAGCGUGGCCCUAUUGUGCCACGACCAUGGUUGGAUGCAUCACGCCAGAGACUGGAUUAGAUCUCUUCCCGGAGCUGGUAUUUUAUACGGCGGGGUAGGAGAGAGAUUCGUCCUGCUCGUUUGCACUUGCUUCCUGCAAACGCACCGAACGGGGGAUCAGGGGGGAGGGCACAAUCGGCUUCACCAAUAUCGGCGGUCGGGUUCGUUUAGCCCUCAGAAGGACUACACAAGGUGGACACACUCAUGCUCCUUCACUGUUAUGUAUCUACCAGAGAUCCGCGCACCCAACCUUGGCCUAGGCGAGGGAAGCCGUGCCUCAUGUCUUCCUGACUCACAAAGGUUGUGUGUCCCGUCGCGGGAUUCCGCCCUGUCCCUCCUUUGGACUAGCGUUUUACUUGAUUUACGCAUGGGCUCUCGUCUUAACCUGGCCAGCUGGGGCGAAGUCCUUACGUACGGUCCCGAUGUUCAAAGUUGGCCUGCGUUGUCCAGGAACUUAAGCACACACGCAGCAUAUUGUGGGAUAUUGGGAGAAGGCGUAGUGGGUUAUCACCGAGUCCAGAAUACGGAAAACUCUCUCCUUGCUGAGGGCGCCAGUCCCCCAGCUAACGCCAUGUCACCGACCGGGCGUCGCCGGCGCUUAGGGUCUUUCGAUCCACAGGUAGCAAAAUCAUUAGCCCCUCGGCCGCAAUUGACCGUCAGCGGGUCAGUCCUCCACUCCUUCGAGUAUCCGGACCGGCCCGUUGUGGCACAGUGUCUAAUAAGGGUACACAUACACGGUAUUAAAAAAAUCGCGCUGCACAGCGUUCGGGUUUGUGAAGUUAGUCAGUCCAAUCCUUGGGGCAGAGAAUGCACAGGGGUUUUAUUAAAUUUCAAACGUCUCCCCUUAUUCAGAUCCGGGGUAAUAAGAUCCGAGGUGUGUCUCCGGCAGAACACCGGUCCUUGCUUCUAUUUCCAAGGGGACCCGUAUAUCUUCUCGCGUACAUCACGGUGGGAGGGGGGUGUGCUCGAUAGGCAGCCCCCCUCAAAUUAUGAAGCCUACUGUUUGAAAGCUGCUUCUACACAUGUCUCUUCGAGACAUAUUCUGCCCAAUGGAGUAAUUCGCUCAAUCCCUAACCUCGCCAGUUUCAUGCUCCGCUCGCUCCUGUCCAUACGUAUAUUCCCACUUAUCGGGCGGCUAUGGUGUCUGCCGUAUCUACCGGAUACCAACCUACGCGGAGCCAAGCAUAAGUCAGCCUACACCAACUCACACCGGGCUUGUAUCCGAAACGGGGUGAAAUUGAGAAUUCUGACACUAUCCCAUCUUCCUGAUAUAUUCUUGCGCUUGACUCCAACUCUGCGGAUACGACGACAAGAAAAGCGGGGCUGCAGUCCACCCCACCCCCGGGGAAAAUAUAUCGUUAUCAGUGGCAGAGGCCGGGAGACACGGGAAGGCAAACUUACUAUACAUGGUUGCUCUAUUCACGCGAAUCUUGUAGAACGCAAAGGUAGAGUGGUCAAAGCCGGAAUGAUUACCCAACACUGGUACUUAAGCUUCGACAUACCGAAUUUUUUGUGCACCAGCUGUCGUUUUGGGGGCCGCAAAGUUUCGCACGGCCAGUCUCCCUUCUCUCUUCAUCUCCUUACGGAGUUCGCCAUGGCAAAGUGCUCCGUGGGGGCCUACCGCCUAAAUAUAUCCAUAGUUCAGAGCGUCAUGAGUCCCGUGACCAUAGGUGGGUACGAGAAGUCCUGUAUCAGCUCGUUCAUAUAUCUGUCGCUUGGGGAUUCCUGCACAAAACUCUUGUCAUUUCAUCCAGGAAAUAAGCGCAGAGAUGUGUGCCCAAGAUUGGCACCGGAUGUAGGUGUGGGCUCCUGGCUGGAAUACGUAGUAACCGUGCCAGCGCCAGGUUGCCAAAUCUGUCAGUGCCUUUUGGGAUCUCGUCGUGUUAUACGCAAUGCCGGCCCCAAGAACCAUAGUGUUGACGAGUUGGUACUGGUUUCCGUAAAUUUUGGCAAGGAACUCCCGCUCACGAAUUGUUCUGGAGUACAAAGAAGUCGCUGCGGCUUCUCGAUGGAACAAGUCGACCCGAAUACCUCGAGGGUAUUGGGGGCACUAGCAUCUCACAAGUACGUCUUCUAUGCGAUUGUGAUAUAUUUCGGUAGACCGGGCCGCCGCAUGACGGUCCCAAAGGAUGAGUCUCCAUAUAGCAUAACCCUCGCUCUCCGCGAUGAACCAGAUACCGGUUUUUCCCGAGGCAGCCUGGAGCACGCCCAGGGGUUGGCUAGAAAGCCACCCGUUAAGCGUGACGUCAUCGCCCCACUAUCAACUAUGAAAUCUAUCGAUGGCCGUAGCUCAGAACCUUCAGGAAUGCGAUCUGUAGGAAACUUCAUUUCCACGUGGCAUCCGAGACCAGAGGGGGACCAUGGCAACAUGUCGACGAGUCUCCUCCGUCAAGGUGUUUUCCUUCAACAAAAGUGUUCCACCGCAAUUUCCUACGGCGACAAACGCAUUCCCAGCAGCAUCCAGGUCCAGCAGAAAAAACUAUCACAACGCGGGGACGCGCAGCGGAUCGUAUAUACGAACGGUGUGGCAAGAAGUUCAUCUAAAUCAUUAGAAGCGUCUCUUCCGGAUGAGAAUUGUAAACAGGCGGUUGUUCUAUCAAGAACAAUUUUCUGGGUGCCCAGGCUUUACGAAAGCUCUCAACGAAAACAUGUCAAUCGCGCCAGUCCACAUAGCUGCGAUACCCGCUCUCCGCGUGGCCCAGGGAACAGGGAUAUUCCAUGUGGACAACUUGCGUCGCACGCGGCACUCACAGUAGUGGAGGCAGCUCCGUCACAUAUGCCCCGAUGGACCCUGAAGGCUCGCGGAAACGACGGGAGGGACCAGAAGCAGCGCCGCUGUAACCAACCCACCCGGGUUCAUUAUCCGUACAUAGAUUUAUCAAGAAUACGGUAUGUGCGUGUUCGGUUCGCGUGCGGUCAGUGCAAAUUUCAUGAGACCGGACACUUUCAGAAGAAAAUUUGUGUGCCUCUACCAAUCGCUUCUAGUCGAAGAAGGGUCCACCGCAACCGGGAGGUGACUACGCACAAUCGGCCCCAGACGAGUGCGUAUGAUCGCUGUAGGGUGGCACAGGUACAUUGCCGGGCGAGCGUGGGCGAGCUAGUUGUUCCUAAUAUGAGAACUAUCGCAGCACAAAAAACGUGGCGAAUUACGACUCCUAAUGGCAUACCGGUAUCUUUACAACGCAGUUGGAAAAACCCGAAAGAGCCUACAUAUUGCCGCAUACUCAUGGUCUUCCAAACUAAUCUAUAUUUUAAGAUUUUUUGUUACCUCGGGAUCAAAGUCCAGGCUGACCAUGCUGGCAAGGGCGACACCACGAUACUUCACAAAUUCAUUCAAAAUCUUAGUAAGACGAAAUCACCUGUAAAGGUGCUAAAACUGAAUAGAAUCUCAGCGGGUCGGUAUAGCGAGGCGUACUGGGCCUUGCGCCCAACUGACGCGUUGUUCUCGAGACCUGUCACACUGUGUCCUUUACAAGAGAAUGAGGCCACUGGGCGCCCCCGUUUAAGACCCACUACGUGCGAUAUAGUAGGGCUAGCACCGGAUCGAGAACACACCGGUACUGGGUUGCUCAGGUCCAAAGUGUCUUUUGACUACCGGCCAUUCCAUCGUACAACUGGCUUUGUGCUUCGCUACGGUUUCAGGGGCCUGCGUGGCGUAAAACAAAACGGGUGCACUUUGUCCUCACAUGUCCACGUCUUCAUAUCGAGUCAUGAUAUACAGUCCAAUUCAACACCAAAGGUUACCACCAAAGUCAGCGAAAGCUCAGCCGCAAGGUCAGAGUUUAACUUAAACUUCGGUUCAUUUUUAGUCAUCCGCUAUCUGAGUGUCAGUAAUACUGUGAGCCGGUGUGGGGUUGAGAAAGAAUGGUCUGUAGUUAUCCAGUUCAGGCCUGCCUUCCUCUACCUAACUUUCAGCUCGUCUAUUUUCGAGUCUUUGCGAUGCUCCAUUGUUAUUACAACUGCGUUACCCCCAAUAGGCAGACCGAAAAUCUUGGCCACUCCUUAUUGCCGCCUAGCGAACGGCGUCUACUGCCUGAACAGCACAAUCUAUCCGUUAAUUGUCUUGGUUCGAUUCGCUUGCUUAAGUAUAUCAGUAGCCGCAGGCCGCACCCCUAUGAGCUGUACUCAGACCCUGCGCAGUGCCGUCACCUUCCACCCCGCUACCGCAACUAAUUCCCACCGCUUACCCUUAAGUCGAAAUAAUAUUUACGACACUGCAGGAUGGUUGUCCUCCUACAGCAAACGUUAUGCGUGGAAUAAGGAGAUAUCAGGCUUCUGCAAUUUAGAAUACAAUUACGUAAUCAUUGAAGGUCGCCGCCAGGAAUGGGUUUCUACCGACUCCACCAGCAACAGUCAAGAUCGUCAGGCCAUGUCCCAGACCACGCUUGACCAUCGGGCCCUAGUGCCUCGCGCUUUUACGGACACCAAUGCCCUAACCGAUCGGCCACUUUUCGUUCCAUAUGCCACGAGCGAGAAAUUUACCCACCCGGUACCUCUGUGGUUGUGCGGAACGAUGCCCUAUCUACUCGAAAGACCCGCGGACAAUCGUUUCGUUGAUUCACGAGUGGGGGGUUCGAGGGUUGAGUUUCGGCAGCUAUCUCGGAAACGGCUUGAUUCACCCAAUGGCAUUGCCCGCUAUGACUCGCAAGAACCAGAUAUGCCGGACUGCGUCAUAGUUAAGGAUGCCGAAAGGUCAGACAUUACUAUCACGUCAUGGACGCUCUCACAGUGGACUGCUACUCGGAUAUCCGAGCGCGCAGUGAGCCUUGAUUCGUGGCAGGACUUAAUUCAUACAGUAGGAAUCAUUAGCGCUGAACGGUGGGAGGUCACCGGAAAUUCCCGCAUAAAGAACACGGGAGCCCCUAUGGCUAUCGCGUCGAGAAGCAAUAUCUGUUUUAAAAAGUUAAAACAGCAUUGCGGCUCAAUCGAGCCGAGACCAUCCCCGGAGUCUACCCUUGGUUGGCCAGCAAGGUCACACCUGAAAACCGCAAUCGCCUGUAACUACAGCGUGACAACAGUCAGUCCGAGUAUGUUGUUUACGGCCGGCCGUUGGAAUCAUGCUCUCAGCUGGGGCCCCGAAUCAGCCUUAAACGUACAAAUAAAGAUUUGUAGGGCAGCUACUGGAACCCUGGCGGCUAAGCGCAUUUCGGGAAGGACAGACCCUCAGGAUUAUCUUGCGGCCCGCCAAUCAUGCUACGUCGCUGCGUUGGUAUUUGAAAUGAUACCGGCCCUCUGA')
    

'MNASRDRSTTEAERIVVGLCYGPTTWSLYRGEQPRMVVTNPTYRSLFGMTARRQAVTYEYMTPSYDPGRGAYAFSTQISSTYTRKCSGPRPDSSAYRSEMPGGYVCLLPRADSCPLGRMNPPPGPLGHHLPRKNAGHVRSLTEVLSCSAGRAHPLSVKPGRVSSVKRELSLISSYSEAFFHVPAGCLFTTQGDLDAMSTSGKSNSPVVYWGYKARLRVAAYDVLLLACAACRLLGVYQAGTTGVRCQPPGPGHDEMGVCPVYVTETQSGCSVSFRVVNSWPRPSSAGFGNVAQGMRRGQLSKLTILTRVIYEYRHSWSNNPDQTTSRAWPPEGLYLRSNRQPPSYPYKRLSWLRRRGLTTWSRSRGLAEGMQPWFSSFASSKFLAYNWHKIASFEARTPVSRLCLLTQLKKLHLSGGKLFLFAEHMYVPSSCRIPSRFLIWQTPVRKKAHQVFPIEYHRLPSRNERHGWCMRLRQGIHDASLRIAKITCGLVKPVRSLMHIIIIYKTNGCSAASLVTIFVISRCSSTYVPSLERFAVLFSGATLPPRSEPSLIPCREFIEVSLHRLVTQIRRTADTGLDRVIPLNRCGKYCPDLSPQGPFTAYTRTLPCHPPTATRSIVNRTKGAPHEFLGITSLYPAEDETNWKTSENLQFASNDRQAANSRWFAVFSCRLVHSPRVRLDMTALVDGCPCGNEAGCVLEYRVSSVTHMHPDWVDFRSQTVHAPTMSSDMDSNENQRIYTSTSAAPNRTKDECCELLPFPSTFLLTTYVKIIDELGPLEPCDSPYKHSFRVSMKSSPQVINKEYSTRSELCYTRSGMSPRRPTRHYTDTAAGQIELSFHAWGLCGHATLIDFVPDSSRFPAIYSTRSEVCSAAPGVSPPPGGTYLPHFIYLHRSSKTAHRSLSHISLLWLTALARSPPGAEDKRQNSRDAENSESTAAPIRPASYDFPRTRVDPCTELRFQVLTGAPYHSGLQAMSPTPSTDCFHFSIQFPKTVSLYAT

In [155]:
s = '''>Rosalind_1327
CCGCCTCCTAAGTATCGAGGCTTACCATTGAACTAAGCTCTGAAACTTCGTAGTTCATTC
CCAAAGGGTGCTTGAGAATACGCCA
>Rosalind_8283
CGAATCGCCTAAGGACATGAGTGACGTTGAATTTGTTACCGTTTCACAGGGCGAAGAATG
ATACGTCACGAGTTACTTAATCTTC
>Rosalind_9389
ACAGAACTGACTTATTACAACTCTTCACCTACCCGTCTCTACCCTCTGTTCTACGTGGAT
CGCGACATTGAGCGTGAAGAGTACCTTTATC
>Rosalind_6228
TGGACTATATGACGCTGTCAACATGTAAAGGGCACAGGCCACAGCGTCCTCAGAGATCGT
GCTGCGTCATTTTGGAACGCCCCTTATTAATGA
>Rosalind_6302
AGAGATCAACGCACTACAACATGATAGGACGTACTAGTGGACCGTCAGTACGTTTCGAGC
GAGGGTGTGCCTAACATGTCAATAGGTCATT
>Rosalind_3870
CTCGATTTTGAACAAAGAGATAATCCATTGAGCTTGAGTAAGATGTGAGCTTACCGTGCA
GGAATGTACGGTTAACAGACAGGATGCACTCCTGGTTGAA
>Rosalind_4489
TTTTACACAGCGTTTCGCGTGAGGAGTTGCAGATGAAACACTCTACGGCCGAAAAGCCAC
TCTGGAATTTCATAGCATAGACACATACCTGTGACTAT
>Rosalind_5646
ATTAAGTTCCGCCGGTCCCAACTGAATACAACCAAGGTCGGGCCCTTCTTCGAATTGTCA
ACGCAGTTTCACGCAGAAGTGGGCA
>Rosalind_1302
GAAGACTAACCATCTATAGCAAGGTTCGATATCTCGTAATTGGAGTCCATAGTATTTGCT
AGAACGACATCAGAACCGTGGCGGCGACCAGTAACAC
>Rosalind_2815
TTCGATGCTGCATGCCCTTGCTACAAAAATGGATTGAGGTCGTTCAGGATTGTTCGGGAC
AAGCAAAGGTCAAAGACTTCTACATATCTGTT
>Rosalind_8052
GTCGGAGGTAGTTTGTCGGTTACAAATAGGTACCCACGGACAATGCTTAGAGGTTCTACG
TGTTAGCCCATCTCTCGATTGGAGCGT
>Rosalind_6257
CAGGATCCCCCCCTGTCGCTCGCCACGTTTCTGGTAGTCTAGGCGCTATAGCAATAGAGG
TCTGTTAGGAACGTGGGTGGC
>Rosalind_0005
GCGTATTAATCAGGCCGTGTGGGGGTCATATTGTTGTTAACTCCCGTACACAAATGCGCA
CAATAACGTCATCCTAAGCCCGTCG
>Rosalind_9051
TTCAGCTTCGGTCCCTACATCTTCACATCAGTATGGTGTTGACCGAAGGAGGATCAGAGG
TGCCCGGTTGGTCTGGATAA
>Rosalind_1579
GGCAACGAAACCTCTCCAGATGATAATTACCACACAATTGTCTTCGTAGTGATCTACTCA
CAGTTCAACGTGTTCACGACCTGTGCGCAGCGCC
>Rosalind_1471
CTCCCCTCGGTTTTTGTACCGATGGATTGCTTACCCATCGAGATAGCTGGTGCTGAAAAC
CCACTTCGTTGAAAACCGCCGCACG
>Rosalind_2137
GGATCGGTGGAGAAGCGCCTCCACGCAGCTAGTCACCGGGAGTGGCAAAACGAGGAGTGC
CCCGTGCCAGCTGATTTTCCAATCAAGAAAAGGCTGTAA
>Rosalind_1581
AGTTTCTCCCTCTTCGTACGTGACTCAGGGGCTTTGTTTTCGATTGATTGCGGCGAGGAC
GAGTAGGACATTATACTGTGATAGCTGGGTC
>Rosalind_7661
ACTTTAATTCCTCTAAACGGAAGGAGCTGGACTGAGCTTCGTGCGGACTACTGGTGTCAA
CAGTATGTCTTGGTCGACAGCC
>Rosalind_6598
TATCGAGCGACTGGTAGGTACGAATCCCGACGTGATACTGAGCTCGTGTGCGTTAGCACC
GGCTAGTGGCCGAATGCTACGGCGG
>Rosalind_1176
AACCCTGGCTCAGTGACAAGGCCTCAGACGTTTTTATGCATTGTCTCTGACCCCTTAAAA
TTTAGTTGGGTCCGATTAGGTCATCGTTTGCGCGAA
>Rosalind_0535
TTGTGCGAGCCTACGCACGCACGTCGTGGTTCCTTCGTAGTCCACGTGTCAAGCAATCAA
ACCAGGTCCATCGAGGTATACC
>Rosalind_6007
GGCGTCGCTGAAGCGATCCGCTCTATTGACGCCCAGGCACTATATTTTTTCTTTACGCAG
GTGTTCCAGTTTTAGTTTCACATGCATTAACAGTTGAAT
>Rosalind_4678
TACCATAGCGAGCCGACGAGTAGGATTGCCATAAGGGTCGATCCAACTATCTCATTACAA
TTGTCGGGTAGCCATGTTGTACGTGGAAGA
>Rosalind_7297
TCGTGCATGCTTCGTATCTCACAAGTTATGTTATTGCGAACTGGAATCGTCAACAACCTC
GAGCCGTGGACAGAACACACGGAAA
>Rosalind_1947
AGGCTACGCAAAAGTAGAACGTACCGATGCTATTCAGTGCAAAGCCGAATTCCGAGATAG
AGCCCCTAGTCTATGCATCTTTCCCGTACG
>Rosalind_9995
CGACCACGATCGAATTACGAAACTAATCTCTGCCGGGTATAACCCTCAGATAACTGGCTA
ACTTTCCAATGGTCTCTGTGTTCC
>Rosalind_3152
ATATCGGTGTGGCGCCAATATGCAGCTGGGCGAAAACTCTACCGGTCCGCTCTGTGAAAC
TAGACTAACTTACCAGTTCCGA
>Rosalind_9721
AGGGACTTTGTAGTCATAGCTCGAATTTGGGGACGCTCCTTGGCTCCATCGCACTTTGGT
AGGGTGCTTAAAGTAATATCACTAGACCACTACGT
>Rosalind_5767
ACTTACTTGTTAACGCGGGGTGGCCGGCCGCATGAGTATCGTCATGGTCTGTGGTACAAG
TACGCTGTCAGCACACCTGACGCCGAGGGCTTCGACCA
>Rosalind_4752
GTGTGCTCTACTTGTATGGTAATAACTACTAGAGGGCAAGACCTTTTTGTTCTTCATCCA
CAAGATATCGCTCTCCCGTCCCATAG
>Rosalind_0478
GCGATACTAAGCTGAGGAGCTGTGGGTCTTTGCCTCTTCACTCGTTACAGTTTTTGAGAC
GGCCGGAGGGGTGATCCAGAAAAGTAGTGAGCCAGGC
>Rosalind_3480
ATTGGAACCTTCACCGACGCTTGTTCATGCTTTATGCTCGGAGGGTTAGAAAACCGCCGG
GCTCCGACACAAGGTATTTACG
>Rosalind_6819
TGGCCCTTTGGCACTCAGGTGGTATAAAGACGAGGCAGGAGCGAGACCAAATATCGCCCC
CGTACGGATGATTTGTCTGCGCGAGCGATGCA
>Rosalind_5774
CCTATCCCAAGATGCGGGTTCGGCCTAGCCTTCTATGTAGTAAGCATTTCTATCTTCGGA
ACATGGGGTTATAGACAAAGTACCCTGGGGAACCC
>Rosalind_1399
GGAAAAACGAACTCACTATTCTAGATGCCAATAACTTTGAAAAACAACGCTGACGCATTT
TACCGAGAGGGATGCGTGATATTTTCTATATGTCTTGTT
>Rosalind_5478
TATCTCGATAAATGCCGGAAGACCTCGCTCCCCTAAATGCAGTGTATTTAACTTAAACTC
TGGTGAATGGGAGATGTCACTGAGT
>Rosalind_1838
TCTATCAAACTATCGCCATTAACGAGTAGCTCGCAACTAATTACTGACGACGTTGATATG
GGGGAGGCGACTACCGATACACGCCCCGCA
>Rosalind_7983
TGCCGAGCCGATGTAAGATACTTTGAACATCTCTAACCCAGCAGTCGGAAGAGGTCGTGG
CCCTCCTCTGCAAAGATATGCCT
>Rosalind_8420
CCAACCTTTTACCAGTTAATAGCACGGCCTATGCAACGTCAGCACAGTGGGCTTGTAGTT
GCCGTTTAAAACTACCGTATCCATCCT
>Rosalind_9292
CATGTCCAGGCTACGCGGAGCCCTAGTGATCAACACGTTCTAGCGGTCGCAACGCGCTCT
ATGAGCTGACGTTGTCGCTCGGGCAGCG
>Rosalind_1753
GGCGCTGAAGGGACCTCTTCAGTTAGGACCAGATGGACCAGCCCATTCGCTGCGATCATC
CTTTGTAAAACTTGGTACTATCTCGATGAGCGCAGT
>Rosalind_8671
TGCAAACGTTAAATCGAGTCGGCAGCGCAGTCCCCTGGGTATGTCGCTGCATTCCAGGAG
AGCGCCCTTACAGTCTTTTGGTTTTAGT
>Rosalind_8582
CGGTTAGCGGCAATCGCTGGTCTCACAGGGCGTGAATCATTAAGTCGGGGTTACTATGAA
CCTTACATGGACTTAACATATGGAGGTGTTGCTGC
>Rosalind_7869
GGAGTGGTATGTCGCCCAAAACCTAATCCTGCTTTTGTTCCGCATGGCACCAGGTCGTGC
GGCCGGCTTCAAATCTCCTACCGGATGC
>Rosalind_5440
ATGCACTTGACTGGCACATTAATCTAGGTTATGAACGTGTGTTATCCTGGGGAAGCACTT
GCAAAATAGGAGAAGTTGCC
>Rosalind_7895
GAACCGGGGTGACCAACACTCTGGAATCTCAGGTTTCCGTCCCCCTCTCACAATTTTGTA
TCTTGACCCATAGATTTCGTCTGATAGCGGACACAG
>Rosalind_1504
CAAATGCCCCCGACACACTTCTATGGAATCTGGATGGGCCCAAGTACTCGCACTAAAGAA
TGCGTCCGGTTTGATAGGGAGTTTGGATACA
>Rosalind_1255
TGCCCCCCGACTGATGCACTACAGATAACGCGTGACGTCAGGATACATTAAGAACGATAC
TTAGCCAGTGACAATAGGATC
>Rosalind_5807
CCTTAAACAGTAGTCGGATACCAGAGGAAGAGCGGGCGCAACTGCCCGGCATAGGCGACA
GCTACTCGCCATGCGGCGATT
>Rosalind_6934
AGCCTTGAACCTACCGCCTACATCCGGCAACTATAGTATATTCCTCCTCGTGTCACATAA
CATGTCATATAAATACCCCGCCGTGCACCCTG
>Rosalind_4157
CGCGCGTACCGAATTGCAACTAGTAGGGGCGTATTGGTCTGTACTTGTCGGTTTGCTCCA
CCTATACCGTTGTTGGGTTCACCGACGAGCT
>Rosalind_3295
GCTGTATCTCCGACCTAGTTAAGCGGGGGAATAGAGTTCGCCTACGTAAGGCTCGGCTAA
GTGGACGTGCATACGTTGAGTTCACCTTAGTCC
>Rosalind_6454
ATCTGGTTCTCCCGATGCGTTTCTCGACCGCCGCCCGTGCGCGCGAGTTCCGAAATGATA
GAACCAGCGTTACGACTATGTGCGGGCTTAGGTGTTT
>Rosalind_1762
GCCAAATCCTTGCTTCATGGGTAAGTGGAGAGGTACTCTTGCTTATCCCTTTCAATGAGT
TTCATATAATACGGGATCCGTTCT
>Rosalind_2430
TATTAAGCCGCAGGGGTAAACGATAGGGCGCCTCTTAACGACGGAAGAGTTACACTGCAT
CAGCTAATAGTATTGATTTT
>Rosalind_0239
GGTTCACCTTCTCAAATCTTCAAGCTACGCATGTGGGCACAAGCGTTTTCATGTTCGTCT
ATTCGCGTCTTCGATGAAAAGTGTAGCA
>Rosalind_1277
CAGGGTTCCTCCCACTGAGCGAGCAGACCTGTTGGTTCATAACTAGAACAGTTATCAGAC
GGGTACCTGTCTGAGGTGTCTCCCCAT
>Rosalind_9123
GCTGTGCCAGGCGGGTGGTACCCTGTTTCTCTTGTATCTGGAACCGCGCACAGTGGTAGG
ATTGCCTTTAACCTAGAGTACCGTAAGAGTGCTGCGCA
>Rosalind_2693
TGCTGGGGTAGGACCGCTCGATCTACGCAACACCTTAACATCCTCACGTGTATCACCAAA
GACGCTGTGCGCTTTCCACGCGGTCATAG
>Rosalind_0510
AGGCAAGACGACCAACAGTTTGATTTGTTACCATACCGCCTCCAATAACTTCCAACCGCC
ATGTTCTCTGTAGGGGGGACAAGCTTCA
>Rosalind_2100
TCTCAGCCGCTACTACCTTTTACATAGGACCGTGCTGTCTTCCGCGAGTGTACTTGCCGA
GCGCGGGACATTGGCGCTCAACCCGTAGGGTTCGA
>Rosalind_6494
ATGGTGGCGCATCACAATCGGCTCGGAATGTCAACGAGACTCCTCTACTAGCGTAGCACT
ACAACCAGCAGTAGAGTGTCACTCGTGAAGTCCGATCTAT
>Rosalind_1664
ATTGCTGACGACGGAGGGAATTTTGTAGTACGACACGACAGGTTCACTCTGTGGCCACTA
TAGCGCCTCACATCGCCGCATGAGCGGTCGATGG
>Rosalind_7726
TTCTCACCATAGGTTGAGGAGTACTGCCCGTGTGCTGTCGACCAACGACCCACTCGTTAC
TTCGGCTTGCAACTTCGCGCGTAGCCAGCCTTGTCTA
>Rosalind_3379
CCGGACACGCGTGCACCGGAGCAGCAAACCCCCCTGCGGGCTGGCCTCGCCGCAACATTC
CCGGAGCGGATCACCCGGCTCTTT
>Rosalind_5941
AGAGCGCCGCATCGATGAACGGGTCTGAGTATCGTCGTGTTAAAGGCAACAGGATGCGCT
AGCATTGCCACGTTCCTGAAGTCC
>Rosalind_9783
GTAGCCCATTTACCAGTGTTCGGGATGTTCGCACATCGAGGGCTTGTCCACCAGCAATCA
AAGTTCTTTACTCTTATATACCCCGGCGGGTCTCCAC
>Rosalind_9168
CTATGGCTGCCCCTGGTGTTCCGCGAAGCTCTTAGTTGACGTAACTCGCGGTATCACACT
CCTGCTCCCGGCTTTGCATGGTTGTTTTT
>Rosalind_2886
TTCGGAAACGGCCGGGGCTGAAATGGAGGCCCATGTCAAGTACAGGTCTGTCAGGGGATC
TGACGGACCGAGGCTCGCTTGAGACAGGGACCACCAGAGT
>Rosalind_3868
CTAACACCCAGCTGTTTCACAGGCCTTGTACAGCTGTAGTTACAAAAACAACAAAACGCC
GTAGACGGCACAAACCTCGTGATCGTCAAATAAAGTT
>Rosalind_3053
GCTGGCAGGTGATACTGCTGCACACCCCGACACAATTAGCAAGTGCTCGAGCCCTAGGAC
CGGTTCATAGGTGATCGTACCGCTTTTCA
>Rosalind_2870
GTCCAACAACCATTATAGATACAATAGTCTATTCGCAATTGGCGGTACATGAGTTAAGAT
TACACAGGTGCGCGAACATGTCGCATTCCGTGTATGG
>Rosalind_1190
GCCATCCGCGATCGGCAGGCCGAACATATACACGGTCCTAGGAGCAGCCAAAAAAGACCT
GGCGCAGGTCATCCATTCGTTATCATGA
>Rosalind_2759
AATGGTAAGTATCTAGTCCTTATGGGACGTGCCCTTTTTGATTGCCTGCGACGTAAGATA
AGTAGGGCTTATTGCGGGTGTGCGCCCTCACCGGAG
>Rosalind_9098
GGGAAGCATGACGGAAGACGTGCGGGCGACCTATAAATGGGCGGTTCTGATGGACTAGCG
GGCGCACTGCAGCATGGTCGCGCAGAAGGGGCATTC
>Rosalind_2436
TAGCGTCATTTGAGGCGTCCTCTACCCGATTCGGCGCATGAGTTGGCCGTCATACGCTAA
TGGTGCGGTAACTACGGAGGACAGCAATG
>Rosalind_5383
TCGCCACACTTACGCCCTGAGCTCAGGTGACTAACCGCGGGGATGTGCCTTCCCTATGCG
ATCTGTTTTTTTCACGTTGAAAGTACCTCATT
>Rosalind_9256
GCAGAGGAAACCTCCCTCACCCAGAAATCTGACGTACCCTTTAGTAGCATGAGGCCACGC
TGCTAGTCAATTGATGGTAGTACGATCCTCGATATT
>Rosalind_7727
CGAATTGATAACAGCAGGCGTTCTCGTCGCATCGACGACTGAAGATTTACGGAACGTCTT
TAGAGCTCTCCAGAGCTACTAT
>Rosalind_2363
CTTCGTATACCCTCCCACATACAGTAATCTGGTCTCATAGCACGATCAGAGTGGGCCTTC
ACTAGATTAATAGGAGAGTACGTAACGC
>Rosalind_4306
CTTGAAGCGACCTTGAGCCATCCCCACCTTAAAAATCGCGAATGATCAGTTAGCGGTGGC
TCTGGTCTTTGGAACGTGACAT
>Rosalind_0029
CAGTGAGCGAGGTGGCCGGGCTTCCCACCTAATCCCTGCATTAGGATAATGCAGCTCTGT
TAATCTGTTCTTGGTTGTAATCGTTTAACTG
>Rosalind_4164
GTTGATATGTCTTAAGCTTGCGTACAACACCGCGATGAACGGCTATCGTCACCTGTGAGT
TGACCTGCTGGTAGGAGCTCCATACAGCA
>Rosalind_0093
TACAGATGGCTTGGCCGGTCAAGCGAGCTGGACGGGAATGCCTCAGTCGTTACAGTCACT
AGGCCGAATTCCAAAGATCGCGGATTAA
>Rosalind_7122
GATAATATCAGAAACCTCCAGTCTCTATAAGGCTGCGCGAAGCCCGCCCGGTTCGCGCTG
GACAGTCCTAAGTGAACAGACCAGCAGAGGCTG
>Rosalind_7385
GCGGATTATACAGCAACCTGAGTCTGATCTCTTAGGAAGCCAGTAATAAGAACCTCAGTT
AATATGCATACTTAATGCCCTAAGGGGAGGA
>Rosalind_2158
CGCTATTTAGCTAGCTAGGCGACCCCCTTCCAAAAGCTGCGTTGAATAAAGGAGCACTGT
CATACCTGCCCAGTTTGAAAAGGTATCACAGT
>Rosalind_8877
GGCCCCTCAAAGCCCACACTCATGGACACACCCATAAGCACTCTCTAACCCTGCAGGAAA
CCATAAACAGGAGGGTTGTGCGTA
>Rosalind_0933
GCAAGGTGAAAGATAGTCGGTGTGCTGTCACGTTACTATTAGCACTAGACTCCCATGGGC
TGCCAGAAAGATGAACACGTGACTAGATCG
>Rosalind_0010
ATCATGCACAGAAATTGTCTAATGGCTGGCATTACTGATCCGGTTTCCGATCCTACTCAG
GGACATCAATTGCGGGGCGCGC
>Rosalind_6900
AGGTGCAAGTGACAAACGTTAGAGTAGCGGCCGAGTCTTGTCGCAGCGGACTTGAGGTAC
GTTGATGCTACAATAAGAGGGGTAGGCCTTTTGGCCG
>Rosalind_2196
CCTCAGCTTATCATTGCGATCGTTTCAATAGTGAGCCTCTAAGGTCGTGCTAAACGTTGA
GAGATGGGATTATGATGGCCGTA
>Rosalind_1057
GCTGTGACTAGTCTATCGGTTGAAGTACGGGTTGCACCATGGTATACTGCCTAAAACGTC
CCTTCTAAATAGAATCCTGGCTTCCCCCATCGG
>Rosalind_6930
TCAGAAGCTTAGTTAGACTTCCTTGATAGAAGGTTGCAGACAAGGTTCGGCCAAATTAGT
ACCGGGGCTTCATGGGGTAGTCTTGCC
>Rosalind_5326
TGTAGGGAACCCCGCCTGCTCTGGAACTATGGTCCTATAATTGTTAAGTGCGCCGCTGCG
CTTGTCAAGCTTTCGTCTACCAT
>Rosalind_0865
GTACGTGTTAATTCTCATACGCAACGCTTGTCAGGGTCTGATCTTTAAAAGTTATACCAA
CGTTATACCCTGATAGTACACCC
>Rosalind_5124
TCCCTTTTCGATTCCCAGATGACGTTTGTTGTTATCTAGTTGGCAGTATCCTCTGCCCCA
TCGGCGATTGGTTTTGGAAGTTTACCAGTGTTGACCA
>Rosalind_6895
CAAGCTTCCTCTTATGGGTCTTAGTCGTAGATGCTTACAGTTGAGAATGATCCCAGGGTA
TGAAGACTTTCCCTTCTAAGCGAGACAAGCGCGGGCGC
>Rosalind_8709
TGAACTCGGTTATGAAACCTTACGTTTGACAAGTAATTCCTTGTTAAGGCGCCCCTGAGA
ATCCTTATCAAAGCGGTAAAAAATCCCACAATAATTTGA'''
ans = overlap_graph(s)
for i in ans:
    print i[0][1:] + ' ' + i[1][1:]

Rosalind_0239 Rosalind_0933
Rosalind_2100 Rosalind_7727
Rosalind_5383 Rosalind_1664
Rosalind_8420 Rosalind_5774
Rosalind_9123 Rosalind_0933
Rosalind_1581 Rosalind_8052
Rosalind_9098 Rosalind_2886
Rosalind_5807 Rosalind_1664
Rosalind_2196 Rosalind_9783
Rosalind_6895 Rosalind_4157
Rosalind_1838 Rosalind_0933
Rosalind_6819 Rosalind_0933
Rosalind_1504 Rosalind_9389
Rosalind_0478 Rosalind_1579
Rosalind_4157 Rosalind_3053
Rosalind_2886 Rosalind_1581
Rosalind_0510 Rosalind_6930
Rosalind_8283 Rosalind_2886
Rosalind_6930 Rosalind_1762
Rosalind_5646 Rosalind_0933
Rosalind_5807 Rosalind_5646
Rosalind_4489 Rosalind_2430
Rosalind_3152 Rosalind_7727
Rosalind_9292 Rosalind_0005
Rosalind_9098 Rosalind_7726
Rosalind_9389 Rosalind_0010
Rosalind_8582 Rosalind_1255
Rosalind_7385 Rosalind_7869
Rosalind_7661 Rosalind_1762
Rosalind_8877 Rosalind_0865
Rosalind_5326 Rosalind_9292
Rosalind_5383 Rosalind_3480
Rosalind_2693 Rosalind_2436
Rosalind_0010 Rosalind_2158
Rosalind_2158 Rosalind_1581
Rosalind_5440 Rosali

In [23]:
s1 = 'CCACGCTGCGTCACGTTCTTGTATTCAATAGTCTCGAGGTGTGCTTGTCTCATTGCAAGTACTTTTTCGCCGCGCTAATGCTCTTGAAGGAACGAGAATTTTGGGTTGTGCGATGGCCGGCTTCGGCCTTCACAAATCTGAAGCCTTAATTACCTAACTAGCTTTACTGAACTGAGGGCGAGTTCAGCATACAATCGGGTACCGCTAGGGGATCTGAGGTGTACCTAGGGGGGTGTGATCGTCTAATCGTAGTGTCCGAAGCAAGATCATTGTGCTCCCGTGATCATACGTAAGTTGGTCATCCACATGTGCCATGCCATCCGTTGTTGTCGGCGCTCTTCATTTTTGATGTTGTTCCCTACTCGAGGACGATTCGTTAGCAGCTTCACTCCGTGCAGCTCGACTCCATCATGCACGGGGCGTACGGACCGACCTACTCCCTAACTGATGTCGTATTGCCGACGTTCTCCTGCTAGGAATACTGCCGCCAGTAATATACCAATCATCACTCCGCAACGACCGACGGACTAATTTATCCCCCTGTAACGGCCGTACGTAGGATCTGGGGGAATTCCGATTTGGGACGTAGAGACCACATCTTGCGTGACTGGCAAGGGGAGTAATTAAACCGTTTTCTAAACTCCCTAGAGCGTCTATGGTCTGTCGCGCTCATTCACCCCGACCCGGACTTGTCTACACAAGCCCGTCGTGCCCAGATTTCGTCTTATGGAGTCCCTAAGGATCGTAGATTACTCGTTCCAACTATACCTTAGAGGACGGAGTTTCGAGTTAAAAATGTACTAGATAGCAAATTACGTGCGCTCTTGGTCAGCAAAGAAGGACTAAATGCAATGCAGGCTCAAAATATCCATTACTGAAAGACTTGTCTATCGCGTATTCTCCGATGAGCTCCTTACCCTAGATGACGAAAGTAGTCGTTTCCAAGCA'
s2 = 'TCTAGCGGCTACACTTACCTGCTGTCTATAATATCTAGGTGCCCGTGACTTTCGGCACGCCTGGCGGCGTCACGCTCAGGTTTGGTCTGGTGAGATTGCATTAGATTGAGCTCGGAGCATGTGCGACACCCACTAATGTGAAACAGTAAAGACCGTAGCCGTGTCATGGTATCGTAGTTGAACTCAGTATACTGCCGGAAGAGAGTAGGGAGTTACAGTTACGCCTGGCGGGGTGAGATCTACATATTGTCTCTTCAGGGGCACTATCCGTGTGATCGCGTGATCCAGGAGTAGTGGGTAGGCCTAAGGTGTCATGCTCGACCTAGGTGTGACTGCTGTTAAATCCAGTTTATGCCCACTACGCGTCATTGCTGAGTTAGTAAATAGTCTCCTAGGAGATCGTGTCCATATTCTGTTATCATCACAGATCTTACTTCGCGCTGTCGTCCGACGTCTGTCTCCACGTGGTATGCGTGGACACTTAGTGCCCCTAAAATTCGGCGCATAACTCCGTGGACATCGGGCGACGACCGCTTTTTCCTCTAGATGCGGTTGATGGGCTTAGGGGCAGTAACAAATCCTTTCGTAGATGTCTCGACGCACATTAGGGGCAAGGACAATCAAGAAAAAGTGTGATTATGCCACCATACCTTTTCTTGTGCACCGGGAGTCATCTCACCGAGCACGACTTGTCCCCGCAAACCCGTCGCCATGCGATTAATTGTGATCGTGATCAGCAGGCTCGCGAGCCAGTCTGCCTAGATGCCCCCAAGATGATTGAATTTCGGAGGTATTTCGAACTATATAGGAGTTTATGAGCTCCATCGCTCCGTAAGTAAAAAGTCAGCAATAGAGATGCGCTGCACGTTCCTAGCTGATGGAAAGACCTCTGAGATTAGATCCCATATTATGCAACGCGTCGTTAACGAAAGTCGGCGTCTCCAATCA'
hamming_disstance(s1,s2)

472

In [13]:
s = 'GCCTTTGCTGCTAAATTGATACTGACATACGTCTAATGGCACCTGATACGACTTTACGGGTCACAAATGTTAGTTCTTAACCATGTGAGAACACCACAAGTGTTGATAATCCATCTAGATGGGAGACCTCCTCGCCTCCAATGTGATACGGAGTGTGGGTCCAGTATAACAGCATCATATTGTCGCGCTCCAGCGGCAGCCAGTTTGAAATCGGACTGTCTCTGATGATCTCCGGGGAGTGGTTACCGATACACATCAATGCAAGACGTTCCATTCGACGATGAAGGGGTTATTTCTACCTTACTGGCATTACACCATCCCTATCACTGGTAACGCCTGGATTTTAAGACAAATACCGGCACGAGAAAACCCTGATATACCTCGCCAGTGGAGAACTCGGCTGTATATCCACCTGACTAGCTGAGCGACTGCGCATCGTCATTTGTGAGCCGGAGTCCTAGTTCGAAACTCTCGTTCCCGCTAACTGGCTAAATCATGCCTATTGCCTGCTCATATAGACCAAACAGCCCAGACGTCAAACGCCTAAACGGCGTCTGAGGTTGGCCCTACGAAGTATTTGCGCGTGCGTTGGTGGCGTCCAGCCTAGAATTCATCCAGCGTAAGGCTGCGAATAGCCACTAAAAAGGAGGCACCATACACCCTCGTTATTTGTCAATCAGCCGCAATTTACAGCTTTCCGCCTACGGTTGGTGTGTCCTAGAAGTAAAGAGATAGGACAAAATCATTGAACCGAAAGGAAGTGGCTTGGCCGAAGTAGATACATATATACACACGCGAACCGCGGGGGCCGGGCCCAGGATGCCATGACCACCGGGACGCAATTCTTTGTATCGCGCCATTGGTGGGCCCCCGAACATCAACAGTCCAAAACTCTGTTATAACGGCGAGAAAGGATTTTGATCGCGCTCCAGGTATTGACAGATGCGTCCAGCCGGAGGAGCTGATAAACGTCTCCATGTCTGAATATGT'
counting_nukleodits(s)

{'A': 262, 'C': 254, 'G': 238, 'T': 236}

In [17]:
s = 'CCATCGATAGAGGAGAACGGAAGAGGGCATAAGCCAAAGGTAAGACCTAGGGCTGTGGCAGGTATACGCGTCAATATGAACAACAAGATGTCGTGATTCCAGCCCACCTATGCCATGCTCTGTTCCCCGTCGATCTGATCAGTCCTAACAATGGCTTACGGGACTGATCGTAACTTTTACTATGATTGCGACATAGATCGAGGTATATGCGCTATCATGCTCCAATTAGACGATGTTTTCTGTTTTCAATTGTGTTCGGATTGGCGATAGCGGAATGGGAACGCAATTCAGCACCTCATTCAGATTTTCTTATGTAAAGACCCAATAGTGGTAGTGGATTTGTAGTCTCCATTGTGTTACTTTCCCAACAAAATTCTGGGAATTACGACTCTGCTCCAGCAGCGTCCAACAATGATATCCCGCTCGTTTTACAACGGGTGTGATGGCTCTATGTAGCTACTGACGTGTGTTGGGCCCCCGTTTCCGTGTGAGACTACGACACCTTTGAATCCGCAAGTTCCGACATTCACTCACAGTGCTACGGGAAGTCAGTAGTATCTTTGTCCGCCGCCTAGCACGGAGACTCTCGATAAAGATAGTTGGAGAAATTTAGCGATACCAACCCCGCCTTTTTATCCGCCGTTCGAGCCTGGTCCGTATAAGCCATTACTTGTGCCCTCGGATGGTGCGAATGCTTCTACTCATCAATATACTCATATCTTAGAAGAGGAAAAACACGTCATGCAGGTTACCGTCCCCCGTAATCGATCATTTCTCGCTCGAATGCTGATCGATGAAGGTAACCGAGTTGGGAATCTTTTTGGCGTCACCTGATGCCAAGGCGGTCTTTAGGTCCCAAACCGCTTTTGACCTGCCATCCCTTATCAAGCATAAAGCTTGATGCGTCGCTCAACGATATCGTCACAATTTGCCGATGCCATCGCTTACTGCT'
transcribing_DNA_into_RNA(s)

'CCAUCGAUAGAGGAGAACGGAAGAGGGCAUAAGCCAAAGGUAAGACCUAGGGCUGUGGCAGGUAUACGCGUCAAUAUGAACAACAAGAUGUCGUGAUUCCAGCCCACCUAUGCCAUGCUCUGUUCCCCGUCGAUCUGAUCAGUCCUAACAAUGGCUUACGGGACUGAUCGUAACUUUUACUAUGAUUGCGACAUAGAUCGAGGUAUAUGCGCUAUCAUGCUCCAAUUAGACGAUGUUUUCUGUUUUCAAUUGUGUUCGGAUUGGCGAUAGCGGAAUGGGAACGCAAUUCAGCACCUCAUUCAGAUUUUCUUAUGUAAAGACCCAAUAGUGGUAGUGGAUUUGUAGUCUCCAUUGUGUUACUUUCCCAACAAAAUUCUGGGAAUUACGACUCUGCUCCAGCAGCGUCCAACAAUGAUAUCCCGCUCGUUUUACAACGGGUGUGAUGGCUCUAUGUAGCUACUGACGUGUGUUGGGCCCCCGUUUCCGUGUGAGACUACGACACCUUUGAAUCCGCAAGUUCCGACAUUCACUCACAGUGCUACGGGAAGUCAGUAGUAUCUUUGUCCGCCGCCUAGCACGGAGACUCUCGAUAAAGAUAGUUGGAGAAAUUUAGCGAUACCAACCCCGCCUUUUUAUCCGCCGUUCGAGCCUGGUCCGUAUAAGCCAUUACUUGUGCCCUCGGAUGGUGCGAAUGCUUCUACUCAUCAAUAUACUCAUAUCUUAGAAGAGGAAAAACACGUCAUGCAGGUUACCGUCCCCCGUAAUCGAUCAUUUCUCGCUCGAAUGCUGAUCGAUGAAGGUAACCGAGUUGGGAAUCUUUUUGGCGUCACCUGAUGCCAAGGCGGUCUUUAGGUCCCAAACCGCUUUUGACCUGCCAUCCCUUAUCAAGCAUAAAGCUUGAUGCGUCGCUCAACGAUAUCGUCACAAUUUGCCGAUGCCAUCGCUUACUGCU'

In [22]:
s = 'CCTTGTGTAATAACACATCATGCCTGCATGAGCTCCTAATATGGTTATTTTGATGACCTGGCAATGGAGTTACCGATTTACGCGAACGCTCTCGAAAGTCCATCCACTCGTTATGATTTCTGATGAAGGTCCGGATACCTCGCACCATAATTGACGTTACAACGCCCGACGAATGCTCGTAACTTTGTGCGTTAAAAGCTCGGCGCTACTGATCTAGTCAGCGCATGGGTTCTCTTTGATCAGCATAGCCGAACCCGAATTTCTGTCTCTGACAATTTCACTGAGTTCAAATAGCCTGGCTTGCACAACAAGGAACGGGAATACTCCGGCATCCTAAATCATTCTCTAATTTAACTCGTAAGCTGTGCTAGCCATGGTAGACTCGTTGGAACCAACGAAGCCATGTAGCGCCAGGGCGCATGCAGTGCAAGGTAACAAGCTGTCCCGGCCTGAGGGCTGTCGGCCATTGGTTCCGACGTACATTCCCGAGCAGATAGGCTCCACACCGTTCTTATCAACTTCGCGGAGTAGTTGAGGTTAAGCTGGAACCCTAAAAGTATTCCAACAAATTGTTTGGGAGACAATAACTTGCCAACGAGTAGGCGCTACCTCACTCATCGGAATCGGATTATTTAATGGCTAGAACGATCCTCTGTGTGGTCTCTAAGCGTTCGGAATACAACGAGGACGTAGAAACGATGACTAGTACTTAAGGCGTGTACCGTTTAAGTCTTCTCTATCCTGGACCGGTGAGTTTGAGGAGCACGACTATGCTTTGCGAAGCGAGAACTATCCCCACTCCAAACTTGACGAGTGTCAGATCACCGCGACGTAGATAGGAATCTTAGGAAGATCGACGGATAGAAGCCCACTCTACTCGTAAATGATCCTGCAGGGAACGGTCCAAATTCTACTGAGGTGTCCTAATGATTAGATGTGTACTACTGTAGCCCAGGGGCCAGGTTGTACCGATTCATAAATGACGGG'
complementing_DNA(s)

'CCCGTCATTTATGAATCGGTACAACCTGGCCCCTGGGCTACAGTAGTACACATCTAATCATTAGGACACCTCAGTAGAATTTGGACCGTTCCCTGCAGGATCATTTACGAGTAGAGTGGGCTTCTATCCGTCGATCTTCCTAAGATTCCTATCTACGTCGCGGTGATCTGACACTCGTCAAGTTTGGAGTGGGGATAGTTCTCGCTTCGCAAAGCATAGTCGTGCTCCTCAAACTCACCGGTCCAGGATAGAGAAGACTTAAACGGTACACGCCTTAAGTACTAGTCATCGTTTCTACGTCCTCGTTGTATTCCGAACGCTTAGAGACCACACAGAGGATCGTTCTAGCCATTAAATAATCCGATTCCGATGAGTGAGGTAGCGCCTACTCGTTGGCAAGTTATTGTCTCCCAAACAATTTGTTGGAATACTTTTAGGGTTCCAGCTTAACCTCAACTACTCCGCGAAGTTGATAAGAACGGTGTGGAGCCTATCTGCTCGGGAATGTACGTCGGAACCAATGGCCGACAGCCCTCAGGCCGGGACAGCTTGTTACCTTGCACTGCATGCGCCCTGGCGCTACATGGCTTCGTTGGTTCCAACGAGTCTACCATGGCTAGCACAGCTTACGAGTTAAATTAGAGAATGATTTAGGATGCCGGAGTATTCCCGTTCCTTGTTGTGCAAGCCAGGCTATTTGAACTCAGTGAAATTGTCAGAGACAGAAATTCGGGTTCGGCTATGCTGATCAAAGAGAACCCATGCGCTGACTAGATCAGTAGCGCCGAGCTTTTAACGCACAAAGTTACGAGCATTCGTCGGGCGTTGTAACGTCAATTATGGTGCGAGGTATCCGGACCTTCATCAGAAATCATAACGAGTGGATGGACTTTCGAGAGCGTTCGCGTAAATCGGTAACTCCATTGCCAGGTCATCAAAATAACCATATTAGGAGCTCATGCAGGCATGATGTGTTATTACACAAGG'

In [77]:
data = '''>Rosalind_3721
GGGTTGCTTGGGACGGTGAGTTATGCTCCTGTGGCGTTGACGGAATGGTAGTGTTTTAGG
CCATTCACTCCTGCCCAGTCCATTGTTGTAATACGGCACTCTTATACGGACGACGGAGGC
CAAGGGATTGCCGAATGCACTGAAGTGCCCGATGTCGTTATGATCCCTCATGCTAGGGGG
TCTCCCGGCCGCTACATGTGAGTGGCTACCTTACCTGATAACTCCTCGGCGGGAACAGAT
TCCCAATTAGCCACATCCACGCGCATCTTGCTCTTGCACTCTAAATGAGACTGCTTGCCC
GACGGACTGGAGGTAACATTCTATAGCGTTAATATACGGTGTAGTCCGTCAGAAGTGTAC
GTTGTTGTTATGCGCTTCAGGCGAACACCGTACTGGTGCGCTGATGTGGAAATCCAAATT
TGATATGATCAAATCAGGATTTATTGCTTACAACATCATTTAAAGGCCATACTGCTGTAT
GACAGTTTTAAACCTCCAAAAACGTTAGGGGCTATGGAAGAATTCGCTGAATTTCAGTGT
CAACGGTTTCCTTTGGCACGGCGCTCGATAGCATGCAGGTGCTTCTCGACCGAATACTGG
AACATAACGCCATTTAATGTTTAATCTAAGTCACATATGTTTTCATATCTCCATTAGAAT
AACTACTTAACTAACTTCGAGAAGGCGCAACGAGCGACAGGTACTGACGGATGCTTCCAG
CTCCACAAGGCTCCATGGCGCGTGTCGACAAGAGGATTCCATCGCAAAATAAATTTTCAG
TTCCCGGAAGGGCGCGGATTGGTCCTGCCTTATGCAACCCTAATAAGGACAAGTTACCGC
AGAGGGACGTACCACTTAATCAAGAGTGAATCTCGACTAAAAGCACCAATCACCTAAGGA
GCAGGACTCAAGCGTGTCCACATAGGTTCCGA
>Rosalind_0453
GTGAGTATACATTTATGCTCTCCCTATCCTTACTGGCCGAACCATGTCAATGGGTCCTAA
GCTCCTGCTACTAATACGCGTCGCTGAAGTTCCACCGACTCCTTCCCCTAATTTGCTGCC
AACGAAAATTTCCTTCTAAGGAAAGGTCTCAGATGAACGACACCAGGCGAGTAGATCTGC
ACCTCATGATTACGAATCGGGATTGGGTATACCCCTCGTTCCTTGGCTATCCTCGCGTAT
TGGCACGCTGCTCAATAACTCCAGCTGGGAGTACAAGATCCGACCATTGAAGCCTTTCAA
TTGGAGCTTCAGGCATCGCCGGGTATGCTGTTGAAAGGTACCAGCTAGTGTCTAAAACAG
CCGACTCCTATGTGCTCATCAGTACAGCCGGTCACAGTGCACCATAATCGTTTGCGACCG
GAAGACCTAGCTTAGGGTTGCCGCTCAAAGGTCCATGATGTCTAGATCCTACGTTCGAAG
TCCACCTGTCGATACAGGGTTTCTCCGTTATAGACTACACTGAGGAGGTTCACTGGGTAA
CGTTAACAACTCAAGTGGTCTCCGATCGGGTCCGTGCGGCACCAGGTGTCCCTTCTGAGC
TCCTACTGATCCGCCATGGACCGGAGGTGCTTAAAGGGATTCTCCTATCTTACCCGACTG
TAACCGAGTTGTGTTTCGGAACTATGGCATCTACAAGCTGCAACAGCTTAACATTTAACC
TGCCCTTGGCCTCGATAGAGTAAACCTTTTTGGGATCCTGGGTGCGGTAATACAAATTTC
CCTTTAGCTTCGGGCTTGACGGCCGCCAAGCTGCAATTCAAGATAGGAAAACATGGTAGC
TGCTAATCTTAGGATAGACAGGAAACGGTGAACCCCCCACACTTCTTGTCGGT
>Rosalind_0769
TCTGACCATATTGTTAGTAGCCAGTGCATGTCGGCAGGGGATGACCGCAAGTCTCTCGTG
ATAAGACGTACACACACACATGCTCTTAGCGCGCCAGACGAACGAGTTTTGAGGATCGAC
CAAATAGGGGTGCATATGTACGGGCAATACGGGTACAATGGGTCGAGGTCTACTTCCAGT
CTCAGTCATGTTAAAGGTCTGAGACTAATCGGGCTATCAACGCATACCGACCAGTACTCC
CCTACGCAAGAGATTAATAGAAACCCTACACTACCGAGGCGAAGGTAATCTGCGTAGCGA
GTTCACAAATGCACTCCAACTTGGTTATTGAGTCCGCAACTATCAGGACCTTTGGAGCCT
CCACTTTGCCTGAGACTGGCGAGGACAGATACTAAGGAAAACACCAACCCAGGTCACAAA
GCTATTTGCGGTATATGAGCGGGAGGTCGCATCGATCGGACTGGTCGAGCACCATCTACA
GCGGAGGTCAATGTAAATGGTGGGGCACGGGATGGCCAATGCGTCCGCGTAATAGCCCTC
GCGGGGATGGCATGACGAATAGGAACTACATTTCACCCATGAAACTCTAGGCGGTATTGG
GTCAAGGCCGGTAAGGAACACCTGACTTGAGGTACGCTCATCGACCTGTGCAGGAGATGT
CCCCACCAGCACTGATGACCGACTCGTGCTATCAAATACGAGCCACCACGTATCACGGCA
CCTATATCTCGTCTTCCCGTCTCCACCATTAAGGGAACTTTCATACAGTATGCCTAACTT
GATGCGGGCCCGCTCCGGGCTACTACCGTTGTAACTTCCGTTTACGCGAGCGGAACAGAT
TACCGGATCAGGTTCCCAACAAGAATAGTG
>Rosalind_7062
AACAGTCTCGCGCGGTAAAGACTGGATTACGCTGTGCCTCTCCGAGGATCTCCTGCTGTC
CATCCGATATGGCCAGCTAAACGCCCCGCATTGACTACCCCCTAAGGTAGCCCAACTTCT
TATCAAAGGCTTCATGGGAGCGTGCGGTCCTAAGGACTCGACGCTAAGAACCCGTCCCAG
GTCAAGGCTAGGGATTTCACACTTCTTGCCAACCGTCCTGAATGTAGTAGCCCCAGTTCC
TGTCAAACATAGGGAAGTATCACAACGCGGCTGCTAGCCCGTGGATAACCGGATAAATTC
GCGAGATCCAACCAAAAATTACACCGGGGATTGACACGCCAATAGCCCACTAAGACACAC
CAATTGGGACTTGTGGATACTAACGAATCCGCCTGGCCAGATTTGACTAATTTGCGAGGC
ATGATCATCGGAACGCTGACCACTGAAAGCGAACGGGGCGGCAGTGGTAAAAGACTTTTC
ATTCACGCCTAGGGCTCAGCGCCCGAGTATAAGAGGCGCTATTGTCCCAACAGTCGTAAA
GCTCTGGCAGCGAGTCGTGAGACGTCCCGCGTTGAAACTCAACCGGTGTAGGCTCTCTGA
GAACATATATGGATCAGTATTACTATCTGAAGCCATCTGATCCTCCATTTTCAACCACTA
CCTTTTAGACGTGGGTTCATCATTCCTGTGGGGAAACAACAGCAATTCAATACAGCCGCT
TCCTCGATCGCTATAGCAGGGGTAGAGCAATTTAATGAACAGGTTTGATCAGCAGCGGAG
TTGTGCGACGAGTAGGCGCACCCTTGCCGTGTGGTAATTAGGGTTGACAGGAGAAGCCTA
AATAGTATCCAGTAGATAAGGTAGGCTCAGCTATTGACGCCCATGCTTTAAACGGGTGCT
ACATAATTGTTAAAGAGTAATCCCACTTGTCACGAAGGCCTTCTCCCAAGCCAGAGATTA
AAGTTTCAGATGCGCTGATTTT
>Rosalind_7064
TTGAGACCGCCGTTGCTGACCACGGAATTGACCACTTATGCCCTATTTAGAGGCTCTTGG
TGTCGAGTCGTTGCCAAAGTTGAACGCTGAGAGTTCCAGTCCAAGTGTTAGAACTACGAC
AGCGCGCTCACACAACCTCTGGACAGCTTACGTCCGGGCCTCTACATGGCCAGTTCAAAG
GGGTGCCCACGTTCATGCGTATGACCACCTTCAAGCACTTCTGCAATCGCTTGTCCACTG
AGGTAGAAACTATGAGCGATGCGACTAGCCGGACGCATCAACACATCCTTCCAATATATT
ATTTGGCGCGGACAAGCCCTGCTCTCTGCAACGGGGAACGATCGATTTGCCGGTCTGTAG
CTCAGCTCCAAAATTTTCACTTCCGACATTCGGGGGGAAGACGTCTCGCACTCAGGCTAA
TCCGGTTAGACTCGTATACAGCTACGCTCCTGGCGCACGCCGCCCGTGGAAATTCTCTAA
ATACGTTGGGCCGAAGATAAAGTTATCGCAGATGGGTTTACCCCAACAAATCTTACATGT
GAGACACGCTTGTGCTGTTGAGATCTTCAGCGCCTTAAACCATCGTAGGAGGCCCAGCGG
AGCCTTCTGCCCACGGTGAGTCTCAGTCGGGAGATTACATGGAGGGCTGTTTTGAAGTTT
ACAATTAGCATTCATGCGGACGCCCCGTAATTTTACTGCCCTTATCACGGCCAGCTTGCG
CGTCAGTGGCTGGTCTCCTCTAATGCGAACGAACTGGGAAGTATCGGTTTGTAGGTCCGT
TCAGATTAAGTCGCAACGCGGGGTCACTGCGTGTTCACAAGTCCTAATACCTTAGGAGGC
TAACTGCGAGGCAAACTAACACGCGTAAACGGTTTGGTATTGACCCCGCCCTGGAG
>Rosalind_8735
GATAAGGAGAGATTCGCTGGCGCATGAGTGCTTCTGTCAACGAGTGACTCGTGGGACTGT
CAACCCCGCGTGGTAGGTACAGAATTGGCAGTTCGGAGGGCATAGTGACAATCAGCAAAT
TCGTCCCGCATTACTCAGTCTAACTACGAAGGGGTAAAAGGGATTGGCTATCGAATCTTT
GTAGAGGCCTAACATTGTGCGGAATGACTTACGTAGAGGGTGGTTTTAGGGCGTCTCCTA
TGCCGCTTCGCGGTATTGAAATACCACAAAAACCGGCGCGGGTGCAGGCATACCCTATTG
GTCGTTAACGGGCTCTATTAGCAAACATGCACGAGCCCTAGGTGTCTAAGAAGTGTCATT
TGGCCTAAGAGCGGTTTTTACGTCCTATGTTGCCCCGCGTCGCCATCTGGGGCTGGTGTA
ATAGGGTCCTTCTGCCCGATACTGATGTATTTAGCTTCCATGTCCCACCCACACACATCG
TACGAGCCGTTAATTTATGTGGGGCGCCGCACTCTTTGTAACGCGGCACCTGGTCTGGTC
TTGCATTATTATGAGTTAACGATAGCGTCACAGTCAAATCGGGAAGCGCAGCACTAAATT
GTCTTCGTCATTGGTTAACCCCACAGCTCATAGACGGTTTCTACACTAGGTTGTCGGCGC
GGTACCACCTGTATTAGTGCAATCCTAAATAAGTTCTCCGACCTATCTCTCACCCTATAT
TCTCCAGGTCGTCGTTTTTTCAGGCCAAAGCTCCCGAAGCATCTTTGGGAGAGTTGCAGT
CTCACAAAATAATCTAGGGCCTTAGGCCGGGTGCAGCTTCCGCTGCAATCATACATGGCG
CATTGCTAATACACGCA
>Rosalind_7686
TTACTGCTGACTGGTTGATACAATCAAGGAAAACCCGGGCCTGCTAAAAATGGCATTTGC
GGAGCGTAAGGTGAACCCCTTTGACGTGCGACGGTAAGAGAATTTATTATCTCCGGGAGC
GACCCGGTTGTATAGCGGCCTACAGGGAGAATAGGCTTTTTTAGAATTGACTGGCAGCAG
TTTGAAGGTGTACGACCTCTCGCGACGAAACCGTCCCAATATACGGGGAAGTGCGACACG
GGATGCTCACCAGAGCGACAGGTTAACTGGTAGGCATAGGTGAAAGCGCACTAAGGGACT
AGGCAACTCAAGTGTTCCTATCGTCACAGCCCTCCGGACGAATTCTCTTGAAGCACTTCG
CCTATGCGATGGGGTCCCCCCCTATCCGGCATGCGTGACCTGGTTCTTAAGCAATTTTAA
GCCGCTCAACTGCCGCCAATTCCCCGACACCGAATAATTACGAGCAGAACACCCCTCGGA
GGCCTGAACACCGTCCAAACCTCAGGGGGCGCTCGGAGTTACTCGCATGATGCTCGAATA
CCAGGGGTACATTGCTATACAATCGTCTTGACCCCAGATGCTCGTACATACTATGCAAGA
GCTGCACAGATCAAAATTTGGATGCTTTTCCAGAGAACCTTCGGCTCCGGGACGCGTTAA
TATAATAGTCGTCCATAATTTCGCGTCCCCATACCATCGCTTGCAGTTATTAGTCACCGT
GCATCGCCGGGTATAACAACATTTCAGGACTTGTAAAGTAGGTGTACGTTTAACTGACCG
TACGACCGTAGGGTTCACAAAGCCGAAGTACT
>Rosalind_9418
CAAACGCAGAAAAGACATTGTGTATGACGGCTTGGGCACTATGACCAAACCCTTCGTGTT
AAAATGGGGCCACCTGTAGCGGCGTTCACTCAAGTCCGTCCGGTGGAACTGCTGAGGACT
TTGCTGGGGGAATGCAGTTACCGTTTGATCAAAAACGAGTTTCTCAAACAACCAGCCAGC
GTTAGTTCGATTCTAGTGACCTGCCTCTAAACTAACGCCTGAACCGTCATGGCGAGGGGC
ACAGGTCAACCATCCCTATTTTCTAATCCAGGGCCCTGATTCGTCCGAAGTAGCACGCGG
GGGGAAACAATTCATTGTGTTCTTCGCAGCCCTTGCACCAGGGCTGTGGTTGGAGCCATG
CTAACAGCGCGATGCAATCGCAAGACTGGTCCTTACGTCCTAAGTAAATTATCCTCATGC
TCACCGTATTGCCCGAGAGGACAGGTTACACGACCGAAATCCTCCGATGCACCCTATCGC
CCATACGTTCAACTAAGCGGAGTGTCTGTAAGGTGGCCCCTGCGTCGCAGAGCGTTCTCG
AACCTTACCAATGGCCAGGCTTGTACTTGACAGGACGAGTATTAGGAACATTAGAATAGA
CGACGCATGCGCGATATTTCCCATATGAAATGCTCTCCGACACGCTCTCTCTCCTCTTGT
AGAATATGTGCCCTACGACAAGGGGGACACATGGCAACGGGTTAGTCCAGGATATGGTGT
TGGTTACTTAGATACAACGTCGATATGATCGCCGCTATTGACTTCAACCATTGTACTAGG
GTCTGAGCAATCGACTGCGTCAATCCGTCTAGACCTATTTTTGGGACAAACTCGCGCAGA
GAACGGAGTGAGGGGTGAGGTGGTTATGTCAAACCGACTGAACAAAAAAGGGGAATGCCT
CTTTATGCACAGAGTAGTGCTCGCTATGCCGGGGTGCCGCGA'''

d = fasta_to_dict(data)
res = {i: cg_content(d[i]) for i in d.keys()}
maximum = max(res, key=res.get)
print maximum
print res[maximum]

>Rosalind_7064
52.34375


In [84]:
s = '''>Rosalind_56
ATTAGACCTG
>Rosalind_57
CCTGCCGGAA
>Rosalind_58
AGACCTGCCG
>Rosalind_59
GCCGGAATAC'''
min_super_string(fasta_to_list(s))

'ATTAGACCTGCCGGAATAC'

In [93]:
text = 'ACTGCACTCTGCACTCAGCTGCACTCTTGACTCCACTGCACTCCTTATGCACTCCAGATCTGCACTCGACTGCACTCATGCACTCTGCACTCTGCACTCTGCTGCACTCCGCCTGCACTCATGCACTCCATGCACTCCAAGCTGATGAGTAATTGCACTCTTTGCACTCCGCGATTTACCCGTCTATGCACTCTGCACTCATACTTTGCACTCTCGAGAGCATACGGCTTGCACTCGATTGCACTCTCGTGCACTCCACAGTGCACTCCCTGCACTCTGCACTCATCTGTTGCACTCTGCACTCCGCTGCACTCTACGCTGCACTCAAATGCACTCTTTGCACTCTGCACTCTGCACTCACGATGCACTCTGCACTCCTGCACTCTGCACTCCAAATCTGCACTCTGCACTCGCGTGCACTCTGCACTCTGCACTCTGCACTCATGCACTCGAAGTGCACTCTGCACTCTTCGTGCACTCTCGTGCACTCTGCACTCCTTGCACTCCTGCACTCCGCTGCACTCTGGTCGACTGCACTCGTAGATGTTATATGCACTCATGCACTCGTGCACTCATCTGCACTCGACAAGTTCCTGCTGCACTCCGTCCTGCACTCTGCACTCCTTGTGCACTCTGCACTCATGCACTCGTGCACTCACGAGTGCACTCACATGTGCACTCTGCACTCTGCACTCTGCACTCTGCACTCTTACGTGCACTCTGCACTCTGCACTCCTGCACTCTGCACTCTGCACTCTGCACTCACTGCACTCTGCACTCGACTGGTGCACTCTGCACTCTGCACTCCGTCATGCACTCTGATGCACTCTGCACTCACTGCACTCTGCACTCAATGCACTC'
sub = 'TGCACTCTG'
[i +1 for i in finding_Motif_in_DNA(text, sub)]

[3,
 79,
 86,
 93,
 187,
 271,
 291,
 339,
 346,
 364,
 379,
 399,
 416,
 423,
 430,
 456,
 484,
 518,
 610,
 628,
 675,
 682,
 689,
 696,
 715,
 722,
 737,
 744,
 751,
 767,
 787,
 794,
 813,
 823,
 839]