# Genome Identifier

### Identificador de aminoácidos por uma sequência de nucleotídeos e plotagem de gráficos quantitativos

In [6]:
# Codon table (RNA alphabet), listed per amino acid.
# Each entry pairs a one-letter amino-acid symbol (or the marker 'stop')
# with the whitespace-separated codons that encode it.
_CODONS_POR_AMINOACIDO = (
    ('A', 'GCU GCC GCA GCG'),          # alanine
    ('R', 'CGU CGC CGA CGG AGA AGG'),  # arginine
    ('N', 'AAU AAC'),                  # asparagine
    ('D', 'GAU GAC'),                  # aspartic acid
    ('C', 'UGU UGC'),                  # cysteine
    ('E', 'GAA GAG'),                  # glutamic acid
    ('Q', 'CAA CAG'),                  # glutamine
    ('G', 'GGU GGC GGA GGG'),          # glycine
    ('H', 'CAU CAC'),                  # histidine
    ('I', 'AUU AUC AUA'),              # isoleucine
    ('L', 'UUA UUG CUU CUC CUA CUG'),  # leucine
    ('K', 'AAA AAG'),                  # lysine
    ('M', 'AUG'),                      # methionine (start codon)
    ('F', 'UUU UUC'),                  # phenylalanine
    ('P', 'CCU CCC CCA CCG'),          # proline
    ('S', 'UCU UCC UCA UCG AGU AGC'),  # serine
    ('T', 'ACU ACC ACA ACG'),          # threonine
    ('W', 'UGG'),                      # tryptophan
    ('Y', 'UAU UAC'),                  # tyrosine
    ('V', 'GUU GUC GUA GUG'),          # valine
    ('stop', 'UGA UAA UAG'),           # stop codons
)

# Flat lookup: codon -> amino-acid symbol (or 'stop').
codons = {
    codon: simbolo
    for simbolo, grupo in _CODONS_POR_AMINOACIDO
    for codon in grupo.split()
}

# Amino-acid symbols grouped by side-chain character; used to colour/group the plots.
hidrofobico = list('GAVLIMFWP')
hidrofilico_neutro = list('STCYNQ')
hidrofilico_negativo = list('DE')
hidrofilico_positivo = list('KRH')

In [7]:
import matplotlib.pyplot as plt
from datetime import datetime
import plotly.graph_objects as go

In [8]:
# Sequence helpers: clean-up, strand complement, start-codon search and translation.
# The input may be written in either the DNA or the RNA alphabet.
def tratamento_str(string):
    """Strip whitespace and digits from a sequence and return it upper-cased.

    Every whitespace character is removed (spaces, newlines, tabs, carriage
    returns), so listings pasted with CRLF line endings or tab separators are
    cleaned as well. The decimal digits 0-9 (position counters in numbered
    listings) are removed too.

    Args:
        string: raw sequence text.

    Returns:
        The sequence with whitespace and digits removed, in upper case.
    """
    # split() with no argument splits on any run of whitespace, so joining the
    # pieces drops all of it in a single pass.
    sem_espacos = ''.join(string.split())
    sem_digitos = sem_espacos.translate(str.maketrans('', '', '0123456789'))
    return sem_digitos.upper()

def neg_transcription(code):
    """Return the base-by-base RNA complement of a negative-sense strand.

    A -> U, U or T -> A, G -> C, C -> G; any other symbol becomes 'X'.
    Matching is case-sensitive (upper case only).

    NOTE(review): the strand is complemented position by position and is not
    reversed; confirm this matches the read direction of the input.
    """
    pares = {'A': 'U', 'U': 'A', 'T': 'A', 'G': 'C', 'C': 'G'}
    return ''.join(pares.get(base, 'X') for base in code)
    
def onde_esta_aug(code, DNA=False):
    """Return the RNA sequence starting at the first start codon (AUG).

    Args:
        code: nucleotide sequence (upper case).
        DNA: when True, every 'T' is first rewritten as 'U' so that a
            DNA-alphabet sequence is searched as its mRNA equivalent.

    Returns:
        The sequence from the first 'AUG' onwards, or '' when no 'AUG' exists.
    """
    rna = code.replace('T', 'U') if DNA else code

    # partition() yields empty separator and tail when 'AUG' is absent,
    # so the concatenation below is '' in that case.
    _, inicio, cauda = rna.partition('AUG')
    return inicio + cauda

# Translate codons into amino acids.
def id_aminoacidos(seq):
    """Translate a codon sequence into one-letter amino-acid symbols.

    The sequence is read three bases at a time from index 0; a trailing
    fragment shorter than three bases is ignored. Codons missing from
    ``codons`` are written as 'X'. Translation ends at the first stop codon.

    Returns:
        A tuple ``(amino_acids, remainder)``. ``remainder`` is everything
        after the stop codon, or '' when no stop codon was reached.
    """
    traduzidos = []
    pos = 0

    while pos + 3 <= len(seq):
        simbolo = codons.get(seq[pos:pos + 3], 'X')  # 'X' marks an unknown codon
        pos += 3
        if simbolo == 'stop':
            # Hand back what follows the stop codon so the caller can keep scanning.
            return ''.join(traduzidos), seq[pos:]
        traduzidos.append(simbolo)

    return ''.join(traduzidos), ''

# Count how many times each amino acid occurs.
def ocorrencias_aminoacidos(seq):
    """Count the occurrences of each symbol in ``seq``.

    Args:
        seq: amino-acid sequence (a string of one-letter symbols).

    Returns:
        A plain ``dict`` mapping each distinct symbol to its count, with keys
        in order of first appearance. An empty sequence yields ``{}``.
    """
    from collections import Counter

    # Single pass over the sequence instead of one full count() per character.
    return dict(Counter(seq))

# Plot per-amino-acid counts, coloured by hydrophobic/hydrophilic group.
def grafico_ocorrencias(dicio, nome='occurrences'):
    """Show an interactive bar chart of amino-acid counts, one trace per group.

    Args:
        dicio: mapping of amino-acid symbol -> count.
        nome: text used as the y-axis title.

    Symbols that belong to none of the four module-level groups are plotted
    in a separate 'Outro' trace. Groups with no symbols get no trace.
    """
    # (member symbols, bar colour, legend label) in plotting order
    grupos = [
        (hidrofobico, 'orange', 'Hidrofóbico'),
        (hidrofilico_neutro, 'blue', 'Hidrofilico Neutro'),
        (hidrofilico_negativo, 'red', 'Hidrofilico Negativo'),
        (hidrofilico_positivo, 'green', 'Hidrofilico Positivo'),
    ]

    # One bucket per group plus a catch-all for unclassified symbols.
    baldes = [{} for _ in grupos]
    sem_grupo = {}

    for amino, qtd in dicio.items():
        for (membros, _, _), balde in zip(grupos, baldes):
            if amino in membros:
                balde[amino] = qtd
                break
        else:
            sem_grupo[amino] = qtd

    camadas = [
        (balde, cor, rotulo)
        for balde, (_, cor, rotulo) in zip(baldes, grupos)
    ]
    camadas.append((sem_grupo, 'black', 'Outro'))

    fig = go.Figure()

    # Only non-empty groups become traces.
    for valores, cor, rotulo in camadas:
        if not valores:
            continue
        fig.add_trace(go.Bar(
            x=list(valores),
            y=list(valores.values()),
            marker=dict(color=cor),
            name=rotulo
        ))

    # Axis titles, legend title and chart title; bars are drawn side by side.
    fig.update_layout(
        xaxis_title='Aminoácidos',
        yaxis_title=nome,
        legend_title_text='Grupos de Aminoácidos',
        title_text='Ocorrência de cada tipos de aminoácidos',
        barmode='group'
    )

    fig.show()

def grafico_porcentagem(dicio, nome='porcentagem'):
    """Show an interactive donut chart of amino-acid counts summed per group.

    Args:
        dicio: mapping of amino-acid symbol -> count.
        nome: accepted but not used by this function.

    Returns:
        A list with the total count of each group, in the order: hydrophobic,
        neutral hydrophilic, negative hydrophilic, positive hydrophilic, others.
    """
    labels = ['Hidrofóbico', 'Hidrofílico neutro', 'Hidrofílico negativo', 'Hidrofílico positivo', 'Outros']

    # Group membership, position-aligned with the first four labels.
    membros_por_grupo = [
        ['G', 'A', 'V', 'L', 'I', 'M', 'F', 'W', 'P'],
        ['S', 'T', 'C', 'Y', 'N', 'Q'],
        ['D', 'E'],
        ['K', 'R', 'H'],
    ]
    indice_do_grupo = {
        amino: idx
        for idx, membros in enumerate(membros_por_grupo)
        for amino in membros
    }
    indice_outros = len(membros_por_grupo)  # slot for unclassified symbols

    sizes = [0] * len(labels)
    for amino, qtd in dicio.items():
        sizes[indice_do_grupo.get(amino, indice_outros)] += qtd

    donut = go.Pie(labels=labels, values=sizes, hole=0.4)
    fig = go.Figure(data=[donut])
    fig.update_layout(
        title_text='Porcentagem dos tipos de aminoácidos',
        annotations=[dict(text='Aminoácidos', x=0.5, y=0.5, font_size=20, showarrow=False)]
    )

    fig.show()

    return sizes

In [9]:
def genome_identifier(string, isTimine=False, isNeg=False):
    """Translate a nucleotide sequence into amino acids and plot the results.

    The sequence is cleaned with ``tratamento_str``. It is then scanned
    repeatedly: find the next start codon (AUG), translate until a stop
    codon, and continue with what follows that stop codon. Scanning ends
    when no start codon is left, so no empty entry is recorded for the
    untranslated tail.

    Args:
        string: raw sequence text (may contain digits and whitespace).
        isTimine: True when the sequence uses 'T' (DNA alphabet); passed to
            ``onde_esta_aug`` so that T is read as U.
        isNeg: True when the sequence is a negative-sense strand; it is then
            passed through ``neg_transcription`` before scanning.

    Returns:
        A tuple ``(dict_occurrences, lista_proteinas, porcentagem_aa)``:
        the symbol -> count mapping over all translated amino acids, the list
        of translated stretches in order of appearance, and the per-group
        totals returned by ``grafico_porcentagem``.
    """
    gen_code = tratamento_str(string)

    if isNeg:  # negative-sense strand: complement it first
        gen_code = neg_transcription(gen_code)

    lista_proteinas = []
    while gen_code:  # while there are bases left to scan
        sequence_starter = onde_esta_aug(gen_code, isTimine)
        if not sequence_starter:
            # No further start codon: stop here instead of recording an
            # empty translation for the leftover bases.
            break
        aa_sequence, gen_code = id_aminoacidos(sequence_starter)
        lista_proteinas.append(aa_sequence)

    aminoacidos_identificados = ''.join(lista_proteinas)

    dict_occurrences = ocorrencias_aminoacidos(aminoacidos_identificados)
    grafico_ocorrencias(dict_occurrences)
    porcentagem_aa = grafico_porcentagem(dict_occurrences)

    return dict_occurrences, lista_proteinas, porcentagem_aa

In [10]:
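# Example: a numbered nucleotide listing written with the DNA alphabet (a/c/g/t);
# the position counters and spaces are removed by tratamento_str before translation.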
COD1 = '''1 acgcttaaca accagatcaa agaaaaaaca gacagcgtca atggcagagc aaaaatgtaa
       61 cacctctaca atggatgccg acaagattgt attcaaagtc aataatcagg tggtctcttt
      121 gaagcctgag attatcgtgg atcaatatga gtacaagtac cctgccatca aagatttgaa
      181 aaagccctgt ataactctag gaaaggctcc cgatttaaat aaagcataca agtcagtttt
      241 atcatgcatg agcgccgcca aacttgatcc tgacgatgta tgttcctatt tggcggcggc
      301 aatgcagttt tttgagggga catgtccgga agactggacc agctatggaa tcgtgattgc
      361 acgaaaagga gataagatca ccccaggttc tctggtggag ataaaacgta ctgatgtaga
      421 agggaattgg gctctgacag gaggcatgga actgacaaga gaccccactg tccctgagca
      481 tgcgtcctta gtcggtcttc tcttgagtct gtataggttg agcaaaatat ccgggcaaag
      541 cactggtaac tataagacaa acattgcaga caggatagag cagatttttg agacagcccc
      601 ttttgttaaa atcgtggaac accatactct aatgacaact cacaaaatgt gtgctaattg
      661 gagtactata ccaaacttca gatttttggc cggaacctat gacatgtttt tctcccggat
      721 tgagcatcta tattcagcaa tcagagtggg cacagttgtc actgcttatg aagactgttc
      781 aggactggtg tcatttactg ggttcataaa acaaatcaat ctcaccgcta gagaggcaat
      841 actatatttc ttccacaaga actttgagga agagataaga agaatgtttg agccagggca
      901 ggagacagct gttcctcact cttatttcat ccacttccgt tcactaggct tgagtgggaa
      961 atctccttat tcatcaaatg ctgttggtca cgtgttcaat ctcattcact ttgtaggatg
     1021 ctatatgggt caagtcagat ccctaaatgc aacggttatt gctgcatgtg ctcctcatga
     1081 aatgtctgtt ctagggggct atctgggaga ggaattcttc gggaaaggga catttgaaag
     1141 aagattcttc agagatgaga aagaacttca agaatacgag gcggctgaac tgacaaagac
     1201 tgacgtagca ctggcagatg atggaactgt caactctgac gacgaggact acttctcagg
     1261 tgaaaccaga agtccggaag ctgtttatac tcgaatcata atgaatggag gtcgactgaa
     1321 gagatcgcac atacggagat atgtctcagt cagttccaat catcaagctc gtccaaactc
     1381 attcgccgag tttctaaaca agacatattc gagtgactca taagaagttg aataacaaaa
     1441 tgccggaaat ctacggattg tgtatatcca tcatgaaaaa aactaacacc cctcctttcg
     1501 aaccacccca aacatgagca agatctttgt caatcctagt gctattagag ccggtctggc
     1561 cgatcttgag atggctgaag aaactgttga tctgatcaat agaaatatcg aagacaatca
     1621 ggctcatctc caaggggaac ccatagaagt ggacaatctc cctgaggata tggggcgact
     1681 tcacctggat gatggaaaat cgcccaaccc tggtgagatg gccaaggtgg gagaaggcaa
     1741 gtatcgagag gactttcaga tggatgaagg agaggatcct agcctcctgt tccagtcata
     1801 cctggacaat gttggagtcc aaatagtcag acaaataagg tcaggagaga gatttctcaa
     1861 gatatggtca cagaccgtag aagagattat atcctatgtc gcggtcaact ttcccaaccc
     1921 tccaggaaag tcttcagagg ataaatcaac ccagactacc ggccgagagc tcaagaagga
     1981 gacaacaccc actccttctc agagagaaag ccaatcctcg aaagccagga tggcggctca
     2041 aactgcttct ggccctccag cccttgaatg gtcggccacc aatgaagagg atgatctatc
     2101 agtggaggct gagatcgctc accagattgc agaaagtttc tccaaaaaat ataagtttcc
     2161 ctctcgatcc tcagggatac tcttgtataa ttttgagcaa ttgaaaatga accttgatga
     2221 tatagttaaa gaggcaaaaa atgtaccagg tgtgacccgt ttagcccgtg acgggtccaa
     2281 actcccccta agatgtgtac tgggatgggt cgccttggcc aactctaaga aattccagtt
     2341 gttagtcgaa tccaacaagc tgagtaaaat catgcaagat gacttgaatc gctatacatc
     2401 ttgctaaccg aacctctcca ctcagtccct ctagacaata aagtccgaga tgtcctaaag
     2461 tcaacatgaa aaaaacaggc aacaccactg ataaaatgaa ctttctacgt aagatagtga
     2521 aaaattgcag ggacgaggac actcaaaaac cctctcccgt gtcagcccct ctggatgacg
     2581 atgacttgtg gcttccaccc cctgaatacg tcccgctaaa agaacttaca agcaagaaga
     2641 acaggaggaa cttttgtatc aacggagggg ttaaagtgtg tagcccgaat ggttactcgt
     2701 tcgggatcct gcggcacatt ctgagatcat tcgacgagat atattctggg aatcatagga
     2761 tggtcgggtt agtcaaagta gttattggac tggctttgtc aggagctcca gtccctgagg
     2821 gcatgaactg ggtatacaag ttgaggagaa cccttatctt ccagtgggct gattccaggg
     2881 gccctcttga aggggaggag ttggaatact ctcaggagat cacttgggat gataatactg
     2941 agttcgtcgg attgcaaata agagtgagtg caaaacagtg tcatatccgg ggcagaatct
     3001 ggtgtatcaa catgaactcg agagcaggtc aactatggtc tgacatgtct cttcagacac
     3061 aaaggtccga agaggacaaa gattcctctc tgcttctaga ataatcagat tatatcccgc
     3121 aaatttatca cttgtttacc tctggaggag agaacatatg ggctcaactc caacccttgg
     3181 gggcaatata acaaaaaaac atgttatggt gccattaaac cgctgcattt catcaaagtc
     3241 aagttaatta cctttacatt ttgatcctct tggatgtgaa aaaaactatt aacatccctc
     3301 aaaagactca aggaaagatg gttcctcagg ctctcctgtt tgtacccctt ctggtttttc
     3361 cattgtgttt tgggaaattc cctatttaca cgataccaga caagcttggt ccctggagcc
     3421 cgattgacat acatcacctc agctgcccaa acaatttggt agtggaggac gaaggatgca
     3481 ccaacctgtc agggttctcc tacatggaac ttaaagttgg atacatctca gccataaaaa
     3541 tgaacgggtt cacttgcaca ggcgttgtga cggaggctga aacctacact aacttcgttg
     3601 gttatgtcac aaccacgttc aaaagaaagc atttccgccc aacaccagat gcatgtagag
     3661 ccgcgtacaa ctggaagatg gccggtgacc ccagatatga agagtctcta cacaatccgt
     3721 accctgacta ccactggctt cgaactgtaa aaaccaccaa ggagtctctc gttatcatat
     3781 ctccaagtgt ggcagatttg gacccatatg acagatccct tcactcgagg gtcttccctg
     3841 gcgggaattg ctcaggagta gcggtgtctt ctacctactg ctccactaac cacgattaca
     3901 ccatttggat gcccgagaat ccgagactag ggatgtcttg tgacattttt accaatagta
     3961 gagggaagag agcatccaaa gggagtgaga cttgcggctt tgtagatgaa agaggcctat
     4021 ataagtcttt aaaaggagca tgcaaactca agttatgtgg agttctagga cttagactta
     4081 tggatggaac atgggtcgcg atgcaaacat caaatgaaac caaatggtgc cctcccggtc
     4141 agttggtgaa tttgcacgac tttcgctcag acgaaattga gcaccttgtt gtagaggagt
     4201 tggtcaagaa gagagaggag tgtctggatg cactagagtc catcatgacc accaagtcag
     4261 tgagtttcag acgtctcagt catttaagaa aacttgtccc tgggtttgga aaagcatata
     4321 ccatattcaa caagaccttg atggaagccg atgctcacta caagtcagtc agaacttgga
     4381 atgagatcat cccttcaaaa gggtgtttaa gagttggggg gaggtgtcat cctcatgtaa
     4441 acggggtatt tttcaatggt ataatattag gacctgacgg caatgtctta atcccagaga
     4501 tgcaatcatc cctcctccag caacatatgg agttgttggt atcctcggtt atccccctta
     4561 tgcaccccct ggcagacccg tctaccgttt tcaagaacgg tgacgaggct gaggattttg
     4621 ttgaagttca ccttcccgat gtgcacgaac ggatctcagg agttgacttg ggtctcccga
     4681 actgggggaa gtatgtatta ctgagtgcag gggccctgac tgccttgatg ttgataattt
     4741 tcctgatgac atgctggaga agagtcaatc gatcggaacc tacacaacac aatctcagag
     4801 ggacagggag ggaggtgtca gtcactcccc aaagcgggaa gatcatatct tcatgggaat
     4861 catacaagag cgggggtgag accggactgt gagagctggc cgtcctttca acgatccaag
     4921 tcctgaagat cacctcccct tggggggttc tttttgaaaa aaaacctggg ttcaatagtc
     4981 ctccttgaac tccatgcaac tgggtagatt caagagtcat gagattttca ttaatcctct
     5041 cagttgatca agcaagatca tgtagattct cataataggg gagatcttct agcagtttca
     5101 gtgactaacg gtgctttcat tctccaggaa ctgacaccaa cagttgtaga caaatcacgg
     5161 ggtgtctcag gtgattctgc gcttgggcac agacaaaggt catggtgtgt tccatgatag
     5221 cggactcagg atgagttaat tgagagaggc aatcttcctc ccgtgaagga cacaagcagt
     5281 agctcacaat catctcgtgt ttcagcaaag tgtgcataat tataaagtgc tgggtcatct
     5341 aagcttttca gtcgagaaaa aaacagtaga tcagaagaac aactggcaac acttctcatc
     5401 ctgagaccta cttcaagatg ctcgatcctg gagaggtcta tgatgaccct attgacccaa
     5461 tcgagttaga ggctgaaccc agaggaaccc ccactgtccc caacatcttg aggaactctg
     5521 actacaatct caactctcct ttgatagaag atcctgctag actaatgtta gaatggttaa
     5581 aaacagggaa tagaccttat cggatgactc taacagacaa ttgctccagg tctttcagag
     5641 ttttgaaaga ttatttcaag aaggtagatt tgggttccct caaggtgggc ggaatggctg
     5701 cacagtcaat gatttctctc tggttatatg gtgcccactc tgaatccaac aggagccgga
     5761 gatgtataac agacttggcc catttctatt ccaagtcgtc ccccatagag aagctgttaa
     5821 atctcacgct aggaaataga gggctgagaa tccccccaga gggagtgtta agttgccttg
     5881 agagggttga ttatgataat gcatttggaa ggtatcttgc caacacgtat tcctcttact
     5941 tgttcttcca tgtaatcacc ttatacatga acgccctaga ctgggatgaa gaaaagacca
     6001 tcctagcatt atggaaagat ttaacctcag tggacatcgg gaaggacttg gtaaagttca
     6061 aagaccaaat atggggactg ctgatcgtga caaaggactt tgtttactcc caaagttcca
     6121 attgtctttt tgacagaaac tacacactta tgctaaaaga tcttttcttg tctcgcttca
     6181 actccttaat ggtcttactt tctcccccag agccccgata ctcagatgac ttgatatctc
     6241 agctatgcca gctgtacatt gctggggatc aagtcttgtc tatgtgtgga aactccggct
     6301 atgaagtcat caaaatattg gagccatatg tcgtgaatag tttagtccag agagcagaaa
     6361 agtttaggcc tctcattcat tccttgggag actttcctgt atttataaaa gacaaggtaa
     6421 gtcaactcga agagacgttc ggttcctgtg caagaaggtt ctttagggct ctggatcaat
     6481 tcgacaacat acatgacttg gtttttgtgt atggctgtta caggcattgg gggcacccat
     6541 atatagatta tcgaaagggt ctgtcaaaac tatatgatca ggttcacatt aaaaaagtga
     6601 tagataagtc ctaccaggag tgcttagcaa gcgacctagc caggaggatc cttagatggg
     6661 gttttgataa gtactccaag tggtatctgg attcacgatt cctagcccga gaccacccct
     6721 tgactcctta tatcaaaacc caaacatggc cacccaaaca tattgtagat ttggtggggg
     6781 atacatggca caagctcccg atcacgcaaa tctttgagat tcctgaatca atggatccat
     6841 cagaaatatt ggatgacaaa tcacattctt tcaccagaac gagactagct tcttggctgt
     6901 cagaaaaccg aggggggcct gttcctagcg aaaaagttat tatcacggcc ctgtctaagc
     6961 cgcctgtcaa tccccgagag tttctgaagt ctatagacct cggaggattg ccagatgaag
     7021 acttgataat tggcctcaag ccaaaggaac gggaattgaa gattgaaggt cgattctttg
     7081 ctctaatgtc atggaatcta agattgtatt ttgtcatcac tgaaaaactc ttggccaact
     7141 acatcttgcc actttttgac gcgctgacta tgacagacaa cctgaacaag gtgtttaaaa
     7201 agctgatcga cagggtcacc gggcaagggc ttctggacta ttcaagggtc acatatgcat
     7261 ttcacctgga ctatgaaaag tggaacaacc atcaaagatt agagtcaaca gaggatgtat
     7321 tttctgtcct agatcaagtg tttggattga agagagtgtt ttctagaaca cacgagtttt
     7381 ttcagaagtc ctggatctat tattcagaca gatcagacct catcgggtta cgggaggatc
     7441 aaatatactg cttagatgcg tccaacggcc caacctgttg gaatggccag gatggcgggc
     7501 tagaaggctt acggcagaag ggctggagtc tagtcagctt attgatgata gatagagaat
     7561 ctcaaatcag gaacacaaga accaaagtac tagctcaagg agacaaccag gttttatgtc
     7621 cgacatatat gttgtcgcca gggctatctc aagaggggct cctctatgaa ttggagagca
     7681 tatcaaggaa tgcattttcg atatacagag ccgtcgagga aggggcatct aaactagggc
     7741 tgatcatcaa gaaagaagag accatgtgta gttatgactt cctcatctat ggaaaaaccc
     7801 ctttgtttag aggtaacata ttggtgcctg agtccaaaag atgggccaga gtctcttgcg
     7861 tctctaatga ccaaatagtc aacctcgcca atataatgtc gacagtgtcc accaacgcgc
     7921 taacagtggc acaacactct caatctttga tcaaaccgat gagggatttt ctgctcatgt
     7981 cagtacaggc agtctttcac tacctgctat ttagcccaat cttaaaggga agagtttaca
     8041 agattctgag cgctgaaggg gagagctttc tcctagccat gtcaaggata atctatctag
     8101 atccttcttt gggaggggta tctggaatgt ccctcggaag attccatata cgacagttct
     8161 cagaccctgt ctctgaaggg ttatccttct ggagagagat ctggttaagc tcccacgagt
     8221 cctggattca cgcgttgtgt caagaggctg gaaacccaga tcttggagag agaacactcg
     8281 agagcttcac tcgccttcta gaagatccta ccaccttaaa tatcagagga ggggccagtc
     8341 ctaccattct actcaaggat gcaatcagaa aggctttata tgacgaggtg gacaaggtgg
     8401 agaactcaga gtttcgagag gcaatcctgt tgtccaagac ccatagagat aattttatac
     8461 tcttcttaac atctgttgag cctctgtttc ctcgatttct cagtgagcta ttcagttcgt
     8521 cttttttggg aatccccgag tcaatcattg gactgataca aaactcccga acgataagaa
     8581 ggcagtttag aaagagtctc tcaaaaactt tagaagaatc cttctacaac tcagagatcc
     8641 acgggattag tcggatgacc cagacacctc agagggttgg gggggtgtgg ccttgctctt
     8701 cagagagggc agatctactt agggagatct cttggggaag aaaagtggta ggcacgacag
     8761 ttcctcaccc ttctgagatg ttggggttac ttcccaagtc ctctatttct tgcacttgtg
     8821 gagcaacagg aggaggcaat cctagagttt ctgtatcagt actcccgtct tttgatcagt
     8881 catttttttg cacggggccc ctaaaggggt acttgggctc gtccacctct atgtcgaccc
     8941 agctattcca tgcatgggaa aaagtcacta atgttcatgt ggtgaagaga gctctatcgt
     9001 taaaagaatc tataaactgg ttcattacta gagattccaa cttggctcaa actctaatta
     9061 ggaacattgt gtctctgaca ggccctgatt tccctctaga ggaggcccct gttttcaaaa
     9121 ggacggggtc agccttgcat aggttcaagt ctgccagata cagcgaagga gggtattctt
     9181 ctgtatgccc gaacctcctc tctcatattt ctgttagtac agacaccatg tctgatttga
     9241 cccaagacgg gaagaactac gatttcatgt tccagccatt gatgctttat gcacagacat
     9301 ggacatcaga gctggtacag agagacacaa ggctaagaga ctctacgttt cattggcacc
     9361 tccaatgcaa caggtgtgtg agacccattg acgacgtgac cctggagacc tctcagatct
     9421 tcgagtttcc ggatgtgtcg aaaagaatat ccagaatggt ttctggggct gtgcctcact
     9481 tccagaggct tcccgatatc cgtctgagac caggagattt tgaatctcta agcggtagag
     9541 aaaagtctca ccatatcgga tcagctcagg ggctcttata ctcaatctta gtggcaattc
     9601 acgactcagg atacaatgat ggaaccatct tccctgtcaa catatacggc aaggtttccc
     9661 ctagagacta tttgagaggg ctcgcaaggg gagtattgat aggatcctcg atttgcttct
     9721 tgacgagaat gacaaatatc aatattaata gacctcttga attgatctca ggggtaatct
     9781 catatattct cctgaggcta gataaccatc cctccttgta cataatgctc agagaaccgt
     9841 cttttagaga agagatattt tctatccctc agaaaatccc cgccgcttat ccaaccacta
     9901 tgaaagaagg caacagatca atcttgtgtt atctccaaca tgtgctacgc tatgagcgag
     9961 aggtaatcac ggcgtctcca gagaatgact ggctatggat cttttcagac tttagaagtg
    10021 ccaaaatgac gtacctaacc ctcattactt accagtctca tcttctactc cagagggttg
    10081 agagaaacct atctaagagt atgagagata acctgcgaca attgagttcc ttgatgaggc
    10141 aggtgctggg cgggcacgga gaagatacct tagagtcaga cgacaacatt caacgactac
    10201 taaaagactc tttacgaagg acaagatggg tggatcaaga ggtgcgccat gcagctagaa
    10261 ccatgactgg agattacagc cccaacaaga aggtgtcccg taaggtagga tgttcagaat
    10321 gggtctgctc tgctcaacag gttgcagtct ctacctcagc aaacccggcc cctgtctcgg
    10381 agcttgacat aagggccctc tctaagaggt tccagaaccc tttgatctcg ggcttgagag
    10441 tggttcagtg ggcaaccggt gctcattata agcttaagcc tattctagat gatctcaatg
    10501 ttttcccatc tctctgcctt gtagttgggg acgggtcagg ggggatatca agggcagtcc
    10561 tcaacatgtt tccagatgcc aagcttgtgt tcaacagtct tttagaggtg aatgacctga
    10621 tggcttccgg aacacatcca ctgcctcctt cagcaatcat gaggggagga aatgatatcg
    10681 tctccagagt gatagatttt gactcaatct gggaaaaacc gtccgacttg agaaacttgg
    10741 ctacctggaa atacttccag tcagtccaaa agcaggtcaa catgtcctat gacctcatta
    10801 tttgcgatgc agaagttact gacattgcat ctatcaaccg gataaccctg ttaatgtccg
    10861 attttgcatt gtctatagat ggaccactct atttggtctt caaaacttat gggactatgc
    10921 tagtaaatcc aaactacaag gctattcaac acctgtcaag agcgttcccc tcggtcacag
    10981 ggtttatcac ccaagtaact tcgtcttttt catctgagct ctaccttcga ttctccaaac
    11041 gagggaagtt tttcagagat gctgagtact tgacctcttc cacccttcga gaaatgagcc
    11101 ttgtgttatt caattgtagc agccccaaga gtgagatgca gagagctcgt tccttgaact
    11161 atcaggatct tgtgagagga tttcctgaag aaatcatatc aaatccttac aatgagatga
    11221 tcataactct gattgacagt gatgtagaat cttttctagt ccacaagatg gtggatgatc
    11281 ttgagttaca gaggggaact ctgtctaaag tggctatcat tatagccatc atgatagttt
    11341 tctccaacag agtcttcaac gtttccaaac ccctaactga ccccttgttc tatccaccgt
    11401 ctgatcccaa aatcctgagg cacttcaaca tatgttgcag tactatgatg tatctatcta
    11461 ctgctttagg tgacgtccct agcttcgcaa gacttcacga cctgtataac agacctataa
    11521 cttattactt cagaaagcaa gtcattctag ggaacgttta tctatcttgg agttggtcca
    11581 acgacacctc agtgttcaaa agggtagcct gtaattctag cctgagtctg tcatctcact
    11641 ggatcaggtt gatttacaag atagtgaaga ctaccagact cgttggcagc atcaaggatc
    11701 tatccggaga agtggaaaga caccttcata ggtacaacag gtggatcacc ctagagaata
    11761 tcagatctag atcatcccta ctagactaca gttgcctgtg catcggatac tcctggaagc
    11821 ctgcccatgc taagactctt gtgtgatgta ttttgaaaaa aacaagatct taaatctgaa
    11881 cctctagttg tttgattgtt tttctcattt ttgttgttta tttgttaagc gt'''

# Run the full pipeline on the example; isTimine=True because COD1 is written with 't' rather than 'u'.
# `a` holds the (occurrences dict, list of translated stretches, per-group totals) tuple.
a = genome_identifier(COD1, isTimine=True)