In [11]:
def read_lines(path):
    """
    Read a text file and return its lines as a list of strings.

    Each line has leading and trailing whitespace (including the newline)
    removed; blank lines are kept as empty strings.
    """
    with open(path, 'r') as handle:
        return [raw_line.strip() for raw_line in handle]

In [18]:
# RNA codon -> amino acid lookup, grouped by the first two bases of the codon.
# Built once when the cell runs rather than on every call of the function.
# The three stop codons map to the sentinel string 'Stop'.
_RNA_CODON_TABLE = {
    'UUU': 'F', 'UUC': 'F', 'UUA': 'L',    'UUG': 'L',
    'UCU': 'S', 'UCC': 'S', 'UCA': 'S',    'UCG': 'S',
    'UAU': 'Y', 'UAC': 'Y', 'UAA': 'Stop', 'UAG': 'Stop',
    'UGU': 'C', 'UGC': 'C', 'UGA': 'Stop', 'UGG': 'W',
    'CUU': 'L', 'CUC': 'L', 'CUA': 'L',    'CUG': 'L',
    'CCU': 'P', 'CCC': 'P', 'CCA': 'P',    'CCG': 'P',
    'CAU': 'H', 'CAC': 'H', 'CAA': 'Q',    'CAG': 'Q',
    'CGU': 'R', 'CGC': 'R', 'CGA': 'R',    'CGG': 'R',
    'AUU': 'I', 'AUC': 'I', 'AUA': 'I',    'AUG': 'M',
    'ACU': 'T', 'ACC': 'T', 'ACA': 'T',    'ACG': 'T',
    'AAU': 'N', 'AAC': 'N', 'AAA': 'K',    'AAG': 'K',
    'AGU': 'S', 'AGC': 'S', 'AGA': 'R',    'AGG': 'R',
    'GUU': 'V', 'GUC': 'V', 'GUA': 'V',    'GUG': 'V',
    'GCU': 'A', 'GCC': 'A', 'GCA': 'A',    'GCG': 'A',
    'GAU': 'D', 'GAC': 'D', 'GAA': 'E',    'GAG': 'E',
    'GGU': 'G', 'GGC': 'G', 'GGA': 'G',    'GGG': 'G',
}


def codon_to_amino_acid(codon):
    """
    Translate an RNA codon into its amino acid letter.

    Parameters
    ----------
    codon : str
        A three-letter RNA codon over the bases A, C, G and U. The lookup is
        case-insensitive: the codon is upper-cased before use.

    Returns
    -------
    str
        The one-letter amino acid code, or the string 'Stop' for the stop
        codons UAA, UAG and UGA.

    Raises
    ------
    ValueError
        If the upper-cased codon is not in the table (wrong length, or bases
        outside A/C/G/U such as a DNA 'T'). ValueError is a subclass of
        Exception, so callers that catch Exception keep working.
    """
    codon = codon.upper()
    if codon not in _RNA_CODON_TABLE:
        raise ValueError("Unknown codon: {0}".format(codon))
    return _RNA_CODON_TABLE[codon]

In [27]:
def translate_rna_into_protein(rna):
    """
    Translate an RNA string into a protein string.

    The input is read as consecutive, non-overlapping codons of three
    characters starting at index 0. Translation ends at the first stop codon
    (which is not included in the result) or when the complete codons run out.
    If ``len(rna)`` is not a multiple of three, the one or two leftover
    characters at the end are ignored.

    Parameters
    ----------
    rna : str
        The RNA string to translate.

    Returns
    -------
    str
        The amino acid letters of the translated codons, concatenated in
        order. Empty if the input holds no complete codon or starts with a
        stop codon.

    Raises
    ------
    Whatever ``codon_to_amino_acid`` raises for a codon it does not recognise;
    the error is not caught here.
    """
    amino_acids = []
    # Only walk over complete triplets; a trailing partial codon is dropped.
    usable_length = len(rna) - len(rna) % 3
    for start_pos in range(0, usable_length, 3):
        amino_acid = codon_to_amino_acid(rna[start_pos:start_pos + 3])
        if amino_acid == 'Stop':
            break
        amino_acids.append(amino_acid)
    # Join once at the end instead of growing a string inside the loop.
    return ''.join(amino_acids)

In [28]:
# Test: translate a short sample RNA string and check the result against the
# expected protein, so a regression fails loudly instead of relying on a
# visual comparison of the cell output.
sample_rna = 'AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA'
sample_protein = translate_rna_into_protein(sample_rna)
assert sample_protein == 'MAMAPRTEINSTRING', sample_protein
sample_protein

'MAMAPRTEINSTRING'

In [31]:
# Submission: read the input file; the RNA string to translate is its first line.
# `string` is consumed by the translation cell further down.
lines = read_lines('./txt/rosalind_prot.txt')
string = lines[0]
# Display the length of the input string as the cell output.
len(string)

9144

In [38]:
# Translate the submission string (`string`, set in an earlier cell) and print
# the protein's length on one line followed by the protein itself on the next.
protein = translate_rna_into_protein(string)
print(len(protein), protein, sep='\n')

3047
MRCNKSYIHHHIQMQRLKRRGLPELGLVCVAAAFALYRKLASGNDVVAGSSKVYESYLASAISLKPKMRSVSGTAACRQDFCYSACWGNPIRICTAILYQSPAQDGVDLLCQVVERYTTRILRRRDTFLGWYRQGGGTNDAVRCDSDLKYSYLAHVEFRLEYIKPYLQSVFVCAAVKLDGEISQPNFISCSDSSLTRVISCRIRPLYNPSREQSKGYFTDKALPIIGVVKGVLPQTVYFPSLVSKESLMLQDLEGIYKVDTPNNSQAVETGNHTQREFRLYYCECWFRIIVIAAIPSFTEGGSPLLYRPAGAASNRQCHAVISKSPNEVSSDFITAPRVFDWVGYDTNLLPGTKAIRLEGIGVKTHAEAYSPKPNPIKTLILIEYGDPHVFSRYPSTRRGFRPDAIQACFVVKYLPGIKFGLVPEFDRTDLSQVIIRATNSNKLPIYSTPTIAMRCSHPKLQRNKYNMRSDCILDKCIEFLGLWVLTARRRVYLDNGFVTCRRFQAFIYPLMCPISQTLHSATRQRQCQQLVACLPEQYSKLYGYVGYKRLNRLRAVRPSKREDRLVGDIALGRCEAVQNSEILGQSHPSPAEYRNISGAVLGQSFVLINNRRTVTLSGHCNVSQSTAGEYASRKFAPVRLIRTHPPHPELVSMRTTPGFLSMRSGRYTQKLSSTTCDLVRILPLSFGVSMPAMALRPANYGTPGAAMTDQTRSSDLQILSYFWLNSIPENFAIQWAPSGHIRGNQIGSTIIFRFFTGGRTEGLLLPNITYVQQQRDEVVGVCRYYYFGLLNPLEIYTTPVLKQVCAPRACVRTHLGNAYLRATRLVDVCPCSLSIVSNADVTNRAHWLATPSIAINPATYRAKCSLICENGYTRAGLPPDAYSAVELFVEFQVYLLGRFTHNCVGVRNLGFRNLVAPSMMCGSAQRGGIAASCTSMDPGHYPLTQTGVYFLKRRYNRNRARPSRSFVSILQSIWMVVISSRTTYVTIGRHEIGI