In [5]:
"""
translate rna sequence to list of amino acids.

RNA and amino acid sequences can be written down as a stream
of symbols, RNA is built from 4 symbols `UCGA`, for amino acids
there are more symbols: ACDEFGHIKLMNPQRSTVWY.

During RNA translation such an RNA sequence is translated into
a sequence of amino acids, where each triplet (which we call "codon")
corresponds to one single amino acid:

e.g.

    UUU -> F
    CUU -> L
    AUU -> I

The following strings implement this correspondence from codons
to amino acids:
(note for the experts: we translate stop-codons to ".")
"""

# All 64 codons, separated by whitespace. The order matters: the codon at
# position i is paired with the symbol at position i in aas_txt below.
triplets_txt = """UUU CUU AUU GUU UUC CUC AUC GUC UUA CUA AUA GUA
                  UUG CUG AUG GUG UCU CCU ACU GCU UCC CCC ACC GCC
                  UCA CCA ACA GCA UCG CCG ACG GCG UAU CAU AAU GAU
                  UAC CAC AAC GAC UAA CAA AAA GAA UAG CAG AAG GAG
                  UGU CGU AGU GGU UGC CGC AGC GGC UGA CGA AGA GGA
                  UGG CGG AGG GGG"""

# One single-letter amino acid symbol per codon of triplets_txt, in the same
# order; "." marks a stop codon (UAA, UAG and UGA).
aas_txt = """F L I V F L I V L L I V L L M V S P T A S P T A S P
             T A S P T A Y H N D Y H N D . Q K E . Q K E C R S G
             C R S G . R R G W R R G"""


def remove_whitespace(txt):
    """Return txt with every whitespace character removed.

    Besides plain spaces and newlines this also drops tabs and carriage
    returns, so text that uses tab separators or Windows-style line endings
    does not keep stray characters that would shift the codon reading frame.
    """
    # str.split() without arguments splits on any run of whitespace and
    # discards the empty pieces; joining what is left removes all of it.
    return "".join(txt.split())


def translate(rna_sequence):
    """
    translate an rna sequence into a string of amino acid symbols.

    the sequence is cleaned with remove_whitespace, read three symbols
    (one codon) at a time, and every codon is converted with lookup_aa.
    the symbols are concatenated without separators.
    example:

    UUU AUC GUU -> FIV

    spaces can be omitted.

    a trailing fragment of only one or two symbols is not a codon and is
    reported as "X", the same marker lookup_aa documents for invalid
    triplets. an empty sequence gives an empty string.
    """

    # cleanup
    rna_sequence = remove_whitespace(rna_sequence)

    symbols = []
    for start_idx in range(0, len(rna_sequence), 3):
        triplet = rna_sequence[start_idx : start_idx + 3]
        if len(triplet) < 3:
            # incomplete codon at the end of the sequence: do not try to
            # look it up, it cannot correspond to an amino acid
            symbols.append("X")
        else:
            symbols.append(lookup_aa(triplet))

    # collect the pieces and join once instead of growing a string with +=
    return "".join(symbols)


# codon -> amino acid symbol, filled on the first lookup_aa call and reused
# afterwards so the two text tables are parsed only once.
_codon_table = {}


def lookup_aa(triplet):
    """finds aa symbol for given triplet.
    returns 'X' for invalid triplet

    a triplet is valid only if it is exactly one of the whitespace-separated
    codons in triplets_txt; its symbol is the entry at the same position in
    aas_txt. anything else (unknown codon, too short, too long, empty)
    gives 'X'.

    note: the table is built from triplets_txt / aas_txt on first use;
    later changes to those strings are not picked up.
    """

    if not _codon_table:
        # Pair the i-th codon with the i-th symbol. Splitting into whole
        # codons (rather than searching one concatenated string) is what
        # keeps a lookup from matching across codon boundaries: "UCU" also
        # occurs inside "UUU" + "CUU", which must not count as a hit.
        _codon_table.update(zip(triplets_txt.split(), aas_txt.split()))

    return _codon_table.get(triplet, "X")

# Large input for exercising translate: the whole codon list, whitespace
# removed, repeated 20_000 times.
# NOTE(review): the trailing semicolon presumably suppresses the echoed
# result when this runs as a notebook cell - keep it.
long_sequence = remove_whitespace(triplets_txt) * 20_000
translate(long_sequence);