# Problem

The 20 commonly occurring amino acids are abbreviated by using 20 letters from the English alphabet (all letters except for B, J, O, U, X, and Z). Protein strings are constructed from these 20 symbols. Henceforth, the term genetic string will incorporate protein strings along with DNA strings and RNA strings.

The RNA codon table dictates the details regarding the encoding of specific codons into the amino acid alphabet.

**Given:** An RNA string *s* corresponding to a strand of mRNA (of length at most 10 kbp).

**Return:** The protein string encoded by *s*.


In [19]:
import pandas as pd

In [26]:
def rnaToCodon(rna):
    """Split *rna* into consecutive, non-overlapping three-item codons.

    The input is consumed item by item (for a string: character by
    character).  Every third item closes the current codon, which is then
    appended to the result.  A trailing remainder of one or two items that
    never completes a codon is discarded.

    Returns a list of codon strings in the order they appear in *rna*.
    """
    triplets = []
    buffer = ''
    for position, base in enumerate(rna, start=1):
        buffer += base
        # Positions are 1-based, so every multiple of three ends a codon.
        if position % 3 == 0:
            triplets.append(buffer)
            buffer = ''
    return triplets

In [27]:
def translateCodons(rna):
    """Translate a sequence of RNA codons into a protein string.

    The codon table is read from ``codon_table.csv`` (path relative to the
    current working directory).  The file must provide an ``rna`` column
    holding the codon and an ``amino_acid`` column holding its translation;
    stop codons are marked with the value ``'Stop'``.

    Parameters:
        rna: iterable of codon strings, e.g. the output of ``rnaToCodon``.

    Returns:
        The amino-acid symbols of the translated codons joined into one
        string.  Translation ends at the first stop codon; the stop codon
        itself and everything after it contribute nothing to the result.

    Raises:
        IndexError: if a codon is not present in the codon table.
    """
    codonTable = pd.read_csv('codon_table.csv')

    # Build the codon -> amino-acid mapping once instead of scanning the
    # whole DataFrame for every codon.  setdefault keeps the first row when
    # a codon is listed more than once, matching a ``.values[0]`` lookup.
    lookup = {}
    for codon, amino_acid in zip(codonTable.rna, codonTable.amino_acid):
        lookup.setdefault(codon, amino_acid)

    residues = []
    for triplet in rna:
        if triplet not in lookup:
            # Same exception type a failed ``.values[0]`` lookup produces,
            # but with a message that names the offending codon.
            raise IndexError(
                'codon {!r} not found in codon table'.format(triplet)
            )
        translation = lookup[triplet]
        if translation == 'Stop':
            # A stop codon terminates the protein; do not translate further.
            break
        residues.append(translation)
    return ''.join(residues)

In [28]:
rna = "AUGAGCGAAUCCACUGUUGUCGCUGUAGCCGAGAGAAGCCGUUGUUGUAGGGAAAUAAGGGUAGCAACGUACGAUUUACGGUCCGCGUAUGUCCCCAGCAUCCUGUUUCCGAGAAAUGUAGUUUGCUCGUGGCUCAGCGAACCGCGGAUUUCGCGUAGAUUACGGACCCACCAUAGCCCAGUUCAAUGGGGGGGGAUUUGUGUCAAAGUAACCUUACGCUACGGCUUCAAUGUCAGUUUAUGUUUACAAAGGCUUAAGACUUUCUAUGCUGGAGGGCGUACUGGGAGCGAGAAGGCCUCAUCCUUGUCUGCUACUUUUCUACCGCCCGUUGCGGGAGAGGGUCAACCGCGGAGGGUAGUCCUAACCCUAAGAGGUCCCAGGGAAGGUGCUCCCCUCAGACGCGUAAGCUUAUCAGGAAUGGGGCAAAGUCACGUGAACGUGCCGGCUGCUUCUAUGAUUUCGCAGGUGUCCCGGUGCCACCAUGCCAGCUUACGGCCGAUCAGUACUUCCGCUUUUCCAAUGAAGGUGGGGGUACUUUCCAAGUCAGUCCACGACCGAGCCGAUAUCAAAGGCAUGUCACCCGAAGUUAAAGUUUCGACUUGCGUACGUGGACCACCUCCAAACAGCCCAAAGAGUAUGUCCAUCCUCGGACGCAUGAGUCCAAUCUAUCGAUGGGCCCCAUGGUUAGGAAGUCGCGUGGAUGCAGGUCAUUUCGGAUUAGUCCCUCGGGGAUCGAAGUGGUUCCAGACUCCUGCUCAACCGAGGUGUGUUAAGCUCGCCGCUUCAUAUAAUAAUUCCCAAUCUCGCAGCAUGUUGCUAGCACAUGUUUCCCCGCCUCCGGGCGUAUCACAGGACUAUAAAGUAGUUAUCCUAUCAUGUGACUCUCUUAUGCUACUUUCUUCAGCUUUAGCGGCGGUUGCCUACCAGCCAGGGGGAAUAUCACUUCCGAUGCCUCUUUGCCCAAAUAGUUAUGCGUUUACUCUUUAUGCCGUUUCUCCAGUUAAGGACGCGAAACCAGAGAAACAAACGCCACCCGCGGAGAGUAUGACAUCGUUUCACAUACACCUCCGCAGGAUAAUCUUGGGAAGACCUUUGCGAUCUAGCGGGUCAGCUGACCAUACCCGGGUUUUGGAAUCUCCCGACCGACCCUUUCCGAGAAAUACUCCGUCGAAUACUUGCAGUUCCCAGCGCUGUAGGCUCAUACCGGGACACGCCUUGUUUUCGGCGGUUACAGGAGUUCAUGCUUGCUACCUUACCUCCGUUUAUGAUACGCUGAGCACCCAAAGCAGCUUCUUCAAAUGUUUGGCACGUCCCACUCAGAUAAAAGAAAUUCGUCUCAGCGCACACGAAAACCACGCUUUUAGUUUAGGUGUCGCAAGUGCACGCCAAGCAAGUGAGUUAUUGUGCUGCGUAUGCGACAAUCUUGAUAGUGAUGUGGAGUACCGCACGCAGACCUGGGACUCCAACGCGCGGCGAAAAAGCAUAAAAUCCUAUACCGUCGUGUACGUGACCAGUUCAAGCGUGACGACAAUCGGAGCCCGUUGCCGGACGGAGCUACUGCCCUGCGAGAGCCUCUCCACCCGAUAUCAAGUUACGCGACGCGCAGGGCUAGUAAACAUGCAUGUGACUUGGAGGGCAACGUCGGGAGCCUUAUCGCUGACAUACCCUUAUACGAAAAGCUCAACCGCUUCCCAGCAAAAAUUCAUCAGCCCAUUAUGUAGGAGCCGCGGCCAUCACAGGACGUGGCGGGGCGCGGCUCUUCAUCUCAACUUCGGGUGUUACUGCUAUACGCCGGGUUACGCAGUUAGGCCCCGCCAGCGCCACAUGCGUAUCAAUCUGGUUGUUUCCAGCUGUGACUGGCACAGCCAAGUUAUAACUUCAAUGUCGCGCUAUAACCAGCAGUCAAGCUCGGAUGUUAGUGAUAGAACCCCGACGUUAGCUUCCUCGUUUCCGCCAUGCUGGUGCACUUUAGAUGAUUACCGGCACCGGACCG
UCUAUGCUAGGGGCGGAUUGAAGGCUAUAGGCCUCAUCAGCUUGAUCAACAAAAAUCAAAUCAAUACACAUUUCCGAUUACGUUGCAAAAGGAUCAUAUCACGCCGGCGCAAACCUCUAGGCCUCAAACACCGACUCUUACCUUCGUUCUUUUGCUGCAUCUUUCGUCUUCUGUUUCGAGAAAACAGGUCUAGCAAUGACUAUUUCCGGAUGCUGUCAAGACAUAAUUCAUCUACGCAAAUAGGACAGGUCCCGGAAGAGGUCGAUAAUCAGCCUAUGCGGCCGGCUGCGCCGUUUUCGCUGAUCGUCAAUCUGUGCAGAAGUGAUCAUCUAAUUGAGCCAAUACGCGUCUUAUAUUCUGAACUCUGUCGGACCUACUGCGCGUGGCGCUUCCUUCCCUCUACUAAAGUCUUCUGUAACGUAAAAUAUGCUCGUAUGUGGGAUCCGCCGGUCACGACCAGCAUGGACAGAAAGGAAACAAGGUCUCAUCCGAAGCGCAAGGUGGUCGAAUUCUAUGUUAUAUUAGGCGACGGUUACACGCGUUAUACCCCUUCAGGAGUUAAUUCGCAGAUUGAAGAUAACGCCCCGUGUCUCGUUCGACGUUCUAACACUGAGAUUCCUACUGAGCAUGCGGGCUCCCCACGUUUAUCACAGUACCACCCAAUAGAAAAGCAAGAAAGCAUACGUGCGCGUCCUCAGCCUCCAACGGCUGGGGUAUACGGCUUACAGCACCUGCGUGCUUAUGGGCACCCGGACCCCUCUCAGGACCGACGCGUUAUAAACUUUCCGGUCCGCUUGCUACCUCCCGAGAACACCGGUUCCAGAAUAAUCAUCAAUACCUUUAGAUUAAAGUUCGUAGGCAGGGGACUCGGAUUGGGCAAUCAUCUGGUAUAUUCGAGAUCUCACACGACAGAAUACCACUCAGGAGAUCUAUUGGUAUUGGAAACCGCAUGCAGAAUGCCAAAAAACAGGCCGAGCAGCCAUGUCGAGACAUAUUUGCCUCAUGUUCCGAGCCUGCGUCGACCAGGAAUGGAUUGGCAACAAGUAUUCAACAGCAUAGCGAAGAAAAUCCUGCAUAUACUUCUCCGCGCGACCUAUCUUUACGAUAUGCACUCCGCUACAAUUACAUUCCUCAUUGAACGUGGGGGGUACUGGGUGUGUAGUAACCGCCCCCCAUUUAGGGCAACACAUCGUACGCCCUUUAUCAAUAAUUCACACACCGGACCCCUAACGCCCAUCCCAACUGGUAAGGUAGAGGCGAUGUUGAACCUUCAGACUAUACAGUCUUGUGUCCUACCGCUUUUGCAUAGUCUCGACAUCCCCCUCGUCCAGCGCCACUCUUGGAUAGAGUCCCUGGAGAUGACUCCAUGGGCCCGGAUCUAUAUUCAGUCUCCAAGCGCGAGGUUGCGCGAAUGCAAUCGAAACUUUCAGUAUGCUGGCACCCCGUCACGCGAGAUUGCGGAGAUGAGGAUCUCCUAUUGCGACGGACACUCCAGUAUUAACCCCAGAAUUAUGCUUCAGUACGGGCGUGCACGCUCGCCUCUUGGUAAGCCUGGUCUAUCGGGGCAAUGUCGACCUUUCGCUUGGUCCGAAGAAGUCCAAAGGGUUUAUCGAGUAAUGUCCCCAGUGUCACCCUCGGCGGAGGCCUUGAAAUCAACUGACGUAGAGUAUAUUGUUAUGUCCACAGUCCGGGCGCACUACGCUAGGCAUGACCAGCUGCAAGAUGAAUCCCUAUGCGCUGUUAACUUACAGUUUGGGGGAGGAGGAAGGUGUGCUCUUAUACGCAACAUGGCAAAAAGGACCUGUCGGCUGGUGAAACGCCAUAGAUACUCUUUUCUGGCGAAUAACCAUGCCGGGGAUUUCCUGACUUCCGUGAUCCGGGGCGCAACGUUCGCGUCAAGGCCUAGUGCCGGAGGGUGCAUGAGAACACCAGACGAUAGACAUAUAGGUUCUAGGUUUCUCCGUGCAUGCUUUGGAGCUCAGCCCUCUCACCAGUCAGGC
GCAGUGAGAAGGGACAUGAUAAUUUAUACAGCUGUCACUUCACACAUGCAUGGGCCCAAGCUUGAGAUCGGUCGGCCCAAGCUUACCCGCAUUACCAACCCGCUUCACGGCGCCGGUUCGAUUAAAUGGAUAGACUCAGGAUCUACCUCCCAAUUUAGAGUGGAACCCCAUUACCGAUGGAAAUGCCCCAGACGGGAGGACUUAUUUGCGUUCGGACGUCGAUUUACCCCCCGAGUCGGGGUAAAAUUUAAGGAGGGAGGUCUUUACCGAGGCGUAAAUUACCCCCAUCUUCGCGUCACCGGGACCGACAUUGGAUGCGGGUCUAUAAACAAGCGAGUACUACAAAUGGUGAACCUCCCGGGCCCAAAUACCUUUUAUCCUGACAAACGGCCCAUGCCAAAAACCGCCGCCUUGAGGAUGGGAAUUAUCUUAGCACAGCUCAGUCGCUUACAUCGCGUUGUAAGCCCAGUAUACGCCGCAAGGGACAGUACGGAGUGCUCCAGAGAUAAUACAUCUGCGCUACGCGAUUACGCCGUGGACGUCUCCCCACGCGCGAAGACGUCUCCGAUGGGUAUUAACUCUAUGACCCUCUUGUUGGGGGUCACACACGUCAUGCUAUAUGCCAUUCAAGAAUACAAGCGGUGGAUGCGGUCGGAGCUACUGAAAACCCCGCGUGAAUUACGGUGUGGCCUGCGACGAAAUCUGACAGUUUCUGCCACGUUCUGGGAGGCUCCAAAUUCCAGGGGGUCUAUUGGGUUACAGACAUUCCCACGAAUAGCCUCCAAACGAUGGUUUCAGGACCUGGGCAGGAAGGUGAUCUACUCCCCAUGCGAGUCCCACCCUGCAGUGACUGGGUUUACUCGCAUAUCAAACGUAAUGAAGUUUAUUCGCGCUAGACCGCAGUUUUUGACGGGGCCGAAUGUUACUCUGUGCCGCCAGGCCGGAGACACUACUCCUCGCUGGGAGGCGGUAUUCGACCCCCCAUUGACCGCGCAGAUACGUCUGAGCUAUCCCAGAAGGUUGUAUUACAACCGCGCGUUUUACACUGGCGGCCCAGUCGCUCCGCGGUGGAAAUCUAAAGUGUGGGUAAUCGCAAGACGGAACUGUCGAAAUCGAUUUGGCAGAUUCGGAGAACACAUUAACCCAAGCAUAAACGUCAAGGAGACACUGGCUGACAAUAGCGCGGGCUACCUCAUGAAUUCAGUGCCGAUAACGGACCGCCCGAACGCUUUAUUUAUCAAUUUUAAAACCAUUCCGGACACCUAUGGUGGGAGUGUGAAAAGAGAGAAGUGGGCAGUACUUGGAGUGGUAUGCCUCCUGGCUUUUUCUAAACAAGCAAUAAUCUGCUACCUACAACGGUCAGUUAAUUCUUACAGAUUCUUCCAUAGUAAAGAGGUCAGUUAUCUUGCGCAAGACGCGCGGUGGCUUUUUGCCGAACUUUGGUCCCUUGUGAAGGGACCUAACAUUGACUUUUCCAUUGACAAACCACCCGAUCAAUCAGGAUUACCCAACUGCAAGCUGACAAUGGUGAUUCAGCAGCUCCGUUCAGGCAGUACGACGAGGCUCACUAAAACCAGACACGCAAGACUGAAUUGGCUAACGUCUAGCCUAAAGUCUGAGCCCCAGCAGUUGUUGGUUUCCACAGUAGCGCUAAAUCGAAUAAGAUACUGCCAGACCCCCAGGAUGUAUCUCCAACCGUCAGGAUAUCCUUGGUCCCAAUGUCAGCGUUCCUGUUCAUCCCGCCUCUUUAUUGGGUGGCCACACAGCUCAGUGAAAGAUCUCACUGGAGCGAGGUUUCACGCAGAUACCCCCCACGUCGCUGUUAAAAUGCUCGAUGGCCGUAUACUAGUUGACUUCUAUGAGUCCGGGAUAAGCGUUUGGAAAACCUGCACGACAUCUCAGGAACCGAAGGCAGUGAGACACCGAAAGUGGGACUUCCUAGAUAGGUUUUUCGGUCUUGACUGGGGAGCGUUGUGCGAAGUGAUGCCCCGGUGCGGGGA
AUUCGUCAGAAAAUCCGAAUUGCGUCCUACAGCACUCUACAAUUAUCGAUCCGCCCAUAUACCCGAUUUGACUUAUGAUAAAAUACAGCCAGUUCGGAACCUAUCGCGGGUGAGGUGUUCAGCACAAGAACCUGCGCGACGGGACAAUGACAAAAGCGCUUUAACCGUGUUUCAUGCUAUGACGAUAGCCGACAAUCCGGACUCACAUACUUUGACAUAUCUAAAUACGAGCUACAACAAAUUUGUAUUGCUUGAGGCACAUUUCUUCAAGCAUCGCGGCAGAAAAAGAAUAAUUUGGCAAAUAGUCCAAGCUACUACAAUCCGAGACCUCCACGUCGACCGAGACGGAGUACAAUAUCUGUCUUUCGAGGAUUUUAGACGUCGUGCUAAUUGUCGUGGACCCGAAAAAGACCGGAAUGCUUCAACCUCCGAAUCCACGGAUUACAGGCCGGGCGUUGUCCCCGGUGGAGGAAGAGACUUAACUUUCGCUACUUCAGAGCCACUGCAGAUACCUAUCGUUACAUCGGAAGACACUGCACGGUAUCACACCACGUCAUCCCGAGCUUCGUCGUCGAUCGGCGAGAAGUCCAUGUUUGCUGACCGUAUCCCUAGGCUCGCGAGGCAUAGUAUUAUGGCCGGUUAUGCCGGCUCCCCAAAGCCAGUAUUGGUUGUGUCCACACAUCAUAACGCUGUCAGUUCGGACGUACGUGUUUUAAGUCGCAUCGUGGUGAUGCGCCUUUUAGGUUACGAUGCCCAUCUACGUGGGGCAGCGUGUGUUCUAAGGGAAAUGCCUAGCCUAAGUGCUGCCCGCCGUGACAUGGCACGGGGCGGAGUUCACCAACUCAGUAACACACAUACAACCUUUGGAGUUUGGUUCCUGGGGGCUACUUUUCGGCCCUACUGUGGAUGGCAAGAUGUAAUUGUUGGAUCGAAGAUUUCGUCUCUGCGCAACACCCAAGCAUUCCGGAUAAGUGAGAUUAAAGUGACCACUAAAUAUCUUGUCGCUUCUGUCACAUAUAUAGAGGACAGCGCCAAUUUAGCCAACAUCACGGACCGUUAUGAGGACAAUGCACAACAUGCUACGACCAAAUGCGACCACAUCUUUGAUAAUCCCUGUAACGGCCAACGUCGUUACGCUACUUGGCCGCCAUCACCUGCUGCGAUCUCCGCUACCGUUCUGUUGCGGACGUCUUCCGUCGCAUCCAACGCAGGUGCGAGUCCCAAAUGGUCGAUACGACUGCCCUGUCUACCUCAGGCAUACUUGGGAUGUCCCAAGUUCUCGGCCGGGGGGACAUCAGGGCGAUUUGUUCUCUGUCGAUGCAAUAUUCGCAAUCCCAGGGGUGCGGGAAAGUUAACCGCAGGGAGGGCGUUUUGUCUCGAGAUACGGAGUCCGGCAGCUCUCCCGUGCUGGUGCAAACUUUCUUGUUCAUUAGAAUGCCAACUCGUCAAAAGUCGAUGUAUGACUCCAUUGCGAAUCCGUAAUUCAACGGAGGGUCUCGGCAAGCGCCUACGGGAUUUUGGGCCAGGGAGUAUUGCGCCGCUGAGUUAUCCGGGUGCACUCAGACCACCCCAUUCAGUUUAUCAAUUAUGGUAUCUUACAAGCAGGUUUCUUAUCACCAUUCGGGUAUCUCAGGGUAAUAUGAUGAAAUUCAACGUGAAGAGAUGUGGGGUGUCGCCUUUGCCAGCUUCCAAUUAUUUCCACCCCUUAACAUCCAAUCGCGCGGCGGUUAGUGGGGGUUAUCUGACGUCUACGUUGAGGUCUGGAUGGUGCGAGACGUUACAUUCGAAUUCCCUAGCAUUUUCUGUACGGCUAGUGAAACUUGCUGGUGGUUGUGGAAUCAAUAUCACUCGAGUACCCUGUCGCAACGGGGGAAUGCUGGAAAUGGGAUCACAAGCGCGCCGACUUGCAUGGAUAAUAGACGUAACACGACACGCGCCUCCAAAAGGCCUCUACGAAUAUGGUGGAGCUCUCAAGGACCUUCGUCAGUCGCGGAUCGUGC
ACAAAUUAGUCUCAUCAGUAAAAUUCUCGGACUUACCUGAUAUAGCCGGAUGGCUUUUCAAUUACCCAUUCAUGCUGAGAGUAUACAAUCGGGUGGUUCAAUGGCAGGUGCCCAUAUUCCCUCGAAAGGACCCCGAAUGGAUGGAUUACAAAUCGUAUCUCACCGAGAGAGCUGUAAUGAAUAUAGGCCACGUGAUAUUGGUGUGUCGAUUAUGCAAUGACAGGUUGACCGUAUCGGGUGGCACUAACAAGAGUACCGUCAGCGAUAGUGCGUCUUUAUCCCAGUAUCACAAAUGUUAUCGGAGGGUUCCGAACAGGGAAUUGAGGGCAUUUCUACCACGCCGAGCUCUACACGAGCCGUCCCGAUAUAAUUGGCUUCUCCGGGAUCGUUUAGGAGCUCGGUCCGCAGUAUAUACAGGAUUUGCCCGAGCUGCUAAAGCUAGCAUAUCAACUAGGGACGUACUGUUGUUGGAGCGAACAGGCACACGUGAAGACUAA"
# Split the sample mRNA string into codons and translate them into the
# protein string; as the cell's last expression, the result is displayed.
translateCodons(rnaToCodon(rna))

'MSESTVVAVAERSRCCREIRVATYDLRSAYVPSILFPRNVVCSWLSEPRISRRLRTHHSPVQWGGICVKVTLRYGFNVSLCLQRLKTFYAGGRTGSEKASSLSATFLPPVAGEGQPRRVVLTLRGPREGAPLRRVSLSGMGQSHVNVPAASMISQVSRCHHASLRPISTSAFPMKVGVLSKSVHDRADIKGMSPEVKVSTCVRGPPPNSPKSMSILGRMSPIYRWAPWLGSRVDAGHFGLVPRGSKWFQTPAQPRCVKLAASYNNSQSRSMLLAHVSPPPGVSQDYKVVILSCDSLMLLSSALAAVAYQPGGISLPMPLCPNSYAFTLYAVSPVKDAKPEKQTPPAESMTSFHIHLRRIILGRPLRSSGSADHTRVLESPDRPFPRNTPSNTCSSQRCRLIPGHALFSAVTGVHACYLTSVYDTLSTQSSFFKCLARPTQIKEIRLSAHENHAFSLGVASARQASELLCCVCDNLDSDVEYRTQTWDSNARRKSIKSYTVVYVTSSSVTTIGARCRTELLPCESLSTRYQVTRRAGLVNMHVTWRATSGALSLTYPYTKSSTASQQKFISPLCRSRGHHRTWRGAALHLNFGCYCYTPGYAVRPRQRHMRINLVVSSCDWHSQVITSMSRYNQQSSSDVSDRTPTLASSFPPCWCTLDDYRHRTVYARGGLKAIGLISLINKNQINTHFRLRCKRIISRRRKPLGLKHRLLPSFFCCIFRLLFRENRSSNDYFRMLSRHNSSTQIGQVPEEVDNQPMRPAAPFSLIVNLCRSDHLIEPIRVLYSELCRTYCAWRFLPSTKVFCNVKYARMWDPPVTTSMDRKETRSHPKRKVVEFYVILGDGYTRYTPSGVNSQIEDNAPCLVRRSNTEIPTEHAGSPRLSQYHPIEKQESIRARPQPPTAGVYGLQHLRAYGHPDPSQDRRVINFPVRLLPPENTGSRIIINTFRLKFVGRGLGLGNHLVYSRSHTTEYHSGDLLVLETACRMPKNRPSSHVETYLPH