In [4]:
from lookup import amino_acid_to_rna_codon
from tools import read_local_file

Two useful facts in modular arithmetic:

if **a≡b mod n** and **c≡d mod n**, then **a+c≡b+d** mod n and **a×c≡b×d** mod n

Example, a = 15, b = 5, c = 11, d = 1: a % 10 = b % 10, c % 10 = d % 10 ==> a × c % 10 = b × d % 10

Thus, to calculate a1 × a2 × a3 × ... × an   % 1,000,000  we can just calculate the product of ai % 1,000,000, applying % 1,000,000 to every initial terms and after each multiplication .

In [5]:
def update_product(product, aminoacid, n):
    codons = amino_acid_to_rna_codon[aminoacid]
    return product * len(codons) % n


def solve(path):
    lines = read_local_file(path)
    protein_string = lines[0]
    print(protein_string)
    product = 1
    n = 1000000
    for aa in protein_string:
        product = update_product(product, aa, n)
    # Different stop codons were possible
    product = update_product(product, 'Stop', n)
    return product

In [6]:
# Test
solve('./txt/rosalind_mrna_test.txt')

MA


12

In [7]:
# Submission
solve('./txt/rosalind_mrna.txt')

MGMQINAGVKRNLQDTHWFNLKMRTADPDWRYCLAHRHHPDLPNTTCLWVMIQVCIDMESEVRWAPWFRQGFEFTKYLIYPVRDQTLGTGVPSSCKACKIGWHFKKGTPHYIRRVNPAGENCWTAPVRLEICLQDTYCPQDKYAPFGVHEEHDCCPNPFRDIYEGKTEINTGFNAAAKFALAIMPVTYWWQEHPRLQHTNCLKDRYVAPRGINAEDWSPWNTMINSQMKLDTIFSELNGRNYTHGPSHMSCHGGRTIMDHQHQYPDIQFYYVQRFIAPLFNHGLARMNSSDPCQLFTRKLTKRAESQMMWDYTHYKNLRWMCTSGTKRFTYYMAVYMAQWATTTVKLPSIKPSAANANGYWMNLEYPYIMDFQPNPHWQPKHTPFGWLVRPGHLNLWVFHWTEVHSYRVYYWDACMMSFWVMNQWPCHIFMWEAWELKMHEHHPRHACESKAVERYKMIKCDDESMWVHVNKIEAENATGELFLHIAGYSWWKAMRENWYKFFQIFLQWWSVQMLHFDPSDVAVIWYIPGCIFFWHAMSRDFVDSDRHKPCCMQIYRAVTLDYNEHAWMPCTNAQHNHWCKRYYFKSAKYPIGHTYLGHDSWANFSMDVCWPTWCLPNSMREPPWWISNPFVFCHKFNSMEPMSHYWNTWAIVDDRVEGPTFFSLWQKPQHGEMADHQLILIHLLAFKANQGAMHGQYWENMVEYCKNVICSMSLMHLKKFCIIPPVARCGCWYLISHPMGCLKNYKTKAEKQPGSGHDTPCYRCCNYHVEYSHSAVQPYYNVVNTRDVSMTSKSICYFGWRCFHVYEYNSYARHKLYQTRINNKYGHGMNHAPNPFVHGVCADKQLQFENSIKAILEARCWSTGILDVHGYDAPPPLFFSGLVKCNQDTMTYFLIEEAYSPHGSYEMCLVDPKRFYCMIWCFWTRSPGVFSKSHFTMVCLADMHSAVQGDPIQMEWDCRLETCSYMTRLEHWPCAKVFLQNRVVSMAAQMTENGDMSKD

724224