In [6]:
"""
Unfortunately, although any RNA string can be translated into a unique protein string, 
reversing the process yields a huge number of possible RNA strings from a single protein string 
because most amino acids correspond to multiple RNA codons (see the RNA Codon Table).

Because of memory considerations, most data formats that are built into languages have upper bounds on how 
large an integer can be: in some versions of Python, an "int" variable may be required to be no 
larger than 2^31 - 1, or 2,147,483,647. As a result, to deal with very large numbers in Rosalind, 
we need to devise a system that allows us to manipulate large numbers without 
actually having to store large numbers.

"""

# Load the protein string. The input file may end with (or be wrapped by)
# newlines, so strip the surrounding whitespace from every line before joining;
# otherwise the line terminators would be passed downstream as if they were
# amino-acid symbols, which the codon table does not contain.
with open('sequences/reversing_trans.txt', 'r') as f:
    lines = f.readlines()

# Join once instead of growing the string piece by piece.
sequence = ''.join(line.strip() for line in lines)


In [7]:
# Reverse codon table: one-letter amino-acid symbol -> list of RNA codons that
# encode it. The '*' entry holds the stop codons.
codon_table = {
    'R':['CGU','CGC','CGA','CGG','AGA','AGG',],
    'V':['GUU','GUC','GUA','GUG',],
    'T':['ACU','ACC','ACA','ACG',],
    'A':['GCU','GCC','GCA','GCG',],
    'G':['GGU','GGC','GGA','GGG',],
    'S':['UCU','UCC','UCA','UCG','AGU','AGC',],
    'P':['CCU','CCC','CCA','CCG',],
    'F':['UUU','UUC',],
    'L':['UUA','UUG','CUU','CUC','CUA','CUG'],
    'Y':['UAU','UAC',],
    '*':['UAA','UAG','UGA',],
    'C':['UGU','UGC',],
    'W':['UGG',],
    'I':['AUU','AUC','AUA',],
    'M':['AUG',],
    'H':['CAU','CAC',],
    'N':['AAU','AAC',],
    'D':['GAU','GAC',],
    'Q':['CAA','CAG',],
    'K':['AAA','AAG',],
    'E':['GAA','GAG',],
}

def number_of_rna_strings(sequence: str, modulus: int = 1000000) -> int:
    """Count the RNA strings that translate to ``sequence``, modulo ``modulus``.

    The count is the product, over every amino acid in ``sequence``, of the
    number of codons listed for it in ``codon_table``, multiplied by the number
    of stop codons (the translated string is terminated by one stop codon).

    The running product is reduced modulo ``modulus`` after every
    multiplication, so the intermediate value never grows beyond
    ``modulus * 6`` no matter how long the protein is. The result is the same
    as reducing the full product once at the end.

    Args:
        sequence: Protein string of one-letter amino-acid symbols (keys of
            ``codon_table``). Whitespace characters such as line terminators
            are ignored.
        modulus: Positive integer the result is reduced by. Defaults to
            1000000.

    Returns:
        The number of candidate RNA strings modulo ``modulus``. An empty
        ``sequence`` yields the number of stop codons modulo ``modulus``.

    Raises:
        ValueError: If ``sequence`` contains a non-whitespace symbol that is
            not a key of ``codon_table``.
    """
    # Start from the number of stop codons: every candidate RNA string ends
    # with exactly one of them.
    all_combinations = len(codon_table['*']) % modulus

    for position, aa in enumerate(sequence):
        codons = codon_table.get(aa)
        if codons is None:
            # Tolerate stray whitespace (e.g. a trailing newline read from a
            # file) instead of failing on it.
            if isinstance(aa, str) and aa.isspace():
                continue
            raise ValueError(
                'unknown amino acid symbol %r at position %d' % (aa, position)
            )
        # Reduce at every step so the running product stays small.
        all_combinations = (all_combinations * len(codons)) % modulus

    return all_combinations

# Print the count of candidate RNA strings (modulo 1,000,000) for the protein
# string read from the input file.
print(number_of_rna_strings(sequence))
        
        
    

12
