In [1]:
match = {
    'A': 'T',
    'T': 'A',
    'G': 'C',
    'C': 'G',
    'R': 'Y',
    'Y': 'R',
    'M': 'K',
    'K': 'M',
    'S': 'S',
    'W': 'W',
    'H': 'D',
    'D': 'H',
    'B': 'V',
    'V': 'B',
    'N': 'N'
}

mixed_bases = {'A':['A'],
               'T':['T'],
               'G':['G'],
               'C':['C'],
               'R':['A','G'],
               'Y':['C','T'],
               'M':['A','C'],
               'K':['G','T'],
               'S':['G','C'],
               'W':['A','T'],
               'H':['A','C','T'],
               'B':['G','C','T'],
               'V':['A','C','G'],
               'D':['A','G','T'],
               'N':['A','C','G','T']}

codon = { 
        'ATA':'I', 'ATC':'I', 'ATT':'I', 'ATG':'M', 
        'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T', 
        'AAC':'N', 'AAT':'N', 'AAA':'K', 'AAG':'K', 
        'AGC':'S', 'AGT':'S', 'AGA':'R', 'AGG':'R',                  
        'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L', 
        'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P', 
        'CAC':'H', 'CAT':'H', 'CAA':'Q', 'CAG':'Q', 
        'CGA':'R', 'CGC':'R', 'CGG':'R', 'CGT':'R', 
        'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V', 
        'GCA':'A', 'GCC':'A', 'GCG':'A', 'GCT':'A', 
        'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E', 
        'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G', 
        'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S', 
        'TTC':'F', 'TTT':'F', 'TTA':'L', 'TTG':'L', 
        'TAC':'Y', 'TAT':'Y', 'TAA':'*', 'TAG':'*', 
        'TGC':'C', 'TGT':'C', 'TGA':'*', 'TGG':'W', 
    }

In [2]:
def translate(seq):
    
    l = len(seq)
    assert( l % 3 == 0)
    splited_seq = [seq[ii:ii+3] for ii in range(0,l,3)]
    
    results = []
    for c in splited_seq:
        aas = set()
        for x in mixed_bases[c[0]]:
            for y in mixed_bases[c[1]]:
                for z in mixed_bases[c[2]]:
                    aas.add(codon[x+y+z])
        results.append(aas)
        
    for resi,aas in enumerate(results, start=1):
        print("{:<10d}{:20s}{:20s}".format(resi, splited_seq[resi-1], ''.join(aas)) )
        
def translate2(seq):
    
    l = len(seq)
    assert( l % 3 == 0)
    splited_seq = [seq[ii:ii+3] for ii in range(0,l,3)]
    
    results = []
    for c in splited_seq:
        aas = set()
        for x in mixed_bases[c[0]]:
            for y in mixed_bases[c[1]]:
                for z in mixed_bases[c[2]]:
                    aas.add(codon[x+y+z])
        results.append(aas)
        
    to_print = ""
    for resi,aas in enumerate(results, start=1):
        if ( len(aas) == 1):
            to_print += "".join(aas)
        else:
            to_print += "[" + ''.join(aas) + "]"
            
    print(to_print)
        
    #return results

def reverse_complementary(seq):
    r = [match[ii] for ii in seq][::-1]
    return ''.join(r)
        
    #return results

def reverse_complementary(seq):
    r = [match[ii] for ii in seq][::-1]
    return ''.join(r)

In [9]:

# Enter your original unpadded DNA here. Ensure that the output below matches your protein sequence
# This sequence comes from step 7


seq = 'GCGCGCATTGAAAATGCGCTGGCGGAAGAACTGAGCGCGGAATTTGCGCGCGAAGAAGAACTGCTGGAACTGCATCGCCTGATTGAAAATTTTCTGGAAGAAGCGAAACGCGTGAAACTGACCAAAGCGCTGCAGGGCAATGGCGATCCGCGCCTGGAACTGGCGGCG'
translate(seq)

1         GCG                 A                   
2         CGC                 R                   
3         ATT                 I                   
4         GAA                 E                   
5         AAT                 N                   
6         GCG                 A                   
7         CTG                 L                   
8         GCG                 A                   
9         GAA                 E                   
10        GAA                 E                   
11        CTG                 L                   
12        AGC                 S                   
13        GCG                 A                   
14        GAA                 E                   
15        TTT                 F                   
16        GCG                 A                   
17        CGC                 R                   
18        GAA                 E                   
19        GAA                 E                   
20        GAA                 E

In [10]:
s = seq
s_splited = [s[ii:ii+3] for ii in range(0,len(s),3)]

# You may need to adjust this path. You made this file in step 6

with open("degenerate_codons.list") as f:
    lines = [line.strip() for line in f]
    
d = {}
for line in lines:
    line = line.strip()
    if (len(line) == 0 ):
        continue
    temp = line.strip().split()
    d[int(temp[0])] = temp[1]
    
for k,v in d.items():
    s_splited[k-1] = v
    


In [11]:
s2 = ''.join(s_splited)
translate(s2)

1         KVM                 AYEDSC*G            
2         CVA                 RQP                 
3         ATT                 I                   
4         GAA                 E                   
5         AAT                 N                   
6         GCG                 A                   
7         CTG                 L                   
8         GCG                 A                   
9         GAA                 E                   
10        DHA                 TASEI*KLV           
11        CWA                 QL                  
12        AGC                 S                   
13        GCG                 A                   
14        GAA                 E                   
15        TTT                 F                   
16        GCG                 A                   
17        CGC                 R                   
18        GAA                 E                   
19        GAA                 E                   
20        GAA                 E

In [12]:
print("This is your degenerate codon DNA sequence")
print("")
print(s2)
print("")
print("#######################################################################")

This is your degenerate codon DNA sequence

KVMCVAATTGAAAATGCGCTGGCGGAADHACWAAGCGCGGAATTTGCGCGCGAAGAAGAACTGCTGGAACTGCATCGCCTGATTGAAAATTTTCTGGAAGAAGCGAAACGCGTGAAACTGACCMAAGCGKYACAGRRCAATGGCGATCCGCGCCTGGAASYAGCGGCG

#######################################################################


In [13]:
translate2(s2.upper())

[AYEDSC*G][RQP]IENALAE[TASEI*KLV][QL]SAEFAREEELLELHRLIENFLEEAKRVKLT[KQ]A[SAVL]Q[GNSD]NGDPRLE[AVLP]AA


In [14]:
my_final_sequence = "KVMCVAATTGAAAATGCGCTGGCGGAADHACWAAGCGCGGAATTTGCGCGCGAAGAAGAACTGCTGGAACTGCATCGCCTGATTGAAAATTTTCTGGAAGAAGCGAAACGCGTGAAACTGACCMAAGCGKYACAGRRCAATGGCGATCCGCGCCTGGAASYAGCGGCG"

translate2(my_final_sequence)

[AYEDSC*G][RQP]IENALAE[TASEI*KLV][QL]SAEFAREEELLELHRLIENFLEEAKRVKLT[KQ]A[SAVL]Q[GNSD]NGDPRLE[AVLP]AA
