In [1]:
import Bio
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna

In [36]:
# Example DNA fragment used throughout the notebook.
# NOTE(review): Bio.Alphabet (generic_dna) was removed in Biopython 1.78; on
# newer releases the alphabet argument has to be dropped -- confirm the
# Biopython version this notebook targets.
dnaseq = Seq("AGTACTAGAGCATTCTATGGAGA", generic_dna)
# print() shows the plain sequence string rather than the Seq repr.
print(dnaseq)

AGTACTAGAGCATTCTATGGAGA


In [37]:
# Base-by-base complement (A<->T, C<->G) of the example sequence; the order of
# the bases is NOT reversed.
dnaseq.complement()

Seq('TCATGATCTCGTAAGATACCTCT', DNAAlphabet())

In [38]:
# Let's assume that translation can start from any codon, not just from the START codon (methionine, ATG).
# So we need to generate three reading frames (offsets 0, 1 and 2) for each of the two complementary strands.

In [39]:
# Reading frames of the given strand: drop 0, 1 or 2 leading bases.
seq1, seq2, seq3 = (dnaseq[offset:] for offset in range(3))
# The same three offsets applied to the complemented strand.
# NOTE(review): complement() keeps the base order, so these frames are read
# 3'->5'; a conventional six-frame translation would use reverse_complement()
# here -- confirm which one is intended.
seq4, seq5, seq6 = (dnaseq.complement()[offset:] for offset in range(3))

In [40]:
# Translate every strand slice at three further offsets with the standard
# genetic code (table 1), stopping at the first STOP codon.
# The shift is the outer loop so results are grouped by shift, then by slice.
# NOTE(review): seq1..seq3 and seq4..seq6 are already offset by 0/1/2, so the
# extra shift revisits the same reading frames and yields redundant proteins.
proteins = [
    frame[shift:].translate(table=1, to_stop=True)
    for shift in range(3)
    for frame in (seq1, seq2, seq3, seq4, seq5, seq6)
]

In [41]:
# Show the translations ordered from shortest to longest (stable sort).
sorted(proteins, key=len)

[Seq('', ExtendedIUPACProtein()),
 Seq('', ExtendedIUPACProtein()),
 Seq('Y', ExtendedIUPACProtein()),
 Seq('S', ExtendedIUPACProtein()),
 Seq('Y', ExtendedIUPACProtein()),
 Seq('Y', ExtendedIUPACProtein()),
 Seq('MIS', ExtendedIUPACProtein()),
 Seq('MIS', ExtendedIUPACProtein()),
 Seq('MIS', ExtendedIUPACProtein()),
 Seq('TRAFYG', ExtendedIUPACProtein()),
 Seq('TRAFYG', ExtendedIUPACProtein()),
 Seq('LEHSME', ExtendedIUPACProtein()),
 Seq('DLVRYL', ExtendedIUPACProtein()),
 Seq('STRAFYG', ExtendedIUPACProtein()),
 Seq('VLEHSME', ExtendedIUPACProtein()),
 Seq('HDLVRYL', ExtendedIUPACProtein()),
 Seq('VLEHSME', ExtendedIUPACProtein()),
 Seq('HDLVRYL', ExtendedIUPACProtein())]

In [42]:
# If we only want to translate from a proper START codon (ATG) up to the first STOP codon:

In [43]:
# Translate each strand slice from its first ATG up to the first STOP codon
# (standard genetic code, table 1).
proteins = []
seqs = [seq1, seq2, seq3, seq4, seq5, seq6]
for seq in seqs:
    position = seq.find("ATG")
    if position == -1:
        # find() returns -1 when there is no START codon. Slicing with
        # seq[-1:] would then translate just the last base and silently add
        # an empty protein, so skip this slice instead.
        continue
    # NOTE(review): slices of the same strand usually contain the same first
    # ATG, so the same protein can be appended more than once.
    proteins.append(seq[position:].translate(table=1, to_stop=True))

In [44]:
# Show the START-anchored translations ordered from shortest to longest.
sorted(proteins, key=len)

[Seq('ME', ExtendedIUPACProtein()),
 Seq('ME', ExtendedIUPACProtein()),
 Seq('ME', ExtendedIUPACProtein()),
 Seq('MIS', ExtendedIUPACProtein()),
 Seq('MIS', ExtendedIUPACProtein()),
 Seq('MIS', ExtendedIUPACProtein())]

In [None]:
# Define a function that generates all possible translations, reading through STOP codons (shown as '*') instead of terminating at them:

In [45]:
def translate(seq, start=False):
    """Print candidate protein translations of a DNA string, shortest first.

    The input is sliced at offsets 0, 1 and 2 on both the given strand and its
    complement. Each slice is translated with the standard genetic code
    (table 1) without stopping at STOP codons, so stops appear as '*' in the
    printed proteins.

    Parameters
    ----------
    seq : str
        DNA sequence to translate.
    start : bool, optional
        If False (default), every slice is translated at three further
        offsets (18 translations in total). If True, every slice is
        translated starting at its first "ATG"; slices without an "ATG" are
        skipped.

    Returns
    -------
    None
        The proteins are printed, one per line, ordered by length.
    """
    dnaseq = Seq(seq, generic_dna)
    # NOTE(review): complement() keeps the base order, so the second strand is
    # read 3'->5'; a conventional six-frame translation would use
    # reverse_complement() -- confirm which one is intended.
    complement = dnaseq.complement()
    frames = [
        dnaseq[0:], dnaseq[1:], dnaseq[2:],
        complement[0:], complement[1:], complement[2:],
    ]

    proteins = []
    if start:
        for frame in frames:
            position = frame.find("ATG")
            if position == -1:
                # No START codon: frame[-1:] would translate only the last
                # base and emit an empty protein, so skip this slice.
                continue
            proteins.append(frame[position:].translate(table=1))
    else:
        # NOTE(review): the slices are already offset by 0/1/2, so the extra
        # shift revisits the same reading frames and prints redundant results.
        for frame in frames:
            for shift in range(3):
                proteins.append(frame[shift:].translate(table=1))

    # sorted() is stable, so proteins of equal length keep generation order.
    for protein in sorted(proteins, key=len):
        print(protein)

In [46]:
# Print every translation of the example sequence (start=False by default),
# with STOP codons shown as '*'.
translate("AGTACTAGAGCATTCTATGGAG")

Y*SILW
Y*SILW
TRAFYG
Y*SILW
TRAFYG
LEHSME
MIS*DT
MIS*DT
*SRKIP
MIS*DT
*SRKIP
DLVRYL
STRAFYG
VLEHSME
VLEHSME
S*SRKIP
HDLVRYL
HDLVRYL


In [23]:
# Print only the translations that begin at the first ATG of each strand slice.
translate("AGTACTAGAGCATTCTATGGAG", start = True)

ME
ME
ME
MIS*DT
MIS*DT
MIS*DT
