https://rosalind.info/problems/ba4b/

class PeptideEncoding: find all substrings of a DNA string that encode a given peptide sequence<br>
- init(): take a DNA string and an amino acid string as input and initialize the instance variables
- reverse_complement(): reverse complement of dna sequence
- kmers(): calculate k-mers in a dna sequence
- translate(): find all substrings of the DNA that encode the peptide, either directly or via their reverse complement

In [4]:
class PeptideEncoding:
    """Locate the substrings of a DNA string that encode a given peptide.

    A substring is reported when either the substring itself or its reverse
    complement translates, codon by codon, into the peptide.

    Attributes:
        dna: the DNA string that is scanned.
        peptide: the amino acid string to look for (one-letter codes).
        n: length of the DNA windows examined, i.e. ``3 * len(peptide)``.
        codonTable: mapping from DNA codon to one-letter amino acid code;
            ``'*'`` is used for the codons TAA, TAG and TGA.
    """

    # Base pairing used to build the opposite strand.
    _COMPLEMENT = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}

    # Codon table to translate codons into amino acids.
    _CODON_TABLE = {'TTT': 'F', 'CTT': 'L', 'ATT': 'I', 'GTT': 'V',
                    'TTC': 'F', 'CTC': 'L', 'ATC': 'I', 'GTC': 'V',
                    'TTA': 'L', 'CTA': 'L', 'ATA': 'I', 'GTA': 'V',
                    'TTG': 'L', 'CTG': 'L', 'ATG': 'M', 'GTG': 'V',
                    'TCT': 'S', 'CCT': 'P', 'ACT': 'T', 'GCT': 'A',
                    'TCC': 'S', 'CCC': 'P', 'ACC': 'T', 'GCC': 'A',
                    'TCA': 'S', 'CCA': 'P', 'ACA': 'T', 'GCA': 'A',
                    'TCG': 'S', 'CCG': 'P', 'ACG': 'T', 'GCG': 'A',
                    'TAT': 'Y', 'CAT': 'H', 'AAT': 'N', 'GAT': 'D',
                    'TAC': 'Y', 'CAC': 'H', 'AAC': 'N', 'GAC': 'D',
                    'TAA': '*', 'CAA': 'Q', 'AAA': 'K', 'GAA': 'E',
                    'TAG': '*', 'CAG': 'Q', 'AAG': 'K', 'GAG': 'E',
                    'TGT': 'C', 'CGT': 'R', 'AGT': 'S', 'GGT': 'G',
                    'TGC': 'C', 'CGC': 'R', 'AGC': 'S', 'GGC': 'G',
                    'TGA': '*', 'CGA': 'R', 'AGA': 'R', 'GGA': 'G',
                    'TGG': 'W', 'CGG': 'R', 'AGG': 'R', 'GGG': 'G'}

    def __init__(self, dna, peptide):
        """Store the inputs and derive the window length.

        Args:
            dna: a DNA string.
            peptide: an amino acid string.
        """
        # a dna string
        self.dna = dna
        # an amino acid string
        self.peptide = peptide
        # length of substrings in dna (three bases per amino acid)
        self.n = len(peptide) * 3
        # per-instance copy, so editing one object's table never affects another
        self.codonTable = dict(self._CODON_TABLE)

    def reverse_complement(self, dna):
        """Return the reverse complement of ``dna``.

        Raises:
            KeyError: if ``dna`` contains a character other than A, C, G or T.
        """
        return "".join(self._COMPLEMENT[base] for base in reversed(dna))

    def kmers(self, seq, k):
        """Return every length-``k`` substring of ``seq``, left to right."""
        return [seq[i:i + k] for i in range(len(seq) - k + 1)]

    def _decode(self, seq):
        """Translate ``seq`` codon by codon.

        Returns the amino acid string, or None as soon as a codon is not in
        ``self.codonTable`` (for example because it contains an ``N``).
        """
        residues = []
        for i in range(0, len(seq) - 2, 3):
            residue = self.codonTable.get(seq[i:i + 3])
            if residue is None:
                return None
            residues.append(residue)
        return "".join(residues)

    def translate(self):
        """Return all substrings of ``self.dna`` that encode ``self.peptide``.

        Windows are visited left to right. A window is appended once if it
        translates to the peptide and once more if its reverse complement
        does; in both cases the window itself is the reported string.

        An empty peptide yields an empty list, and windows holding a codon
        missing from the table are skipped instead of aborting the scan.
        """
        # Without this guard n == 0 and every empty window would "match".
        if not self.peptide:
            return []

        substrs = []  # to store all substrings
        # iterate over all k-mers of dna
        for kmer in self.kmers(self.dna, self.n):
            # search peptide in forward kmer strand
            forward = self._decode(kmer)
            if forward is None:
                # Untranslatable window (e.g. an ambiguous base): the reverse
                # strand cannot be translated either, so move on.
                continue
            if forward == self.peptide:
                substrs.append(kmer)

            # search peptide in reverse kmer strand; a hit still reports the
            # forward-strand window, so no second reverse complement is needed
            if self._decode(self.reverse_complement(kmer)) == self.peptide:
                substrs.append(kmer)

        return substrs
    
if __name__ == "__main__":
    # input filename and read it
    filename = input("Enter Filename: ")
    with open(filename, "r") as f:
        # Whitespace splitting tolerates "\r\n" line endings, blank lines and
        # trailing spaces, none of which may end up inside the DNA string.
        fields = f.read().split()
    if len(fields) != 2:
        raise ValueError(
            "expected a DNA string and a peptide on separate lines, found "
            + str(len(fields)) + " value(s) in " + filename
        )
    dna, peptide = fields

    # create object for PeptideEncoding class
    pepenc = PeptideEncoding(dna, peptide)
    # find out all substrings encoded peptide
    substrs = pepenc.translate()
    # write result to a file object, one substring per line
    with open(filename+".out", "w") as ofile:
        print("\n".join(substrs), file=ofile)

Enter Filename: rosalind_ba4b.txt
