## Problem
Either strand of a DNA double helix can serve as the coding strand for RNA transcription. Hence, a given DNA string implies six total reading frames, or ways in which the same region of DNA can be translated into amino acids: three reading frames result from reading the string itself, whereas three more result from reading its reverse complement.

An open reading frame (ORF) is one which starts from the start codon and ends by stop codon, without any other stop codons in between. Thus, a candidate protein string is derived by translating an open reading frame into amino acids until a stop codon is reached.

**Given**: A DNA string s
 of length at most 1 kbp in FASTA format.

**Return**: Every distinct candidate protein string that can be translated from ORFs of s
. Strings can be returned in any order.

### Sample Dataset

**>Rosalind_99 AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG**

### Sample Output
MLLGSFRLIPKETLIQVAGSSPCNLS

M

MGMTPRLGLESLLE

MTPRLGLESLLE

In [16]:
sequence = ">Rosalind_99 AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG"

def translate_rna_to_protein(rna):
    """Translates an RNA sequence into a protein sequence."""
    codon_table = {
        'UUU': 'F', 'UUC': 'F', 'UUA': 'L', 'UUG': 'L',
        'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L',
        'AUU': 'I', 'AUC': 'I', 'AUA': 'I', 'AUG': 'M',
        'GUU': 'V', 'GUC': 'V', 'GUA': 'V', 'GUG': 'V',
        'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S',
        'CCU': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
        'ACU': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
        'GCU': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
        'UAU': 'Y', 'UAC': 'Y', 'UAA': '*', 'UAG': '*',
        'CAU': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
        'AAU': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
        'GAU': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
        'UGU': 'C', 'UGC': 'C', 'UGA': '*', 'UGG': 'W',
        'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'AGU': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
        'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'
    }
    protein = ""
    for i in range(0, len(rna), 3):
        codon = rna[i:i+3]
        if len(codon) == 3:
            amino_acid = codon_table.get(codon, '')
            if amino_acid == '*':
                break
            protein += amino_acid
    return protein

def rv_comp(seq):
  if ' > ' in seq:
        seq = seq.split(' ')[1]
  complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
  return ''.join([complement.get(base, base) for base in seq[::-1]])

def find_orfs(dna):
    orfs = set()
    if ' > ' in dna:
        dna = dna.split(' ')[1]
    for strand in [dna, rv_comp(dna)]:
        for frame in range(3):
            rna = strand[frame:].replace('T', 'U')
            for i in range(0, len(rna), 3):
                if rna[i:i+3] == "AUG":
                    protein = translate_rna_to_protein(rna[i:])
                    if protein:
                        orfs.add(protein)
    return orfs


for protein in find_orfs(sequence):
    print(protein)

M
MLLGSFRLIPKETLIQVAGSSPCNLS
MGMTPRLGLESLLE
MIRVASQ
MA
MTPRLGLESLLE
