In [21]:
def translate_sequence(sequence):
    
    codon_table= {
        'UUU': 'F', 'UUC': 'F', 'UUA': 'L', 'UUG': 'L',
        'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L',
        'AUU': 'I', 'AUC': 'I', 'AUA': 'I', 'AUG': 'M',
        'GUU': 'V', 'GUC': 'V', 'GUA': 'V', 'GUG': 'V',
        'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S',
        'AGU': 'S', 'AGC': 'S', 'CCU': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'ACU': 'T', 'ACC': 'T',
        'ACA': 'T', 'ACG': 'T', 'GCU': 'A', 'GCC': 'A',
        'GCA': 'A', 'GCG': 'A', 'UAU': 'Y', 'UAC': 'Y',
        'UAA': '*', 'UAG': '*', 'UGA': '*', 'CAU': 'H',
        'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 'AAU': 'N',
        'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'GAU': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'UGU': 'C',
        'UGC': 'C', 'UGG': 'W', 'CGU': 'R', 'CGC': 'R',
        'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R',
        'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'
    }

    # Remove spaces from sequence
    sequence = sequence.replace(" ", "")

    sequence = sequence.upper()

    # Check for invalid characters
    for char in sequence:
        if char not in 'ATGCU':
            return "Error: Invalid character found. Only 'A', 'T', 'G', 'C', 'U' are allowed."

    # Detect if it's DNA or RNA
    if 'T' in sequence:
        sequence = sequence.replace('T', 'U')  # Convert DNA to RNA

    protein = ""

    # Translate codons into amino acids
    for i in range(0, len(sequence) , 3):
        codon = sequence[i:i+3]
        amino_acid = codon_table.get(codon, '?')  # '?' for unknown codons
        if amino_acid == '*':  # Stop translation at stop codon
            break
        protein += amino_acid

    return protein



# Get user input
user_seq = input("Enter a DNA or RNA sequence: ").strip().upper()

# Translate and display the result
translated_seq = translate_sequence(user_seq)
print("\nTranslated Protein Sequence:", translated_seq)


Enter a DNA or RNA sequence:   ACTCCGGACTTTGGCCTTAGCAGTAGTTAGTGTGGGAGGCTGGGAAGACTGGGAGCAGTCTCTTAAACAA AAGCAAAAGAATAAGCTTCGGGCGCTGTAGTACCTGCCAGCTTTCGCCACAGGAGCCTAGGCTGCCACCT GTAGGTCACTTGGGCTCCAGCTATGTGGCTGCCTCTTCTGCTGGGTGCCTTACTCTGGGCAGTGCTGTGG TTGCTCAGGGACCGGCAGAGCCTGCCCGCCAGCAATGCCTTTGTCTTCATCACCGGCTGTGACTCAGGCT TTGGGCGCCTTCTGGCACTGCAGCTGGACCAGAGAGGCTTCCGAGTCCTGGCCAGCTGCCTGACCCCCTC CGGGGCCGAGGACCTGCAGCGGGTGGCCTCCTCCCGCCTCCACACCACCCTGTTGGATATCACTGATCCC CAGAGCGTCCAGCAGGCAGCCAAGTGGGTGGAGATGCACGTTAAGGAAGCAGGGCTTTTTGGTCTGGTGA ATAATGCTGGTGTGGCTGGTATCATCGGACCCACACCATGGCTGACCCGGGACGATTTCCAGCGGGTGCT GAATGTGAACACAATGGGTCCCATCGGGGTCACCCTTGCCCTGCTGCCTCTGCTGCAGCAAGCCCGGGGC CGGGTGATCAACATCACCAGCGTCCTGGGTCGCCTGGCAGCCAATGGTGGGGGCTACTGTGTCTCCAAAT TTGGCCTGGAGGCCTTCTCTGACAGCCTGAGGCGGGATGTAGCTCATTTTGGGATACGAGTCTCCATCGT GGAGCCTGGCTTCTTCCGAACCCCTGTGACCAACCTGGAGAGTCTGGAGAAAACCCTGCAGGCCTGCTGG GCACGGCTGCCTCCTGCCACACAGGCCCACTATGGGGGGGCCTTCCTCACCAAGTACCTGAAAATGCAAC AGCGCATCATGAACCTGATCTGTGACCCGGACCTAACCAAGGTGAG


Translated Protein Sequence: TPDFGLSSS


In [26]:
def reverse_complement(seq):
    """Generate the reverse complement of an RNA sequence."""
    complement = {'A': 'U', 'U': 'A', 'G': 'C', 'C': 'G'}
    return "".join(complement[base] for base in reversed(seq))

def validate_sequence(sequence):
    """Check if the sequence contains only valid nucleotides (A, T, G, C, U)."""
    valid_bases = {'A', 'T', 'G', 'C', 'U'}
    for base in sequence:
        if base not in valid_bases:
            return False
    return True

def translate_sequence(sequence):
    """Translate a DNA or RNA sequence into a protein sequence."""
    codon_table = {
        'UUU': 'F', 'UUC': 'F', 'UUA': 'L', 'UUG': 'L',
        'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L',
        'AUU': 'I', 'AUC': 'I', 'AUA': 'I', 'AUG': 'M',
        'GUU': 'V', 'GUC': 'V', 'GUA': 'V', 'GUG': 'V',
        'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S',
        'AGU': 'S', 'AGC': 'S', 'CCU': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'ACU': 'T', 'ACC': 'T',
        'ACA': 'T', 'ACG': 'T', 'GCU': 'A', 'GCC': 'A',
        'GCA': 'A', 'GCG': 'A', 'UAU': 'Y', 'UAC': 'Y',
        'UAA': '*', 'UAG': '*', 'UGA': '*', 'CAU': 'H',
        'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 'AAU': 'N',
        'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'GAU': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'UGU': 'C',
        'UGC': 'C', 'UGG': 'W', 'CGU': 'R', 'CGC': 'R',
        'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R',
        'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'
    }

    # Remove spaces and convert to uppercase
    sequence = sequence.replace(" ", "").upper()

    # Validate sequence
    if not validate_sequence(sequence):
        return "Error: Invalid character found. Only 'A', 'T', 'G', 'C', 'U' are allowed."

    # Convert DNA to RNA if necessary
    if 'T' in sequence:
        sequence = sequence.replace('T', 'U')

    protein = ""

    # Translate codons into amino acids
    for i in range(0, len(sequence) - 2, 3):
        codon = sequence[i:i+3]
        amino_acid = codon_table.get(codon, '?')  # '?' for unknown codons
        if amino_acid == '*':  # Stop translation at stop codon
            break
        protein += amino_acid

    return protein

def translate_frames(sequence):
    """Find all six reading frames and their translated protein sequences."""
    codon_table = {
        'UUU': 'F', 'UUC': 'F', 'UUA': 'L', 'UUG': 'L',
        'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L',
        'AUU': 'I', 'AUC': 'I', 'AUA': 'I', 'AUG': 'M',
        'GUU': 'V', 'GUC': 'V', 'GUA': 'V', 'GUG': 'V',
        'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S',
        'AGU': 'S', 'AGC': 'S', 'CCU': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'ACU': 'T', 'ACC': 'T',
        'ACA': 'T', 'ACG': 'T', 'GCU': 'A', 'GCC': 'A',
        'GCA': 'A', 'GCG': 'A', 'UAU': 'Y', 'UAC': 'Y',
        'UAA': '*', 'UAG': '*', 'UGA': '*', 'CAU': 'H',
        'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 'AAU': 'N',
        'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'GAU': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'UGU': 'C',
        'UGC': 'C', 'UGG': 'W', 'CGU': 'R', 'CGC': 'R',
        'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R',
        'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'
    }

    # Remove spaces and convert to uppercase
    sequence = sequence.replace(" ", "").upper()

    # Validate sequence
    if not validate_sequence(sequence):
        return "Error: Invalid character found. Only 'A', 'T', 'G', 'C', 'U' are allowed."

    # Convert DNA to RNA if necessary
    if 'T' in sequence:
        sequence = sequence.replace('T', 'U')

    # Generate 3 forward reading frames
    forward_frames = [sequence[i:] for i in range(3)]
    
    # Generate 3 reverse complement reading frames
    reverse_seq = reverse_complement(sequence)
    reverse_frames = [reverse_seq[i:] for i in range(3)]

    all_frames = forward_frames + reverse_frames
    frame_labels = ["Forward Frame 1", "Forward Frame 2", "Forward Frame 3",
                    "Reverse Frame 1", "Reverse Frame 2", "Reverse Frame 3"]

    results = {}

    # Translate each frame
    for label, frame in zip(frame_labels, all_frames):
        protein = ""
        start_found = False  # Track if start codon (AUG) has been found

        for i in range(0, len(frame) - 2, 3):
            codon = frame[i:i+3]

            if codon == 'AUG':  # Start translation at AUG
                start_found = True
                protein = "M"

            elif start_found:
                if codon in {'UAA', 'UAG', 'UGA'}:  # Stop translation at stop codon
                    break
                protein += codon_table.get(codon, '?')

        results[label] = protein if protein else "No valid protein found"

    return results

# Get user input
user_seq = input("Enter a DNA or RNA sequence: ").strip()

print("\nSingle-frame Translation:")
print("Translated Protein Sequence:", translate_sequence(user_seq))

print("\nTranslated Protein Sequences from all Reading Frames:")
translated_frames = translate_frames(user_seq)
for frame, protein_seq in translated_frames.items():
    print(f"{frame}: {protein_seq}")


Enter a DNA or RNA sequence:  ACTCCGGACTTTGGCCTTAGCAGTAGTTAGTGTGGGAGGCTGGGAAGACTGGGAGCAGTCTCTTAAACAA AAGCAAAAGAATAAGCTTCGGGCGCTGTAGTACCTGCCAGCTTTCGCCACAGGAGCCTAGGCTGCCACCT GTAGGTCACTTGGGCTCCAGCTATGTGGCTGCCTCTTCTGCTGGGTGCCTTACTCTGGGCAGTGCTGTGG TTGCTCAGGGACCGGCAGAGCCTGCCCGCCAGCAATGCCTTTGTCTTCATCACCGGCTGTGACTCAGGCT TTGGGCGCCTTCTGGCACTGCAGCTGGACCAGAGAGGCTTCCGAGTCCTGGCCAGCTGCCTGACCCCCTC CGGGGCCGAGGACCTGCAGCGGGTGGCCTCCTCCCGCCTCCACACCACCCTGTTGGATATCACTGATCCC CAGAGCGTCCAGCAGGCAGCCAAGTGGGTGGAGATGCACGTTAAGGAAGCAGGGCTTTTTGGTCTGGTGA ATAATGCTGGTGTGGCTGGTATCATCGGACCCACACCATGGCTGACCCGGGACGATTTCCAGCGGGTGCT GAATGTGAACACAATGGGTCCCATCGGGGTCACCCTTGCCCTGCTGCCTCTGCTGCAGCAAGCCCGGGGC CGGGTGATCAACATCACCAGCGTCCTGGGTCGCCTGGCAGCCAATGGTGGGGGCTACTGTGTCTCCAAAT TTGGCCTGGAGGCCTTCTCTGACAGCCTGAGGCGGGATGTAGCTCATTTTGGGATACGAGTCTCCATCGT GGAGCCTGGCTTCTTCCGAACCCCTGTGACCAACCTGGAGAGTCTGGAGAAAACCCTGCAGGCCTGCTGG GCACGGCTGCCTCCTGCCACACAGGCCCACTATGGGGGGGCCTTCCTCACCAAGTACCTGAAAATGCAAC AGCGCATCATGAACCTGATCTGTGACCCGGACCTAACCAAGGTGAGC


Single-frame Translation:
Translated Protein Sequence: TPDFGLSSS

Translated Protein Sequences from all Reading Frames:
Forward Frame 1: MNLICDPDLTKVSRCLEHALTARHPRTRYSPGWDAKLLWLPASYLPASLVDAVLTWVLPKPAQAVY
Forward Frame 2: MPLSSSPAVTQALGAFWHCSWTREASESWPAA
Forward Frame 3: MADPGRFPAGAECEHNGSHRGHPCPAASAAASPGPGDQHHQRPGSPGSQWWGLLCLQIWPGGLL
Reverse Frame 1: MVWVR
Reverse Frame 2: MLQASAHLG
Reverse Frame 3: MRCCIFRYLVRKAPP
