In [1]:
import os

codon_table = {
    'UUU': 'F', 'UUC': 'F', 'UUA': 'L', 'UUG': 'L',
    'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L',
    'AUU': 'I', 'AUC': 'I', 'AUA': 'I', 'AUG': 'M',
    'GUU': 'V', 'GUC': 'V', 'GUA': 'V', 'GUG': 'V',
    'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S',
    'AGU': 'S', 'AGC': 'S', 'CCU': 'P', 'CCC': 'P',
    'CCA': 'P', 'CCG': 'P', 'ACU': 'T', 'ACC': 'T',
    'ACA': 'T', 'ACG': 'T', 'GCU': 'A', 'GCC': 'A',
    'GCA': 'A', 'GCG': 'A', 'UAU': 'Y', 'UAC': 'Y',
    'UAA': '*', 'UAG': '*', 'UGA': '*', 'CAU': 'H',
    'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 'AAU': 'N',
    'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'GAU': 'D',
    'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'UGU': 'C',
    'UGC': 'C', 'UGG': 'W', 'CGU': 'R', 'CGC': 'R',
    'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R',
    'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'
}

def reverse_complement(seq):
    """Generate the reverse complement of an RNA sequence."""
    complement = {'A': 'U', 'U': 'A', 'G': 'C', 'C': 'G'}
    return "".join(complement[base] for base in reversed(seq))

def validate_sequence(sequence):
    """Check if the sequence contains only valid nucleotides (A, T, G, C, U)."""
    valid_bases = {'A', 'T', 'G', 'C', 'U'}
    return all(base in valid_bases for base in sequence)

def translate_sequence(sequence):
    """Translate a DNA or RNA sequence into a protein sequence."""
    sequence = sequence.replace(" ", "").upper()
    if not validate_sequence(sequence):
        return "Error: Invalid character found. Only 'A', 'T', 'G', 'C', 'U' are allowed."
    if 'T' in sequence:
        sequence = sequence.replace('T', 'U')
    
    protein = ""
    for i in range(0, len(sequence) - 2, 3):
        codon = sequence[i:i+3]
        amino_acid = codon_table.get(codon, '?')
        if amino_acid == '*':
            break
        protein += amino_acid
    return protein

def translate_frames(sequence):
    """Find all six reading frames and their translated protein sequences."""
    sequence = sequence.replace(" ", "").upper()
    if not validate_sequence(sequence):
        return "Error: Invalid character found. Only 'A', 'T', 'G', 'C', 'U' are allowed."
    if 'T' in sequence:
        sequence = sequence.replace('T', 'U')
    
    forward_frames = [sequence[i:] for i in range(3)]
    reverse_seq = reverse_complement(sequence)
    reverse_frames = [reverse_seq[i:] for i in range(3)]
    
    all_frames = forward_frames + reverse_frames
    frame_labels = ["Forward Frame 1", "Forward Frame 2", "Forward Frame 3",
                    "Reverse Frame 1", "Reverse Frame 2", "Reverse Frame 3"]
    
    results = {}
    for label, frame in zip(frame_labels, all_frames):
        protein_regions = []
        protein = ""
        in_orf = False
        
        for i in range(0, len(frame) - 2, 3):
            codon = frame[i:i+3]
            if codon == 'AUG':
                if in_orf and protein:
                    protein_regions.append(protein)
                in_orf = True
                protein = "M"
            elif in_orf:
                if codon in {'UAA', 'UAG', 'UGA'}:
                    protein_regions.append(protein)
                    in_orf = False
                    protein = ""
                else:
                    protein += codon_table.get(codon, '?')
        
        if in_orf and protein:
            protein_regions.append(protein)
        
        results[label] = protein_regions if protein_regions else ["No valid ORF found"]
    return results

def read_fasta_or_sequence(file_path):
    """Reads a sequence from a FASTA or plain sequence file."""
    with open(file_path, 'r') as file:
        lines = file.readlines()
        if lines[0].startswith('>'):
            return ''.join(line.strip() for line in lines[1:])
        return ''.join(line.strip() for line in lines)

def save_output_to_txt(output_text, filename):
    """Saves the output to a text file in the same directory as input file."""
    with open(filename, 'w') as file:
        file.write(output_text)

def main():
    input_file = input("Enter the path to your FASTA or sequence file: ").strip()
    if not os.path.exists(input_file):
        print("Error: File not found.")
        return
    
    sequence = read_fasta_or_sequence(input_file)
    output_file = os.path.join(os.path.dirname(input_file), "translated_output.txt")
    
    translation_result = "Single-frame Translation:\n"
    translation_result += f"Translated Protein Sequence: {translate_sequence(sequence)}\n\n"
    
    translation_result += "Translated Protein Sequences from all Reading Frames:\n"
    translated_frames = translate_frames(sequence)
    for frame, protein_seqs in translated_frames.items():
        translation_result += f"{frame}:\n"
        for idx, protein_seq in enumerate(protein_seqs, start=1):
            translation_result += f"  ORF {idx}: {protein_seq}\n"
    
    save_output_to_txt(translation_result, output_file)
    print(f"Translation results saved to {output_file}")

if __name__ == "__main__":
    main()


Enter the path to your FASTA or sequence file:  C:\Users\miari\Downloads\Homo_sapiens_RDH5_sequence.fa


Translation results saved to C:\Users\miari\Downloads\translated_output.txt


In [4]:
import os

codon_table = {
    'UUU': 'F', 'UUC': 'F', 'UUA': 'L', 'UUG': 'L',
    'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L',
    'AUU': 'I', 'AUC': 'I', 'AUA': 'I', 'AUG': 'M',
    'GUU': 'V', 'GUC': 'V', 'GUA': 'V', 'GUG': 'V',
    'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S',
    'AGU': 'S', 'AGC': 'S', 'CCU': 'P', 'CCC': 'P',
    'CCA': 'P', 'CCG': 'P', 'ACU': 'T', 'ACC': 'T',
    'ACA': 'T', 'ACG': 'T', 'GCU': 'A', 'GCC': 'A',
    'GCA': 'A', 'GCG': 'A', 'UAU': 'Y', 'UAC': 'Y',
    'UAA': '*', 'UAG': '*', 'UGA': '*', 'CAU': 'H',
    'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 'AAU': 'N',
    'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'GAU': 'D',
    'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'UGU': 'C',
    'UGC': 'C', 'UGG': 'W', 'CGU': 'R', 'CGC': 'R',
    'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R',
    'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'
}

def reverse_complement(seq):
    """Generate the reverse complement of an RNA sequence."""
    complement = {'A': 'U', 'U': 'A', 'G': 'C', 'C': 'G'}
    return "".join(complement[base] for base in reversed(seq))

def validate_sequence(sequence):
    """Check if the sequence contains only valid nucleotides (A, T, G, C, U)."""
    valid_bases = {'A', 'T', 'G', 'C', 'U'}
    return all(base in valid_bases for base in sequence)

def translate_sequence(sequence):
    """Translate a DNA or RNA sequence into a protein sequence."""
    sequence = sequence.replace(" ", "").upper()
    if not validate_sequence(sequence):
        return "Error: Invalid character found. Only 'A', 'T', 'G', 'C', 'U' are allowed."
    if 'T' in sequence:
        sequence = sequence.replace('T', 'U')
    
    protein = ""
    for i in range(0, len(sequence) - 2, 3):
        codon = sequence[i:i+3]
        amino_acid = codon_table.get(codon, '?')
        if amino_acid == '*':
            break
        protein += amino_acid
    return protein

def translate_frames(sequence):
    """Find all six reading frames and their translated protein sequences."""
    sequence = sequence.replace(" ", "").upper()
    if not validate_sequence(sequence):
        return "Error: Invalid character found. Only 'A', 'T', 'G', 'C', 'U' are allowed."
    if 'T' in sequence:
        sequence = sequence.replace('T', 'U')
    
    forward_frames = [sequence[i:] for i in range(3)]
    reverse_seq = reverse_complement(sequence)
    reverse_frames = [reverse_seq[i:] for i in range(3)]
    
    all_frames = forward_frames + reverse_frames
    frame_labels = ["Forward Frame 1", "Forward Frame 2", "Forward Frame 3",
                    "Reverse Frame 1", "Reverse Frame 2", "Reverse Frame 3"]
    
    results = {}
    for label, frame in zip(frame_labels, all_frames):
        protein_regions = []
        protein = ""
        in_orf = False
        
        for i in range(0, len(frame) - 2, 3):
            codon = frame[i:i+3]
            if codon == 'AUG':
                if in_orf and protein:
                    protein_regions.append(protein)
                in_orf = True
                protein = "M"
            elif in_orf:
                if codon in {'UAA', 'UAG', 'UGA'}:
                    protein_regions.append(protein)
                    in_orf = False
                    protein = ""
                else:
                    protein += codon_table.get(codon, '?')
        
        if in_orf and protein:
            protein_regions.append(protein)
        
        results[label] = protein_regions if protein_regions else ["No valid ORF found"]
    return results

def read_fasta_or_sequence(file_path):
    """Reads a sequence from a FASTA or plain sequence file."""
    with open(file_path, 'r') as file:
        lines = file.readlines()
        if lines[0].startswith('>'):
            return ''.join(line.strip() for line in lines[1:])
        return ''.join(line.strip() for line in lines)

def main():
    input_file = input("Enter the path to your FASTA or sequence file: ").strip()
    if not os.path.exists(input_file):
        print("Error: File not found.")
        return
    
    sequence = read_fasta_or_sequence(input_file)
    
    print("Single-frame Translation:")
    print(f"Translated Protein Sequence: {translate_sequence(sequence)}")
    
    print("\nTranslated Protein Sequences from all Reading Frames:")
    translated_frames = translate_frames(sequence)
    for frame, protein_seqs in translated_frames.items():
        print(f"{frame}:")
        for idx, protein_seq in enumerate(protein_seqs, start=1):
            print(f"  ORF {idx}: {protein_seq}")

if __name__ == "__main__":
    main()


Enter the path to your FASTA or sequence file:  C:\Users\miari\Downloads\Homo_sapiens_RDH5_sequence.fa


Single-frame Translation:
Translated Protein Sequence: RWFP

Translated Protein Sequences from all Reading Frames:
Forward Frame 1:
  ORF 1: MASPRLTLSSV
  ORF 2: MSLSQVGD
  ORF 3: MTGPVTVTNRAVQLPSLTAPSSLET
  ORF 4: MT
  ORF 5: MVRNPG
  ORF 6: MWF
  ORF 7: MLTPDASS
  ORF 8: MPLSSSPAVTQALGAFWHCSWTREASESWPAA
  ORF 9: MVDHQEYGVGCPDPHSHPRSHLQGLW
  ORF 10: MADPGRFPAGAECEHNGSHRGHPCPAASAAASPGPGDQHHQRPGSPGSQWWGLLCLQIWPGGLL
  ORF 11: MGS
  ORF 12: MKSTL
  ORF 13: M
  ORF 14: MPGLFLYF
  ORF 15: MLARLVSNS
  ORF 16: M
  ORF 17: MELYHVGQAGFELLTSGDPPDPASQSAGITG
  ORF 18: MSHHTRPKA
  ORF 19: MSPVSQP
  ORF 20: MPG
  ORF 21: MCFGSKG
  ORF 22: MGFL
  ORF 23: MVFFIFKWLHCKLLYKYLIISLILFLGLPCLKY
  ORF 24: MKGNKYQKGQSCISLLGGGWGTQGRVEGEQDVTRVPRPCGTCPLPTLRRGLRVTSPGPQKTVPK
  ORF 25: MGWSEEGKLIATTYGAADLK
  ORF 26: MQQRI
  ORF 27: MNLICDPDLTKVSRCLEHALTARHPRTRYSPGWDAKLLWLPASYLPASLVDAVLTWVLPKPAQAVY
  ORF 28: MLTVSGLFSPCKTASTHSGAPGNCLTSTAARRGVAIKGHFNTFPLFSEGE
  ORF 29: MVFIFLGGEEQV
  ORF 30: MRVLGQAGWLPWKKRQYSHKFSC
 

In [7]:
import os

codon_table = {
    'UUU': 'F', 'UUC': 'F', 'UUA': 'L', 'UUG': 'L',
    'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L',
    'AUU': 'I', 'AUC': 'I', 'AUA': 'I', 'AUG': 'M',
    'GUU': 'V', 'GUC': 'V', 'GUA': 'V', 'GUG': 'V',
    'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S',
    'AGU': 'S', 'AGC': 'S', 'CCU': 'P', 'CCC': 'P',
    'CCA': 'P', 'CCG': 'P', 'ACU': 'T', 'ACC': 'T',
    'ACA': 'T', 'ACG': 'T', 'GCU': 'A', 'GCC': 'A',
    'GCA': 'A', 'GCG': 'A', 'UAU': 'Y', 'UAC': 'Y',
    'UAA': '*', 'UAG': '*', 'UGA': '*', 'CAU': 'H',
    'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 'AAU': 'N',
    'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'GAU': 'D',
    'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'UGU': 'C',
    'UGC': 'C', 'UGG': 'W', 'CGU': 'R', 'CGC': 'R',
    'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R',
    'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'
}

def reverse_complement(seq):
    """Generate the reverse complement of an RNA sequence."""
    complement = {'A': 'U', 'U': 'A', 'G': 'C', 'C': 'G'}
    return "".join(complement[base] for base in reversed(seq))

def validate_sequence(sequence):
    """Check if the sequence contains only valid nucleotides (A, T, G, C, U)."""
    valid_bases = {'A', 'T', 'G', 'C', 'U'}
    return all(base in valid_bases for base in sequence)

def translate_sequence(sequence):
    """Translate a DNA or RNA sequence into a protein sequence."""
    sequence = sequence.replace(" ", "").upper()
    if not validate_sequence(sequence):
        return "Error: Invalid character found. Only 'A', 'T', 'G', 'C', 'U' are allowed."
    if 'T' in sequence:
        sequence = sequence.replace('T', 'U')
    
    protein = ""
    for i in range(0, len(sequence) - 2, 3):
        codon = sequence[i:i+3]
        amino_acid = codon_table.get(codon, '?')
        if amino_acid == '*':
            break
        protein += amino_acid
    return protein

def translate_frames(sequence):
    """Find all six reading frames and their translated protein sequences."""
    sequence = sequence.replace(" ", "").upper()
    if not validate_sequence(sequence):
        return "Error: Invalid character found. Only 'A', 'T', 'G', 'C', 'U' are allowed."
    if 'T' in sequence:
        sequence = sequence.replace('T', 'U')
    
    forward_frames = [sequence[i:] for i in range(3)]
    reverse_seq = reverse_complement(sequence)
    reverse_frames = [reverse_seq[i:] for i in range(3)]
    
    all_frames = forward_frames + reverse_frames
    frame_labels = ["Forward Frame 1", "Forward Frame 2", "Forward Frame 3",
                    "Reverse Frame 1", "Reverse Frame 2", "Reverse Frame 3"]
    
    results = {}
    for label, frame in zip(frame_labels, all_frames):
        protein_regions = []
        protein = ""
        in_orf = False
        
        for i in range(0, len(frame) - 2, 3):
            codon = frame[i:i+3]
            if codon == 'AUG':
                if in_orf and protein:
                    protein_regions.append(protein)
                in_orf = True
                protein = "M"
            elif in_orf:
                if codon in {'UAA', 'UAG', 'UGA'}:
                    protein_regions.append(protein)
                    in_orf = False
                    protein = ""
                else:
                    protein += codon_table.get(codon, '?')
        
        if in_orf and protein:
            protein_regions.append(protein)
        
        results[label] = protein_regions if protein_regions else ["No valid ORF found"]
    return results

def read_fasta_or_sequence(file_path):
    """Reads a sequence from a FASTA or plain sequence file."""
    with open(file_path, 'r') as file:
        lines = file.readlines()
        if lines[0].startswith('>'):
            return ''.join(line.strip() for line in lines[1:])
        return ''.join(line.strip() for line in lines)

def main():
    input_file = input("Enter the path to your FASTA or sequence file: ").strip()
    if not os.path.exists(input_file):
        print("Error: File not found.")
        return
    
    sequence = read_fasta_or_sequence(input_file)
    
    print("Single-frame Translation:")
    print(f"Translated Protein Sequence: {translate_sequence(sequence)}")
    
    print("\nTranslated Protein Sequences from all Reading Frames:")
    translated_frames = translate_frames(sequence)
    for frame, protein_seqs in translated_frames.items():
        print(f"{frame}:")
        for idx, protein_seq in enumerate(protein_seqs, start=1):
            print(f"  ORF {idx}: {protein_seq}")

if __name__ == "__main__":
    main()


Enter the path to your FASTA or sequence file:  C:\Users\miari\Downloads\Homo_sapiens_RDH5_sequence.fa


Single-frame Translation:
Translated Protein Sequence: RWFP

Translated Protein Sequences from all Reading Frames:
Forward Frame 1:
  ORF 1: MASPRLTLSSV
  ORF 2: MSLSQVGD
  ORF 3: MTGPVTVTNRAVQLPSLTAPSSLET
  ORF 4: MT
  ORF 5: MVRNPG
  ORF 6: MWF
  ORF 7: MLTPDASS
  ORF 8: MPLSSSPAVTQALGAFWHCSWTREASESWPAA
  ORF 9: MVDHQEYGVGCPDPHSHPRSHLQGLW
  ORF 10: MADPGRFPAGAECEHNGSHRGHPCPAASAAASPGPGDQHHQRPGSPGSQWWGLLCLQIWPGGLL
  ORF 11: MGS
  ORF 12: MKSTL
  ORF 13: M
  ORF 14: MPGLFLYF
  ORF 15: MLARLVSNS
  ORF 16: M
  ORF 17: MELYHVGQAGFELLTSGDPPDPASQSAGITG
  ORF 18: MSHHTRPKA
  ORF 19: MSPVSQP
  ORF 20: MPG
  ORF 21: MCFGSKG
  ORF 22: MGFL
  ORF 23: MVFFIFKWLHCKLLYKYLIISLILFLGLPCLKY
  ORF 24: MKGNKYQKGQSCISLLGGGWGTQGRVEGEQDVTRVPRPCGTCPLPTLRRGLRVTSPGPQKTVPK
  ORF 25: MGWSEEGKLIATTYGAADLK
  ORF 26: MQQRI
  ORF 27: MNLICDPDLTKVSRCLEHALTARHPRTRYSPGWDAKLLWLPASYLPASLVDAVLTWVLPKPAQAVY
  ORF 28: MLTVSGLFSPCKTASTHSGAPGNCLTSTAARRGVAIKGHFNTFPLFSEGE
  ORF 29: MVFIFLGGEEQV
  ORF 30: MRVLGQAGWLPWKKRQYSHKFSC
 