**Project : ADN to protein conversion

chat gpt code

In [8]:
genetic_code_dna = {
    "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
    "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
    "TAT": "Y", "TAC": "Y", "TAA": "*", "TAG": "*",
    "TGT": "C", "TGC": "C", "TGA": "*", "TGG": "W",
    "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
    "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
    "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
    "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
    "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
    "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
    "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
    "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
    "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
}

genetic_code_rna = {
    "UUU": "F", "UUC": "F", "UUA": "L", "UUG": "L",
    "UCU": "S", "UCC": "S", "UCA": "S", "UCG": "S",
    "UAU": "Y", "UAC": "Y", "UAA": "*", "UAG": "*",
    "UGU": "C", "UGC": "C", "UGA": "*", "UGG": "W",
    "CUU": "L", "CUC": "L", "CUA": "L", "CUG": "L",
    "CCU": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    "CAU": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
    "CGU": "R", "CGC": "R", "CGA": "R", "CGG": "R",
    "AUU": "I", "AUC": "I", "AUA": "I", "AUG": "M",
    "ACU": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    "AAU": "N", "AAC": "N", "AAA": "K", "AAG": "K",
    "AGU": "S", "AGC": "S", "AGA": "R", "AGG": "R",
    "GUU": "V", "GUC": "V", "GUA": "V", "GUG": "V",
    "GCU": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    "GAU": "D", "GAC": "D", "GAA": "E", "GAG": "E",
    "GGU": "G", "GGC": "G", "GGA": "G", "GGG": "G"
}

def transcribe_dna_to_rna(dna_sequence):
    return dna_sequence.replace('T', 'U')

def reverse_transcribe_rna_to_dna(rna_sequence):
    return rna_sequence.replace('U', 'T')

def find_start_codon(dna_sequence, sd_sequence):
    sd_index = dna_sequence.find(sd_sequence)
    if sd_index == -1:
        return -1
    start_index = sd_index + len(sd_sequence) + 9
    return start_index

def translate_dna_to_protein(dna_sequence):
    protein_sequence = ""
    for i in range(0, len(dna_sequence) - 2, 3):  # Ensure complete codons
        codon = dna_sequence[i:i+3]
        if codon in genetic_code_dna:
            amino_acid = genetic_code_dna[codon]
            if amino_acid == "*":
                break
            protein_sequence += amino_acid
        else:
            protein_sequence += "X"
    return protein_sequence

def translate_rna_to_protein(rna_sequence):
    protein_sequence = ""
    for i in range(0, len(rna_sequence) - 2, 3):  # Ensure complete codons
        codon = rna_sequence[i:i+3]
        if codon in genetic_code_rna:
            amino_acid = genetic_code_rna[codon]
            if amino_acid == "*":
                break
            protein_sequence += amino_acid
        else:
            protein_sequence += "X"
    return protein_sequence

def translate_dna_with_sd(dna_sequence, sd_sequence):
    start_index = find_start_codon(dna_sequence, sd_sequence)
    if start_index == -1:
        return "Start codon not found"
    dna_sequence = dna_sequence[start_index:]
    protein_sequence = ""
    for i in range(0, len(dna_sequence) - 2, 3):  # Ensure complete codons
        codon = dna_sequence[i:i+3]
        if codon in genetic_code_dna:
            amino_acid = genetic_code_dna[codon]
            if amino_acid == "*":
                break
            protein_sequence += amino_acid
        else:
            protein_sequence += "X"
    return protein_sequence

# Example usage
dna_sequence = "GCGAGGAGGTAAATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTGAACGTGGATGAAGCTG"
sd_sequence = "AGGAGG"

# Translate DNA to RNA
rna_sequence = transcribe_dna_to_rna(dna_sequence)
print("RNA Sequence:", rna_sequence)

# Translate RNA to protein
protein_sequence_from_rna = translate_rna_to_protein(rna_sequence)
print("Protein Sequence from RNA:", protein_sequence_from_rna)

# Reverse translate RNA to DNA
dna_sequence_from_rna = reverse_transcribe_rna_to_dna(rna_sequence)
print("DNA Sequence from RNA:", dna_sequence_from_rna)

# Translate DNA to protein using SD sequence
protein_sequence_with_sd = translate_dna_with_sd(dna_sequence, sd_sequence)
print("Protein Sequence with Shine-Dalgarno Sequence:", protein_sequence_with_sd)


RNA Sequence: GCGAGGAGGUAAAUGGUGCAUCUGACUCCUGAGGAGAAGUCUGCCGUUACUGCCCUGUGGGGCAAGGUGAACGUGGAUGAAGCUG
Protein Sequence from RNA: ARR
DNA Sequence from RNA: GCGAGGAGGTAAATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTGAACGTGGATGAAGCTG
Protein Sequence with Shine-Dalgarno Sequence: HLTPEEKSAVTALWGKVNVDEA


In [44]:
genetic_code_rna = {
    "UUU": "F", "UUC": "F", "UUA": "L", "UUG": "L",
    "UCU": "S", "UCC": "S", "UCA": "S", "UCG": "S",
    "UAU": "Y", "UAC": "Y", "UAA": "*", "UAG": "*",
    "UGU": "C", "UGC": "C", "UGA": "*", "UGG": "W",
    "CUU": "L", "CUC": "L", "CUA": "L", "CUG": "L",
    "CCU": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    "CAU": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
    "CGU": "R", "CGC": "R", "CGA": "R", "CGG": "R",
    "AUU": "I", "AUC": "I", "AUA": "I", "AUG": "M",
    "ACU": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    "AAU": "N", "AAC": "N", "AAA": "K", "AAG": "K",
    "AGU": "S", "AGC": "S", "AGA": "R", "AGG": "R",
    "GUU": "V", "GUC": "V", "GUA": "V", "GUG": "V",
    "GCU": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    "GAU": "D", "GAC": "D", "GAA": "E", "GAG": "E",
    "GGU": "G", "GGC": "G", "GGA": "G", "GGG": "G"
}

def transcribe_dna_to_rna(dna_sequence):
    return dna_sequence.replace('T', 'U')
    

rna_sequence = transcribe_dna_to_rna(dna_sequence)

def find_start_codons_rna(rna_sequence):
    start_codons = []
    for i in range(len(rna_sequence)):
        if rna_sequence[i:i+3] == "AUG":
            start_codons.append(i)
    return start_codons


def translate_rna_to_protein(rna_sequence):
    start_index = find_start_codon_rna(rna_sequence)
    if start_index == -1:
        return "Start codon not found"
    rna_sequence = rna_sequence[start_index:]
    protein_sequence = ""
    for i in range(0, len(rna_sequence) - 2, 3):
        codon = rna_sequence[i:i+3]
        if codon in genetic_code_rna:
            amino_acid = genetic_code_rna[codon]
            if amino_acid == "*":
                break
            protein_sequence += amino_acid
        else:
            protein_sequence += "X"
    return protein_sequence

def translate_rna_to_proteins_all_frames(rna_sequence):
    proteins = []
    for start_index in find_start_codons_rna(rna_sequence):
        protein_sequence = translate_rna_to_protein(rna_sequence[start_index:])
        proteins.append(protein_sequence)
    return proteins

def translate_one_letter_to_three_letter_list(one_letter_sequences):
    three_letter_code = {
        "A": "Ala", "C": "Cys", "D": "Asp", "E": "Glu",
        "F": "Phe", "G": "Gly", "H": "His", "I": "Ile",
        "K": "Lys", "L": "Leu", "M": "Met", "N": "Asn",
        "P": "Pro", "Q": "Gln", "R": "Arg", "S": "Ser",
        "T": "Thr", "V": "Val", "W": "Trp", "Y": "Tyr",
        "*": "Stop"
    }
    three_letter_sequences = []
    for one_letter_sequence in one_letter_sequences:
        three_letter_sequence = [three_letter_code.get(aa, "Unknown") for aa in one_letter_sequence]
        three_letter_sequences.append("-".join(three_letter_sequence))
    return three_letter_sequences

def print_proteins_in_frames1L53(protein_sequence_list):
    for i, protein in enumerate(protein_sequence_list, start=1):
        print(f"Frame {i} ,C5'-C3', 1 letters : {protein}")

def print_proteins_in_frames3L53(protein_sequence_list):
    for i, protein in enumerate(protein_sequence_list, start=1):
        print(f"Frame {i} ,C5'-C3', 3 letters : {protein}")

def print_proteins_in_frames1L35(protein_sequence_list):
    for i, protein in enumerate(protein_sequence_list, start=1):
        print(f"Frame {i} ,C3'-C5', 1 letters : {protein}")

def print_proteins_in_frames3L35(protein_sequence_list):
    for i, protein in enumerate(protein_sequence_list, start=1):
        print(f"Frame {i} ,C3'-C5', 3 letters : {protein}")
        

def complementary_sequences(dna_sequence):
    complementary_dna = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}

    # Generate complementary DNA sequence
    comp_dna_sequence = ''.join(complementary_dna[base] for base in dna_sequence)

    return comp_dna_sequence


# Example usage
dna_sequence = "ACGTACGTATGCGAGGAGGTAAATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTGAACGTGGATGAAGCTG"


# Translate RNA to protein with start codon "AUG" for 5'3'
protein_sequence_rna53 = translate_rna_to_proteins_all_frames(rna_sequence)


# Find the complementary of the ADN and rna sequence
dna_sequence35 = complementary_sequences(dna_sequence)
rna_sequence35 = transcribe_dna_to_rna(dna_sequence35)

# Translate RNA to protein with start codon "AUG" for 5'3'
protein_sequence_rna35 = translate_rna_to_proteins_all_frames(rna_sequence35)

# Translating one letter symbol amino acid into three
protein_sequence_3letters53 = translate_one_letter_to_three_letter_list(protein_sequence_rna)
protein_sequence_3letters35 = translate_one_letter_to_three_letter_list(protein_sequence_rna35)



print("DNA sequence 5'-3':", dna_sequence)
print("RNA sequence 5'-3':", rna_sequence)


print_proteins_in_frames1L53(protein_sequence_rna53)
print_proteins_in_frames3L53(protein_sequence_3letters53)

print('-----------------------------------------------------------------------------------------------')


print("DNA sequence 3'-5':", dna_sequence35)
print("RNA sequence 3'-5':", rna_sequence35)

print_proteins_in_frames1L35(protein_sequence_rna35)
print_proteins_in_frames3L35(protein_sequence_3letters35)


DNA sequence 5'-3': ACGTACGTATGCGAGGAGGTAAATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTGAACGTGGATGAAGCTG
RNA sequence 5'-3': ACGUACGUAUGCGAGGAGGUAAAUGGUGCAUCUGACUCCUGAGGAGAAGUCUGCCGUUACUGCCCUGUGGGGCAAGGUGAACGUGGAUGAAGCUG
Frame 1 ,C5'-C3', 1 letters : MRGGKWCI
Frame 2 ,C5'-C3', 1 letters : MVHLTPEEKSAVTALWGKVNVDEA
Frame 3 ,C5'-C3', 1 letters : MKL
Frame 1 ,C5'-C3', 3 letters : Met-Arg-Gly-Gly-Lys-Trp-Cys-Ile
Frame 2 ,C5'-C3', 3 letters : Met-Val-His-Leu-Thr-Pro-Glu-Glu-Lys-Ser-Ala-Val-Thr-Ala-Leu-Trp-Gly-Lys-Val-Asn-Val-Asp-Glu-Ala
Frame 3 ,C5'-C3', 3 letters : Met-Lys-Leu
-----------------------------------------------------------------------------------------------
DNA sequence 3'-5': TGCATGCATACGCTCCTCCATTTACCACGTAGACTGAGGACTCCTCTTCAGACGGCAATGACGGGACACCCCGTTCCACTTGCACCTACTTCGAC
RNA sequence 3'-5': UGCAUGCAUACGCUCCUCCAUUUACCACGUAGACUGAGGACUCCUCUUCAGACGGCAAUGACGGGACACCCCGUUCCACUUGCACCUACUUCGAC
Frame 1 ,C3'-C5', 1 letters : MHTLLHLPRRLRTPLQTAMTGHPVPLAPTS
Frame 2 ,C3'-C5', 1 l