<a href="https://colab.research.google.com/github/ItsAnr/CodeToLife/blob/main/CentralDogmaLab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
def get_dna_input():
    valid_strands = ["coding", "template"]

    while True:
        dna = input("Enter DNA sequence (only A, T, C, G): ").upper()

        valid_dna = True
        for base in dna:
            if base not in "ATCG":
                print("Invalid DNA sequence. Only A, T, C, G are allowed.")
                valid_dna = False
                break

        if not valid_dna:
            continue

        strand_type = input("Enter strand type (coding/template): ").lower()
        if strand_type not in valid_strands:
            print("Invalid strand type. Please enter 'coding' or 'template'.")
            continue

        return dna, strand_type

In [2]:
def get_reverse_complement(dna):
    complement = {
        "A": "T",
        "T": "A",
        "G": "C",
        "C": "G"
    }
    reverse_complement = ''.join([complement[base] for base in dna[::-1]])
    return reverse_complement

def dna_to_mrna(dna, strand_type):
    if strand_type == "coding":
        return dna.replace("T", "U")
    elif strand_type == "template":
        reverse_comp = get_reverse_complement(dna)
        return reverse_comp.replace("T", "U")

In [3]:

def extract_protein_region_strict(rna):
    start_codon = "AUG"
    stop_codons = {"UAA", "UGA", "UAG"}

    start_index = -1
    for i in range(len(rna) - 2):
        if rna[i:i+3] == start_codon:
            start_index = i
            break

    if start_index == -1:
        return ""  # No start codon found

    codons = []
    for i in range(start_index, len(rna) - 2, 3):
        codon = rna[i:i+3]
        if codon in stop_codons:
            break
        codons.append(codon)

    return "".join(codons)

In [4]:
codon_to_amino = {
    "UUU": "Phenylalanine", "UUC": "Phenylalanine",
    "UUA": "Leucine", "UUG": "Leucine",
    "CUU": "Leucine", "CUC": "Leucine",
    "CUA": "Leucine", "CUG": "Leucine",
    "AUU": "Isoleucine", "AUC": "Isoleucine", "AUA": "Isoleucine",
    "AUG": "Methionine",  # Start codon
    "GUU": "Valine", "GUC": "Valine", "GUA": "Valine", "GUG": "Valine",
    "UCU": "Serine", "UCC": "Serine", "UCA": "Serine", "UCG": "Serine",
    "AGU": "Serine", "AGC": "Serine",
    "CCU": "Proline", "CCC": "Proline", "CCA": "Proline", "CCG": "Proline",
    "ACU": "Threonine", "ACC": "Threonine", "ACA": "Threonine", "ACG": "Threonine",
    "GCU": "Alanine", "GCC": "Alanine", "GCA": "Alanine", "GCG": "Alanine",
    "UAU": "Tyrosine", "UAC": "Tyrosine",
    "CAU": "Histidine", "CAC": "Histidine",
    "CAA": "Glutamine", "CAG": "Glutamine",
    "AAU": "Asparagine", "AAC": "Asparagine",
    "AAA": "Lysine", "AAG": "Lysine",
    "GAU": "Aspartic acid", "GAC": "Aspartic acid",
    "GAA": "Glutamic acid", "GAG": "Glutamic acid",
    "UGU": "Cysteine", "UGC": "Cysteine",
    "UGG": "Tryptophan",
    "CGU": "Arginine", "CGC": "Arginine", "CGA": "Arginine", "CGG": "Arginine",
    "AGA": "Arginine", "AGG": "Arginine",
    "GGU": "Glycine", "GGC": "Glycine", "GGA": "Glycine", "GGG": "Glycine",
    "UAA": "Stop", "UAG": "Stop", "UGA": "Stop"
}

In [8]:
def translate_to_protein(coding_region):
    protein = ""
    for i in range(0, len(coding_region) - 2, 3):
        codon = coding_region[i:i+3]
        aa = codon_to_amino.get(codon, "")
        if aa == "Stop":
            break
        protein += aa + "-"
    return protein

In [6]:
def calculate_gc_content(dna):
    g_count = dna.count("G")
    c_count = dna.count("C")
    total_length = len(dna)

    if total_length == 0:
        return 0

    gc_percent = (g_count + c_count) / total_length * 100
    return gc_percent

In [10]:
dna, strand_type = get_dna_input()
mrna = dna_to_mrna(dna, strand_type)
coding_region = extract_protein_region_strict(mrna)
protein = translate_to_protein(coding_region)

print("Protein:", protein)
print("GC content of DNA: {:.2f}%".format(calculate_gc_content(dna)))

Enter DNA sequence (only A, T, C, G): ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG
Enter strand type (coding/template): Coding
Protein: Methionine-Alanine-Isoleucine-Valine-Methionine-Glycine-Arginine-
GC content of DNA: 56.41%
