In [5]:
from collections import defaultdict

# Synonymous codons (DNA alphabet) of the standard genetic code, keyed by
# one-letter amino-acid code; '*' groups the three stop codons.
_SYNONYMOUS_CODONS = {
    'A': ['GCT', 'GCC', 'GCA', 'GCG'],
    'R': ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],
    'N': ['AAT', 'AAC'],
    'D': ['GAT', 'GAC'],
    'C': ['TGT', 'TGC'],
    'Q': ['CAA', 'CAG'],
    'E': ['GAA', 'GAG'],
    'G': ['GGT', 'GGC', 'GGA', 'GGG'],
    'H': ['CAT', 'CAC'],
    'I': ['ATT', 'ATC', 'ATA'],
    'L': ['TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG'],
    'K': ['AAA', 'AAG'],
    'M': ['ATG'],
    'F': ['TTT', 'TTC'],
    'P': ['CCT', 'CCC', 'CCA', 'CCG'],
    'S': ['TCT', 'TCC', 'TCA', 'TCG', 'AGT', 'AGC'],
    'T': ['ACT', 'ACC', 'ACA', 'ACG'],
    'W': ['TGG'],
    'Y': ['TAT', 'TAC'],
    'V': ['GTT', 'GTC', 'GTA', 'GTG'],
    '*': ['TAA', 'TAG', 'TGA'],
}


def replace_codons(dna_sequence, replacements):
    """Rewrite every codon of the selected amino acids to one chosen codon.

    The sequence is read in frame from index 0 in steps of three bases.
    Each codon that encodes an amino acid listed in `replacements` is
    replaced by the codon given for that amino acid; all other codons, and
    any trailing fragment shorter than three bases, are copied unchanged.

    Args:
        dna_sequence: Upper-case DNA string (A/C/G/T) to rewrite.
        replacements: Mapping of one-letter amino-acid code to the codon
            that should be used for it, e.g. ``{'S': 'TCC'}``.

    Returns:
        The rewritten sequence as a new string.

    Raises:
        KeyError: If `replacements` contains an amino-acid code that is not
            in the standard genetic code table.
    """
    # Resolve every substitution up front so that each codon is translated
    # exactly once, based on its ORIGINAL value. Rewriting the sequence once
    # per amino acid would let a later entry re-translate codons written by
    # an earlier one, making the result depend on dict ordering.
    substitution = {}
    for amino_acid, new_codon in replacements.items():
        if amino_acid not in _SYNONYMOUS_CODONS:
            raise KeyError(f"unknown amino acid code: {amino_acid!r}")
        for old_codon in _SYNONYMOUS_CODONS[amino_acid]:
            substitution[old_codon] = new_codon

    codons = (dna_sequence[i:i + 3] for i in range(0, len(dna_sequence), 3))
    return "".join(substitution.get(codon, codon) for codon in codons)

original_gene_seq =  "ATGGTACCACAAGCATTATTATTTGTACCATTATTAGTATTTCCATTATGTTTTGGAAAATTTCCAATTTATACAATTCCAGATAAATTAGGACCATGGTCTCCAATTGATATTCATCATTTATCTTGTCCAAATAATTTAGTAGTAGAAGATGAAGGATGTACAAATTTATCTGGATTTTCTTATATGGAATTAAAAGTAGGATATATTTCTGCAATTAAAATGAATGGATTTACATGTACAGGAGTAGTAACAGAAGCAGAAACATATACAAATTTTGTAGGATATGTAACAACAACATTTAAACGTAAACATTTTCGTCCAACACCAGATGCATGTCGTGCAGCATATAATTGGAAAATGGCAGGAGATCCACGTTATGAAGAATCTTTACATAATCCATATCCAGATTATCATTGGTTACGTACAGTAAAAACAACAAAAGAATCTTTAGTAATTATTTCTCCATCTGTAGCAGATTTAGATCCATATGATCGTTCTTTACATTCTCGTGTATTTCCAGGAGGAAATTGTTCTGGAGTAGCAGTATCTTCTACATATTGTTCTACAAATCATGATTATACAATTTGGATGCCAGAAAATCCACGTTTAGGAATGTCTTGTGATATTTTTACAAATTCTCGTGGAAAACGTGCATCTAAAGGATCTGAAACATGTGGATTTGTAGATGAACGTGGATTATATAAATCTTTAAAAGGAGCATGTAAATTAAAATTATGTGGAGTATTAGGATTACGTTTAATGGATGGAACATGGGTAGCAATGCAAACATCTAATGAAACAAAATGGTGTCCACCAGGACAATTAGTAAATTTACATGATTTTCGTTCTGATGAAATTGAACATTTAGTAGTAGAAGAATTAGTAAAAAAACGTGAAGAATGTTTAGATGCATTAGAATCTATTATGACAACAAAATCTGTATCTTTTCGTCGTTTATCTCATTTACGTAAATTAGTACCAGGATTTGGAAAAGCATATACAATTTTTAATAAAACATTAATGGAAGCAGATGCACATTATAAATCTGTACGTACATGGAATGAAATTATTCCATCTAAAGGATGTTTACGTGTAGGAGGACGTTGTCATCCACATGTAAATGGAGTATTTTTTAATGGAATTATTTTAGGACCAGATGGAAATGTATTAATTCCAGAAATGCAATCTTCTTTATTACAACAACATATGGAATTATTAGTATCTTCTGTAATTCCATTAATGCATCCATTAGCAGATCCATCTACAGTATTTAAAAATGGAGATGAAGCAGAAGATTTTGTAGAAGTACATTTACCAGATGTACATGAACGTATTTCTGGAGTAGATTTAGGATTACCAAATTGGGGAAAATATGTATTATTATCTGCAGGAGCATTAACAGCATTAATGTTAATTATTTTTTTAATGACATGTTGGCGTCGTGTAAATCGTTCTGAACCAACACAACATAATTTACGTGGAACAGGACGTGAAGTATCTGTAACACCACAATCTGGAAAAATTATTTCTTCTTGGGAATCTTATAAATCTGGAGGAGAAACAGGATTA"
# Codon to use for each amino acid (keyed by one-letter code).
replacements = {'S': 'TCC', 'C': 'TGC', 'H': 'CAC'}

optimized_gene_seq = replace_codons(original_gene_seq, replacements)

# Compare the two sequences codon by codon: count how many codons changed,
# tally which replacement codons were introduced, and build a copy of the
# optimized sequence with every changed codon wrapped in [brackets].
changed_codon_counts = defaultdict(int)
highlighted_parts = []
num_changed_codons = 0
num_same_codons = 0

for start in range(0, len(original_gene_seq), 3):
    window = slice(start, start + 3)
    before, after = original_gene_seq[window], optimized_gene_seq[window]
    if before == after:
        highlighted_parts.append(after)
        num_same_codons += 1
        continue
    highlighted_parts.append(f"[{after}]")
    num_changed_codons += 1
    changed_codon_counts[after] += 1

highlighted_changes = "".join(highlighted_parts)

print("Original Gene Sequence:  ", original_gene_seq)
print("Optimized Gene Sequence:  ", optimized_gene_seq)
print("Highlighted Changes:     ", highlighted_changes)
print(f"Number of changed codons: {num_changed_codons}")
print(f"Number of same codons: {num_same_codons}")
print("Changed codon counts: ", dict(changed_codon_counts))

Original Gene Sequence:   ATGGTACCACAAGCATTATTATTTGTACCATTATTAGTATTTCCATTATGTTTTGGAAAATTTCCAATTTATACAATTCCAGATAAATTAGGACCATGGTCTCCAATTGATATTCATCATTTATCTTGTCCAAATAATTTAGTAGTAGAAGATGAAGGATGTACAAATTTATCTGGATTTTCTTATATGGAATTAAAAGTAGGATATATTTCTGCAATTAAAATGAATGGATTTACATGTACAGGAGTAGTAACAGAAGCAGAAACATATACAAATTTTGTAGGATATGTAACAACAACATTTAAACGTAAACATTTTCGTCCAACACCAGATGCATGTCGTGCAGCATATAATTGGAAAATGGCAGGAGATCCACGTTATGAAGAATCTTTACATAATCCATATCCAGATTATCATTGGTTACGTACAGTAAAAACAACAAAAGAATCTTTAGTAATTATTTCTCCATCTGTAGCAGATTTAGATCCATATGATCGTTCTTTACATTCTCGTGTATTTCCAGGAGGAAATTGTTCTGGAGTAGCAGTATCTTCTACATATTGTTCTACAAATCATGATTATACAATTTGGATGCCAGAAAATCCACGTTTAGGAATGTCTTGTGATATTTTTACAAATTCTCGTGGAAAACGTGCATCTAAAGGATCTGAAACATGTGGATTTGTAGATGAACGTGGATTATATAAATCTTTAAAAGGAGCATGTAAATTAAAATTATGTGGAGTATTAGGATTACGTTTAATGGATGGAACATGGGTAGCAATGCAAACATCTAATGAAACAAAATGGTGTCCACCAGGACAATTAGTAAATTTACATGATTTTCGTTCTGATGAAATTGAACATTTAGTAGTAGAAGAATTAGTAAAAAAACGTGAAGAATGTTTAGATGCATTAGAATCTATTATGACAACAAAATCTGTATCTTTTCGTCGTTTATCTCATTTACGTAA

In [6]:
def cg_ratio(sequence):
    """Return the fraction of bases in `sequence` that are C or G.

    Counting is case-insensitive, so lower-case bases contribute as well.

    Args:
        sequence: Nucleotide string.

    Returns:
        A float in [0, 1]; 0.0 for an empty sequence, where the ratio is
        otherwise undefined.
    """
    # Guard the division: an empty sequence has no bases to take a ratio of.
    if not sequence:
        return 0.0
    bases = sequence.upper()
    # Divide by the original length; upper() is only used for counting.
    return (bases.count('C') + bases.count('G')) / len(sequence)

# CG fraction of the gene before and after the codon replacement.
original_cg_ratio, optimized_cg_ratio = map(
    cg_ratio, (original_gene_seq, optimized_gene_seq)
)

# One print call, one line per ratio, each rounded to two decimals.
print(
    f"Oryginalny stosunek par CG: {original_cg_ratio:.2f}",
    f"Zoptymalizowany stosunek par CG: {optimized_cg_ratio:.2f}",
    sep="\n",
)


Oryginalny stosunek par CG: 0.31
Zoptymalizowany stosunek par CG: 0.36
