In [1]:
from collections import Counter
from math import log
import random

def calculate_cai(sequence, codon_usage):
    codon_counts = {} #
    total_codons = 0
    #"CGTAGTTACCATGGAGCAGT"
    for i in range(0, len(sequence), 3):
        codon = sequence[i:i + 3] # CGT - 
        codon_counts[codon] = codon_counts.get(codon, 0) + 1
        
        total_codons += 1

    #print (codon_counts)
    cai = 1.0

    for codon, count in codon_counts.items():
        if codon in codon_usage:
            cai *= (codon_usage[codon] ** count)

    cai = cai ** (1 / total_codons)
    return cai
    

def optimize_codon_sequence(target_cai, current_sequence, codon_usage, max_iterations=1000, mutation_rate=0.1):
    current_cai = calculate_cai(current_sequence, codon_usage)
    best_sequence = current_sequence
    best_cai = current_cai

    for _ in range(max_iterations):
        position_to_mutate = random.randint(0, len(current_sequence) - 3)
        new_sequence = list(current_sequence)
        new_codon = random.choice(list(codon_usage.keys()))
        new_sequence[position_to_mutate:position_to_mutate+3] = list(new_codon)
        new_sequence = ''.join(new_sequence)
        
        new_cai = calculate_cai(new_sequence, codon_usage)

        if abs(new_cai - target_cai) < abs(best_cai - target_cai):
            best_sequence = new_sequence
            best_cai = new_cai

        current_sequence = best_sequence
        current_cai = best_cai

    return best_sequence

if __name__ == "__main__":
    # Replace with your actual target CAI value and codon usage data
    target_cai = 0.9
    codon_usage = {
         #Homo Sapiens
        'UUU':17.6, 'UCU':15.2,'UAU':12.2, 'UGU':10.6,
        'UUC':20.3, 'UCC':17.7, 'UAC':15.3, 'UGC':12.6,
        'UUA':7.7, 'UCA':12.2, 'UAA':1.0, 'UGA':1.6,
        'UUG':12.9, 'UCG':4.4, 'UAG':0.8, 'UGG':13.2,
        
        'CUU':13.2, 'CCU':17.5, 'CAU':10.9, 'CGU':4.5,
        'CUC':19.6, 'CCC':19.8, 'CAC':15.1, 'CGC':10.4,
        'CUA':7.2, 'CCA':16.9, 'CAA':12.3, 'CGA':6.2,
        'CUG':39.6, 'CCG':6.9, 'CAG':34.2, 'CGG':11.4,
        
        'AUU':16.0, 'ACU':13.1, 'AAU':17.0, 'AGU':12.1,
        'AUC':20.8, 'ACC':18.9, 'AAC':19.1, 'AGC':19.5,
        'AUA':7.5, 'ACA':15.1, 'AAA':24.4, 'AGA':12.2,
        'AUG':22.0, 'ACG':6.1, 'AAG':31.9, 'AGG':12.0,
        
        'GUU':11.0, 'GCU':18.4, 'GAU':21.8, 'GGU':10.8,
        'GUC':14.5, 'GCC':27.7, 'GAC':25.1, 'GGC':22.2,
        'GUA':7.1, 'GCA':15.8, 'GAA':29.0, 'GGA':16.5,
        'GUG':28.1, 'GCG':7.4, 'GAG':39.6, 'GGG':16.5,
    }

    # Replace this with your initial DNA sequence
    #Input sequence is Val-Ala-His-Val-Gly-Ala-Leu-Gln
    current_sequence = "GTTCGCATGTTCGGAGCGCTACAG"

    optimized_sequence = optimize_codon_sequence(target_cai, current_sequence, codon_usage)
    print(f"Current Sequence: {current_sequence}")
    print(f"Optimized Sequence: {optimized_sequence}")
    print(f"Optimized CAI: {calculate_cai(optimized_sequence, codon_usage):.3f}")


Current Sequence: GTTCGCATGTTCGGAGCGCTACAG
Optimized Sequence: GTTUAGUAGUAGUAAUAAUTUUAG
Optimized CAI: 0.894
