In [10]:
from collections import Counter
from math import log
import random

def calculate_cai(sequence, codon_usage):
    codon_counts = {} #
    total_codons = 0
    #"CGTAGTTACCATGGAGCAGT"
    for i in range(0, len(sequence), 3):
        codon = sequence[i:i + 3] # CGT - 
        codon_counts[codon] = codon_counts.get(codon, 0) + 1
        
        total_codons += 1

    #print (codon_counts)
    cai = 1.0

    for codon, count in codon_counts.items():
        if codon in codon_usage:
            cai *= (codon_usage[codon] ** count)

    cai = cai ** (1 / total_codons)
    return cai
    

def optimize_codon_sequence(target_cai, current_sequence, codon_usage, max_iterations=1000, mutation_rate=0.1):
    current_cai = calculate_cai(current_sequence, codon_usage)
    best_sequence = current_sequence
    best_cai = current_cai

    for _ in range(max_iterations):
        position_to_mutate = random.randint(0, len(current_sequence) - 3)
        new_sequence = list(current_sequence)
        new_codon = random.choice(list(codon_usage.keys()))
        new_sequence[position_to_mutate:position_to_mutate+3] = list(new_codon)
        new_sequence = ''.join(new_sequence)
        
        new_cai = calculate_cai(new_sequence, codon_usage)

        if abs(new_cai - target_cai) < abs(best_cai - target_cai):
            best_sequence = new_sequence
            best_cai = new_cai

        current_sequence = best_sequence
        current_cai = best_cai

    return best_sequence

if __name__ == "__main__":
    # Replace with your actual target CAI value and codon usage data
    target_cai = 0.9
    codon_usage = {
        'UUU':17.5, 'UCU':16.2, 'UAU':14.0, 'UGU':11.2,
        'UUC':23.9, 'UCC':17.0,  'UAC':17.0, 'UGC':13.5,
        'UUA':7.6, 'UCA':11.2,  'UAA':0.9,  'UGA':1.8,
        'UUG':13.2, 'UCG':3.4,  'UAG':0.7,  'UGG':15.6,




        'CUU': 13.2, 'CCU':15.8, 'CAU':11.9, 'CGU':4.8,
        'CUC':22.3,  'CCC':18.8, 'CAC':16.2, 'CGC':10.9,
        'CUA': 7.8,  'CCA':14.4, 'CAA':14.5, 'CGA': 5.7,
        'CUG':43.2,  'CCG': 6.5, 'CAG':37.5, 'CGG': 9.7,




        'AUU':16.9,  'ACU':13.5, 'AAU':16.1, 'AGU':10.6,
        'AUC':22.9,  'ACC':20.8, 'AAC':18.7, 'AGC':17.5,
        'AUA': 8.9,  'ACA':15.3, 'AAA':23.6, 'AGA':14.9,
        'AUG':22.9,  'ACG': 5.8, 'AAG':29.3, 'AGG':12.9,




        'GUU':12.1,  'GCU':18.9, 'GAU':16.6, 'GGU': 9.4,
        'GUC':15.2,  'GCC':26.8, 'GAC':22.2, 'GGC':19.4,
        'GUA': 6.9,  'GCA':14.7, 'GAA':22.9, 'GGA':15.2,
        'GUG':29.1,  'GCG': 7.6, 'GAG':36.9, 'GGG':17.4



    }

    # Replace this with your initial DNA sequence
    #Input sequence is Val-Ala-His-Val-Gly-Ala-Leu-Gln
    current_sequence = "ATGCAACGTTACTGCAGT"

    optimized_sequence = optimize_codon_sequence(target_cai, current_sequence, codon_usage)
    print(f"Current Sequence: {current_sequence}")
    print(f"Optimized Sequence: {optimized_sequence}")
    print(f"Optimized CAI: {calculate_cai(optimized_sequence, codon_usage):.3f}")


Current Sequence: ATGCAACGTTACTGCAGT
Optimized Sequence: ATUUGAUAGUAAUAGUAG
Optimized CAI: 0.907
