In [2]:
import random
import numpy as np
import textwrap
from google.colab import drive
drive.mount('/content/drive')

def generate_random_sequences(num_sequences, sequence_length):
    """Build a list of random DNA strings.

    Args:
        num_sequences: How many strings to generate.
        sequence_length: Number of bases in each string.

    Returns:
        A list of ``num_sequences`` strings, each made of ``sequence_length``
        characters drawn one at a time with ``random.choice`` from 'ACGT'.
    """
    alphabet = 'ACGT'
    sequences = []
    for _ in range(num_sequences):
        # Draw base by base so the global RNG is consumed in a fixed order.
        bases = [random.choice(alphabet) for _ in range(sequence_length)]
        sequences.append(''.join(bases))
    return sequences

def incorporate_motif(sequence, motif):
    """Insert ``motif`` into ``sequence`` at a random position.

    The insertion index is drawn with ``random.randint`` from
    1 .. len(sequence) - len(motif) (inclusive). Nothing is overwritten, so
    the result is len(sequence) + len(motif) characters long.

    Args:
        sequence: The string to insert into.
        motif: The string to insert.

    Returns:
        A new string with ``motif`` spliced into ``sequence``.

    Raises:
        ValueError: From ``random.randint`` when
            len(sequence) - len(motif) is less than 1.
    """
    highest_index = len(sequence) - len(motif)
    insert_at = random.randint(1, highest_index)
    head, tail = sequence[:insert_at], sequence[insert_at:]
    return ''.join((head, motif, tail))

def incorporate_for_all(sequences, motif):
    """Apply ``incorporate_motif`` to every sequence, in order.

    Args:
        sequences: Iterable of strings to insert into.
        motif: The string inserted into each sequence.

    Returns:
        A new list with one result per input sequence.
    """
    planted = []
    for sequence in sequences:
        planted.append(incorporate_motif(sequence, motif))
    return planted

# Seed the global RNG so the data set below is identical on every run.
random.seed(202501181854)

# 100 random background sequences of 100 bases each.
sekos = generate_random_sequences(num_sequences=100, sequence_length=100)

# Insert the same 10-base motif at a random position in every sequence.
motif = 'AAAAAAAAAA'
sekos2 = incorporate_for_all(sekos, motif)

# One sequence per output line.
print(*sekos2, sep='\n')

def fitness(motif, sequences):
    """Score a candidate motif against a collection of sequences.

    The motif is compared against every full-length window of each sequence,
    counting position-wise matching characters. The best window score of each
    sequence is added to the total.

    Args:
        motif: Candidate motif string.
        sequences: Iterable of strings to scan.

    Returns:
        The sum over all sequences of the best window score. A sequence
        shorter than the motif has no full-length window and contributes 0
        (previously this raised ``ValueError`` from ``max`` on an empty list).
    """
    motif_length = len(motif)
    total = 0
    for seq in sequences:
        window_scores = (
            sum(1 for a, b in zip(motif, seq[start:start + motif_length]) if a == b)
            for start in range(len(seq) - motif_length + 1)
        )
        # default=0 covers sequences too short to hold a single window.
        total += max(window_scores, default=0)
    return total

def generate_initial_population(pop_size, motif_length):
    """Create the starting population of random candidate motifs.

    Args:
        pop_size: Number of candidates to create.
        motif_length: Number of bases in each candidate.

    Returns:
        A list of ``pop_size`` strings of length ``motif_length`` whose
        characters are drawn with ``random.choice`` from 'ACGT'.
    """
    def random_motif():
        # One RNG draw per base, left to right.
        return ''.join(random.choice('ACGT') for _ in range(motif_length))

    return [random_motif() for _ in range(pop_size)]

def crossover(parent1, parent2):
    """Single-point crossover of two parent strings.

    A cut index is drawn with ``random.randint`` from 1 .. len(parent1) - 1
    (inclusive). The child takes ``parent1`` before the cut and ``parent2``
    from the cut onward.

    Args:
        parent1: Supplies the prefix; its length determines the cut range.
        parent2: Supplies the suffix.

    Returns:
        The recombined string.

    Raises:
        ValueError: From ``random.randint`` when ``parent1`` has fewer than
            two characters.
    """
    split_at = random.randint(1, len(parent1) - 1)
    prefix = parent1[:split_at]
    suffix = parent2[split_at:]
    return prefix + suffix

def mutate(motif, mutation_rate):
    """Randomly resample individual bases of a motif.

    For each character, one ``random.random()`` draw is compared with
    ``mutation_rate``. If the draw is smaller, the character is replaced by
    ``random.choice('ACGT')``, which may pick the same base again. Otherwise
    the character is kept.

    Args:
        motif: The string to mutate.
        mutation_rate: Per-character replacement probability threshold.

    Returns:
        A new string of the same length as ``motif``.
    """
    mutated = []
    for base in motif:
        # The threshold draw comes first; the replacement draw only on a hit.
        if random.random() < mutation_rate:
            mutated.append(random.choice('ACGT'))
        else:
            mutated.append(base)
    return ''.join(mutated)

def local_search(motif, sequences):
    """Return the best single-base variant of ``motif``, or ``motif`` itself.

    Every candidate differs from the original ``motif`` in exactly one
    position. Candidates are tried position by position, bases in 'ACGT'
    order, and a candidate replaces the current best only when its fitness is
    strictly higher, so the earliest candidate wins ties.

    Args:
        motif: The starting motif string.
        sequences: Sequences passed through to ``fitness``.

    Returns:
        The highest-scoring string among ``motif`` and its single-base
        variants.
    """
    best_motif = motif
    best_score = fitness(motif, sequences)
    for i, current_base in enumerate(motif):
        for base in 'ACGT':
            if base == current_base:
                # Substituting the same base reproduces ``motif``, whose score
                # can never exceed best_score, so skip the fitness call.
                continue
            candidate = motif[:i] + base + motif[i + 1:]
            candidate_score = fitness(candidate, sequences)
            if candidate_score > best_score:
                best_motif, best_score = candidate, candidate_score
    return best_motif

def memetic_algorithm_modified(sequences, motif_length, pop_size, generations, mutation_rate):
    """Search for a motif by repeatedly applying local search to a population.

    Each generation replaces every individual with the result of
    ``local_search``, logs the best individual to the output file and to
    stdout, and stops early once the best score equals
    ``len(sequences) * motif_length``.

    NOTE(review): ``mutation_rate`` is accepted but never read here, and
    neither ``crossover`` nor ``mutate`` is called, so runs with different
    mutation rates follow the same procedure. Confirm whether this is
    intended.

    Args:
        sequences: Sequences to search.
        motif_length: Length of the candidate motifs.
        pop_size: Number of individuals in the population.
        generations: Maximum number of generations to run.
        mutation_rate: Currently unused (see note above).

    Returns:
        The 1-based generation at which the stopping score was reached, or
        ``generations`` if it was never reached or the log file could not be
        opened or written.

    Raises:
        ValueError: If the population is empty, since there is then no best
            individual to select.
    """
    population = generate_initial_population(pop_size, motif_length)

    # Opened in 'w' mode, so each call overwrites the previous call's log.
    output_file_path = '/content/drive/My Drive/output.txt'

    # The early-stop score: every motif position matching in every sequence.
    target_score = len(sequences) * motif_length

    try:
        with open(output_file_path, 'w') as f:
            for generation in range(generations):
                population = [local_search(candidate, sequences) for candidate in population]

                # Score each individual once and reuse the values. Taking the
                # first index of the maximum keeps the earliest best
                # individual, as max(population, key=...) would.
                scores = [fitness(candidate, sequences) for candidate in population]
                best_score = max(scores)
                best_motif = population[scores.index(best_score)]

                output_line = f"Generation {generation + 1}: Best motif {best_motif} with score {best_score}\n"
                f.write(output_line)

                print(output_line, end='')

                if best_score == target_score:
                    return generation + 1
    except OSError as e:
        # Only I/O failures are handled here; other errors propagate instead
        # of being reported as write failures.
        print(f"Error writing to {output_file_path}: {e}")

    return generations

def update_progress(mutation_rate, current_run):
    """Append a progress record to the overall results file.

    The record has the form ``PROGRESS,<mutation_rate>,<current_run>`` and is
    appended to the file named by the module-level
    ``overall_results_file_path``.

    Args:
        mutation_rate: The mutation rate the record belongs to.
        current_run: The run counter to record.
    """
    record = f"PROGRESS,{mutation_rate},{current_run}\n"
    with open(overall_results_file_path, 'a') as progress_log:
        progress_log.write(record)

def get_current_progress(mutation_rate):
    """Return the last run counter recorded for ``mutation_rate``.

    Scans the file named by the module-level ``overall_results_file_path``
    for lines of the form ``PROGRESS,<mutation_rate>,<run>`` and returns the
    run value from the last line whose rate field equals
    ``str(mutation_rate)``.

    Lines that are truncated or otherwise malformed (too few fields, or a
    non-integer run field) are skipped rather than raising, so a partially
    written record does not prevent resuming.

    Args:
        mutation_rate: The mutation rate to look up.

    Returns:
        The recorded run counter, or 0 when the file does not exist or holds
        no valid record for this rate.
    """
    current_run = 0
    rate_key = str(mutation_rate)
    try:
        with open(overall_results_file_path, 'r') as results_file:
            for line in results_file:
                if not line.startswith("PROGRESS"):
                    continue
                parts = line.strip().split(',')
                if len(parts) < 3 or parts[1] != rate_key:
                    continue
                try:
                    current_run = int(parts[2])
                except ValueError:
                    # Run field missing or cut short; keep the last good value.
                    continue
    except FileNotFoundError:
        # No results file yet means nothing has been run.
        pass
    return current_run





Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
GAACAAGTTCATCATGAAAAAAAAAAAAGAGCAATAAGTTGTCCCATCAACATTATCTCAGCCGGAATGAAGTCGACCGACGTTGCAAATGTCCCTCTAACTTTATGAAG
CCCGCTTTTCCGGGCCTATATATTGAGCGCATCCAACTCATAGCGTCGTAGAGCCACACGACCCAAAAAAAAAATGAACGTCGTGCTTGTCAGCTGGTGGGCTACTAATC
GGACTATAAGGGAAGTCGGGGACGGGTCTGCTACCGAAAACTCACTAGATTTTCGCTCTTCTGCGAGTCTAAAAAAAAAATCGGAGCTACCGCGCCTGTATCTGAAAGTA
GCTTAGTATGAGCAGAATCTTTCATTGACCGCAATTGGACCAAGAAGCAGGGGATAAAGCATACCGGAAAAAAAAAACACGTATCGACGGCGAAGCTTTGCGCTGAGTAC
ACTGAGGATTCTCGACCGGCAACATGACCCGGGTGTGATCTCGCTGCGACAGTTAACAAAAAAAAAAACGCTTAAGAAACTTGAAGACGCAAGTTCCAACCCATTATTAG
ACTCCAAAACACAGTACCATAAAAAAAAAAAGTCAAGGGTTACGCGCCCCGCAACATGCGTAAACCTAACCTCGCATGTTATCATCTATAGTGACGTAATTAATCCACCT
ATAACGCCGCTAAGGGGCACGCGTGCGAAAAAAAAAAAGTACTACACAGGAGGAGCACCCGGTTGTGAACAACGTGCGCATAGGTTTCGCTCATAGCATGTTGAGGCTTC
TCTCTCCCTCAATAGAGTTATTGTACCCTCAAGTATTTGTGCATACGTGATCCTCCATGCCATCGAGGTCCAACGCTGAAAAAAAAAATCAACG

In [3]:
if __name__ == "__main__":
    # Driver cell: for every mutation rate, run the search up to `num_runs`
    # times and record the average number of generations per run.
    # NOTE(review): memetic_algorithm_modified, as defined in this notebook,
    # never reads its mutation_rate argument, so the rates below do not
    # change what the search does. Confirm whether that is intended.
    random.seed(202501181854)
    mutation_rates = [0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
    num_sequences = 100
    num_runs = 100

    # Read as a module-level global by update_progress() and
    # get_current_progress().
    overall_results_file_path = '/content/drive/My Drive/overall_results.txt'

    for mutation_rate in mutation_rates:
        # Resume after the last run recorded for this rate, if any.
        start_run = get_current_progress(mutation_rate)
        runs_this_session = num_runs - start_run

        if runs_this_session <= 0:
            # Every run is already recorded. Averaging an empty total would
            # report 0 generations, so write nothing.
            print(f"Mutation rate: {mutation_rate}, all {num_runs} runs already recorded; skipping")
            continue

        if start_run > 0:
            # Generation counts from earlier sessions are not stored, so the
            # average below can only cover the runs executed now.
            print(f"Mutation rate: {mutation_rate}, resuming at run {start_run + 1}; "
                  f"average covers the remaining {runs_this_session} runs only")

        total_generations = 0
        for run_num in range(start_run, num_runs):
            generations_taken = memetic_algorithm_modified(sekos2, motif_length=10, pop_size=100, generations=100, mutation_rate=mutation_rate)
            total_generations += generations_taken
            update_progress(mutation_rate, run_num + 1)

        # Divide by the runs actually executed, not by num_runs, so a resumed
        # session does not understate the average.
        avg_generations = total_generations / runs_this_session
        result_line = f"Mutation rate: {mutation_rate}, Average generations: {avg_generations}\n"

        # Open the file only for this write. update_progress() appends to the
        # same file, and a short-lived handle puts the result on disk at once
        # instead of leaving it in a buffer until the whole sweep ends.
        with open(overall_results_file_path, 'a') as results_file:
            results_file.write(result_line)
        print(result_line, end='')

Generation 1: Best motif CTTAAGAAAA with score 737
Generation 2: Best motif CTTAAAAAAA with score 826
Generation 3: Best motif CTAAAAAAAA with score 886
Generation 4: Best motif AAAAAAAAAC with score 938
Generation 5: Best motif AAAAAAAAAA with score 1000
Generation 1: Best motif AAAACAATAT with score 744
Generation 2: Best motif AAAAAAATAT with score 840
Generation 3: Best motif AAAAAAAAAT with score 932
Generation 4: Best motif AAAAAAAAAA with score 1000
Generation 1: Best motif GCAAAAAAAT with score 788
Generation 2: Best motif GCAAAAAAAA with score 874
Generation 3: Best motif GAAAAAAAAA with score 929
Generation 4: Best motif AAAAAAAAAA with score 1000
Generation 1: Best motif AAACAAAAAC with score 839
Generation 2: Best motif AAAAAAAAAC with score 938
Generation 3: Best motif AAAAAAAAAA with score 1000
Generation 1: Best motif AAAAAATGTC with score 773
Generation 2: Best motif AAAAAAACCA with score 839
Generation 3: Best motif AAAAAAAACA with score 908
Generation 4: Best motif AA

# New Section