In [None]:
import csv
import os
import random
import string
import time

import numpy as np
import pandas as pd
from google.colab import drive
from sklearn.metrics import f1_score
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Attach Google Drive to the Colab runtime; force_remount=True re-mounts even
# when a mount from an earlier run of this cell is still present.
drive.mount(
    '/content/drive',
    force_remount=True,
)

In [None]:
# Hugging Face access token and model ID
# The token is read from the HF_TOKEN environment variable so the secret does
# not have to be pasted into the notebook; the empty string remains the
# fallback, exactly as before.
access_token = os.environ.get("HF_TOKEN", "")
# NOTE(review): confirm this repository id exists on the Hub and that it is a
# causal-LM checkpoint loadable with AutoModelForCausalLM.
model_id = "meta-llama/Meta-Llama-3.2-11B-Instruct"

# Load the LLaMA 3.2 11B model and tokenizer
print("Loading LLaMA 3.2 11B model...")
# `token` is the current name of the deprecated `use_auth_token` argument.
# An empty token is passed as None so a cached `huggingface-cli login` is used.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=access_token or None)
model = AutoModelForCausalLM.from_pretrained(model_id, token=access_token or None, device_map="auto")
# The model is already placed by accelerate (device_map="auto"); passing an
# explicit `device` to the pipeline as well is rejected, so it is omitted.
llama_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=800)
print("Model loaded successfully.")

# Genetic algorithm parameters
population_size = 9          # individuals kept per generation
num_generations = 30         # number of evolution rounds
mutation_rate = 0.3          # probability that a child is mutated
selection_proportion = 0.5   # fraction of the population select_parents returns


In [None]:
# Candidate role phrases (first gene of an individual). Values are drawn with
# random.choice when the initial population is built and when a role gene is
# mutated. Entries repeat on purpose or by accident: a phrase that appears more
# often is proportionally more likely to be drawn.
roles = [
    "Gene interaction analyst",
    "Pathway relationship analyst",
    "Molecular biologist specializing in gene interactions",
    "Gene interaction specialist",
    "Molecular genetics expert",
    "Computational biologist",
    "Gene interaction analyst",
    "Bioinformatics researcher",
    "Molecular genetics expert",
    "Gene interaction analyst",
    "Molecular biologist specializing in gene interactions",
    "Gene interaction specialist",
    "Gene interaction analyst",
    "Molecular biologist specializing in gene interactions",
    "Bioinformatics researcher",
    "Gene interaction specialist",
    "Molecular genetics expert",
    "Computational biologist",
    "Molecular biologist specializing in gene interactions",
    "Gene interaction analyst",
    "Bioinformatics researcher",
    "Gene interaction specialist",
    "Gene interaction analyst",
    "Molecular genetics expert",
    "Gene interaction specialist",
    "Molecular biologist specializing in gene interactions",
    "Molecular genetics expert",
    "Gene interaction analyst",
    "Bioinformatics researcher",
    "Molecular biologist specializing in gene interactions",
    "Gene interaction specialist",
    "Gene interaction analyst",
    "Molecular biologist specializing in gene interactions",
    "Molecular genetics expert",
    "Bioinformatics researcher",
    "Gene interaction specialist",
    "Molecular genetics expert",
    "Computational biologist",
    "Bioinformatics researcher",
    "Gene interaction specialist",
    "Molecular genetics expert",
    "Molecular biologist specializing in gene interactions",
    "Gene interaction analyst",
    "Molecular biologist specializing in gene interactions",
    "Molecular genetics expert",
    "Bioinformatics researcher"
]
# Candidate aim sentences (second gene of an individual). Values are drawn with
# random.choice when the initial population is built and when an aim gene is
# mutated. Every entry is a full sentence that already ends with a period, and
# several sentences appear more than once, which raises their sampling weight.
aims = [
    "Extract gene relationships from the provided pathway diagram, ensuring accuracy in categorizing interaction types.",
    "Extract and classify gene interactions from the provided pathway diagram.",
    "Distinguish and document gene interactions from the provided pathway diagram.",
    "Analyze gene interactions within the provided diagram, ensuring accurate extraction and categorization.",
    "Extract gene interactions from the pathway diagram, categorizing each interaction accurately.",
    "Extract and document gene interactions from the provided pathway diagram.",
    "Extract and categorize gene interactions from the provided pathway diagram.",
    "Extract gene interactions from the pathway diagram with accuracy and precision.",
    "Extract and classify gene interactions from the provided pathway diagram.",
    "Extract and label gene interactions from the pathway diagram with precision.",
    "Accurately extract and classify gene relationships from the provided pathway diagram.",
    "Extract and categorize gene interactions from the provided diagram with accuracy.",
    "Accurately identify and document gene interactions within the pathway diagram.",
    "Extract gene relationships from the pathway diagram with precise classification.",
    "Analyze and extract gene interactions from the pathway diagram with accuracy.",
    "Extract and classify gene interactions from the provided diagram, ensuring precision.",
    "Identify and extract gene relationships from the pathway diagram with clarity.",
    "Extract and classify gene interactions from the pathway diagram, ensuring clarity.",
    "Identify and document gene relationships from the pathway diagram with accuracy.",
    "Extract and categorize gene interactions from the pathway diagram with precision.",
    "Accurately extract and classify gene relationships from the provided pathway diagram.",
    "Extract and accurately document gene interactions from the pathway diagram.",
    "Accurately extract and classify gene interactions from the pathway diagram.",
    "Extract and classify gene interactions from the pathway diagram, ensuring accuracy.",
    "Accurately extract gene interactions from the provided pathway diagram.",
    "Extract and accurately document gene relationships from the pathway diagram.",
    "Extract and classify gene interactions from the pathway diagram with precision.",
    "Accurately extract and classify gene relationships from the provided pathway diagram.",
    "Extract and categorize gene interactions from the pathway diagram with precision.",
    "Accurately extract gene interactions from the pathway diagram.",
    "Extract and categorize gene interactions from the pathway diagram with precision.",
    "Accurately identify and document gene interactions within the pathway diagram.",
    "Extract and classify gene relationships from the provided pathway diagram.",
    "Identify and categorize gene relationships from the pathway diagram with precision.",
    "Accurately extract gene interactions from the provided pathway diagram.",
    "Accurately extract and categorize gene interactions from the pathway diagram.",
    "Extract and document gene relationships from the pathway diagram with accuracy.",
    "Extract and classify gene interactions from the pathway diagram with precision.",
    "Accurately extract and document gene interactions from the provided pathway diagram.",
    "Extract and classify gene relationships from the pathway diagram with precision.",
    "Accurately document and classify gene relationships from the pathway diagram.",
    "Extract and categorize gene interactions from the pathway diagram with precision.",
    "Accurately extract and document gene interactions within the pathway diagram.",
    "Extract and classify gene relationships from the provided pathway diagram.",
    "Identify and categorize gene relationships from the pathway diagram with precision.",
    "Accurately extract gene interactions from the provided pathway diagram.",
    "Extract and classify gene interactions from the pathway diagram with precision.",
    "Accurately extract and document gene interactions from the provided pathway diagram.",
    "Extract and classify gene relationships from the pathway diagram with precision."
]

# Candidate symbol-legend sentences (fourth gene of an individual), drawn with
# random.choice when individuals are created or mutated.
#
# Two fixes relative to the earlier version of this list:
#   * The list is now named `instructions`, the name the GA cells look up;
#     `instruction` is kept as an alias so existing references keep working.
#   * The arrow glyph had been lost to an encoding error and showed up as "(?)"
#     in every sentence; it is spelled in ASCII as "(---->)" to mirror the
#     T-bar notation "(----|)".
#     NOTE(review): confirm "---->" is the arrow notation you want the model to see.
instructions = [
    "Symbols in the diagram represent specific interactions: 'Inhibition' is shown by T-bar symbols (----|), with dashed T-bars indicating 'Indirect Inhibition.' 'Activation' is represented by arrow symbols (---->), with dashed arrows for 'Indirect Activation.' Arrows point from gene1 (starter) to gene2 (receptor).",
    "Use the diagram's symbols: 'Inhibition' is marked by T-bars (----|), with dashed T-bars for 'Indirect Inhibition.' 'Activation' is denoted by arrows (---->), with dashed arrows for 'Indirect Activation.' The arrow direction indicates the flow between gene1 and gene2.",
    "Refer to the symbols in the diagram: T-bars (----|) indicate 'Inhibition,' with dashed T-bars for 'Indirect Inhibition.' Arrows (---->) show 'Activation,' with dashed arrows representing 'Indirect Activation.' Arrows indicate the direction of interaction from gene1 to gene2.",
    "Use the provided symbols in the diagram: T-bars (----|) denote 'Inhibition,' with dashed T-bars for 'Indirect Inhibition.' Arrows (---->) represent 'Activation,' with dashed arrows for 'Indirect Activation.' The direction of arrows shows the relationship flow from gene1 to gene2.",
    "Identify relationships using the diagram's symbols: T-bars (----|) represent 'Inhibition,' with dashed T-bars indicating 'Indirect Inhibition.' Arrows (---->) show 'Activation,' with dashed arrows signifying 'Indirect Activation.' Arrows indicate the flow from gene1 to gene2.",
    "The diagram uses specific symbols: T-bars (----|) for 'Inhibition,' with dashed T-bars for 'Indirect Inhibition.' Arrows (---->) for 'Activation,' with dashed arrows for 'Indirect Activation.' Arrows indicate the direction of the interaction from gene1 to gene2.",
    "Symbols in the diagram represent interactions: T-bars (----|) for 'Inhibition,' with dashed T-bars for 'Indirect Inhibition.' Arrows (---->) for 'Activation,' with dashed arrows for 'Indirect Activation.' The direction of the arrows shows the flow between gene1 and gene2.",
    "Use the symbols provided in the diagram: T-bars (----|) represent 'Inhibition,' with dashed T-bars indicating 'Indirect Inhibition.' Arrows (---->) denote 'Activation,' with dashed arrows representing 'Indirect Activation.' Arrows point from gene1 (starter) to gene2 (receptor).",
    "Refer to the diagram's symbols: T-bars (----|) indicate 'Inhibition,' with dashed T-bars representing 'Indirect Inhibition.' Arrows (---->) signify 'Activation,' with dashed arrows for 'Indirect Activation.' Arrows indicate the direction from gene1 to gene2.",
    "Use the symbols in the diagram: T-bars (----|) for 'Inhibition,' with dashed T-bars indicating 'Indirect Inhibition.' Arrows (---->) represent 'Activation,' with dashed arrows for 'Indirect Activation.' Arrows show the relationship flow from gene1 to gene2.",
    "The pathway diagram uses symbols to show interactions: 'Inhibition' is represented by T-bar (----|), with dashed T-bars indicating 'Indirect Inhibition.' 'Activation' is shown with arrows (---->), with dashed arrows indicating 'Indirect Activation.' Arrows illustrate the direction from gene1 to gene2.",
    "Symbols used in the diagram include T-bars (----|) for 'Inhibition,' with dashed T-bars for 'Indirect Inhibition,' and arrows (---->) for 'Activation,' with dashed arrows for 'Indirect Activation.' Arrows point from gene1 to gene2 to indicate direction.",
    "Refer to the pathway diagram symbols: 'Inhibition' is shown by T-bars (----|), with dashed T-bars for 'Indirect Inhibition.' 'Activation' is indicated by arrows (---->), with dashed arrows for 'Indirect Activation.' The direction of arrows shows the flow from gene1 to gene2.",
    "The pathway diagram uses T-bars (----|) for 'Inhibition,' with dashed T-bars indicating 'Indirect Inhibition,' and arrows (---->) for 'Activation,' with dashed arrows representing 'Indirect Activation.' The arrow's direction indicates the flow from gene1 to gene2.",
    "Symbols in the pathway diagram include T-bars (----|) for 'Inhibition,' with dashed T-bars for 'Indirect Inhibition,' and arrows (---->) for 'Activation,' with dashed arrows representing 'Indirect Activation.' Arrows point from gene1 to gene2.",
    "The diagram uses T-bars (----|) to depict 'Inhibition,' with dashed T-bars for 'Indirect Inhibition,' and arrows (---->) to show 'Activation,' with dashed arrows representing 'Indirect Activation.' The direction of arrows shows interaction flow from gene1 to gene2.",
    "In the pathway diagram, T-bars (----|) indicate 'Inhibition,' with dashed T-bars representing 'Indirect Inhibition,' and arrows (---->) signify 'Activation,' with dashed arrows for 'Indirect Activation.' Arrows point from gene1 to gene2.",
    "The diagram uses T-bars (----|) for 'Inhibition,' with dashed T-bars for 'Indirect Inhibition,' and arrows (---->) for 'Activation,' with dashed arrows for 'Indirect Activation.' Arrows indicate the direction from gene1 to gene2.",
    "Symbols in the diagram include T-bars (----|) for 'Inhibition,' with dashed T-bars for 'Indirect Inhibition,' and arrows (---->) for 'Activation,' with dashed arrows representing 'Indirect Activation.' Arrows show the direction from gene1 to gene2.",
    "The diagram depicts interactions with T-bars (----|) for 'Inhibition,' with dashed T-bars indicating 'Indirect Inhibition,' and arrows (---->) for 'Activation,' with dashed arrows for 'Indirect Activation.' Arrows point from gene1 to gene2.",
    "The pathway diagram illustrates interactions using T-bars (----|) for 'Inhibition,' with dashed T-bars for 'Indirect Inhibition,' and arrows (---->) for 'Activation,' with dashed arrows representing 'Indirect Activation.' The direction of arrows shows the flow from gene1 to gene2.",
    "The diagram uses specific symbols to depict interactions: T-bars (----|) indicate 'Inhibition,' with dashed T-bars for 'Indirect Inhibition,' and arrows (---->) represent 'Activation,' with dashed arrows for 'Indirect Activation.' Arrows indicate the direction from gene1 to gene2.",
    "Refer to the symbols in the pathway diagram: 'Inhibition' is represented by T-bars (----|), with dashed T-bars for 'Indirect Inhibition,' and 'Activation' is shown by arrows (---->), with dashed arrows for 'Indirect Activation.' The arrows indicate the relationship direction from gene1 to gene2.",
    "In the diagram, 'Inhibition' is shown by T-bars (----|), with dashed T-bars indicating 'Indirect Inhibition,' and 'Activation' is represented by arrows (---->), with dashed arrows for 'Indirect Activation.' Arrows indicate the interaction direction from gene1 to gene2.",
    "Symbols in the diagram represent interactions: T-bars (----|) indicate 'Inhibition,' with dashed T-bars for 'Indirect Inhibition,' and arrows (---->) represent 'Activation,' with dashed arrows for 'Indirect Activation.' The arrows point from gene1 to gene2.",
    "In the diagram, 'Inhibition' is indicated by T-bars (----|), with dashed T-bars for 'Indirect Inhibition,' and 'Activation' is shown by arrows (---->), with dashed arrows for 'Indirect Activation.' The arrows illustrate the direction from gene1 to gene2.",
    "The diagram uses T-bars (----|) to indicate 'Inhibition,' with dashed T-bars for 'Indirect Inhibition,' and arrows (---->) to show 'Activation,' with dashed arrows representing 'Indirect Activation.' The direction of arrows points from gene1 to gene2.",
    "In the pathway diagram, 'Inhibition' is indicated by T-bars (----|), with dashed T-bars for 'Indirect Inhibition,' and 'Activation' is shown by arrows (---->), with dashed arrows for 'Indirect Activation.' The direction of arrows shows the relationship from gene1 to gene2.",
    "The diagram depicts interactions using T-bars (----|) to indicate 'Inhibition,' with dashed T-bars for 'Indirect Inhibition,' and arrows (---->) to show 'Activation,' with dashed arrows representing 'Indirect Activation.' The arrows indicate the direction from gene1 to gene2.",
    "Identify relationships using the diagram's symbols: T-bars (----|) represent 'Inhibition,' with dashed T-bars indicating 'Indirect Inhibition.' Arrows (---->) show 'Activation,' with dashed arrows signifying 'Indirect Activation.' Arrows indicate the flow from gene1 to gene2.",
    "The diagram uses specific symbols: T-bars (----|) for 'Inhibition,' with dashed T-bars for 'Indirect Inhibition.' Arrows (---->) for 'Activation,' with dashed arrows for 'Indirect Activation.' Arrows indicate the direction of the interaction from gene1 to gene2.",
]

# Backward-compatible alias for code that still uses the singular name.
instruction = instructions
# Candidate task-description sentences (third gene of an individual). Values
# are drawn with random.choice when the initial population is built and when a
# description gene is mutated. Every entry ends with the output-format hint
# 'gene1 relationship gene2.' and some sentences appear more than once, which
# raises their sampling weight.
descriptions = [
    "Identify each gene interaction, differentiate between 'direct' and 'indirect' interactions, and format them as 'gene1 relationship gene2.'",
    "Differentiate between 'inhibition' and 'activation' as well as between 'direct' and 'indirect' interactions, and format the results as 'gene1 relationship gene2.'",
    "Analyze and extract each interaction, labeling them as 'direct' or 'indirect' and formatting them as 'gene1 relationship gene2.'",
    "Carefully extract each interaction, distinguishing between 'direct' and 'indirect' interactions, and format them as 'gene1 relationship gene2.'",
    "Classify each interaction as either 'direct' or 'indirect' and format it as 'gene1 relationship gene2.'",
    "Extract and classify each gene interaction as 'direct' or 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Identify and classify each interaction as 'direct' or 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Accurately extract and label each interaction as 'direct' or 'indirect,' formatting them as 'gene1 relationship gene2.'",
    "Identify, classify, and format each interaction as 'gene1 relationship gene2.'",
    "Extract and categorize each interaction, distinguishing between 'direct' and 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Identify and differentiate each interaction as 'direct' or 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Extract each interaction, classify it as 'direct' or 'indirect,' and format it as 'gene1 relationship gene2.'",
    "Analyze and classify each interaction as 'direct' or 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Extract and distinguish each interaction as 'direct' or 'indirect,' formatting them as 'gene1 relationship gene2.'",
    "Identify and distinguish each interaction as 'direct' or 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Extract, classify, and distinguish each interaction as 'direct' or 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Identify and classify each interaction, distinguishing between 'direct' and 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Extract and differentiate each interaction, classifying them as 'direct' or 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Extract and classify each interaction, determining whether it is 'direct' or 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Analyze and classify each interaction, distinguishing between 'direct' and 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Extract and differentiate each interaction, classifying it as 'direct' or 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Extract, classify, and differentiate each interaction as 'direct' or 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Identify each interaction, distinguish between 'direct' and 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Carefully extract each interaction, categorize it as 'direct' or 'indirect,' and format it as 'gene1 relationship gene2.'",
    "Identify and categorize each interaction as 'direct' or 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Carefully extract each interaction, classify it as 'direct' or 'indirect,' and format it as 'gene1 relationship gene2.'",
    "Extract each interaction, categorize it as 'direct' or 'indirect,' and format it as 'gene1 relationship gene2.'",
    "Identify, classify, and differentiate each interaction as 'direct' or 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Extract, classify, and distinguish each interaction as 'direct' or 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Identify and classify each interaction, distinguishing between 'direct' and 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Extract, classify, and differentiate each interaction as 'direct' or 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Identify each interaction, distinguish between 'direct' and 'indirect,' and format them as 'gene1 relationship gene2.'",
    "Carefully extract each interaction, classify it as 'direct' or 'indirect,' and format it as 'gene1 relationship gene2.'",
    "Carefully extract each interaction, categorize it as 'direct' or 'indirect,' and format it as 'gene1 relationship gene2.'"
]


In [None]:



def ask_question(prompt, question, temperature):
    """Ask the LLaMA pipeline one question and return its answer in lower case.

    Args:
        prompt: Instruction text placed before the question.
        question: The concrete question, appended on a new line.
        temperature: Sampling temperature. A positive value enables sampling
            at that temperature; zero or None falls back to greedy decoding.

    Returns:
        The generated continuation, stripped and lower-cased, or None when the
        pipeline call fails (the error is printed, not raised).
    """
    # return_full_text=False: without it the pipeline returns the prompt
    # followed by the continuation, so the "answer" would start with the prompt.
    generation_kwargs = {"max_length": 800, "return_full_text": False}
    if temperature and temperature > 0:
        # temperature only takes effect when sampling is switched on.
        generation_kwargs.update(do_sample=True, temperature=temperature)
    else:
        generation_kwargs["do_sample"] = False

    try:
        response = llama_pipeline(f"{prompt}\n{question}", **generation_kwargs)[0]['generated_text']
        return response.strip().lower()
    except Exception as e:
        # Deliberately best-effort: one failed generation must not abort a
        # whole fitness evaluation, so the caller receives None instead.
        print(f"Error generating response: {e}")
        return None

def calculate_metrics(df):
    """Score predicted relations against the reference relations.

    Args:
        df: DataFrame with a 'relation' column (reference label) and a
            'predict_relation' column (predicted label).

    Returns:
        A pair ``(overall_f1, f1_scores)``: the micro-averaged F1 over all
        rows, and a list of one-vs-rest F1 values in the order activation,
        inhibition, phosphorylation, no information.
    """
    reference = df['relation']
    predicted = df['predict_relation']

    # One binary (label vs. everything else) F1 per relation class.
    per_relation_f1 = [
        f1_score(reference == label, predicted == label, zero_division=0)
        for label in ('activation', 'inhibition', 'phosphorylation', 'no information')
    ]

    micro_f1 = f1_score(reference, predicted, average='micro', zero_division=0)
    return micro_f1, per_relation_f1

# Default location of the training CSV; update with the actual path to your training data.
TRAINING_DATA_PATH = ""

# Relation labels scored by calculate_metrics; free-text answers are mapped onto these.
_RELATION_LABELS = ('activation', 'inhibition', 'phosphorylation', 'no information')


def _extract_relation(answer, echoed_prompt=""):
    """Map a free-text model answer onto one of the known relation labels.

    The label mentioned earliest in the answer wins; 'no information' is
    returned when the answer is missing or mentions none of the labels. A
    leading copy of *echoed_prompt* is removed first, because the prompts
    themselves contain the words 'inhibition' and 'activation'.
    """
    if not answer:
        return 'no information'
    text = answer.strip().lower()
    echoed = echoed_prompt.strip().lower()
    if echoed and text.startswith(echoed):
        text = text[len(echoed):]
    hits = [(text.find(label), label) for label in _RELATION_LABELS if label in text]
    return min(hits)[1] if hits else 'no information'


def get_fitness(prompt, question, temperature, training_path=None):
    """Return the micro-F1 a prompt achieves on the training set.

    Every row of the training CSV is turned into a question, answered by the
    model via ask_question, and the answer is reduced to a relation label
    before scoring with calculate_metrics.

    Args:
        prompt: Prompt text under evaluation.
        question: Template containing ``{gene1}`` and ``{gene2}`` placeholders.
        temperature: Sampling temperature forwarded to ask_question.
        training_path: CSV with 'starter', 'receiver' and 'relation_name'
            columns. Defaults to TRAINING_DATA_PATH.

    Returns:
        The overall (micro-averaged) F1 score.

    Raises:
        FileNotFoundError: If no training path has been configured.
    """
    path = training_path or TRAINING_DATA_PATH
    if not path:
        raise FileNotFoundError(
            "No training data path configured: set TRAINING_DATA_PATH or pass training_path."
        )
    # Row order does not influence the F1 score, so the rows are not shuffled.
    training_df = pd.read_csv(path)

    answers = []
    for _, row in training_df.iterrows():
        starter = row['starter']
        receiver = row['receiver']
        # Labels are compared against lower-case class names.
        # NOTE(review): assumes 'relation_name' holds those class names up to case/whitespace - confirm.
        relation = str(row['relation_name']).strip().lower()

        formatted_question = question.format(gene1=starter.upper(), gene2=receiver.upper())
        llama_answer = ask_question(prompt, formatted_question, temperature)
        answers.append({
            'starter': starter,
            'receiver': receiver,
            'relation': relation,
            'LLaMA_3.2_answer': llama_answer,
            # The raw generation never equals a class name exactly, so it is
            # reduced to a label first; failed generations (None) count as
            # 'no information'.
            'predict_relation': _extract_relation(llama_answer, f"{prompt}\n{formatted_question}"),
            'prompt': question
        })

    answer_df = pd.DataFrame(answers)
    overall_f1, _ = calculate_metrics(answer_df)
    return overall_f1

def select_parents(population, fitness_values, population_size, selection_proportion):
    """Draw parents by fitness-proportionate (roulette-wheel) selection.

    Args:
        population: Candidate individuals.
        fitness_values: Non-negative fitness of each individual, same order.
        population_size: Nominal population size.
        selection_proportion: Fraction of ``population_size`` to draw.

    Returns:
        A list of ``int(population_size * selection_proportion)`` individuals
        sampled with replacement. When every fitness is zero the draw is
        uniform; an empty population yields an empty list.
    """
    num_parents = int(population_size * selection_proportion)
    if not population or num_parents <= 0:
        return []

    fitness_sum = sum(fitness_values)
    if fitness_sum <= 0:
        # All-zero fitness would divide by zero below; fall back to a uniform draw.
        return random.choices(population, k=num_parents)

    probabilities = [fitness / fitness_sum for fitness in fitness_values]
    return random.choices(population, weights=probabilities, k=num_parents)

def crossover(parent1, parent2):
    """Recombine two parents with single-point crossover.

    The cut point is chosen strictly inside the sequence, so each child takes
    at least one gene from each parent. (A cut at position 0 would only hand
    back copies of the parents.)

    Args:
        parent1: First parent sequence (tuple or list).
        parent2: Second parent sequence of the same length.

    Returns:
        A pair ``(child1, child2)``. Parents with fewer than two genes cannot
        be cut and are returned unchanged.
    """
    if len(parent1) < 2:
        return parent1[:], parent2[:]

    crossover_point = random.randint(1, len(parent1) - 1)
    child1 = parent1[:crossover_point] + parent2[crossover_point:]
    child2 = parent2[:crossover_point] + parent1[crossover_point:]
    return child1, child2

def mutate(individual):
    """Return a copy of *individual* with one randomly chosen gene re-drawn.

    Gene positions map to the candidate lists in the order
    (roles, aims, descriptions, instruction).

    Args:
        individual: Sequence of prompt components.

    Returns:
        A new tuple; positions beyond the four known genes are left untouched,
        and an empty individual is returned as an empty tuple.
    """
    if not individual:
        return tuple(individual)

    # `instruction` is the name under which the symbol-legend sentences are
    # defined in this notebook; the previous reference to `instructions`
    # raised NameError whenever the fourth gene was selected.
    gene_pools = (roles, aims, descriptions, instruction)

    mutated_individual = list(individual)
    gene_index = random.randint(0, len(mutated_individual) - 1)
    if gene_index < len(gene_pools):
        mutated_individual[gene_index] = random.choice(gene_pools[gene_index])

    return tuple(mutated_individual)

def _end_with_period(sentence):
    """Return *sentence* without trailing whitespace, adding a final period only if it lacks one."""
    trimmed = sentence.rstrip()
    return trimmed if trimmed.endswith('.') else f"{trimmed}."


def generate_prompt_from_individual(individual):
    """Assemble the prompt text encoded by an individual.

    Args:
        individual: A 4-item sequence ``(role, aim, description, instruction)``.

    Returns:
        ``"As a <role>, <aim>. <description> <instruction>."``. The aim and the
        instruction receive a closing period only when they do not already end
        with one, so sentences that carry their own period no longer produce
        a doubled "..".
    """
    selected_role, selected_aim, selected_description, selected_instruction = individual
    return (
        f"As a {selected_role}, {_end_with_period(selected_aim)} "
        f"{selected_description} {_end_with_period(selected_instruction)}"
    )

# Generate initial population
# Each individual is a (role, aim, description, instruction) tuple, the gene
# order expected by generate_prompt_from_individual and mutate. The legend
# sentences are defined under the name `instruction`; the earlier reference to
# `instructions` raised NameError.
population = [
    (
        random.choice(roles),
        random.choice(aims),
        random.choice(descriptions),
        random.choice(instruction),
    )
    for _ in range(population_size)
]

# Main genetic algorithm loop
# Question template filled with the two gene names for every training row.
question_template = "Extract the interaction between {gene1} and {gene2}."
population_fitness = []

for generation in range(num_generations):
    new_population_fitness = []

    # Children are produced two at a time until the generation is full.
    while len(new_population_fitness) < population_size:
        parent1, parent2 = random.sample(population, 2)
        child1, child2 = crossover(parent1, parent2)

        if random.random() < mutation_rate:
            child1 = mutate(child1)
        if random.random() < mutation_rate:
            child2 = mutate(child2)

        for child in [child1, child2]:
            prompt = generate_prompt_from_individual(child)
            fitness = get_fitness(prompt, question_template, temperature=0.9)
            new_population_fitness.append((child, fitness))

    # Keep the best `population_size` children (an odd size overshoots by one).
    population_fitness = sorted(new_population_fitness, key=lambda x: x[1], reverse=True)[:population_size]

    # The survivors become the breeding pool of the next generation. Without
    # this assignment every generation kept sampling parents from the initial
    # population and nothing was ever inherited.
    population = [survivor for survivor, _ in population_fitness]

    if generation % 5 == 0 or generation == num_generations - 1:
        print(f"Generation {generation + 1}: Best Fitness = {population_fitness[0][1]:.7f}")

best_individual, best_fitness = max(population_fitness, key=lambda x: x[1])
best_prompt = generate_prompt_from_individual(best_individual)

print(f"\nBest prompt of the final generation: {best_prompt}")
print(f"Best fitness of the final generation: {best_fitness:.7f}")



SyntaxError: unterminated string literal (detected at line 4) (<ipython-input-1-63fca5fd33f5>, line 4)

In [None]:

# Define a function to print population stats
def print_population_stats(population_fitness, generation, ascii_keys, detail_every_n_gen=5):
    """Print a fitness report for one generation.

    Every ``detail_every_n_gen``-th generation and the last generation (as
    given by the global ``num_generations``) list the fitness of each
    individual; all other generations print only the maximum fitness.

    Args:
        population_fitness: List of ``(individual, fitness)`` pairs.
        generation: Zero-based generation index; printed one-based.
        ascii_keys: Accepted for call compatibility; not used in the report.
        detail_every_n_gen: Interval between detailed reports.
    """
    if generation % detail_every_n_gen == 0 or generation == num_generations - 1:
        # The header string was previously left unterminated (SyntaxError).
        print(f"Generation {generation + 1}:")
        for i, (individual, fitness) in enumerate(population_fitness):
            print(f"Individual {i + 1}: Fitness = {fitness:.7f}")
    elif not population_fitness:
        # max() raises ValueError on an empty sequence.
        print(f"Generation {generation + 1}: no individuals evaluated")
    else:
        max_fitness = max(fitness for individual, fitness in population_fitness)
        print(f"Generation {generation + 1}: Max Fitness = {max_fitness:.7f}")

# Initialize population with fitness
# NOTE(review): `questions` is not defined anywhere in the visible notebook, so
# the question template of the first GA cell is spelled out here.
question_template = "Extract the interaction between {gene1} and {gene2}."
key_alphabet = string.ascii_letters + string.digits

# The starting individuals are scored up front so the first generation has
# parents to sample from (random.sample raises ValueError on an empty list).
# NOTE(review): relies on `population` from the initial-population cell - confirm that cell runs first.
population_fitness = [
    (individual, get_fitness(generate_prompt_from_individual(individual), question_template, temperature=0.9))
    for individual in population
]
# One random 8-character identifier per individual, kept index-aligned with population_fitness.
ascii_keys = [''.join(random.choices(key_alphabet, k=8)) for _ in population_fitness]


# Main genetic algorithm loop
for generation in range(num_generations):
    # Generate new population
    new_population_fitness = []
    new_ascii_keys = []
    while len(new_population_fitness) < population_size:
        # Select parents randomly from the entire population
        (parent1, _), (parent2, _) = random.sample(population_fitness, 2)
        child1, child2 = crossover(parent1, parent2)

        # Mutate children with a certain probability
        if random.random() < mutation_rate:
            child1 = mutate(child1)
        if random.random() < mutation_rate:
            child2 = mutate(child2)

        # Calculate fitness for new individuals only
        for child in [child1, child2]:
            prompt = generate_prompt_from_individual(child)
            fitness = get_fitness(prompt, question_template, temperature=0.9)
            new_population_fitness.append((child, fitness))
            new_ascii_keys.append(''.join(random.choices(key_alphabet, k=8)))

    # Promote the best children (with their keys) to be the next generation;
    # previously the new generation was computed and then discarded.
    ranked = sorted(
        zip(new_population_fitness, new_ascii_keys),
        key=lambda pair: pair[0][1],
        reverse=True,
    )[:population_size]
    population_fitness = [entry for entry, _ in ranked]
    ascii_keys = [key for _, key in ranked]

    # Print population stats
    print_population_stats(population_fitness, generation, ascii_keys)

# Find the best result after all generations
best_individual, best_fitness = max(population_fitness, key=lambda x: x[1])
best_prompt = generate_prompt_from_individual(best_individual)
best_ascii_key = ascii_keys[population_fitness.index((best_individual, best_fitness))]

print(f"\nBest prompt of the final generation: {best_prompt}")
print(f"Best fitness of the final generation: {best_fitness:.7f}")


In [None]:
def generate_ascii_key(length):
    """Return a random identifier made of ASCII letters and digits.

    Relies on the ``string`` module, which the notebook's import cell must
    provide (it was previously missing, so every call raised NameError).

    Args:
        length: Number of characters to generate; zero or a negative value
            yields an empty string.

    Returns:
        A string of ``length`` characters. Not suitable as a secret: the
        characters come from the ``random`` module.
    """
    # Build the alphabet once instead of once per generated character.
    alphabet = string.ascii_letters + string.digits
    return ''.join(random.choice(alphabet) for _ in range(length))

# Define a function to print population stats
def print_population_stats(population_fitness, generation, ascii_keys, detail_every_n_gen=5):
    """Print a fitness report for one generation.

    Every ``detail_every_n_gen``-th generation and the last generation (as
    given by the global ``num_generations``) list the fitness of each
    individual; all other generations print only the maximum fitness.

    Args:
        population_fitness: List of ``(individual, fitness)`` pairs.
        generation: Zero-based generation index; printed one-based.
        ascii_keys: Accepted for call compatibility; not used in the report.
        detail_every_n_gen: Interval between detailed reports.
    """
    if generation % detail_every_n_gen == 0 or generation == num_generations - 1:
        # The header string was previously left unterminated (SyntaxError).
        print(f"Generation {generation + 1}:")
        for i, (individual, fitness) in enumerate(population_fitness):
            print(f"Individual {i + 1}: Fitness = {fitness:.7f}")
    elif not population_fitness:
        # max() raises ValueError on an empty sequence.
        print(f"Generation {generation + 1}: no individuals evaluated")
    else:
        max_fitness = max(fitness for individual, fitness in population_fitness)
        print(f"Generation {generation + 1}: Max Fitness = {max_fitness:.7f}")

# Initialize population with fitness
# NOTE(review): `questions` is not defined anywhere in the visible notebook, so
# the question template of the first GA cell is spelled out here.
question_template = "Extract the interaction between {gene1} and {gene2}."
ascii_key_length = 8

# The starting individuals are scored up front so the first generation has
# parents to sample from (random.sample raises ValueError on an empty list).
# NOTE(review): relies on `population` from the initial-population cell - confirm that cell runs first.
population_fitness = [
    (individual, get_fitness(generate_prompt_from_individual(individual), question_template, temperature=0.9))
    for individual in population
]
# One random identifier per individual, kept index-aligned with population_fitness.
ascii_keys = [generate_ascii_key(ascii_key_length) for _ in population_fitness]


# Main genetic algorithm loop
for generation in range(num_generations):
    # Generate new population
    new_population_fitness = []
    new_ascii_keys = []
    while len(new_population_fitness) < population_size:
        # Select parents randomly from the entire population
        (parent1, _), (parent2, _) = random.sample(population_fitness, 2)
        child1, child2 = crossover(parent1, parent2)

        # Mutate children with a certain probability
        if random.random() < mutation_rate:
            child1 = mutate(child1)
        if random.random() < mutation_rate:
            child2 = mutate(child2)

        # Calculate fitness for new individuals only
        for child in [child1, child2]:
            prompt = generate_prompt_from_individual(child)
            fitness = get_fitness(prompt, question_template, temperature=0.9)
            new_population_fitness.append((child, fitness))
            # `ascii_key` was never assigned before; every child now gets a fresh key.
            new_ascii_keys.append(generate_ascii_key(ascii_key_length))

    # Promote the best children (with their keys) to be the next generation;
    # previously the new generation was computed and then discarded.
    ranked = sorted(
        zip(new_population_fitness, new_ascii_keys),
        key=lambda pair: pair[0][1],
        reverse=True,
    )[:population_size]
    population_fitness = [entry for entry, _ in ranked]
    ascii_keys = [key for _, key in ranked]

    # Print population stats
    print_population_stats(population_fitness, generation, ascii_keys)

# Find the best result after all generations
best_individual, best_fitness = max(population_fitness, key=lambda x: x[1])
best_prompt = generate_prompt_from_individual(best_individual)
best_ascii_key = ascii_keys[population_fitness.index((best_individual, best_fitness))]

print(f"\nBest prompt of the final generation: {best_prompt}")
print(f"Best fitness of the final generation: {best_fitness:.7f}")


In [None]:
import matplotlib.pyplot as plt

# Expects two parallel, pre-populated lists: all_generations_fitness holds one
# list of fitness values per generation, and all_generations_prompts holds the
# matching list of prompt strings per generation.

x_data = []        # generation number of every plotted individual
y_data = []        # fitness of every plotted individual
prompts_data = []  # prompt text of every plotted individual

# Flatten the per-generation lists into three aligned, point-wise lists.
paired_generations = zip(all_generations_fitness, all_generations_prompts)
for gen_number, (fitness_values, prompts) in enumerate(paired_generations, start=1):
    x_data += [gen_number] * len(fitness_values)
    y_data += fitness_values
    prompts_data += prompts

# One marker per individual.
plt.figure(figsize=(10, 6))
scatter_plot = plt.scatter(x_data, y_data, alpha=0.6, c='blue', edgecolors='w', s=100)

# Label each marker with the prompt that produced it.
for point_index, prompt_text in enumerate(prompts_data):
    anchor = (x_data[point_index], y_data[point_index])
    plt.annotate(prompt_text, anchor, fontsize=8, alpha=0.6)

# Axis decoration: one tick per generation and a light dashed grid.
generation_ticks = range(1, len(all_generations_fitness) + 1)
plt.title('Fitness over Generations with Prompts')
plt.xlabel('Generation')
plt.ylabel('Fitness')
plt.xticks(generation_ticks)
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.show()
