In [None]:
import pandas as pd
import numpy as np
import random

from typing import List, Tuple, Dict

filename = "data/adjusted.csv"

In [None]:

def prepare_candidates_map() -> Dict[str, List[str]]:
    """
    Load the candidate values for every category from the Excel workbooks under 'roles aims/'.

    One column is read from each workbook and converted to a plain Python list:
        'role'                 <- column "roles" of roles.xlsx
        'general_instructions' <- column "general_instructions" of descriptions.xlsx
        'user_question'        <- column "user_questions" of question.xlsx
        'task'                 <- column "aims" of aims.xlsx

    Returns:
        candidates_map (Dict[str, List[str]]): Category name mapped to the list of values found in the
            corresponding column. Keys are 'role', 'task', 'general_instructions' and 'user_question'.
            NOTE(review): the values are presumably strings; cell contents are not validated here.
    """
    # (category, workbook path, column name), listed in the order the workbooks are loaded.
    sources = (
        ("role", 'roles aims/roles.xlsx', "roles"),
        ("general_instructions", 'roles aims/descriptions.xlsx', "general_instructions"),
        ("user_question", 'roles aims/question.xlsx', "user_questions"),
        ("task", 'roles aims/aims.xlsx', "aims"),
    )

    # Load every workbook first, then pull the columns out, so a missing file is reported
    # before a missing column.
    frames = {category: pd.read_excel(path) for category, path, _ in sources}
    columns = {category: frames[category][column].tolist() for category, _, column in sources}

    # Emit the categories in a fixed key order.
    key_order = ("role", "task", "general_instructions", "user_question")
    return {category: columns[category] for category in key_order}


def mutate_population(candidates_map: Dict[str, List[str]],
                      population_df: pd.DataFrame,
                      mutation_cols: List[str]) -> pd.DataFrame:
    """
    Mutate one randomly chosen column of the population dataframe.

    A single column name is drawn from ``mutation_cols``; every row of that column is then overwritten with a
    value drawn independently (with replacement) from the candidate list stored under that name in
    ``candidates_map``. All other columns are left untouched.

    Note:
        ``population_df`` is modified in place and the very same object is returned. Pass
        ``population_df.copy()`` if the original values must be preserved.

    Args:
        candidates_map (Dict[str, List[str]]): Maps each mutable column name to its candidate values.
        population_df (pd.DataFrame): The population dataframe to be mutated.
        mutation_cols (List[str]): Names of the columns that are eligible for mutation.

    Returns:
        pd.DataFrame: The mutated population dataframe (the same object as ``population_df``).

    Raises:
        IndexError: If ``mutation_cols`` is empty, or if the candidate list of the picked column is empty
            while the dataframe has at least one row.
        KeyError: If the picked column has no entry in ``candidates_map``.
    """
    # Only one column is mutated per call.
    picked_mutation_col = random.choice(mutation_cols)
    candidate_values = candidates_map[picked_mutation_col]

    # One independent draw per row; the list length matches the frame so the assignment aligns positionally.
    population_df[picked_mutation_col] = [random.choice(candidate_values) for _ in range(len(population_df))]
    return population_df



# Columns that are eligible for mutation; each one is also a key of the candidates map.
mutation_cols = ["role", "task", "general_instructions", "user_question"]

# Load the current population and the pool of replacement values.
population_df = pd.read_csv(filename, encoding='latin1')
candidates_map = prepare_candidates_map()

# mutate_population assigns into the frame it receives and returns that same frame.
new_population_df = mutate_population(
    candidates_map=candidates_map,
    population_df=population_df,
    mutation_cols=mutation_cols,
)

# Trailing bare expression so the notebook renders the resulting frame.
new_population_df

Unnamed: 0,role,task,general_instructions,user_question,normalized_fitness
0,Telomere Biology Specialist,Verify the change in {gene2} when {gene1} is m...,Reveal the molecular link between {gene1} and ...,"""Indicate the causal relationship or function ...",0.214286
1,Biomedical Analytics Expert,Probe into the depth of the KEGG Pathway Datab...,Employ the following denointments when express...,"Using 'activation', 'inhibition', and 'phospho...",0.357143
2,Gene Ontology Specialist,Identify the nature of interaction between {ge...,Capture the connection between {gene1} and {ge...,"""Indicate the causal relationship or function ...",0.214286
3,Virologist Specializing in RNA Viruses,Unravel the interplay between {gene1} and {gen...,"Use a explicit phrase such as ""activation"", ""i...",Clearly identify the interrelation between {ge...,0.571429
4,Nutrigenomics Specialist,Your duty is to find how {gene1} might affect ...,Provide an indisputable resolution using the t...,"Based on the KEGG Pathway Database, what is th...",1.000000
...,...,...,...,...,...
95,Immunogeneticist involved in ABO genetics,"Through the KEGG Pathway Database, determine h...","Monitor, and release data, relating {gene1} an...","Referring to the KEGG Pathway Database, what i...",0.571429
96,Genetic Immunologist,Unravel the interplay between {gene1} and {gen...,"Use a explicit phrase such as ""activation"", ""i...","""Please report the interaction between {gene1}...",0.500000
97,Virologist Specializing in RNA Viruses,Illustrate the mechanism of interaction betwee...,Specify the peculiar bio chemical interaction ...,According to the KEGG Pathway Database analysi...,0.500000
98,Biomedical Analytics Expert,Develop a comprehensive understanding of how {...,VO favorites enclosed state citations often br...,Clearly identify the interrelation between {ge...,0.285714
