In [None]:
import pandas as pd
import numpy as np
import random

from typing import List, Tuple, Dict

# Path of the CSV file that is read into the population dataframe further down.
filename = "data/adjusted.csv"

In [None]:

def prepare_candidates_map() -> Dict[str, List[str]]:
    """
    Load the candidate values for every mutable category from the Excel workbooks.

    Each category is backed by one workbook under ``roles aims/``; a single named column of that
    workbook is converted to a plain Python list.

    Returns:
        candidates_map (Dict[str, List[str]]): Candidate lists keyed by category name, with the keys
            'role', 'task', 'general_instructions' and 'user_question' (in that order).
    """
    # (category key, workbook path, column name), listed in the order the workbooks are read.
    sources = (
        ("role", 'roles aims/roles.xlsx', "roles"),
        ("general_instructions", 'roles aims/descriptions.xlsx', "general_instructions"),
        ("user_question", 'roles aims/question.xlsx', "user_questions"),
        ("task", 'roles aims/aims.xlsx', "aims"),
    )

    # Read every workbook first, then pull the columns, so failures surface in the same order
    # as a straight-line sequence of reads followed by column lookups.
    loaded = [(category, pd.read_excel(path), column) for category, path, column in sources]
    extracted = {category: frame[column].tolist() for category, frame, column in loaded}

    # Key order of the returned mapping differs from the read order above.
    key_order = ("role", "task", "general_instructions", "user_question")
    return {category: extracted[category] for category in key_order}


def mutate_population(candidates_map: Dict[str, List[str]],
                      population_df: pd.DataFrame,
                      mutation_cols: List[str]) -> pd.DataFrame:
    """
    Mutate one randomly chosen column of the population dataframe.

    A single column name is drawn at random from ``mutation_cols``. Every row of that column is then
    overwritten with a value drawn independently (with replacement) from the matching list in
    ``candidates_map``.

    Note:
        ``population_df`` is modified in place and the very same object is returned. Pass
        ``population_df.copy()`` if the original must be preserved.

    Args:
        candidates_map (Dict[str, List[str]]): Maps each mutable column name to its candidate values.
        population_df (pd.DataFrame): The population dataframe to be mutated.
        mutation_cols (List[str]): Names of the columns that are eligible for mutation.

    Returns:
        pd.DataFrame: ``population_df`` itself, with the picked column reassigned.

    Raises:
        IndexError: If ``mutation_cols`` is empty, or if the picked candidate list is empty while the
            dataframe has at least one row.
        KeyError: If the picked column has no entry in ``candidates_map``.
    """
    picked_mutation_col = random.choice(mutation_cols)
    candidate_values = candidates_map[picked_mutation_col]
    # One independent draw per row; the loop index itself is not needed.
    population_df[picked_mutation_col] = [
        random.choice(candidate_values) for _ in range(len(population_df))
    ]
    return population_df



# Columns that are eligible for mutation; each one has a matching key in the candidates map.
mutation_cols = ["role", "task", "general_instructions", "user_question"]

# Load the current population and the candidate values for every mutable column.
population_df = pd.read_csv(filename, encoding='latin1')
candidates_map = prepare_candidates_map()

# mutate_population assigns into the dataframe it receives and returns it, so
# new_population_df refers to the same object as population_df.
new_population_df = mutate_population(
    candidates_map=candidates_map,
    population_df=population_df,
    mutation_cols=mutation_cols,
)

new_population_df