In [10]:
import os 
import re
import pandas as pd

In [11]:
# Excel workbook describing the exercises; it is loaded with pd.read_excel and
# its 'CODE', 'Consigne', 'Réponses acceptées', 'Prompt correct' and
# 'Prompt incorrect' columns are read further down.
TASKS_XLSX_PATH = "tasks/tasks_for_llm_gpt4_new_exo.xlsx"
# Folder scanned for the "*.txt" chat transcripts to parse.
DUCK_DUCK_TXT_FOLDER = "./gpt4_data_new_exo"
# Destination workbook for the parsed results (one row per transcript).
# NOTE(review): the input names say "gpt4" while this one says "gpt4o" -
# presumably the same run; confirm the naming is intentional.
OUTPUT_XLSX_PATH = "data/parsed_results_gpt4o_new_exo.xlsx"

In [12]:
# -------------------------------------------------
# 1) READ THE TASKS.XLSX AND BUILD A LOOKUP
# -------------------------------------------------
# Columns read from the workbook: 'CODE', 'Consigne', 'Réponses acceptées',
# 'Prompt correct', 'Prompt incorrect'.
tasks_df = pd.read_excel(TASKS_XLSX_PATH)

# prompt_lookup: stripped prompt text -> (exercise code, "correct" | "incorrect")
# info_lookup:   exercise code        -> (instruction text, accepted answers)
prompt_lookup = {}
info_lookup = {}

for idx, row in tasks_df.iterrows():
    code = row['CODE']
    consigne = str(row['Consigne']).strip()
    reponses_acceptees = str(row['Réponses acceptées']).strip()
    info_lookup[code] = (consigne, reponses_acceptees)

    for prompt_column, prompt_kind in (
        ('Prompt correct', "correct"),
        ('Prompt incorrect', "incorrect"),
    ):
        cell = row[prompt_column]
        # An empty Excel cell is read as NaN, and str(NaN) would register the
        # literal key "nan" in the lookup; skip missing/blank prompts instead.
        prompt_text = "" if pd.isna(cell) else str(cell).strip()
        if not prompt_text:
            print(f"Warning: Empty '{prompt_column}' for exercise {code}. Not registered.")
            continue

        # The same text can only map to one (code, kind). The later entry still
        # wins, as before, but the collision is now reported because it makes
        # the attribution of matching transcripts ambiguous.
        previous_entry = prompt_lookup.get(prompt_text)
        if previous_entry is not None and previous_entry != (code, prompt_kind):
            print(
                f"Warning: Prompt of exercise {code} ({prompt_kind}) duplicates the one of "
                f"exercise {previous_entry[0]} ({previous_entry[1]}). Keeping the latest."
            )
        prompt_lookup[prompt_text] = (code, prompt_kind)

# -------------------------------------------------
# 2) PREPARE A RESULT DATAFRAME STRUCTURE
# -------------------------------------------------
results = []            # one dict per successfully parsed transcript
iteration_counter = {}  # (exercise code, model, prompt type) -> files seen so far

# -------------------------------------------------
# 3) PARSE EACH TXT FILE IN THE OUTPUT FOLDER
# -------------------------------------------------
output_folder = DUCK_DUCK_TXT_FOLDER
# os.listdir returns entries in arbitrary order; sorting makes the
# per-combination iteration numbers reproducible from one run to the next.
txt_files = sorted(f for f in os.listdir(output_folder) if f.endswith(".txt"))

# Both patterns are loop-invariant, so they are built and compiled once.
# The model alternation is shared so the two patterns cannot drift apart.
known_models_alternation = r"GPT-4o mini|Llama 3\.3 70B|Mistral Small 3"

# Header line of the user message ("Message N sur M de l'utilisateur - date time:"),
# then the prompt itself, lazily captured up to the first line that starts
# with a known model name followed by a colon.
user_prompt_pattern = (
    r"Message\s*\d+\s*sur\s*\d+\s*de\s*l'utilisateur\s*-\s*\d{2}/\d{2}/\d{4}\s*\d{2}:\d{2}:\d{2}:\n"
    r"(.*?)"
    r"(?=\n(?:" + known_models_alternation + r"):)"
)
# Model name, then everything after the colon up to the end of the file.
known_models_pattern = r"(" + known_models_alternation + r"):([\s\S]*)"

user_prompt_re = re.compile(user_prompt_pattern, flags=re.DOTALL)
known_models_re = re.compile(known_models_pattern)

for txt_filename in txt_files:
    file_path = os.path.join(output_folder, txt_filename)

    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    match = user_prompt_re.search(content)

    if not match:
        print(f"Warning: Could not extract user prompt from {txt_filename}. Skipping.")
        continue

    user_prompt = match.group(1).strip()

    if user_prompt not in prompt_lookup:
        print(f"Warning: Prompt not found in tasks lookup for file {txt_filename}. Skipping.")
        continue

    exercise_code, prompt_type = prompt_lookup[user_prompt]
    consigne, reponse_attendue = info_lookup[exercise_code]

    # Search from the end of the user prompt: a "<model>:" string appearing in
    # the file header or inside the prompt text must not be mistaken for the
    # start of the model's answer.
    model_match = known_models_re.search(content, match.end())

    if not model_match:
        print(f"Warning: Could not find known model name in {txt_filename}. Skipping.")
        continue

    model_name = model_match.group(1).strip()
    model_answer = model_match.group(2).strip()

    # Number the transcripts 1, 2, 3, ... within each (exercise, model, prompt type).
    combo_key = (exercise_code, model_name, prompt_type)
    iteration_counter[combo_key] = iteration_counter.get(combo_key, 0) + 1
    iteration_number = iteration_counter[combo_key]

    results.append({
        "Code": exercise_code,
        "model": model_name,
        "prompt": prompt_type,
        "but": consigne,
        "réponse_attendue": reponse_attendue,
        "réponse_llm": model_answer,
        "iteration": iteration_number
    })

# -------------------------------------------------
# 4) BUILD A DATAFRAME AND SAVE TO EXCEL
# -------------------------------------------------
# os.path.dirname returns "" when OUTPUT_XLSX_PATH is a bare filename, and
# os.makedirs("") raises FileNotFoundError, so only create the folder when the
# path actually has a directory component.
output_dir = os.path.dirname(OUTPUT_XLSX_PATH)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# The explicit column list fixes the column order and keeps the header row
# present even when no transcript was parsed (empty `results`).
results_df = pd.DataFrame(
    results,
    columns=["Code", "model", "prompt", "but", "réponse_attendue", "réponse_llm", "iteration"]
)
results_df.to_excel(OUTPUT_XLSX_PATH, index=False)



In [13]:
# -------------------------------------------------
# 5) COUNT THE NUMBER OF RECORDS BY EXERCISE, MODEL, AND PROMPT
# -------------------------------------------------
# Sanity check: one line per (Code, model, prompt) combination with the number
# of parsed transcripts that fell into it.
grouping_columns = ['Code', 'model', 'prompt']
records_per_group = results_df.groupby(grouping_columns).size()
counts_df = records_per_group.reset_index(name='count')
# to_string() prints every row instead of pandas' truncated repr.
print(counts_df.to_string())

    Code        model     prompt  count
0      1  GPT-4o mini    correct     10
1      1  GPT-4o mini  incorrect     10
2      2  GPT-4o mini    correct     10
3      2  GPT-4o mini  incorrect     10
4      3  GPT-4o mini    correct     10
5      3  GPT-4o mini  incorrect     10
6      4  GPT-4o mini    correct     10
7      4  GPT-4o mini  incorrect     10
8      5  GPT-4o mini    correct     10
9      5  GPT-4o mini  incorrect     10
10     6  GPT-4o mini    correct     10
11     6  GPT-4o mini  incorrect     10
12     7  GPT-4o mini    correct     10
13     7  GPT-4o mini  incorrect     10
14     8  GPT-4o mini    correct     10
15     8  GPT-4o mini  incorrect     10
16     9  GPT-4o mini    correct     10
17     9  GPT-4o mini  incorrect     10
18    10  GPT-4o mini    correct     10
19    10  GPT-4o mini  incorrect     10
20    11  GPT-4o mini    correct     10
21    11  GPT-4o mini  incorrect     10
22    12  GPT-4o mini    correct     10
23    12  GPT-4o mini  incorrect     10
