# Prepare for Kaggle

In [6]:
import pandas as pd
import pandas as pd 
import numpy as np 
from foodrec.config.structure.dataset_enum import ModelEnum 
from foodrec.evaluation.create_dataset import create_dataset
from foodrec.evaluation.is_ketogen import is_ketogenic, calc_keto_ratio
from foodrec.config.structure.paths import CONVERSATION, DATASET_PATHS
import json
from foodrec.evaluation.metrics.metrics import macro_over_queries,filter_search, micro_over_queries, accuracy,plot_pr_curves, f1_score, mean_average_precision_over_queries, mean_pr_auc_over_queries, bias_conformity_rate_at_k
from foodrec.data.all_recipe import AllRecipeLoader
from typing import Dict, List, Any, Tuple
from collections import Counter
from foodrec.agents.agent_names import AgentEnum
from foodrec.tools.ingredient_normalizer import IngredientNormalisation
from foodrec.evaluation.is_ketogen import calc_keto_ratio
from analysis_helper.load_dataset import check_availability
from foodrec.config.structure.dataset_enum import DatasetEnum
from foodrec.evaluation.reward_evaluation import final_episode_reward, routing_accuracy
from datetime import datetime
import math
from analysis_helper.load_dataset import get_dicts_set, get_search_engine
from analysis_helper.get_lowes_highest import take_25_lowest_keto
from analysis_helper.get_metrics import calc_metrics
from analysis_helper.mean_rounds import calc_rounds
from analysis_helper.query_analysis import calc_other_recommendation_parameters
from analysis_helper.calc_routing_reward import get_reward_set, reward_average_calculation
from analysis_helper.calc_path_length import calc_path_length
from analysis_helper.most_common_path import most_common_path
from analysis_helper.time import calc_mean_time
from analysis_helper.token_calculation import calc_openai_costs, calc_gemini_costs
from analysis_helper.reflector_analysis import cals_reflector_accuracy
from analysis_helper.ketogen_available import ketogen_available
from pathlib import Path

In [3]:
def get_paths(model_name):
    """
    Build the conversation paths used for one model.

    Args:
        model_name: Name of the model folder below ``CONVERSATION``.

    Returns:
        dict mapping the keys ``PATH_NO_BIASE``, ``PATH_SYSTEM_BIASE``,
        ``PATH_SEARCH_ENGINE``, ``PATH_SEARCH_BIASE`` and ``PATH_BOTH`` to
        paths. All entries live below ``CONVERSATION / model_name`` except
        ``PATH_SEARCH_ENGINE``, which always points at the ``res_one.json``
        file in the Gemini folder regardless of ``model_name``.
    """
    model_root = CONVERSATION / model_name
    # The search-engine result file is shared: it is taken from the Gemini folder.
    search_engine_file = (
        CONVERSATION / ModelEnum.Gemini.name / "search_engine" / "res_one.json"
    )

    return {
        "PATH_NO_BIASE": model_root / "no_biase",
        "PATH_SYSTEM_BIASE": model_root / "system_biase",
        "PATH_SEARCH_ENGINE": search_engine_file,
        "PATH_SEARCH_BIASE": model_root / "search_biase",
        "PATH_BOTH": model_root / "both_biase",
    }

In [4]:
# Read zw_personas.csv from the dataset directory into a DataFrame.
# NOTE(review): presumably the persona/query set used for the evaluation — confirm.
query_set = pd.read_csv(DATASET_PATHS / "zw_personas.csv")


In [7]:
def rename_files_remove_question_marks(directory):
    """
    Rename every ``*.jsonl`` file directly inside *directory* so that its
    name no longer contains a '?' character.

    Args:
        directory: Directory to process (``str`` or ``Path``). Only the top
            level is scanned, sub-directories are not visited.

    Returns:
        None. Renames, skipped files and errors are reported via ``print``.

    A file is skipped (with a warning) when a file with the cleaned name
    already exists, so no existing file is ever overwritten. If the directory
    does not exist a message is printed and nothing else happens.
    """
    directory_path = Path(directory)

    if not directory_path.exists():
        print(f"Verzeichnis existiert nicht: {directory}")
        return

    # Snapshot (and sort) the listing first: the directory is modified while
    # we walk it, and a fixed order keeps the log output reproducible.
    for file_path in sorted(directory_path.glob("*.jsonl")):
        old_name = file_path.name

        # Build the new name without '?'
        new_name = old_name.replace("?", "")
        if new_name == old_name:
            continue

        new_path = file_path.with_name(new_name)

        # Path.rename silently replaces an existing target on POSIX systems,
        # so refuse to rename when the cleaned name is already taken.
        if new_path.exists():
            print(f"Warnung: {new_name} existiert bereits! Überspringe {old_name}")
            continue

        try:
            file_path.rename(new_path)
        except OSError as e:
            print(f"Fehler beim Umbenennen von {old_name}: {e}")
        else:
            print(f"Umbenannt: {old_name} -> {new_name}")

def rename_all_jsonl_files(model_names):
    """
    Strip '?' from the file names in every path returned by ``get_paths``
    for each of the given models.

    Args:
        model_names: Iterable of model names passed on to ``get_paths``.

    Returns:
        None. Progress and errors are reported via ``print``.

    Directory entries are handed to ``rename_files_remove_question_marks``;
    the ``PATH_SEARCH_ENGINE`` entry is a single file and is renamed here.
    """
    for model_name in model_names:
        model_paths = get_paths(model_name)

        for path_name, path in model_paths.items():
            print(f"\nVerarbeite {path_name} für Model {model_name}:")

            if path_name == "PATH_SEARCH_ENGINE":
                # Special case: this entry is one JSON file, not a directory.
                engine_file = Path(path)
                if not (engine_file.exists() and "?" in engine_file.name):
                    continue

                cleaned_name = engine_file.name.replace("?", "")
                destination = engine_file.parent / cleaned_name
                try:
                    engine_file.rename(destination)
                    print(f"Umbenannt: {engine_file.name} -> {cleaned_name}")
                except Exception as e:
                    print(f"Fehler beim Umbenennen: {e}")
            elif Path(path).is_dir():
                # Regular case: process all JSONL files of the directory.
                rename_files_remove_question_marks(path)


In [8]:
model_names = [ModelEnum.Gemini.name, ModelEnum.OpenAI.name, ModelEnum.GEMINIPRO.name]  # Replace with your actual model names
# Strip '?' from the conversation file names of every listed model.
rename_all_jsonl_files(model_names)



Verarbeite PATH_NO_BIASE für Model Gemini:
Umbenannt: 70_what_grapes_dishes_do_not_contain_ingredient_mint?_Gemini.jsonl -> 70_what_grapes_dishes_do_not_contain_ingredient_mint_Gemini.jsonl
Umbenannt: 44_what_are_garnishes_dishes_which_don't_consist_of_minced_garlic_clove,_sweetened_flaked_coconut?_Gemini.jsonl -> 44_what_are_garnishes_dishes_which_don't_consist_of_minced_garlic_clove,_sweetened_flaked_coconut_Gemini.jsonl
Umbenannt: 36_what_are_a1-sauce_dishes_that_do_not_have_ingredient_red_pepper_jelly?_Gemini.jsonl -> 36_what_are_a1-sauce_dishes_that_do_not_have_ingredient_red_pepper_jelly_Gemini.jsonl
Umbenannt: 90_what_north_american_recipes_can_i_cook_without_new_york_strip_steaks?_Gemini.jsonl -> 90_what_north_american_recipes_can_i_cook_without_new_york_strip_steaks_Gemini.jsonl
Umbenannt: 11_could_you_recommend_gumbo_dishes_which_do_not_contain_wild_rice,_green_onion_tops?_Gemini.jsonl -> 11_could_you_recommend_gumbo_dishes_which_do_not_contain_wild_rice,_green_onion_tops_Ge

In [5]:
# Resolve the conversation path dictionaries (see get_paths) once per model.
path_gemini = get_paths(ModelEnum.Gemini.name)
path_openai = get_paths(ModelEnum.OpenAI.name)
path_gemini_pro = get_paths(ModelEnum.GEMINIPRO.name)

In [9]:
def remove_apostrophes_from_filenames(directory):
    """
    Remove apostrophes from the names of all ``*.jsonl`` files directly
    inside *directory* (e.g. ``don't`` becomes ``dont``).

    Args:
        directory: Directory to process (``str`` or ``Path``).

    Returns:
        None. Each rename, each skipped file, each error and the final number
        of renamed files are reported via ``print``.

    A file is skipped when a file with the cleaned name already exists. If
    the directory does not exist a message is printed and nothing is renamed.
    """
    base_dir = Path(directory)

    if not base_dir.exists():
        print(f"Verzeichnis existiert nicht: {directory}")
        return

    renamed_count = 0

    # Walk over all JSONL files of the directory
    for source in base_dir.glob("*.jsonl"):
        old_name = source.name
        new_name = old_name.replace("'", "")

        # Nothing to do for names without an apostrophe
        if new_name == old_name:
            continue

        target = source.parent / new_name
        try:
            if not target.exists():
                source.rename(target)
                print(f"✓ Umbenannt: {old_name} -> {new_name}")
                renamed_count += 1
            else:
                # Never overwrite a file that already carries the new name
                print(f"Warnung: {new_name} existiert bereits! Überspringe {old_name}")
        except Exception as e:
            print(f"✗ Fehler beim Umbenennen von {old_name}: {e}")

    print(f"\nGesamt umbenannte Dateien: {renamed_count}")

def fix_all_paths_apostrophes(model_names):
    """
    Remove apostrophes from the JSONL file names in all directory paths of
    the given models.

    Args:
        model_names: Iterable of model names passed on to ``get_paths``.

    Returns:
        None. A banner per model and a header per processed path are printed;
        the renaming itself is done by ``remove_apostrophes_from_filenames``.
    """
    separator = "=" * 50

    for model_name in model_names:
        model_paths = get_paths(model_name)

        print(f"\n{separator}")
        print(f"Verarbeite Modell: {model_name}")
        print(f"{separator}")

        for path_name, path in model_paths.items():
            # The SEARCH_ENGINE entry is a single JSON file, not a directory
            is_search_engine_file = path_name == "PATH_SEARCH_ENGINE"
            if is_search_engine_file or not Path(path).is_dir():
                continue

            print(f"\n--- {path_name} ---")
            remove_apostrophes_from_filenames(path)

In [10]:
# Strip apostrophes from the JSONL file names of every model in model_names.
fix_all_paths_apostrophes(model_names)



Verarbeite Modell: Gemini

--- PATH_NO_BIASE ---
✓ Umbenannt: 57_what_are_asia_dishes_which_don't_consist_of_butter_-_flavored_cooking_spray_Gemini.jsonl -> 57_what_are_asia_dishes_which_dont_consist_of_butter_-_flavored_cooking_spray_Gemini.jsonl
✓ Umbenannt: 99_what_stocks_dishes_don't_have_dried_navy_beans_Gemini.jsonl -> 99_what_stocks_dishes_dont_have_dried_navy_beans_Gemini.jsonl
✓ Umbenannt: 39_what_are_brown-rice_dishes_which_don't_consist_of_white_onions,_almonds,_extra_lean_ground_beef_Gemini.jsonl -> 39_what_are_brown-rice_dishes_which_dont_consist_of_white_onions,_almonds,_extra_lean_ground_beef_Gemini.jsonl
✓ Umbenannt: 53_what_are__sole-and-flounder_dishes_which_don't_consist_of_orange_peel_Gemini.jsonl -> 53_what_are__sole-and-flounder_dishes_which_dont_consist_of_orange_peel_Gemini.jsonl
✓ Umbenannt: 91_what_oysters_dishes_don't_have_fish_fumet_Gemini.jsonl -> 91_what_oysters_dishes_dont_have_fish_fumet_Gemini.jsonl
✓ Umbenannt: 59_what_are_scallops_dishes_which_don't_