In [8]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_ollama import OllamaEmbeddings
from langchain_core.documents import Document
from langchain_ollama import OllamaLLM
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import GoogleGenerativeAI
from langchain_openai import ChatOpenAI
from inspect import signature
from yaml import safe_load
import pandas as pd 
import tqdm
import re
import json
from pathlib import Path
import os
import faiss
import gc
from typing import Tuple
import dotenv
import time
OUTPUT_DIR = Path("test_results")
today = pd.Timestamp.now().strftime("%Y-%m-%d")

dotenv.load_dotenv()


False

In [116]:
class XRIFGenerator:
    def __init__(self, 
                 prompt_template: str,
                 waypoints_csv: str,
                 language_model_type = "Ollama",
                 embed_model_type = "Ollama",
                 language_model: str = "deepseek-r1:1.5b",
                 embed_model: str = "deepseek-r1:1.5b",
                 chat_kwargs: dict = {"num_predict": 500, "temperature": 0.2, "top_p": 0.2},
                 fetch_all: bool = True):
        self.waypoints_list = []
        self.kwargs = chat_kwargs
        self.prompt_template = prompt_template
        self.waypoints_csv = waypoints_csv
        self.language_model_type = language_model_type
        self.x_coord = 0
        self.y_coord = 0
        print(f"Loading model: {language_model}...")
        
        if embed_model_type == "Ollama":
            embed = OllamaEmbeddings(model = embed_model)
        elif embed_model_type == "HuggingFace":
            embed = HuggingFaceEmbeddings(model_name = embed_model)
        else:
            raise Exception("Invalid embed_model_type")

        if language_model_type == "Ollama":
            self.llm = OllamaLLM(model=language_model, **self.filter_kwargs_for_OllamaLLM(self.kwargs))
        elif language_model_type == "google":
            if os.getenv("GOOGLE_API_KEY"):
                self.llm = GoogleGenerativeAI(model=language_model, **self.kwargs)
            else:
                raise Exception("GOOGLE_API_KEY not found in environment variables")
        elif language_model_type == "OpenAI":
            try: 
                self.llm = ChatOpenAI(model=language_model, **self.filter_kwargs_for_OpenAI(self.kwargs))
            except Exception as e:
                print(f"Error loading OpenAI model: {e}")
        else:
            raise Exception("Invalid language_model_type")

        self.output_parser = StrOutputParser()
        waypoints = pd.read_csv(self.waypoints_csv)
        waypoint_cols = ['Location', 'X co-ordinate', 'Y co-ordinate', 'Floor', 'Section', 'Keywords']
        given_waypoints_cols = list(waypoints.columns)
        if given_waypoints_cols != waypoint_cols:
            raise Exception("Columns do not match, please provide a new dataset")
        
        num_rows = len(waypoints)
        print(f"Processing {num_rows} waypoints...")
        with tqdm.tqdm(total=num_rows) as pbar:
            for index, row in waypoints.iterrows():
                self.waypoints_list.append(Document(id = f"Waypoint_{index}", page_content=f"A waypoint called {row['Location']} exists at X-Coordinate, {row['X co-ordinate']}, and Y-Coordinate, {row['Y co-ordinate']}, apart of Section, {row['Section']}. Words associated with the waypoint, {row['Location']} are {row['Keywords']}."))
                pbar.update(1)
        
        index = faiss.IndexFlatL2(len(embed.embed_query("Hello World!")))

        self.vector_store = FAISS(
            embedding_function=embed,
            index=index,
            docstore=InMemoryDocstore(),
            index_to_docstore_id={}            
        )
        
        print("Adding waypoints to vector store...")
        self.vector_store.add_documents(self.waypoints_list)
        print("Creating retriever...")
        k = len(self.waypoints_list) if fetch_all else 5
        
        self.retriever = self.vector_store.as_retriever(
            search_type = "mmr",
            search_kwargs = {'k': k, 'fetch_k': len(self.waypoints_list)},
        )

        with open(self.prompt_template, 'r') as file:
            prompt_dict = safe_load(file)
            self.prompt = prompt_dict['prompt']
            self.prompt_name = prompt_dict['prompt_name']
        
        print("Creating prompt template...")
        self.prompt_template = PromptTemplate(template=self.prompt, input_variables=["documents", "query", "starting_location"])

        print("Creating RAG chain...")
        self.rag_chain = self.prompt_template | self.llm | self.output_parser
        
    def filter_kwargs_for_OllamaLLM(self, kwargs: dict) -> dict:
        chatollama_params = signature(OllamaLLM.__init__).parameters
        return {k: v for k, v in kwargs.items() if k in chatollama_params}
    
    def filter_kwargs_for_OpenAI(self, kwargs: dict) -> dict:
        chatopenai_params = signature(ChatOpenAI.__init__).parameters
        return {k: v for k, v in kwargs.items() if k in chatopenai_params}
    
    def generate_xrif_with_deepseek_model(self, query: str):
        xrif_response = None
        documents = self.retriever.invoke(query)
        num_waypoints_injected = len(documents)
        doc_texts = "\n".join([doc.page_content for doc in documents])

        rendered_prompt = self.prompt_template.format(documents = doc_texts, query= query, starting_location = f"X-Coordinate: {self.x_coord}, Y-Coordinate: {self.y_coord}")

        llm_response = self.rag_chain.invoke({"documents": doc_texts, "query": query, "starting_location": f"X-Coordinate: {self.x_coord}, Y-Coordinate: {self.y_coord}"})

        xrif_response = re.sub(r"<think>.*?</think>", "", llm_response, flags=re.DOTALL)

        xrif_response = re.sub(r"\n", "", xrif_response)

        xrif_response = re.sub(r"\\n", "", xrif_response)

        start = xrif_response.find('{')
        end = xrif_response.rfind('}')

        if start != -1 and end != -1:
            xrif_response = xrif_response[start:end+1]
            try:
                xrif_response = json.loads(xrif_response)
            except json.JSONDecodeError as e:
                print("Error parsing JSON response")

        
        return xrif_response, llm_response, rendered_prompt, doc_texts, num_waypoints_injected
    
    def batch_test_run(self, df: pd.DataFrame, output_folder: Path):
        num_rows = len(df)
        print(f"Processing {num_rows} test prompts...")
        with tqdm.tqdm(total=num_rows) as pbar:
            for index, row in df.iterrows():
                now = pd.Timestamp.now().strftime("%Y-%m-%d-%H-%M-%S")
                dump_json = {}
                error_message = 'No Error'
                note = ''
                query = row['Prompt']
                self.x_coord = row['Starting X']
                self.y_coord = row['Starting Y']
                expected_response = row['Expected Response']
                llm_response, xrif_response = self.generate_xrif_with_deepseek_model(query)

                dump_json['Data Set'] = row['Data Set']
                dump_json['Experiment ID'] = row['Experiment ID']
                dump_json['Prompt ID'] = row['Prompt ID']
                dump_json['Prompt'] = query
                dump_json['Full LLM Response'] = str(llm_response)
                dump_json['XRIF Generated'] = xrif_response if isinstance(xrif_response, dict) else str(xrif_response)
                dump_json['Starting X'] = self.x_coord
                dump_json['Starting Y'] = self.y_coord
                dump_json['Expected Response'] = expected_response
                dump_json['Expected Response Type'] = row['Expected Response Type']
                expected_response = list(expected_response)
                if xrif_response and type(xrif_response) == dict:
                    if 'actions' in xrif_response.keys():
                        actions = xrif_response['actions']
                        if len(expected_response) == len(actions):
                            if row['Expected Response Type'] == 'Nav':
                                list_of_locations = [action['input']['name'] for action in actions if (action['action'] == 'navigate') and ('input' in action) and ('name' in action['input'])]
                                set_of_locations = set(list_of_locations)
                                set_of_expected_locations = set(expected_response)
                                if set_of_locations == set_of_expected_locations:
                                    note = "All locations are present in the response"
                                    dump_json['Notes'] = note
                            for i in range(len(xrif_response['actions'])):
                                if actions[i]['action'] == 'navigate' :
                                    if type(expected_response[i]) == 'str':
                                        if 'input' in actions[i]:
                                            if 'name' in actions[i]['input'].keys():
                                                if actions[i]['input']['name'] == expected_response[i] or (type(expected_response[i]) == 'list' and actions[i]['input']['name'] in expected_response[i]):
                                                    continue
                                                else:
                                                    error_message = f"Expected location name: {actions[i]['name']} does not match with the provided location name: {expected_response[i]}"
                                                    dump_json['Error Message'] = error_message
                                                    break
                                            else:
                                                error_message = f"Response Error Missing field: 'name' in action object {i}"
                                                dump_json['Error Message'] = error_message
                                                break
                                        else:
                                            error_message = f"Response Error Missing field: 'input' in action object {i}"
                                            dump_json['Error Message'] = error_message
                                            break
                                    else:
                                        error_message = f"Expected Response Error: Expected Response Type is not 'Nav' for action object {i}"
                                        dump_json['Error Message'] = error_message
                                        break
                                elif actions[i]['action'] == 'wait':
                                    if type(expected_response[i]) == 'tuple':
                                        if 'input' in actions[i]:
                                                if actions[i]['action'] != expected_response[i][0]:
                                                    error_message = f"Expected action at action object {i} : {expected_response[i][0]}  does not match with the provided action: {actions[i]['action']}"
                                                    dump_json['Error Message'] = error_message
                                                    break
                                                else:
                                                    if (int(actions[i]['input']))/60 == expected_response[i][1]:
                                                        error_message = f"Expected wait time: {actions[i]['input']} does not match with the provided wait time: {expected_response[i][1]}"
                                                        dump_json['Error Message'] = error_message
                                                        break
                                        else:
                                            error_message = f"Response Error Missing field: 'input' in action object {i}"
                                            dump_json['Error Message'] = error_message
                                            break
                                    else:
                                        error_message = f"Expected Response Error: Expected Response Type is not 'wait' for action object {i}"
                                        dump_json['Error Message'] = error_message
                                        break
                                elif actions[i]['action'] == 'speak':
                                    if type(expected_response[i]) == 'tuple':
                                        if ('action' in actions[i]) and ('input' in actions[i]):
                                            if actions[i]['action'] == expected_response[i][0] and (actions[i]['input'] == expected_response[i][1] or (type(expected_response[i]) == 'list' and actions[i]['input'] in expected_response[i])):
                                                continue
                                            else:
                                                error_message = f"Expected speak message: {expected_response[i]} does not match with the generated speak message: {actions[i]['input']} or Expected action: {expected_response[i][0]} does not match with the generated action: {actions[i]['action']}"
                                                dump_json['Error Message'] = error_message
                                                break
                                        else:
                                            error_message = f"Response Error Missing field: 'input' or 'action' in action object {i}"
                                            dump_json['Error Message'] = error_message
                                            break
                                    else:
                                        error_message = f"Expected Response Error: Expected Response Type is not correct for action object {i}"
                                        dump_json['Error Message'] = error_message
                                        break
                else:
                    error_message = "XRIF response is Invalid"
                    dump_json['Error Message'] = error_message
                
                if not os.path.exists(OUTPUT_DIR / output_folder):
                    os.makedirs(OUTPUT_DIR / output_folder)

                with open(OUTPUT_DIR / output_folder / f"Prompt_{row['Prompt ID']}_{now}.json", 'w') as file:
                    json.dump(dump_json, file, indent=4)
                pbar.update(1)

        return OUTPUT_DIR / output_folder

    def generate_xrif_with_google_model(self, query: str):
        xrif_response = None
        documents = self.retriever.invoke(query)
        num_waypoints_injected = len(documents) 
        doc_texts = "\n".join([doc.page_content for doc in documents])

        rendered_prompt = self.prompt_template.format(documents=doc_texts, query = query, starting_location = f"X-Coordinate: {self.x_coord}, Y-Coordinate: {self.y_coord}")

        llm_response = self.rag_chain.invoke({"documents": doc_texts, "query": query, "starting_location": f"X-Coordinate: {self.x_coord}, Y-Coordinate: {self.y_coord}"})

        try:
            xrif_response = llm_response.strip("```json\n```")
            xrif_response = json.loads(xrif_response)
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON response: {e}")
            xrif_response = llm_response

        return xrif_response, llm_response, rendered_prompt, doc_texts, num_waypoints_injected

    def eval_xrif(self, *, eval_json: dict, xrif_gen: dict, xrif_ex: list, response_type):
        error_message = None
        dump_json = eval_json
        xrif_ex = list(xrif_ex)
        if xrif_gen and type(xrif_gen) == dict:
            if 'actions' in xrif_gen.keys():
                actions = xrif_gen['actions']
                if len(xrif_ex) == len(actions):
                    if response_type == 'Nav':
                        list_of_locations = [action['input']['name'] for action in actions if ('action' in action and action['action'] == 'navigate') and ('input' in action) and ('name' in action['input'])]
                        set_of_locations = set(list_of_locations)
                        set_of_expected_locations = set(xrif_ex)
                        if set_of_locations == set_of_expected_locations:
                            note = "All locations are present in the response"
                            dump_json['Notes'] = note
                    for i in range(len(xrif_gen['actions'])):
                        if 'action' in actions[i]:
                            if actions[i]['action'] == 'navigate' :
                                if type(xrif_ex[i]) is str:
                                    if 'input' in actions[i]:
                                        if 'name' in actions[i]['input'].keys():
                                            if actions[i]['input']['name'] == xrif_ex[i] or (type(xrif_ex[i]) == 'list' and actions[i]['input']['name'] in xrif_ex[i]):
                                                continue
                                            else:
                                                error_message = f"Expected location name: {actions[i]['input']['name']} does not match with the provided location name: {xrif_ex[i]}"
                                                dump_json['Error Message'] = error_message
                                                break
                                        else:
                                            error_message = f"Response Error Missing field: 'name' in action object {i}"
                                            dump_json['Error Message'] = error_message
                                            break
                                    else:
                                        error_message = f"Response Error Missing field: 'input' in action object {i}"
                                        dump_json['Error Message'] = error_message
                                        break
                                else:
                                    error_message = f"Expected Response Error: Expected Response Type is not 'Nav' for action object {i}"
                                    dump_json['Error Message'] = error_message
                                    break
                            elif actions[i]['action'] == 'wait':
                                if type(xrif_ex[i]) is tuple:
                                    if 'input' in actions[i]:
                                            if actions[i]['action'] != xrif_ex[i][0]:
                                                error_message = f"Expected action at action object {i} : {xrif_ex[i][0]}  does not match with the provided action: {actions[i]['action']}"
                                                dump_json['Error Message'] = error_message
                                                break
                                            else:
                                                if (int(actions[i]['input']))/60 == xrif_ex[i][1]:
                                                    error_message = f"Expected wait time: {actions[i]['input']} does not match with the provided wait time: {xrif_ex[i][1]}"
                                                    dump_json['Error Message'] = error_message
                                                    break
                                    else:
                                        error_message = f"Response Error Missing field: 'input' in action object {i}"
                                        dump_json['Error Message'] = error_message
                                        break
                                else:
                                    error_message = f"Expected Response Error: Expected Response Type is not 'wait' for action object {i}"
                                    dump_json['Error Message'] = error_message
                                    break
                            elif actions[i]['action'] == 'speak':
                                if type(xrif_ex[i]) is tuple:
                                    if ('action' in actions[i]) and ('input' in actions[i]):
                                        if actions[i]['action'] == xrif_ex[i][0] and (actions[i]['input'] == xrif_ex[i][1] or (type(xrif_ex[i]) == 'list' and actions[i]['input'] in xrif_ex[i])):
                                            continue
                                        else:
                                            error_message = f"Expected speak message: {xrif_ex[i]} does not match with the generated speak message: {actions[i]['input']} or Expected action: {xrif_ex[i][0]} does not match with the generated action: {actions[i]['action']}"
                                            dump_json['Error Message'] = error_message
                                            break
                                    else:
                                        error_message = f"Response Error Missing field: 'input' or 'action' in action object {i}"
                                        dump_json['Error Message'] = error_message
                                        break
                                else:
                                    error_message = f"Expected Response Error: Expected Response Type is not correct for action object {i}"
                                    dump_json['Error Message'] = error_message
                                    break
                        else:
                            error_message = f"Response Error Missing field: 'action' in action object {i}"
                            dump_json['Error Message'] = error_message
                            break
        else:
            error_message = "XRIF response is Invalid"
            dump_json['Error Message'] = error_message
        if not error_message:
            dump_json['Error Message'] = ""
        return dump_json
    
    def generate_xrif_with_openai_model(self, query: str):
        xrif_response = None
        documents = self.retriever.invoke(query)
        num_waypoints_injected = len(documents)
        doc_texts = "\n".join([doc.page_content for doc in documents])
        rendered_prompt = self.prompt_template.format(documents=doc_texts, query = query, starting_location = f"X-Coordinate: {self.x_coord}, Y-Coordinate: {self.y_coord}")

        llm_response = self.rag_chain.invoke({"documents": doc_texts, "query": query, "starting_location": f"X-Coordinate: {self.x_coord}, Y-Coordinate: {self.y_coord}"})

        try:
            xrif_response = llm_response.strip("```json\n```")
            xrif_response = json.loads(xrif_response)
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON response: {e}")
            xrif_response = llm_response

        return xrif_response, llm_response, rendered_prompt, doc_texts, num_waypoints_injected

    def xrif_inference(self, query: str):
        if self.language_model_type == "Ollama":
            return self.generate_xrif_with_deepseek_model(query)
        elif self.language_model_type == "google":
            return self.generate_xrif_with_google_model(query)
        elif self.language_model_type == "OpenAI":
            return self.generate_xrif_with_openai_model(query)
        else:
            raise Exception("Invalid model type")
        
    def batch_experiment_run(self, df: pd.DataFrame, output_folder: Path, sleep_time: int = 0):
        num_rows = len(df)
        print(f"Processing {num_rows} test prompts...")
        with tqdm.tqdm(total=num_rows) as pbar:
            for index, row in df.iterrows():
                now = pd.Timestamp.now().strftime("%Y-%m-%d-%H-%M-%S")
                prompt_result = {}
                query = row['Prompt']
                self.x_coord = row['Starting X']
                self.y_coord = row['Starting Y']
                expected_response = row['Expected Response']
                xrif_response, llm_response, rendered_prompt, doc_texts, num_waypoints_injected = self.xrif_inference(query)
                prompt_result['rendered_prompt'] = rendered_prompt
                prompt_result['Waypoints Injected'] = doc_texts
                prompt_result['num_waypoints_injected'] = num_waypoints_injected
                prompt_result['Data Set'] = row['Data Set']
                prompt_result['Experiment ID'] = row['Experiment ID']
                prompt_result['Prompt ID'] = row['Prompt ID']
                prompt_result['Prompt'] = query
                prompt_result['Full LLM Response'] = str(llm_response)
                prompt_result['XRIF Generated'] = xrif_response if isinstance(xrif_response, dict) else str(xrif_response)
                prompt_result['Starting X'] = self.x_coord
                prompt_result['Starting Y'] = self.y_coord
                prompt_result['Expected Response'] = expected_response
                prompt_result['Expected Response Type'] = row['Expected Response Type']
                expected_response = list(expected_response)
                dump_json = self.eval_xrif(eval_json = prompt_result, xrif_gen = xrif_response, xrif_ex = expected_response, response_type = row['Expected Response Type'])
                if not os.path.exists(OUTPUT_DIR / output_folder):
                    os.makedirs(OUTPUT_DIR / output_folder)

                with open(OUTPUT_DIR / output_folder / f"Prompt_{row['Prompt ID']}_{now}.json", 'w') as file:
                    json.dump(dump_json, file, indent=4)

                if sleep_time > 0:
                    time.sleep(sleep_time)
                pbar.update(1)

        return OUTPUT_DIR / output_folder

In [10]:
def process_test_dataset(dataset: pd.DataFrame):
    dataset['Expected Response'] = dataset['Expected Response'].to_list()
    dataset.dropna(subset=['Prompt', 'Expected Response'], inplace=True)
    
    return dataset


def load_experiment_from_config(config_file: Path) -> Tuple[XRIFGenerator, pd.DataFrame, Path]:
    with open(config_file, 'r') as file:
        config_dict = safe_load(file)
        test_dataset_path = config_dict['test_dataset']

        test_dataset = pd.read_csv(test_dataset_path)
        output_folder = config_dict['output_folder']

        del config_dict['output_folder']
        del config_dict['test_dataset']

        test_dataset = process_test_dataset(test_dataset)
        
        return XRIFGenerator(**config_dict), test_dataset, output_folder

In [None]:
google_xrif = XRIFGenerator(prompt_template="prompts/e7_xrif_actions_6.yaml", 
                            waypoints_csv="waypoint_datasets/uw_e7_floor1.csv",
                            language_model_type="google",
                            embed_model_type="HuggingFace",
                            language_model="gemini-2.0-flash",
                            embed_model="sentence-transformers/all-MiniLM-L6-v2",
                            chat_kwargs={"max_tokens": 500, "temperature": 0.2, "top_p": 0.2},
                            fetch_all=True)

Loading model: gemini-2.0-flash...
Processing 20 waypoints...


100%|██████████| 20/20 [00:00<00:00, 37516.14it/s]

Adding waypoints to vector store...
Creating retriever...
Creating prompt template...
Creating RAG chain...





In [105]:
openai_xrif = XRIFGenerator(prompt_template="prompts/e7_xrif_actions_6.yaml",
                            waypoints_csv="waypoint_datasets/uw_e7_floor1.csv",
                            language_model_type="OpenAI",
                            embed_model_type="HuggingFace",
                            language_model="gpt-4o-mini",
                            embed_model="sentence-transformers/all-MiniLM-L6-v2",
                            chat_kwargs={"num_predict": 500, "temperature": 0.2, "top_p": 0.2},
                            fetch_all=True)

Loading model: gpt-4o-mini...
Processing 20 waypoints...


100%|██████████| 20/20 [00:00<00:00, 38746.46it/s]

Adding waypoints to vector store...
Creating retriever...
Creating prompt template...
Creating RAG chain...





In [115]:
experiment_generator, experiment_dataframe, output_folder = load_experiment_from_config(Path('experiment_configs/openai/gpt4o_mini_e5_hgf_embed.yaml'))

experiment_generator.batch_experiment_run(experiment_dataframe, output_folder, 7)

Loading model: gpt-4o-mini...
Processing 34 waypoints...


100%|██████████| 34/34 [00:00<00:00, 35821.74it/s]


Adding waypoints to vector store...
Creating retriever...
Creating prompt template...
Creating RAG chain...
Processing 50 test prompts...


100%|██████████| 50/50 [07:11<00:00,  8.63s/it]


PosixPath('test_results/gpt4o_mini_e5_hgf_embed')

In [118]:
config_dir = 'experiment_configs/google'
for config_file in os.listdir(config_dir):
    experiment_generator, experiment_dataframe, output_folder = load_experiment_from_config(Path(config_dir) / config_file)
    experiment_generator.batch_experiment_run(experiment_dataframe, output_folder, 7)


Loading model: gemini-2.0-flash...
Processing 24 waypoints...


100%|██████████| 24/24 [00:00<00:00, 34580.31it/s]


Adding waypoints to vector store...
Creating retriever...
Creating prompt template...
Creating RAG chain...
Processing 60 test prompts...


100%|██████████| 60/60 [11:24<00:00, 11.42s/it]


Loading model: gemini-2.0-flash...
Processing 34 waypoints...


100%|██████████| 34/34 [00:00<00:00, 34272.13it/s]


Adding waypoints to vector store...
Creating retriever...
Creating prompt template...
Creating RAG chain...
Processing 50 test prompts...


100%|██████████| 50/50 [10:18<00:00, 12.36s/it]


In [120]:
import os
from pathlib import Path
import json
import pandas as pd


for folder in os.listdir('test_results'):
    print(f"Results for {folder}...")
    if Path('test_results/' + folder).is_dir():
        json_list = []
        for file in os.listdir('test_results/' + folder):
            with open(Path('test_results/' + folder) / file, 'r') as f:
                json_list.append(json.load(f))
        
        df = pd.DataFrame(json_list)
        df.to_csv(f"test_results/{folder}.csv", index=False)

Results for eval_test...
Results for gemini_warehouse_hgf_embed...
Results for all_warehouse_deepseek1.5b...
Results for gpt4o_mini_warehouse_hgf_embed...
Results for eval_test.csv...
Results for gemini_e7_hgf_embed.csv...
Results for all_warehouse_deepseek14b...
Results for all_e7_deepseek7b...
Results for gpt4o_mini_e7_hgf_embed.csv...
Results for all_e7_deepseek7b_hfembed.csv...
Results for gpt4o_mini_e5_hgf_embed...
Results for all_e7_deepseek1.5b.csv...
Results for all_e7_deepseek14b.csv...
Results for all_e7_deepseek14b...
Results for gemini_e7_hgf_embed...
Results for all_e7_deepseek1.5b...
Results for all_e7_deepseek7b.csv...
Results for all_e5_deepseek14b.yaml...
Results for gpt4o_mini_warehouse_hgf_embed.csv...
Results for all_warehouse_deepseek14b.csv...
Results for gemini_warehouse_hgf_embed.csv...
Results for all_warehouse_deepseek7b...
Results for gpt4o_mini_e7_hgf_embed...
Results for gpt4o_mini_e5_hgf_embed.csv...
Results for gemini_e5_hgf_embed...
Results for all_e7_de