In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from pathlib import Path


In [7]:
# Locate the project's cleaned-results directory relative to the working directory.
# NOTE: os.getcwd() is the kernel's working directory; this assumes the kernel
# was started in the notebook's folder.
notebook_dir = Path(os.getcwd())
# The project root is the parent of the notebook directory.
project_root = str(notebook_dir.parent)

# Join with pathlib so the separators are consistent on every OS (plain "/"
# concatenation produced a mix of "\" and "/" on Windows).
# Converted back to str so existing string-based uses of results_path keep working.
results_path = str(Path(project_root) / "results" / "cleaned")

if not os.path.exists(results_path):
    print(f"Directory not found: {results_path}")
else:
    print(f"Directory found: {results_path}")

Directory found: c:\Users\todor\Repositories\ir-project/results/cleaned


In [8]:
# Agents whose evaluation results are compared.
agents = [
    "pinecone",
    "openai",
    "hybrid",
    "auto_merge",
]

# Datasets each agent was evaluated on.
datasets = [
    "nq",
    "hotpotqa",
    "sse_single",
    "sse_multi",
]

# Metric columns selected from each evaluation_results.csv.
metrics_columns = [
    "context_precision",
    "context_recall",
    "faithfulness",
    "factual_correctness(mode=f1)",
    "context_entity_recall",
    "answer_relevancy",
]

In [11]:
# Load the evaluation results for every (dataset, agent) pair, keeping only
# the metric columns.
# Result: metrics_tables[dataset][agent] -> DataFrame restricted to metrics_columns.
metrics_tables = {dataset: {} for dataset in datasets}
for dataset in datasets:
    for agent in agents:
        # Build the path from results_path rather than a second hard-coded
        # relative path, so reading and saving use the same directory.
        path = os.path.join(results_path, dataset, agent, "evaluation_results.csv")
        if os.path.exists(path):
            metrics_tables[dataset][agent] = pd.read_csv(path)[metrics_columns]
        else:
            # Best effort: report the missing file and continue with the rest.
            print(f"File {path} does not exist")


In [12]:
# Average every metric per agent and build one summary table per dataset
# (rows: metrics, columns: agents).
metrics_averages = {}
for dataset in datasets:
    # Iterate over the tables that were actually loaded: an agent whose results
    # file was missing has no entry in metrics_tables[dataset], and indexing it
    # by name would raise KeyError. Insertion order follows `agents`, so the
    # column order is unchanged when every file is present.
    dataset_metrics = {
        agent: table.mean() for agent, table in metrics_tables[dataset].items()
    }
    metrics_averages[dataset] = pd.DataFrame(dataset_metrics)

# Display one table as a sanity check.
metrics_averages["nq"]
   

Unnamed: 0,pinecone,openai,hybrid,auto_merge
context_precision,0.621667,0.705,0.62,0.726667
context_recall,0.703333,0.82,0.6,0.8
faithfulness,0.724726,0.849737,0.690213,0.85484
factual_correctness(mode=f1),0.1484,0.0968,0.1752,0.1016
context_entity_recall,0.485417,0.545977,0.464815,0.582979
answer_relevancy,0.783246,0.848667,0.786651,0.834428


In [13]:

# Save each dataset's averages table as CSV inside that dataset's results folder.
for dataset in datasets:
    save_path = os.path.join(results_path, dataset, "metrics_averages.csv")
    # The metric names are the DataFrame index, so the index must be written;
    # with index=False the saved rows had no labels.
    metrics_averages[dataset].to_csv(save_path, index=True, index_label="metric")


In [22]:
# Draw one annotated heatmap of the averaged metrics per dataset and save it as a PNG.
for dataset in datasets:
    save_path = results_path + f"/{dataset}/metrics_averages.png"

    fig, ax = plt.subplots(figsize=(7, 4))
    sns.heatmap(metrics_averages[dataset], annot=True, cmap="Blues", ax=ax)
    ax.set_title(f"Evaluation Averages for {dataset.upper()}")
    ax.set_xlabel("Agents")
    ax.set_ylabel("Metrics")
    fig.tight_layout()

    fig.savefig(save_path)
    # Close the figure so open figures do not pile up across iterations.
    plt.close(fig)


