# ⚙️ Setup

In [56]:
import pandas as pd
import gradio as gr
from retriever import Retriever
from datetime import datetime

# 🗃️ Modules

## Precision@k

In [62]:
def get_precision_per_query_at_k(df: pd.DataFrame, k: int) -> pd.DataFrame:
    df = df.copy()

    str_query_id = "query_id"
    str_relevant = "relevant"
    str_precision_at_k = f"Precision@{k}"

    df_topk = df[df["rank"] <= k]

    precision_per_query_at_k = (
        df_topk.groupby(str_query_id)
        .apply(lambda g: g[str_relevant].sum() / k)
        .reset_index(name=str_precision_at_k)
    )
    return precision_per_query_at_k

## Recall@k

In [63]:
def get_recall_per_query_at_k(df: pd.DataFrame, k: int) -> pd.DataFrame:
    df = df.copy()

    str_query_id = "query_id"
    str_rank = "rank"
    str_relevant = "relevant"
    str_recall_at_k = f"Recall@{k}"
    str_retrieved_relevant = "retrieved_relevant"
    str_total_relevant = "total_relevant"

    total_relevant = (
        df.groupby(str_query_id)[str_relevant]
        .sum()
        .reset_index(name=str_total_relevant)
    )

    df_topk = df[df[str_rank] <= k]

    recall_per_query_at_k = (
        df_topk.groupby(str_query_id)
        .apply(lambda g: (g[str_relevant].sum()))
        .reset_index(name=str_retrieved_relevant)
        .merge(total_relevant, on=str_query_id)
    )

    recall_per_query_at_k[str_recall_at_k] = (
        recall_per_query_at_k[str_retrieved_relevant]
        / recall_per_query_at_k[str_total_relevant]
    )

    return recall_per_query_at_k

## F1@k

In [64]:
def get_f1_per_query_at_k(df: pd.DataFrame, k: int) -> pd.Series:
    df = df.copy()

    str_precision_at_k = f"Precision@{k}"
    str_recall_at_k = f"Recall@{k}"

    precision_per_query_at_k = get_precision_per_query_at_k(df, k)
    recall_per_query_at_k = get_recall_per_query_at_k(df, k)

    f1_per_query_at_k = (
        2
        * precision_per_query_at_k[str_precision_at_k]
        * recall_per_query_at_k[str_recall_at_k]
    ) / (
        precision_per_query_at_k[str_precision_at_k]
        + recall_per_query_at_k[str_recall_at_k]
    )

    return f1_per_query_at_k

## Retriever Evaluation

In [65]:
def evaluate_retriever_at_k(df: pd.DataFrame, k: int) -> dict[str, float]:
    df = df.copy()

    precision_per_query_at_k = get_precision_per_query_at_k(df, k)
    mean_precision_at_k = precision_per_query_at_k[f"Precision@{k}"].mean()

    recall_per_query_at_k = get_recall_per_query_at_k(df, k)
    mean_recall_at_k = recall_per_query_at_k[f"Recall@{k}"].mean()
    
    f1_per_query_at_k = get_f1_per_query_at_k(df, k)
    mean_f1_at_k = f1_per_query_at_k.mean()

    return {
        f"Mean Precision@{k}": mean_precision_at_k,
        f"Mean Recall@{k}": mean_recall_at_k,
        f"Mean F1@{k}": mean_f1_at_k
    }

In [66]:
def evaluate_retriever(annotation_pools: pd.DataFrame, top_k: tuple[int]):
    evaluations_at_k = []
    for k in top_k:
        evaluation_at_k = evaluate_retriever_at_k(annotation_pools, k)
        evaluations_at_k.append(evaluation_at_k)
    
    evaluation = {}
    for evaluation_at_k in evaluations_at_k:
        for key, value in evaluation_at_k.items():
            evaluation[key] = value
    
    return evaluation

# Main

In [71]:
annotated_pools = pd.read_csv('annotations/annotations_kenji_b4c33c4c.csv', comment="#")

In [72]:
annotated_pools

Unnamed: 0,query_id,query,rank,chunk_id,file_name,title,content,relevant
0,1,Saan po pwede mag apply ng Japan Visa bukod sa...,1,ATTIC TOURS.pdf_chunk_0,ATTIC TOURS.pdf,ATTIC TOURS,ATTIC TOURS\nSpecialized in Japan Visa Applica...,1
1,1,Saan po pwede mag apply ng Japan Visa bukod sa...,2,JAPAN VISA GENERAL INFO.pdf_chunk_2,JAPAN VISA GENERAL INFO.pdf,JAPAN VISA – GENERAL INFORMATION,B. REQUIREMENTS（Details → https://www.ph.emb-j...,1
2,1,Saan po pwede mag apply ng Japan Visa bukod sa...,3,NIKKEI-JIN (JAPANESE DESCENDANT).pdf_chunk_6,NIKKEI-JIN (JAPANESE DESCENDANT).pdf,NIKKEI-JIN (JAPANESE DESCENDANT),【In case that applicant is planning to work fo...,0


In [73]:
evaluate_retriever(annotated_pools, top_k = (3, 5, 10))

  .apply(lambda g: g[str_relevant].sum() / k)
  .apply(lambda g: (g[str_relevant].sum()))
  .apply(lambda g: g[str_relevant].sum() / k)
  .apply(lambda g: (g[str_relevant].sum()))
  .apply(lambda g: g[str_relevant].sum() / k)
  .apply(lambda g: (g[str_relevant].sum()))
  .apply(lambda g: g[str_relevant].sum() / k)
  .apply(lambda g: (g[str_relevant].sum()))
  .apply(lambda g: g[str_relevant].sum() / k)
  .apply(lambda g: (g[str_relevant].sum()))
  .apply(lambda g: g[str_relevant].sum() / k)
  .apply(lambda g: (g[str_relevant].sum()))


{'Mean Precision@3': 0.6666666666666666,
 'Mean Recall@3': 1.0,
 'Mean F1@3': 0.8,
 'Mean Precision@5': 0.4,
 'Mean Recall@5': 1.0,
 'Mean F1@5': 0.5714285714285715,
 'Mean Precision@10': 0.2,
 'Mean Recall@10': 1.0,
 'Mean F1@10': 0.33333333333333337}