## Ablation Study for PaperSeek

This notebook presents an ablation study for the PaperSeek pipeline, examining how HyQe and HyDe affect the retrieval performance of BM25 across various scientific topics.

1. The effect of combining rerankers and Reciprocal Rank Fusion (RRF).

In [216]:
from models import ReciprocalRankFusion
from collections import defaultdict
from typing import Literal
from textwrap import shorten
import plotly.express as px
import numpy as np
from glob import glob
import pandas as pd

In [217]:
# Evaluation sheet; parse_results reads its "topic" and "id" columns.
eval_df = pd.read_excel("data/eval_cps.xlsx")
# Parquet files with reranker outputs, each loaded by read_results.
files = glob("ablation/rerankers/*.parquet")
# Shared fusion object; read_results calls rrf.rerank() once per topic.
rrf = ReciprocalRankFusion()

In [218]:
def get_stella_results(file: str) -> pd.DataFrame:
    """Load the Stella results stored under the same file name as ``file``.

    Args:
        file: Path to a reranker parquet file. Only its final path component
            is used; the directory part is ignored.

    Returns:
        DataFrame read from ``ablation/final/<file name>`` with the columns
        "Topic" (renamed from "topic") and "id".
    """
    # Local import keeps this fix self-contained. ntpath.basename treats both
    # "/" and "\\" as separators regardless of the host OS.
    import ntpath

    # glob() returns OS-specific separators. The previous split("\\")[1] only
    # worked for Windows-style paths containing exactly one backslash and
    # raised IndexError for "/"-separated paths.
    file_name = ntpath.basename(file)
    path = f"ablation/final/{file_name}"
    return pd.read_parquet(path, columns=["topic", "id"]).rename(
        columns={"topic": "Topic"}
    )


def read_results(
    file: str,
    rerankers: list[
        Literal[
            "BM25Reranker",
            "MiniLmCE",
            "BGEReranker",
            "Qwen2",
        ]
    ],
    include_stella=True,
) -> pd.DataFrame:
    """Collect per-topic id lists for each reranker and fuse them with RRF.

    Args:
        file: Parquet file holding a "Topic" column and one column per reranker.
        rerankers: Names of the reranker columns to read from ``file``.
        include_stella: When true, the matching Stella ids (via
            ``get_stella_results``) are added as a "Stella" column and fed
            into the fusion as well.

    Returns:
        DataFrame with a "Topic" column, one column per requested reranker,
        optionally "Stella", and "ReciprocalRankFusion".
    """
    stella_df = get_stella_results(file) if include_stella else None
    reranker_df = pd.read_parquet(file)

    collected = defaultdict(list)
    for topic in reranker_df["Topic"].unique():
        topic_rows = reranker_df[reranker_df["Topic"] == topic]
        top_n = topic_rows.shape[0]
        collected["Topic"].extend([topic] * top_n)

        # One id list per ranking source; these are the inputs to the fusion.
        rankings = []
        for reranker in rerankers:
            ranked_ids = topic_rows[reranker].tolist()
            collected[reranker].extend(ranked_ids)
            rankings.append(ranked_ids)

        if include_stella:
            stella_topic = stella_df[stella_df["Topic"] == topic]
            # Trim Stella to the same depth as the reranker lists.
            stella_ids = stella_topic["id"].head(top_n).tolist()
            collected["Stella"].extend(stella_ids)
            rankings.append(stella_ids)

        # Keep the first top_n keys of whatever rrf.rerank returns.
        fused_ids = list(rrf.rerank(rankings).keys())
        collected["ReciprocalRankFusion"].extend(fused_ids[:top_n])

    return pd.DataFrame(collected)


def parse_results(
    files: list[str],
    rerankers: list[
        Literal[
            "BM25Reranker",
            "MiniLmCE",
            "BGEReranker",
            "Qwen2",
        ]
    ],
) -> pd.DataFrame:
    """Compute per-topic recall for every ranking source, averaged over files.

    For each file and topic, recall is the share of the topic's ids in
    ``eval_df`` that appear in a source's id column from ``read_results``.

    Args:
        files: Reranker parquet files, each passed to ``read_results``.
        rerankers: Names of the reranker columns to evaluate.

    Returns:
        DataFrame with one column per topic. Rows follow the order of
        ``rerankers`` followed by "Stella" and "ReciprocalRankFusion", and
        hold the recall averaged across ``files``.

    Raises:
        ZeroDivisionError: If a topic has no ids in ``eval_df``, so recall is
            undefined.
    """
    # Stella and RRF are always evaluated alongside the requested rerankers.
    sources = [*rerankers, "Stella", "ReciprocalRankFusion"]
    results = defaultdict(list)

    for file in files:
        df = read_results(file, rerankers)
        for topic in df["Topic"].unique():
            topic_df = df[df["Topic"] == topic]
            eval_cores = set(eval_df[eval_df["topic"] == topic]["id"].tolist())
            if not eval_cores:
                # Same exception type as the bare division produced before,
                # but now naming the topic and file that have no ground truth.
                raise ZeroDivisionError(
                    f"no evaluation ids for topic {topic!r} (file {file!r}); "
                    "recall is undefined"
                )

            recalls = [
                len(eval_cores.intersection(topic_df[source].tolist()))
                / len(eval_cores)
                for source in sources
            ]
            results[topic].append(recalls)

    # Average the per-file recall vectors element-wise for each topic.
    return pd.DataFrame({k: np.mean(v, axis=0) for k, v in results.items()})


def show_results(
    files: list[str],
    rerankers: list[
        Literal[
            "BM25Reranker",
            "MiniLmCE",
            "BGEReranker",
            "Qwen2",
        ]
    ],
):
    """Plot a recall heatmap for selected topics and display the mean recall.

    Args:
        files: Reranker parquet files forwarded to ``parse_results``.
        rerankers: Names of the reranker columns to evaluate. The list is not
            modified.

    Side effects:
        Shows a plotly heatmap (rows: topics, columns: ranking sources),
        prints a separator line and displays a styled one-row table with the
        mean recall per source over the kept topics.
    """
    # NOTE(review): "under 5k" is the original author's label for this
    # selection; the criterion itself is not visible here - confirm.
    to_keep = [  # under 5k
        "Software Process Line",
        "Pharmacokinetics and...",
        "The rodent object-in-...",
        "Cerebral Small Vessel...",
        "Business Process Meta Models",
        "Data Stream Processing Latency",
        "Specialized psychotherapies...",
    ]

    # Build a new list instead of extending `rerankers` in place. Mutating the
    # caller's list would make a second call pass "Stella" and
    # "ReciprocalRankFusion" down as reranker column names.
    columns = [*rerankers, "Stella", "ReciprocalRankFusion"]

    df = parse_results(files, rerankers).T.reset_index(names="Topic")
    # After the transpose the recall columns are labelled 0..n-1, in the same
    # order as `columns`.
    df = df.rename(columns=dict(enumerate(columns)))
    # Shorten topic names so they match the entries in `to_keep`.
    df["Topic"] = df["Topic"].apply(
        lambda x: shorten(x, width=30, placeholder="...")
    )
    df = df[df["Topic"].isin(to_keep)]

    fig = px.imshow(
        df.set_index("Topic"),
        labels={"color": "Recall"},
        color_continuous_scale="RdBu",
        aspect="auto",
    )
    # The frame's columns (ranking sources) are on the x-axis and its index
    # (topics) on the y-axis; the cell colour encodes recall.
    fig.update_xaxes(title_text="Reranker")
    fig.update_yaxes(title_text="Topic")
    fig.update_layout(title_x=0.5)
    fig.update_traces(texttemplate="%{z:.2f}", textfont={"size": 12})
    fig.show()
    print("--" * 50)

    print("Overall average:")
    display(
        df.mean(axis=0, numeric_only=True)
        .to_frame()
        .T.style.format("{:.2f}", subset=columns)
        .highlight_max(axis=1, color="green")
    )

In [222]:
# Evaluate all four rerankers; show_results appends the Stella and
# ReciprocalRankFusion columns itself.
show_results(
    files, ["BM25Reranker", "MiniLmCE", "BGEReranker", "Qwen2"]
)

----------------------------------------------------------------------------------------------------
Overall average:


Unnamed: 0,BM25Reranker,MiniLmCE,BGEReranker,Qwen2,Stella,ReciprocalRankFusion
0,0.4,0.41,0.38,0.62,0.61,0.6
