In [None]:
import json
import sys
from collections import Counter, defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# allow notebook to find project modules
sys.path.append("..")

from indexing.retrieve_chunks import retrieve


In [None]:
# Evaluation prompts, grouped by the kind of text they imitate.
# The combined list keeps documentation-style questions first, then issue-style ones.
docs_style_queries = [
    "How does autograd work in PyTorch?",
    "What is the difference between nn.Module and functional API?",
    "How do I write a custom autograd Function?",
    "How does backward propagation compute gradients?",
]

issue_style_queries = [
    "Why does backward throw a RuntimeError?",
    "How do I debug NaNs during backpropagation?",
    "Autograd is slow on large tensors, why?",
    "Gradient not flowing through custom function",
]

eval_queries = docs_style_queries + issue_style_queries


In [None]:
TOP_K = 5

# (stage label, rerank flag) pairs, in the order their rows are recorded for each query.
retrieval_stages = (("base", False), ("rerank", True))

rows = []
for query in eval_queries:
    for stage_name, use_rerank in retrieval_stages:
        hits = retrieve(query, top_k=TOP_K, rerank=use_rerank)
        # Ranks are 1-based positions in the list returned by retrieve().
        for position, hit in enumerate(hits, start=1):
            rows.append({
                "query": query,
                "stage": stage_name,
                "rank": position,
                "strategy": hit["chunk_strategy"],
                "source": hit["source"],
                # Base rows always record "score"; rerank rows use "rerank_score"
                # when the hit carries one and fall back to "score" otherwise.
                "score": (
                    hit.get("rerank_score", hit["score"])
                    if use_rerank
                    else hit["score"]
                ),
            })

# One row per (query, stage, rank) hit.
df = pd.DataFrame(rows)
df.head()



In [None]:
# Quick sanity check: total row count, then how rows split by stage and by rank.
print(f"Rows: {len(df)}")
for column_name in ("stage", "rank"):
    print(df[column_name].value_counts())


In [None]:
# Count of retrieved hits per chunking strategy (rows) and retrieval stage (columns).
strategy_stage = pd.crosstab(index=df["strategy"], columns=df["stage"])
strategy_stage


In [None]:
# Keep only the hit recorded at rank 1 for each query/stage, then tabulate
# which strategy those top hits came from in each stage.
is_top_hit = df["rank"].eq(1)
rank1 = df.loc[is_top_hit]
pd.crosstab(index=rank1["strategy"], columns=rank1["stage"])


In [None]:
# Hit counts for every (strategy, stage) pair, broken out by rank position.
strategy_and_stage = [df["strategy"], df["stage"]]
pd.crosstab(index=strategy_and_stage, columns=df["rank"])


In [None]:
# Bar chart of how often each chunking strategy holds rank 1, split by stage.
# seaborn and pyplot come from the notebook's top import cell; drawing on an
# explicit Axes avoids relying on pyplot's implicit "current axes" state.
fig, ax = plt.subplots()
sns.countplot(data=rank1, x="strategy", hue="stage", ax=ax)
ax.set_title("Rank-1 Strategy Distribution (Before vs After Rerank)")
plt.show()


In [None]:
# Show the rank-1 hit for every query, with base and rerank rows side by side.
top_hit_columns = ["query", "stage", "strategy", "source", "score"]
rank1.loc[:, top_hit_columns].sort_values(["query", "stage"])


In [None]:
# One row per query, one column per stage; each cell holds the strategy of
# the first rank-1 row found for that query/stage pair.
winners = pd.pivot_table(
    rank1,
    values="strategy",
    index="query",
    columns="stage",
    aggfunc="first",
)
winners


In [None]:
# A query counts as changed when its "base" and "rerank" entries in winners differ.
top1_differs = winners["base"].ne(winners["rerank"])
changed = top1_differs.sum()
total = len(winners)

print(f"Reranking changed top-1 strategy for {changed}/{total} queries")


In [None]:
# Write the full per-hit table next to the notebook, without the DataFrame index column.
output_csv_path = "strategy_rerank_comparison.csv"
df.to_csv(output_csv_path, index=False)
