## Load Dataset

In [1]:
from rag.load import load_benchmark_corpus_sample, corpus_to_texts_metadatas


# Benchmark subset to sample; forwarded unchanged to the loader below.
subset_name = 'maud'

# The loader's result is unpacked into the benchmark and its corpus.
benchmark, corpus = load_benchmark_corpus_sample(subset=subset_name)

# Derive the parallel `texts` / `metadatas` values from the corpus; both are
# consumed when the chunks are created later in the notebook.
texts, metadatas = corpus_to_texts_metadatas(corpus)

## Split Into Chunks

In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter


# Separator candidates handed to the splitter, listed from paragraph breaks
# through line breaks, punctuation and spaces down to the empty string.
chunk_separators = ['\n\n', '\n', '!', '?', '.', ':', ';', ',', ' ', '']

# All splitter settings gathered in one place so they are easy to tune.
# NOTE(review): chunk_size is presumably measured in characters — confirm
# against the splitter's length function.
splitter_options = dict(
    separators=chunk_separators,
    chunk_size=500,
    chunk_overlap=0,
    add_start_index=True,
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Chunk every corpus text, passing the per-text metadata alongside.
documents = text_splitter.create_documents(texts, metadatas=metadatas)

## Embed Chunks

In [3]:
import torch

from rag.embed import compute_similarities, get_query_strings, get_document_contents


import warnings
from pathlib import Path

# On-disk cache for the query/document similarity scores, so that a full
# re-run of the notebook does not have to recompute the embeddings.
similarity_cache_path = "data/cache/04_similarities_maud.pt"
similarity_cache_file = Path(similarity_cache_path)

# Try the cache first. A missing file is an ordinary cache miss; an unreadable
# file is reported (not silently swallowed) and then treated as a miss.
# `Exception` is caught rather than using a bare `except:` so that
# KeyboardInterrupt / SystemExit still stop the cell.
similarities = None
if similarity_cache_file.is_file():
    try:
        similarities = torch.load(similarity_cache_file)
    except Exception as load_error:
        warnings.warn(
            f"Ignoring unreadable similarity cache {similarity_cache_file}: "
            f"{load_error!r}; recomputing."
        )

if similarities is None:
    similarities = compute_similarities(
        "Qwen/Qwen3-Embedding-8B",
        queries=get_query_strings(benchmark),
        documents=get_document_contents(documents),
    )
    # Create the cache directory up front so the save cannot fail on a fresh
    # checkout after the expensive computation has already finished.
    similarity_cache_file.parent.mkdir(parents=True, exist_ok=True)
    torch.save(similarities, similarity_cache_file)

In [4]:
from rag.metrics import similarities_to_ranks
from rag.rerank import rerank


# Turn the similarity scores into the baseline ranking that is evaluated in
# the next cell.
ranks = similarities_to_ranks(similarities)
# Produce a second ranking from the baseline one; the reranker is also given
# the benchmark and the chunked documents.
# NOTE(review): in the recorded output the reranked metrics at @64 are
# identical to those at @32, which suggests `rerank` may only return the top
# 32 candidates per query — confirm before comparing the two runs at k=64.
reranks = rerank(benchmark, documents, ranks)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]



In [5]:
from rag.metrics import print_evaluations

# Report the same metrics for both rankings, each under its own heading.
# The leading newline in the second heading separates the two reports.
for heading, ranking in (
    ("Baseline Evaluation", ranks),
    ("\nReranked evaluation", reranks),
):
    print(heading)
    print_evaluations(benchmark, documents, ranking)

Baseline Evaluation
precision @ 1 :  0.1935, recall @ 1 :  0.0918
precision @ 2 :  0.1687, recall @ 2 :  0.1278
precision @ 4 :  0.1368, recall @ 4 :  0.2127
precision @ 8 :  0.1173, recall @ 8 :  0.3382
precision @ 16:  0.0766, recall @ 16:  0.4198
precision @ 32:  0.0522, recall @ 32:  0.5222
precision @ 64:  0.0352, recall @ 64:  0.6287
AUC: 0.05023092285649827

Reranked evaluation
precision @ 1 :  0.2991, recall @ 1 :  0.1622
precision @ 2 :  0.1987, recall @ 2 :  0.2171
precision @ 4 :  0.1469, recall @ 4 :  0.2733
precision @ 8 :  0.1154, recall @ 8 :  0.3654
precision @ 16:  0.0760, recall @ 16:  0.4171
precision @ 32:  0.0522, recall @ 32:  0.5222
precision @ 64:  0.0522, recall @ 64:  0.5222
AUC: 0.06840639745141963
