In [None]:
import os
import pyterrier as pt
from tira.third_party_integrations import ensure_pyterrier_is_loaded
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from pathlib import Path
import ir_datasets
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
ensure_pyterrier_is_loaded()
ds_id = "radboud-validation-20251114-training"
dataset = pt.datasets.get_dataset(f"irds:ir-lab-wise-2025/{ds_id}")
topics = dataset.get_topics("title")
qrels = dataset.get_qrels()

In [4]:
index_path = "./output/indexes/radboud-validation"
abs_index_path = os.path.abspath(index_path)

if not os.path.exists(abs_index_path + "/data.properties"):
    print("Building index...")
    irds_ds = ir_datasets.load(f"ir-lab-wise-2025/{ds_id}")
    
    def doc_iter():
        for doc in tqdm(irds_ds.docs_iter(), desc="Indexing"):
            yield {'docno': doc.doc_id, 'text': doc.default_text()}
            
    indexer = pt.IterDictIndexer(abs_index_path, meta={'docno': 100})
    index_ref = indexer.index(doc_iter())
    index = pt.IndexFactory.of(index_ref)
else:
    print("Loading existing index...")
    index = pt.IndexFactory.of(abs_index_path)

print(f"Index loaded: {index}")

Loading existing index...
Index loaded: <org.terrier.structures.Index at 0x77a70be92b60 jclass=org/terrier/structures/Index jself=<LocalRef obj=0x5868a92d7010 at 0x77a70bb06690>>


In [None]:
BACKBONE_MODEL = "flan-t5-base"

PROMPT = """Explain the following search query in one simple sentence containing synonyms:
Query: //query_text//
Explanation:"""

BACKBONE_MODEL = "flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(f"google/{BACKBONE_MODEL}")
model = AutoModelForSeq2SeqLM.from_pretrained(f"google/{BACKBONE_MODEL}")

def optimize_query():
    
    def _rewrite_row(row):
        query_text = row['query']
        
        input_text = PROMPT.replace("//query_text//", query_text)
        inputs = tokenizer(input_text, return_tensors="pt")
        
        outputs = model.generate(
            **inputs, 
            max_new_tokens=60, 
            num_beams=5,  
            repetition_penalty=1.5,
            temperature=0.7,      
            do_sample=True,      
            early_stopping=True
        )
        
        return tokenizer.decode(outputs[0], skip_special_tokens=True).replace("Keywords:", "").strip()

    return pt.apply.query(_rewrite_row)

In [42]:
import requests
import json
import pyterrier as pt

OLLAMA_MODEL = "gemma3:4b" 
OLLAMA_API_URL = "http://host.docker.internal:11434/api/generate"

EXPANSION_PROMPT = """You are a search engine optimizer.
Task: Generate 3-5 relevant synonyms or technical terms for the given search query to improve retrieval.
Query: "{query}"
Output only the keywords separated by spaces. Do not write full sentences.
Keywords:"""

def optimize_query():
    
    def _call_ollama(prompt):
        payload = {
            "model": OLLAMA_MODEL,
            "prompt": prompt,
            "stream": False,
            "options": {
                "temperature": 0.3, 
                "num_predict": 50  
            }
        }
        try:
            response = requests.post(OLLAMA_API_URL, json=payload)
            response.raise_for_status()
            return response.json().get('response', '').strip()
        except requests.exceptions.RequestException as e:
            print(f"Ollama Error: {e}")
            return "" 

    def _rewrite_row(row):
        query_text = row['query']
        
        prompt = EXPANSION_PROMPT.replace("{query}", query_text)
        
        expansion_keywords = _call_ollama(prompt)
        expanded_query = f"{query_text} {query_text} {expansion_keywords}"
        
        # return expanded_query.replace("\n", " ").strip()
        return expansion_keywords

    return pt.apply.query(_rewrite_row)

In [43]:
print("Previewing query optimization:")
llm_pipeline = optimize_query()
preview = llm_pipeline.transform(topics.head(10))
display(preview)

Previewing query optimization:


Unnamed: 0,qid,query_0,query
0,3,split ergo keyboard,split keyboard 60% 75% 96%
1,4,metoo Hollywood,Hollywood scandal sexual abuse film industry
2,7,gastritis symptoms,stomach inflammation symptoms peptic ulcer dis...
3,8,What is privacy by design(PbD)?,Privacy by design PbD data protection proactiv...
4,13,Impact of Exercise on Depression,Physical activity depression mental health exe...
5,15,Autonomous car ethics in unavoidable accidents,"self-driving ethics, accident algorithms, mora..."
6,16,mahler sixth symphony concert 2025,"Symphony, Mahler, performance, concert hall, 2025"
7,18,Climate change effects on agriculture,Agricultural impacts climate change crop yield...
8,20,History of urban rooftop gardening in European...,Rooftop agriculture urban farming green roofs ...
9,23,excel sum cells,sum formula excel addition cell range


In [None]:
bm25 = pt.BatchRetrieve(index, wmodel="BM25")

retrieval_llm_pipeline = optimize_query() >> bm25
pipeline_interpolated = (
    (bm25) * 0.8
    + 
    (llm_pipeline >> bm25) * 0.2
)
rm3_pipe = bm25 >> pt.rewrite.RM3(index, fb_terms=15, fb_docs=5) >> bm25

  bm25 = pt.BatchRetrieve(index, wmodel="BM25")


In [55]:
pt.Experiment(
    [bm25, rm3_pipe],
    names=['BM25', 'RM3'],
    topics=topics,
    qrels=qrels,
    eval_metrics=['ndcg_cut_10', 'P_10', 'recall_100', 'map']
)

Unnamed: 0,name,map,P_10,recall_100,ndcg_cut_10
0,BM25,0.381395,0.432143,0.681198,0.451635
1,RM3,0.313525,0.357143,0.635062,0.381691


In [65]:
from tirex_tracker import tracking, ExportFormat

output = Path("./output")

def run_tira_experiment(pipeline, pipeline_name, dataset_id, topics, description):
    target_dir = output / "runs" / dataset_id / pipeline_name
    target_file = target_dir / "run.txt.gz"

    if target_file.exists():
        print(f"Run existiert bereits: {target_dir}")
        return

    target_dir.mkdir(parents=True, exist_ok=True)

    print(f"--- Starte: {pipeline_name} ---")

    with tracking(
        export_file_path=target_dir / "ir-metadata.yml", 
        export_format=ExportFormat.IR_METADATA, 
        system_description=description, 
        system_name=pipeline_name
    ):
        run = pipeline.transform(topics)

    pt.io.write_results(run, str(target_file))
    print(f"Gespeichert in: {target_file}")

In [None]:
run_tira_experiment(
    pipeline=bm25,
    pipeline_name="pyterrier-bm25-baseline",
    dataset_id=ds_id,
    topics=topics,
    description="Standard BM25 retrieval without query expansion."
)

run_tira_experiment(
    pipeline=retrieval_llm_pipeline,
    pipeline_name="pyterrier-flan-t5-bm25",
    dataset_id=ds_id,
    topics=topics,
    description="Query rewriting using FLAN-T5-small followed by BM25."
)

Run existiert bereits: output/runs/radboud-validation-20251114-training/pyterrier-bm25-baseline
Run existiert bereits: output/runs/radboud-validation-20251114-training/pyterrier-flan-t5-bm25


## Reranker

In [56]:
irds_ds = ir_datasets.load(f"ir-lab-wise-2025/{ds_id}")

doc_text_map = {doc.doc_id: doc.default_text() for doc in tqdm(irds_ds.docs_iter(), desc="Loading Docs")}

def _lookup_text(row):
    return doc_text_map.get(row['docno'], "")

text_getter = pt.apply.text(_lookup_text)

Loading Docs: 0it [00:00, ?it/s]

Loading Docs: 63621it [01:16, 829.02it/s] 


In [57]:
from sentence_transformers import CrossEncoder

model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2', device="cpu")

def _cross_encoder_score(row):
    return float(model.predict([(row['query'], row['text'])])[0])

pipeline_rerank = (bm25 % 100) >> text_getter >> pt.apply.doc_score(_cross_encoder_score)

In [None]:
pt.Experiment(
    [bm25, pipeline_rerank],
    names=['BM25', 'pipeline rerank'],
    topics=topics,
    qrels=qrels,
    eval_metrics=['ndcg_cut_10', 'P_10', 'recall_100', 'map']
)

KeyboardInterrupt: 

In [None]:
pt.Experiment(
    [bm25, retrieval_llm_pipeline, pipeline_rerank],
    names=['BM25', 'LLM_optimized_queries', "Rerank only"],
    topics=topics,
    qrels=qrels,
    eval_metrics=['ndcg_cut_10', 'P_10', 'recall_100', 'map']
)

Unnamed: 0,name,map,P_10,recall_100,ndcg_cut_10
0,BM25,0.381395,0.432143,0.681198,0.451635
1,LLM_optimized_queries,0.381321,0.428571,0.681198,0.454508
2,Rerank only,0.383241,0.460714,0.681198,0.49417
3,Rewrite_and_rerank,0.389933,0.464286,0.681198,0.501276


In [14]:
run_tira_experiment(
    pipeline=combined_pipeline,
    pipeline_name="combined_rewrite_reranker",
    dataset_id=ds_id,
    topics=topics,
    description="query rewrite with BM25 retrieval (top 100) followed by reranking."
)

Run existiert bereits: output/runs/radboud-validation-20251114-training/combined_rewrite_reranker


## Best working solution

In [None]:
bm25 = pt.BatchRetrieve(index, wmodel="BM25")
get_text = pt.apply.text(_lookup_text)
reranker = pt.apply.doc_score(_cross_encoder_score)

pipeline_simple_rerank = (
    bm25 % 50
    >> get_text
    >> reranker
)

print("Starte Simple Re-Ranking Experiment...")
pt.Experiment(
    [bm25, pipeline_simple_rerank],
    topics,
    qrels,
    eval_metrics=["map", "ndcg_cut_10", "P_10"],
    names=["BM25 Baseline", "BM25 + Cross-Encoder (Top 50)"],
    verbose=True
)

  bm25 = pt.BatchRetrieve(index, wmodel="BM25")


Starte Simple Re-Ranking Experiment...


pt.Experiment: 100%|██████████| 2/2 [04:32<00:00, 136.32s/system]


Unnamed: 0,name,map,P_10,ndcg_cut_10
0,BM25 Baseline,0.381395,0.432143,0.451635
1,BM25 + Cross-Encoder (Top 100),0.383241,0.460714,0.49417


In [71]:
run_tira_experiment(
    pipeline=pipeline_simple_rerank,
    pipeline_name="simple_rerank",
    dataset_id=ds_id,
    topics=topics,
    description="Standard BM25 retrieval with ms-marco reranker."
)

--- Starte: simple_rerank ---


Detected a hypervisor/virtualization technology. Some metrics might not be available due to configuration or availability of virtual hardware features.

=====  Processor information  =====
Linux arch_perfmon flag  : yes
Hybrid processor         : no
IBRS and IBPB supported  : yes
STIBP supported          : yes
Spec arch caps supported : yes
Max CPUID level          : 27
CPU model number         : 140
PCM Error: can't open MSR handle for core 0 (No such file or directory)
Try no-MSR mode by setting env variable PCM_NO_MSR=1
Can not access CPUs Model Specific Registers (MSRs).
execute 'modprobe msr' as root user, then execute pcm as root user.


Gespeichert in: output/runs/radboud-validation-20251114-training/simple_rerank/run.txt.gz
