The profiles written by this notebook can be viewed by running the following on the Linux command line:
```
tuna path/to/rerank_ff.prof --port=8000
```

In [1]:
from pathlib import Path
import ir_datasets
import torch

### PARAMETER SETTINGS
# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device_name = "cuda"
else:
    device_name = "cpu"

# Cut-off passed to `sparse_ranking.cut(...)` before re-ranking.
k_s = 1000
# When True, the loaded index is converted with `.to_memory()`.
in_memory = False

# Input locations: the fast-forward index file and the sparse ranking file.
index_path = Path("/home/bvdb9/indices/msm-psg/ff/ff_index_TCTColBERT_opq.h5")
sparse_ranking_path = Path("/home/bvdb9/sparse_rankings/msmarco-passage-test2019-sparse10000.txt")

# Dataset handle; later cells read its queries and qrels.
dataset = ir_datasets.load("msmarco-passage/trec-dl-2019")

In [2]:
import os
import logging
import pyterrier as pt

# JAVA_HOME is set before PyTerrier is initialised (presumably so it can
# locate the JVM -- TODO confirm the path matches the local installation).
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-21-openjdk-amd64"
# Configure the root logger at INFO level.
logging.basicConfig(level=logging.INFO)

# Initialise PyTerrier only if it has not been started yet, so the cell can
# be re-run safely within the same kernel.
if not pt.started():
    pt.init(tqdm="notebook")

# Create profile directory
mem = "mem" if in_memory else "disk"
# `profile_dir` stays a plain string with a trailing slash because the
# profiling cells build file names by string concatenation.
# NOTE(review): `index_path` is interpolated in full, so an absolute index
# path is mirrored underneath `profiles/` -- confirm this layout is intended
# (e.g. `index_path.stem` would give a flatter directory name).
profile_dir = f"profiles/{index_path}/{device_name}_k{k_s}_{mem}/"
# `exist_ok=True` replaces the separate exists-check: it is idempotent and
# avoids the race between checking for and creating the directory.
os.makedirs(profile_dir, exist_ok=True)

PyTerrier 0.10.1 has loaded Terrier 5.10 (built by craigm on 2024-08-22 17:33) and terrier-helper 0.0.8



In [3]:
from fast_forward import OnDiskIndex, Mode, Ranking
from fast_forward.encoder import TCTColBERTQueryEncoder
import pstats

# Query encoder built from the TCT-ColBERT checkpoint, on the configured device.
q_encoder = TCTColBERTQueryEncoder("castorini/tct_colbert-msmarco", device=device_name)

# Load the on-disk index, attaching the query encoder and using MAXP mode.
ff_index = OnDiskIndex.load(index_path, query_encoder=q_encoder, mode=Mode.MAXP)

# Swap in the in-memory variant only when requested in the parameters.
ff_index = ff_index.to_memory() if in_memory else ff_index

INFO:faiss.loader:Loading faiss with AVX2 support.
INFO:faiss.loader:Successfully loaded faiss with AVX2 support.
Loading index: 100%|██████████| 8841823/8841823 [00:17<00:00, 499557.78it/s]


In [4]:
# Map every query id of the dataset to its text, then load the sparse
# ranking file together with that mapping.
query_texts = {}
for query in dataset.queries_iter():
    query_texts[query.query_id] = query.text

sparse_ranking = Ranking.from_file(sparse_ranking_path, query_texts)

In [5]:
import cProfile

# Standard re-ranking of the top-k_s sparse results under cProfile
# (probably takes a few minutes).
with cProfile.Profile() as profile:
    ff_out = ff_index(sparse_ranking.cut(k_s))

# Collect the statistics, order them by internal time and write them next to
# the other profiles of this configuration.
stats = pstats.Stats(profile).sort_stats(pstats.SortKey.TIME)
stats.dump_stats(f"{profile_dir}rerank_ff.prof")

INFO:fast_forward.index:_compute_scores: create df with unique queries and ids 0 ... n
INFO:fast_forward.index:_compute_scores: _get_vectors
Getting vectors: 100%|██████████| 42791/42791 [00:00<00:00, 228714.23it/s]
Processing vectors: 100%|██████████| 42791/42791 [00:00<00:00, 331449.90it/s]
Reading vectors: 100%|██████████| 42/42 [00:12<00:00,  3.44it/s]
Computing scores: 100%|██████████| 43000/43000 [00:00<00:00, 113938.93it/s]
INFO:fast_forward.index:_compute_scores: compute all dot products (scores)
INFO:fast_forward.index:_compute_scores: calculate each query-doc pair's ff_score
INFO:fast_forward.index:computed scores in 20.548856031000014 seconds


In [6]:
# Re-ranking with early stopping under cProfile (also takes a few minutes).
# The early-stopping options are gathered first and unpacked into the call.
early_stopping_options = {
    "early_stopping": 10,
    "early_stopping_alpha": 0.2,
    "early_stopping_depths": (800, 5000),
}
with cProfile.Profile() as profile:
    ff_out_es = ff_index(sparse_ranking.cut(k_s), **early_stopping_options)

# Collect the statistics, order them by internal time and write them to disk.
stats = pstats.Stats(profile).sort_stats(pstats.SortKey.TIME)
stats.dump_stats(f"{profile_dir}rerank_ff_es.prof")

INFO:fast_forward.index:depth 800: 43 queries left
INFO:fast_forward.index:_compute_scores: create df with unique queries and ids 0 ... n
INFO:fast_forward.index:_compute_scores: _get_vectors
Getting vectors: 100%|██████████| 34273/34273 [00:00<00:00, 147974.27it/s]
Processing vectors: 100%|██████████| 34273/34273 [00:00<00:00, 270990.24it/s]
Reading vectors: 100%|██████████| 34/34 [00:05<00:00,  6.02it/s]
Computing scores: 100%|██████████| 34400/34400 [00:00<00:00, 110890.49it/s]
INFO:fast_forward.index:_compute_scores: compute all dot products (scores)
INFO:fast_forward.index:_compute_scores: calculate each query-doc pair's ff_score
INFO:fast_forward.index:depth 5000: 14 queries left
INFO:fast_forward.index:_compute_scores: create df with unique queries and ids 0 ... n
INFO:fast_forward.index:_compute_scores: _get_vectors
Getting vectors: 100%|██████████| 2799/2799 [00:00<00:00, 112963.62it/s]
Processing vectors: 100%|██████████| 2799/2799 [00:00<00:00, 162455.64it/s]
Reading vectors

In [7]:
from ir_measures import calc_aggregate, AP, RR, nDCG
from fast_forward.util import to_ir_measures

alpha: float = 0.2
eval_metrics = [AP(rel=2) @ 1000, RR(rel=2) @ 10, nDCG @ 10]


def _aggregate(ranking):
    """Return the aggregated `eval_metrics` of `ranking` against the dataset qrels."""
    return calc_aggregate(eval_metrics, dataset.qrels_iter(), to_ir_measures(ranking))


# Evaluate the three variants in the order in which they are reported.
scores_sparse = _aggregate(sparse_ranking)
scores_ff = _aggregate(sparse_ranking.interpolate(ff_out, alpha))
scores_ff_es = _aggregate(sparse_ranking.interpolate(ff_out_es, alpha))

print(
    "Lexical retrieval without re-ranking:\n",
    scores_sparse,
    f"\n\n... with fast-forward re-ranking (alpha={alpha}):\n",
    scores_ff,
    f"\n\n... with fast-forward re-ranking AND early stopping (alpha={alpha}):\n",
    scores_ff_es,
)

Lexical retrieval without re-ranking:
 {AP(rel=2)@1000: 0.30128706043561426, nDCG@10: 0.5058310024399073, RR(rel=2)@10: 0.7024178663713547} 

... with fast-forward re-ranking (alpha=0.2):
 {AP(rel=2)@1000: 0.42688547107019914, nDCG@10: 0.6929843235417926, RR(rel=2)@10: 0.8748615725359912} 

... with fast-forward re-ranking AND early stopping (alpha=0.2):
 {AP(rel=2)@1000: 0.4250957223020153, nDCG@10: 0.6929843235417926, RR(rel=2)@10: 0.8748615725359912}
