In [1]:
from ranx import Qrels, Run

In [2]:
# Load the relevance judgments (qrels) for the dataset id "beir/scifact/test"
# using ranx's ir_datasets loader.
# NOTE(review): presumably this fetches/caches the dataset on first use — confirm.
qrels = Qrels.from_ir_datasets("beir/scifact/test")

In [3]:
from rerankers import Reranker

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import srsly

# Read both JSONL files fully into memory; read_jsonl's result is consumed
# once here so the records can be indexed and re-iterated by later cells.
corpus = list(srsly.read_jsonl("./data/scifact/corpus.jsonl"))
queries = list(srsly.read_jsonl("./data/scifact/queries.jsonl"))

# Show the first corpus record to check its fields ('_id', 'title', 'text').
corpus[0]

{'_id': '4983',
 'title': 'Microstructural development of human newborn cerebral white matter assessed in vivo by diffusion tensor magnetic resonance imaging.',
 'text': 'Alterations of the architecture of cerebral white matter in the developing human brain can affect cortical development and result in functional disabilities. A line scan diffusion-weighted magnetic resonance imaging (MRI) sequence with diffusion tensor analysis was applied to measure the apparent diffusion coefficient, to calculate relative anisotropy, and to delineate three-dimensional fiber architecture in cerebral white matter in preterm (n = 17) and full-term infants (n = 7). To assess effects of prematurity on cerebral white matter development, early gestation preterm infants (n = 10) were studied a second time at term. In the central white matter the mean apparent diffusion coefficient at 28 wk was high, 1.8 microm2/ms, and decreased toward term to 1.2 microm2/ms. In the posterior limb of the internal capsule, t

In [5]:
# Instantiate the reranker from the monoT5 checkpoint name below.
# The device is pinned to "cuda", so this cell expects a GPU to be available.
ranker = Reranker(
    "castorini/monot5-base-msmarco-10k",
    device="cuda",
    batch_size=128,
    verbose=0,
)

T5Ranker
{'TransformerRanker': <class 'rerankers.models.transformer_ranker.TransformerRanker'>, 'APIRanker': <class 'rerankers.models.api_rankers.APIRanker'>, 'T5Ranker': <class 'rerankers.models.t5ranker.T5Ranker'>}


You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [6]:
# Load the pre-computed candidate lists from JSON. The reranking loop indexes
# this by query "_id" and uses each value as the list of document ids to rank.
# NOTE(review): presumably the top-100 first-stage retrieval results per query
# (going by the file name) — confirm how this file was produced.
top100 = srsly.read_json("data/scifact/scifact_top_100.json")

In [7]:
# Lookup table: document "_id" -> the passage text handed to the reranker,
# built as the title and the body joined by a single space.
corpus_map = {
    doc["_id"]: "{} {}".format(doc["title"], doc["text"])
    for doc in corpus
}

In [8]:
from tqdm import tqdm

# Keep only the queries that have relevance judgments; queries without qrels
# cannot contribute to the metric, so reranking them would be wasted work.
qrels_dict = dict(qrels)
queries = [query for query in queries if query["_id"] in qrels_dict]

# Rerank each remaining query's candidate documents.
# scores maps query id -> whatever ranker.rank returns for that query.
scores = {}
for query in tqdm(queries):
    query_id = query["_id"]
    candidate_ids = top100[query_id]
    candidate_texts = [corpus_map[doc_id] for doc_id in candidate_ids]
    scores[query_id] = ranker.rank(
        query["text"], candidate_texts, doc_ids=candidate_ids
    )

100%|██████████| 300/300 [04:44<00:00,  1.06it/s]


In [17]:
# Convert the reranker output into the nested mapping handed to Run:
# {query_id: {doc_id: score}}, keeping only each query's top 10 results
# (the metric computed afterwards is NDCG@10).
scores_dict = {
    q_id: {hit.doc_id: hit.score for hit in ranked_results.top_k(10)}
    for q_id, ranked_results in scores.items()
}
run = Run(scores_dict)

In [18]:
from ranx import evaluate

# Reproduction check: compute NDCG@10 for our run and compare it with the
# figure reported in the literature, within a fixed absolute tolerance.
evaluation_score = evaluate(qrels, run, "ndcg@10")
# The variable name's spelling is kept as-is so any other cell that reads it
# keeps working.
litterature_result = 0.734  # From RankGPT Paper https://arxiv.org/pdf/2304.09542.pdf
# Single source of truth for the threshold, so the comparison and both
# messages can never disagree about what "close enough" means.
tolerance = 0.01
if abs(evaluation_score - litterature_result) > tolerance:
    print(
        f"Score {evaluation_score:0.3f} differs by more than {tolerance} from the reported score."
    )
else:
    # Report the measured score on success as well, so the cell output
    # records the actual number rather than only a pass message.
    print(
        f"Score {evaluation_score:0.3f} is within {tolerance} NDCG@10 of the reported score!"
    )

Score 0.731 is within 0.005 NDCG@10 of the reported score!
