# Experiment 1 - Evaluating

In [1]:
BATCH_SIZE = 16 # How many triples in a mini-batch

STORAGE_DIR = r'Experiment_1' # Where to get and store artefacts
CORPUS = r'msmarco_passage' # What documents can we retrieve?
EVAL_DATASET = r'trec-deep-learning-passages' # What is our test set?
VARIANT = 'test-2019' # What variant of our test set are we evaluating on e.g test-2019 or test-2020
OUTPUT_NAME = f'{VARIANT}.csv'

In [2]:
import pyterrier as pt
import os
from pyterrier_t5 import MonoT5ReRanker
from pyterrier.measures import RR, MAP, NDCG
import argparse
import logging
import pandas as pd
from tqdm import tqdm_notebook as tqdm

os.environ['JAVA_HOME'] = "/usr/lib/jvm/jdk-17"
pt.init()

  from .autonotebook import tqdm as notebook_tqdm
PyTerrier 0.9.2 has loaded Terrier 5.7 (built by craigm on 2022-11-10 18:30) and terrier-helper 0.0.7

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [3]:
os.makedirs(os.path.join(STORAGE_DIR, 'results'), exist_ok=True)

In [4]:
bm25 = pt.BatchRetrieve.from_dataset(CORPUS, 'terrier_stemmed_text', wmodel='BM25', metadata=['docno', 'text'])
dataset = pt.get_dataset(EVAL_DATASET)

20:55:03.904 [main] WARN org.terrier.structures.BaseCompressingMetaIndex - Structure meta reading data file directly from disk (SLOW) - try index.meta.data-source=fileinmem in the index properties file. 1.9 GiB of memory would be required.


In [5]:
def msmarco_generate():
    dataset = pt.get_dataset("trec-deep-learning-passages")
    with pt.io.autoopen(dataset.get_corpus()[0], 'rt') as corpusfile:
        for l in corpusfile:
            docno, passage = l.split("\t")
            yield {'docno' : docno, 'text' : passage}

iter_indexer = pt.IterDictIndexer("./passage_index")
indexref = iter_indexer.index(msmarco_generate(), meta={'docno' : 20, 'text': 4096})
index = pt.IndexFactory.of(indexref)

  indexref = iter_indexer.index(msmarco_generate(), meta={'docno' : 20, 'text': 4096})


21:05:05.482 [ForkJoinPool-1-worker-1] WARN org.terrier.structures.indexing.Indexer - Indexed 5 empty documents


In [6]:
BASELINE_DIR = f'{STORAGE_DIR}/model_base'
NEGATIVE_DIR = f'{STORAGE_DIR}/model_new'

In [9]:
baselinet5 = bm25 >> pt.text.get_text(index, "text") >> MonoT5ReRanker(model=BASELINE_DIR, batch_size=BATCH_SIZE)
truenegative_T5 = bm25 >> pt.text.get_text(index, "text") >> MonoT5ReRanker(model=NEGATIVE_DIR, batch_size=BATCH_SIZE)

res = pt.Experiment(
  [baselinet5, truenegative_T5],
  dataset.get_topics(variant=VARIANT),
  dataset.get_qrels(variant=VARIANT),
  eval_metrics=[RR(rel=2), MAP(rel=2), NDCG(cutoff=10)],
  baseline=0,
  names=["Standard MonoT5", "Hard Negative MonoT5"]
)

monoT5: 100%|████████████████████████| 12078/12078 [09:18<00:00, 21.64batches/s]
monoT5: 100%|████████████████████████| 12078/12078 [09:21<00:00, 21.50batches/s]


## Save the result

In [10]:
res.to_csv(os.path.join(STORAGE_DIR, 'results', OUTPUT_NAME))