In [1]:
from beir import util, LoggingHandler
from beir.retrieval import models
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval.search.dense import DenseRetrievalExactSearch as DRES

  from tqdm.autonotebook import tqdm


In [2]:
import logging
import pathlib, os

#### Configure the root logger so that INFO-level (and above) messages are
#### emitted through BEIR's LoggingHandler, each prefixed with a timestamp.
logging.basicConfig(
    level=logging.INFO,
    handlers=[LoggingHandler()],
    format='%(asctime)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

In [4]:
import os
import pathlib
from beir import util

# Name of the BEIR dataset to fetch; it is substituted into the archive URL below.
dataset = "scifact"
url = f"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{dataset}.zip"

# Archives are stored in a "datasets" folder under the current working directory.
out_dir = os.path.join(
    pathlib.Path('.').absolute(),
    "datasets",
)

# download_and_unzip returns the path that is later passed to GenericDataLoader.
data_path = util.download_and_unzip(url, out_dir)

2024-02-10 21:49:56 - Downloading scifact.zip ...


/Users/chinmayshrivastava/Documents/GitHub/RepoStateManager/evaluation/BeIR/datasets/scifact.zip: 100%|██████████| 2.69M/2.69M [00:00<00:00, 3.69MiB/s]


2024-02-10 21:49:58 - Unzipping scifact.zip ...


In [5]:
#### Read the "test" split from the folder where scifact was downloaded and unzipped.
#### As the cell outputs in this notebook show:
####   corpus  : doc id   -> {'title': ..., 'text': ...}
####   queries : query id -> query text
####   qrels   : query id -> {doc id: relevance label}
loader = GenericDataLoader(data_folder=data_path)
corpus, queries, qrels = loader.load(split="test")

2024-02-10 21:50:15 - Loading Corpus...


100%|██████████| 5183/5183 [00:00<00:00, 72532.01it/s]

2024-02-10 21:50:15 - Loaded 5183 TEST Documents.
2024-02-10 21:50:15 - Doc Example: {'text': 'Alterations of the architecture of cerebral white matter in the developing human brain can affect cortical development and result in functional disabilities. A line scan diffusion-weighted magnetic resonance imaging (MRI) sequence with diffusion tensor analysis was applied to measure the apparent diffusion coefficient, to calculate relative anisotropy, and to delineate three-dimensional fiber architecture in cerebral white matter in preterm (n = 17) and full-term infants (n = 7). To assess effects of prematurity on cerebral white matter development, early gestation preterm infants (n = 10) were studied a second time at term. In the central white matter the mean apparent diffusion coefficient at 28 wk was high, 1.8 microm2/ms, and decreased toward term to 1.2 microm2/ms. In the posterior limb of the internal capsule, the mean apparent diffusion coefficients at both times were similar (1.2 vers




In [17]:
# Sanity check: number of documents loaded into the corpus.
len(corpus)

5183

In [16]:
from IPython.display import display, Markdown

# Render the title and then the text of one example corpus document (id '4983') as Markdown.
example_doc = corpus['4983']
for field in ('title', 'text'):
    display(Markdown(example_doc[field]))

Microstructural development of human newborn cerebral white matter assessed in vivo by diffusion tensor magnetic resonance imaging.

Alterations of the architecture of cerebral white matter in the developing human brain can affect cortical development and result in functional disabilities. A line scan diffusion-weighted magnetic resonance imaging (MRI) sequence with diffusion tensor analysis was applied to measure the apparent diffusion coefficient, to calculate relative anisotropy, and to delineate three-dimensional fiber architecture in cerebral white matter in preterm (n = 17) and full-term infants (n = 7). To assess effects of prematurity on cerebral white matter development, early gestation preterm infants (n = 10) were studied a second time at term. In the central white matter the mean apparent diffusion coefficient at 28 wk was high, 1.8 microm2/ms, and decreased toward term to 1.2 microm2/ms. In the posterior limb of the internal capsule, the mean apparent diffusion coefficients at both times were similar (1.2 versus 1.1 microm2/ms). Relative anisotropy was higher the closer birth was to term with greater absolute values in the internal capsule than in the central white matter. Preterm infants at term showed higher mean diffusion coefficients in the central white matter (1.4 +/- 0.24 versus 1.15 +/- 0.09 microm2/ms, p = 0.016) and lower relative anisotropy in both areas compared with full-term infants (white matter, 10.9 +/- 0.6 versus 22.9 +/- 3.0%, p = 0.001; internal capsule, 24.0 +/- 4.44 versus 33.1 +/- 0.6% p = 0.006). Nonmyelinated fibers in the corpus callosum were visible by diffusion tensor MRI as early as 28 wk; full-term and preterm infants at term showed marked differences in white matter fiber organization. The data indicate that quantitative assessment of water diffusion by diffusion tensor MRI provides insight into microstructural development in cerebral white matter in living infants.

In [18]:
# Sanity check: number of queries in the test split.
len(queries)

300

In [19]:
# Inspect the test queries: a dict mapping query id -> query text.
# NOTE(review): this renders the whole dict (300 entries); consider displaying
# only a small slice to keep the notebook output short.
queries

{'1': '0-dimensional biomaterials show inductive properties.',
 '3': '1,000 genomes project enables mapping of genetic sequence variation consisting of rare variants with larger penetrance effects than common variants.',
 '5': '1/2000 in UK have abnormal PrP positivity.',
 '13': '5% of perinatal mortality is due to low birth weight.',
 '36': 'A deficiency of vitamin B12 increases blood levels of homocysteine.',
 '42': 'A high microerythrocyte count raises vulnerability to severe anemia in homozygous alpha (+)- thalassemia trait subjects.',
 '48': 'A total of 1,000 people in the UK are asymptomatic carriers of vCJD infection.',
 '49': 'ADAR1 binds to Dicer to cleave pre-miRNA.',
 '50': 'AIRE is expressed in some skin tumors.',
 '51': 'ALDH1 expression is associated with better breast cancer outcomes.',
 '53': 'ALDH1 expression is associated with poorer prognosis in breast cancer.',
 '54': 'AMP-activated protein kinase (AMPK) activation increases inflammation-related fibrosis in the lung

In [20]:
# Inspect the relevance judgments: query id -> {doc id: relevance label}.
# NOTE(review): this renders the whole mapping; consider displaying only a
# small slice to keep the notebook output short.
qrels

{'1': {'31715818': 1},
 '3': {'14717500': 1},
 '5': {'13734012': 1},
 '13': {'1606628': 1},
 '36': {'5152028': 1, '11705328': 1},
 '42': {'18174210': 1},
 '48': {'13734012': 1},
 '49': {'5953485': 1},
 '50': {'12580014': 1},
 '51': {'45638119': 1},
 '53': {'45638119': 1},
 '54': {'49556906': 1},
 '56': {'4709641': 1},
 '57': {'4709641': 1},
 '70': {'5956380': 1, '4414547': 1},
 '72': {'6076903': 1},
 '75': {'4387784': 1},
 '94': {'1215116': 1},
 '99': {'18810195': 1},
 '100': {'4381486': 1},
 '113': {'6157837': 1},
 '115': {'33872649': 1},
 '118': {'6372244': 1},
 '124': {'4883040': 1},
 '127': {'21598000': 1},
 '128': {'8290953': 1},
 '129': {'27768226': 1},
 '130': {'27768226': 1},
 '132': {'7975937': 1},
 '133': {'38485364': 1,
  '6969753': 1,
  '17934082': 1,
  '16280642': 1,
  '12640810': 1},
 '137': {'26016929': 1},
 '141': {'6955746': 1, '14437255': 1},
 '142': {'10582939': 1},
 '143': {'10582939': 1},
 '146': {'10582939': 1},
 '148': {'1084345': 1},
 '163': {'18872233': 1},
 '1

In [21]:
#### Load the SBERT model and retrieve using dot-product scoring
# DRES is DenseRetrievalExactSearch; it wraps the SentenceBERT encoder and
# encodes inputs with batch_size=16.
# NOTE: scoring is the dot product (score_function="dot"), not cosine similarity;
# pass "cos_sim" instead if cosine similarity is wanted.
# NOTE: this cell is slow on CPU — in the recorded run, encoding the corpus
# (324 batches) took roughly 37 minutes.
model = DRES(models.SentenceBERT("msmarco-distilbert-base-tas-b"), batch_size=16)
retriever = EvaluateRetrieval(model, score_function="dot") # or "cos_sim" for cosine similarity
results = retriever.retrieve(corpus, queries)

2024-02-10 21:53:38 - Load pretrained SentenceTransformer: msmarco-distilbert-base-tas-b


modules.json: 100%|██████████| 229/229 [00:00<00:00, 307kB/s]
config_sentence_transformers.json: 100%|██████████| 122/122 [00:00<00:00, 184kB/s]
README.md: 100%|██████████| 3.99k/3.99k [00:00<00:00, 13.6MB/s]
sentence_bert_config.json: 100%|██████████| 53.0/53.0 [00:00<00:00, 217kB/s]
config.json: 100%|██████████| 548/548 [00:00<00:00, 1.77MB/s]
pytorch_model.bin: 100%|██████████| 265M/265M [00:09<00:00, 27.7MB/s] 
  return self.fget.__get__(instance, owner)()
tokenizer_config.json: 100%|██████████| 547/547 [00:00<00:00, 948kB/s]
vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 4.25MB/s]
tokenizer.json: 100%|██████████| 466k/466k [00:00<00:00, 7.88MB/s]
special_tokens_map.json: 100%|██████████| 112/112 [00:00<00:00, 195kB/s]
1_Pooling/config.json: 100%|██████████| 190/190 [00:00<00:00, 388kB/s]


2024-02-10 21:53:52 - Use pytorch device_name: cpu
2024-02-10 21:53:52 - Encoding Queries...


Batches: 100%|██████████| 19/19 [00:10<00:00,  1.81it/s]


2024-02-10 21:54:03 - Sorting Corpus by document length (Longest first)...
2024-02-10 21:54:03 - Scoring Function: Dot Product (dot)
2024-02-10 21:54:03 - Encoding Batch 1/1...


Batches: 100%|██████████| 324/324 [37:28<00:00,  6.94s/it]


In [22]:
#### Evaluate the retrieval results with NDCG@k, MAP@k, Recall@k and Precision@k,
#### where k is taken from retriever.k_values, i.e. [1,3,5,10,100,1000].
# The four return values hold the NDCG, MAP, Recall and Precision scores in that
# order; the scores are also written to the log.
ndcg, _map, recall, precision = retriever.evaluate(qrels, results, retriever.k_values)

2024-02-10 22:52:46 - For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2024-02-10 22:52:46 - 

2024-02-10 22:52:46 - NDCG@1: 0.5333
2024-02-10 22:52:46 - NDCG@3: 0.5990
2024-02-10 22:52:46 - NDCG@5: 0.6215
2024-02-10 22:52:46 - NDCG@10: 0.6428
2024-02-10 22:52:46 - NDCG@100: 0.6698
2024-02-10 22:52:46 - NDCG@1000: 0.6811
2024-02-10 22:52:46 - 

2024-02-10 22:52:46 - MAP@1: 0.5086
2024-02-10 22:52:46 - MAP@3: 0.5730
2024-02-10 22:52:46 - MAP@5: 0.5892
2024-02-10 22:52:46 - MAP@10: 0.5992
2024-02-10 22:52:46 - MAP@100: 0.6046
2024-02-10 22:52:46 - MAP@1000: 0.6049
2024-02-10 22:52:46 - 

2024-02-10 22:52:46 - Recall@1: 0.5086
2024-02-10 22:52:46 - Recall@3: 0.6473
2024-02-10 22:52:46 - Recall@5: 0.6998
2024-02-10 22:52:46 - Recall@10: 0.7615
2024-02-10 22:52:46 - Recall@100: 0.8910
2024-02-10 22:52:46 - Recall@1000: 0.9833
2024-02-10 22:52:46 - 

2024-02-10 22:52:46 - P@1: 0.5333
2024-02-10 22:52:46