In [1]:
from beir import util, LoggingHandler
from beir.retrieval import models
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval.search.dense import DenseRetrievalExactSearch as DRES

import logging
import pathlib, os

#### Just some code to print debug information to stdout
logging.basicConfig(format='%(asctime)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO,
                    handlers=[LoggingHandler()])


from torchdr import PCA, TSNE
from sentence_transformers import SentenceTransformer
import pandas as pd


  from tqdm.autonotebook import tqdm


In [None]:
model_name_list =[
    # 'all-mpnet-base-v2', #  0.5481, 0.2312, 0.2559 (1 mins) (2312, 2559)
    # # 'all-mpnet-base-v2', #[full 10] 2414 -> 2684kwo, 2440kw, 2298kp, 2740kpo, 2352kso, 2399ks
    # 'sentence-t5-xl', #[full] 0.6754, 0.2543, 0.2990

    'all-MiniLM-L12-v1',
    'all-mpnet-base-v2',
]

reduction_classes = {
    'pca': PCA,
    # 'tsne': TSNE
}

class idenity_reduction:

    def __init__(self, *args, **kwargs):
        pass

    def fit(self, x):
        pass

    def transform(self, x):
        return x

class ST_wrapper(SentenceTransformer):
    def __init__(self, model_name, reduction_type = 'x', reduction_kwargs={}, *args, **kwargs):
        super(ST_wrapper, self).__init__(model_name, *args, **kwargs)
        if reduction_type == 'x':
            self.reduction = idenity_reduction()
        else:
            self.reduction = reduction_classes[reduction_type](**reduction_kwargs)

    def encode_queries(self, queries, *args, **kwargs):
        embeddings = self.encode(queries, *args, **kwargs)
        self.reduction.fit(embeddings)
        return self.reduction.transform(embeddings)

    def encode_corpus(self, corpus, *args, **kwargs):
        embeddings = self.encode(corpus, *args, **kwargs)
        print(embeddings.shape)
        return self.reduction.transform(embeddings)


reduction_kwargs_choices = {
    'x': {},
    'pca': {'n_components': 128},
    # 'tsne': {'perplexity': 30}
}

results_dict = {}

out_dir = "./beir"
os.mkdir(out_dir) if not os.path.exists(out_dir) else None
#### /print debug information to stdout

#### Download scifact.zip dataset and unzip the dataset
dataset_list = ['scifact', 'scidocs', 'climate-fever', 'hotpotqa']

df_list = []
for dataset in dataset_list:

    # dataset = "scidocs"
    url = "https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip".format(dataset)

    data_path = util.download_and_unzip(url, out_dir)

    #### Provide the data_path where scifact has been downloaded and unzipped
    corpus, queries, qrels = GenericDataLoader(data_folder=data_path).load(split="test")


    for model_name in model_name_list:
        for reduction_type in ['pca', 'x', ]:

            model = ST_wrapper(model_name, reduction_type, reduction_kwargs_choices[reduction_type])
            model = DRES(model, batch_size=128)

            retriever = EvaluateRetrieval(model, score_function="dot") # or "cos_sim" for cosine similarity
            results = retriever.retrieve(corpus, queries)
            ndcg, _map, recall, precision = retriever.evaluate(qrels, results, retriever.k_values)
            results_dict[(model_name, reduction_type)] = {}
            for result in [ndcg, _map, recall, precision]:
                for k, v in result.items():
                    results_dict[(model_name, reduction_type)][k] = v

        df = pd.DataFrame(results_dict)
        df_list.append(df)

2025-02-03 02:45:15 - Loading Corpus...


  0%|          | 0/5183 [00:00<?, ?it/s]

2025-02-03 02:45:15 - Loaded 5183 TEST Documents.
2025-02-03 02:45:15 - Doc Example: {'text': 'Alterations of the architecture of cerebral white matter in the developing human brain can affect cortical development and result in functional disabilities. A line scan diffusion-weighted magnetic resonance imaging (MRI) sequence with diffusion tensor analysis was applied to measure the apparent diffusion coefficient, to calculate relative anisotropy, and to delineate three-dimensional fiber architecture in cerebral white matter in preterm (n = 17) and full-term infants (n = 7). To assess effects of prematurity on cerebral white matter development, early gestation preterm infants (n = 10) were studied a second time at term. In the central white matter the mean apparent diffusion coefficient at 28 wk was high, 1.8 microm2/ms, and decreased toward term to 1.2 microm2/ms. In the posterior limb of the internal capsule, the mean apparent diffusion coefficients at both times were similar (1.2 vers

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2025-02-03 02:45:17 - Sorting Corpus by document length (Longest first)...
2025-02-03 02:45:17 - Scoring Function: Dot Product (dot)
2025-02-03 02:45:17 - Encoding Batch 1/1...


Batches:   0%|          | 0/41 [00:00<?, ?it/s]

torch.Size([5183, 384])
2025-02-03 02:45:20 - For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-02-03 02:45:20 - 

2025-02-03 02:45:20 - NDCG@1: 0.4700
2025-02-03 02:45:20 - NDCG@3: 0.5350
2025-02-03 02:45:20 - NDCG@5: 0.5602
2025-02-03 02:45:20 - NDCG@10: 0.5929
2025-02-03 02:45:20 - NDCG@100: 0.6354
2025-02-03 02:45:20 - NDCG@1000: 0.6445
2025-02-03 02:45:20 - 

2025-02-03 02:45:20 - MAP@1: 0.4521
2025-02-03 02:45:20 - MAP@3: 0.5117
2025-02-03 02:45:20 - MAP@5: 0.5287
2025-02-03 02:45:20 - MAP@10: 0.5439
2025-02-03 02:45:20 - MAP@100: 0.5537
2025-02-03 02:45:20 - MAP@1000: 0.5540
2025-02-03 02:45:20 - 

2025-02-03 02:45:20 - Recall@1: 0.4521
2025-02-03 02:45:20 - Recall@3: 0.5762
2025-02-03 02:45:20 - Recall@5: 0.6373
2025-02-03 02:45:20 - Recall@10: 0.7329
2025-02-03 02:45:20 - Recall@100: 0.9233
2025-02-03 02:45:20 - Recall@1000: 0.9967
2025-02-03 02:45:20 - 

2025-02-03 02:45:20 - P@1: 0.

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2025-02-03 02:45:20 - Sorting Corpus by document length (Longest first)...
2025-02-03 02:45:20 - Scoring Function: Dot Product (dot)
2025-02-03 02:45:20 - Encoding Batch 1/1...


Batches:   0%|          | 0/41 [00:00<?, ?it/s]

torch.Size([5183, 384])
2025-02-03 02:45:23 - For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-02-03 02:45:23 - 

2025-02-03 02:45:23 - NDCG@1: 0.4967
2025-02-03 02:45:23 - NDCG@3: 0.5694
2025-02-03 02:45:23 - NDCG@5: 0.5942
2025-02-03 02:45:23 - NDCG@10: 0.6217
2025-02-03 02:45:23 - NDCG@100: 0.6599
2025-02-03 02:45:23 - NDCG@1000: 0.6677
2025-02-03 02:45:23 - 

2025-02-03 02:45:23 - MAP@1: 0.4814
2025-02-03 02:45:23 - MAP@3: 0.5446
2025-02-03 02:45:23 - MAP@5: 0.5616
2025-02-03 02:45:23 - MAP@10: 0.5754
2025-02-03 02:45:23 - MAP@100: 0.5838
2025-02-03 02:45:23 - MAP@1000: 0.5841
2025-02-03 02:45:23 - 

2025-02-03 02:45:23 - Recall@1: 0.4814
2025-02-03 02:45:23 - Recall@3: 0.6159
2025-02-03 02:45:23 - Recall@5: 0.6769
2025-02-03 02:45:23 - Recall@10: 0.7540
2025-02-03 02:45:23 - Recall@100: 0.9300
2025-02-03 02:45:23 - Recall@1000: 0.9917
2025-02-03 02:45:23 - 

2025-02-03 02:45:23 - P@1: 0.

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

2025-02-03 02:45:34 - Encoding Queries...


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2025-02-03 02:45:34 - Sorting Corpus by document length (Longest first)...
2025-02-03 02:45:34 - Scoring Function: Dot Product (dot)
2025-02-03 02:45:34 - Encoding Batch 1/1...


Batches:   0%|          | 0/41 [00:00<?, ?it/s]

torch.Size([5183, 768])
2025-02-03 02:45:48 - For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-02-03 02:45:48 - 

2025-02-03 02:45:48 - NDCG@1: 0.4800
2025-02-03 02:45:48 - NDCG@3: 0.5610
2025-02-03 02:45:48 - NDCG@5: 0.5847
2025-02-03 02:45:48 - NDCG@10: 0.6107
2025-02-03 02:45:48 - NDCG@100: 0.6548
2025-02-03 02:45:48 - NDCG@1000: 0.6618
2025-02-03 02:45:48 - 

2025-02-03 02:45:48 - MAP@1: 0.4587
2025-02-03 02:45:48 - MAP@3: 0.5316
2025-02-03 02:45:48 - MAP@5: 0.5481
2025-02-03 02:45:48 - MAP@10: 0.5607
2025-02-03 02:45:48 - MAP@100: 0.5710
2025-02-03 02:45:48 - MAP@1000: 0.5713
2025-02-03 02:45:48 - 

2025-02-03 02:45:48 - Recall@1: 0.4587
2025-02-03 02:45:48 - Recall@3: 0.6148
2025-02-03 02:45:48 - Recall@5: 0.6731
2025-02-03 02:45:48 - Recall@10: 0.7480
2025-02-03 02:45:48 - Recall@100: 0.9450
2025-02-03 02:45:48 - Recall@1000: 1.0000
2025-02-03 02:45:48 - 

2025-02-03 02:45:48 - P@1: 0.

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2025-02-03 02:45:49 - Sorting Corpus by document length (Longest first)...
2025-02-03 02:45:49 - Scoring Function: Dot Product (dot)
2025-02-03 02:45:49 - Encoding Batch 1/1...


Batches:   0%|          | 0/41 [00:00<?, ?it/s]

torch.Size([5183, 768])
2025-02-03 02:46:02 - For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-02-03 02:46:02 - 

2025-02-03 02:46:02 - NDCG@1: 0.4967
2025-02-03 02:46:02 - NDCG@3: 0.5821
2025-02-03 02:46:02 - NDCG@5: 0.6121
2025-02-03 02:46:02 - NDCG@10: 0.6331
2025-02-03 02:46:02 - NDCG@100: 0.6738
2025-02-03 02:46:02 - NDCG@1000: 0.6803
2025-02-03 02:46:02 - 

2025-02-03 02:46:02 - MAP@1: 0.4779
2025-02-03 02:46:02 - MAP@3: 0.5533
2025-02-03 02:46:02 - MAP@5: 0.5737
2025-02-03 02:46:02 - MAP@10: 0.5843
2025-02-03 02:46:02 - MAP@100: 0.5942
2025-02-03 02:46:02 - MAP@1000: 0.5945
2025-02-03 02:46:02 - 

2025-02-03 02:46:02 - Recall@1: 0.4779
2025-02-03 02:46:02 - Recall@3: 0.6381
2025-02-03 02:46:02 - Recall@5: 0.7118
2025-02-03 02:46:02 - Recall@10: 0.7694
2025-02-03 02:46:02 - Recall@100: 0.9483
2025-02-03 02:46:02 - Recall@1000: 0.9967
2025-02-03 02:46:02 - 

2025-02-03 02:46:02 - P@1: 0.

  0%|          | 0/25657 [00:00<?, ?it/s]

2025-02-03 02:46:03 - Loaded 25657 TEST Documents.
2025-02-03 02:46:03 - Doc Example: {'text': 'An evolutionary recurrent network which automates the design of recurrent neural/fuzzy networks using a new evolutionary learning algorithm is proposed in this paper. This new evolutionary learning algorithm is based on a hybrid of genetic algorithm (GA) and particle swarm optimization (PSO), and is thus called HGAPSO. In HGAPSO, individuals in a new generation are created, not only by crossover and mutation operation as in GA, but also by PSO. The concept of elite strategy is adopted in HGAPSO, where the upper-half of the best-performing individuals in a population are regarded as elites. However, instead of being reproduced directly to the next generation, these elites are first enhanced. The group constituted by the elites is regarded as a swarm, and each elite corresponds to a particle within it. In this regard, the elites are enhanced by PSO, an operation which mimics the maturing pheno

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2025-02-03 02:46:04 - Sorting Corpus by document length (Longest first)...
2025-02-03 02:46:04 - Scoring Function: Dot Product (dot)
2025-02-03 02:46:04 - Encoding Batch 1/1...


Batches:   0%|          | 0/201 [00:00<?, ?it/s]

In [5]:
pd.concat(df_list)

Unnamed: 0_level_0,all-MiniLM-L12-v1,all-MiniLM-L12-v1
Unnamed: 0_level_1,pca,NaN
NDCG@1,0.38667,0.49667
NDCG@3,0.46472,0.56936
NDCG@5,0.48924,0.59417
NDCG@10,0.51689,0.6217
NDCG@100,0.5653,0.6599
NDCG@1000,0.57961,0.66767
MAP@1,0.36872,0.48139
MAP@3,0.43694,0.54461
MAP@5,0.45279,0.56156
MAP@10,0.46607,0.57537
