# Initialize Elasticsearch with data
**Prerequisite: Elasticsearch must be installed and running (this notebook connects to `http://0.0.0.0:9200`).**

In [None]:
! pip install datasets
! pip install elasticsearch
! pip install sentencepiece
! pip install sentence-transformers==2.7.0
! pip install huggingface_hub

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py38-none-any.whl.metadata (7.1 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
Downloading dill-0.3.8-py3-none-any.whl (116 kB)
Downloading fsspec-2024.9.0-py3-none-any.whl (179 kB)
Downloading multiprocess-0.70.16-py38-none-any.whl (132 kB)
Downloading pyarrow-17.0.0-cp38-cp38-manylinux

In [None]:
# Preparation - Creating the index, loading the model, and loading the dataset
from datasets import load_dataset
#from huggingface_hub import login
#login('hf_xxxxx')

# Fetch the 'test' split of the dataset published under Leviatan's account.
# Swap in your own dataset here if needed.
dataset = load_dataset(
    'LeviatanAIResearch/cross-encoder-binary-context-quesion-v3',
    split='test',
)


def _has_label_one(example):
    """Return True when the row's 'label' field equals 1."""
    return example['label'] == 1


# Keep only the rows whose label is 1.
filtered_dataset = dataset.filter(_has_label_one)

In [None]:
from elasticsearch import Elasticsearch, helpers
import time

# Configure Elasticsearch connection (update URL based on your setup)
ELASTICSEARCH_URL = 'http://0.0.0.0:9200'
ELASTICSEARCH_STARTUP_TIMEOUT_S = 120  # give up after this long instead of waiting forever
ELASTICSEARCH_POLL_INTERVAL_S = 2      # pause between two ping attempts

elasticsearch_client = Elasticsearch(ELASTICSEARCH_URL, connections_per_node=2)

# Wait until Elasticsearch is responsive, but fail loudly if it never comes up:
# an unbounded loop would freeze "Run All" when the server is down or the URL is wrong.
_startup_deadline = time.monotonic() + ELASTICSEARCH_STARTUP_TIMEOUT_S
while not elasticsearch_client.ping():
    if time.monotonic() >= _startup_deadline:
        raise ConnectionError(
            f'Elasticsearch at {ELASTICSEARCH_URL} did not respond within '
            f'{ELASTICSEARCH_STARTUP_TIMEOUT_S} seconds.'
        )
    time.sleep(ELASTICSEARCH_POLL_INTERVAL_S)

In [None]:
# Verify if the index exists; if not, create it
EMBEDDINGS_DIMENSION = 1024  # expected embedding size; get_vector_embeddings asserts it
index_name = "my_rag_index"

mapping = {
    # Not sent to Elasticsearch (only mapping['mappings'] is); kept so the dict shape is unchanged.
    "index_name": index_name,
    "mappings": {
        "properties": {
            "content": {"type": "text"},
            "embeddings": {"type": "dense_vector", "dims": EMBEDDINGS_DIMENSION},
            # `source` is filtered with an exact `term` query in get_candidates, so it is
            # stored as a non-analyzed keyword instead of analyzed full text.
            "source": {"type": "keyword"}
        }
    }
}

if elasticsearch_client.indices.exists(index=index_name):
    # Nothing is changed on an existing index: its current mapping stays in place.
    print(f'Index "{index_name}" already exists; keeping its current mapping.')
else:
    elasticsearch_client.indices.create(index=index_name, body={"mappings": mapping['mappings']})
    print(f'Index "{index_name}" created successfully.')

Index "my_rag_index" created successfully.


In [None]:
import numpy as np
from sentence_transformers import SentenceTransformer

# Load pre-trained Sentence Transformer model
# (module-level so that get_vector_embeddings can reuse the same instance for every call).
MODEL = SentenceTransformer('intfloat/multilingual-e5-large')
# Set the model's maximum input sequence length to 512.
MODEL.max_seq_length = 512

def get_vector_embeddings(content: str) -> np.ndarray:
    """
    Encode a text with the module-level sentence-transformer ``MODEL``.

    Parameters:
    ----------
    content : str
        The input text to encode.

    Returns:
    -------
    np.ndarray:
        The embedding returned by ``MODEL.encode``; its shape is asserted
        to be ``(EMBEDDINGS_DIMENSION,)`` before it is handed back.

    Raises:
    ------
    AssertionError
        If the embedding does not have the expected shape.
    """
    vector = MODEL.encode(content)
    expected_shape = (EMBEDDINGS_DIMENSION,)
    # Guard against a model whose output size differs from the index mapping.
    assert expected_shape == vector.shape
    return vector

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


In [None]:
def get_embeddings_data(content, source):
    """
    Build one record ready to be bulk-indexed into Elasticsearch.

    Parameters
    ----------
    content : str
        The text to index; it is also the text that gets embedded.
    source : str
        The source the content comes from.

    Returns
    -------
    dict
        A dictionary with the keys '_index' (the module-level ``index_name``),
        'content', 'source' and 'embeddings' (the result of
        ``get_vector_embeddings(content)``).
    """
    record = {'_index': index_name}
    record['content'] = content
    record['source'] = source
    # The embedding is computed last, after the cheap fields are in place.
    record['embeddings'] = get_vector_embeddings(content)
    return record

In [None]:
# Dataset statistics before any source filtering
unique_contexts = set(filtered_dataset['text1'])
print(len(unique_contexts))  # Unique contexts

questions = filtered_dataset['text2']
print(len(questions))  # Number of questions

5723
17071


In [None]:
# Restrict the dataset to the rows coming from the 'piaf' and 'fquad' sources
filtered_dataset = filtered_dataset.filter(lambda row: row['source'] in ('piaf', 'fquad'))

# Statistics after filtering: row count, distinct contexts, distinct 'fquad' questions
row_total = len(filtered_dataset)
print(row_total)

distinct_contexts = set(filtered_dataset['text1'])
print(len(distinct_contexts))

fquad_rows = filtered_dataset.filter(lambda row: row['source'] == 'fquad')
distinct_fquad_questions = set(fquad_rows['text2'])
print(len(distinct_fquad_questions))

4339
996
3184


In [None]:
index_data = []         # one bulk-indexing record per unique context
context_list = []       # unique contexts, in first-seen order
_seen_contexts = set()  # O(1) membership test; scanning the list made de-duplication quadratic
_total_rows = len(filtered_dataset)

# Iterate through the dataset and prepare data for indexing
for index in range(_total_rows):
    try:
        item = filtered_dataset[index]
        if index % 2000 == 0:
            print(f'--{index}, --{round(index/_total_rows*100, 2)}%')

        # Avoid duplicate contexts: several questions can share the same context,
        # and each context only needs to be embedded and indexed once.
        context = item['text1']
        if context not in _seen_contexts:
            _seen_contexts.add(context)
            context_list.append(context)
            index_data.append(get_embeddings_data(context, item['source']))
    except Exception:
        # Show which row failed, then re-raise with the original traceback intact.
        print(index)
        raise

--0, --0.0%
--2000, --46.09%
--4000, --92.19%


In [None]:
import hashlib

# Bulk insert indexed data into Elasticsearch.
# Each document gets a deterministic `_id` derived from its content, so re-running this
# cell overwrites the same documents instead of appending copies under fresh
# auto-generated ids (the index-creation cell keeps an existing index, so duplicates
# would otherwise pile up and reappear among the retrieved candidates).
write_bulk_res = helpers.bulk(
    elasticsearch_client,
    (
        {**document, '_id': hashlib.sha256(document['content'].encode('utf-8')).hexdigest()}
        for document in index_data
    ),
)

# Benchmark for Knowledge Retrieval in RAG-based Systems

In [None]:
! pip install cohere

Collecting cohere
  Downloading cohere-5.13.2-py3-none-any.whl.metadata (3.5 kB)
Collecting fastavro<2.0.0,>=1.9.4 (from cohere)
  Downloading fastavro-1.9.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.5 kB)
Collecting httpx>=0.21.2 (from cohere)
  Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx-sse==0.4.0 (from cohere)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting parameterized<0.10.0,>=0.9.0 (from cohere)
  Downloading parameterized-0.9.0-py2.py3-none-any.whl.metadata (18 kB)
Collecting types-requests<3.0.0,>=2.0.0 (from cohere)
  Downloading types_requests-2.32.0.20241016-py3-none-any.whl.metadata (1.9 kB)
Collecting httpcore==1.* (from httpx>=0.21.2->cohere)
  Downloading httpcore-1.0.7-py3-none-any.whl.metadata (21 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx>=0.21.2->cohere)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Collecting urllib3<3,>=1.21.1 (from

In [None]:
# Define the Elasticsearch index name
# (same value as in the index-creation cell; get_candidates below reads this module-level name)
index_name = "my_rag_index"

def get_candidates(sent_embeddings, source=None, nbr_candidate=30):
    """
    Retrieve candidate documents from Elasticsearch based on semantic similarity.

    Documents are scored with ``cosineSimilarity(query, embeddings) + 1.0``; the
    ``+ 1.0`` shifts the cosine range so that every score is non-negative.

    Parameters
    ----------
    sent_embeddings : list or numpy.ndarray
        The embedding vector of the query sentence.
    source : str, optional
        When given, only documents whose ``source`` field matches this value
        (``term`` query) are scored; otherwise every document is scored.
    nbr_candidate : int, optional
        The number of top candidate documents to retrieve (default: 30).

    Returns
    -------
    list
        One dict per hit, in the order returned by Elasticsearch, with the keys
        'document' (the hit's ``_id``), 'score' and 'content'.
    """
    # Send the vector as a plain list: callers pass the numpy array returned by
    # get_vector_embeddings, but ordinary sequences are accepted as well.
    if hasattr(sent_embeddings, 'tolist'):
        query_vector = sent_embeddings.tolist()
    else:
        query_vector = list(sent_embeddings)

    # Only the pre-filter differs between the "one source" and "all sources" modes.
    if source:
        filter_query = {"bool": {"must": {"term": {"source": source}}}}
    else:
        filter_query = {"match_all": {}}

    script_query = {
        "size": nbr_candidate,
        "_source": {"includes": ["content"]},
        "query": {
            "script_score": {
                "query": filter_query,
                "script": {
                    "source": "cosineSimilarity(params.query_vector, 'embeddings') + 1.0",
                    "params": {"query_vector": query_vector}
                },
            },
        },
    }

    search_results = elasticsearch_client.search(index=index_name, body=script_query)
    return [
        {
            'document': hit['_id'],
            'score': hit['_score'],
            'content': hit['_source']['content'],
        }
        for hit in search_results['hits']['hits']
    ]

In [None]:
import os

# Cohere reranker setup
# The API key is read from the COHERE_API_KEY environment variable so that a real key never
# has to be written into (and shared with) the notebook; the placeholder is used when unset.
COHERE_KEY = os.environ.get('COHERE_API_KEY', 'Your_cohere_key')
COHERE_RERANKER_MODEL = 'rerank-multilingual-v2.0'

from sentence_transformers.cross_encoder import CrossEncoder
import cohere

# Cross-Encoder models compared in the reranking benchmark.
# All of them are created with the same max_length.
_CROSS_ENCODER_MAX_LENGTH = 512

# Off-the-shelf model
cross_model = CrossEncoder(
    'dangvantuan/CrossEncoder-camembert-large',
    max_length=_CROSS_ENCODER_MAX_LENGTH,
)

# LeviatanAIResearch models
cross_model_ft_bert = CrossEncoder(
    'LeviatanAIResearch/cross-encoder-bert-base-fr-v1',
    max_length=_CROSS_ENCODER_MAX_LENGTH,
)
cross_model_ft_1 = CrossEncoder(
    'LeviatanAIResearch/cross-encoder-context-question-fr-v1',
    max_length=_CROSS_ENCODER_MAX_LENGTH,
)
cross_model_ft_2 = CrossEncoder(
    'LeviatanAIResearch/cross-encoder-context-question-fr-v2',
    max_length=_CROSS_ENCODER_MAX_LENGTH,
)
cross_model_ft_3 = CrossEncoder(
    'LeviatanAIResearch/cross-encoder-context-question-fr-v3',
    max_length=_CROSS_ENCODER_MAX_LENGTH,
)

In [None]:
import copy

# Reranker names accepted by get_reranker_result (compared case-insensitively).
_COHERE_RERANKER = 'cohere'
_CROSS_ENCODER_RERANKERS = (
    'cross-encoder-camembert',
    'cross-encoder-ft-vbert',
    'cross-encoder-ft-v1',
    'cross-encoder-ft-v2',
    'cross-encoder-ft-v3',
)


def _resolve_cross_encoder(model_key):
    """Return the module-level CrossEncoder instance registered under ``model_key``."""
    # Looked up lazily so that only the requested model has to be loaded.
    if model_key == 'cross-encoder-camembert':
        return cross_model
    if model_key == 'cross-encoder-ft-vbert':
        return cross_model_ft_bert
    if model_key == 'cross-encoder-ft-v1':
        return cross_model_ft_1
    if model_key == 'cross-encoder-ft-v2':
        return cross_model_ft_2
    return cross_model_ft_3


def _rerank_with_cohere(sentence, candidates, number):
    """Annotate ``candidates`` using the Cohere rerank API and return the reranked ones, best first."""
    cohere_client = cohere.Client(COHERE_KEY)
    rerank_results = cohere_client.rerank(
        query=sentence,
        documents=[candidate['content'] for candidate in candidates],
        top_n=number,
        model=COHERE_RERANKER_MODEL
    )
    # Each result refers back to its input document through `index`.
    for rerank_index, rerank_result in enumerate(rerank_results.results):
        candidate = candidates[rerank_result.index]
        candidate['rerank_index'] = rerank_index
        candidate['relevance_score'] = rerank_result.relevance_score
    # Candidates that were not part of the returned results carry no annotation: drop them.
    reranked = [candidate for candidate in candidates if 'rerank_index' in candidate]
    return sorted(reranked, key=lambda candidate: candidate['relevance_score'], reverse=True)


def _rerank_with_cross_encoder(sentence, candidates, cross_encoder, number):
    """Score every (sentence, content) pair with ``cross_encoder`` and return the ``number`` best candidates."""
    scores = cross_encoder.predict([(sentence, candidate['content']) for candidate in candidates])
    for candidate, score in zip(candidates, scores):
        candidate['relevance_score'] = float(score)  # float32 to float for JSON serializability
    top_candidates = sorted(
        candidates,
        key=lambda candidate: candidate['relevance_score'],
        reverse=True
    )[:number]
    for rerank_index, candidate in enumerate(top_candidates):
        candidate['rerank_index'] = rerank_index
    return top_candidates


def get_reranker_result(sentence, similarity_candidates, reranker_model, number=10):
    """
    Rerank retrieved documents using one of the available reranker models.

    Parameters
    ----------
    sentence : str
        The query sentence.
    similarity_candidates : list
        A list of retrieved documents (dicts with at least a 'content' key).
        The list is deep-copied, so the caller's data is never modified.
    reranker_model : str
        The reranker to use, case-insensitive: 'cohere', 'cross-encoder-camembert',
        'cross-encoder-ft-vbert', 'cross-encoder-ft-v1', 'cross-encoder-ft-v2'
        or 'cross-encoder-ft-v3'.
    number : int, optional
        The number of top-ranked documents to return (default: 10).

    Returns
    -------
    list
        Copies of the best candidates sorted by decreasing 'relevance_score', each
        extended with the keys 'relevance_score' and 'rerank_index'. An empty list
        when there is no candidate to rerank.

    Raises
    ------
    ValueError
        If ``reranker_model`` is not one of the supported names.
    """
    model_key = reranker_model.lower()
    if model_key != _COHERE_RERANKER and model_key not in _CROSS_ENCODER_RERANKERS:
        raise ValueError(f'Unknown reranker model: {reranker_model}')

    # Nothing to rerank: skip the model / API call entirely.
    if not similarity_candidates:
        return []

    # Work on a private copy so the same candidates can be reused with another reranker.
    candidates = copy.deepcopy(similarity_candidates)
    if model_key == _COHERE_RERANKER:
        return _rerank_with_cohere(sentence, candidates, number)
    return _rerank_with_cross_encoder(
        sentence, candidates, _resolve_cross_encoder(model_key), number
    )


In [None]:
from tqdm import tqdm
import time

IS_TEST_ENV = True
TEST_ENV_SAMPLE_SIZE = 20  # maximum number of rows processed when IS_TEST_ENV is True

# Key written in each result entry -> reranker name understood by get_reranker_result.
# The keys are the ones read back by the evaluation cells, so they must not be renamed.
RERANKER_OUTPUT_KEYS = {
    "cohere": 'cohere',
    "cross_encoder": 'cross-encoder-camembert',
    "cross-encoder-ft-vbert": 'cross-encoder-ft-vbert',
    "cross-encoder-ft-v1": 'cross-encoder-ft-v1',
    "cross-encoder-ft-v2": 'cross-encoder-ft-v2',
    "cross-encoder-ft-v3": 'cross-encoder-ft-v3',
}


def get_reranker_documents(dataset, source=None, context_tag="text1", question_tag="text2"):
    """
    Retrieve and rerank documents for a given dataset using different ranking models.

    For every row, the question is embedded, 30 candidates are fetched from
    Elasticsearch, and each reranker listed in ``RERANKER_OUTPUT_KEYS`` returns
    its 10 best candidates.

    Parameters
    ----------
    dataset : datasets.Dataset
        The dataset containing questions and contexts.
    source : str, optional
        The specific source dataset to filter (default: None, which includes all sources).
        The same value restricts the Elasticsearch search.
    context_tag : str, optional
        The key in the dataset that represents the context (default: "text1").
    question_tag : str, optional
        The key in the dataset that represents the question (default: "text2").

    Returns
    -------
    list
        A list of dictionaries, each containing the question, the original context,
        the raw Elasticsearch hits under 'no_reranker', and, for every reranker,
        the list of reranked contents.

    Notes
    -----
    When the module-level ``IS_TEST_ENV`` is true, at most ``TEST_ENV_SAMPLE_SIZE``
    rows are processed.
    """
    reranker_documents = []

    # Filter dataset based on the source if specified
    if source:
        filtered_dataset_by_source = dataset.filter(lambda example: example['source'] == source)
    else:
        filtered_dataset_by_source = dataset

    # Reduce dataset size in test mode for faster processing
    # (never ask for more rows than the dataset holds).
    if IS_TEST_ENV:
        sample_size = min(TEST_ENV_SAMPLE_SIZE, len(filtered_dataset_by_source))
        filtered_dataset_by_source = filtered_dataset_by_source.select(range(sample_size))

    print(filtered_dataset_by_source)
    print('dataset len is ', len(filtered_dataset_by_source))

    # Process each data entry
    for one_data in tqdm(filtered_dataset_by_source):
        question = one_data[question_tag]
        sent_embedding = get_vector_embeddings(question)

        # A single Elasticsearch query serves both the "no reranker" baseline and every
        # reranker: get_reranker_result deep-copies its input, so the hits stay untouched.
        similarity_candidates = get_candidates(sent_embedding, source, nbr_candidate=30)

        # Construct final result dictionary
        element = {
            "question": question,
            "context": one_data[context_tag],
            "no_reranker": similarity_candidates,
        }
        for output_key, reranker_name in RERANKER_OUTPUT_KEYS.items():
            ranked_documents = get_reranker_result(
                question,
                similarity_candidates,
                reranker_name,
                number=10
            )
            # Only the text of each reranked document is kept.
            element[output_key] = [item['content'] for item in ranked_documents]

        # Append result to the list
        reranker_documents.append(element)

        # Optional: `element` could be appended to a per-source JSON file here
        # to keep intermediate results during long runs.
    return reranker_documents

In [None]:
# Display the dataset summary (features and number of rows).
filtered_dataset

Dataset({
    features: ['text1', 'text2', 'label', 'source'],
    num_rows: 4339
})

In [None]:
# Test
# get_reranker_documents applies the source filter itself, so the dataset is passed as is
# (a stand-alone `.filter(...)` whose result is not assigned would only cost time).
source = 'fquad'
_column_names = list(filtered_dataset.features.keys())
reranker_documents = get_reranker_documents(
    filtered_dataset,
    source,
    _column_names[0],   # context_tag
    _column_names[1]    # question_tag
)

Dataset({
    features: ['text1', 'text2', 'label', 'source'],
    num_rows: 20
})
dataset len is  20


  0%|          | 0/20 [00:00<?, ?it/s]  5%|▌         | 1/20 [00:03<01:09,  3.65s/it] 10%|█         | 2/20 [00:05<00:46,  2.56s/it] 15%|█▌        | 3/20 [00:07<00:37,  2.23s/it] 20%|██        | 4/20 [00:08<00:32,  2.02s/it] 25%|██▌       | 5/20 [00:11<00:31,  2.11s/it] 30%|███       | 6/20 [00:12<00:27,  1.93s/it] 35%|███▌      | 7/20 [00:14<00:23,  1.84s/it] 40%|████      | 8/20 [00:16<00:21,  1.83s/it] 45%|████▌     | 9/20 [00:18<00:20,  1.84s/it] 50%|█████     | 10/20 [00:19<00:18,  1.81s/it] 55%|█████▌    | 11/20 [00:21<00:16,  1.88s/it] 60%|██████    | 12/20 [00:23<00:14,  1.87s/it] 65%|██████▌   | 13/20 [00:25<00:12,  1.86s/it] 70%|███████   | 14/20 [00:27<00:11,  1.85s/it] 75%|███████▌  | 15/20 [00:29<00:09,  1.84s/it] 80%|████████  | 16/20 [00:30<00:07,  1.81s/it] 85%|████████▌ | 17/20 [00:32<00:05,  1.80s/it] 90%|█████████ | 18/20 [00:34<00:03,  1.81s/it] 95%|█████████▌| 19/20 [00:36<00:01,  1.81s/it]100%|██████████| 20/20 [00:38<00:00,  1.75s/it]100%|████

In [None]:
import json  # needed here: the evaluation cell that also imports it runs later

# Sources to benchmark. filtered_dataset only contains 'piaf' and 'fquad' rows;
# add 'fquad' to the list to process both.
SOURCES_TO_BENCHMARK = ['piaf']

for source in SOURCES_TO_BENCHMARK:
    reranker_documents = get_reranker_documents(filtered_dataset, source)

    # Explicit UTF-8: the texts are written unescaped (ensure_ascii=False), so the
    # platform's default encoding must not decide how they are stored.
    with open(f'temp_camenbert_reranker_document_{source}.json', 'w', encoding='utf-8') as file:
        json.dump(reranker_documents, file, indent=4, ensure_ascii=False)


dataset len is  1151


  0%|          | 0/1151 [00:00<?, ?it/s]  0%|          | 1/1151 [00:00<05:36,  3.42it/s]  0%|          | 2/1151 [00:00<06:16,  3.05it/s]  0%|          | 3/1151 [00:00<05:46,  3.32it/s]  0%|          | 4/1151 [00:01<05:26,  3.51it/s]  0%|          | 5/1151 [00:01<05:25,  3.52it/s]  1%|          | 6/1151 [00:01<04:58,  3.84it/s]  1%|          | 7/1151 [00:01<04:42,  4.05it/s]  1%|          | 8/1151 [00:02<04:40,  4.07it/s]  1%|          | 9/1151 [00:02<04:33,  4.17it/s]  1%|          | 10/1151 [00:02<04:33,  4.18it/s]  1%|          | 11/1151 [00:02<04:26,  4.27it/s]  1%|          | 12/1151 [00:03<04:28,  4.24it/s]  1%|          | 13/1151 [00:03<04:34,  4.15it/s]  1%|          | 14/1151 [00:03<04:49,  3.93it/s]  1%|▏         | 15/1151 [00:03<04:56,  3.83it/s]  1%|▏         | 16/1151 [00:04<04:50,  3.90it/s]  1%|▏         | 17/1151 [00:04<04:55,  3.84it/s]  2%|▏         | 18/1151 [00:04<04:48,  3.93it/s]  2%|▏         | 19/1151 [00:04<05:00,  3.77it/s]  2%|▏         | 

In [None]:
# Number of entries stored under 'no_reranker' for the first question
len(reranker_documents[0]['no_reranker'])

30

In [None]:
####### Evaluation
import json

#with open('reranker_document_piaf.json', 'r') as file:
#    reranker_documents = json.load(file)

for item in reranker_documents:
    # 'no_reranker' holds the raw Elasticsearch hits: dicts with the document id, the score
    # and the content. The evaluation compares plain context strings, so only the content
    # of each hit is kept. Entries that are already strings are left as they are, which
    # makes this cell safe to run more than once.
    item['no_reranker'] = [
        res['content'] if isinstance(res, dict) else res
        for res in item['no_reranker']
    ]


In [None]:
def evaluate_algorithm(algorithm_key, k=10, documents=None):
    """
    Evaluate a ranking algorithm with a top-k hit rate and the MRR.

    Parameters
    ----------
    algorithm_key : str
        The key corresponding to the ranking algorithm results.
    k : int, optional
        The number of top documents to consider for evaluation (default: 10).
    documents : list, optional
        The result entries to evaluate: dicts with a "context" key (the correct
        context) and, under ``algorithm_key``, the ranked list of predicted
        contexts. Defaults to the module-level ``reranker_documents``.

    Returns
    -------
    tuple
        ``(precision_at_k, mrr)`` where ``precision_at_k`` is the fraction of
        questions whose correct context appears in the top-k predictions, and
        ``mrr`` is the mean of ``1 / rank`` of the correct context (counted as 0
        when it is not in the top-k). ``(0.0, 0.0)`` when there is nothing to
        evaluate.
    """
    if documents is None:
        documents = reranker_documents

    total_questions = len(documents)
    if total_questions == 0:
        # Avoid a division by zero on an empty result set.
        return 0.0, 0.0

    hits_at_k = 0
    sum_reciprocal_rank = 0.0
    for entry in documents:
        predicted_contexts = entry[algorithm_key][:k]  # Top-k documents
        try:
            rank = predicted_contexts.index(entry["context"]) + 1
        except ValueError:
            # Correct context not within the top-k predictions: contributes nothing.
            continue
        hits_at_k += 1
        sum_reciprocal_rank += 1 / rank

    precision_at_k = hits_at_k / total_questions
    mrr = sum_reciprocal_rank / total_questions
    return precision_at_k, mrr

In [None]:
top_k = 10


def _evaluate_at_top_k(algorithm_key):
    """Score one result key with the shared ``top_k`` cut-off."""
    return evaluate_algorithm(algorithm_key, k=top_k)


# Baseline: raw Elasticsearch order
precision_at_k_no_reranker, mrr_no_reranker = _evaluate_at_top_k('no_reranker')

# Rerankers
precision_at_k_cohere, mrr_cohere = _evaluate_at_top_k('cohere')
precision_at_k_cross_encoder, mrr_cross_encoder = _evaluate_at_top_k('cross_encoder')
precision_at_k_cross_encoder_ft_vbert, mrr_cross_encoder_ft_vbert = _evaluate_at_top_k('cross-encoder-ft-vbert')
precision_at_k_cross_encoder_ft_v1, mrr_cross_encoder_ft_v1 = _evaluate_at_top_k('cross-encoder-ft-v1')
precision_at_k_cross_encoder_ft_v2, mrr_cross_encoder_ft_v2 = _evaluate_at_top_k('cross-encoder-ft-v2')
precision_at_k_cross_encoder_ft_v3, mrr_cross_encoder_ft_v3 = _evaluate_at_top_k('cross-encoder-ft-v3')

In [None]:
# Report the scores of every ranking strategy, one line each.
# Labels are generated from a single name per row so that a value can no longer be
# printed under another model's label (the v1 MRR was labelled 'mrr_cross_encoder_ft_vbert').
_scores_by_name = [
    ('no_reranker', precision_at_k_no_reranker, mrr_no_reranker),
    ('cohere', precision_at_k_cohere, mrr_cohere),
    ('cross_encoder', precision_at_k_cross_encoder, mrr_cross_encoder),
    ('cross_encoder_ft_vbert', precision_at_k_cross_encoder_ft_vbert, mrr_cross_encoder_ft_vbert),
    ('cross_encoder_ft_v1', precision_at_k_cross_encoder_ft_v1, mrr_cross_encoder_ft_v1),
    ('cross_encoder_ft_v2', precision_at_k_cross_encoder_ft_v2, mrr_cross_encoder_ft_v2),
    ('cross_encoder_ft_v3', precision_at_k_cross_encoder_ft_v3, mrr_cross_encoder_ft_v3),
]

for _name, _precision, _mrr in _scores_by_name:
    print(f'precision_at_k_{_name}:', _precision, f', mrr_{_name}:', _mrr)

precision_at_k_no_reranker: 1.0 , mrr_no_reranker: 0.757142857142857
precision_at_k_cohere: 1.0 , mrr_cohere: 0.95
precision_at_k_cross_encoder: 0.7 , mrr_cross_encoder: 0.25720238095238096
precision_at_k_cross_encoder_ft_vbert: 1.0 , mrr_cross_encoder_ft_vbert: 0.8488095238095237
precision_at_k_cross_encoder_ft_v1: 0.55 , mrr_cross_encoder_ft_vbert: 0.17152777777777778
precision_at_k_cross_encoder_ft_v2: 0.4 , mrr_cross_encoder_ft_v2: 0.12547619047619046
precision_at_k_cross_encoder_ft_v3: 0.95 , mrr_cross_encoder_ft_v3: 0.6116666666666667
