## Installation

In [None]:
!pip install ranx

Collecting ranx
  Downloading ranx-0.3.19-py3-none-any.whl (99 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting ir-datasets (from ranx)
  Downloading ir_datasets-0.5.7-py3-none-any.whl (337 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m337.9/337.9 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
Collecting orjson (from ranx)
  Downloading orjson-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (142 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m142.5/142.5 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting lz4 (from ranx)
  Downloading lz4-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cbor2 (from ranx)
  Downloading cbor2-5.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux20

### Imports

In [None]:
import os
import gzip
import logging
from collections import defaultdict
from ranx import Qrels, Run, fuse, optimize_fusion, evaluate
from google.colab import drive
from ranx import fuse, optimize_fusion, evaluate
import numpy as np
from google.colab import drive
import os
from ranx import Run

## Some Preparation

### Get queries and qrels

In [None]:
data_folder = 'trec2019-data'
os.makedirs(data_folder, exist_ok=True)

!wget https://msmarco.z22.web.core.windows.net/msmarcoranking/queries.tar.gz
!tar -xvzf queries.tar.gz

model_save_path = "/content/gdrive/MyDrive/cross-encoder-reranker-ir-course-2023/finetuned_models/cross-encoder-distilbert-distilroberta-base-2024-05-12_07-39-41"

queries = {}
queries_filepath = os.path.join(data_folder, 'msmarco-test2019-queries.tsv.gz')
if not os.path.exists(queries_filepath):
    logging.info("Download " + os.path.basename(queries_filepath))
    !wget https://msmarco.z22.web.core.windows.net/msmarcoranking/msmarco-test2019-queries.tsv.gz -O {queries_filepath}

with gzip.open(queries_filepath, 'rt', encoding='utf8') as fIn:
    for line in fIn:
        qid, query = line.strip().split("\t")
        queries[qid] = query

relevant_docs = defaultdict(lambda: defaultdict(int))
qrels_filepath = os.path.join(data_folder, '2019qrels-pass.txt')

if not os.path.exists(qrels_filepath):
    logging.info("Download " + os.path.basename(qrels_filepath))
    !wget https://trec.nist.gov/data/deep/2019qrels-pass.txt -O {qrels_filepath}

with open(qrels_filepath) as fIn:
    for line in fIn:
        qid, _, pid, score = line.strip().split()
        score = int(score)
        if score > 0:
            relevant_docs[qid][pid] = score

relevant_qid = [qid for qid in queries if len(relevant_docs[qid]) > 0]

qrels = Qrels(relevant_docs)

--2024-05-17 17:34:01--  https://msmarco.z22.web.core.windows.net/msmarcoranking/queries.tar.gz
Resolving msmarco.z22.web.core.windows.net (msmarco.z22.web.core.windows.net)... 20.150.34.1
Connecting to msmarco.z22.web.core.windows.net (msmarco.z22.web.core.windows.net)|20.150.34.1|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 18882551 (18M) [application/gzip]
Saving to: ‘queries.tar.gz’


2024-05-17 17:34:01 (33.7 MB/s) - ‘queries.tar.gz’ saved [18882551/18882551]

queries.dev.tsv
queries.eval.tsv
queries.train.tsv
--2024-05-17 17:34:02--  https://msmarco.z22.web.core.windows.net/msmarcoranking/msmarco-test2019-queries.tsv.gz
Resolving msmarco.z22.web.core.windows.net (msmarco.z22.web.core.windows.net)... 20.150.34.1
Connecting to msmarco.z22.web.core.windows.net (msmarco.z22.web.core.windows.net)|20.150.34.1|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4276 (4.2K) [application/x-gzip]
Saving to: ‘trec2019-data/msmarco-test2019-

### Get Run Files

In [None]:

# Make the Google Drive folder that holds the ranking files reachable.
drive.mount('/content/gdrive')

base_path = "/content/gdrive/MyDrive/cross-encoder-reranker-ir-course-2023/"

file_paths = [
    "finetuned_models/cross-encoder-cross-encoder-ms-marco-MiniLM-L-2-v2-2024-05-10_20-46-58ranking.run",
    "finetuned_models/cross-encoder-cross-encoder-ms-marco-TinyBERT-L-2-v2-2024-05-11_07-11-32ranking.run",
    "finetuned_models/cross-encoder-distilbert-distilroberta-base-2024-05-12_07-39-41ranking.run"
]

# One list of runs per pairwise ensemble; each run file ends up in exactly
# two of the three pairs, chosen from a substring of its file name.
runs = {'mini_tiny': [], 'mini_dist': [], 'tiny_dist': []}
for file_path in file_paths:
    full_path = os.path.join(base_path, file_path)
    run = Run.from_file(full_path, kind="trec")
    if 'MiniLM' in file_path:
        pair_keys = ('mini_tiny', 'mini_dist')
    elif 'distilroberta' in file_path:
        pair_keys = ('mini_dist', 'tiny_dist')
    else:
        pair_keys = ('mini_tiny', 'tiny_dist')
    for pair_key in pair_keys:
        runs[pair_key].append(run)

# Query ids present in the qrels and in every loaded run.
run_qid_sets = [
    set(member.keys())
    for members in runs.values()
    for member in members
]
common_qids = set(qrels.qrels.keys()).intersection(*run_qid_sets)

# Restrict the qrels and every run to the shared query ids.
filtered_qrels = {
    qid: dict(qrels[qid].items())
    for qid in common_qids
}
filtered_runs = {
    pair: [{qid: member[qid] for qid in common_qids} for member in members]
    for pair, members in runs.items()
}

# Rebuild ranx objects from the filtered plain dictionaries.
qrels = Qrels(filtered_qrels)
runs = {
    pair: [Run(member) for member in members]
    for pair, members in filtered_runs.items()
}

Mounted at /content/gdrive


### Get best_params

## Ensemble Methods

### 1. RRF (Reciprocal Rank Fusion, Rank-based Method)

In [None]:
# 1. RRF (Reciprocal Rank Fusion, Rank-based Method)
# For every pair of runs: tune the fusion parameters, fuse, then evaluate.
# NOTE(review): the same `qrels` are passed to optimize_fusion and to evaluate,
# so the reported scores are tuned on the judgments they are measured on.
for key, run_list in runs.items():
    print(f"{key} :")

    # Arguments shared by the tuning call and the final fusion call.
    fusion_args = dict(runs=run_list, norm="min-max", method="rrf")

    best_params = optimize_fusion(qrels=qrels, metric="ndcg@100", **fusion_args)
    combined_run = fuse(params=best_params, **fusion_args)

    metrics = evaluate(
        qrels=qrels,
        run=combined_run,
        metrics=["ndcg@10", "recall@100", "map@1000"],
    )
    print(metrics)

    # Express the scores as percentages for the summary lines.
    ndcg_10, recall_100, map_1000 = (
        metrics[metric_name] * 100
        for metric_name in ("ndcg@10", "recall@100", "map@1000")
    )

    print(f"Queries: {len(common_qids)}")
    print(f"NDCG@10: {ndcg_10:.2f}")
    print(f"Recall@100: {recall_100:.2f}")
    print(f"MAP@1000: {map_1000:.2f}")

mini_tiny :


  normalized_run[i] = _min_max_norm(run[q_ids[i]])


Output()

Output()

{'ndcg@10': 0.7017241244092186, 'recall@100': 0.5107037217852476, 'map@1000': 0.46292597954111364}
Queries: 43
NDCG@10: 70.17
Recall@100: 51.07
MAP@1000: 46.29
mini_dist :


Output()

{'ndcg@10': 0.6948928461219978, 'recall@100': 0.5087785428110266, 'map@1000': 0.45208456390151586}
Queries: 43
NDCG@10: 69.49
Recall@100: 50.88
MAP@1000: 45.21
tiny_dist :


{'ndcg@10': 0.6931552509738508, 'recall@100': 0.5102368802947537, 'map@1000': 0.45373965281928735}
Queries: 43
NDCG@10: 69.32
Recall@100: 51.02
MAP@1000: 45.37


### 2. BayesFuse

In [None]:
# 2. BayesFuse
# For every pair of runs: fit the fusion parameters, fuse, then evaluate.
# NOTE(review): the same `qrels` are passed to optimize_fusion and to evaluate,
# so the reported scores are tuned on the judgments they are measured on.
for key, run_list in runs.items():
    print(f"{key} :")

    # Settings common to parameter search and fusion.
    shared_settings = {"runs": run_list, "norm": "min-max", "method": "bayesfuse"}

    best_params = optimize_fusion(
        qrels=qrels,
        metric="ndcg@100",
        **shared_settings,
    )
    combined_run = fuse(
        params=best_params,
        **shared_settings,
    )

    reported_metrics = ["ndcg@10", "recall@100", "map@1000"]
    metrics = evaluate(qrels=qrels, run=combined_run, metrics=reported_metrics)

    print(metrics)

    # Scale to percentages before formatting the summary.
    ndcg_10 = 100 * metrics['ndcg@10']
    recall_100 = 100 * metrics['recall@100']
    map_1000 = 100 * metrics['map@1000']

    print(f"Queries: {len(common_qids)}")
    print(f"NDCG@10: {ndcg_10:.2f}")
    print(f"Recall@100: {recall_100:.2f}")
    print(f"MAP@1000: {map_1000:.2f}")

mini_tiny :
{'ndcg@10': 0.6975316118384749, 'recall@100': 0.5087120014533808, 'map@1000': 0.45051896402929853}
Queries: 43
NDCG@10: 69.75
Recall@100: 50.87
MAP@1000: 45.05
mini_dist :
{'ndcg@10': 0.6903775108035254, 'recall@100': 0.507881034349528, 'map@1000': 0.4458595220134241}
Queries: 43
NDCG@10: 69.04
Recall@100: 50.79
MAP@1000: 44.59
tiny_dist :
{'ndcg@10': 0.6914192912924512, 'recall@100': 0.5105875935564295, 'map@1000': 0.44935994823557374}
Queries: 43
NDCG@10: 69.14
Recall@100: 51.06
MAP@1000: 44.94
