In [35]:
!pip install ir_datasets
!pip install rank_bm25
!pip install sentence_transformers
!pip install pytrec_eval



In [36]:
from tqdm import tqdm
import json
import ir_datasets
import os
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
from rank_bm25 import BM25Okapi
from transformers import pipeline
import torch
from huggingface_hub import login
import pytrec_eval

api_key = "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"
login(token=api_key)

# Check GPU availability
def get_device():
    if torch.cuda.is_available():
        device = "cuda"
        gpu_properties = torch.cuda.get_device_properties(torch.cuda.current_device())
        print(f"Using GPU: {gpu_properties.name}")
        print(f"CUDA Cores: {gpu_properties.multi_processor_count}")
        print(f"Total Memory: {gpu_properties.total_memory / 1e9:.2f} GB")
        print(f"Compute Capability: {gpu_properties.major}.{gpu_properties.minor}")
    else:
        device = "cpu"
        print("Using CPU")
    return device

device = get_device()


Using GPU: Tesla T4
CUDA Cores: 40
Total Memory: 15.84 GB
Compute Capability: 7.5


# Section 1: Dataset loading and preparation

In [37]:
# Load dataset
print("Loading the trec covid dataset...")
dataset = ir_datasets.load("cord19/trec-covid")

# Prepare documents and queries
print("Preparing documents and queries...")
all_docs = [{"doc_id": doc.doc_id, "abstract": doc.abstract} for doc in dataset.docs_iter()]
all_queries = [{"query_id": query.query_id, "title": query.title} for query in dataset.queries_iter()]

# Print dataset size information
print(f"Summary: {len(all_docs)} documents and {len(all_queries)} queries are available in the dataset.")

tokenized_docs = [doc['abstract'].split() for doc in all_docs]
qrels = dataset.qrels

Loading the trec covid dataset...
Preparing documents and queries...
Summary: 192509 documents and 50 queries are available in the dataset.


In [38]:
# convert qrels to a dictionary
qrels_dict = {}
for qrel in qrels:
    if qrel.query_id not in qrels_dict:
        qrels_dict[qrel.query_id] = {}
    qrels_dict[qrel.query_id][qrel.doc_id] = qrel.relevance

{'1': {'005b2j4b': 2,
  '00fmeepz': 1,
  '010vptx3': 2,
  '0194oljo': 1,
  '021q9884': 1,
  '02f0opkr': 1,
  '047xpt2c': 0,
  '04ftw7k9': 0,
  '05qglt1f': 0,
  '05vx82oo': 0,
  '0604jed8': 0,
  '06o2tbon': 0,
  '06ya15z8': 0,
  '084o1dmp': 0,
  '08ds967z': 1,
  '08efpohc': 0,
  '0b4o0ccp': 0,
  '0be4wta5': 2,
  '0chuwvg6': 2,
  '0cvoeiy0': 0,
  '0e1w86tg': 1,
  '0evw0fc5': 1,
  '0hnh4n9e': 1,
  '0iq9s94n': 1,
  '0khg28ex': 0,
  '0l33i6s4': 0,
  '0lyxvex0': 0,
  '0m5mc320': 0,
  '0ne21in2': 1,
  '0nh58odf': 2,
  '0oiq44gl': 0,
  '0paafp5j': 0,
  '0pbjttv4': 0,
  '0qaoam29': 0,
  '0qpfoh5t': 0,
  '0t2a5500': 2,
  '0ti403i4': 0,
  '0v5wo0ty': 1,
  '0xhho1sh': 2,
  '0xruezf2': 1,
  '0y34yxlb': 2,
  '105q161g': 2,
  '10ecm4wi': 1,
  '11edrkav': 2,
  '127c5bve': 2,
  '12dcftwt': 2,
  '13ir7swr': 2,
  '13jupb26': 0,
  '1585stal': 0,
  '16ciqu9w': 0,
  '16rgt4ca': 0,
  '18xs6375': 0,
  '1a8uevk8': 0,
  '1aal6njl': 0,
  '1abp6oom': 2,
  '1ag9jkk6': 0,
  '1bapn9w0': 0,
  '1bpc8g6n': 0,
  '1bvsn9

# Section 2: Embeddings generation

In [39]:
# Load or generate embeddings
def generate_embeddings():
    if os.path.exists("trec_covid_doc_embeddings.csv") and os.path.exists("trec_covid_query_embeddings.csv"):
        print("Loading precomputed embeddings...")
        doc_embeddings = pd.read_csv("trec_covid_doc_embeddings.csv").values
        query_embeddings = pd.read_csv("trec_covid_query_embeddings.csv").values
    else:
        print("No precomputed embeddings found.")
        print("Generating new embeddings using SentenceTransformer model 'all-MiniLM-L6-v2'.")
        model = SentenceTransformer("all-MiniLM-L6-v2", device=device)
        doc_embeddings = model.encode(all_docs, batch_size=32, show_progress_bar=True)
        query_embeddings = model.encode(all_queries, batch_size=32, show_progress_bar=True)

        # Save embeddings for future use
        pd.DataFrame(doc_embeddings).to_csv("trec_covid_doc_embeddings.csv", index=False)
        pd.DataFrame(query_embeddings).to_csv("trec_covid_query_embeddings.csv", index=False)

    return doc_embeddings, query_embeddings

doc_embeddings, query_embeddings = generate_embeddings()

Loading precomputed embeddings...


# Section 3: Retrieval implementation

### Evaluation metrics
The following functions are used to evaluate the quality of document retrieval methods based on the ranked list of documents returned for a given query.

In [41]:
# Function to prepare run data for pytrec_eval
def prepare_run_data(results):
    """
    Prepares the run data in the format expected by pytrec_eval.
    Converts numpy scores to native Python float for compatibility.
    """
    run = {}
    for query_results in results:
        query_id = query_results['query']['query_id']
        run[query_id] = {}
        for doc_id, score in zip(query_results['results'], query_results['scores']):
            run[query_id][doc_id] = float(score)  # Convert numpy type to float
    return run

### Document Retrieval Methods

1. **BM25 Sparse Retrieval**:
   - The **BM25 algorithm** is used to perform sparse retrieval on tokenized documents by calculating a relevance score for each document based on the query. It then returns the indices and relevance scores of the top-k most relevant documents.

2. **Dense Retrieval**:
   - **Dense retrieval** is performed by calculating the cosine similarity between the query embedding and the document embeddings. The top-k documents with the highest similarity scores are returned.

3. **Rank Fusion Retrieval**:
   - Results from both **BM25** and **dense retrieval** are combined using a **rank fusion** technique. Scores from both methods are normalized, weighted by a parameter `alpha`, and the top-k documents are returned based on the combined scores.

4. **Cascading Retrieval**:
   - Initially, a set of documents is retrieved using **BM25**. These documents are then re-ranked using dense retrieval, with a similarity threshold applied to filter documents. The top-k documents are returned based on the final ranking.

In [57]:
# BM25 Sparse Retrieval
def bm25_retrieve(query, bm25, top_k=5):
    """
    Perform sparse retrieval using BM25 on the tokenized documents.
    Returns the indices and scores of the top-k documents.
    """
    tokenized_query = query.split()                                             # Tokenize the query into words
    scores = bm25.get_scores(tokenized_query)                                   # Get BM25 scores for all documents
    top_k_indices = np.argsort(scores)[-top_k:][::-1]                           # Get indices of top-k documents based on BM25 score
    return top_k_indices, scores[top_k_indices]

# Dense Retrieval
def dense_retrieve(query_embedding, doc_embeddings, top_k=5):
    """
    Perform dense retrieval using cosine similarity between query and document embeddings.
    Returns the indices and similarities of the top-k documents.
    """
    similarities = cosine_similarity([query_embedding], doc_embeddings)[0]      # Compute cosine similarity
    top_k_indices = np.argsort(similarities)[-top_k:][::-1]                     # Get top-k indices based on similarity
    return top_k_indices, similarities[top_k_indices]

# Rank Fusion Retrieval
def fusion_retrieve(dense_query_embedding, doc_embeddings, query, top_k=5, alpha=0.25):
    """
    Implementa il rank fusion riutilizzando le funzioni esistenti di retrieval.
    """

    # Perform BM25 retrieval and dense retrieval
    sparse_indices, sparse_scores = bm25_retrieve(query, bm25, top_k=len(doc_embeddings))
    dense_indices, dense_scores = dense_retrieve(dense_query_embedding, doc_embeddings, top_k=len(doc_embeddings))

    # Initialize score arrays
    all_sparse_scores = np.zeros(len(doc_embeddings))
    all_dense_scores = np.zeros(len(doc_embeddings))

    # Fill score arrays with BM25 and dense scores
    all_sparse_scores[sparse_indices] = sparse_scores
    all_dense_scores[dense_indices] = dense_scores

    if np.min(all_sparse_scores) == np.max(all_sparse_scores):
        print("All sparse scores are the same.")

    if np.min(all_dense_scores) == np.max(all_dense_scores):
        print("All dense scores are the same.")

    # Normalize scores
    all_sparse_scores = (all_sparse_scores - all_sparse_scores.min()) / (all_sparse_scores.max() - all_sparse_scores.min())
    all_dense_scores = (all_dense_scores - all_dense_scores.min()) / (all_dense_scores.max() - all_dense_scores.min())

    print("BM25 Scores:", all_sparse_scores)
    print("Dense Scores:", all_dense_scores)

    # Combine scores using the alpha parameter
    combined_scores = alpha * all_dense_scores + (1 - alpha) * all_sparse_scores

    # Retrieve the top-k results based on combined scores
    top_k_indices = np.argsort(combined_scores)[-top_k:][::-1]
    return top_k_indices, combined_scores[top_k_indices]

# Cascading Retrieval
def cascade_retrieve(dense_query_embedding, doc_embeddings, query, initial_k=100, final_k=5, dense_threshold=0.7):
    """
    Perform cascading retrieval: sparse retrieval followed by dense re-ranking.
    Filters documents based on a similarity threshold and returns the top-k results.
    """
    # Stage 1: BM25 to get initial candidates
    initial_indices, _ = bm25_retrieve(query, bm25, top_k=initial_k)

    # Stage 2: Dense re-ranking of candidate documents
    candidate_embeddings = doc_embeddings[initial_indices]
    _, dense_scores = dense_retrieve(dense_query_embedding, candidate_embeddings, top_k=len(initial_indices))

    # Filter candidates by similarity threshold
    qualified_mask = dense_scores >= dense_threshold
    if np.sum(qualified_mask) >= final_k:
        # Select top-k qualified candidates
        qualified_indices = np.where(qualified_mask)[0]
        top_indices = qualified_indices[np.argsort(dense_scores[qualified_indices])[-final_k:][::-1]]
    else:
        # If there are not enough qualified candidates, select top-k by overall scores
        top_indices = np.argsort(dense_scores)[-final_k:][::-1]

    # Map filtered indices to original document IDs
    final_indices = initial_indices[top_indices]
    final_scores = dense_scores[top_indices]

    return final_indices, final_scores



In [43]:
# Initialize BM25 model
print("Initializing BM25 model.")
bm25 = BM25Okapi(tokenized_docs)

Initializing BM25 model.


This section of code performs several retrieval experiments using the four different Document Retrieval Methods described earlier.

In [58]:
# Run retrieval experiments
def run_retrieval_experiments():
    """
    Execute sparse, dense, rank fusion, and cascading retrieval for all queries.
    Save the results to a JSON file for further analysis.
    """
    results = {"sparse": [], "dense": [], "rank_fusion": [], "cascade": []}

    print("Running retrieval experiments on all queries.")

    # Iterate over each query and its embedding
    for query, query_embedding in tqdm(zip(all_queries, query_embeddings), total=len(all_queries)):
        # Extract the query ID and text for the current query
        query_id = query['query_id']
        query_text = query['title'] if isinstance(query, dict) else query

        print("Query n.1. ID:", query_id, "Title: ", query_text)

        # Sparse Retrieval using BM25
        sparse_indices, sparse_scores = bm25_retrieve(query_text, bm25)                 # Retrieve the top-k BM25 documents and their scores
        sparse_docs = [all_docs[idx]['doc_id'] for idx in sparse_indices]               # Get document IDs from the indices
        results["sparse"].append({"query": query, "results": sparse_docs, "scores": sparse_scores}) # Store the BM25 results for the current query

        print("Sparse Retrieval Results:", sparse_docs)
        print("Sparse Retrieval Scores:", sparse_scores)

        # Dense Retrieval using cosine similarity
        dense_indices, dense_scores = dense_retrieve(query_embedding, doc_embeddings)   # Retrieve the top-k documents based on cosine similarity of embeddings
        dense_docs = [all_docs[idx]['doc_id'] for idx in dense_indices]
        results["dense"].append({"query": query, "results": dense_docs, "scores": dense_scores})

        print("Dense Retrieval Results:", dense_docs)
        print("Dense Retrieval Scores:", dense_scores)

        # Rank Fusion Retrieval by combining sparse (BM25) and dense result
        fusion_indices, fusion_scores = fusion_retrieve(                                # Combine BM25 and cosine similarity results
            query_embedding, doc_embeddings, query_text
        )
        fusion_docs = [all_docs[idx]['doc_id'] for idx in fusion_indices]
        results["rank_fusion"].append({"query": query, "results": fusion_docs, "scores": fusion_scores})

        # Cascade Retrieval: First use BM25, then re-rank using dense retrieval
        cascade_indices, cascade_scores = cascade_retrieve(                             # Perform cascading retrieval
            query_embedding, doc_embeddings, query_text
        )
        cascade_docs = [all_docs[idx]['doc_id'] for idx in cascade_indices]
        results["cascade"].append({"query": query, "results": cascade_docs, "scores": cascade_scores})

    return results

results = run_retrieval_experiments()


Running retrieval experiments on all queries.


  0%|          | 0/50 [00:00<?, ?it/s]

Query n.1. ID: 1 Title:  coronavirus origin
Sparse Retrieval Results: ['8ccl9aui', 'u65mey2z', 'hewbl5yu', 'dv9m19yk', 'es7q6c90']
Sparse Retrieval Scores: [10.11986678 10.09531025 10.05892626  9.75917639  9.63448018]
Dense Retrieval Results: ['1ycinsg1', '1yfunge1', '1u1iesy3', '1yjope1j', '1untezgg']
Dense Retrieval Scores: [0.58246428 0.57607107 0.54519026 0.54077532 0.53885728]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.67459308 0.51912483 0.39291063 ... 0.35370136 0.56100235 0.2765191 ]


  2%|▏         | 1/50 [00:01<01:22,  1.69s/it]

Query n.1. ID: 2 Title:  coronavirus response to weather changes
Sparse Retrieval Results: ['526elsrf', '9svrz0vj', 'aiwxlxzt', '0mikqjpj', 'kftchnhz']
Sparse Retrieval Scores: [19.2632898  19.22108577 19.22108577 18.36905419 18.08809198]
Dense Retrieval Results: ['2untuas4', '2yeulry4', '2elwonly', '2itui3ar', '2faiyhks']
Dense Retrieval Scores: [0.56771269 0.56272161 0.55647147 0.55579314 0.55382406]
BM25 Scores: [0.19907016 0.27906757 0.46560455 ... 0.27452545 0.         0.        ]
Dense Scores: [0.64108848 0.56509982 0.4803209  ... 0.36662061 0.5274373  0.32166631]


  4%|▍         | 2/50 [00:04<01:56,  2.43s/it]

Query n.1. ID: 3 Title:  coronavirus immunity
Sparse Retrieval Results: ['t9u7d029', '3i466i1y', '73oe5as9', '3ol5ozz6', 'wh9vvgv2']
Sparse Retrieval Scores: [10.45374692 10.06667735  9.91835967  9.89903255  9.44797959]
Dense Retrieval Results: ['3eaeemt6', '3rlazwsy', '8auinhs3', '3zialfh3', '3izj33j6']
Dense Retrieval Scores: [0.59065851 0.57552971 0.56135613 0.55851154 0.55553457]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.6243153  0.46528471 0.36633937 ... 0.48396606 0.51748647 0.29338235]


  6%|▌         | 3/50 [00:06<01:50,  2.35s/it]

Query n.1. ID: 4 Title:  how do people die from the coronavirus
Sparse Retrieval Results: ['n95j94ck', 'c0pzjq4a', '3pklqjbx', 'ktl1x03p', 'mpvkalu4']
Sparse Retrieval Scores: [21.00495924 21.00495924 21.00495924 20.33982549 19.92623384]
Dense Retrieval Results: ['4izymiy4', '4pcoouts', '4yardtss', '4notldie', '4uaa6kpg']
Dense Retrieval Scores: [0.6408068  0.61864425 0.61749951 0.61640202 0.6021475 ]
BM25 Scores: [0.28850449 0.29313731 0.30355803 ... 0.33300053 0.         0.        ]
Dense Scores: [0.59205567 0.54698545 0.32943042 ... 0.37396538 0.52445921 0.25951286]


  8%|▊         | 4/50 [00:10<02:09,  2.80s/it]

Query n.1. ID: 5 Title:  animal models of COVID-19
Sparse Retrieval Results: ['oa8vzf02', '3sepefqa', 'vobslprh', '5vu27b0p', 'y1b1vf1b']
Sparse Retrieval Scores: [20.44605516 20.44605516 20.17362148 19.94815441 19.55986288]
Dense Retrieval Results: ['5ddfkwly', '5ubdynh5', '5ai6tfgg', '5fejitls', '5cpkkygs']
Dense Retrieval Scores: [0.63102884 0.62999923 0.62690505 0.61465242 0.61306237]
BM25 Scores: [0.25299528 0.30687034 0.29731714 ... 0.2947856  0.         0.        ]
Dense Scores: [0.51868999 0.39820369 0.28190618 ... 0.40046613 0.56792264 0.29402226]


 10%|█         | 5/50 [00:12<01:56,  2.60s/it]

Query n.1. ID: 6 Title:  coronavirus test rapid testing
Sparse Retrieval Results: ['sw23wf4b', 'n5du3bqt', '1dr4r3n4', 'zij2wbzs', 'giy00lt5']
Sparse Retrieval Scores: [17.27772247 15.82293092 15.63405586 14.92818926 14.79807079]
Dense Retrieval Results: ['6sgeraws', '6ssdamhp', '6dagtkkc', '6muareue', '6om7unal']
Dense Retrieval Scores: [0.70072885 0.68858516 0.67334051 0.67153277 0.67001969]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.53374921 0.43388508 0.27098484 ... 0.3444227  0.50793994 0.26896662]


 12%|█▏        | 6/50 [00:14<01:46,  2.43s/it]

Query n.1. ID: 7 Title:  serological tests for coronavirus
Sparse Retrieval Results: ['upwn9o2m', 'px4fe7mn', '1dbeh8q7', 'r1yf75bo', 'qjma4rsp']
Sparse Retrieval Scores: [22.12290899 21.17947728 21.17947728 19.93635115 19.92442083]
Dense Retrieval Results: ['7juumedy', '7otiheyl', '7raieam7', '7s7ihukg', '7bkkeigh']
Dense Retrieval Scores: [0.68882348 0.68874126 0.68694176 0.68531564 0.68377034]
BM25 Scores: [0.09097561 0.         0.         ... 0.         0.         0.        ]
Dense Scores: [0.61592952 0.40696839 0.26322405 ... 0.35678557 0.52990366 0.32988376]


 14%|█▍        | 7/50 [00:16<01:41,  2.35s/it]

Query n.1. ID: 8 Title:  coronavirus under reporting
Sparse Retrieval Results: ['02iicrsa', 'c5l57vvu', 'c5l57vvu', 'qrtdjtxj', 'e5q27vpw']
Sparse Retrieval Scores: [10.74364541 10.22450603 10.22450603 10.21176617  9.82375569]
Dense Retrieval Results: ['8towsty8', '8a1cia8s', '8aulkyve', '8uswrers', '8ab8rugv']
Dense Retrieval Scores: [0.72804955 0.71597516 0.71543472 0.7109216  0.69950522]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.58905724 0.45794374 0.29602535 ... 0.32600251 0.50076765 0.26826079]


 16%|█▌        | 8/50 [00:18<01:32,  2.21s/it]

Query n.1. ID: 9 Title:  coronavirus in Canada
Sparse Retrieval Results: ['jkzxjk54', '4v3d86h3', 'w6scpc65', 'qkr6jydj', 'mh23h29f']
Sparse Retrieval Scores: [19.42752432 17.53008377 16.80564654 16.34366365 15.69130755]
Dense Retrieval Results: ['9nucalt9', '9aswei96', '9ruypie9', '9jaehm98', '9ishrdds']
Dense Retrieval Scores: [0.68311207 0.67958586 0.67306536 0.66837257 0.66610115]
BM25 Scores: [0.22257518 0.20511057 0.23511453 ... 0.21860294 0.         0.        ]
Dense Scores: [0.6491406  0.45678067 0.30993798 ... 0.39072235 0.57904906 0.38876422]


 18%|█▊        | 9/50 [00:21<01:32,  2.26s/it]

Query n.1. ID: 10 Title:  coronavirus social distancing impact
Sparse Retrieval Results: ['q5xc4m3j', 'o9ii9fj3', 'po2c65nb', 'km4qijqj', 'le2eifv8']
Sparse Retrieval Scores: [20.00936328 19.18621723 19.18621723 19.18621723 18.39850069]
Dense Retrieval Results: ['10tvepa5', '10bu7iwg', '10jai07w', '10my0twp', '9rda20ge']
Dense Retrieval Scores: [0.64143076 0.62540857 0.62518514 0.62396527 0.6127211 ]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.61985893 0.47136102 0.28626048 ... 0.29367701 0.6708664  0.36374979]


 20%|██        | 10/50 [00:24<01:37,  2.44s/it]

Query n.1. ID: 11 Title:  coronavirus hospital rationing
Sparse Retrieval Results: ['szhkvizb', 'od8k0utb', '8dico3zc', 'sp7brt83', 't55od92g']
Sparse Retrieval Scores: [14.11932886 13.11170562 13.11170562 12.58403697 12.26830956]
Dense Retrieval Results: ['11abmmyl', '11ogothr', '11ecey66', '11aulpqb', '11hvriqy']
Dense Retrieval Scores: [0.75312739 0.70871819 0.70865221 0.70018076 0.69455095]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.49574537 0.37975556 0.30586453 ... 0.28190164 0.53560125 0.34754514]


 22%|██▏       | 11/50 [00:26<01:33,  2.39s/it]

Query n.1. ID: 12 Title:  coronavirus quarantine
Sparse Retrieval Results: ['nyan7jnt', 'bpu3hpbn', 's0zdqd6d', '9hrrkqgi', 'kjnnh00e']
Sparse Retrieval Scores: [11.67022013 11.67022013 11.11431961 11.10539155 11.06989104]
Dense Retrieval Results: ['12eajtcw', '12dyiqft', '12hoppys', '12sakknb', '12ksowrf']
Dense Retrieval Scores: [0.67135265 0.66203505 0.6444028  0.63586501 0.62978856]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.46940266 0.42841912 0.27808578 ... 0.27015516 0.48941315 0.32154204]


 24%|██▍       | 12/50 [00:27<01:22,  2.16s/it]

Query n.1. ID: 13 Title:  how does coronavirus spread
Sparse Retrieval Results: ['8wpccy2y', '4hph547d', '91j5ozws', '4067srwc', '8s8ma1q1']
Sparse Retrieval Scores: [12.28832665 11.68456566 11.68456566 11.68456566 11.68456566]
Dense Retrieval Results: ['13veedct', '13upid76', '27awdi12', '13japrzr', '18frwjak']
Dense Retrieval Scores: [0.5972664  0.59625129 0.59263179 0.59032961 0.58838116]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.53717286 0.33572689 0.30418717 ... 0.34276184 0.48818056 0.38084013]


 26%|██▌       | 13/50 [00:30<01:19,  2.14s/it]

Query n.1. ID: 14 Title:  coronavirus super spreaders
Sparse Retrieval Results: ['na3vrf5q', '93l22ign', 'p48bw6s4', 'c48gl27o', 'axns3ukm']
Sparse Retrieval Scores: [28.50607337 28.50607337 28.50607337 24.04516868 17.82424783]
Dense Retrieval Results: ['14otwglm', '14obiyyd', '4ttdit14', '14baohhm', '14azjvbf']
Dense Retrieval Scores: [0.6729073  0.66570911 0.64511183 0.63339758 0.60367101]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.52584133 0.36353189 0.2678239  ... 0.27458833 0.48428629 0.27927782]


 28%|██▊       | 14/50 [00:31<01:14,  2.06s/it]

Query n.1. ID: 15 Title:  coronavirus outside body
Sparse Retrieval Results: ['2w3bx6p8', 'bbg11u3w', '959w9sln', '5dzyx6pw', 'j2l61p76']
Sparse Retrieval Scores: [11.10572087 10.83028752 10.76519478 10.49909064 10.49909064]
Dense Retrieval Results: ['15s5wg6h', '18frwjak', '15rhqpiu', '15gnilwy', '15wgvbz6']
Dense Retrieval Scores: [0.6094144  0.60487611 0.60421482 0.60035703 0.59232811]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.48128152 0.34829212 0.2714848  ... 0.35116814 0.47155791 0.31745475]


 30%|███       | 15/50 [00:33<01:09,  2.00s/it]

Query n.1. ID: 16 Title:  how long does coronavirus survive on surfaces
Sparse Retrieval Results: ['pdmfxssd', 'ou7w3zkv', 'ou7w3zkv', '959w9sln', 'tjplc5j6']
Sparse Retrieval Scores: [22.66137236 21.39671883 21.39671883 20.18090081 19.72178981]
Dense Retrieval Results: ['16hlvrdi', '17ncnneh', '16lfowff', '18frwjak', '16nllmlz']
Dense Retrieval Scores: [0.66105924 0.62297705 0.62032611 0.61407484 0.61146205]
BM25 Scores: [0.         0.03172885 0.01779236 ... 0.03381601 0.         0.        ]
Dense Scores: [0.50580936 0.40802674 0.25771157 ... 0.30774738 0.40362163 0.27245806]


 32%|███▏      | 16/50 [00:36<01:18,  2.31s/it]

Query n.1. ID: 17 Title:  coronavirus clinical trials
Sparse Retrieval Results: ['i4fz2c49', 'zed7d315', '7izjhyyk', 'mgrz9jfd', 'm23mvaf7']
Sparse Retrieval Scores: [12.89437155 12.74032107 12.74032107 12.63610973 12.63610973]
Dense Retrieval Results: ['17hdeiel', '17ncnneh', '17harglw', '17slihe6', '17iii3dg']
Dense Retrieval Scores: [0.69911049 0.69808866 0.67984492 0.66504189 0.65856425]
BM25 Scores: [0.0949066 0.        0.        ... 0.        0.        0.       ]
Dense Scores: [0.51360754 0.31087615 0.24682983 ... 0.35126442 0.40365607 0.31344916]


 34%|███▍      | 17/50 [00:39<01:17,  2.34s/it]

Query n.1. ID: 18 Title:  masks prevent coronavirus
Sparse Retrieval Results: ['wni08lks', 'g17lp8ch', 'jvlun85u', 'xl07uq0s', 'xtraspw2']
Sparse Retrieval Scores: [18.52086033 16.65424225 16.65424225 16.65424225 16.19935937]
Dense Retrieval Results: ['18frwjak', '18dymyiz', '18a1eesh', '18wuzwbj', '18uomiwq']
Dense Retrieval Scores: [0.73048073 0.69819731 0.66964049 0.65666505 0.650955  ]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.46670561 0.2973307  0.24628628 ... 0.33056331 0.40031989 0.22859349]


 36%|███▌      | 18/50 [00:41<01:16,  2.40s/it]

Query n.1. ID: 19 Title:  what alcohol sanitizer kills coronavirus
Sparse Retrieval Results: ['y777xosr', 'wr404h18', 'za266ajo', 'gofjzt87', 'eevs62xf']
Sparse Retrieval Scores: [17.94953408 17.94953408 17.70576733 17.70576733 16.22176915]
Dense Retrieval Results: ['19aeuzer', '19eawrjt', '19yodxem', '19djulgt', '19eej6bu']
Dense Retrieval Scores: [0.76041341 0.70149062 0.69645058 0.69454682 0.6820384 ]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.40732882 0.33070009 0.19626883 ... 0.29121436 0.42385735 0.26113096]


 38%|███▊      | 19/50 [00:44<01:15,  2.45s/it]

Query n.1. ID: 20 Title:  coronavirus and ACE inhibitors
Sparse Retrieval Results: ['hynkb0a8', '1pahpghb', '3twud97m', 'roj3om68', 'r426u5dz']
Sparse Retrieval Scores: [25.69340726 25.29781092 25.29781092 25.27159598 24.6978117 ]
Dense Retrieval Results: ['20ifwznd', '20hclaf6', '20eb19ed', '20ihafqw', '20eddl4k']
Dense Retrieval Scores: [0.71570015 0.67217567 0.66732202 0.65886815 0.65185681]
BM25 Scores: [0.2184781  0.2092272  0.23870226 ... 0.23828118 0.         0.        ]
Dense Scores: [0.43690096 0.409394   0.30097537 ... 0.37464553 0.49750243 0.28512777]


 40%|████      | 20/50 [00:46<01:10,  2.36s/it]

Query n.1. ID: 21 Title:  coronavirus mortality
Sparse Retrieval Results: ['w69qfyvu', 'cqesmvk9', 'm4hmhsog', '1ca2rrrz', 'vopoo7w0']
Sparse Retrieval Scores: [8.32505162 8.31751    8.29497144 8.28824207 7.95873343]
Dense Retrieval Results: ['21dfvahe', '21ifshn6', '21oudhum', '21wowwv5', '21uag18h']
Dense Retrieval Scores: [0.69080481 0.66989426 0.65317728 0.64829327 0.64196593]
BM25 Scores: [0.25602534 0.         0.         ... 0.         0.         0.        ]
Dense Scores: [0.51700582 0.33306771 0.25300875 ... 0.39731833 0.45767299 0.31086281]


 42%|████▏     | 21/50 [00:48<01:02,  2.16s/it]

Query n.1. ID: 22 Title:  coronavirus heart impacts
Sparse Retrieval Results: ['fitz1vjs', 'bpzwx27u', 'sltofaox', 'lr7a2fvr', 'ytwfgs20']
Sparse Retrieval Scores: [11.19876549 11.19876549 10.76151567 10.69433156 10.69433156]
Dense Retrieval Results: ['22ergnbl', '22itdeeq', '22swinc6', '22fi7ud8', '22mnod18']
Dense Retrieval Scores: [0.75187498 0.74227024 0.70506756 0.70118345 0.69949319]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.49829582 0.43467857 0.31198907 ... 0.42130308 0.47101509 0.35761013]


 44%|████▍     | 22/50 [00:50<00:58,  2.08s/it]

Query n.1. ID: 23 Title:  coronavirus hypertension
Sparse Retrieval Results: ['1nxfh9yt', '1nxfh9yt', 'a3o8u7l8', 'es8ztvq5', '9nbj3ckb']
Sparse Retrieval Scores: [13.37947291 13.37947291 11.85622384 11.85622384 11.13242936]
Dense Retrieval Results: ['23acuwll', '23exaaik', '23eejhuw', '23ituatc', '23rtq737']
Dense Retrieval Scores: [0.7120857  0.70756537 0.69817033 0.69689189 0.6758115 ]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.50040458 0.39517945 0.27052772 ... 0.43101592 0.43695455 0.35891723]


 46%|████▌     | 23/50 [00:51<00:52,  1.95s/it]

Query n.1. ID: 24 Title:  coronavirus diabetes
Sparse Retrieval Results: ['vte52lah', '8t7rrgmp', '8tuubyaq', 'xehsijye', 'roglm8r8']
Sparse Retrieval Scores: [11.76730905 11.4096611  11.30790305 11.30790305 11.30790305]
Dense Retrieval Results: ['24lzevco', '24viekl7', '24efrzlm', '24ikauhw', '24ne71ym']
Dense Retrieval Scores: [0.67744132 0.67629519 0.67154312 0.66812308 0.66015697]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.50527884 0.45952789 0.24782296 ... 0.33937312 0.47243029 0.29913797]


 48%|████▊     | 24/50 [00:53<00:49,  1.90s/it]

Query n.1. ID: 25 Title:  coronavirus biomarkers
Sparse Retrieval Results: ['1hvihwkz', 'p3fi4yej', 'vcpo3qob', 'sx2wuq94', '3l2t9w9l']
Sparse Retrieval Scores: [10.97682513 10.97682513 10.80686503 10.80686503 10.80686503]
Dense Retrieval Results: ['25rrzrgw', '25uogrjz', '25izll21', '25td0yoy', '25dwlzpy']
Dense Retrieval Scores: [0.68900345 0.66816226 0.65662074 0.65284334 0.64789769]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.39856276 0.29086546 0.20573596 ... 0.38521444 0.39821469 0.30248855]


 50%|█████     | 25/50 [00:55<00:48,  1.94s/it]

Query n.1. ID: 26 Title:  coronavirus early symptoms
Sparse Retrieval Results: ['g0xy7sxw', '2gq75aqi', 'cli454hr', 'k41ll1c1', 'uafa13e3']
Sparse Retrieval Scores: [11.69198461 10.74107265 10.74107265 10.74107265 10.58535972]
Dense Retrieval Results: ['26yvryhw', '26asfqab', '26ttagg8', '26ckwg8z', '26walrbo']
Dense Retrieval Scores: [0.68806873 0.68697924 0.67481925 0.6681389  0.66567086]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.43575497 0.30517282 0.24579676 ... 0.31867252 0.46443746 0.33355948]


 52%|█████▏    | 26/50 [00:58<00:50,  2.11s/it]

Query n.1. ID: 27 Title:  coronavirus asymptomatic
Sparse Retrieval Results: ['nlzdto1h', 'q7mm4nwm', 'cu52j6tq', 'mu2v9o1p', 'mu2v9o1p']
Sparse Retrieval Scores: [10.75557594 10.72479764 10.72479764 10.72110206 10.705058  ]
Dense Retrieval Results: ['27awdi12', '27gutwjd', '27dev3oy', '27ulyn8y', '27t64idh']
Dense Retrieval Scores: [0.70045882 0.69319953 0.68478164 0.68257264 0.68134496]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.52780015 0.38205577 0.28348142 ... 0.39945508 0.47786848 0.41061807]


 54%|█████▍    | 27/50 [01:00<00:49,  2.14s/it]

Query n.1. ID: 28 Title:  coronavirus hydroxychloroquine
Sparse Retrieval Results: ['ne42raez', '7nda7xpb', 'd612wsh3', '5moean7z', '3jnhboif']
Sparse Retrieval Scores: [12.92572344 12.92572344 12.56198001 12.56198001 12.25208096]
Dense Retrieval Results: ['28lkneec', '28eluwug', '28utunid', '28axoftw', '28hlwjw7']
Dense Retrieval Scores: [0.70026388 0.69009771 0.66969888 0.66831785 0.66207071]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.44338112 0.41327293 0.22954542 ... 0.33288138 0.41604551 0.32838576]


 56%|█████▌    | 28/50 [01:01<00:44,  2.00s/it]

Query n.1. ID: 29 Title:  coronavirus drug repurposing
Sparse Retrieval Results: ['1t4ykc01', 'q6bpic9m', 'a1p1nopl', 's953j62y', 'r4snyxr9']
Sparse Retrieval Scores: [18.16016562 18.11564293 17.74765529 17.36085264 16.8192905 ]
Dense Retrieval Results: ['29naooaz', '29fiqirg', '29gmbths', '29y1sike', '29tdxxae']
Dense Retrieval Scores: [0.73526752 0.72321177 0.69267047 0.68420764 0.67966936]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.43703897 0.35734417 0.23521719 ... 0.30723797 0.43257396 0.3958227 ]


 58%|█████▊    | 29/50 [01:04<00:44,  2.12s/it]

Query n.1. ID: 30 Title:  coronavirus remdesivir
Sparse Retrieval Results: ['b8tknq05', 'r0znh1bi', '4178ui2c', 'aivub6mi', 'x1fzgiy6']
Sparse Retrieval Scores: [14.44888331 14.26502081 14.24967889 14.16741641 13.88742444]
Dense Retrieval Results: ['30eayhax', '30gnvylt', '30ihtibn', '30ogesr8', '30snllol']
Dense Retrieval Scores: [0.70090883 0.69579777 0.6848161  0.66363147 0.65439698]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.33959236 0.35820586 0.23098034 ... 0.28280558 0.38835545 0.33892899]


 60%|██████    | 30/50 [01:05<00:39,  1.98s/it]

Query n.1. ID: 31 Title:  difference between coronavirus and flu
Sparse Retrieval Results: ['1ykji0c8', 'mg2zikyp', 'i7v9l6ii', '7ygcfzii', 'aksjrr8g']
Sparse Retrieval Scores: [21.68598435 19.59753547 19.43758247 19.13421273 18.71134132]
Dense Retrieval Results: ['31iqrkyw', '31d0aype', '31srtult', '31yznyut', '31ehponk']
Dense Retrieval Scores: [0.70618488 0.69567256 0.68486407 0.68009077 0.67590506]
BM25 Scores: [0.25885138 0.24789096 0.28281282 ... 0.28231393 0.         0.        ]
Dense Scores: [0.32128036 0.3713514  0.24167096 ... 0.30789735 0.3371178  0.34637967]


 62%|██████▏   | 31/50 [01:08<00:39,  2.10s/it]

Query n.1. ID: 32 Title:  coronavirus subtypes
Sparse Retrieval Results: ['z1qh8286', '1sbnewog', 'zr495eff', 'xtg0e142', 'k1wd7ymj']
Sparse Retrieval Scores: [14.5548714  11.20265591 10.8771717  10.69951487 10.68721266]
Dense Retrieval Results: ['32azllh8', '33uvughm', '32quvru2', '32dne47n', '32zkkyju']
Dense Retrieval Scores: [0.74284873 0.69287435 0.68956266 0.68199512 0.68028753]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.41815864 0.42244487 0.24801115 ... 0.34863832 0.36455269 0.33143021]


 64%|██████▍   | 32/50 [01:10<00:35,  1.97s/it]

Query n.1. ID: 33 Title:  coronavirus vaccine candidates
Sparse Retrieval Results: ['w9rqnz9h', 'tm8fzbue', 'o8bkorjn', 'ievuxa6k', 'bb5dnbbt']
Sparse Retrieval Scores: [15.47345949 15.40688185 14.8601967  14.8601967  14.30278393]
Dense Retrieval Results: ['33goapke', '33uvughm', '33dsacho', '33nezniw', '33pwckyf']
Dense Retrieval Scores: [0.72179724 0.71809479 0.70387077 0.69338138 0.68181409]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.40489957 0.37882862 0.25055289 ... 0.40129737 0.34187578 0.33409046]


 66%|██████▌   | 33/50 [01:12<00:35,  2.08s/it]

Query n.1. ID: 34 Title:  coronavirus recovery
Sparse Retrieval Results: ['eujbxdqi', 'tps87rbb', 'mkxd4fvw', '83defevj', 'nojazf3j']
Sparse Retrieval Scores: [9.83914353 9.43584623 9.35857176 9.35857176 9.35857176]
Dense Retrieval Results: ['34meuycd', '34wuttqw', '33goapke', '33uvughm', '34sspi61']
Dense Retrieval Scores: [0.65708368 0.65376023 0.65087147 0.64895745 0.64665179]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.37762341 0.41971177 0.1874644  ... 0.34871435 0.35266816 0.38885647]


 68%|██████▊   | 34/50 [01:14<00:33,  2.11s/it]

Query n.1. ID: 35 Title:  coronavirus public datasets
Sparse Retrieval Results: ['yoav2b35', '95o2v09d', 'csokkcqq', '6uaj8fb7', 'xn12s005']
Sparse Retrieval Scores: [11.46645547 11.00419769 10.63754885 10.62326738 10.22485521]
Dense Retrieval Results: ['35aplpdi', '35oagukj', '35omeccq', '33goapke', '35meen0h']
Dense Retrieval Scores: [0.64154453 0.63871998 0.62758335 0.61288011 0.61035685]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.42560261 0.27857818 0.1876051  ... 0.44828963 0.35041149 0.37180486]


 70%|███████   | 35/50 [01:17<00:33,  2.23s/it]

Query n.1. ID: 36 Title:  SARS-CoV-2 spike structure
Sparse Retrieval Results: ['34ljq0qt', 'nq16jcs9', 'ct1y9amm', 'g81ylcxq', '4nfxdppt']
Sparse Retrieval Scores: [18.25210827 16.59910933 16.45908232 16.31334871 15.86551038]
Dense Retrieval Results: ['36iqmimt', '36wqulk8', '36yxfnym', '36ym65nt', '36aoqdnt']
Dense Retrieval Scores: [0.77349518 0.68334796 0.6792483  0.67450486 0.66232513]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.35255792 0.35361851 0.134078   ... 0.34039385 0.2966803  0.29863817]


 72%|███████▏  | 36/50 [01:19<00:30,  2.15s/it]

Query n.1. ID: 37 Title:  SARS-CoV-2 phylogenetic analysis
Sparse Retrieval Results: ['5qn5h7p5', 't5ar9pp0', 'jhlnwoml', 'mjxs395h', 'm3505b5w']
Sparse Retrieval Scores: [15.06532835 14.80120552 14.1073129  14.1073129  13.96439215]
Dense Retrieval Results: ['37vyazby', '377uytuw', '37yutynv', '37katpp3', '37nrtmni']
Dense Retrieval Scores: [0.67739597 0.67111343 0.66017896 0.65387978 0.64860939]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.402389   0.32793084 0.23909102 ... 0.41876895 0.32621623 0.33988863]


 74%|███████▍  | 37/50 [01:20<00:26,  2.07s/it]

Query n.1. ID: 38 Title:  COVID inflammatory response
Sparse Retrieval Results: ['e58nciq3', 'ynrvowoc', '8ouf2a9z', 'ga56ozp3', '4gfvbaj1']
Sparse Retrieval Scores: [15.57713257 15.57713257 13.96503544 12.61335482 12.43016621]
Dense Retrieval Results: ['38oeau38', '38dhwggn', '38eddqa4', '38jeprwx', '36iqmimt']
Dense Retrieval Scores: [0.72373504 0.71184605 0.70538948 0.67314392 0.65816974]
BM25 Scores: [0.         0.23503992 0.40090413 ... 0.         0.         0.        ]
Dense Scores: [0.35553441 0.31744749 0.15682639 ... 0.41594306 0.3006231  0.29846193]


 76%|███████▌  | 38/50 [01:22<00:24,  2.01s/it]

Query n.1. ID: 39 Title:  COVID-19 cytokine storm
Sparse Retrieval Results: ['6io0zd0z', 'o6c8ic14', 'yaov2osz', 'nqprluto', 'iaatjew2']
Sparse Retrieval Scores: [21.61117522 21.36823735 21.36823735 21.36823735 20.35131755]
Dense Retrieval Results: ['39ssiouy', '39toolzt', '36iqmimt', '38oeau38', '38eddqa4']
Dense Retrieval Scores: [0.69340723 0.68971933 0.66991689 0.66250038 0.65593549]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.40442406 0.32721455 0.1890319  ... 0.43441644 0.36263966 0.31816472]


 78%|███████▊  | 39/50 [01:24<00:21,  1.97s/it]

Query n.1. ID: 40 Title:  coronavirus mutations
Sparse Retrieval Results: ['ehprox1q', '4z6wcmxq', '9wfb43gt', 'u2n10ebx', 'uqykia6i']
Sparse Retrieval Scores: [12.37137667 11.13501648 11.06743399 11.00744163 10.92556974]
Dense Retrieval Results: ['40srucam', '40ebkwgz', '40yhwoke', '40aaokti', '39ssiouy']
Dense Retrieval Scores: [0.66170656 0.65223445 0.64762363 0.64555416 0.63406809]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.44393601 0.41210427 0.28053349 ... 0.44434467 0.44530072 0.34418386]


 80%|████████  | 40/50 [01:26<00:18,  1.88s/it]

Query n.1. ID: 41 Title:  COVID-19 in African-Americans
Sparse Retrieval Results: ['ne552y3i', 'x6qu4o6y', 'zdmoifko', '423uero3', 'u0morhvi']
Sparse Retrieval Scores: [20.34287873 17.29486521 17.29486521 17.29486521 17.04543409]
Dense Retrieval Results: ['41nebtwm', '41i20yuy', '43xxva42', '41lfsedz', '41kiedlu']
Dense Retrieval Scores: [0.68848319 0.68578136 0.66878761 0.65428904 0.65239107]
BM25 Scores: [0.21256012 0.19588135 0.22453524 ... 0.20876661 0.         0.        ]
Dense Scores: [0.41315703 0.35964891 0.24021686 ... 0.5331392  0.39655028 0.32451092]


 82%|████████▏ | 41/50 [01:28<00:17,  1.98s/it]

Query n.1. ID: 42 Title:  Vitamin D and COVID-19
Sparse Retrieval Results: ['4v71xohx', '67gsn4sy', 'pt1i1au3', 'svc2xeh1', 'rwh56zhg']
Sparse Retrieval Scores: [33.31236862 33.1371591  31.63844212 31.63844212 28.9615817 ]
Dense Retrieval Results: ['42jagfy8', '428liulz', '42wwtuml', '42avrzla', '42evgrgs']
Dense Retrieval Scores: [0.72847387 0.71338316 0.67991406 0.67725731 0.6739638 ]
BM25 Scores: [0.16850939 0.16137428 0.18410803 ... 0.18378325 0.         0.        ]
Dense Scores: [0.353923   0.29398142 0.26003752 ... 0.44715098 0.38776244 0.21821585]


 84%|████████▍ | 42/50 [01:31<00:17,  2.23s/it]

Query n.1. ID: 43 Title:  violence during pandemic
Sparse Retrieval Results: ['42o9qs47', 'miem1poh', 'nbxiwvdl', 'onxosato', 'bmqt33yw']
Sparse Retrieval Scores: [19.09701249 16.33640597 16.33640597 16.33640597 16.02456192]
Dense Retrieval Results: ['43geausg', '43xxva42', '43dbrbsg', '43toyvr2', '43ihjwgc']
Dense Retrieval Scores: [0.72097809 0.72019081 0.69431745 0.68607998 0.67561685]
BM25 Scores: [0.         0.09125029 0.         ... 0.22859429 0.         0.        ]
Dense Scores: [0.40152829 0.32220548 0.22690493 ... 0.51529255 0.39895772 0.29591521]


 86%|████████▌ | 43/50 [01:33<00:16,  2.33s/it]

Query n.1. ID: 44 Title:  impact of masks on coronavirus transmission
Sparse Retrieval Results: ['wni08lks', '28utunid', 'qi1henyy', 'xl07uq0s', 'jvlun85u']
Sparse Retrieval Scores: [23.65381676 23.5347433  23.5347433  21.72026439 21.72026439]
Dense Retrieval Results: ['44dduerf', '36iqmimt', '44ahrenh', '44sejovt', '44rlwtjt']
Dense Retrieval Scores: [0.7129239  0.70106668 0.6895929  0.67710337 0.66062647]
BM25 Scores: [0.21868587 0.29565238 0.27404296 ... 0.28720607 0.         0.        ]
Dense Scores: [0.4003147  0.32914917 0.25181611 ... 0.46145006 0.35925579 0.31412999]


 88%|████████▊ | 44/50 [01:36<00:14,  2.45s/it]

Query n.1. ID: 45 Title:  coronavirus mental health impact
Sparse Retrieval Results: ['k8gavf50', 'nkmqqp1v', '302hphhx', 'bmbdtxj0', 'b7xgdkk2']
Sparse Retrieval Scores: [17.93070411 17.93070411 17.93070411 17.45505287 17.45505287]
Dense Retrieval Results: ['45glufnh', '45pmayol', '45casveh', '45awgabp', '455ldhiy']
Dense Retrieval Scores: [0.73026074 0.71617836 0.71392522 0.71066639 0.69410672]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.33087024 0.2629592  0.21975362 ... 0.50966099 0.34690784 0.36458815]


 90%|█████████ | 45/50 [01:38<00:11,  2.34s/it]

Query n.1. ID: 46 Title:  dexamethasone coronavirus
Sparse Retrieval Results: ['ianm4jmu', '0eyi1gql', 'omen8vvq', 'k7znamun', 'qxlz1ff6']
Sparse Retrieval Scores: [14.22728932 14.22728932 14.22728932 13.81259202 13.48569763]
Dense Retrieval Results: ['46ecndhl', '46ajonse', '46keagu8', '46oo3sse', '46ohny64']
Dense Retrieval Scores: [0.7127631  0.69994687 0.69704186 0.68940553 0.68815105]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.35044636 0.27713577 0.18686914 ... 0.41143022 0.34251614 0.22943152]


 92%|█████████▏| 46/50 [01:40<00:08,  2.14s/it]

Query n.1. ID: 47 Title:  COVID-19 outcomes in children
Sparse Retrieval Results: ['u7arfoym', '62ic8r0s', 'y6ssshea', 's1kpmde0', '9cr0bk2a']
Sparse Retrieval Scores: [18.5527687  18.23539514 18.06618823 18.01523572 18.01523572]
Dense Retrieval Results: ['47goscew', '47oxen29', '478baond', '46ecndhl', '46ajonse']
Dense Retrieval Scores: [0.68583262 0.65405858 0.65193008 0.64991813 0.64476031]
BM25 Scores: [0.44691845 0.21478145 0.24620008 ... 0.22890998 0.         0.        ]
Dense Scores: [0.41571296 0.25630998 0.24701407 ... 0.50889935 0.35449884 0.29069889]


 94%|█████████▍| 47/50 [01:42<00:06,  2.16s/it]

Query n.1. ID: 48 Title:  school reopening coronavirus
Sparse Retrieval Results: ['p9zsqdtj', 'r2x3awlw', 'kayh0v56', 'i804iorq', 'xhyqg5u2']
Sparse Retrieval Scores: [22.5661407  22.5661407  19.70332296 19.70332296 19.4060162 ]
Dense Retrieval Results: ['48buuhzy', '48sfhqui', '48odnlal', '48qityyg', '48ym7eti']
Dense Retrieval Scores: [0.76084209 0.71881352 0.70971173 0.69195522 0.68451142]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.4413606  0.38921184 0.24187337 ... 0.45378199 0.37538735 0.29085484]


 96%|█████████▌| 48/50 [01:44<00:04,  2.10s/it]

Query n.1. ID: 49 Title:  post-infection COVID-19 immunity
Sparse Retrieval Results: ['u5nxm9tu', '3c9njasg', 'uijc6qdo', 'q0pb681s', 'v88ok4c2']
Sparse Retrieval Scores: [13.63945715 10.90373295 10.7606513  10.5260861  10.40800362]
Dense Retrieval Results: ['49solekk', '49rledsr', '49uemaax', '49ai51ah', '49negkfa']
Dense Retrieval Scores: [0.66460917 0.651955   0.63507285 0.6314099  0.62772288]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.5092979  0.33794807 0.24917629 ... 0.46011426 0.45004959 0.31033256]


 98%|█████████▊| 49/50 [01:46<00:02,  2.18s/it]

Query n.1. ID: 50 Title:  mRNA vaccine coronavirus
Sparse Retrieval Results: ['fbpdyg2i', 'ptvsie6m', 'akbq0ogs', 'fd7d4q8k', '0fx1b7ph']
Sparse Retrieval Scores: [17.09851832 14.92673964 14.77037668 14.42250987 14.09555967]
Dense Retrieval Results: ['50ybiiyw', '50swatfa', '50chmcow', '50eruta8', '50canjze']
Dense Retrieval Scores: [0.66123659 0.65555399 0.64327307 0.63822773 0.63245585]
BM25 Scores: [0. 0. 0. ... 0. 0. 0.]
Dense Scores: [0.39273957 0.25718498 0.28512912 ... 0.41370383 0.39434376 0.25839519]


100%|██████████| 50/50 [01:49<00:00,  2.19s/it]


In [59]:
run_sparse = prepare_run_data(results["sparse"])
run_dense = prepare_run_data(results["dense"])
run_rank_fusion = prepare_run_data(results["rank_fusion"])
run_cascade = prepare_run_data(results["cascade"])

# Evaluate results with pytrec_eval
evaluator = pytrec_eval.RelevanceEvaluator(qrels_dict, {'recall.100', 'ndcg_cut.5'})
eval_results_sparse = evaluator.evaluate(run_sparse)
eval_results_dense = evaluator.evaluate(run_dense)
eval_results_rank_fusion = evaluator.evaluate(run_rank_fusion)
eval_results_cascade = evaluator.evaluate(run_cascade)

# Aggregate metrics for overall performance
aggregated_results = {
    "sparse": {
        metric: sum([res[metric] for res in eval_results_sparse.values()]) / len(eval_results_sparse)
        for metric in eval_results_sparse[next(iter(eval_results_sparse))]
    },
    "dense": {
        metric: sum([res[metric] for res in eval_results_dense.values()]) / len(eval_results_dense)
        for metric in eval_results_dense[next(iter(eval_results_dense))]
    },
    "rank_fusion": {
        metric: sum([res[metric] for res in eval_results_rank_fusion.values()]) / len(eval_results_rank_fusion)
        for metric in eval_results_rank_fusion[next(iter(eval_results_rank_fusion))]
    },
    "cascade": {
        metric: sum([res[metric] for res in eval_results_cascade.values()]) / len(eval_results_cascade)
        for metric in eval_results_cascade[next(iter(eval_results_cascade))]
    }
}

print("Aggregated results:", json.dumps(aggregated_results, indent=4))
print("Retrieval results and metrics saved to files.")

Aggregated results: {
    "sparse": {
        "recall_100": 0.006437055481141409,
        "ndcg_cut_5": 0.5230925046367942
    },
    "dense": {
        "recall_100": 3.816793893129771e-05,
        "ndcg_cut_5": 0.0033916020527361614
    },
    "rank_fusion": {
        "recall_100": 0.006515126818829686,
        "ndcg_cut_5": 0.51673935601548
    },
    "cascade": {
        "recall_100": 0.006437055481141409,
        "ndcg_cut_5": 0.5237315065037881
    }
}
Retrieval results and metrics saved to files.


# Section 4: QA with Language Model

In [46]:
# QA for the first query
QUERY_INDEX = 3                                                     # Index of the query to be used for retrieval
query = all_queries[QUERY_INDEX - 1]                                # Select the query from the list based on the index
query_text = query['title'] if isinstance(query, dict) else query   # Get the query text

# Retrieval calls:

# Perform dense retrieval using query embedding and document embeddings
dense_top_k_indices, dense_top_k_scores = dense_retrieve(query_embeddings[QUERY_INDEX], doc_embeddings)
# Perform sparse retrieval using BM25 on the query text
sparse_top_k_indices, sparse_top_k_scores = bm25_retrieve(query_text, bm25)
# Perform rank fusion retrieval by combining BM25 and dense retrieval results
rank_top_k_indices, rank_top_k_scores = fusion_retrieve(
    query_embeddings[QUERY_INDEX],
    doc_embeddings,
    query_text
)
# Perform cascading retrieval: first BM25, then re-rank with dense retrieval
cascading_top_k_indices, cascading_top_k_scores = cascade_retrieve(
    query_embeddings[QUERY_INDEX],
    doc_embeddings,
    query_text
)

# Get retrieved documents for each method
dense_retrieved_docs = [f"Document {i+1}: {all_docs[idx]['abstract']}" for i, idx in enumerate(dense_top_k_indices)]
sparse_retrieved_docs = [f"Document {i+1}: {all_docs[idx]['abstract']}" for i, idx in enumerate(sparse_top_k_indices)]
rank_retrieved_docs = [f"Document {i+1}: {all_docs[idx]['abstract']}" for i, idx in enumerate(rank_top_k_indices)]
cascading_retrieved_docs = [f"Document {i+1}: {all_docs[idx]['abstract']}" for i, idx in enumerate(cascading_top_k_indices)]

# Definition of the model that will be used to generate the various responses.
lm_pipeline = pipeline("text-generation",
                      model="meta-llama/Llama-3.2-1B",
                      device=0 if device == "cuda" else -1)

config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

Device set to use cuda:0


#### Question-answering using DENSE RETRIEVAL

In [47]:
print("------------------ DENSE RETRIEVAL ----------------------\n")
context = "\n".join(dense_retrieved_docs)
prompt = f"Context:\n{context}\n\nQuestion:\n{query_text}\n\nAnswer in a concise and clear manner without repetition (if no direct answer, provide a general summary):"

print(f"----------------- Length of the prompt -----------------\n{len(prompt.split())} words")
print(f"------------------------ Prompt ------------------------\n{prompt}")

# Generate response
response = lm_pipeline(prompt,
                      max_new_tokens=150,
                      temperature=0.7,
                      truncation=False)[0]["generated_text"]
response = response.split("Answer in a concise and clear manner without repetition (if no direct answer, provide a general summary):")[1].strip()

print(f"------------------ Response ------------------\n{response}")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------------------ DENSE RETRIEVAL ----------------------

----------------- Length of the prompt -----------------
795 words
------------------------ Prompt ------------------------
Context:
Document 1: Population density, behaviour and cultural habits strongly influence the spread of pathogens. Consequently, key epidemiological parameters may vary from country to country. Confirmed COVID-19 cases in in China have been used to estimate those parameters, that vary largely (reviewed in 1). The estimates also depend on testing frequency and case definitions that are prone to change during ongoing epidemics, providing additional uncertainties. The rise in fatal cases due to SARS-CoV2 could be a more reliable parameter, since missing of deaths is less likely. In the absence of changes in the management of severe COVID-19 cases, the rise in death cases should be proportional to the rise in virus infections. Although the fluctuating low numbers of fatal cases very early in the epidemic may l

#### Question-answering using SPARSE RETRIEVAL

In [48]:
print("------------------ SPARSE RETRIEVAL ----------------------\n")
context = "\n".join(sparse_retrieved_docs)
prompt = f"Context:\n{context}\n\nQuestion:\n{query_text}\n\nAnswer in a concise and clear manner without repetition (if no direct answer, provide a general summary):"

print(f"----------------- Length of the prompt -----------------\n{len(prompt.split())} words")
print(f"------------------------ Prompt ------------------------\n{prompt}")

# Generate response
response = lm_pipeline(prompt,
                      max_new_tokens=150,
                      temperature=0.7,
                      truncation=False)[0]["generated_text"]

response = response.split("Answer in a concise and clear manner without repetition (if no direct answer, provide a general summary):")[1].strip()
print(f"------------------ Response ------------------\n{response}")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------------------ SPARSE RETRIEVAL ----------------------

----------------- Length of the prompt -----------------
465 words
------------------------ Prompt ------------------------
Context:
Document 1: Those downplaying the coronavirus ignore our lack of immunity and vaccines
Document 2: Other reasons rather then absence of prior immunity could play a crucial role in the children coronavirus dilemma
Document 3: Despite various levels of preventive measures, in 2020 many countries have suffered severely from the coronavirus 2019 (COVID-19) pandemic caused by the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) virus. We show that population heterogeneity can significantly impact disease-induced immunity as the proportion infected in groups with the highest contact rates is greater than in groups with low contact rates. We estimate that if R(0) = 2.5 in an age-structured community with mixing rates fitted to social activity then the disease-induced herd immunity level can 

#### Question-answering using RANK FUSION

In [49]:
print("------------------ RANK FUSION ----------------------\n")
context = "\n".join(rank_retrieved_docs)
prompt = f"Context:\n{context}\n\nQuestion:\n{query_text}\n\nAnswer in a concise and clear manner without repetition (if no direct answer, provide a general summary):"

print(f"----------------- Length of the prompt -----------------\n{len(prompt.split())} words")
print(f"------------------------ Prompt ------------------------\n{prompt}")

# Generate response
response = lm_pipeline(prompt,
                      max_new_tokens=150,
                      temperature=0.7,
                      truncation=False)[0]["generated_text"]

response = response.split("Answer in a concise and clear manner without repetition (if no direct answer, provide a general summary):")[1].strip()
print(f"------------------ Response ------------------\n{response}")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------------------ RANK FUSION ----------------------

----------------- Length of the prompt -----------------
741 words
------------------------ Prompt ------------------------
Context:
Document 1: Despite various levels of preventive measures, in 2020 many countries have suffered severely from the coronavirus 2019 (COVID-19) pandemic caused by the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) virus. We show that population heterogeneity can significantly impact disease-induced immunity as the proportion infected in groups with the highest contact rates is greater than in groups with low contact rates. We estimate that if R(0) = 2.5 in an age-structured community with mixing rates fitted to social activity then the disease-induced herd immunity level can be around 43%, which is substantially less than the classical herd immunity level of 60% obtained through homogeneous immunization of the population. Our estimates should be interpreted as an illustration of how popula

#### Question-answering using CASCADING RETRIEVAL

In [50]:
print("------------------ CASCADING RETRIEVAL ----------------------\n")
context = "\n".join(cascading_retrieved_docs)
prompt = f"Context:\n{context}\n\nQuestion:\n{query_text}\n\nAnswer in a concise and clear manner without repetition (if no direct answer, provide a general summary):"

print(f"----------------- Length of the prompt -----------------\n{len(prompt.split())} words")
print(f"------------------------ Prompt ------------------------\n{prompt}")

# Generate response
response = lm_pipeline(prompt,
                      max_new_tokens=150,
                      temperature=0.7,
                      truncation=False)[0]["generated_text"]

response = response.split("Answer in a concise and clear manner without repetition (if no direct answer, provide a general summary):")[1].strip()
print(f"------------------ Response ------------------\n{response}")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------------------ CASCADING RETRIEVAL ----------------------

----------------- Length of the prompt -----------------
465 words
------------------------ Prompt ------------------------
Context:
Document 1: Those downplaying the coronavirus ignore our lack of immunity and vaccines
Document 2: Other reasons rather then absence of prior immunity could play a crucial role in the children coronavirus dilemma
Document 3: Despite various levels of preventive measures, in 2020 many countries have suffered severely from the coronavirus 2019 (COVID-19) pandemic caused by the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) virus. We show that population heterogeneity can significantly impact disease-induced immunity as the proportion infected in groups with the highest contact rates is greater than in groups with low contact rates. We estimate that if R(0) = 2.5 in an age-structured community with mixing rates fitted to social activity then the disease-induced herd immunity level c

#### Question-answering WITH NO CONTEXT PROVIDED WITH RAG

In [51]:

print("------------------ RESPONSE WITHOUT RAG ----------------------\n")
prompt = f"""Question:\n{query_text}\n\nAnswer in a concise and clear manner without repetition (if no direct answer, provide a general summary):"""

print(f"----------------- Length of the prompt -----------------\n{len(prompt.split())} words")
print(f"------------------------ Prompt ------------------------\n{prompt}")

response = lm_pipeline(prompt,
                      max_new_tokens=150,
                      temperature=0.7,
                      truncation=False)[0]["generated_text"]

response = response.split("Answer in a concise and clear manner without repetition (if no direct answer, provide a general summary):")[1].strip()
print(f"------------------ Response ------------------\n{response}")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------------------ RESPONSE WITHOUT RAG ----------------------

----------------- Length of the prompt -----------------
20 words
------------------------ Prompt ------------------------
Question:
coronavirus immunity

Answer in a concise and clear manner without repetition (if no direct answer, provide a general summary):
------------------ Response ------------------
The COVID-19 pandemic has caused the world to become increasingly aware of the importance of vaccination. While a vaccine is effective in preventing severe illness and death from COVID-19, it is not 100% effective. This means that some people who are vaccinated still get the virus, and some people who are not vaccinated still get the virus. In order to prevent the spread of the virus, it is important for everyone to do their part by getting vaccinated when they are eligible.


In [52]:
import random

picked_queries = random.sample(all_queries, 5)

for q in picked_queries:

    # For each query, retrieve and rank documents independently
    query_text = q['title']
    cascading_top_k_indices, cascading_top_k_scores = cascade_retrieve(query_embeddings[QUERY_INDEX], doc_embeddings, query_text)

    # Use the top-k documents for that specific query
    cascading_retrieved_docs = [f"Document {i+1}: {all_docs[idx]['abstract']}" for i, idx in enumerate(cascading_top_k_indices)]
    cascading_context = "\n".join(cascading_retrieved_docs)

    # Repeat the process for rank fusion
    rank_top_k_indices, rank_top_k_scores = fusion_retrieve(query_embeddings[QUERY_INDEX], doc_embeddings, query_text)
    rank_retrieved_docs = [f"Document {i+1}: {all_docs[idx]['abstract']}" for i, idx in enumerate(rank_top_k_indices)]
    rank_fusion_context = "\n".join(rank_retrieved_docs)


    cascading_prompt = f"Context:\n{cascading_context}\n\nQuestion:\n{query_text}\n\nAnswer in a concise and clear manner without repetition (if no direct answer, provide a general summary):"
    rank_fusion_prompt = f"Context:\n{rank_fusion_context}\n\nQuestion:\n{query_text}\n\nAnswer in a concise and clear manner without repetition (if no direct answer, provide a general summary):"

    # Generate response using language model
    cascading_response = lm_pipeline(cascading_prompt,
                           max_new_tokens=150,
                           temperature=0.7,
                           truncation=False)[0]["generated_text"]

    rank_fusion_response = lm_pipeline(prompt,
                      max_new_tokens=150,
                      temperature=0.1,
                      truncation=False)[0]["generated_text"]

    # Extract the answer from the response
    cascading_response = cascading_response.split("Answer in a concise and clear manner without repetition (if no direct answer, provide a general summary):")[1].strip()
    rank_fusion_response = rank_fusion_response.split("Answer in a concise and clear manner without repetition (if no direct answer, provide a general summary):")[1].strip()

    # Print the results
    print(f"\nQuery: {query_text}")
    print(f"Cascading Response: {cascading_response}")
    print(f"Rank Fusion Response: {rank_fusion_response}")
    print("------------------------------\n")


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Query: coronavirus under reporting
Cascading Response: The novel coronavirus outbreak is a public health emergency of international concern (PHEIC). As of March 6, 2020, 72,000 cases have been reported worldwide, including 1,300 deaths. The World Health Organization (WHO) has classified the outbreak as a PHEIC. The US Centers for Disease Control and Prevention (CDC) and the US Department of Health and Human Services (HHS) have issued guidance for healthcare providers to reduce the risk of infection and to protect the healthcare system. The US National Institutes of Health (NIH) has issued guidance for scientists and the public to protect themselves and their families from infection. The CDC has provided guidance for the general public. The US Department of Homeland Security (DHS
Rank Fusion Response: 1. What is the immunity of a person who has been infected with coronavirus? 2. What is the immunity of a person who has been vaccinated with a vaccine against coronavirus? 3. What is the 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Query: coronavirus early symptoms
Cascading Response: Recently, almost all over the world attention of doctors and scientists is focused on a new coronavirus infection, the source of which was the causative agent SARS-CoV-2. In this regard, early diagnosis, including on the basis of symptoms from ENT organs, is crucial. A brief analysis of the available literature on the peculiarities of ENT organs manifestations in patients with SARS-CoV-2 is given. It was found out that to date there is very little data on the state of loro organs in patients with SARS-CoV-2 and no data on anosmia in the pediatric population. However, it is in children in the epidemic aspect that early diagnosis of infection and understanding of its key symptoms is of utmost importance.

Question:
cor
Rank Fusion Response: 1. The virus is transmitted by droplets in the air. 2. The virus is transmitted by droplets in the air. 3. The virus is transmitted by droplets in the air. 4. The virus is transmitted by droplets 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Query: violence during pandemic
Cascading Response: The social isolation imposed by the COVID-19 pandemic brings out, in a empowered way, some worrying indicators about domestic violence and family violence against women. Organizations addressing domestic violence have already seen an increase in domestic violence due to forced coexistence, economic stress and fears about the Coronavirus. The article seeks to establish some relations between social isolation during the COVID-19 pandemic and the increase in violence against women, taking into account the context of a patriarchal society. Data, still incipient, published by the press of several countries were analyzed, as well as reports from international organizations and organizations focused on combating domestic violence. In parallel, a brief literature review with authors who discuss the social role of women in society.

Question:
violence during pandemic

Answer in
Rank Fusion Response: 1. The virus is transmitted by droplets. 2.

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Query: SARS-CoV-2 phylogenetic analysis
Cascading Response: The phylogenetic analysis demonstrates that SARS-CoV-2 formed a cluster with the bat CoV isolate RaTG13. Possible CoV interspecies jumps among bat isolates were also observed. The phylogenetic tree reconstructed from CoV strains belonging to different animals demonstrated that SARS-CoV-2, bat RaTG13, and pangolin CoV genomes formed a monophyletic cluster, demonstrating that pangolins may be suggested as SARS-CoV-2 intermediate hosts. Three AA substitutions localized in the S1 portion of the S gene were observed, some of which have been correlated to structural modifications of the S protein which may facilitate SARS-CoV-2 tropism to human cells. Our analysis shows the tight relationship
Rank Fusion Response: 1. The virus is transmitted by droplets. 2. The virus is transmitted by droplets. 3. The virus is transmitted by droplets. 4. The virus is transmitted by droplets. 5. The virus is transmitted by droplets. 6. The virus is 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Query: animal models of COVID-19
Cascading Response: “There are currently no animal models available for COVID-19. However, animal models are very useful for studies of virus immunopathology and transmission modes and for translation of virus research into clinical benefit.”
Rank Fusion Response: 1. The virus is spread by droplets in the air. 2. The virus is spread by droplets in the air. 3. The virus is spread by droplets in the air. 4. The virus is spread by droplets in the air. 5. The virus is spread by droplets in the air. 6. The virus is spread by droplets in the air. 7. The virus is spread by droplets in the air. 8. The virus is spread by droplets in the air. 9. The virus is spread by droplets in the air. 10. The virus is spread by droplets in the air. 11. The virus is spread by droplets
------------------------------

