In [1]:
from tqdm import tqdm
import json
import ir_datasets
import os
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
from rank_bm25 import BM25Okapi
from transformers import pipeline
import torch
from huggingface_hub import login

api_key = "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"
login(token=api_key)

# Check GPU availability
def get_device():
    if torch.cuda.is_available():
        device = "cuda"
        gpu_properties = torch.cuda.get_device_properties(torch.cuda.current_device())
        print(f"Using GPU: {gpu_properties.name}")
        print(f"CUDA Cores: {gpu_properties.multi_processor_count}")
        print(f"Total Memory: {gpu_properties.total_memory / 1e9:.2f} GB")
        print(f"Compute Capability: {gpu_properties.major}.{gpu_properties.minor}")    else:
        device = "cpu"
        print("Using CPU")
    return device

device = get_device()


Using GPU: NVIDIA L40S


# Section 1: Dataset loading and preparation

In [2]:
# Load dataset
print("Loading the trec covid dataset...")
dataset = ir_datasets.load("cord19/trec-covid")

# Prepare documents and queries
print("Preparing documents and queries...")
all_docs = [{"doc_id": doc.doc_id, "abstract": doc.abstract} for doc in dataset.docs_iter()]
all_queries = [{"query_id": query.query_id, "title": query.title} for query in dataset.queries_iter()]

# Print dataset size information
print(f"Summary: {len(all_docs)} documents and {len(all_queries)} queries are available in the dataset.")

tokenized_docs = [doc['abstract'].split() for doc in all_docs]
qrels = dataset.qrels

Loading the trec covid dataset...
Preparing documents and queries...


# Section 2: Embeddings generation

In [3]:
# Load or generate embeddings
def generate_embeddings():
    if os.path.exists("trec_covid_doc_embeddings.csv") and os.path.exists("trec_covid_query_embeddings.csv"):
        print("Loading precomputed embeddings...")
        doc_embeddings = pd.read_csv("trec_covid_doc_embeddings.csv").values
        query_embeddings = pd.read_csv("trec_covid_query_embeddings.csv").values
    else:
        print("No precomputed embeddings found.")
        print("Generating new embeddings using SentenceTransformer model 'all-MiniLM-L6-v2'.")
        model = SentenceTransformer("all-MiniLM-L6-v2", device=device)
        doc_embeddings = model.encode(all_docs, batch_size=32, show_progress_bar=True)
        query_embeddings = model.encode(all_queries, batch_size=32, show_progress_bar=True)

        # Save embeddings for future use
        pd.DataFrame(doc_embeddings).to_csv("trec_covid_doc_embeddings.csv", index=False)
        pd.DataFrame(query_embeddings).to_csv("trec_covid_query_embeddings.csv", index=False)

    return doc_embeddings, query_embeddings

doc_embeddings, query_embeddings = generate_embeddings()

Loading precomputed embeddings...


# Section 3: Retrieval implementation

### Evaluation metrics
The following functions are used to evaluate the quality of document retrieval methods based on the ranked list of documents returned for a given query.

In [4]:
def dcg_at_k(scores, k):
    """Calculates the Discounted Cumulative Gain (DCG) at rank position k. It discounts relevance scores based on the rank, with lower-ranked documents receiving less weight."""
    # Select top-k scores for evaluation    
    scores = np.array(scores)[:k]
    # Calculate the discounts for each position in the ranking (logarithmic scaling)
    discounts = np.log2(np.arange(2, len(scores) + 2))  # i + 2 to avoid log2(0)
    # Return the sum of the discounted cumulative gains
    return np.sum(scores / discounts)

def ndcg_at_k(retrieved_scores, ideal_scores, k):
    """Computes the Normalized Discounted Cumulative Gain (nDCG) at rank k by comparing the DCG of the retrieved documents with the ideal DCG (sorted relevance scores)."""
    # Calculate the DCG of the retrieved documents
    dcg = dcg_at_k(retrieved_scores, k)
    # Calculate the ideal DCG by sorting the ideal scores in descending order
    idcg = dcg_at_k(sorted(ideal_scores, reverse=True), k)  # Ordinamento decrescente
    # Return the normalized DCG value
    return dcg / idcg if idcg > 0 else 0.0

def recall_at_k(retrieved_docs, relevant_docs, k):
    """Calculates Recall at rank k, measuring the proportion of relevant documents retrieved in the top-k results."""
    # Select the top-k retrieved document IDs
    retrieved_at_k = set(retrieved_docs[:k])
    # Calculate the recall as the ratio of relevant documents retrieved in the top-k
    return len(retrieved_at_k & relevant_docs) / len(relevant_docs) if relevant_docs else 0.0


### Document Retrieval Methods

1. **BM25 Sparse Retrieval**:
   - The **BM25 algorithm** is used to perform sparse retrieval on tokenized documents by calculating a relevance score for each document based on the query. It then returns the indices and relevance scores of the top-k most relevant documents.

2. **Dense Retrieval**:
   - **Dense retrieval** is performed by calculating the cosine similarity between the query embedding and the document embeddings. The top-k documents with the highest similarity scores are returned.

3. **Rank Fusion Retrieval**:
   - Results from both **BM25** and **dense retrieval** are combined using a **rank fusion** technique. Scores from both methods are normalized, weighted by a parameter `alpha`, and the top-k documents are returned based on the combined scores.

4. **Cascading Retrieval**:
   - Initially, a set of documents is retrieved using **BM25**. These documents are then re-ranked using dense retrieval, with a similarity threshold applied to filter documents. The top-k documents are returned based on the final ranking.

In [5]:
# BM25 Sparse Retrieval
def bm25_retrieve(query, bm25, top_k=5):
    """
    Perform sparse retrieval using BM25 on the tokenized documents.
    Returns the indices and scores of the top-k documents.
    """
    tokenized_query = query.split()                     # Tokenize the query into words
    scores = bm25.get_scores(tokenized_query)           # Get BM25 scores for all documents 
    top_k_indices = np.argsort(scores)[-top_k:][::-1]   # Get indices of top-k documents based on BM25 score
    return top_k_indices, scores[top_k_indices]

# Dense Retrieval
def dense_retrieve(query_embedding, doc_embeddings, top_k=5):
    """
    Perform dense retrieval using cosine similarity between query and document embeddings.
    Returns the indices and similarities of the top-k documents.
    """
    similarities = cosine_similarity([query_embedding], doc_embeddings)[0]  # Compute cosine similarity
    top_k_indices = np.argsort(similarities)[-top_k:][::-1]                 # Get indices of top-k most similar documents
    return top_k_indices, similarities[top_k_indices]

# Rank Fusion Retrieval
def fusion_retrieve(dense_query_embedding, doc_embeddings, query, top_k=5, alpha=0.5):
    """
    Implementa il rank fusion riutilizzando le funzioni esistenti di retrieval
    """
    # Perform BM25 retrieval and dense retrieval
    sparse_indices, sparse_scores = bm25_retrieve(query, bm25, top_k=len(doc_embeddings))
    dense_indices, dense_scores = dense_retrieve(dense_query_embedding, doc_embeddings, top_k=len(doc_embeddings))

    # Initialize score arrays
    all_sparse_scores = np.zeros(len(doc_embeddings))
    all_dense_scores = np.zeros(len(doc_embeddings))

    # Fill score arrays with BM25 and dense scores
    all_sparse_scores[sparse_indices] = sparse_scores
    all_dense_scores[dense_indices] = dense_scores
    
    # Normalize scores
    all_sparse_scores = (all_sparse_scores - all_sparse_scores.min()) / (all_sparse_scores.max() - all_sparse_scores.min())
    all_dense_scores = (all_dense_scores - all_dense_scores.min()) / (all_dense_scores.max() - all_dense_scores.min())
    
    # Combine scores using rank fusion (with alpha parameter)
    combined_scores = alpha * all_dense_scores + (1 - alpha) * all_sparse_scores
    
    # Retrieve top-k indices based on the combined scores
    top_k_indices = np.argsort(combined_scores)[-top_k:][::-1]
    return top_k_indices, combined_scores[top_k_indices]

# Cascading Retrieval
def cascade_retrieve(dense_query_embedding, doc_embeddings, query, initial_k=100, final_k=5, dense_threshold=0.7):
    """
    Perform cascading retrieval: sparse retrieval followed by dense re-ranking.
    Filters documents based on a similarity threshold and returns the top-k results.
    """
    # Stage 1: BM25 to get initial candidates
    initial_indices, _ = bm25_retrieve(query, bm25, top_k=initial_k)
    
    # Stage 2: Dense re-ranking of candidate documents
    candidate_embeddings = doc_embeddings[initial_indices]
    _, dense_scores = dense_retrieve(dense_query_embedding, candidate_embeddings, top_k=len(initial_indices))
    
    # Filter candidates by similarity threshold
    qualified_mask = dense_scores >= dense_threshold
    if np.sum(qualified_mask) >= final_k:
        # Select top-k qualified candidates
        qualified_indices = np.where(qualified_mask)[0]
        top_indices = qualified_indices[np.argsort(dense_scores[qualified_indices])[-final_k:][::-1]]
    else:
        # If there are not enough qualified candidates, select top-k by overall scores
        top_indices = np.argsort(dense_scores)[-final_k:][::-1]
    
    # Map filtered indices to original document IDs
    final_indices = initial_indices[top_indices]
    final_scores = dense_scores[top_indices]
    
    return final_indices, final_scores

Running retrieval experiments on all queries...


100%|██████████| 50/50 [00:58<00:00,  1.17s/it]

Saving results and metrics...
Retrieval results and metrics saved to files.





({'sparse': [{'query': {'query_id': '1', 'title': 'coronavirus origin'},
    'results': ['8ccl9aui', 'u65mey2z', 'hewbl5yu', 'dv9m19yk', 'es7q6c90']},
   {'query': {'query_id': '2',
     'title': 'coronavirus response to weather changes'},
    'results': ['526elsrf', 'aiwxlxzt', '9svrz0vj', '0mikqjpj', 'kftchnhz']},
   {'query': {'query_id': '3', 'title': 'coronavirus immunity'},
    'results': ['t9u7d029', '3i466i1y', '73oe5as9', '3ol5ozz6', 'wh9vvgv2']},
   {'query': {'query_id': '4',
     'title': 'how do people die from the coronavirus'},
    'results': ['3pklqjbx', 'c0pzjq4a', 'n95j94ck', 'ktl1x03p', 'mpvkalu4']},
   {'query': {'query_id': '5', 'title': 'animal models of COVID-19'},
    'results': ['3sepefqa', 'oa8vzf02', 'vobslprh', '5vu27b0p', 'y1b1vf1b']},
   {'query': {'query_id': '6', 'title': 'coronavirus test rapid testing'},
    'results': ['sw23wf4b', 'n5du3bqt', '1dr4r3n4', 'zij2wbzs', 'giy00lt5']},
   {'query': {'query_id': '7', 'title': 'serological tests for coronavir

This section of code performs several retrieval experiments using the four different Document Retrieval Methods described earlier.

In [None]:
# Initialize BM25 model
print("Initializing BM25 model.")
bm25 = BM25Okapi(tokenized_docs)

# Run retrieval experiments
def run_retrieval_experiments():
    """
    Execute sparse, dense, rank fusion, and cascading retrieval for all queries.
    Save the results to a JSON file for further analysis.
    """
    results = {
        "sparse": [],
        "dense": [],
        "rank_fusion": [],
        "cascade": []
    }
    metrics = {
        "sparse": [],
        "dense": [],
        "rank_fusion": [],
        "cascade": []
    }

    print("Running retrieval experiments on all queries.")

    # Iterate over each query and its embedding
    for query, query_embedding in tqdm(zip(all_queries, query_embeddings), total=len(all_queries)):
        # Extract the query ID and text for the current query
        query_id = query['query_id']
        query_text = query['title'] if isinstance(query, dict) else query

        # Get the set of relevant document IDs for the current query based on the relevance annotations in 'qrels'
        relevant_docs = {qrel.doc_id for qrel in qrels if qrel.query_id == query_id and qrel.relevance > 0}
        ideal_scores = [1] * len(relevant_docs)  # Relevant documents have an ideal score of 1
        
        # Sparse Retrieval using BM25
        sparse_indices, sparse_scores = bm25_retrieve(query_text, bm25)                 # Retrieve the top-k BM25 documents and their scores
        sparse_docs = [all_docs[idx]['doc_id'] for idx in sparse_indices]               # Get document IDs from the indices
        results["sparse"].append({"query": query, "results": sparse_docs})              # Store the BM25 results for the current query
        
        sparse_dcg = dcg_at_k(sparse_scores, k=5)
        sparse_ndcg = ndcg_at_k(sparse_scores, ideal_scores, k=5)
        sparse_recall = recall_at_k(sparse_docs, relevant_docs, k=5)
        metrics["sparse"].append({"dcg@5": sparse_dcg, "ndcg@5": sparse_ndcg, "recall@5": sparse_recall})

        # Dense Retrieval using cosine similarity
        dense_indices, dense_scores = dense_retrieve(query_embedding, doc_embeddings)   # Retrieve the top-k documents based on cosine similarity of embeddings
        dense_docs = [all_docs[idx]['doc_id'] for idx in dense_indices]
        results["dense"].append({"query": query, "results": dense_docs})
        
        dense_dcg = dcg_at_k(dense_scores, k=5)
        dense_ndcg = ndcg_at_k(dense_scores, ideal_scores, k=5)
        dense_recall = recall_at_k(dense_docs, relevant_docs, k=5)
        metrics["dense"].append({"dcg@5": dense_dcg, "ndcg@5": dense_ndcg, "recall@5": dense_recall})

        # Rank Fusion Retrieval by combining sparse (BM25) and dense result
        fusion_indices, fusion_scores = fusion_retrieve(                                # Combine BM25 and cosine similarity results
            query_embedding, doc_embeddings, query_text
        )
        fusion_docs = [all_docs[idx]['doc_id'] for idx in fusion_indices]
        results["rank_fusion"].append({"query": query, "results": fusion_docs})
        
        fusion_dcg = dcg_at_k(fusion_scores, k=5)
        fusion_ndcg = ndcg_at_k(fusion_scores, ideal_scores, k=5)
        fusion_recall = recall_at_k(fusion_docs, relevant_docs, k=5)
        metrics["rank_fusion"].append({"dcg@5": fusion_dcg, "ndcg@5": fusion_ndcg, "recall@5": fusion_recall})

        # Cascade Retrieval: First use BM25, then re-rank using dense retrieval
        cascade_indices, cascade_scores = cascade_retrieve(                             # Perform cascading retrieval
            query_embedding, doc_embeddings, query_text
        )
        cascade_docs = [all_docs[idx]['doc_id'] for idx in cascade_indices]
        results["cascade"].append({"query": query, "results": cascade_docs})
        
        cascade_dcg = dcg_at_k(cascade_scores, k=5)
        cascade_ndcg = ndcg_at_k(cascade_scores, ideal_scores, k=5)
        cascade_recall = recall_at_k(cascade_docs, relevant_docs, k=5)
        metrics["cascade"].append({"dcg@5": cascade_dcg, "ndcg@5": cascade_ndcg, "recall@5": cascade_recall})

    # Save results and metrics to JSON files
    print("Saving results and metrics.")
    with open("retrieval_results.json", "w") as f:
        json.dump(results, f, indent=4)
    with open("retrieval_metrics.json", "w") as f:
        json.dump(metrics, f, indent=4)
    print("Retrieval results and metrics saved to files.")
    
    return results, metrics


run_retrieval_experiments()

# Section 4: QA with Language Model

In [16]:
# QA for the first query
QUERY_INDEX = 3                                                     # Index of the query to be used for retrieval
query = all_queries[QUERY_INDEX - 1]                                # Select the query from the list based on the index
query_text = query['title'] if isinstance(query, dict) else query   # Get the query text

# Retrieval calls:

# Perform dense retrieval using query embedding and document embeddings
dense_top_k_indices, dense_top_k_scores = dense_retrieve(query_embeddings[QUERY_INDEX], doc_embeddings)
# Perform sparse retrieval using BM25 on the query text
sparse_top_k_indices, sparse_top_k_scores = bm25_retrieve(query_text, bm25)
# Perform rank fusion retrieval by combining BM25 and dense retrieval results
rank_top_k_indices, rank_top_k_scores = fusion_retrieve(
    query_embeddings[QUERY_INDEX], 
    doc_embeddings, 
    query_text
)
# Perform cascading retrieval: first BM25, then re-rank with dense retrieval
cascading_top_k_indices, cascading_top_k_scores = cascade_retrieve(
    query_embeddings[QUERY_INDEX], 
    doc_embeddings, 
    query_text
)

# Get retrieved documents for each method
dense_retrieved_docs = [f"Document {i+1}: {all_docs[idx]['abstract']}" for i, idx in enumerate(dense_top_k_indices)]
sparse_retrieved_docs = [f"Document {i+1}: {all_docs[idx]['abstract']}" for i, idx in enumerate(sparse_top_k_indices)]
rank_retrieved_docs = [f"Document {i+1}: {all_docs[idx]['abstract']}" for i, idx in enumerate(rank_top_k_indices)]
cascading_retrieved_docs = [f"Document {i+1}: {all_docs[idx]['abstract']}" for i, idx in enumerate(cascading_top_k_indices)]

# Definition of the model that will be used to generate the various responses.
lm_pipeline = pipeline("text-generation", 
                      model="meta-llama/Llama-3.2-1B",
                      device=0 if device == "cuda" else -1)

#### Question-answering using DENSE RETRIEVAL

In [17]:
# --- QUESTION-ANSWER USING DENSE 
print("------------------ DENSE RETRIEVAL ----------------------\n")
context = "\n".join(dense_retrieved_docs)
prompt = f"Context:\n{context}\n\nQuestion:\n{query_text}\n\nAnswer:"

print(f"----------------- Length of the prompt -----------------\n{len(prompt.split())} words")
print(f"------------------------ Prompt ------------------------\n{prompt}")

# Generate response
lm_pipeline = pipeline("text-generation", 
                      model="meta-llama/Llama-3.2-1B",
                      device=0 if device == "cuda" else -1)
response = lm_pipeline(prompt, 
                      max_new_tokens=150, 
                      temperature=0.1, 
                      truncation=False)[0]["generated_text"]
response = response.split("Answer:")[1].strip()

print(f"------------------ Response ------------------\n{response}")

------------------ DENSE RETRIEVAL ----------------------

----------------- Length of the prompt -----------------
814 words
------------------------ Prompt ------------------------
Context:
Document 1: As at the time of writing, the global confirmed cases of the coronavirus disease (COVID-19) stands at 5,075,181 with 330,981 deaths and 1,936,331 recoveries1 . According to the Canter for Disease Control, the aged and individuals with compromised immune systems due to infections (i.e., human immunodeficiency viruses (HIV), etc.) are at higher risk of contracting the COVID-19. This article is protected by copyright. All rights reserved.
Document 2: The pathogenic coronavirus has been wreaking havoc worldwide since January. Infection with SARS-CoV-2 is problematic as no one has prior immunity, and no specific antiviral treatments are available. While many people with COVID-19 develop mild to moderate symptoms, some develop profound seemingly unchecked inflammatory responses leading to ac

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


------------------ Response ------------------
immunity

Explanation: The pathogenic coronavirus has been wreaking havoc worldwide since January. Infection with SARS-CoV-2 is problematic as no one has prior immunity, and no specific antiviral treatments are available. While many people with COVID-19 develop mild to moderate symptoms, some develop profound seemingly unchecked inflammatory responses leading to acute lung injury and hypoxemic respiratory failure, the most common cause for death.

This case series describes the characteristics of a cohort of patients who died of coronavirus disease 2019 in China.

At the population level, the virus-host relationship is not set up to end with the complete elimination of either or both. Pathogen-resistant individuals will always remain in the host population. In turn, the virus can never completely


#### Question-answering using SPARSE RETRIEVAL

In [18]:
print("------------------ SPARSE RETRIEVAL ----------------------\n")
context = "\n".join(sparse_retrieved_docs)
prompt = f"Context:\n{context}\n\nQuestion:\n{query_text}\n\nAnswer:"

print(f"----------------- Length of the prompt -----------------\n{len(prompt.split())} words")
print(f"------------------------ Prompt ------------------------\n{prompt}")

# Generate response
response = lm_pipeline(prompt, 
                      max_new_tokens=150, 
                      temperature=0.1, 
                      truncation=False)[0]["generated_text"]

response = response.split("Answer:")[1].strip()
print(f"------------------ Response ------------------\n{response}")

------------------ SPARSE RETRIEVAL ----------------------

----------------- Length of the prompt -----------------
449 words
------------------------ Prompt ------------------------
Context:
Document 1: Those downplaying the coronavirus ignore our lack of immunity and vaccines
Document 2: Other reasons rather then absence of prior immunity could play a crucial role in the children coronavirus dilemma
Document 3: Despite various levels of preventive measures, in 2020 many countries have suffered severely from the coronavirus 2019 (COVID-19) pandemic caused by the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) virus. We show that population heterogeneity can significantly impact disease-induced immunity as the proportion infected in groups with the highest contact rates is greater than in groups with low contact rates. We estimate that if R(0) = 2.5 in an age-structured community with mixing rates fitted to social activity then the disease-induced herd immunity level can 

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


------------------ Response ------------------
1. The question is not clear. What is the question? What is the context? What is the problem? What is the goal? What is the hypothesis? What is the objective? What is the purpose? What is the aim? What is the objective? What is the hypothesis? What is the problem? What is the context? What is the goal? What is the objective? What is the purpose? What is the aim? What is the hypothesis? What is the problem? What is the context? What is the goal? What is the objective? What is the purpose? What is the aim? What is the hypothesis? What is the problem? What is the context? What is the goal? What is the objective? What


#### Question-answering using RANK FUSION

In [19]:
print("------------------ RANK FUSION ----------------------\n")
context = "\n".join(rank_retrieved_docs)
prompt = f"Context:\n{context}\n\nQuestion:\n{query_text}\n"

print(f"----------------- Length of the prompt -----------------\n{len(prompt.split())} words")
print(f"------------------------ Prompt ------------------------\n{prompt}")

# Generate response
response = lm_pipeline(prompt, 
                      max_new_tokens=150, 
                      temperature=0.1, 
                      truncation=False)[0]["generated_text"]

response = response.split("Answer:")[1].strip()
print(f"------------------ Response ------------------\n{response}")

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


------------------ RANK FUSION ----------------------

----------------- Length of the prompt -----------------
505 words
------------------------ Prompt ------------------------
Context:
Document 1: Those downplaying the coronavirus ignore our lack of immunity and vaccines
Document 2: Of the seven coronaviruses associated with disease in humans, SARS-CoV, MERS-CoV and SARS-CoV-2 cause considerable mortality but also share significant sequence homology, and potentially antigenic epitopes capable of inducing an immune response. The degree of similarity is such that perhaps prior exposure to one virus could confer partial immunity to another. Indeed, data suggests a considerable amount of cross-reactivity and recognition by the hosts immune response between different coronavirus infections. While the ongoing COVID-19 outbreak rapidly overwhelmed medical facilities of particularly Europe and North America, accounting for 78% of global deaths, only 8% of deaths have occurred in Asia where 

#### Question-answering using CASCADING RETRIEVAL

In [20]:
# ---- QUESTION-ANSWER USING CASCADING RETRIEVAL ----
print("------------------ CASCADING RETRIEVAL ----------------------\n")
context = "\n".join(cascading_retrieved_docs)
prompt = f"Context:\n{context}\n\nQuestion:\n{query_text}\n\nAnswer:"

print(f"----------------- Length of the prompt -----------------\n{len(prompt.split())} words")
print(f"------------------------ Prompt ------------------------\n{prompt}")

# Generate response
response = lm_pipeline(prompt, 
                      max_new_tokens=150, 
                      temperature=0.7, 
                      truncation=False)[0]["generated_text"]

response = response.split("Answer:")[1].strip()
print(f"------------------ Response ------------------\n{response}")

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


------------------ CASCADING RETRIEVAL ----------------------

----------------- Length of the prompt -----------------
449 words
------------------------ Prompt ------------------------
Context:
Document 1: Those downplaying the coronavirus ignore our lack of immunity and vaccines
Document 2: Other reasons rather then absence of prior immunity could play a crucial role in the children coronavirus dilemma
Document 3: Despite various levels of preventive measures, in 2020 many countries have suffered severely from the coronavirus 2019 (COVID-19) pandemic caused by the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) virus. We show that population heterogeneity can significantly impact disease-induced immunity as the proportion infected in groups with the highest contact rates is greater than in groups with low contact rates. We estimate that if R(0) = 2.5 in an age-structured community with mixing rates fitted to social activity then the disease-induced herd immunity level c

#### Question-answering WITH NO CONTEXT PROVIDED WITH RAG

In [21]:

print("------------------ RESPONSE WITHOUT RAG ----------------------\n")
prompt = f"""Question:\n{query_text}\n\nAnswer:"""

print(f"----------------- Length of the prompt -----------------\n{len(prompt.split())} words")
print(f"------------------------ Prompt ------------------------\n{prompt}")

response = lm_pipeline(prompt,
                      max_new_tokens=150,
                      temperature=0.1,
                      truncation=False)[0]["generated_text"]

response = response.split("Answer:")[1].strip()
print(f"------------------ Response ------------------\n{response}")

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


------------------ RESPONSE WITHOUT RAG ----------------------

----------------- Length of the prompt -----------------
4 words
------------------------ Prompt ------------------------
Question:
coronavirus immunity

Answer:
------------------ Response ------------------
1. If a person is infected with coronavirus, he/she will be immune to it for 3 months. 2. If a person is infected with coronavirus, he/she will be immune to it for 3 months. 3. If a person is infected with coronavirus, he/she will be immune to it for 3 months. 4. If a person is infected with coronavirus, he/she will be immune to it for 3 months. 5. If a person is infected with coronavirus, he/she will be immune to it for 3 months. 6. If a person is infected with coronavirus, he/she will be immune to it for 3 months. 7. If a person is infected with coronavirus, he
