# Hybrid Search in RAG
### Naive Retrieval + Keyword Search

In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [2]:
# sample documents
documents=[
    'this is a list containing sample documents.',
    'keywords are important for keyword-based search!',
    'Document analysis involves extracting keywords.',
    'Keyword-based search relies on sparse embeddings.'
]

In [3]:
import re

def preprocess_text(text):
  text=text.lower()
  # remove punctuation
  text=re.sub(r'[^\w\s]','', text)
  return text

In [4]:
query="keyword-based search"

In [5]:
preprocessed_text=[preprocess_text(doc) for doc in documents]
preprocessed_text

['this is a list containing sample documents',
 'keywords are important for keywordbased search',
 'document analysis involves extracting keywords',
 'keywordbased search relies on sparse embeddings']

In [6]:
query=preprocess_text(query)
query

'keywordbased search'

In [7]:
vector=TfidfVectorizer()

In [8]:
X=vector.fit_transform(preprocessed_text)
X.toarray()

array([[0.        , 0.        , 0.40824829, 0.        , 0.40824829,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.40824829, 0.        , 0.        , 0.40824829, 0.        ,
        0.        , 0.40824829, 0.        , 0.        , 0.40824829],
       [0.        , 0.4533864 , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.4533864 , 0.4533864 , 0.        ,
        0.        , 0.35745504, 0.35745504, 0.        , 0.        ,
        0.        , 0.        , 0.35745504, 0.        , 0.        ],
       [0.46516193, 0.        , 0.        , 0.46516193, 0.        ,
        0.        , 0.46516193, 0.        , 0.        , 0.46516193,
        0.        , 0.        , 0.36673901, 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.43671931, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.34431452, 0.        , 0

In [9]:
query_embed=vector.transform([query])
query_embed.toarray()

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.70710678, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.70710678, 0.        , 0.        ]])

In [10]:
similarities=cosine_similarity(X,query_embed)
similarities

array([[0.        ],
       [0.50551777],
       [0.        ],
       [0.48693426]])

In [11]:
import numpy as np

In [12]:
ranked_indices=np.argsort(similarities, axis=0)[::-1].flatten()
ranked_indices

array([1, 3, 2, 0])

In [13]:
ranked_docs=[documents[i] for i in ranked_indices]

In [14]:
# printing ranked docs
for i,doc in enumerate(ranked_docs):
  print(f"Rank{i+1} : {doc}")

Rank1 : keywords are important for keyword-based search!
Rank2 : Keyword-based search relies on sparse embeddings.
Rank3 : Document analysis involves extracting keywords.
Rank4 : this is a list containing sample documents.


In [15]:
!pip install -q -U pypdf langchain langchain-community

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/310.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.5/310.5 kB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.5/2.5 MB[0m [31m117.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m48.7 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/444.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m444.0/444.0 kB[0m [31m31.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/64.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [17]:
file_path=r"/content/RAG for NLP tasks.pdf"

In [18]:
from langchain_community.document_loaders import PyPDFLoader

loader=PyPDFLoader(file_path, extract_images=True)

docs=loader.load()

In [19]:
docs[0].page_content

'Retrieval-Augmented Generation for\nKnowledge-Intensive NLP Tasks\nPatrick Lewis†‡, Ethan Perez⋆,\nAleksandra Piktus†, Fabio Petroni†, Vladimir Karpukhin†, Naman Goyal†, Heinrich Küttler†,\nMike Lewis†, Wen-tau Yih†, Tim Rocktäschel†‡, Sebastian Riedel†‡, Douwe Kiela†\n†Facebook AI Research; ‡University College London; ⋆New York University;\nplewis@fb.com\nAbstract\nLarge pre-trained language models have been shown to store factual knowledge\nin their parameters, and achieve state-of-the-art results when ﬁne-tuned on down-\nstream NLP tasks. However, their ability to access and precisely manipulate knowl-\nedge is still limited, and hence on knowledge-intensive tasks, their performance\nlags behind task-speciﬁc architectures. Additionally, providing provenance for their\ndecisions and updating their world knowledge remain open research problems. Pre-\ntrained models with a differentiable access mechanism to explicit non-parametric\nmemory have so far been only investigated for extract

In [20]:
len(docs)

19

In [21]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

text_chunks=text_splitter.split_documents(docs)
len(text_chunks)

158

In [22]:
def clean_metadata(documents):
    cleaned_docs = []
    for doc in documents:
        new_meta = {}
        for k, v in doc.metadata.items():
            clean_key = k.replace(".", "_").replace("-", "_")
            new_meta[clean_key] = v
        doc.metadata = new_meta
        cleaned_docs.append(doc)
    return cleaned_docs

text_chunks = clean_metadata(text_chunks)

In [23]:
!pip install chromadb

Collecting chromadb
  Downloading chromadb-1.0.20-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.3 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.2-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.22.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.9 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.36.0-py3-none-any.whl.metadata (2.4 kB)
Collecting pypika>=0.48.9 (from chromadb)
  Downloading PyPika-0.48.9.tar.gz (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [

In [24]:
from langchain_community.vectorstores import Chroma

In [25]:
!pip install langchain_openai

Collecting langchain_openai
  Downloading langchain_openai-0.3.32-py3-none-any.whl.metadata (2.4 kB)
Downloading langchain_openai-0.3.32-py3-none-any.whl (74 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.5/74.5 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain_openai
Successfully installed langchain_openai-0.3.32


In [26]:
from google.colab import userdata

In [27]:
import os
os.environ["OPENAI_API_KEY"]=userdata.get("openai_api_key")

In [28]:
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# Initialize embeddings correctly
embeddings = OpenAIEmbeddings()

In [29]:
# Use Chroma in local mode with persist_directory
vectorstore = Chroma.from_documents(
    documents=text_chunks,
    embedding=embeddings,
    persist_directory="./chroma_db"  # ensure persistence
)


In [30]:
vector_retriever=vectorstore.as_retriever(search_kwargs={"k":3}) # Dense vector

In [31]:
!pip install rank_bm25 # to perform keyword search it is necessary updadte version of TfIDFVectorizer

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2


# ***BM25Retriever***
### OpenSearchBM25Retriever is a keyword-based Retriever that fetches Documents matching a query from an OpenSearchDocumentStore . It determines the similarity between Documents and the query based on the BM25 algorithm, which computes a weighted word overlap between the two strings.
### It is a ranking function used in information retrieval systems to estimate the relevance of documents to a given search query
### fundamental improvement of BM25 compared to traditional TF-IDF is that BM25 takes the document length into account. With BM25, a 10-word document that contains one keyword would be a stronger candidate than a 1000-word document that contains 10 keywords.

## Is BM25 dense or sparse?
### Sparse retrieval methods, like TF-IDF or BM25, represent text as high-dimensional vectors where most dimensions are zero, encoding the presence or absence of specific words.



In [32]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever

In [33]:
keyword_retriever=BM25Retriever.from_documents(text_chunks)

In [34]:
keyword_retriever.k=3

# Mixing vector search and keyword search for hybrid search

## hybrid_score=(1-alpha)\*sparse_vector(BM25Retriever) + alpha\*dense_vector(OpenAIEmbeddings)

In [35]:
retriever=EnsembleRetriever(retrievers=[vector_retriever,keyword_retriever],weights=[.3,.7])

In [36]:
retriever

EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x7c8482faf770>, search_kwargs={'k': 3}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x7c8481df6c30>, k=3)], weights=[0.3, 0.7])

# **Getting model for Better Response**

In [37]:
!pip install bitsandbytes accelerate

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0


In [38]:
model_name="HuggingFaceH4/zephyr-7b-beta"

In [39]:
import torch
from transformers import ( AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline)
from langchain import HuggingFacePipeline

In [40]:
### Function to load 4-bit quantized model
def loading_quantized_model(model):
  bnb_config=BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_use_double_quant=True,
      bnb_4bit_compute_dtype=torch.bfloat16
  )

  model=AutoModelForCausalLM.from_pretrained(
      model_name,
      torch_dtype=torch.bfloat16,
      quantization_config=bnb_config
  )

  return model

In [41]:
### initializing Tokenizer
def getting_tokenizer(model_name):
  tokenizer=AutoTokenizer.from_pretrained(model_name, return_token_type_ids=False)
  tokenizer.bos_token_id=1 # setting begging of sentence id
  return tokenizer

In [42]:
tokenizer=getting_tokenizer(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

In [43]:
tokenizer

LlamaTokenizerFast(name_or_path='HuggingFaceH4/zephyr-7b-beta', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='left', truncation_side='left', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>', 'additional_special_tokens': ['<unk>', '<s>', '</s>']}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
)

In [44]:
model=loading_quantized_model(model_name)

config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

model-00007-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00006-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00002-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00004-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00001-of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

model-00003-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00005-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00008-of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [45]:
pipeline=pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    use_cache=True,
    device_map='auto',
    max_length=2048,
    do_sample=True,
    top_k=5,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id
)

Device set to use cuda:0


In [46]:
llm=HuggingFacePipeline(pipeline=pipeline)

  llm=HuggingFacePipeline(pipeline=pipeline)


## Creating chain

In [47]:
from langchain.chains import RetrievalQA

In [48]:
normal_chain=RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_retriever
)

In [49]:
hybrid_chain=RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever
)

## Generating Response with Normal Retrieval

In [50]:
response1=normal_chain.invoke("what is RAG token model?")

In [51]:
response1

{'query': 'what is RAG token model?',
 'result': 'Use the following pieces of context to answer the question at the end. If you don\'t know the answer, just say that you don\'t know, don\'t try to make up an answer.\n\nblob/master/examples/rag/README.md and an interactive demo of a RAG model can be found\nat https://huggingface.co/rag/\n2https://github.com/pytorch/fairseq\n3https://github.com/huggingface/transformers\n17\n\nto predict each target token. The second approach, RAG-Token, can predict each target token based\non a different document. In the following, we formally introduce both models and then describe the\npη and pθ components, as well as the training and decoding procedure.\n2.1 Models\nRAG-Sequence Model The RAG-Sequence model uses the same retrieved document to generate\nthe complete sequence. Technically, it treats the retrieved document as a single latent variable that\n\nBART were factual in a further 17% of cases, clearly demonstrating the effectiveness of RAG on\nt

In [52]:
print(response1.get("result"))

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

blob/master/examples/rag/README.md and an interactive demo of a RAG model can be found
at https://huggingface.co/rag/
2https://github.com/pytorch/fairseq
3https://github.com/huggingface/transformers
17

to predict each target token. The second approach, RAG-Token, can predict each target token based
on a different document. In the following, we formally introduce both models and then describe the
pη and pθ components, as well as the training and decoding procedure.
2.1 Models
RAG-Sequence Model The RAG-Sequence model uses the same retrieved document to generate
the complete sequence. Technically, it treats the retrieved document as a single latent variable that

BART were factual in a further 17% of cases, clearly demonstrating the effectiveness of RAG on
the task over a state-of-the-art generation model. Evaluators also ﬁnd

## Generating Response with Hybrid Retrieval

In [53]:
response2=hybrid_chain.invoke("what is RAG token model?")

In [54]:
response2

{'query': 'what is RAG token model?',
 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nthe parameters of a language model? arXiv e-prints, 2020. URL https://arxiv.org/abs/\n2002.08910.\n[53] Stephen Robertson and Hugo Zaragoza. The probabilistic relevance framework: Bm25 and\nbeyond. Found. Trends Inf. Retr., 3(4):333–389, April 2009. ISSN 1554-0669. doi: 10.1561/\n1500000019. URL https://doi.org/10.1561/1500000019.\n[54] Irene Solaiman, Miles Brundage, Jack Clark, Amanda Askell, Ariel Herbert-V oss, Jeff Wu, Alec\n\nMS-\nMARCO\ndeﬁne middle\near\nBART ?The middle ear is the part of the ear between the middle ear and the nose.\nRAG-T The middle ear is the portion of the ear internal to the eardrum.\nRAG-S The middle ear includes the tympanic cavity and the three ossicles.\nwhat currency\nneeded in\nscotland\nBART The currency needed in Scotland is Pound sterl

In [55]:
print(response2.get("result"))

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

the parameters of a language model? arXiv e-prints, 2020. URL https://arxiv.org/abs/
2002.08910.
[53] Stephen Robertson and Hugo Zaragoza. The probabilistic relevance framework: Bm25 and
beyond. Found. Trends Inf. Retr., 3(4):333–389, April 2009. ISSN 1554-0669. doi: 10.1561/
1500000019. URL https://doi.org/10.1561/1500000019.
[54] Irene Solaiman, Miles Brundage, Jack Clark, Amanda Askell, Ariel Herbert-V oss, Jeff Wu, Alec

MS-
MARCO
deﬁne middle
ear
BART ?The middle ear is the part of the ear between the middle ear and the nose.
RAG-T The middle ear is the portion of the ear internal to the eardrum.
RAG-S The middle ear includes the tympanic cavity and the three ossicles.
what currency
needed in
scotland
BART The currency needed in Scotland is Pound sterling.
RAG-T Pound is the currency needed in Scotland.
RAG-S The curren

## Generating Response with Normal Retrieval

In [56]:
response1=normal_chain.invoke("what is Abstractive question answering ?")

In [57]:
response1

{'query': 'what is Abstractive question answering ?',
 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nQuestions (NQ) [29], TriviaQA (TQA) [24]. WebQuestions (WQ) [3] and CuratedTrec (CT) [2]. As\nCT and WQ are small, we follow DPR [26] by initializing CT and WQ models with our NQ RAG\nmodel. We use the same train/dev/test splits as prior work [ 31, 26] and report Exact Match (EM)\nscores. For TQA, to compare with T5 [52], we also evaluate on the TQA Wiki test set.\n3.2 Abstractive Question Answering\nRAG models can go beyond simple extractive QA and answer questions with free-form, abstractive\n\nhttp://arxiv.org/abs/1412.6980.\n[29] Tom Kwiatkowski, Jennimaria Palomaki, Olivia Redﬁeld, Michael Collins, Ankur Parikh,\nChris Alberti, Danielle Epstein, Illia Polosukhin, Matthew Kelcey, Jacob Devlin, Ken-\nton Lee, Kristina N. Toutanova, Llion Jones, Ming-Wei C

In [58]:
print(response1.get("result"))

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Questions (NQ) [29], TriviaQA (TQA) [24]. WebQuestions (WQ) [3] and CuratedTrec (CT) [2]. As
CT and WQ are small, we follow DPR [26] by initializing CT and WQ models with our NQ RAG
model. We use the same train/dev/test splits as prior work [ 31, 26] and report Exact Match (EM)
scores. For TQA, to compare with T5 [52], we also evaluate on the TQA Wiki test set.
3.2 Abstractive Question Answering
RAG models can go beyond simple extractive QA and answer questions with free-form, abstractive

http://arxiv.org/abs/1412.6980.
[29] Tom Kwiatkowski, Jennimaria Palomaki, Olivia Redﬁeld, Michael Collins, Ankur Parikh,
Chris Alberti, Danielle Epstein, Illia Polosukhin, Matthew Kelcey, Jacob Devlin, Ken-
ton Lee, Kristina N. Toutanova, Llion Jones, Ming-Wei Chang, Andrew Dai, Jakob
Uszkoreit, Quoc Le, and Slav Petrov. Natural Questions

## Generating Response with Hybrid Retrieval

In [60]:
response2=hybrid_chain.invoke("what is Abstractive question answering ?")

In [62]:
response2

{'query': 'what is Abstractive question answering ?',
 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nfact checking [ 56], fact completion [ 48], long-form question answering [ 12], Wikipedia article\ngeneration [36], dialogue [ 41, 65, 9, 13], translation [ 17], and language modeling [ 19, 27]. Our\nwork uniﬁes previous successes in incorporating retrieval into individual tasks, showing that a single\nretrieval-based architecture is capable of achieving strong performance across several tasks.\n8\n\nNavigable Small World approximation for fast retrieval [37]. During training, we retrieve the top\nkdocuments for each query. We consider k∈{5,10}for training and set kfor test time using dev\ndata. We now discuss experimental details for each task.\n3.1 Open-domain Question Answering\nOpen-domain question answering (QA) is an important real-world application an

In [63]:
print(response2.get("result"))

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

fact checking [ 56], fact completion [ 48], long-form question answering [ 12], Wikipedia article
generation [36], dialogue [ 41, 65, 9, 13], translation [ 17], and language modeling [ 19, 27]. Our
work uniﬁes previous successes in incorporating retrieval into individual tasks, showing that a single
retrieval-based architecture is capable of achieving strong performance across several tasks.
8

Navigable Small World approximation for fast retrieval [37]. During training, we retrieve the top
kdocuments for each query. We consider k∈{5,10}for training and set kfor test time using dev
data. We now discuss experimental details for each task.
3.1 Open-domain Question Answering
Open-domain question answering (QA) is an important real-world application and common testbed
for knowledge-intensive tasks [20]. We treat questions and an