In [3]:
# nest_asyncio patches asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably needed because the notebook kernel runs its own loop
# and later cells perform async retrieval — confirm.
import nest_asyncio
nest_asyncio.apply()

In [None]:
!mkdir data
!wget --user-agent "Mozilla" "https://arxiv.org/pdf/2307.09288.pdf" -O "data/llama2.pdf"

In [5]:
from pathlib import Path
from llama_index.readers.file import PyMuPDFReader

# Read the PDF saved at ./data/llama2.pdf into `documents`, which the indexing cell consumes.
# NOTE(review): presumably yields one Document per PDF page — confirm against PyMuPDFReader.
loader = PyMuPDFReader()
documents = loader.load(file_path="./data/llama2.pdf")

In [6]:
from dotenv import load_dotenv

# Load environment variables from a .env file; the cell displays the call's return value.
# NOTE(review): presumably this supplies the OpenAI API key for the clients created below — confirm.
load_dotenv()

True

In [7]:
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# LLM used by the query-generation step; a low temperature (0.1) is configured.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
# Embedding model handed to the index-building cell, configured with embed_batch_size=256.
embed_model = OpenAIEmbedding(model="text-embedding-3-small", embed_batch_size=256)

In [8]:
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter

# Split the loaded documents with chunk_size=1024 and build the vector index.
# The keyword must be spelled `embed_model`: a misspelled keyword is swallowed
# as an unknown extra argument, so the embedding model configured above would
# silently go unused and the library default would be applied instead.
splitter = SentenceSplitter(chunk_size=1024)
index = VectorStoreIndex.from_documents(
    documents, transformations=[splitter], embed_model=embed_model
)


## Step 1: Query generation / rewriting

The first step is to generate queries from the original query to better match the query intent, and increase precision/recall of the retrieved results. For instance, we might be able to rewrite the query into smaller queries.

We can do this by prompting ChatGPT.

In [9]:
from llama_index.core import PromptTemplate


In [None]:
# Original user question; it is passed to generateQueries() to produce several search queries.
query = "How do the models developed in this work compare to open-source chat models based on the benchmarks tested?"

In [11]:
# Prompt used to expand one query into several. It has two placeholders:
#   {num_queries} - how many search queries the LLM is asked to write
#   {query}       - the original input query
# The prompt asks for one query per line, which generateQueries() relies on
# when it splits the completion text.
queryGenPromptStr = (
    "You are a helpful assistant that generates multiple search queries based on a "
    "single input query. Generate {num_queries} search queries, one on each line, "
    "related to the following input query:\n"
    "Query: {query}\n"
    "Queries:\n"
)
# PromptTemplate wrapper around the same string.
# NOTE(review): generateQueries() formats the raw string directly rather than this object.
QueryGenPrompt = PromptTemplate(queryGenPromptStr)

In [12]:
# Display the template object to check the template variables it detected.
QueryGenPrompt

PromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['num_queries', 'query'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template='You are a helpful assistant that generates multiple search queries based on a single input query. Generate {num_queries} search queries, one on each line, related to the following input query:\nQuery: {query}\nQueries:\n')

In [13]:
def generateQueries(llm, query, numQueries=4):
    """Ask the LLM to rewrite ``query`` into several search queries.

    Args:
        llm: object exposing ``complete(prompt)`` whose result has a ``text``
            attribute containing the completion.
        query: the original query string to expand.
        numQueries: total query budget. The prompt requests ``numQueries - 1``
            rewrites (presumably leaving one slot for the original query —
            TODO confirm with the fusion step).

    Returns:
        list[str]: one generated query per non-empty line of the completion,
        with surrounding whitespace removed. Any numbering the model adds
        (e.g. ``"1. "``) is left untouched.
    """
    queryGenPrompt = queryGenPromptStr.format(num_queries=numQueries - 1, query=query)
    response = llm.complete(queryGenPrompt)
    # Completions often contain blank lines or trailing newlines / "\r"; an empty
    # string would otherwise be sent to every retriever as a query.
    stripped = (line.strip() for line in response.text.splitlines())
    return [line for line in stripped if line]

In [15]:
# Generate the rewritten queries; with numQueries=4 the prompt requests 3 of them.
queries = generateQueries(llm, query, numQueries=4)

In [16]:
# Inspect the generated queries before running retrieval.
queries

['1. Comparison of models developed in this work to open-source chat models in terms of benchmark performance',
 '2. Evaluation of open-source chat models against models developed in this work using benchmark tests',
 '3. Analysis of differences between models developed in this work and existing open-source chat models in benchmark evaluations']

### Step 2: Perform vector search for each query
Now we run retrieval for each query. This means that we fetch the top-k most relevant results from each retriever.

NOTE: We can also have multiple retrievers. Then the total number of retrieval calls we run is N × M, where N is the number of retrievers and M is the number of generated queries. Hence there will also be N × M retrieved lists.

In [25]:
from tqdm.asyncio import tqdm

async def runQuery(queries, retrivers):
    """Run every query against every retriever concurrently.

    Args:
        queries: sequence of query strings.
        retrivers: sequence of retriever objects exposing an async
            ``aretrieve(query)`` method.

    Returns:
        dict mapping ``(query, retriever_index)`` to the value returned by that
        retriever's ``aretrieve`` for that query. There is one entry per
        (query, retriever) pair; duplicate query strings share keys, so a later
        duplicate overwrites an earlier one.
    """
    # Record the key for each task as it is scheduled so results can be matched
    # back to the right (query, retriever) pair. Zipping results against
    # `queries` alone would truncate to len(queries) and mislabel entries,
    # because tasks are laid out query-major across all retrievers.
    keys = []
    tasks = []
    for query in queries:
        for i, retriever in enumerate(retrivers):
            print(i, "retriver count")
            keys.append((query, i))
            tasks.append(retriever.aretrieve(query))

    # gather returns results in the same order the awaitables were passed in.
    taskResults = await tqdm.gather(*tasks)
    return dict(zip(keys, taskResults))

In [26]:
from llama_index.retrievers.bm25 import BM25Retriever

# Retriever over the vector index, limited to the top 2 matches per query.
vector_retriever = index.as_retriever(similarity_top_k=2)

# BM25 retriever built from the same index's docstore, also limited to the top 2 matches.
bm25_retriever = BM25Retriever.from_defaults(docstore=index.docstore, similarity_top_k=2)

In [27]:
# Send every generated query to both retrievers and collect the results in a dict.
results_dict = await runQuery(queries, [vector_retriever, bm25_retriever])

0 retriver count
1 retriver count
0 retriver count
1 retriver count
0 retriver count
1 retriver count


100%|██████████| 6/6 [00:00<00:00,  7.41it/s]


In [36]:
# Sanity check: number of entries the docstore reports via get_all_ref_doc_info().
# NOTE(review): presumably one entry per source document loaded from the PDF — confirm.
print(len(index.docstore.get_all_ref_doc_info()))

77
