In [1]:
from pathlib import Path
from tqdm.asyncio import tqdm

# Data Connector
from llama_index import SimpleDirectoryReader
# Index
from llama_index import VectorStoreIndex


# Llama Index LLM
from llama_index import ServiceContext
from llama_index import get_response_synthesizer
from llama_index import PromptTemplate

# Other LLM
from langchain.llms import OpenAI

# Retriever
from llama_index.indices.vector_store.retrievers import VectorIndexRetriever
from llama_index.retrievers import BM25Retriever

# Embeddings
from sentence_transformers import SentenceTransformer
from langchain.embeddings import SentenceTransformerEmbeddings

# Display
from llama_index.response.notebook_utils import display_source_node


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import nest_asyncio
nest_asyncio.apply()

nest_asyncio.apply() patches the existing event loop in a Jupyter Notebook environment to allow nested usage of asyncio.

It is utilized later in the notebook to ensure that the asyncio event loop functions correctly within a Jupyter Notebook environment, enabling the concurrent execution of multiple asynchronous retrieval tasks without encountering event loop compatibility issues.

# LLM

In [3]:
# Initialize the SentenceTransformerEmbeddings with the loaded model
local_embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

In [4]:
# Initialize the Mistral model from LM studio server
llm = OpenAI(openai_api_key="NULL",temperature=0,openai_api_base="http://192.168.48.33:1234/v1")
# Initialize service context : LLM and Embeddings model for the vector store
service_context = ServiceContext.from_defaults(llm=llm, embed_model=local_embeddings)

  warn_deprecated(


# Import data

In [5]:
# Initialize the data connector/ reader. 
# SimpleDirectoryReader adapt to the document format.
reader = SimpleDirectoryReader(
    input_files=["thesis.pdf"]
)

documents  = reader.load_data()
print(f"Loaded {len(documents)} docs")

Loaded 138 docs


# Load in vector store

In [6]:
# Initialize a simple vector store index 
index = VectorStoreIndex.from_documents(
    documents, service_context=service_context
)

# Define Advanced Retriever

1. Query generation/rewriting: generate multiple queries given the original user query

2. Perform retrieval for each query over an ensemble of retrievers.

3. Reranking/fusion: fuse results from all queries, and apply a reranking step to “fuse” the top relevant results!

## Query Generation/Rewriting
In this step we're creating the function to generate k different queries from the original

In [7]:
query_str = "explain me how we used langchain in the methodology?"

In [8]:
query_gen_prompt_str = (
    "You are a helpful assistant that generates multiple search queries based on a "
    "single input query. Generate {num_queries} search queries, one on each line, "
    "related to the following input query:\n"
    "Query: {query}\n"
    "Queries:\n"
)
query_gen_prompt = PromptTemplate(query_gen_prompt_str)

In [9]:
def generate_queries(llm, query_str: str, num_queries: int = 4):
    fmt_prompt = query_gen_prompt.format(
        num_queries=num_queries - 1, query=query_str # remove the original query
    )
    
    response = llm.generate([fmt_prompt])
    
    # Assuming there's only one generation in the response
    if response.generations and len(response.generations[0]) > 0:
        generation_text = response.generations[0][0].text
        queries = generation_text.split("\n")
        return queries
    else:
        return []
    

In [10]:
queries = generate_queries(llm, query_str, num_queries=4)

In [11]:
queries # The 3 queries generated from:  "explain to me how we used langchain in the methodology?"

['1. What is the role of Langchain in our methodology and how was it implemented?',
 '2. Can you provide an example of using Langchain in our research process?',
 '3. How does Langchain enhance collaboration and communication within our team during project execution?']

### More examples


In [12]:
t1 = generate_queries(llm, "who is the author of the paper", num_queries=4)
t2 = generate_queries(llm, "What are the conclusion of the research document", num_queries=4)
t3 = generate_queries(llm, "What is the self attention mechanism", num_queries=4)

In [13]:
t1

['1. Who wrote the specific paper with this title?',
 '2. Author name for the given paper publication.',
 '3. Identify the individual(s) that authored the mentioned paper.']

In [14]:
t2

['1. "Summary of findings in the research document"',
 '2. "Conclusions drawn from the research study"',
 '3. "Key takeaways from the research paper"']

In [15]:
t3

['1. How does the self attention mechanism work in deep learning?',
 '2. What are the benefits of using self attention mechanism in neural networks?',
 '3. Can you explain the mathematical formula for calculating self attention scores?']

## Perform Vector Search for Each Query

This code defines an asynchronous function run_queries to execute search queries using multiple retrieval methods. For each query, it asynchronously sends requests to each retriever (like a vector retriever and a BM25 retriever). The results are compiled into a dictionary, mapping each query and its position to the corresponding result.

In [16]:
async def run_queries(queries, retrievers):
    """
    Run queries against retrievers asynchronously.

    :param queries: A list of queries to be processed.
    :param retrievers: A list of retriever objects that will process the queries.
    :return: A dictionary mapping each query and its index to its corresponding result.
    """
    tasks = []
    for query in queries:
        # For each query, iterate over each retriever.
        for i, retriever in enumerate(retrievers):
            # For each retriever, create an asynchronous task to retrieve the query
            # and add it to the tasks list.
            tasks.append(retriever.aretrieve(query))

    task_results = await tqdm.gather(*tasks)

    results_dict = {}
     # Iterate over each pair of query and its result.
    for i, (query, query_result) in enumerate(zip(queries, task_results)):
        # Map each query and its index to its result in the dictionary.
        results_dict[(query, i)] = query_result

    return results_dict

In [17]:
# vector retriever
vector_retriever = index.as_retriever(similarity_top_k=2)

In [18]:
# bm25 retriever
bm25_retriever = BM25Retriever.from_defaults(
    docstore=index.docstore, similarity_top_k=2
)

BM25 also known as the Okapi BM25, is a ranking function used in information retrieval systems to estimate the relevance of documents to a given search query.

A bag-of-words retrieval function that ranks a set of documents based on the query terms appearing in each document, regardless of their proximity within the document. [Wiki here](https://en.wikipedia.org/wiki/Okapi_BM25#:~:text=BM25%20is%20a%20bag%2Dof,their%20proximity%20within%20the%20document.)


In [19]:
results_dict = await run_queries(queries, [vector_retriever, bm25_retriever])

100%|██████████| 6/6 [00:00<00:00, 150.47it/s]


The function is called with a set of queries and two specific retrievers, vector_retriever and bm25_retriever, to fetch and collate their results asynchronously.

### more examples

In [20]:
r1 = await run_queries(t1, [vector_retriever, bm25_retriever])
r2 = await run_queries(t2, [vector_retriever, bm25_retriever])
r3 = await run_queries(t3, [vector_retriever, bm25_retriever])

100%|██████████| 6/6 [00:00<00:00, 177.06it/s]
100%|██████████| 6/6 [00:00<00:00, 200.67it/s]
100%|██████████| 6/6 [00:00<00:00, 207.50it/s]


## Perform Fusion

The next step here is to perform fusion: combining the results from several retrievers into one and re-ranking.

Note that a given node might be retrieved multiple times from different retrievers, so there needs to be a way to de-dup and rerank the node given the multiple retrievals.

This stage perform “reciprocal rank fusion”: for each node, add up its reciprocal rank in every list where it’s retrieved.

Then reorder nodes by highest score to least.

Full paper [here](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf)

In [21]:
# Llama index function 
def fuse_results(results_dict, similarity_top_k: int = 2):
    """Fuse results."""
    k = 60.0  # `k` is a parameter used to control the impact of outlier rankings.
    fused_scores = {}
    text_to_node = {}

    # compute reciprocal rank scores
    for nodes_with_scores in results_dict.values():
        for rank, node_with_score in enumerate(
            sorted(
                nodes_with_scores, key=lambda x: x.score or 0.0, reverse=True
            )
        ):
            text = node_with_score.node.get_content()
            text_to_node[text] = node_with_score
            if text not in fused_scores:
                fused_scores[text] = 0.0
            fused_scores[text] += 1.0 / (rank + k)

    # sort results
    reranked_results = dict(
        sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    )

    # adjust node scores
    reranked_nodes: List[NodeWithScore] = []
    for text, score in reranked_results.items():
        reranked_nodes.append(text_to_node[text])
        reranked_nodes[-1].score = score

    return reranked_nodes[:similarity_top_k]

In [22]:
final_results = fuse_results(results_dict)

In [23]:
for n in final_results:
    display_source_node(n, source_length=500)

**Node ID:** 86988f55-dea7-490c-84a0-041ef2f2564f<br>**Similarity:** 0.03333333333333333<br>**Text:** 60 
Nathan Destrez  straightforward tasks to complex operations.  In addition to its customizable nature, LangChain also 
provides pre -built chains. These are pre -assembled components designed for specific tasks, enabling 
developers to quickly start projects. For more intricate and unique applications, the framework's 
modular nature allows for the creation of customized chains, offering a balance between convenience 
and personalization. LangChain's design caters to a diverse range of use...<br>

**Node ID:** c09a2f19-a6a4-43c8-9d28-2ac06484d376<br>**Similarity:** 0.03278688524590164<br>**Text:** 59 
Nathan Destrez  1.10 Virtual Assistants in Industry and Academia  
1.10.1  The Role of LangChain and Emerging Trends  
Virtual assistants have emerged as a pivotal innovation, transforming interactions between 
humans and machines. This literature review delves into the multifaceted world of virtual assistants, 
examining their development and application across industry and  academia. By exploring the existing 
landscape of these digital aides, this section aims to shed light on the prog...<br>

### More examples

In [24]:
fr1 = fuse_results(r1)
fr2 = fuse_results(r2)
fr3 = fuse_results(r3)

In [25]:
for n in fr1:
    display_source_node(n, source_length=500)

**Node ID:** fc6f78a0-7ee2-4394-b58d-f1fa4e51fa84<br>**Similarity:** 0.03306010928961749<br>**Text:** 4 
Nathan Destrez<br>

**Node ID:** 735f970d-ea5b-45c7-ad68-91c2f0161382<br>**Similarity:** 0.016666666666666666<br>**Text:** 89 
Nathan Destrez  An initial study comparing the ratio of stop words to total words was conducted, but it did not yield 
significant patterns. Consequently, most short texts, predominantly composed of stop words, were 
excluded from further processing.  
A key observation during our initial explorations with the embeddings base was the retriever's 
occasional struggle with implicit concepts or unique terminologies present in only a few documents.  
 
Figure 6 Document retrieved from the Sky...<br>

In [26]:
for n in fr2:
    display_source_node(n, source_length=500)

**Node ID:** 87c6da31-42aa-4a63-a0ae-741f8702d80f<br>**Similarity:** 0.03333333333333333<br>**Text:** 5 
Nathan Destrez   
Contents   
Introduction  ................................ ................................ ................................ ................................  9 
Literature Review  ................................ ................................ ................................ ......................  11 
1.1 Historical evolution of chatbots and virtual assistants.  ................................ .................  11 
1.2 AI in France and the Regulation in Europe  ......<br>

**Node ID:** 0e24cd10-6edf-43b9-a19a-110b50652db9<br>**Similarity:** 0.016666666666666666<br>**Text:** 49 
Nathan Destrez  1.7.5 Transformers and their role in representing longer textual data.  
BERT's ability to understand context has naturally extended the use of embeddings from 
individual words to entire sentences or even longer texts. Sentence Transformers, as discussed in the 
article "Understanding BERT" on Towards AI, take this concept furth er by providing mechanisms to 
derive meaningful sentence -level embeddings. These embeddings can then be used in various NLP 
tasks, such as sem...<br>

In [27]:
for n in fr3:
    display_source_node(n, source_length=500)

**Node ID:** d0f6f609-996f-4f96-a22b-57625e6bf31b<br>**Similarity:** 0.03333333333333333<br>**Text:** 42 
Nathan Destrez  1.7.3 The Attention mechanism  
The concept of Attention within the domain of neural networks has garnered significant interest 
due to its remarkable impact on enhancing state -of-the-art results across various research fields. This 
includes areas as diverse as image captioning, language translation, and interactive question 
answering. Attention has rapidly ascended to become an indispensable instrument in the researcher's 
toolkit. The assertion by some in the field th...<br>

**Node ID:** 13d442ca-fcab-495a-8a42-ecb1ccb764af<br>**Similarity:** 0.03278688524590164<br>**Text:** 46 
Nathan Destrez  model to 'focus' on the information that is most predictive of the desired outcome. This geometric 
reconfiguration is pivotal in enhancing the model's performance by ensuring that it attends to the 
most salient features within the data.  
The burgeoning field of research has begun to refer to this mechanism as "Memory," positing that this 
term more aptly describes its functionality. The Attention layer facilitates the model's ability to "recall" 
and focus on previously...<br>

# Plug into RetrieverQueryEngine

In [28]:
from llama_index import QueryBundle
from llama_index.retrievers import BaseRetriever
from typing import Any, List
from llama_index.schema import NodeWithScore

from llama_index.query_engine import RetrieverQueryEngine

In [29]:
class FusionRetriever(BaseRetriever):
    """Ensemble retriever with fusion."""

    def __init__(
        self,
        llm,
        retrievers: List[BaseRetriever],
        similarity_top_k: int = 2,
    ) -> None:
        """Init params."""
        self.llm = llm  # Store the llm instance
        self._retrievers = retrievers
        self._similarity_top_k = similarity_top_k
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve."""
        queries = generate_queries(self.llm, query_str, num_queries=4)  # Use the llm instance
        results = run_queries(queries, self._retrievers)
        final_results = fuse_results(
            results_dict, similarity_top_k=self._similarity_top_k
        )

        return final_results

In [30]:
llm = OpenAI(openai_api_key="NULL",temperature=0,openai_api_base="http://192.168.48.33:1234/v1")
service_context = ServiceContext.from_defaults(llm=llm, embed_model=local_embeddings)

In [31]:
fusion_retriever = FusionRetriever(
    llm, [vector_retriever, bm25_retriever], similarity_top_k=2
)

response_synthesizer= get_response_synthesizer(service_context,streaming=True) # streaming False for classic answer generation

In [32]:
# Initialize the RetrieverQueryEngine
query_engine = RetrieverQueryEngine.from_args(
    retriever=fusion_retriever,
    response_synthesizer=response_synthesizer,
    service_context=service_context, 
    streaming=True # streaming False for classic answer generation
)

In [33]:
streaming_response = query_engine.query(
    "Tell me about the document",
)

streaming_response.print_response_stream()

  nodes = self._retrieve(query_bundle)
  warn_deprecated(


 The document appears to be a literature review on virtual assistants, focusing on their development and application in industry and academia. It highlights LangChain as a groundbreaking tool for simplifying the integration and application of Large Language Models (LLMs) in both commercial and academic settings. LangChain's impact is discussed in terms of its ability to create context-aware applications, enhance reasoning capabilities, and offer modular components for customization. The document also mentions LangChain's collaboration with Retrieval Augmented Generation (RAG) and its role in streamlining the creation of advanced virtual assistant applications.

In [34]:
streaming_response = query_engine.query(
    "What are the conclusions for the future of the tool in the company",
)

streaming_response.print_response_stream()

  nodes = self._retrieve(query_bundle)


 The literature review highlights LangChain as a groundbreaking tool that significantly simplifies the integration and application of Large Language Models (LLMs) in both commercial and academic settings. Its impact is multifaceted and profound, enabling developers to create context-aware applications with enhanced reasoning capabilities. LangChain's modular architecture offers versatility and customization options, making it essential for tailoring applications to meet a range of requirements. The tool's ability to streamline the deployment and enhance the accessibility of advanced virtual assistants is crucial for accelerating their adoption in various industries.