In [2]:
import os
from dotenv import load_dotenv

# importing the libarires for loading the data and creating the document nodes
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import SimpleDirectoryReader, SummaryIndex, VectorStoreIndex

# For defining the llm model and embeddings to use
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# For creating the query engine tools
from llama_index.core.tools import QueryEngineTool

# For creating the query engine routers
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector, LLMMultiSelector

In [3]:
openai_api_key = os.getenv("OPENAI_API_KEY")
load_dotenv()

if openai_api_key:
    os.environ["OPENAI_API_KEY"] = openai_api_key
    print("OpenAI API key has been set.")
else:
    print("Error: OpenAI API key not found in .env file.")

OpenAI API key has been set.


# Loading the PDF document

In [4]:
document_path = '/Users/ishadave/Documents/Priyesh/Repositories/Generative AI/langchain-projects/Advance RAG/Datasets/'
directory_reader_obj = SimpleDirectoryReader(document_path)
loaded_document = directory_reader_obj.load_data()
loaded_document

[Document(id_='13b938ef-57d1-434e-b798-5905965d6d3c', embedding=None, metadata={'page_label': '1', 'file_name': 'attention.pdf', 'file_path': '/Users/ishadave/Documents/Priyesh/Repositories/Generative AI/langchain-projects/Advance RAG/Datasets/attention.pdf', 'file_type': 'application/pdf', 'file_size': 2215244, 'creation_date': '2024-07-05', 'last_modified_date': '2024-07-05'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip

In [5]:
len(loaded_document)

15

In [6]:
loaded_document[0].metadata

{'page_label': '1',
 'file_name': 'attention.pdf',
 'file_path': '/Users/ishadave/Documents/Priyesh/Repositories/Generative AI/langchain-projects/Advance RAG/Datasets/attention.pdf',
 'file_type': 'application/pdf',
 'file_size': 2215244,
 'creation_date': '2024-07-05',
 'last_modified_date': '2024-07-05'}

# Creating the nodes from the loaded document

In [7]:
sentence_splitter = SentenceSplitter(chunk_size=1000,
                                     chunk_overlap=200)

In [8]:
document_nodes = sentence_splitter.get_nodes_from_documents(loaded_document)
print(f'Splitted the documents in {len(document_nodes)} nodes')

Splitted the documents in 17 nodes


In [9]:
document_nodes

[TextNode(id_='92423281-cca9-4b57-a1ea-d0f07b0a81eb', embedding=None, metadata={'page_label': '1', 'file_name': 'attention.pdf', 'file_path': '/Users/ishadave/Documents/Priyesh/Repositories/Generative AI/langchain-projects/Advance RAG/Datasets/attention.pdf', 'file_type': 'application/pdf', 'file_size': 2215244, 'creation_date': '2024-07-05', 'last_modified_date': '2024-07-05'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='13b938ef-57d1-434e-b798-5905965d6d3c', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '1', 'file_name': 'attention.pdf', 'file_path': '/Users/ishadave/Documents/Priyesh/Repositories/Generative AI/langchain-projects/Advance RAG/Datasets/attention.pdf', 'file_type': 'appli

# Defining the OpenAI model and embeddings

In [10]:
llm_model = OpenAI('gpt-3.5-turbo')

In [11]:
llm_embeddings = OpenAIEmbedding(model='text-embedding-ada-002')

# Creating summary and vector indexes

In [12]:
summary_index = SummaryIndex(document_nodes)
summary_index

<llama_index.core.indices.list.base.SummaryIndex at 0x7fdc691cdb80>

In [13]:
vector_index =  VectorStoreIndex(document_nodes)
vector_index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x7fdc98c9b310>

# Query Engine Tools and Router Query Engines 

## Defining the query engines

In [129]:
summary_query_engine = summary_index.as_query_engine(use_async=True)
summary_query_engine

<llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine at 0x7fdc79014700>

In [130]:
vector_query_engine = vector_index.as_query_engine()
vector_query_engine

<llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine at 0x7fdc79032310>

## Defining the tools

In [131]:
summary_tool = QueryEngineTool.from_defaults(query_engine = summary_query_engine,
                               description = 'This tool if for summarization task')
summary_tool

<llama_index.core.tools.query_engine.QueryEngineTool at 0x7fdc79032820>

In [132]:
vector_tool = QueryEngineTool.from_defaults(query_engine = vector_query_engine,
                               description = 'This tool if for vector search task form the indexes')
vector_tool

<llama_index.core.tools.query_engine.QueryEngineTool at 0x7fdc79032af0>

## Defining the routers

In [133]:
router = RouterQueryEngine(selector = LLMSingleSelector.from_defaults(),
                           query_engine_tools = [summary_tool, vector_tool],
                           verbose = True)
router

<llama_index.core.query_engine.router_query_engine.RouterQueryEngine at 0x7fdc79022af0>

In [134]:
response = router.query("what is the summary of this paper?")
print(response)

[1;3;38;5;200mSelecting query engine 0: The paper is focused on summarization task.
[0mThe paper introduces the Transformer model, a network architecture based solely on attention mechanisms, eliminating the need for recurrent or convolutional layers. The Transformer model is designed for sequence transduction tasks and consists of stacked self-attention and fully connected layers for both the encoder and decoder. It allows for more parallelization, faster training, and achieves state-of-the-art results in machine translation tasks. The paper also discusses the importance of self-attention, model architecture variations, training details, and generalization to tasks like English constituency parsing. The Transformer model outperforms previous models in translation tasks and shows promising results in constituency parsing as well.


In [135]:
# response = router.query("what is the attention is all you need?")
# print(response)

In [136]:
response = router.query("Who wrote this paper?")
print(response)

[1;3;38;5;200mSelecting query engine 0: The tool mentioned is more likely to be used for summarization tasks, which could potentially include summarizing a paper and identifying the author..
[0mAshish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser, and Illia Polosukhin wrote this paper.


In [137]:
# response = router.query("Who wrote this paper? use the query tool not summarization")
# print(response)

In [138]:
# response = router.query("Give the names of the writers of this paper")
# print(response)

In [139]:
response.metadata

{'92423281-cca9-4b57-a1ea-d0f07b0a81eb': {'page_label': '1',
  'file_name': 'attention.pdf',
  'file_path': '/Users/ishadave/Documents/Priyesh/Repositories/Generative AI/langchain-projects/Advance RAG/Datasets/attention.pdf',
  'file_type': 'application/pdf',
  'file_size': 2215244,
  'creation_date': '2024-07-05',
  'last_modified_date': '2024-07-05'},
 'f1ccf2df-c359-4f55-8e46-7040290529fb': {'page_label': '2',
  'file_name': 'attention.pdf',
  'file_path': '/Users/ishadave/Documents/Priyesh/Repositories/Generative AI/langchain-projects/Advance RAG/Datasets/attention.pdf',
  'file_type': 'application/pdf',
  'file_size': 2215244,
  'creation_date': '2024-07-05',
  'last_modified_date': '2024-07-05'},
 '9f043a94-85e6-434a-a97c-3791fd1323f8': {'page_label': '3',
  'file_name': 'attention.pdf',
  'file_path': '/Users/ishadave/Documents/Priyesh/Repositories/Generative AI/langchain-projects/Advance RAG/Datasets/attention.pdf',
  'file_type': 'application/pdf',
  'file_size': 2215244,
  'c

# Auto Retrieval Tools

In [26]:
from llama_index.core.vector_stores import MetadataFilters

In [109]:
print(document_nodes[7].metadata)
print()
print(document_nodes[7].text)

{'page_label': '8', 'file_name': 'attention.pdf', 'file_path': '/Users/ishadave/Documents/Priyesh/Repositories/Generative AI/langchain-projects/Advance RAG/Datasets/attention.pdf', 'file_type': 'application/pdf', 'file_size': 2215244, 'creation_date': '2024-07-05', 'last_modified_date': '2024-07-05'}

Table 2: The Transformer achieves better BLEU scores than previous state-of-the-art models on the
English-to-German and English-to-French newstest2014 tests at a fraction of the training cost.
ModelBLEU Training Cost (FLOPs)
EN-DE EN-FR EN-DE EN-FR
ByteNet [18] 23.75
Deep-Att + PosUnk [39] 39.2 1.0·1020
GNMT + RL [38] 24.6 39.92 2.3·10191.4·1020
ConvS2S [9] 25.16 40.46 9.6·10181.5·1020
MoE [32] 26.03 40.56 2.0·10191.2·1020
Deep-Att + PosUnk Ensemble [39] 40.4 8.0·1020
GNMT + RL Ensemble [38] 26.30 41.16 1.8·10201.1·1021
ConvS2S Ensemble [9] 26.36 41.29 7.7·10191.2·1021
Transformer (base model) 27.3 38.1 3.3·1018
Transformer (big) 28.4 41.8 2.3·1019
Residual Dropout We apply dropout [ 33] 

In [124]:
metadata_dict = {
    'key': 'page_label',
    'value' : '8'
}
metadata_filter = MetadataFilters.from_dicts([metadata_dict])
metadata_filter

MetadataFilters(filters=[MetadataFilter(key='page_label', value='8', operator=<FilterOperator.EQ: '=='>)], condition=<FilterCondition.AND: 'and'>)

In [125]:
page_specific_vector_qe = vector_index.as_query_engine(similality_top_k = 2,
                                                       filters = metadata_filter)
page_specific_vector_qe

<llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine at 0x7fdc694e0ac0>

In [126]:
page_specific_summary_qe = summary_index.as_query_engine(similality_top_k = 2,
                                                       filters = metadata_filter)
page_specific_summary_qe

<llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine at 0x7fdc7901da00>

In [110]:
response = page_specific_vector_qe.query('Transformers are better that state-of-the-art models in what context?')
response.response

'Transformers are better than state-of-the-art models in the context of achieving higher BLEU scores on the English-to-German and English-to-French newstest2014 tests while requiring a fraction of the training cost compared to previous models.'

In [113]:
response = page_specific_summary_qe.query('Transformers are better that state-of-the-art models in what context?')
response.response

'Machine translation tasks.'

In [127]:
response = page_specific_vector_qe.query('Who has better BLUE score than state-of-the-art models?')
response.response

'The Transformer model, both the base model and the big model, has better BLEU scores than the state-of-the-art models mentioned in the context.'

In [128]:
response = page_specific_summary_qe.query('Who has better BLUE score than state-of-the-art models?')
response.response

'The Transformer model achieves better BLEU scores than the previously reported state-of-the-art models on the English-to-German and English-to-French translation tasks.'

# Function Tools

In [None]:
def vector_tool_function(input_query, page_labels):
    metadata_dicts = [{'key': 'page_label',
                       'value' : page_label} for page_label in page_labels]
    
    metadata_filter = MetadataFilters.from_dicts(
        
    )