In [4]:
# Standard library first, third-party second.
import os

from dotenv import load_dotenv

# Pull settings from a local .env file into the process environment.
# NOTE(review): presumably this supplies the API key used by the LLM /
# embedding calls in later cells -- confirm against the .env file.
load_dotenv()

In [6]:
from llama_index import SimpleDirectoryReader, VectorStoreIndex

# Load the contents of the local "data" directory as Document objects.
documents = SimpleDirectoryReader(input_dir="data").load_data()

# Build a vector index over those documents, displaying a progress bar.
index = VectorStoreIndex.from_documents(
    documents=documents,
    show_progress=True,
)

# Default query engine on top of the index.
query_engine = index.as_query_engine()


In [7]:
# Inspect the loaded Document objects; the cell output is the repr of the list.
# NOTE(review): this renders every loaded document in full -- consider
# `len(documents)` or `documents[:1]` to keep the saved output small.
documents

[Document(id_='aee86b9c-9aeb-46e6-aeb7-fcbb9fe5aa43', embedding=None, metadata={'page_label': '1', 'file_name': '1706.03762.pdf', 'file_path': 'data\\1706.03762.pdf', 'file_type': 'application/pdf', 'file_size': 2215244, 'creation_date': '2024-02-03', 'last_modified_date': '2024-01-28', 'last_accessed_date': '2024-02-03'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.

In [21]:
from llama_index.postprocessor import SimilarityPostprocessor
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.retrievers import VectorIndexRetriever

# Retriever over the vector index, configured with similarity_top_k=5.
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=5,
)

# Node postprocessor configured with a similarity cutoff of 0.8.
processor = SimilarityPostprocessor(similarity_cutoff=0.8)

# Rebind `query_engine` to an engine that uses the retriever and runs the
# similarity postprocessor on the retrieved nodes.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    node_postprocessors=[processor],
)

In [22]:
# Ask a question through the current query engine and keep the result.
response = query_engine.query("What is attention mechanism?")

In [23]:
# Show the raw repr of the Response object (answer text plus source_nodes).
response

Response(response='The attention mechanism is a component used in sequence transduction models. It connects the encoder and decoder in these models and is based on complex recurrent or convolutional neural networks. The attention mechanism allows the model to focus on different parts of the input sequence when generating the output sequence, improving the quality of the generated output.', source_nodes=[NodeWithScore(node=TextNode(id_='36e9fd89-6d9d-4f50-b918-62f37b0ed6c2', embedding=None, metadata={'page_label': '1', 'file_name': '1706.03762.pdf', 'file_path': 'data\\1706.03762.pdf', 'file_type': 'application/pdf', 'file_size': 2215244, 'creation_date': '2024-02-03', 'last_modified_date': '2024-01-28', 'last_accessed_date': '2024-02-03'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accesse

In [24]:
from llama_index.response.pprint_utils import pprint_response
# Pretty-print the answer, then each source node (ID, similarity, text excerpt).
pprint_response(response, show_source=True)

Final Response: The attention mechanism is a component used in
sequence transduction models. It connects the encoder and decoder in
these models and is based on complex recurrent or convolutional neural
networks. The attention mechanism allows the model to focus on
different parts of the input sequence when generating the output
sequence, improving the quality of the generated output.
______________________________________________________________________
Source Node 1/1
Node ID: 36e9fd89-6d9d-4f50-b918-62f37b0ed6c2
Similarity: 0.8000853709870223
Text: Provided proper attribution is provided, Google hereby grants
permission to reproduce the tables and figures in this paper solely
for use in journalistic or scholarly works. Attention Is All You Need
Ashish Vaswani∗ Google Brain avaswani@google.comNoam Shazeer∗ Google
Brain noam@google.comNiki Parmar∗ Google Research
nikip@google.comJakob Uszkor...


In [25]:
## Store and load the index from disk, then build a query engine from it

import os.path
from llama_index import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage

# Directory the index is persisted to / loaded from.
PERSISTANT_DIR = './storage'

if not os.path.exists(PERSISTANT_DIR):
    # No persisted index yet: load the documents and build the index.
    # The loaded data is bound to `documents`, the same name passed to
    # from_documents() below, so this branch indexes exactly what it just
    # read instead of relying on a variable left over from an earlier cell
    # (which raises NameError on a fresh kernel).
    documents = SimpleDirectoryReader('data').load_data()
    index = VectorStoreIndex.from_documents(documents=documents)
    # Write the index to disk so later runs take the `else` branch.
    index.storage_context.persist(persist_dir=PERSISTANT_DIR)
else:
    # Persisted index found: rebuild it from the stored files.
    storage_cxt = StorageContext.from_defaults(persist_dir=PERSISTANT_DIR)
    index = load_index_from_storage(storage_cxt)

# Query the (freshly built or reloaded) index.
query_engine = index.as_query_engine()
# NOTE(review): 'enconding' is a typo for 'encoding'; the string is left
# unchanged so the recorded output still corresponds to this exact query.
response = query_engine.query('what is positional enconding?')
# pprint_response is imported in an earlier cell of this notebook.
pprint_response(response)



Final Response: Positional encoding is a technique used in the
Transformer model architecture to incorporate information about the
relative or absolute position of tokens in a sequence. It involves
adding "positional encodings" to the input embeddings at the bottom of
the encoder and decoder stacks. These positional encodings have the
same dimension as the embeddings and are typically represented as sine
and cosine functions of different frequencies. The purpose of
positional encoding is to allow the model to make use of the order of
the sequence, even though the Transformer model does not use
recurrence or convolution.
