In [37]:
# Install llama_index which is a popular middleware used in many GenAI applications
!pip install llama_index



In [38]:
!pip install pypdf



In [39]:
#          ___         
#         / ()\\        
#       _|_____|_       
#      | | === | |      
#      |_|  O  |_|        
#       ||  O  ||         
#       ||__*__||         
#      |~ \\___/ ~|       
#      /=\\ /=\\ /=\\     
#______[_]_[_]_[_]_______


# Import necessary classes and functions from the llama_index and langchain libraries
from llama_index import (
    GPTVectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext,
    StorageContext,
    LLMPredictor,
    load_index_from_storage,
)
from langchain.chat_models import ChatOpenAI

# Import the openai library and os module to set the API key
import getpass
import os

import openai

# SECURITY ALERT: Never put API keys directly in code. Use environment variables or other secure means.
# The key is read from the OPENAI_API_KEY environment variable. When that variable is
# unset or empty, the key is requested through a hidden prompt, so it is never written
# into the saved notebook and a key already exported in the environment is not overwritten.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')

# Mirror the same key onto the openai module attribute
openai.api_key = os.environ['OPENAI_API_KEY']

# Tell the user that document loading is starting
print("started the loading document process...")

# Load everything found in the 'docs' directory (point this at your own folder if needed)
documents = SimpleDirectoryReader(input_dir='docs').load_data()
print(documents)

# Build the predictor around a ChatOpenAI model ("gpt-3.5-turbo", temperature 0)
llm_predictor = LLMPredictor(
    llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0),
)

# Wrap the predictor in a ServiceContext
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

# Tell the user that indexing is starting
print("started the indexing process...")

# Build the index from the loaded documents, using the service context created above
index = GPTVectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
)
print(index)


started the loading document process...
[Document(id_='24671758-a509-4d65-9625-b78fafda6261', embedding=None, metadata={'page_label': '1', 'file_name': 'sample.pdf'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='747512885e696ac9bf0eecd7c6fbb94fc5761b3cabeac784477e3b6b2e35f6ee', text=' \nA Hare was making fun of the Tortoise one day for being so slow.  \n \n"Do you ever get anywhere?" he asked with a mocking laugh.  \n \n"Yes," replied the Tortoise, "and I get there sooner than you think. I\'ll run you a race and \nprove it."  \n \nThe Hare was much amused at the idea of running a race with the Tortoise, but for the fun \nof the thing he agreed. So the Fox, who had consented to act as judge, marked the distance \nand started the runners off.  \n \nThe Hare was soon far out of sight, and to make the Tortoise fe el very deeply how ridiculous \nit was for him to try a race with a Hare, he lay down beside the course to take a nap until the \nTortoi

In [40]:
import os

In [41]:
# Store the created index to the disk at the specified location
print("storing the index to disk")

# Use one name for both the directory that is created and the one persisted to.
# "doc_index" is kept because that is the location passed to persist().
PERSIST_DIR = "doc_index"

# exist_ok=True creates the directory when it is missing and is a no-op when it
# already exists, so re-running this cell does not raise FileExistsError.
os.makedirs(PERSIST_DIR, exist_ok=True)
index.storage_context.persist(persist_dir=PERSIST_DIR)


storing the index to disk


In [42]:
# Instruction prefix placed in front of a question so the answer is drawn from the documents
prompt = (
    "With the information given in the documents and not on prior knowledge "
    "give the answers in a detailed way for the following question: "
)

In [43]:
# Let the user know a query is being sent to the index
print("Querying the index...")

# Ask for a summary, with the instruction prefix placed in front of the question
response = index.as_query_engine().query(
    f"{prompt}Write a detailed summary of the story"
)

# Show the answer
print(response)

Querying the index...
The story is about a Hare and a Tortoise. The Hare mocks the Tortoise for being slow and challenges him to a race. The Tortoise accepts the challenge and the Fox acts as the judge. The race begins and the Hare quickly runs far ahead of the Tortoise. Feeling confident in his speed, the Hare decides to take a nap while waiting for the Tortoise to catch up. Meanwhile, the Tortoise continues to move slowly but steadily. Eventually, the Tortoise passes the spot where the Hare is sleeping. When the Hare wakes up and realizes that the Tortoise is near the finish line, he starts running as fast as he can. However, it is too late, and the Tortoise crosses the finish line before the Hare can catch up. The story teaches the lesson that slow and steady progress can often lead to success, even when faced with seemingly faster opponents.


In [44]:
# Let the user know a query is being sent to the index
print("Querying the index...")

# Ask for the same summary, this time without the instruction prefix
response1 = (
    index
    .as_query_engine()
    .query("Write a detailed summary of the story")
)

# Show the answer
print(response1)

Querying the index...
The story is about a Hare and a Tortoise. The Hare mocks the Tortoise for being slow and challenges him to a race. The Tortoise accepts the challenge and they start the race with a Fox acting as the judge. The Hare quickly runs far ahead and decides to take a nap, underestimating the Tortoise's ability to catch up. Meanwhile, the Tortoise continues to move steadily and eventually passes the sleeping Hare. When the Hare wakes up and realizes the Tortoise is near the finish line, he tries to run his fastest but is unable to catch up in time. The story teaches a moral lesson about the importance of consistency and perseverance.


In [45]:
# Compare only the answer text of the two queries. Comparing the response objects
# themselves would presumably also take their source nodes and metadata into account
# (the objects expose a dataclass-style __eq__) — TODO confirm.
response.response == response1.response

False

In [46]:
# Notify the user that we are querying the index
print("Querying the index...")

# Ask the index who the main characters are and store the response.
# The question text previously read "if the story"; the typo is corrected to "of the story".
# Note: this rebinds `response`, which the inspection cells further down rely on.
response = index.as_query_engine().query("Who are the main characters of the story")

# Print the received response
print(response)

Querying the index...
The main characters of the story are the Hare and the Tortoise.


You can look at the details of the response object and go deeper into what sources of information were recalled.

In [47]:
# List the attribute names available on the response object
dir(response)

['__annotations__',
 '__class__',
 '__dataclass_fields__',
 '__dataclass_params__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__match_args__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'get_formatted_sources',
 'metadata',
 'response',
 'source_nodes']

In [48]:
# Display the first element of response.source_nodes
response.source_nodes[0]

NodeWithScore(node=TextNode(id_='3940b4b9-edc9-4d94-a6fe-b029c755edaf', embedding=None, metadata={'page_label': '1', 'file_name': 'sample.pdf'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='24671758-a509-4d65-9625-b78fafda6261', node_type=None, metadata={'page_label': '1', 'file_name': 'sample.pdf'}, hash='747512885e696ac9bf0eecd7c6fbb94fc5761b3cabeac784477e3b6b2e35f6ee')}, hash='0ee55943b0a86648b73531e996228b9a99b60a3fbea512f7cc0ed7abd4f86f77', text='A Hare was making fun of the Tortoise one day for being so slow.  \n \n"Do you ever get anywhere?" he asked with a mocking laugh.  \n \n"Yes," replied the Tortoise, "and I get there sooner than you think. I\'ll run you a race and \nprove it."  \n \nThe Hare was much amused at the idea of running a race with the Tortoise, but for the fun \nof the thing he agreed. So the Fox, who had consented to act as judge, marked the distance \nand started the run