In [2]:
# RAG
import os
from dotenv import load_dotenv

# Load settings from a local .env file (if present) so that later cells can
# read OPENAI_API_KEY from the process environment.
load_dotenv()

True

In [3]:
# Fail fast with a clear message when the key is missing: os.getenv() returns
# None in that case, and assigning None into os.environ raises an opaque
# "str expected, not NoneType" TypeError.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

In [5]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Read the contents of the local "pdfs" folder into Document objects
pdf_reader = SimpleDirectoryReader("pdfs")
documents = pdf_reader.load_data()

In [6]:
# Inspect the loaded Document objects (metadata plus extracted page text).
# NOTE(review): this dumps the whole list, including local file paths in the
# metadata; consider clearing this output before sharing the notebook.
documents

[Document(id_='723d904b-47c3-49dc-9672-4a62f3561437', embedding=None, metadata={'page_label': '1', 'file_name': 'sre_runbook.pdf', 'file_path': 'e:\\PROJECTS_FOR_REFERRAL\\GenAI\\llama_index\\pdfs\\sre_runbook.pdf', 'file_type': 'application/pdf', 'file_size': 81000, 'creation_date': '2024-05-28', 'last_modified_date': '2024-05-28'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='Sample\nRunBook\nfor\nHandling\nBackend\nSystem\nFailures\nTable\nof\nContents\n1.\nIntroduction\n2.\nDatabase\nConnection\nFailures\n●\nIdentification\n●\nImmediate\nActions\n●\nT roubleshooting\nSteps\n●\nResolution\n3.\nAPI\nEndpoint\nFailures\n●\nIdentification\n●\nImmediate\nActions\n●\nT roubleshooting\nSteps\n●\nResolution\n4.\nService\nT imeout\nIssues\n●\nIdentificatio

In [7]:
# Build the vector index from the loaded documents, with progress bars enabled
index = VectorStoreIndex.from_documents(
    documents,
    show_progress=True,
)

  from .autonotebook import tqdm as notebook_tqdm
Parsing nodes: 100%|██████████| 18/18 [00:00<00:00, 1238.62it/s]
Generating embeddings: 100%|██████████| 18/18 [00:01<00:00, 10.53it/s]


In [8]:
# Sanity check: display the index object that was just built
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x23db18e4130>

In [9]:
# Query engine with default settings; note that later cells rebind the
# `query_engine` name to a differently configured engine.
query_engine = index.as_query_engine()

In [22]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

# Custom query engine: retrieve the top-k matches, then apply a similarity
# cutoff to the retrieved nodes before they reach the response step.
SIMILARITY_TOP_K = 4
SIMILARITY_CUTOFF = 0.80

retriever = VectorIndexRetriever(index=index, similarity_top_k=SIMILARITY_TOP_K)
postprocessor = SimilarityPostprocessor(similarity_cutoff=SIMILARITY_CUTOFF)
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    node_postprocessors=[postprocessor],
)

In [23]:
# Ask the custom query engine a question about the runbook
question = "What are the symptoms of database failure"
response = query_engine.query(question)

In [24]:
from llama_index.core.response.pprint_utils import pprint_response

# Pretty-print the final answer along with the source node(s) behind it
# (node ID, similarity score, and a text excerpt).
pprint_response(response, show_source=True)

Final Response: Symptoms of database failure include alerts related to
database connectivity problems in the monitoring system, error
messages like "Unable to connect to database," "Connection refused,"
or "Timeout" in application logs, and user reports of issues such as
"unable to retrieve data," "database timeout," or "application errors
related to data retrieval."
______________________________________________________________________
Source Node 1/1
Node ID: 71d18746-0901-4373-bea9-d57f5344baf9
Similarity: 0.8132135649167415
Text: issues, and high CPU usage. Each section includes steps for
identification, immediate actions, troubleshooting, and resolution.
This document is intended for use by Site Reliability Engineers (SREs)
and other technical staf f responsible for maintaining the reliability
and performance of backend systems. Database Connection Failures
Identificati...


# Persistent Storage

In [25]:
import os
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage
)

PERSIST_DIR = "./storage"

# A bare existence check would treat an empty or half-created directory as a
# valid cache, and loading from it would then fail. Only reuse the directory
# when it actually contains persisted files.
has_persisted_index = os.path.isdir(PERSIST_DIR) and bool(os.listdir(PERSIST_DIR))

if not has_persisted_index:
    # No usable cache: load the documents and create the index
    documents = SimpleDirectoryReader("pdfs").load_data()
    index = VectorStoreIndex.from_documents(documents, show_progress=True)
    # Store it for later runs
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # Load the index persisted by an earlier run
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

# Either way we can now query the index
query_engine = index.as_query_engine()
response = query_engine.query("What are the symptoms of database failure")
print(response)


Parsing nodes: 100%|██████████| 18/18 [00:00<00:00, 1101.99it/s]
Generating embeddings: 100%|██████████| 18/18 [00:01<00:00, 12.08it/s]


Symptoms of database failure include alerts related to database connectivity issues in the monitoring system, specific error messages like "Unable to connect to database," "Connection refused," or "Timeout" in application logs, and user reports of issues such as "unable to retrieve data," "database timeout," or "application errors related to data retrieval."
