In [1]:
import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())
openai_api_key = os.environ["OPENAI_API_KEY"]

## Load private document

In [2]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage
)

documents = SimpleDirectoryReader('data').load_data()

In [5]:
print(documents[0])
len(documents)


Doc ID: d24d10c5-5120-441a-b88e-48719a0b0362
Text: Aplicaciones LLM  LlamaIndex  © 2023 Julio Colomer, Aceleradora
AI


7

## Create Vector Database

In [6]:
index = VectorStoreIndex.from_documents(documents=documents)




## Ask questions to private document

In [7]:
query_engine = index.as_query_engine()
response = query_engine.query('Summarize the document in 100 words')
response

Response(response='The document discusses optimizing advanced retrieval strategies, evaluation, and building performant RAG applications for production. It also covers various use cases such as QA, chatbot, agent, structured data extraction, and multimodal applications. The content is authored by Julio Colomer from Aceleradora AI and is focused on enhancing retrieval strategies, application performance, and exploring different use cases like QA and chatbots.', source_nodes=[NodeWithScore(node=TextNode(id_='65b85065-8ed3-4463-80e0-56ed70eefd40', embedding=None, metadata={'page_label': '6', 'file_name': '116BESP-LLAMAINDEX.pdf', 'file_path': 'c:\\Users\\jarai\\OneDrive\\Escritorio\\git\\Llm_apps_bootcamp\\LlamaIndex\\data\\116BESP-LLAMAINDEX.pdf', 'file_type': 'application/pdf', 'file_size': 45439, 'creation_date': '2024-10-24', 'last_modified_date': '2024-10-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_d

## Save the vector database

In [9]:
import os.path


#check if storage already exists
if not os.path.exists('./storage'):
    #load the documents and create the index
    documents = SimpleDirectoryReader('data').load_data()
    index = VectorStoreIndex.from_documents(documents=documents)
    #store it for later
    index.storage_context.persist()
else:
    #load the existing index
    storage_context = StorageContext.from_defaults(persist_dir='./storage')
    index = load_index_from_storage(storage_context=storage_context)

# either way we can now query the index
quey_engine = index.as_query_engine()
query_engine = query_engine.query('According to the author, what is good?')

print(response)

The document discusses optimizing advanced retrieval strategies, evaluation, and building performant RAG applications for production. It also covers various use cases such as QA, chatbot, agent, structured data extraction, and multimodal applications. The content is authored by Julio Colomer from Aceleradora AI and is focused on enhancing retrieval strategies, application performance, and exploring different use cases like QA and chatbots.


## Customization options
* parse into smaller chunks
* use a different vector store
* retrieve more context when I query
* use a different LLM
* use a different response mode
* stream the response back

## Use cases
* QA
* Chatbot
* Agent
* Structured Data Extraction
* Multimodal

## Optimizing
* Advanced Retrieval Strategies
* Evaluation
* Building performant RAG applications for production

## Other
* LlamaPacks and Create-llama = LangChain templates
* Very recent, still in beta
* Very interesting: create-llama allows you to create a Vercel app!
* Very interesting: open-source end-to-end project (SEC Insights)
    * llamaindex + react/nextjs (vercel) + fastAPI + render + AWS
    * environment setup: localStack + docker
    * monitoring: sentry
    * load testing: loader.io
    * [web](https://www.secinsights.ai/)
    * [code](https://github.com/run-llama/sec-insights)