# An Introduction to RAG with LlamaIndex

Optional: work through the LangChain notebook first.

In [None]:
import os.path
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)
from dotenv import load_dotenv
# Read key/value pairs from a local .env file (if one exists) into the process environment.
# NOTE(review): presumably this is where the API key for the LLM/embedding provider comes from — confirm.
load_dotenv()

The following code builds an index over all the documents in the `data` folder. This will take a minute.

In [None]:
# SimpleDirectoryReader handles many document types: Markdown, PDFs,
# Word documents, PowerPoint decks, images, audio and video.
reader = SimpleDirectoryReader("data")
documents = reader.load_data()

# Building the index transforms the data: it chunks the documents, extracts
# metadata, and embeds each chunk. See the LlamaIndex docs to customize this.
index = VectorStoreIndex.from_documents(documents)

Create a query engine over the index and generate a response for any question.

In [None]:
question = "What did the author do growing up?"

# The query engine retrieves context from the index and synthesizes
# a response using an LLM.
query_engine = index.as_query_engine()
response = query_engine.query(question)
print(response)

Instead of keeping the embeddings only in memory, we can write them to disk and reload them on later runs.

In [None]:
PERSIST_DIR = "./storage"

if os.path.exists(PERSIST_DIR):
    # A storage folder is already present: load the saved index from it
    # instead of rebuilding.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # No storage folder yet: read the documents, build the index,
    # then persist it so later runs can take the branch above.
    documents = SimpleDirectoryReader("data").load_data()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

Now create a query engine and query the index, the same as before.

In [None]:
# The index from the previous cell (freshly built or loaded from disk)
# is queried the same way as the in-memory one.
query_engine = index.as_query_engine()
response = query_engine.query(
    "What did the author do growing up?"
)
print(response)