# Build Your First RAG System

1. Data Ingestion
2. Indexing
3. Retrieval
4. Response Synthesis
5. Query Engine

In [None]:
pip install llama-index

In [None]:
import os 

In [None]:
from dotenv import load_dotenv, find_dotenv

In [None]:
# Load environment variables from the .env file at the relative path ./.env
# (presumably where OPENAI_API_KEY is defined — confirm the file exists locally)
load_dotenv('./.env')

In [None]:
# Retrieve the OpenAI API key from environment variables.
# Subscripting os.environ raises KeyError when the variable is unset,
# so this cell fails immediately if the key is missing.
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']

# Stage 1: Data Ingestion

## Data Loaders


In [None]:
from llama_index.core import SimpleDirectoryReader

In [None]:
# Load the single PDF listed in input_files into `documents`.
# NOTE(review): the cells below treat each entry as one PDF page — confirm
# for the reader version in use.
documents = SimpleDirectoryReader(input_files=['data/transformers.pdf']).load_data()

In [None]:
# Check the datatype of the loaded documents container
type(documents)

In [None]:
# Total number of loaded documents (described in this notebook as one per page read from the PDF)
len(documents)

In [None]:
# Display the first loaded document (essentially the first page in the PDF)
documents[0]

In [None]:
# Get the ID of the first document via its `id_` attribute
documents[0].id_

In [None]:
# Read the first document's `doc_id` attribute
# NOTE(review): presumably the same value as `id_` above — confirm
documents[0].doc_id

In [None]:
# Get the metadata attached to the first document
documents[0].metadata

In [None]:
# Print the text content of the first document (print() renders it as plain text)
print(documents[0].text)

## Embedding Model

In [None]:
# Embedding Model
from llama_index.embeddings.openai import OpenAIEmbedding

In [None]:
# Initialize the OpenAI embedding model ("text-embedding-3-large");
# it is passed to the index when the index is built.
embed_model = OpenAIEmbedding(model="text-embedding-3-large")

## LLM

In [None]:
# LLM
from llama_index.llms.openai import OpenAI

In [None]:
# Initialize the large language model ("gpt-4o-mini");
# it is passed to the response synthesizer and the query engine later on.
llm = OpenAI(model= "gpt-4o-mini")

# Stage 2: Indexing

In [None]:
# Indexing
from llama_index.core import VectorStoreIndex

In [None]:
# Create a vector index from the documents, passing embed_model as the
# embedding model (the LLM is not passed at this stage)
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

# Stage 3: Retrieval

In [None]:
# Obtain a retriever from the index; as_retriever() is called with no
# arguments, so its default settings apply
retriever = index.as_retriever()

In [None]:
# Retrieve nodes for the query "What is self attention?"
retrieved_nodes = retriever.retrieve("What is self attention?")

In [None]:
# Get the metadata of the first retrieved node (index 0)
retrieved_nodes[0].metadata

In [None]:
# Access the ID of the first retrieved node via its `id_` attribute
retrieved_nodes[0].id_

In [None]:
# Access the underlying node object wrapped by the first retrieval result
retrieved_nodes[0].node

In [None]:
# Print the text content of the first retrieved node
print(retrieved_nodes[0].text)

In [None]:
# Get the metadata of the second retrieved node
retrieved_nodes[1].metadata

In [None]:
# Print the text content of the second retrieved node
print(retrieved_nodes[1].text)

# Stage 4: Response Synthesis


In [None]:
from llama_index.core import get_response_synthesizer

In [None]:
# Initialize the response synthesizer, passing the `llm` instance as its LLM
response_synthesizer = get_response_synthesizer(llm=llm)

# Stage 5: Query Engine

In [None]:
# Create a query engine from the index, passing the LLM and the
# response synthesizer explicitly
query_engine = index.as_query_engine(llm=llm, response_synthesizer=response_synthesizer)

In [None]:
# Run the question through the query engine (same question as the retrieval stage)
response = query_engine.query("What is self attention?")  

In [None]:
# View the answer text, stored on the `response` attribute of the result object
response.response 

In [None]:
# Check the length of the answer text
len(response.response) # number of characters in the response

In [None]:
# Check the number of source nodes attached to the response
len(response.source_nodes)  # notebook expects 2 nodes here — TODO confirm against the retriever's default top-k

In [None]:
# Access the ID of the first source node
response.source_nodes[0].id_

In [None]:
# Access the metadata of the first source node
response.source_nodes[0].metadata

In [None]:
# Access the ID of the second source node
response.source_nodes[1].id_

In [None]:
# Access the metadata of the second source node
response.source_nodes[1].metadata

# End to End RAG Pipeline