# Khipus.ai
## Retrieval Augmented Generation
### Case Study: RAG Pipeline
### LangChain + Azure OpenAI + Pinecone
<span>© Copyright Notice 2025, Khipus.ai - All Rights Reserved.</span>

### Retrieval-Augmented Generation (RAG) for question answering using PDF documents


### Note: This notebook requires Python 3.11. You can download it from https://www.python.org/ftp/python/3.11.0/python-3.11.0rc2-amd64.exe (this link is the Windows 64-bit installer for the 3.11.0rc2 release candidate).


In [None]:
#%pip install -r requirements.txt

### Step 1: Import Dependencies 

In [None]:
# Step 1: Import Dependencies 
import hashlib
import os

import openai
import pinecone
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import AzureChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone as PineconeVectorStore
from langchain.vectorstores import Pinecone as PineconeVectorStore
from langchain_openai import AzureOpenAIEmbeddings
from pinecone import Pinecone, ServerlessSpec

### Step 2: Configure the Azure OpenAI Environment Variables

In [None]:
# Step 2: Configure the Azure OpenAI environment variables
# setdefault() only fills in a value when the variable is not already set, so
# credentials exported in the shell (or loaded before the kernel started) are
# used as-is and never overwritten by the placeholders below.
os.environ.setdefault("AZURE_OPENAI_API_KEY", "YOUR_AZURE_OPENAI_API_KEY")  # key from the Azure OpenAI resource
os.environ.setdefault("AZURE_OPENAI_API_BASE", "YOUR_AZURE_OPENAI_ENDPOINT")  # https://azure-openai-<your-resource-name>.openai.azure.com/
os.environ.setdefault("AZURE_OPENAI_DEPLOYMENT", "text-embedding-ada-002")  # deployment used for embeddings
os.environ.setdefault("AZURE_OPENAI_API_VERSION", "2023-05-15")

# Mirror the resolved settings onto the openai module for cells that read them from there.
openai.api_key = os.environ["AZURE_OPENAI_API_KEY"]
openai.api_base = os.environ["AZURE_OPENAI_API_BASE"]
openai.api_type = "azure"
openai.api_version = os.environ["AZURE_OPENAI_API_VERSION"]



### Step 3: Load your PDF and split into chunks

In [None]:
# Step 3: Load your PDF and split into chunks
pdf_path = "./docs/corollacross_brochure.pdf"  # Adjust the file path if needed

# Read the PDF into LangChain document objects.
loader = PyPDFLoader(pdf_path)
documents = loader.load()

# Break the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
docs = text_splitter.split_documents(documents)

# Report how many documents were loaded and how many chunks they produced.
print(f"Loaded {len(documents)} document(s) and split into {len(docs)} chunks.")

### Step 4: Initialize the Azure OpenAI embeddings object using LangChain.

In [None]:
# Step 4: Initialize the Azure OpenAI embeddings object using LangChain.
# Connection settings are read directly from the environment variables, the
# same way the chat model is configured, rather than round-tripping through
# attributes stored on the openai module.
embeddings = AzureOpenAIEmbeddings(
    openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
    azure_endpoint=os.environ["AZURE_OPENAI_API_BASE"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    deployment=os.environ["AZURE_OPENAI_DEPLOYMENT"],  # embeddings deployment name
)

### Step 5: Connect to Pinecone and list the available indexes

In [None]:
# Step 5: Connect to Pinecone and list the available indexes

# Expose the Index class at the package top level.
# NOTE(review): presumably needed so the LangChain Pinecone wrapper recognises
# the index object created later — confirm against the installed versions.
pinecone.Index = pinecone.data.index.Index

# Replace these values as needed.
# The API key is taken from the PINECONE_API_KEY environment variable when it
# is set, so the real key does not have to be saved inside the notebook; the
# placeholder is only a fallback.
api_key = os.environ.get("PINECONE_API_KEY", "YOUR_PINECONE_API_KEY")
index_name = "langchain-demo"

# Create an instance of the Pinecone class using the new API
pc = Pinecone(api_key=api_key)

# List indexes to check connectivity
print("Available indexes:", pc.list_indexes().names())


### Step 6: Create and store embeddings using the PineconeVectorStore

In [None]:
# Step 6: Create and store embeddings using the PineconeVectorStore
# Retrieve the index client
index = pc.Index(index_name)

# Wrap the index in a LangChain vector store
vector_store = PineconeVectorStore(
    index,         # The instance of pinecone.Index
    embeddings,    # Your initialized embeddings object (Azure OpenAI embeddings)
    text_key="text",  # Adjust if your documents use a different key
    namespace="default"
)

# Derive a stable ID for each chunk from its source and text. Without explicit
# IDs every run of this cell stores a fresh copy of every chunk; with
# content-derived IDs a re-run writes to the same IDs instead.
chunk_ids = [
    hashlib.sha256(
        f"{chunk.metadata.get('source', '')}\n{chunk.page_content}".encode("utf-8")
    ).hexdigest()
    for chunk in docs
]

# Assuming 'docs' contains your document chunks
vector_store.add_documents(docs, ids=chunk_ids)

print("Embeddings have been successfully stored in Pinecone!")

### Step 7: Perform a similarity search, retrieve the most relevant documents, and answer the question with the LLM

In [None]:
# Step 7: Perform a similarity search and retrieve the most relevant documents

# Initialize the language model using Azure Chat OpenAI.
# The resource URL is passed as `azure_endpoint`, matching how the embeddings
# client is configured, instead of the legacy `openai_api_base` parameter.
llm = AzureChatOpenAI(
    temperature=0,
    azure_endpoint=os.environ["AZURE_OPENAI_API_BASE"],
    openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
    openai_api_version=os.environ.get("AZURE_OPENAI_API_VERSION", "2024-10-21"),
    # Chat deployment name; falls back to "gpt-4o" when the variable is unset.
    deployment_name=os.environ.get("AZURE_OPENAI_GPT4_MODEL_NAME", "gpt-4o")
)

# Load the QA chain
chain = load_qa_chain(llm, chain_type="stuff")




In [None]:

# Define your query
query = "What is the engine size of the Toyota Corolla Cross?"
# Alternative query to try:
# What is the estimated fuel efficiency of the Corolla Cross Hybrid?

# Retrieve similar documents from the vector store (removed include_metadata).
# The hits get their own name so the chunk list `docs` produced by the
# splitting cell is not overwritten; re-running the upload cell afterwards
# would otherwise store these search results instead of the PDF chunks.
matched_docs = vector_store.similarity_search(query)

# Optionally, access metadata from the documents if needed
for doc in matched_docs:
    print("Metadata:", doc.metadata)

# Get the answer from the chain
result = chain.run(input_documents=matched_docs, question=query)

print(f"Answer: \n\n{result}")
