In [1]:
# Setup
import os
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment,
# then read the key that the later cells hand to the OpenAI-backed clients.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

# Report the outcome; a missing or empty key only produces a warning here.
if openai_api_key:
  print("API Key loaded")
else:
  print("Warning: OPENAI_API_KEY not found. Set it in .env file")

API Key loaded


In [2]:
# Tiny in-memory corpus shared by every approach in this notebook:
# one short sentence per document.
documents = [
  (
    "Python is a high-level programming language "
    "known for readability and simplicity"
  ),
  (
    "Machine learning is a subset of AI "
    "that enables systems to learn from data"
  ),
  (
    "RAG combines retrieval and generation "
    "to provide accurate, grounded responses"
  ),
]

### Approach 1: Custom RAG Implementation

In [3]:
# Custom RAG
from sentence_transformers import SentenceTransformer
import numpy as np
from openai import OpenAI

# 1. Generate embeddings
# Load the 'all-MiniLM-L6-v2' sentence-transformers model. The same `model`
# object is reused in a later cell to embed the query.
model = SentenceTransformer('all-MiniLM-L6-v2')
# Encode all sample documents in one call; this matrix is what retrieval searches.
doc_embeddings = model.encode(documents)

# Dump the raw vectors for a quick visual check.
print(doc_embeddings)


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
'(ProtocolError('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)), '(Request ID: 81a516fd-ea4d-417e-ba13-75f414612e65)')' thrown while requesting HEAD https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/tokenizer_config.json
Retrying in 1s [Retry 1/5].


[[-0.04487509  0.00994213 -0.06010572 ...  0.15394153  0.13505308
   0.0484637 ]
 [-0.04518789  0.00941441  0.01459944 ...  0.07502524  0.06059849
  -0.05321749]
 [-0.06161088  0.03101305  0.08409733 ...  0.03325175 -0.04304386
  -0.00118812]]


In [5]:
# Sanity check: the first dimension should equal the number of documents.
doc_embeddings.shape

(3, 384)

In [None]:
# 2. Embed the user question with the same model used for the documents
query = "What is RAG?"
# Encoding a bare string yields a single vector, so no batch indexing is needed.
query_embeddings = model.encode(query)

In [None]:
# 3. Score every document against the query and keep the best match
# Matrix-vector product: one dot-product score per document row.
similarities = doc_embeddings @ query_embeddings
# The position of the highest score selects the document used as context.
top_idx = similarities.argmax()
retrieved_doc = documents[top_idx]

In [8]:
# Show the document that scored highest against the query.
retrieved_doc

'RAG combines retrieval and generation to provide accurate, grounded responses'

In [11]:
# 4. Generate the answer
# Ground the model by placing the retrieved document ahead of the question.
prompt = f"""Context: {retrieved_doc}

Question: {query}

Answer based on context:"""

# Single-turn chat completion: the whole prompt travels as one user message.
client = OpenAI(api_key=openai_api_key)
response = client.chat.completions.create(
  model="gpt-3.5-turbo",
  temperature=0.5,
  messages=[{"role": "user", "content": prompt}],
)


In [12]:
# Inspect the raw completion object returned by the API (choices, model, token usage).
response

ChatCompletion(id='chatcmpl-ClBd1HaLMKfi8q6T7KCIq5dnsbHb2', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='RAG is a system that combines retrieval and generation to provide accurate, grounded responses.', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1765362151, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier='default', system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=34, total_tokens=51, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

In [None]:
# Print a label followed by the generated text of the first choice.
print("Custom RAG Answer:", response.choices[0].message.content, sep="\n")

### Approach 2: LangChain Implementation with FAISS

In [None]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.document import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Convert documents: wrap each raw string in a LangChain Document for indexing
lc_docs = [Document(page_content=doc) for doc in documents]

# Vector store: embed the documents with OpenAI and index them in FAISS.
# The name must be `vectorstore`, because the retriever is built from it.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
vectorstore = FAISS.from_documents(lc_docs, embeddings)
retriever = vectorstore.as_retriever()

# LLM (class name is `ChatOpenAI`, as imported at the top of this cell)
llm = ChatOpenAI(
  model="gpt-3.5-turbo",
  temperature=0,
  openai_api_key=openai_api_key
)

# Prompt: {question} and {context} are two separate placeholders that the
# chain fills from the RunnableParallel output below.
prompt = ChatPromptTemplate.from_messages([
  ("system", "You are an expert assistant. Use ONLY the retrieved context."),
  ("human", "{question}\n\nContext:\n{context}")
])

# Build RAG pipeline: the steps are composed with the `|` operator.
# The parallel step sends the incoming question to the retriever (-> context)
# and also passes it through unchanged (-> question).
rag_chain = (
  RunnableParallel(context=retriever, question=RunnablePassthrough())
  | prompt
  | llm
)

# Query: print only the answer text, like the other approaches do
response = rag_chain.invoke("What is RAG?")
print("LangChain (FAISS) Answer:")
print(response.content)


### Approach 2 (alternative): LangChain Implementation with Chroma DB

In [None]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.docstore.document import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Convert documents: wrap each raw string in a LangChain Document for indexing
lc_docs = [Document(page_content=doc) for doc in documents]

# Vector store (Chroma)
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

# persist_directory (optional) saves the collection on disk so it survives
# kernel restarts. Because the collection persists, stable ids are supplied so
# that re-running this cell overwrites the same three entries instead of
# appending duplicates.
doc_ids = [f"doc-{i}" for i in range(len(lc_docs))]
vectorstore = Chroma.from_documents(
  lc_docs,
  embeddings,
  ids=doc_ids,
  collection_name="my_rag_collection",
  persist_directory="./chroma_db"  # optional
)

retriever = vectorstore.as_retriever()

# LLM
llm = ChatOpenAI(
  model="gpt-4o-mini",
  temperature=0,
  openai_api_key=openai_api_key
)

# Prompt: {question} and {context} are filled from the parallel step below
prompt = ChatPromptTemplate.from_messages([
  ("system", "You are an expert assistant. Use ONLY the retrieved context"),
  ("human", "{question}\n\nContext:\n{context}")
])

# Build RAG pipeline: the steps are composed with the `|` operator.
# The parallel step sends the incoming question to the retriever (-> context)
# and also passes it through unchanged (-> question).
rag_chain = (
  RunnableParallel(context=retriever, question=RunnablePassthrough())
  | prompt
  | llm
)

# Query: same question as the other approaches; print only the answer text
response = rag_chain.invoke("What is RAG?")
print("LangChain (Chroma) Answer:")
print(response.content)

### Approach 3: LlamaIndex Implementation

In [None]:
# Prerequisite (run once in the kernel's environment): %pip install llama-index

In [None]:
from llama_index.core import Document, VectorStoreIndex, Settings
from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# Configure LlamaIndex through the global `Settings` object (capital S, as
# imported above). The API key is passed explicitly to both the LLM and the
# embedding model, matching how the other approaches pass it.
Settings.llm = LlamaOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=openai_api_key)
Settings.embed_model = OpenAIEmbedding(api_key=openai_api_key)

# Create documents and index
# `Document` here is the LlamaIndex class imported in this cell, not the LangChain one.
llama_docs = [Document(text=doc) for doc in documents]
index = VectorStoreIndex.from_documents(llama_docs)

# Query
query_engine = index.as_query_engine()
response = query_engine.query("What is RAG?")

print("LlamaIndex Answer:")
print(response.response)
