In [1]:
import os
from dotenv import load_dotenv
import numpy as np
from sentence_transformers import SentenceTransformer
from openai import OpenAI

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

if not openai_api_key:
    print("Warning: API_KEY not found in environment variables.")
else:
    print("API_KEY successfully loaded.")    

API_KEY successfully loaded.


In [3]:
#Sample documents for all examples
documents = [
"Python is a high-level programming language known for readability and simplicity.",
"Machine learning is a subset of AI that enables systems to learn from data.",
"RAG combines retrieval and generation to provide accurate, grounded responses."
]

**Approach 1: Custom Implementation**

In [4]:
# Custom RAG (simplified example)
# 1. Generate embeddings for documents
model = SentenceTransformer('all-MiniLM-L6-v2')
doc_embeddings = model.encode(documents)
print("Model Loaded")


Model Loaded


In [5]:
# 2. Query and Retrieval
query = "What is RAG?"
query_embedding = model.encode([query])[0]

In [6]:
# 3. Compute similarity
similarities = np.dot(doc_embeddings, query_embedding)
top_idx = np.argmax(similarities)
retrieved_doc = documents[top_idx]

In [7]:
retrieved_doc

'RAG combines retrieval and generation to provide accurate, grounded responses.'

In [8]:
# 4. Generate response
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
prompt = f"""Context: {retrieved_doc}

Question: {query}
Answer based on context:"""
response = client.chat.completions.create(
    model = "gpt-3.5-turbo",
    messages = [{"role": "user", "content": prompt}],
    temperature=0.7
)
print("Custom RAG Answer")
print(response.choices[0].message.content)

Custom RAG Answer
RAG is a system that combines retrieval and generation to provide accurate, grounded responses.


In [9]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.document import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

In [10]:
#Convert documents
lc_docs = [Document(page_content=doc) for doc in documents]

#Vector store
embeddings = OpenAIEmbeddings(api_key=openai_api_key)
vectorstore = FAISS.from_documents(lc_docs, embeddings)
retriever = vectorstore.as_retriever()

# LLM
llm = ChatOpenAI(
model="gpt-4o-mini",
temperature=0,
api_key=openai_api_key
)
#Prompt
prompt = ChatPromptTemplate.from_messages([
("system", "You are an expert assistant. Use ONLY the retrieved context."),
("human", "{question}\n\nContext: \n{context}")
])
#Build RAG pipeline
rag_chain =(
    RunnableParallel(context=retriever, question=RunnablePassthrough())
    |prompt
    |llm
)
# Query
response = rag_chain.invoke("What is RAG?")
print(response)

content='RAG combines retrieval and generation to provide accurate, grounded responses.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 169, 'total_tokens': 182, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_aa07c96156', 'id': 'chatcmpl-ClXNcyEnGfmikMikpY9aqAC7FWydw', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None} id='lc_run--019b0cc4-7e61-7d30-9596-d33a08cf5aed-0' usage_metadata={'input_tokens': 169, 'output_tokens': 13, 'total_tokens': 182, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}
