In [1]:
# Setup
import os
from dotenv import load_dotenv

# Load environment variables via python-dotenv, then look up the OpenAI key.
# Only the presence of the key is reported; its value is never printed.
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")

print(
    "API Key loaded"
    if openai_api_key
    else "Warning: OPENAI_API_KEY not found. Set it in .env file"
)

API Key loaded


In [2]:
# Sample documents for all examples.
# A tiny in-memory corpus: each entry is one standalone sentence that gets
# embedded and searched by the retrieval step.
documents = [
    "Python is a high-level programming language known for readability and simplicity",
    "Machine learning is a subset of AI that enables systems to learn from data",
    "RAG combines retrieval and generation to provide accurate, grounded responses",
]

In [3]:
# Custom RAG
from sentence_transformers import SentenceTransformer
import numpy as np
from openai import OpenAI

# 1. Generate embeddings
# Load the 'all-MiniLM-L6-v2' sentence-transformers model. The captured console
# output suggests the weights are downloaded over HTTP when not cached locally.
model = SentenceTransformer('all-MiniLM-L6-v2')
# Encode every sample document; the printed result shows one row of floats
# per document.
doc_embeddings = model.encode(documents)

# Dump the raw embedding matrix for a quick visual sanity check.
print(doc_embeddings)

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


[[-0.04487509  0.00994214 -0.0601057  ...  0.15394156  0.13505308
   0.04846369]
 [-0.04518789  0.00941444  0.01459946 ...  0.07502524  0.06059848
  -0.05321748]
 [-0.06161087  0.03101303  0.08409731 ...  0.03325173 -0.04304384
  -0.00118809]]


In [4]:
# 2. Query and retrieve
# The question is encoded as a one-item batch with the same model used for the
# documents, so both live in the same vector space.
query = "What is RAG?"
# Unpack the single row of the batch result to get the query's vector.
(query_embeddings,) = model.encode([query])

In [5]:
# 3. Compute similarity
# Rank documents by cosine similarity to the query. A raw dot product equals
# cosine similarity only when every vector has unit length; normalising
# explicitly keeps the ranking correct even if the embedding model does not
# return normalised vectors (otherwise longer vectors would be favoured).
doc_norms = np.linalg.norm(doc_embeddings, axis=1)
query_norm = np.linalg.norm(query_embeddings)
# Clamp the denominator so an all-zero embedding yields similarity 0, not NaN.
norm_products = np.maximum(doc_norms * query_norm, np.finfo(np.float32).eps)
similarities = np.dot(doc_embeddings, query_embeddings) / norm_products

# np.argmax returns a NumPy integer; cast to a plain int for list indexing.
top_idx = int(np.argmax(similarities))
retrieved_doc = documents[top_idx]

retrieved_doc

'RAG combines retrieval and generation to provide accurate, grounded responses'

In [7]:
# 4. Generate response
# Build a client with the key loaded earlier, then ask the chat model to answer
# the question using only the retrieved document as context.
client = OpenAI(api_key=openai_api_key)

# Prompt layout: context, blank line, question, blank line, answer cue.
prompt = (
    f"Context: {retrieved_doc}\n"
    "\n"
    f"Question: {query}\n"
    "\n"
    "Answer based on context:\n"
)

# Single-turn request: the whole prompt is sent as one user message, with a
# low temperature setting.
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    temperature=0.1,
    messages=[{"role": "user", "content": prompt}],
)

In [8]:
# Display the full ChatCompletion object returned by the API; the captured
# output includes the generated message, the resolved model name, and token usage.
response

ChatCompletion(id='chatcmpl-ClBesxcQEVQ2GQo6XqsHFDIsweEsg', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='RAG is a system that combines retrieval and generation techniques to produce accurate and grounded responses.', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1765362266, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier='default', system_fingerprint=None, usage=CompletionUsage(completion_tokens=18, prompt_tokens=34, total_tokens=52, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

In [None]:
# Show only the generated answer text from the first (and only) choice.
# NOTE(review): this cell originally held a stray `p`, which raises NameError on
# Restart & Run All; printing the answer is the presumed intent — confirm.
print(response.choices[0].message.content)