In [2]:
import torch
import tiktoken
import numpy as np
from scipy.spatial.distance import cdist
from transformers import AutoTokenizer, AutoModel, pipeline


from utils import get_context
from inference import fetch as fetch_inflection

# **Retrieval-Augmented Generation (RAG) for Context-Aware AI Agents Using Inflection AI**

This notebook demonstrates how to leverage **Retrieval-Augmented Generation (RAG)** to build **context-aware AI agents** using **Inflection AI**. The approach enhances response quality by integrating **retrieval-based context** with **deep learning models** for intelligent reasoning and decision-making.

## **Overview**

- Implements **context-aware AI agents** using **RAG**, ensuring responses are grounded in retrieved knowledge.
- Uses the **Inflection AI API** for reasoning and inference over the retrieved context.
- Leverages **transformers-based models** for tokenization and embedding.
- Key functionalities include:
  - **Chunking** input text for efficient retrieval.
  - **Embedding** textual data for semantic similarity matching.
  - **Retrieving relevant context** before generating a response.
  - **Generating coherent and informed answers** using RAG.
- Integrates **ModernBERT-base** for embedding and retrieval tasks.
- Demonstrates how **Inflection AI can be used effectively once a RAG pipeline is in place**, so that responses are supported by retrieved evidence.

This structured approach enables AI agents to **retrieve, reason, and respond** with **high accuracy and contextual awareness**.


In [3]:
# Model identifier shared by the tokenizer and the encoder loaded below.
model_name =  "answerdotai/ModernBERT-base"
# Both objects are module-level globals: encode_text() reads `tokenizer` and `model` directly.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/20.8k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.13M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/599M [00:00<?, ?B/s]

In [4]:
# Toy knowledge base: three short passages (electric vehicles, quantum computing,
# renewable energy) that are chunked, embedded and searched in the cells below.
texts = [
    "Electric vehicles (EVs) are becoming more popular due to their efficiency and environmental benefits. Charging infrastructure is expanding worldwide.",
    "Quantum computing uses principles of quantum mechanics to perform calculations at speeds unattainable by classical computers.",
    "Renewable energy sources like solar and wind power are key to reducing carbon emissions and combating climate change."
]

In [5]:
def get_chunks(texts: list, max_tokens: int = 10, encoding_name: str = "cl100k_base") -> list:
    """Split each text into consecutive chunks of at most ``max_tokens`` tokens.

    Every text is tokenized with the tiktoken encoding named ``encoding_name``,
    its token sequence is cut into non-overlapping windows of ``max_tokens``
    tokens, and each window is decoded back into a string. Chunks never span
    two input texts; the last chunk of a text may be shorter than
    ``max_tokens``.

    Args:
        texts: Strings to split.
        max_tokens: Maximum number of tokens per chunk. Must be positive.
        encoding_name: Name handed to ``tiktoken.get_encoding``.

    Returns:
        A flat list of decoded chunk strings, in input order. A text that
        encodes to zero tokens contributes no chunks.

    Raises:
        ValueError: If ``max_tokens`` is zero or negative.
    """
    # Validate up front: a negative step makes range() yield nothing (silently
    # dropping every text) and a zero step raises an unrelated range() error.
    if max_tokens <= 0:
        raise ValueError(f"max_tokens must be a positive integer, got {max_tokens}")

    enc = tiktoken.get_encoding(encoding_name)
    chunks = []
    for text in texts:
        tokens = enc.encode(text)
        # NOTE(review): windows are cut purely on token counts, so a chunk can
        # begin or end mid-sentence (e.g. chunks that start with a space).
        chunks.extend(
            enc.decode(tokens[start:start + max_tokens])
            for start in range(0, len(tokens), max_tokens)
        )
    return chunks

In [14]:
# Chunk the whole corpus with get_chunks' defaults (max_tokens=10, "cl100k_base")
# and report how many chunks were produced.
processed_chunks = get_chunks(texts)
print(len(processed_chunks))


9


In [15]:
def encode_text(text: str) -> np.ndarray:
    """Embed ``text`` with the module-level ``tokenizer`` and ``model``.

    The text is tokenized into PyTorch tensors (with truncation and padding
    enabled), run through the model with gradient tracking disabled, and the
    hidden state at sequence position 0 (the [CLS] token) is returned as a
    NumPy array with singleton dimensions squeezed out.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    # Position 0 along the sequence axis holds the [CLS] representation.
    cls_state = hidden_states[:, 0, :]
    return cls_state.squeeze().numpy()

In [None]:
# One encode_text() call per chunk; row i corresponds to processed_chunks[i].
# NOTE(review): the saved output `768` under this cell is not produced by this
# statement (nothing is printed here) - presumably left over from an earlier
# version of the cell; re-run the notebook to refresh it.
chunk_embeddings = np.array([encode_text(chunk) for chunk in processed_chunks])  # Store embeddings in memory

768


In [19]:
# Index -> chunk text lookup table, keyed by each chunk's position in processed_chunks
chunk_dict = dict(enumerate(processed_chunks))

In [20]:
def retrieve_top_k(query: str, k: int = 4) -> list:
    """Return the ``k`` stored chunks closest to ``query`` by cosine distance.

    The query is embedded with ``encode_text`` and compared against every row
    of the module-level ``chunk_embeddings``; the matching chunk texts are
    looked up in the module-level ``chunk_dict``.

    Args:
        query: Text to search for.
        k: Maximum number of chunks to return. Values <= 0 yield an empty
            list; values larger than the number of stored chunks return all
            of them.

    Returns:
        Chunk texts ordered from the smallest to the largest cosine distance
        to the query.
    """
    if k <= 0:
        # A negative k would otherwise slice from the end ([:k]) and silently
        # return almost the whole corpus instead of nothing.
        return []

    query_embedding = encode_text(query).reshape(1, -1)  # cdist expects 2-D inputs
    # metric="cosine" yields cosine *distance*, not similarity: smaller means
    # closer, so an ascending sort puts the best matches first.
    distances = cdist(query_embedding, chunk_embeddings, metric="cosine")[0]
    nearest = np.argsort(distances)[:k]

    # Convert NumPy integers to plain ints so the dict lookup uses the same key type it was built with.
    return [chunk_dict[int(idx)] for idx in nearest]

In [21]:
# Example query. retrieve_top_k is called with its default k=4, so the four
# nearest chunks are returned; both names are read later by test_rag_enabled_agents().
question = "What are the benefits of electric vehicles?"
retrieved_chunks = retrieve_top_k(question)
print("Retrieved Chunks:", retrieved_chunks)

Retrieved Chunks: ['Electric vehicles (EVs) are becoming more popular', ' is expanding worldwide.', ' are key to reducing carbon emissions and combating climate change', ' calculations at speeds unattainable by classical computers']


In [22]:
# System prompt passed to get_context() in test_rag_enabled_agents(): it tells
# the model to answer only from the supplied context and to say so explicitly
# when the context does not contain the answer.
system_instruction_prompt = """
You are a helpful assistant. Your task is to answer the user's question strictly based on the provided context.

## Instructions:
- You will be given a question and a context.
- Your answer must be based only on the provided context. Do not include any external information or assumptions.
- If the answer is not in the context, state that explicitly. Do not attempt to infer or fabricate an answer.
"""

## Test Scenario: Retrieve relevant information and use it to answer the question

In [23]:
class color:
    """Terminal escape sequences used to emphasise labels in printed output."""

    BOLD = "\033[1m"  # start bold text
    END = "\033[0m"  # reset formatting

In [24]:
async def test_rag_enabled_agents():
    """Send the question plus retrieved chunks to Inflection and print the reply.

    Reads the module-level ``question``, ``retrieved_chunks`` and
    ``system_instruction_prompt``, builds the request with ``get_context``,
    awaits ``fetch_inflection`` and prints the question and the response
    between two separator lines. The final success message is printed
    unconditionally; the response itself is not checked.
    """
    separator = "+*" * 20

    print("Starting test: test_rag_enabled_agents")
    print(separator)

    # The retrieved chunks are embedded in the prompt as the list's printed form.
    user_message = f"Query: {question}\nRetrieved context: {retrieved_chunks}"
    response = await fetch_inflection(get_context(system_instruction_prompt, user_message))

    print(f"{color.BOLD} Question: {color.END} {question}")
    print(f"{color.BOLD} Response: {color.END} {response}")
    print(separator)

    print("Test completed successfully! 🙌")


# Run the test.
# NOTE(review): this top-level `await` relies on the notebook kernel's support for
# awaiting at cell level; in a plain .py script, wrap the call in asyncio.run(...) instead.
await test_rag_enabled_agents()

Starting test: test_rag_enabled_agents
+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*


INFO:inference:Inflection AI API request took 4159.67 ms (Model=[inflection_3_pi]) 


[1m Question: [0m What are the benefits of electric vehicles?
[1m Response: [0m Electric vehicles (EVs) have several benefits, primarily being key to reducing carbon emissions and combating climate change. Although the provided context doesn't detail other advantages, EVs are generally known for lower operating costs, reduced noise pollution, and lower maintenance requirements compared to traditional combustion engine vehicles.
+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*
Test completed successfully! 🙌
