Install Libraries

In [None]:
!pip install langchain langchain-community pypdf faiss-cpu openai sentence-transformers

Mount Google Drive to access the documents folder

In [2]:
# Colab-only helper used to attach Google Drive to this runtime.
from google.colab import drive
# Mount Drive at /content/drive; later cells read their files from paths under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Install python-dotenv (used to load credentials from a .env file)

In [19]:
!pip install python-dotenv



Load Secret Credentials via ".env"

In [22]:
import os
from dotenv import load_dotenv

# Load variables from .env file
# (the credentials file is read from the mounted Drive folder, not from the notebook itself)
load_dotenv("/content/drive/MyDrive/rag_folder/credentials.env")

# Access them
# Key and endpoint have no fallback: os.getenv returns None when the variable is unset.
AZURE_KEY = os.getenv("AZURE_OPENAI_KEY")
AZURE_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
# Model names fall back to the defaults given here when the variable is unset.
EMBED_MODEL = os.getenv("AZURE_EMBED_MODEL", "text-embedding-ada-002")
CHAT_MODEL = os.getenv("AZURE_CHAT_MODEL", "gpt-4o-mini")


Initialize Azure client in a function

In [None]:
from openai import AzureOpenAI

def get_azure_client():
    """Create an AzureOpenAI client from the module-level credentials.

    Returns:
        An ``AzureOpenAI`` instance configured with ``AZURE_ENDPOINT``,
        ``AZURE_KEY`` and API version ``2025-01-01-preview``.

    Raises:
        ValueError: If ``AZURE_KEY`` or ``AZURE_ENDPOINT`` is missing or empty.
    """
    credentials_present = bool(AZURE_KEY) and bool(AZURE_ENDPOINT)
    if not credentials_present:
        raise ValueError("Azure key or endpoint not set in .env")

    client_settings = {
        "api_version": "2025-01-01-preview",
        "azure_endpoint": AZURE_ENDPOINT,
        "api_key": AZURE_KEY,
    }
    return AzureOpenAI(**client_settings)

# Build the shared client when this cell runs; get_azure_client() raises ValueError if credentials are missing.
client = get_azure_client()


Install document-parsing libraries (pypdf, python-docx)

In [4]:
!pip install pypdf python-docx

Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.2.0-py3-none-any.whl (252 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.2.0


Documents Loader function

In [17]:
import os
from pypdf import PdfReader
from docx import Document

def load_documents_from_drive(folder_path):
    """Read every PDF and DOCX file directly inside a folder into plain text.

    The scan is not recursive. Files are matched by extension
    (case-insensitive); anything else in the folder is ignored.

    Args:
        folder_path: Path of the directory to scan.

    Returns:
        list[str]: One string per matching file, ordered by sorted filename.
        PDF text is the concatenation of each page's extracted text and DOCX
        text is the concatenation of each paragraph's text, every piece
        followed by a newline.
    """
    documents = []

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # document order (and therefore chunk and index ids) stable across runs.
    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)

        # A sub-directory whose name happens to end in .pdf/.docx is not a document.
        if not os.path.isfile(file_path):
            continue

        lowered = filename.lower()
        if lowered.endswith(".pdf"):
            reader = PdfReader(file_path)
            page_texts = (page.extract_text() for page in reader.pages)
            # Pages that yield no text are skipped, as before.
            documents.append("".join(text + "\n" for text in page_texts if text))

        elif lowered.endswith(".docx"):
            doc = Document(file_path)
            documents.append("".join(para.text + "\n" for para in doc.paragraphs))

    return documents

# RAG Pipeline

Loading Documents

In [6]:
# Folder on the mounted Drive that is scanned for .pdf/.docx source files.
folder = "/content/drive/MyDrive/rag_folder"
# Read each matching file into one text string per document.
documents = load_documents_from_drive(folder)
print("Loaded", len(documents), "documents.")


Loaded 2 documents.


Chunk the documents

In [7]:
def chunk_text(text, chunk_size=300, overlap=50):
    """Split text into overlapping chunks of whitespace-separated words.

    Consecutive chunks share ``overlap`` words so that context is not lost at
    chunk boundaries. Chunking stops as soon as a chunk reaches the end of the
    text, so no trailing chunk is emitted that is fully contained in the
    previous one.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk (must be positive).
        overlap: Number of words shared between consecutive chunks
            (must satisfy ``0 <= overlap < chunk_size``).

    Returns:
        list[str]: The chunks in order; empty if ``text`` contains no words.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range. Without
            this check ``overlap >= chunk_size`` would never advance the window.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap  # how far the window advances each iteration
    chunks = []
    start = 0
    while start < len(words):
        end = start + chunk_size
        chunks.append(" ".join(words[start:end]))
        if end >= len(words):
            # This chunk already covers the tail; another window would only
            # repeat the overlap words.
            break
        start += step
    return chunks

def build_corpus_chunks(doc_list):
    """Chunk every document and return all chunks as one flat list.

    Args:
        doc_list: Iterable of document texts.

    Returns:
        list: The chunks of each document (as produced by ``chunk_text`` with
        its default settings), concatenated in document order.
    """
    return [piece for doc in doc_list for piece in chunk_text(doc)]

# Chunk every loaded document and flatten the results into one list for the whole corpus.
chunks = build_corpus_chunks(documents)
print("Total chunks:", len(chunks))


Total chunks: 35


Embed the chunks

In [8]:
from openai import AzureOpenAI
import numpy as np

# Azure OpenAI config
# SECURITY: credentials must never be hard-coded in the notebook. They are read
# from the environment, which the load_dotenv() cell populates from the .env file.
# Any key that was previously committed in this cell should be treated as
# compromised and rotated.
AZURE_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_KEY = os.getenv("AZURE_OPENAI_KEY")
EMBED_MODEL = os.getenv("AZURE_EMBED_MODEL", "text-embedding-ada-002")

# Fail early with a clear message rather than on the first API request.
if not AZURE_KEY or not AZURE_ENDPOINT:
    raise ValueError("Azure key or endpoint not set in .env")

client = AzureOpenAI(
    api_version="2025-01-01-preview",
    azure_endpoint=AZURE_ENDPOINT,
    api_key=AZURE_KEY
)

def embed(texts, batch_size=256):
    """Return one embedding vector per input text, in input order.

    Args:
        texts: A list of strings. A single string is treated as a one-item list.
        batch_size: Maximum number of texts sent in one embeddings request.
            Batching keeps large corpora from going out as a single unbounded
            request (embedding services cap inputs per request; confirm the
            limit for your deployment).

    Returns:
        list: The ``embedding`` of every item in each response, concatenated
        in request order. Empty when ``texts`` is empty.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if isinstance(texts, str):
        texts = [texts]
    texts = list(texts)

    # Nothing to embed: skip the request entirely instead of sending an empty input.
    if not texts:
        return []
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    vectors = []
    for offset in range(0, len(texts), batch_size):
        response = client.embeddings.create(
            model=EMBED_MODEL,
            input=texts[offset:offset + batch_size]
        )
        vectors.extend(d.embedding for d in response.data)
    return vectors

# Embed all corpus chunks with one embed() call; the result feeds the FAISS index built below.
embeddings = embed(chunks)
print("Embeddings generated:", len(embeddings))


Embeddings generated: 35


Install FAISS (vector index library)

In [10]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.13.0-cp39-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.7 kB)
Downloading faiss_cpu-1.13.0-cp39-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.6/23.6 MB[0m [31m61.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.0


Build FAISS index

In [11]:
import faiss

# Vector size is taken from the first embedding; this raises IndexError if `embeddings` is empty.
dimension = len(embeddings[0])
# Create a faiss.IndexFlatL2 index for vectors of that size.
index = faiss.IndexFlatL2(dimension)
# The embeddings are converted to a float32 NumPy array before being added to the index.
index.add(np.array(embeddings).astype("float32"))

# Map chunk index to actual text
# (keys are positions in `chunks`; retrieve() looks up the ids returned by index.search here)
chunk_map = {i: chunk for i, chunk in enumerate(chunks)}

Retrieval function

In [12]:
def retrieve(query, k=3):
    """Return the text of the chunks closest to the query in the FAISS index.

    Args:
        query: The search text; it is embedded with ``embed`` before searching.
        k: Maximum number of chunks to return.

    Returns:
        list[str]: Up to ``k`` chunk texts from ``chunk_map``, in the order
        returned by ``index.search``. Empty when ``k`` is not positive or the
        index holds no vectors.
    """
    if k <= 0:
        return []

    # Never ask for more neighbours than the index holds: FAISS pads missing
    # results with the id -1, which is not a key of chunk_map.
    k = min(k, index.ntotal)
    if k == 0:
        return []

    q_emb = embed([query])[0]
    _distances, idx = index.search(np.array([q_emb]).astype("float32"), k)
    # Defensive filter in case the index still reports a padded (-1) slot.
    return [chunk_map[int(i)] for i in idx[0] if int(i) != -1]

Agent (tool + critic + final answer)

In [15]:
# Chat deployment name. Honour AZURE_CHAT_MODEL from the environment (as the
# config cell does) instead of silently overriding it; the previous literal is
# kept as the fallback so behaviour is unchanged when the variable is unset.
CHAT_MODEL = os.getenv("AZURE_CHAT_MODEL", "gpt-4.1-mini")

def call_model(prompt):
    """Send one user message to the chat model and return the reply text.

    Args:
        prompt: Text sent as the content of a single ``user`` message.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model=CHAT_MODEL,
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def agent(question):
    """Answer a question through a retrieve, draft, critique and revise pipeline.

    Args:
        question: The user's question; it is also used as the retrieval query.

    Returns:
        The text returned by the final revision call to ``call_model``.
    """
    # Step 1: fetch supporting chunks and merge them into one context block.
    context = "\n\n".join(retrieve(question))

    # Step 2: first-pass answer restricted to the retrieved context.
    draft_answer = call_model(f"""
You are an assistant grounded ONLY in the following retrieved context.

Context:
{context}

Question:
{question}

Write a grounded answer. If context does not contain the answer, say "The documents do not contain this information."
""")

    # Step 3: ask the model to check the draft against the same context.
    feedback = call_model(f"""
Evaluate this answer based on the context.

Answer: {draft_answer}

Context:
{context}

Is it grounded? Does it hallucinate? Suggest corrections.
""")

    # Step 4: rewrite the draft using the critique; this is the value returned.
    return call_model(f"""
Revise the answer using the critic feedback.

Draft answer:
{draft_answer}

Critic feedback:
{feedback}

Give the final corrected answer.
""")


Simple CLI chat loop for Mini Agentic RAG System

In [18]:
def chat():
    """Run an interactive question/answer loop on stdin/stdout.

    Each non-empty line is passed to ``agent`` and the answer is printed.
    The loop ends when the user types ``exit`` (case-insensitive, surrounding
    whitespace ignored), or when input is closed or interrupted.

    Returns:
        None
    """
    print("\nMini Agentic RAG System Ready.\nType 'exit' to quit.\n")

    while True:
        try:
            q = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or the user interrupted: leave without a traceback.
            print()
            break

        # Blank input would otherwise cost an embedding call plus three chat calls.
        if not q:
            continue
        if q.lower() == "exit":
            break

        ans = agent(q)
        print("\nAgent:", ans, "\n")


# Run chat loop if main file
# (the guard starts the interactive loop only when __name__ is "__main__", not on import)
if __name__ == "__main__":
    chat()



Mini Agentic RAG System Ready.
Type 'exit' to quit.

You: What is attention?

Agent: Attention is a function that maps a query and a set of key-value pairs to an output, where queries, keys, values, and outputs are vectors—or, when batched, matrices of vectors. The output is computed as a weighted sum of the values, with weights determined by a compatibility function between the queries and keys. Specifically, the Transformer model uses **Scaled Dot-Product Attention**, which calculates the dot products of the query vectors with all key vectors, scales these dot products by dividing by the square root of the key dimension \( d_k \), and then applies a softmax function to produce normalized weights. The final output is the weighted sum of the value vectors according to these weights.

Formally, the attention output is computed as:

\[
Attention(Q, K, V) = softmax\left(\frac{QK^{T}}{\sqrt{d_k}}\right) V
\]

where  
- \( Q \in \mathbb{R}^{n \times d_k} \) is the matrix of queries,  
- \(