<a href="https://colab.research.google.com/github/SujoySengupta/Agentic-AI-Sujoy/blob/main/RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# --- STEP 1: IMPORTS ---
import torch
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_community.llms import HuggingFacePipeline
from transformers import pipeline
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# --- STEP 2: CREATE DUMMY DATA (Bypassing the broken download) ---
# Three hand-written stand-ins for research papers. Each record keeps the body
# under "text" and the pretend file name under "source". The long bodies are
# written one sentence per literal; adjacent literals are concatenated by Python.
raw_text_data = [
    {
        "text": (
            "Deep Learning is a subset of machine learning using neural networks with three or more layers. "
            "These neural networks attempt to simulate the behavior of the human brain—albeit far from matching its ability—allowing it to 'learn' from large amounts of data. "
            "While a neural network with a single layer can still make approximate predictions, additional hidden layers can help to optimize and refine for accuracy."
        ),
        "source": "Intro to DL.pdf",
    },
    {
        "text": (
            "Retrieval-Augmented Generation (RAG) is a technique for enhancing the accuracy and reliability of generative AI models with facts fetched from external sources. "
            "By bridging the gap between private data and public LLMs, RAG solves the hallucination problem. "
            "It retrieves relevant context and passes it to the LLM along with the user query."
        ),
        "source": "RAG Architecture.pdf",
    },
    {
        "text": (
            "Agentic AI refers to AI systems that can pursue complex goals with limited direct supervision. "
            "Unlike passive AI which waits for prompts, agentic AI can break down tasks, use tools, and iterate on solutions. "
            "This autonomy allows for more complex workflows in software engineering and data analysis."
        ),
        "source": "Agents_v1.pdf",
    },
]

print("Creating documents from dummy data...")
# Wrap each raw record in a Document: the body becomes page_content and the
# file name is carried along as metadata under "source".
documents = []
for entry in raw_text_data:
    documents.append(
        Document(page_content=entry["text"], metadata={"source": entry["source"]})
    )

# --- STEP 3: CHUNKING ---
# Split the documents with a chunk size of 200 and an overlap of 20 between
# consecutive chunks, then report how many chunks came out.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=200,
)
chunks = text_splitter.split_documents(documents=documents)
print(f"Successfully created {len(chunks)} chunks.")

# --- STEP 4: EMBEDDINGS & VECTOR STORE ---
print("Creating embeddings...")
# Sentence-transformers model used to embed every chunk.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
# Index the chunks in FAISS and expose the index as a retriever that is
# configured with k=2.
vector_store = FAISS.from_documents(chunks, embedding=embeddings)
retriever = vector_store.as_retriever(
    search_kwargs={"k": 2},
)

# --- STEP 5: LLM SETUP ---
# The HuggingFacePipeline class imported from langchain_community at the top of
# this cell is deprecated and warns when instantiated. Rebind the same name to
# the maintained implementation in langchain_huggingface (already a dependency
# of this notebook via HuggingFaceEmbeddings).
from langchain_huggingface import HuggingFacePipeline

print("Initializing LLM...")
# transformers pipelines take a device index: 0 selects the first CUDA GPU,
# -1 selects the CPU.
device = 0 if torch.cuda.is_available() else -1
hf_pipeline = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    max_new_tokens=100,
    device=device,
)
# Wrap the transformers pipeline so it can be used as a step in a LangChain chain.
llm = HuggingFacePipeline(pipeline=hf_pipeline)

# --- STEP 6: RAG CHAIN ---
# Prompt text with two placeholders: {context} for the retrieved passages and
# {question} for the user's query. Written line by line; the adjacent literals
# are concatenated into a single string.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
# Build the prompt object from the template string defined above.
prompt = PromptTemplate.from_template(template=template)

def format_docs(docs):
    """Concatenate the page_content of each document into one string.

    Consecutive documents are separated by a blank line. An empty iterable
    yields an empty string.
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

# Retrieval branch: documents returned by the retriever are flattened into a
# single context string by format_docs.
context_branch = retriever | format_docs

# Both prompt variables are derived from the chain input: "context" through the
# retrieval branch, "question" by passing the input along unchanged.
prompt_inputs = {"context": context_branch, "question": RunnablePassthrough()}

# Fill the prompt, run the LLM, and parse the result to a plain string.
rag_chain = prompt_inputs | prompt | llm | StrOutputParser()

# --- STEP 7: TEST QUERIES (For your Report) ---
print("\n--- TEST RESULTS ---")
# One question per dummy document.
queries = [
    "What is Retrieval-Augmented Generation?",
    "How is Agentic AI different from passive AI?",
    "What allows neural networks to learn?",
]

for question_text in queries:
    # Echo the question before invoking the chain, then print its answer.
    print(f"\nQ: {question_text}")
    answer = rag_chain.invoke(question_text)
    print(f"A: {answer}")

Creating documents from dummy data...
Successfully created 7 chunks.
Creating embeddings...
Initializing LLM...


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cpu
  llm = HuggingFacePipeline(pipeline=hf_pipeline)



--- TEST RESULTS ---

Q: What is Retrieval-Augmented Generation?
A: a technique for enhancing the accuracy and reliability of generative AI models with facts fetched from external sources

Q: How is Agentic AI different from passive AI?
A: agentic AI can break down tasks, use tools, and iterate on Retrieval-Augmented Generation (RAG) is a technique for enhancing the accuracy and reliability of generative AI models with facts fetched from external sources

Q: What allows neural networks to learn?
A: large amounts of data


In [2]:
# Install compatible versions of torch and torchvision together
%pip install -U torch torchvision torchaudio

Collecting torchvision
  Downloading torchvision-0.25.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (5.4 kB)
Collecting torchaudio
  Downloading torchaudio-2.10.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (6.9 kB)
Downloading torchvision-0.25.0-cp312-cp312-manylinux_2_28_x86_64.whl (8.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.1/8.1 MB[0m [31m69.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading torchaudio-2.10.0-cp312-cp312-manylinux_2_28_x86_64.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m66.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torchvision, torchaudio
  Attempting uninstall: torchvision
    Found existing installation: torchvision 0.24.0+cpu
    Uninstalling torchvision-0.24.0+cpu:
      Successfully uninstalled torchvision-0.24.0+cpu
  Attempting uninstall: torchaudio
    Found existing installation: torchaudio 2.9.0+cpu
    Uninstalling torchaudio-2.9.0+cpu:
      S

In [2]:
%pip install pdfplumber

Collecting pdfplumber
  Downloading pdfplumber-0.11.9-py3-none-any.whl.metadata (43 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pdfminer.six==20251230 (from pdfplumber)
  Downloading pdfminer_six-20251230-py3-none-any.whl.metadata (4.3 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-5.4.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.9/67.9 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
Downloading pdfplumber-0.11.9-py3-none-any.whl (60 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.0/60.0 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pdfminer_six-20251230-py3-none-any.whl (6.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━