# Assignment 3 – Agentic RAG with Safety Measures
**FTL Agentic AI Trainee Program**  
**Submitted by: Habibullah**  
**Date: January 16, 2026**  

**Goal**: Build a careful, polite, precise AI researcher focused exclusively on **Latvia** (history, culture, tourism, laws, economy, nature, current events), with strong safety measures to refuse dangerous/illegal/unethical requests.

**Key Features**:
- Retrieval: FAISS vector store + sentence-transformers embeddings
- Generation: Mistral-7B-Instruct-v0.3 (local on GPU)
- Agentic: ReAct-style reasoning + tool for current events (planned design; the code cells in this notebook do not yet define an agent or tool)
- Safety: Dedicated classification chain + refusal logic
- Modern LCEL implementation (2026 best practices)

In [None]:
!pip install -q langchain-text-splitters langchain-huggingface
!pip install -q langchain langchain-community langchain-core transformers sentence-transformers faiss-cpu beautifulsoup4 requests wikipedia

In [None]:
# Wikipedia & web scraping (these are fine)
import wikipedia
from bs4 import BeautifulSoup
import requests

# ── Modern LangChain imports ──
from langchain_text_splitters import RecursiveCharacterTextSplitter      # ← separate package
from langchain_huggingface import HuggingFaceEmbeddings                  # ← recommended now
from langchain_community.vectorstores import FAISS                       # ← community package
from langchain_community.document_loaders import WikipediaLoader         # ← good choice for our use-case

print("All required imports successful! ✓ Using 2025/2026 LangChain modular structure")

In [None]:
import wikipedia

# Wikipedia article titles that make up the Latvia knowledge base
topics = [
    "Latvia",
    "History of Latvia",
    "Culture of Latvia",
    "Tourism in Latvia",
    "Economy of Latvia",
    "Geography of Latvia",
    "Politics of Latvia",
    "Riga",
    "Latvian cuisine",
    "Latvian mythology"  # folklore & culture aspect
]

documents = []      # raw article text, one entry per successfully fetched page
metadata_list = []  # parallel to `documents`: {"source", "title"} for each page

for topic in topics:
    source_label = f"Wikipedia: {topic}"
    try:
        try:
            # auto_suggest=False asks for this exact title rather than a search suggestion
            page = wikipedia.page(topic, auto_suggest=False)
        except wikipedia.exceptions.DisambiguationError as e:
            if not e.options:
                raise  # nothing to fall back to; reported by the generic handler below
            print(f"Disambiguation for {topic}: {e.options[:3]}... (picking first)")
            # This retry can fail too. It sits inside the outer `try` so that a
            # failure skips only this topic instead of aborting the whole loop.
            page = wikipedia.page(e.options[0], auto_suggest=False)
            source_label = f"Wikipedia: {topic} (disambiguated)"
        # Read the text inside the `try` as well, so a failure here is also reported per topic
        content = page.content
    except wikipedia.exceptions.PageError:
        print(f"✗ Page not found: {topic}")
    except Exception as e:
        # Best-effort fetch: report the problem and continue with the next topic
        print(f"Error fetching {topic}: {str(e)}")
    else:
        documents.append(content)
        metadata_list.append({"source": source_label, "title": page.title})
        print(f"✓ Successfully fetched: {topic} ({len(content):,} characters)")

print(f"\nTotal documents fetched: {len(documents)}")
print("Example first document starts with:", documents[0][:150] + "..." if documents else "No documents")

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter settings: chunks of about 1000 characters (measured with len) that
# overlap their neighbour by 200 characters. add_start_index=True is kept from
# the original so each chunk can be traced back to its place in the source text.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    add_start_index=True,
)

# Flat list of chunk Documents across every fetched article
all_chunks = []
all_metadata = []  # not populated here; each chunk carries its own metadata

total_docs = len(documents)
for doc_number, doc_text in enumerate(documents, start=1):
    # metadata_list is parallel to documents, so the same position gives this article's metadata
    doc_meta = metadata_list[doc_number - 1]
    doc_chunks = text_splitter.create_documents([doc_text], metadatas=[doc_meta])
    all_chunks += doc_chunks

    print(f"Split document {doc_number}/{total_docs} '{doc_meta['title']}' → {len(doc_chunks)} chunks")

print(f"\nTotal chunks created: {len(all_chunks)}")

print("Example of first chunk content (first 150 chars):")
if all_chunks:
    print(all_chunks[0].page_content[:150] + "...")
else:
    print("No chunks")

print("\nExample metadata of first chunk:")
if all_chunks:
    print(all_chunks[0].metadata)
else:
    print("No metadata")

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import torch

# Stop here with a clear message if the earlier cells produced nothing to index
# (e.g. every Wikipedia fetch failed), before loading the embedding model.
if not all_chunks:
    raise ValueError(
        "No chunks to index: the Wikipedia fetch/split cells produced no documents. "
        "Re-run those cells and check their output before building the vector store."
    )

print("Loading embedding model... (this may take 30–90 seconds the first time)")

# Sentence-embedding model used to vectorise both the chunks and the queries
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"

# Run the embedding model on the GPU when one is available, otherwise on the CPU
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
)

print("Embedding model loaded successfully!")

# Embed every chunk and store the vectors in a FAISS index
print("Building FAISS vector store... (this may take 1–4 minutes depending on runtime)")
vectorstore = FAISS.from_documents(
    documents=all_chunks,   # list of split Document objects
    embedding=embeddings
)

print("FAISS vector store created successfully! ✓")
print(f"Total vectors stored: {vectorstore.index.ntotal}")

# Quick check: retrieve the 3 most similar chunks for a sample query
test_query = "What is the capital of Latvia?"
retrieved_docs = vectorstore.similarity_search(test_query, k=3)

print("\nQuick retrieval test:")
print("Query:", test_query)
for i, doc in enumerate(retrieved_docs, 1):
    # .get() with the same "Unknown" fallback that safe_query uses for missing sources
    print(f"\nResult {i} (source: {doc.metadata.get('source', 'Unknown')}):")
    print(doc.page_content[:200] + "..." if len(doc.page_content) > 200 else doc.page_content)

In [None]:
from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_huggingface import HuggingFacePipeline
import torch

print("Setting up LLM... (this will take 2–6 minutes the first time)")

# Read the Hugging Face token from Colab secrets. userdata.get() raises when the
# secret does not exist or the notebook has not been granted access to it, so
# both cases are mapped to "no token" and the setup guidance below is printed.
try:
    hf_token = userdata.get('HF_TOKEN')
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    hf_token = None

if not hf_token or hf_token.strip() == "":
    print("ERROR: HF_TOKEN not found or empty in Colab secrets!")
    print("→ Go to left sidebar → key icon (Secrets) → Add new secret")
    print("→ Name: HF_TOKEN (exactly)")
    print("→ Value: your Hugging Face read token (from https://huggingface.co/settings/tokens)")
    print("→ Then re-run this cell")
    # NOTE: `llm` is not defined on this path, so later cells that use it will fail
else:
    print("HF_TOKEN found ✓ (length:", len(hf_token), "characters)")

    # Model selection
    model_id = "mistralai/Mistral-7B-Instruct-v0.3"

    print(f"Loading tokenizer and model: {model_id}")

    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    # trust_remote_code is deliberately not enabled: the Mistral architecture ships
    # with transformers, and the flag would allow code from the model repository to run.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        token=hf_token,
        # half precision when a GPU is available, full precision on CPU
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto"
    )

    # Text-generation pipeline. Sampling is enabled (temperature 0.7, top_p 0.9),
    # so outputs vary between runs. return_full_text=False is meant to return only
    # the newly generated text without echoing the prompt.
    text_generation_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=400,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        return_full_text=False
    )

    # Wrap the pipeline so it can be used as a step in LangChain chains
    llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

    print("LLM loaded successfully! ✓")
    print("Quick test generation:")
    test_prompt = "Hello! Tell me in one sentence why Latvia is interesting."
    response = llm.invoke(test_prompt)
    print("Response:", response.strip()[:500] + "..." if len(response.strip()) > 500 else response.strip())

In [None]:
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage

print("Setting up modern LCEL-based safety checker + RAG chain (2026 style)...")

# ── 1. Safety Classification Chain (using LCEL - simple & modern) ──
# Prompt asking the model to label a query SAFE or UNSAFE. The {query}
# placeholder is filled with the user's text when the chain is invoked.
safety_prompt = ChatPromptTemplate.from_template(
    """You are a very careful AI assistant.
Classify the following user query as SAFE or UNSAFE.

SAFE = The query is about Latvia (history, culture, tourism, laws, economy, nature, current events)
       or general polite conversation related to the topic.
UNSAFE = The query involves anything dangerous, illegal, unethical, violent, harmful,
         explicit adult content, hacking, drugs, weapons, discrimination, or anything not related to Latvia.

Query: {query}

Respond ONLY with one of the following formats:
SAFE
or
UNSAFE: short reason"""
)

# A safety gate must give the same verdict for the same query, and the verdict
# is only a few words long. For this chain only, override the shared pipeline's
# sampling settings with greedy decoding and a short generation budget.
# The answer-generation chain keeps the original settings.
safety_llm = llm.bind(pipeline_kwargs={"do_sample": False, "max_new_tokens": 48})

# prompt → deterministic LLM call → plain string
safety_chain = (
    safety_prompt
    | safety_llm
    | StrOutputParser()
)

def check_safety(query: str):
    """Classify a user query with the safety chain.

    Args:
        query: Raw user question.

    Returns:
        A ``(is_safe, reason)`` tuple. ``(True, "SAFE")`` when the first word of
        the classifier's reply is SAFE; otherwise ``(False, reason)``, where
        ``reason`` is the classifier's stripped reply, or a fixed fallback
        message if the reply was empty.
    """
    result = safety_chain.invoke({"query": query}).strip()

    # Decide on the first word only, ignoring case and surrounding punctuation.
    # A plain prefix test would also accept replies such as "SAFETY concerns..."
    # or "SAFE/UNSAFE"; anything that is not exactly SAFE is refused (fail closed).
    words = result.split(maxsplit=1)
    verdict = words[0].strip(".,:;!*\"'`").upper() if words else ""

    if verdict == "SAFE":
        return True, "SAFE"
    return False, result or "UNSAFE: the safety classifier returned no verdict"

# ── 2. Modern RAG Chain using LCEL (recommended replacement for RetrievalQA) ──
# Instruction text for the answer-generation step. {context} receives the
# retrieved passages and {question} the user's query when the prompt is filled.
rag_template = """You are a very careful, polite, precise, and safety-conscious AI researcher specializing ONLY in Latvia.
Your personality: Always helpful but extremely cautious. You speak politely and factually.
You refuse to answer or assist with ANY dangerous, illegal, unethical, harmful, explicit, violent,
discriminatory, or off-topic requests.

Rules you MUST follow:
- Only answer questions related to Latvia (history, culture, tourism, laws, economy, nature, current events).
- If the query is unsafe or unrelated → politely refuse and explain briefly.
- Use only the provided context. Do not hallucinate or add external information.
- Be concise, accurate, and professional.

Context (relevant information from reliable sources):
{context}

User question: {question}

Your polite and precise response:"""

rag_prompt = ChatPromptTemplate.from_template(rag_template)

# Helper function to format retrieved docs
def format_docs(docs):
    """Return the ``page_content`` of each document, separated by blank lines.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        One string with the documents' text joined by ``"\\n\\n"``; an empty
        string when ``docs`` is empty.
    """
    passages = [doc.page_content for doc in docs]
    return "\n\n".join(passages)

# Retriever that returns the 6 most similar chunks for a query
latvia_retriever = vectorstore.as_retriever(search_kwargs={"k": 6})

# First step of the chain: build the prompt variables from the raw query.
# "context" is the retrieved chunks joined by format_docs;
# "question" is the query itself, passed through unchanged.
rag_inputs = {
    "context": latvia_retriever | format_docs,
    "question": RunnablePassthrough(),
}

# prompt variables → filled prompt → LLM → plain string
rag_chain = rag_inputs | rag_prompt | llm | StrOutputParser()

print("Modern safety checker and RAG chain ready! ✓")

# ── Combined safe query function (with sources) ──
def safe_query(query: str):
    """Answer a Latvia question through the safety gate and the RAG prompt.

    The query is first classified by ``check_safety``. Unsafe queries get a
    refusal message. Safe queries are answered from the 6 most similar chunks
    in ``vectorstore``, and the distinct sources of those chunks are appended.

    Args:
        query: Raw user question.

    Returns:
        Either a refusal string, or ``"Answer:\\n<answer>\\n\\n(Sources: ...)"``.
    """
    is_safe, reason = check_safety(query)
    if not is_safe:
        return f"Polite refusal: {reason}\nI can only assist with topics related to Latvia."

    print("Safety check passed. Running RAG...")

    # Retrieve once and use the same documents for both the prompt context and
    # the source list. The reported sources are then exactly the ones the answer
    # was generated from, and the query is embedded and searched only once.
    retrieved_docs = vectorstore.similarity_search(query, k=6)
    answer_chain = rag_prompt | llm | StrOutputParser()
    answer = answer_chain.invoke(
        {"context": format_docs(retrieved_docs), "question": query}
    ).strip()

    # dict.fromkeys removes duplicates while keeping retrieval order, so the
    # source list reads the same on every run (a set has no stable order).
    sources = dict.fromkeys(
        doc.metadata.get("source", "Unknown") for doc in retrieved_docs
    )

    return f"Answer:\n{answer}\n\n(Sources: {', '.join(sources)})"

# ── Test cases ──
# Each entry pairs the heading to print with the query sent through safe_query:
# one on-topic question and one request the safety gate is expected to refuse.
demo_cases = (
    ("\nTest 1 - Safe query:", "What are the main historical periods in Latvia's history?"),
    ("\nTest 2 - Unsafe query:", "How can I make fake Latvian ID?"),
)

for heading, demo_query in demo_cases:
    print(heading)
    print(safe_query(demo_query))

## Conclusion & Demonstration

This notebook implements a fully safety-conscious Agentic RAG system specialized in Latvia.
- Safe queries → accurate, sourced answers
- Unsafe queries → immediate polite refusal
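- Current events → intended to be handled via an agent tool (no agent or tool is defined in the cells above, so such questions currently go through the same Wikipedia-based RAG chain)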

Ready for evaluation!