## Setup & Imports

In [1]:
import os
import bs4
from pathlib import Path
from langchain.chat_models import init_chat_model
import csv
from langchain_core.documents import Document
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader, DirectoryLoader, TextLoader
from dotenv import load_dotenv

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Load variables from the local .env file; override=True lets .env values
# replace variables that are already present in the process environment.
load_dotenv(override=True)

# Fail early with an actionable message. Without this check a missing key
# surfaces as an opaque "TypeError: str expected, not NoneType" on the
# os.environ assignment below.
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )
os.environ["GOOGLE_API_KEY"] = api_key

# Chat model used by the agent further down in the notebook.
model = init_chat_model("google_genai:gemini-2.5-flash-lite")

In [3]:
# Embedding client built with the "models/gemini-embedding-001" model; it is
# handed to the vector store below.
# NOTE(review): presumably authenticates through the GOOGLE_API_KEY environment
# variable set in the previous cell — confirm.
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

In [4]:
# In-memory vector store backed by `embeddings`. The same name is rebound later
# in the notebook when the store is reloaded from disk.
vector_store = InMemoryVectorStore(embeddings)

## Load knowledge docs

In [5]:
KNOWLEDGE_PATH = Path("knowledge")
QA_FILE = KNOWLEDGE_PATH / "knowledge_qa.csv"

# ---------------------------
# Load Q&A CSV
# ---------------------------
# Each CSV row becomes one Document whose text is the question/answer pair.
# The metadata tags it as "qa" and records the CSV file name as its source.
docs = []

with QA_FILE.open(newline="", encoding="utf-8") as qa_handle:
    docs.extend(
        Document(
            page_content=f"Q: {record['Question']}\nA: {record['Answer']}",
            metadata={"type": "qa", "source": QA_FILE.name},
        )
        for record in csv.DictReader(qa_handle)
    )

print(f"Loaded {len(docs)} Q&A documents")

# Sanity check: show at most the first 200 characters of the first entry.
if docs:
    print(docs[0].page_content[:200])

# ---------------------------
# Load Markdown Docs
# ---------------------------
# Every *.md file directly inside KNOWLEDGE_PATH becomes one Document tagged
# "doc", with the file name recorded as its source.
# The paths are sorted because glob() does not guarantee any particular order;
# a stable order keeps the combined document list (and therefore the splits
# produced from it) reproducible across runs and machines.
raw_documents = [
    Document(
        page_content=md_path.read_text(encoding="utf-8"),
        metadata={"type": "doc", "source": md_path.name},
    )
    for md_path in sorted(KNOWLEDGE_PATH.glob("*.md"))
]

print(f"Loaded {len(raw_documents)} markdown documents")

# ---------------------------
# Combine & Stats
# ---------------------------
# Q&A documents come first, followed by the markdown documents.
documents = [*docs, *raw_documents]

# Overall corpus size, measured in characters of page_content.
total_chars = sum(map(len, (entry.page_content for entry in documents)))
print(f"Total Characters (All Pages): {total_chars}")

Loaded 32 Q&A documents
Q: What is the Intern Task Management System?
A: A web-based application designed to help teams manage projects track tasks and collaborate effectively.
Loaded 1 markdown documents
Total Characters (All Pages): 8045


## Text Splitting

In [6]:
# Splitter settings kept together so they are easy to tune.
# add_start_index=True is presumably what adds 'start_index' to each chunk's
# metadata (it appears in the preview printed below) — confirm.
splitter_options = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Split the combined `documents` list that was assembled earlier.
all_splits = text_splitter.split_documents(documents)

print(f"Total splits created: {len(all_splits)} sub-documents")

# Preview the first chunk: up to 200 characters of text, then its metadata.
if all_splits:
    first_split = all_splits[0]
    print(first_split.page_content[:200])
    print(first_split.metadata)

Total splits created: 38 sub-documents
Q: What is the Intern Task Management System?
A: A web-based application designed to help teams manage projects track tasks and collaborate effectively.
{'type': 'qa', 'source': 'knowledge_qa.csv', 'start_index': 0}


## Vector Store + Retriever

In [7]:
# Add to Vector Store
# Passes every split to the store and keeps the ids it returns.
# NOTE(review): re-running this cell calls add_documents again on the same
# store — presumably that inserts the splits a second time; confirm before
# re-running without recreating `vector_store`.
document_ids = vector_store.add_documents(documents=all_splits)
# Show the first three ids as a quick sanity check.
print(document_ids[:3])

['8e2db3b7-a812-4ca6-bfeb-3e52b9787539', '7841571a-5839-4c94-a73c-c87554ca2169', 'e0c78dad-bad4-44f7-bddb-e3362d435af6']


In [8]:
# Dump the populated store to the relative path 'Vector_Store_RAG'; the next
# cell loads it back from the same path.
vector_store.dump('Vector_Store_RAG')

In [9]:
# Rebind `vector_store` to the copy loaded from 'Vector_Store_RAG', reusing the
# same `embeddings` object that was used to build the store.
vector_store = InMemoryVectorStore.load('Vector_Store_RAG', embeddings)

## RAG Agent with LangChain

In [10]:
from langchain.tools import tool

@tool(response_format="content_and_artifact")
def retrive_context(query: str):
    """Retrieve information to help answer a query."""
    # The docstring above is intentionally left untouched: presumably @tool
    # exposes it to the model as the tool description — confirm before editing.
    # Similarity search for the three closest chunks to the query.
    top_matches = vector_store.similarity_search(query, k=3)
    # One "Source / Content" section per hit, separated by blank lines.
    sections = [
        f"Source: {match.metadata}\nContent: {match.page_content}"
        for match in top_matches
    ]
    # Return the text plus the raw documents, in line with
    # response_format="content_and_artifact".
    return "\n\n".join(sections), top_matches

In [11]:
from langchain.agents import create_agent
# Set a system prompt to control the model's behavior.
# The prompt text is passed to the agent as-is, so its wording and whitespace
# are kept exactly as written.
system_prompt = """
    You are an AI assistant for a Task Management System.

Your role is to answer user questions using information retrieved from the system's knowledge base.
When a retrieval tool is used, you MUST:
- Read and understand the retrieved content.
- Synthesize the information into a clear, concise, and helpful answer.
- Base your response ONLY on the retrieved information.

If the retrieved information is insufficient or not relevant:
- Clearly state that the information is not available in the current knowledge base.

DO NOT:
- Invent or assume information.
- Return an empty response.
- Copy the retrieved text verbatim without summarizing.

Always provide a final, user-facing answer that is easy to understand.
If the information is not found in the knowledge base,
clearly say so and suggest what type of information
would be needed to answer the question.

DO NOT return an empty response.
"""

# Build the agent from the chat model, with the retrieval tool as its only tool.
agent = create_agent(
    model,
    tools=[retrive_context],
    system_prompt=system_prompt,
)

In [12]:
query = "What can an Admin do?"

# Single-turn conversation payload containing only the user's question.
agent_input = {"messages": [{"role": "user", "content": query}]}

# Print the most recent message of every streamed event. The loop variable is
# deliberately named `event` because a later cell reads it after the loop.
for event in agent.stream(agent_input, stream_mode="values"):
    latest_message = event["messages"][-1]
    latest_message.pretty_print()


What can an Admin do?
Tool Calls:
  retrive_context (b822da35-8d49-48e2-8c48-d6527538bae8)
 Call ID: b822da35-8d49-48e2-8c48-d6527538bae8
  Args:
    query: Admin capabilities in the Task Management System
Name: retrive_context

Source: {'type': 'doc', 'source': 'knowledge_base.md', 'start_index': 0}
Content: # üåê System Overview

## What is this system?
The Intern Task Management System is a web-based application designed to help teams manage projects, track tasks, and collaborate effectively.

## Why is this system needed?
Without a task management system, teams often face:
- Unclear task ownership
- Missed deadlines
- Poor visibility of project progress

This system provides structure, visibility, and accountability for team-based work.

## Key Features
- Authentication & Authorization
- User Management
- Project Dashboard
- Kanban Board
- Real-time task updates

---

# üë• User Roles & Permissions

## Admin
### What can Admin do?
- Create, edit, and delete users
- Assign user r

In [13]:
# Show the last message of the final streamed event (`event` is left over from
# the streaming loop in the previous cell).
# pretty_print() writes the message itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
event['messages'][-1].pretty_print()

None
