## Setup & Imports

In [33]:
import os
import bs4
from pathlib import Path
from langchain.chat_models import init_chat_model
import csv
from langchain_core.documents import Document
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader, DirectoryLoader, TextLoader
from dotenv import load_dotenv

In [34]:
load_dotenv()  # Load environment variables from .env file
api_key = os.getenv("GOOGLE_API_KEY")
os.environ["GOOGLE_API_KEY"] = api_key
model = init_chat_model("google_genai:gemini-2.5-flash-lite")

In [35]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [36]:
vector_store = InMemoryVectorStore(embeddings)

## Load knowledge docs

In [37]:
KNOWLEDGE_PATH = Path("knowledge")
QA_FILE = KNOWLEDGE_PATH / "knowledge_qa.csv"

# ---------------------------
# Load Q&A CSV
# ---------------------------
docs = []

with QA_FILE.open(newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        content = f"Q: {row['Question']}\nA: {row['Answer']}"
        docs.append(
            Document(
                page_content=content,
                metadata={
                    "type": "qa",
                    "source": QA_FILE.name
                }
            )
        )

print(f"Loaded {len(docs)} Q&A documents")

if docs:
    print(docs[0].page_content[:200])

# ---------------------------
# Load Markdown Docs
# ---------------------------
raw_documents = []

for file in KNOWLEDGE_PATH.glob("*.md"):
    text = file.read_text(encoding="utf-8")
    raw_documents.append(
        Document(
            page_content=text,
            metadata={
                "type": "doc",
                "source": file.name
            }
        )
    )

print(f"Loaded {len(raw_documents)} markdown documents")

# ---------------------------
# Combine & Stats
# ---------------------------
documents = docs + raw_documents

total_chars = sum(len(d.page_content) for d in documents)
print(f"Total Characters (All Pages): {total_chars}")

Loaded 32 Q&A documents
Q: What is the Intern Task Management System?
A: A web-based application designed to help teams manage projects track tasks and collaborate effectively.
Loaded 1 markdown documents
Total Characters (All Pages): 8045


## Text Splitting

In [38]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True
)

# ใช้ documents ที่คุณรวมไว้แล้ว
all_splits = text_splitter.split_documents(documents)

print(f"Total splits created: {len(all_splits)} sub-documents")

# preview
if all_splits:
    print(all_splits[0].page_content[:200])
    print(all_splits[0].metadata)

Total splits created: 38 sub-documents
Q: What is the Intern Task Management System?
A: A web-based application designed to help teams manage projects track tasks and collaborate effectively.
{'type': 'qa', 'source': 'knowledge_qa.csv', 'start_index': 0}


## Vector Store + Retriever

In [39]:
documents_id = vector_store.add_documents(docs)
print(documents_id[:3])

['ef460c40-e681-4723-8601-1ad5e0faa5e9', 'b287001e-622d-41dc-b047-55c4a1b4cdd0', '540e1491-9a8f-49b9-93f8-bdad8159b4f5']


In [40]:
vector_store.dump('Vector_Store_RAG')

In [41]:
vector_store = InMemoryVectorStore.load('Vector_Store_RAG', embeddings)

## RAG Agent with LangChain

In [42]:
from langchain.tools import tool

@tool(response_format="content_and_artifact")
def retrive_context(query: str):
    """Retrieve information to help answer a query."""
    retrieved_docs = vector_store.similarity_search(query, k=3 ) #ใช้ similarity search 
    serialized = "\n\n".join(f"Source: {doc.metadata}\nContent: {doc.page_content}" for doc in retrieved_docs)
    return serialized, retrieved_docs

In [43]:
from langchain.agents import create_agent
# ใส่ system prompt เพื่อควบคุมพฤติกรรมของโมเดล
system_prompt =("""
    You are an AI assistant for a Task Management System.

Your role is to answer user questions using information retrieved from the system's knowledge base.
When a retrieval tool is used, you MUST:
- Read and understand the retrieved content.
- Synthesize the information into a clear, concise, and helpful answer.
- Base your response ONLY on the retrieved information.

If the retrieved information is insufficient or not relevant:
- Clearly state that the information is not available in the current knowledge base.

DO NOT:
- Invent or assume information.
- Return an empty response.
- Copy the retrieved text verbatim without summarizing.

Always provide a final, user-facing answer that is easy to understand.
If the information is not found in the knowledge base,
clearly say so and suggest what type of information
would be needed to answer the question.

DO NOT return an empty response.
"""
)
agent = create_agent(model,tools=[retrive_context], system_prompt=system_prompt)

In [47]:
query = "What can an Admin do?"

for event in agent.stream(
        {"messages":[{"role":"user","content":query}]},
        stream_mode="values",
    ):
        event['messages'][-1].pretty_print()


What can an Admin do?
Tool Calls:
  retrive_context (6e12e4f6-b06e-4acf-a5e4-8650c5d8e774)
 Call ID: 6e12e4f6-b06e-4acf-a5e4-8650c5d8e774
  Args:
    query: What can an Admin do?
Name: retrive_context

Source: {'type': 'qa', 'source': 'knowledge_qa.csv'}
Content: Q: What can an Admin do?
A: An Admin can create edit and delete users assign roles view analytics and configure system settings.

Source: {'type': 'qa', 'source': 'knowledge_qa.csv'}
Content: Q: Why is the Admin role important?
A: Admins ensure system security and proper access control.

Source: {'type': 'qa', 'source': 'knowledge_qa.csv'}
Content: Q: Why is the PM role separate from Admin?
A: PM focuses on project execution while Admin handles system-level management.

An Admin can create, edit, and delete users, assign roles, view analytics, and configure system settings. They are also responsible for ensuring system security and proper access control.


In [45]:
print(event['messages'][-1].pretty_print())


An Admin can create, edit, and delete users, assign roles, view analytics, and configure system settings. They are also important for ensuring system security and proper access control.
None
