### Setting Environment
- conda create --name rag-project python=3.12
- conda active rag-project
- pip install langchain langchain-text-splitters langchain-community bs4
- pip install "langchain[google-genai]

In [41]:
from langchain.chat_models import init_chat_model
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
import bs4
from langchain_community.document_loaders import WebBaseLoader
from dotenv import load_dotenv
import os

In [42]:
load_dotenv()  # Load environment variables from .env file
api_key = os.getenv("GOOGLE_API_KEY")
os.environ["GOOGLE_API_KEY"] = api_key
model = init_chat_model("google_genai:gemini-2.5-flash-lite")

In [43]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [44]:
vector_store = InMemoryVectorStore(embeddings)

### Web Scraping with LangChain
การดึงเนื้อหาจากเว็บไซต์โดยใช้ `WebBaseLoader` และกรองข้อมูลด้วย SoupStrainer

### Step:
1. กำหนด strainer เพื่อกรองเฉพาะหัวข้อและเนื้อหาบทความ
2. โหลดข้อมูลจาก URL ที่ต้องการ
3. ตรวจสอบและแสดงผลข้อมูลที่ได้

In [45]:
bs4_strainer = bs4.SoupStrainer(class_=("article-title", "text"))
loader = WebBaseLoader(
    web_paths=("https://www.geeksforgeeks.org/software-engineering/what-is-task-management/",),
    bs_kwargs={"parse_only": bs4_strainer},
)
documents = loader.load()
print(f"Total characters in document: {len(documents[0].page_content)}")

Total characters in document: 17753


In [46]:
print(documents[0].page_content[:500])


What is Task Management?


Task management in project management refers to the process of planning, organizing, and executing tasks within a project to achieve specific goals and objectives. It involves identifying tasks, assigning responsibilities, setting deadlines, and monitoring progress to ensure that projects are completed on time and within budget. Effective task management is crucial for project success as it helps teams stay organized, prioritize work, and achieve desired outcomes effi


## RAG Document Processing

### Step 1: Split Documents.

In [47]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, # 1. แบ่งเอกสารเป็นชิ้นเล็ก (chunks) ขนาด 1,000 ตัวอักษร
    chunk_overlap=200, # 2. กำหนดการทับซ้อนกันระหว่างชิ้น (overlap) เป็น 200 ตัวอักษร
    add_start_index=True, # 3. เพิ่มดัชนีเริ่มต้นในแต่ละชิ้น
)
all_splits = text_splitter.split_documents(documents)
print(f"Total splits created: {len(all_splits)} sub-documents")


Total splits created: 26 sub-documents


### Step 2: Store in Vector Database

In [48]:
documents_id = vector_store.add_documents(all_splits)
print(documents_id[:3])

['62982ed9-74f0-4c14-8fe5-ff4ef656acd9', 'd6b2b35b-8a0a-405b-9bd2-dc89d2963408', '6c1823d2-d5a1-4d5c-aaec-28b024490258']


### Step 3: Persist to Disk
- บันทึก vector store เพื่อใช้ซ้ำ

In [49]:
vector_store.dump('Vector_store_original')

### Step 4: Load from Disk

In [50]:
vector_store = InMemoryVectorStore.load('vector_store_original', embeddings)

## RAG Agent with LangChain
### Step 1: สร้าง Retrieval Tool สำหรับค้นหาข้อมูล

In [51]:
from langchain.tools import tool

@tool(response_format="content_and_artifact")
def retrive_context(query: str):
    """Retrieve information to help answer a query."""
    retrieved_docs = vector_store.similarity_search(query, k=4 ) #ใช้ similarity search หา top-2 documents
    serialized = "\n\n".join(f"Source: {doc.metadata}\nContent: {doc.page_content}" for doc in retrieved_docs)
    return serialized, retrieved_docs

### Step 2: สร้าง Agent

In [52]:
from langchain.agents import create_agent
# ใส่ system prompt เพื่อควบคุมพฤติกรรมของโมเดล
system_prompt =("""
    - You are a helpful AI assistant.
    - When you receive a response from a tool, you MUST summarize it and provide a final answer to the user.
    - DO NOT return an empty response.
    - Always synthesize the information retrieved."""
)
agent = create_agent(model,tools=[retrive_context], system_prompt=system_prompt)

### Step 3: Query และรับคำตอบ

In [53]:
query = ("Explain task management techniques")

for event in agent.stream(
    {"messages":[{"role":"user","content":query}]},
    stream_mode="values",
):
    event['messages'][-1].pretty_print()


Explain task management techniques
Tool Calls:
  retrive_context (fdb5a8c6-c868-4949-923d-3bbb1d258819)
 Call ID: fdb5a8c6-c868-4949-923d-3bbb1d258819
  Args:
    query: task management techniques
Name: retrive_context

Source: {'source': 'https://www.geeksforgeeks.org/software-engineering/what-is-task-management/', 'start_index': 13363}
Content: Tools: Utilize task management software or tools that suit your needs. These tools help centralize tasks, set priorities, track progress, and collaborate with team members efficiently.Set Clear Goals and Priorities: Define clear objectives for each task or project. Prioritize tasks based on importance, deadlines, and dependencies to ensure that critical work gets completed first.Break Tasks into Smaller Steps: Break down larger tasks into smaller, manageable sub-tasks or milestones. This makes tasks more achievable and helps maintain momentum as you progress through the project.Establish Deadlines: Set realistic deadlines for tasks and projec

## Token Cost Calculator for API

### การคำนวณค่าใช้จ่าย API

**อัตราราคา Gemini 2.5 flash-lite**
- Input: $0.10 / 1M tokens
- Output: $0.40 / 1M tokens


In [54]:
example_prompt = """
Example:

Context: Task Prioritization is the process where tasks are prioritized based on their importance, deadlines, dependencies, 
and required resources. This helps in allocating resources effectively.

Question: Explain task management techniques

Answer: *   **Communicate Effectively:** Maintain open and clear communication channels with team members regarding task assignments, expectations, and progress updates.
*   **Utilize Tools:** Employ task management software or tools to centralize tasks, set priorities, track progress, and facilitate collaboration.
*   **Track Progress Regularly:** Consistently monitor the status of tasks to identify any bottlenecks or issues and ensure the project stays on schedule.
*   **Review and Adjust:** Periodically review your task management processes and workflows, making necessary adjustments to strategies or priorities based on performance and lessons learned.
"""
data_prompt = """
What is the standard method for Task Decomposition?

Once you get the answer, look up common extensions of that method.
================================== Ai Message ==================================
Tool Calls:
    retrieve_context (90225cc7-992b-42a7-b6d4-6d8f526822a7)
    all ID: 90225cc7-992b-42a7-b6d4-6d8f526822a7
    Args:
    query: Standard method for Task Decomposition and common extensions
================================= Tool Message =================================
Name: retrieve_context

Source: {'source': 'https://www.geeksforgeeks.org/software-engineering/what-is-task-management/', 'start_index': 1327}
Content: This involves breaking down the project into smaller, manageable components or tasks. Each task should be clearly defined with specific objectives and deliverables.Task Prioritization: Once tasks are identified, they need to be prioritized based on their importance, deadlines, dependencies, and resources required. Prioritization helps in allocating resources effectively and ensuring that critical tasks are completed on time.Assigning Responsibilities: After prioritization, tasks are assigned to team members or individuals responsible for their execution. Clear assignment of responsibilities ensures accountability and helps team members understand their roles and contributions to the project.Setting Deadlines: Tasks are associated with deadlines or target dates for completion. Setting realistic and achievable deadlines is crucial for managing time effectively and avoiding delays in project delivery.Monitoring Progress: Task management involves continuous monitoring of task progress to

Source: {'source': 'https://www.geeksforgeeks.org/software-engineering/what-is-task-management/', 'start_index': 1}
Content: What is Task Management?
"""
output_model = """ ...
*   **Communicate Effectively:** Maintain open and clear communication channels with team members regarding task assignments, expectations, and progress updates.
*   **Utilize Tools:** Employ task management software or tools to centralize tasks, set priorities, track progress, and facilitate collaboration.
*   **Track Progress Regularly:** Consistently monitor the status of tasks to identify any bottlenecks or issues and ensure the project stays on schedule.
*   **Review and Adjust:** Periodically review your task management processes and workflows, making necessary adjustments to strategies or priorities based on performance and lessons learned. """


In [55]:
import tiktoken
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens
def estimate_price_api(system_prompt, example_prompt, data_prompt,output_model, rateBathToUS):
    tokens = num_tokens_from_string(system_prompt + data_prompt + example_prompt, "cl100k_base")
    tokens_input = num_tokens_from_string(system_prompt + data_prompt + example_prompt, "cl100k_base")
    tokens_output = num_tokens_from_string(output_model, "cl100k_base")
    price_baht_input = round(tokens_input / 1_000_000 * 0.10 * rateBathToUS, 2)
    price_baht_output = round(tokens_output / 1_000_000 * 0.40 * rateBathToUS, 2)
    price_baht_total = price_baht_input + price_baht_output
    return price_baht_total

In [56]:
price_function = estimate_price_api(system_prompt,example_prompt,output_model,data_prompt, 35)
print("The price of using llm model in Bath is around" ,price_function)

The price of using llm model in Bath is around 0.0
