In [26]:
# Install Packages (Run this once)
# pip install langchain langchain-community langchain-google-genai langchain-text-splitters chromadb pypdf bs4 python-dotenv

In [27]:
from langchain.chat_models import init_chat_model
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
import bs4
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
from dotenv import load_dotenv
import os

In [28]:

load_dotenv(override=True) # Load environment variables from .env file
api_key = os.getenv("GOOGLE_API_KEY")
os.environ["GOOGLE_API_KEY"] = api_key
model = init_chat_model("google_genai:gemini-2.5-flash-lite")

In [29]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

In [30]:
vector_store = InMemoryVectorStore(embeddings)

## üì• RAG: Data Ingestion (PDF & Web)
‡∏Å‡∏≤‡∏£‡∏î‡∏∂‡∏á‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏´‡∏≤‡∏à‡∏≤‡∏Å PDF ‡πÅ‡∏•‡∏∞ Web
- ‡πÅ‡∏ö‡∏ö‡∏ó‡∏µ‡πà 1: ‡∏≠‡πà‡∏≤‡∏ô‡∏ó‡∏µ‡∏•‡∏∞‡πÑ‡∏ü‡∏•‡πå 
`loader = PyPDFLoader("path/to/your/file.pdf")`
`docs = loader.load()`
- ‡πÅ‡∏ö‡∏ö‡∏ó‡∏µ‡πà 2: ‡∏≠‡πà‡∏≤‡∏ô‡∏ó‡∏±‡πâ‡∏á‡πÇ‡∏ü‡∏•‡πÄ‡∏î‡∏≠‡∏£‡πå 
`‡πÄ‡∏≠‡∏≤‡πÑ‡∏ü‡∏•‡πå PDF ‡πÑ‡∏õ‡∏ß‡∏≤‡∏á‡πÉ‡∏ô folder 'data/recipes' `

In [31]:
# PDF Loader
try:
    loader = PyPDFLoader("data/recipes/Booklet.pdf")
    pdf_docs = loader.load()
    print(f"PDF loaded: {len(pdf_docs)} pages")
    if pdf_docs:
        print("Simple content:", pdf_docs[0].page_content[:200])
except Exception as e:
    print(f"Error loading PDF: {e}")

# Web Loader
url = "https://kohplanner.com/food/10-classic-thai-dishes-recipes/"
bs4_strainer = bs4.SoupStrainer(class_="entry-content single-content")

loader = WebBaseLoader(
    web_paths=(url,),
    bs_kwargs={"parse_only": bs4_strainer},
)
web_docs = loader.load()
print(f"Web loaded: {len(web_docs)} pages")
if web_docs:
    print("Simple content:", web_docs[0].page_content[:500])
print(f"Total characters in document: {len(web_docs[0].page_content)}")

PDF loaded: 61 pages
Simple content: 1
THAI RECIPES
FROM
ÔÇ¨ chiangmaiecolodges.com ÔÇÇ @spicyvilla ÔÖ≠ @spicyjourney ÔÖß Spicy Journey
Web loaded: 1 pages
Simple content: 





Thai cuisine is a vibrant culinary tapestry, woven with centuries of tradition, culinary skill, and rich, diverse ingredients. Its origins are deeply rooted in the country's history, heavily influenced by neighbouring countries, including China, India, Laos, and Malaysia, resulting in a blend of flavours that are as diverse as they are delicious. We hope our selected Thai dishes can demonstrate this!
Key ingredients such as jasmine rice, coconut milk, fresh herbs, fish sauce, and an array 
Total characters in document: 23541


In [32]:
# Merge all documents
all_docs = pdf_docs + web_docs
print(f"Total Documents: {len(all_docs)}")
total_chars = sum(len(doc.page_content) for doc in all_docs)
print(f"Total Characters (All Pages): {total_chars}")

Total Documents: 62
Total Characters (All Pages): 117338


In [33]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, # 1. ‡πÅ‡∏ö‡πà‡∏á‡πÄ‡∏≠‡∏Å‡∏™‡∏≤‡∏£‡πÄ‡∏õ‡πá‡∏ô‡∏ä‡∏¥‡πâ‡∏ô‡πÄ‡∏•‡πá‡∏Å (chunks) ‡∏Ç‡∏ô‡∏≤‡∏î 1,000 ‡∏ï‡∏±‡∏ß‡∏≠‡∏±‡∏Å‡∏©‡∏£
    chunk_overlap=200, # 2. ‡∏Å‡∏≥‡∏´‡∏ô‡∏î‡∏Å‡∏≤‡∏£‡∏ó‡∏±‡∏ö‡∏ã‡πâ‡∏≠‡∏ô‡∏Å‡∏±‡∏ô‡∏£‡∏∞‡∏´‡∏ß‡πà‡∏≤‡∏á‡∏ä‡∏¥‡πâ‡∏ô (overlap) ‡πÄ‡∏õ‡πá‡∏ô 200 ‡∏ï‡∏±‡∏ß‡∏≠‡∏±‡∏Å‡∏©‡∏£
    add_start_index=True, # 3. ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏î‡∏±‡∏ä‡∏ô‡∏µ‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏ï‡πâ‡∏ô‡πÉ‡∏ô‡πÅ‡∏ï‡πà‡∏•‡∏∞‡∏ä‡∏¥‡πâ‡∏ô
)
all_splits = text_splitter.split_documents(all_docs)
print(f"Total splits created: {len(all_splits)} sub-documents")

Total splits created: 171 sub-documents


In [34]:
# Add to Vector Store
document_ids = vector_store.add_documents(documents=all_splits)
print(document_ids[:3])

['0d74375d-faf3-4f08-a513-fbb307640402', '6865d313-9ce0-4d86-8e88-f275853a1a15', '62362eba-42d0-4af4-9d10-5cf6d4c4fca4']


In [35]:
vector_store.dump('Vector_Store_RAG')

In [36]:
vector_store = InMemoryVectorStore.load('Vector_Store_RAG', embeddings)

## RAG Agent with LangChain

In [37]:
from langchain.tools import tool

@tool(response_format="content_and_artifact")
def retrive_context(query: str):
    """Retrieve information to help answer a query."""
    retrieved_docs = vector_store.similarity_search(query, k=4 ) #‡πÉ‡∏ä‡πâ similarity search ‡∏´‡∏≤ top-2 documents
    serialized = "\n\n".join(f"Source: {doc.metadata}\nContent: {doc.page_content}" for doc in retrieved_docs)
    return serialized, retrieved_docs

In [38]:
from langchain.agents import create_agent
# ‡πÉ‡∏™‡πà system prompt ‡πÄ‡∏û‡∏∑‡πà‡∏≠‡∏Ñ‡∏ß‡∏ö‡∏Ñ‡∏∏‡∏°‡∏û‡∏§‡∏ï‡∏¥‡∏Å‡∏£‡∏£‡∏°‡∏Ç‡∏≠‡∏á‡πÇ‡∏°‡πÄ‡∏î‡∏•
system_prompt =("""
    - You are a helpful AI assistant.
    - When you receive a response from a tool, you MUST summarize it and provide a final answer to the user.
    - DO NOT return an empty response.
    - Always synthesize the information retrieved."""
)
agent = create_agent(model,tools=[retrive_context], system_prompt=system_prompt)

In [39]:
query = "What roles exist in the system?"

for event in agent.stream(
    {"messages":[{"role":"user","content":query}]},
    stream_mode="values",
):
    event['messages'][-1].pretty_print()


What roles exist in the system?
Tool Calls:
  retrive_context (735dfca8-01e4-41e6-8259-2711dfbb8415)
 Call ID: 735dfca8-01e4-41e6-8259-2711dfbb8415
  Args:
    query: roles in the system
Name: retrive_context

Source: {'source': 'https://kohplanner.com/food/10-classic-thai-dishes-recipes/', 'start_index': 23514}
Content: Author: kohplanner_admin

Source: {'producer': 'Adobe PDF Library 15.0', 'creator': 'Adobe InDesign 15.1 (Windows)', 'creationdate': '2020-10-17T22:04:16+07:00', 'moddate': '2020-10-17T22:04:34+07:00', 'trapped': '/False', 'source': 'data/recipes/Booklet.pdf', 'total_pages': 61, 'page': 2, 'page_label': '3', 'start_index': 0}
Content: 4 5
Thai appetizers can be served before the main meal, 
as a quick bite on the go, or with drinks. They are little 
bites full of delicious Thai flavors, usually served with 
some fresh or steamed vegetables and a dipping sauce 
or paste on the side. 
 
Ranging from simple pork crackers or grilled sticky rice 
to more elaborate dishes, 

In [40]:
event['messages'][-1].pretty_print()


The provided context does not contain information about roles within a system. It mainly discusses Thai food, recipes, and appetizers.
