In [1]:
# Install packages (run this once).
# Uncomment the line below to install; the `%pip` magic targets the active kernel's environment.
# %pip install langchain langchain-community langchain-google-genai langchain-text-splitters chromadb pypdf bs4 python-dotenv

In [2]:
from langchain.chat_models import init_chat_model
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
import bs4
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader, PyPDFDirectoryLoader
from dotenv import load_dotenv
import os

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
# Load environment variables (including GOOGLE_API_KEY) from a local .env file.
load_dotenv()

# Fail early with a readable message when the key is missing. Previously a
# missing key surfaced as an opaque TypeError from assigning None to os.environ.
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )

# The key is already present in os.environ (os.getenv just read it from there),
# so it does not need to be written back.
model = init_chat_model("google_genai:gemini-2.5-flash-lite")

In [4]:
# Embedding model handed to the vector store below.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

In [5]:
# In-memory vector store that uses the embedding model configured above.
vector_store = InMemoryVectorStore(embedding=embeddings)

## 📥 RAG: Data Ingestion (PDF & Web)
การดึงเนื้อหาจาก PDF และ Web
- แบบที่ 1: อ่านทีละไฟล์ 
`loader = PyPDFLoader("path/to/your/file.pdf")`
`docs = loader.load()`
- แบบที่ 2: อ่านทั้งโฟลเดอร์ 
`เอาไฟล์ PDF ไปวางใน folder 'data/recipes' `
`loader = PyPDFDirectoryLoader("data/recipes")`
`docs = loader.load()`

In [6]:
# PDF Loader
# Start from an empty list so `pdf_docs` is always defined: if the PDF cannot
# be read, the error is reported below and later cells can still run on the
# web content alone instead of failing with a NameError.
pdf_docs = []
try:
    loader = PyPDFLoader("data/recipes/Booklet.pdf")
    pdf_docs = loader.load()
    print(f"PDF loaded: {len(pdf_docs)} pages")
    if pdf_docs:
        # Preview the first 200 characters of the first loaded document.
        print("Simple content:", pdf_docs[0].page_content[:200])
except Exception as e:
    # Best-effort load: report the problem and continue with web content.
    print(f"Error loading PDF: {e}")

# Web Loader
url = "https://kohplanner.com/food/10-classic-thai-dishes-recipes/"
# Only parse elements carrying the article-body class, so the rest of the page
# is not pulled into the document.
bs4_strainer = bs4.SoupStrainer(class_="entry-content single-content")

loader = WebBaseLoader(
    web_paths=(url,),
    bs_kwargs={"parse_only": bs4_strainer},
)
web_docs = loader.load()
print(f"Web loaded: {len(web_docs)} pages")
if web_docs:
    print("Simple content:", web_docs[0].page_content[:500])
    # Kept inside the guard: indexing web_docs[0] on an empty result would
    # raise IndexError.
    print(f"Total characters in document: {len(web_docs[0].page_content)}")

PDF loaded: 61 pages
Simple content: 1
THAI RECIPES
FROM
ÔÇ¨ chiangmaiecolodges.com ÔÇÇ @spicyvilla ÔÖ≠ @spicyjourney ÔÖß Spicy Journey
Web loaded: 1 pages
Simple content: 





Thai cuisine is a vibrant culinary tapestry, woven with centuries of tradition, culinary skill, and rich, diverse ingredients. Its origins are deeply rooted in the country's history, heavily influenced by neighbouring countries, including China, India, Laos, and Malaysia, resulting in a blend of flavours that are as diverse as they are delicious. We hope our selected Thai dishes can demonstrate this!
Key ingredients such as jasmine rice, coconut milk, fresh herbs, fish sauce, and an array 
Total characters in document: 23541


In [7]:
# Merge all documents
all_docs = pdf_docs + web_docs
print(f"Total Documents: {len(all_docs)}")
total_chars = sum(len(doc.page_content) for doc in all_docs)
print(f"Total Characters (All Pages): {total_chars}")

Total Documents: 62
Total Characters (All Pages): 117338


In [8]:
# Chunking configuration:
#   chunk_size      - split the documents into small chunks of 1,000 characters
#   chunk_overlap   - consecutive chunks overlap by 200 characters
#   add_start_index - add the start index to each chunk
splitter_options = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Split every loaded document into sub-documents.
all_splits = text_splitter.split_documents(all_docs)
print(f"Total splits created: {len(all_splits)} sub-documents")

Total splits created: 171 sub-documents


In [9]:
# Add to Vector Store
documents_id = vector_store.add_documents(all_splits)
print(documents_id[:3])

['600833be-f7d7-4b72-8555-88b7b9d20f62', '30b6ab50-5409-4639-a36d-b4f58e1df0d7', 'a27c3404-74c9-4c70-8761-2787f4b97d21']


In [10]:
# Persist the populated vector store to disk so it can be reloaded later.
vector_store.dump(path="Vector_Store_RAG")

In [11]:
# Reload the persisted vector store, using the same embedding model.
vector_store = InMemoryVectorStore.load(
    path="Vector_Store_RAG",
    embedding=embeddings,
)

## RAG Agent with LangChain

In [14]:
from langchain.tools import tool

# NOTE: the docstring below is used by `@tool` as the tool description shown to
# the model, so it is kept unchanged; implementation notes live in comments.
@tool(response_format="content_and_artifact")
def retrive_context(query: str):
    """Retrieve information to help answer a query."""
    # Use similarity search to fetch the top-4 most similar chunks.
    retrieved_docs = vector_store.similarity_search(query, k=4)

    # Return an explicit message rather than an empty string when nothing
    # matches, so the model always receives non-empty tool content.
    if not retrieved_docs:
        return "No relevant documents were found for this query.", retrieved_docs

    # Content half of the (content, artifact) pair: each chunk rendered as its
    # metadata followed by its text, separated by blank lines.
    serialized = "\n\n".join(
        f"Source: {doc.metadata}\nContent: {doc.page_content}"
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs

In [None]:
from langchain.agents import create_agent
# Provide a system prompt to control the model's behaviour: always summarise
# tool output into a final, non-empty answer.
system_prompt = """
    - You are a helpful AI assistant.
    - When you receive a response from a tool, you MUST summarize it and provide a final answer to the user.
    - DO NOT return an empty response.
    - Always synthesize the information retrieved."""

# Build the agent from the chat model, the retrieval tool and the prompt above.
agent = create_agent(
    model,
    tools=[retrive_context],
    system_prompt=system_prompt,
)

In [21]:
# Example question for the agent (fixed the missing space in "make Bok").
query = "How to make Bok Choy Soup"

# Stream the run and print the newest message of each event as it arrives.
# `event` is intentionally left bound after the loop: the next cell reads the
# final message from it.
for event in agent.stream(
    {"messages": [{"role": "user", "content": query}]},
    stream_mode="values",
):
    event["messages"][-1].pretty_print()


How to makeBok Choy Soup
Tool Calls:
  retrive_context (5f999e42-5836-44f7-a87d-db0edbf79344)
 Call ID: 5f999e42-5836-44f7-a87d-db0edbf79344
  Args:
    query: How to make Bok Choy Soup
Name: retrive_context

Source: {'producer': 'Adobe PDF Library 15.0', 'creator': 'Adobe InDesign 15.1 (Windows)', 'creationdate': '2020-10-17T22:04:16+07:00', 'moddate': '2020-10-17T22:04:34+07:00', 'trapped': '/False', 'source': 'data/recipes/Booklet.pdf', 'total_pages': 61, 'page': 10, 'page_label': '11', 'start_index': 804}
Content: powder and fried garlic.
 L 500 gr pork ribs (or 2 
blocks of tofu)
 L 500 g bok choy
 L 1 Tsp thinly sliced ginger
 L 4-5 Tsp tamarind sauce 
or lemon juice
 L 2 tsp (coconut) sugar
 L 2 Tsp fish sauce
 L 2 tsp chicken stock pow-
der
 L 3 Tsp of fried garlic
ÔÇ¨ chiangmaiecolodges.com ÔÇÇ @spicyvilla ÔÖ≠ @spicyjourney ÔÖß Spicy Journey
SERVES: 4
 COOK: 30 MIN
TOTAL: 45 MIN
PREP: 15 MIN
Soup BokSoup Bok
BOK CHOY SOUP

Source: {'producer': 'Adobe PDF Library 15.0', 'creat

In [22]:
# Re-print only the last message of the last streamed event.
# Relies on `event` still being bound from the streaming loop in the previous cell.
final_message = event["messages"][-1]
final_message.pretty_print()


The Bok Choy Soup recipe involves the following ingredients and steps:

**Ingredients:**
* 500g pork ribs (or 2 blocks of tofu)
* 500g bok choy
* 1 tsp thinly sliced ginger
* 4-5 tsp tamarind sauce or lemon juice
* 2 tsp (coconut) sugar
* 2 tsp fish sauce
* 2 tsp chicken stock powder
* 3 tsp of fried garlic

**Cooking Directions:**
1. Boil 2-3 liters of water until gently bubbling. Add the pork ribs and boil for 5 minutes.
2. Clean the bok choy thoroughly, then tear each bok choy by hand into large pieces.
3. Add the bok choy to the boiling water and bring it back to a boil.
4. Cook until the bok choy is soft (approximately 15 minutes).
5. Add the ginger, tamarind sauce, fish sauce, and sugar. Adjust quantities to taste.
6. Ladle the soup into serving bowls and sprinkle with chicken stock powder and fried garlic.

This recipe serves 4 people and takes approximately 45 minutes in total (15 minutes prep, 30 minutes cook).
