In [73]:
import os
from pathlib import Path

from dotenv import load_dotenv
from langchain.schema import Document
from langchain.text_splitter import MarkdownHeaderTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http import models

# Load variables from a local .env file into the process environment before
# any os.getenv() lookups happen.
load_dotenv()

True

In [74]:
class Config:
    """Static settings shared by the cells of this notebook."""

    # Secret read from the environment; None when the variable is not set.
    GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

    # Qdrant endpoint and the collection the vector store is built in.
    QDRANT_URL = "http://localhost:6333"
    PRODUCT_COLLECTION_NAME = "lensfolia_collection"

    # Google Generative AI model identifiers.
    EMBEDDING_MODEL = "models/gemini-embedding-001"
    LLM_MODEL = "gemini-2.5-flash"

In [75]:
# Accumulator for every Document that will be embedded; the product and plant
# loading cells append to it and the vector store is built from it.
all_docs = []

In [76]:
# Folder scanned for product markdown files.
docs_path = Path("knowledge_base/products/")
# Base names of the *.md files found directly inside docs_path.
existing_files = set(md_file.name for md_file in docs_path.glob("*.md"))

In [77]:
# Stop early when the products folder contains no markdown files; the product
# loading loop would otherwise run zero times and index nothing.
if not existing_files:
    # existing_files is always empty on this branch, so report the directory
    # that was searched instead of interpolating the empty set.
    raise FileNotFoundError(
        f"No product markdown files (*.md) found in: {docs_path.resolve()}"
    )

In [78]:
# Rebuild the product entries from scratch so that re-running this cell does
# not append the same chunks to all_docs a second time.
all_docs[:] = [d for d in all_docs if d.metadata.get("doc_type") != "product"]

# The splitter does not depend on the file, so build it once outside the loop.
# Each top-level "#" header starts a new chunk and is stored as "product_name".
product_splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=[("#", "product_name")]
)

# sorted() gives a stable file order; raw glob order depends on the filesystem.
for file_path in sorted(docs_path.glob("*.md")):
    # File name without extension, stored as the product group.
    group = file_path.stem
    # Read as UTF-8 explicitly instead of relying on the platform default encoding.
    source_doc = TextLoader(str(file_path), encoding="utf-8").load()[0]

    for product_chunk in product_splitter.split_text(source_doc.page_content):
        all_docs.append(
            Document(
                page_content=product_chunk.page_content,
                metadata={
                    "doc_type": "product",
                    # A chunk with no "#" header above it would lack this key;
                    # fall back to a placeholder instead of raising KeyError.
                    "product_name": product_chunk.metadata.get("product_name", "Unknown"),
                    "group": group,
                },
            )
        )

In [79]:
plants_path = Path("knowledge_base/plants/plants.md")

# Drop plant chunks left over from an earlier run of this cell so that
# re-running it does not duplicate them in all_docs.
all_docs[:] = [d for d in all_docs if d.metadata.get("doc_type") != "plant_info"]

if plants_path.exists():
    # Read as UTF-8 explicitly instead of relying on the platform default encoding.
    plants_doc = TextLoader(str(plants_path), encoding="utf-8").load()[0]

    # Split by plant (h1) and nested sections (h2, h3, h4); each header level
    # is recorded in the chunk metadata under the given key.
    plant_splitter = MarkdownHeaderTextSplitter(
        headers_to_split_on=[
            ("#", "plant_name"),
            ("##", "section"),
            ("###", "subsection"),
            ("####", "disease_name"),
        ]
    )
    plant_chunks = plant_splitter.split_text(plants_doc.page_content)

    for chunk in plant_chunks:
        # Skip chunks whose content is empty or whitespace only.
        if not chunk.page_content.strip():
            continue

        # Header values captured by the splitter, with defaults for missing levels.
        # plant_name is lower-cased here, so exact-match filters must use lower case.
        plant_name = chunk.metadata.get("plant_name", "Unknown").lower()
        section = chunk.metadata.get("section", "General")
        subsection = chunk.metadata.get("subsection", "")
        disease_name = chunk.metadata.get("disease_name", "")

        # Human-readable section label: the section plus its most specific sub-heading.
        # NOTE(review): when both a subsection and a disease name are present,
        # the disease name replaces the subsection in the label - confirm that
        # dropping the subsection is intended.
        section_id = section
        if subsection:
            section_id = f"{section} - {subsection}"
        if disease_name:
            section_id = f"{section} - {disease_name}"

        all_docs.append(
            Document(
                page_content=chunk.page_content,
                metadata={
                    "doc_type": "plant_info",
                    "plant_name": plant_name,
                    "section": section_id,
                    # Slug of the h2 section, e.g. "Some Section" -> "some_section".
                    "content_type": section.lower().replace(" ", "_"),
                },
            )
        )
else:
    # Keep going without plant data, but make the gap visible instead of silent.
    print(f"Warning: {plants_path} not found; no plant information was loaded.")

# %%
# Report the corpus size, then the per-type breakdown.
print(f"Total documents loaded: {len(all_docs)}")
print("\nDocument types:")

# Partition by the doc_type tag each Document carries in its metadata.
products = list(filter(lambda d: d.metadata["doc_type"] == "product", all_docs))
plants = list(filter(lambda d: d.metadata["doc_type"] == "plant_info", all_docs))

print(f"Products: {len(products)}")
print(f"Plant info: {len(plants)}")

Total documents loaded: 1484

Document types:
Products: 56
Plant info: 1428


In [80]:
# Client for the Qdrant server at the configured URL.
qdrant_client = QdrantClient(url=Config.QDRANT_URL)

# Embedding model wrapper that is handed to the vector store when it is built.
embeddings = GoogleGenerativeAIEmbeddings(
    model=Config.EMBEDDING_MODEL,
    api_key=Config.GOOGLE_API_KEY,
)

In [81]:
# Embed every loaded document and store it in the configured collection.
# force_recreate=True is passed so the collection is rebuilt on each run of this cell.
vector_store = QdrantVectorStore.from_documents(
    documents=all_docs,
    embedding=embeddings,
    url=Config.QDRANT_URL,
    collection_name=Config.PRODUCT_COLLECTION_NAME,
    force_recreate=True,
)

In [82]:
# Similarity retriever over the whole collection, returning the 3 closest chunks.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs=dict(k=3))

In [83]:
# Sample queries used to smoke-test the retriever.
test_queries = ["apple leaf spot disease symptoms"]


In [84]:
# Run each sample query through the retriever and print a short summary of
# every hit: its doc_type, the identifying metadata, and a content preview.
separator = "=" * 60
for query in test_queries:
    print(f"\n{separator}")
    print(f"Query: {query}")
    print(separator)

    # invoke() is the supported entry point for retrievers;
    # get_relevant_documents() is deprecated.
    results = retriever.invoke(query)

    for i, doc in enumerate(results, 1):
        doc_type = doc.metadata['doc_type']
        print(f"\nResult {i} ({doc_type}):")

        # Product and plant chunks carry different metadata keys.
        if doc_type == "product":
            print(f"Product: {doc.metadata['product_name']}")
            print(f"Group: {doc.metadata['group']}")
        else:
            print(f"Plant: {doc.metadata['plant_name']}")
            print(f"Section: {doc.metadata['section']}")

        # Preview only the first 300 characters of the chunk.
        print(f"Content: {doc.page_content[:300]}...")


Query: apple leaf spot disease symptoms

Result 1 (plant_info):
Plant: cherry (including sour)
Section: Common Pests and Diseases - Cherry leaf spot Coccomyces hiemalis
Content: - Symptoms: Small, red-purple spots on upper surfaces of leaves which turn brown and may
coalesce; leaves may become chlorotic if there are a few lesions present; if
tree becomes severely defoliated fruit may fail to develop properly and remain
light in color and watery in texture  
- Cause: Fungus ...

Result 2 (plant_info):
Plant: cashew nuts
Section: Common Pests and Diseases - Anthracnose Colletotrichum gloeosporoides
Content: - Symptoms: Water-soaked lesions on leaves, twigs, flowers or young apples which develop
into orange-brown or red lesions;  
- Cause: Fungus  
- Comments: Disease emergence favored by rainfall and high humidity  
- Management: Management A protective coating of copper-based fungicide on susceptible ...

Result 3 (plant_info):
Plant: quince
Section: Common Pests and Diseases - Leaf bl

In [85]:
def _metadata_filter(**equals):
    """Build a Qdrant filter requiring each given metadata field to equal its value.

    Keyword names are metadata keys, addressed under the ``metadata.`` payload
    prefix. Entries whose value is None are left out of the filter.
    """
    return models.Filter(
        must=[
            models.FieldCondition(
                key=f"metadata.{field}",
                match=models.MatchValue(value=value),
            )
            for field, value in equals.items()
            if value is not None
        ]
    )


def search_plant_info(query, k=3):
    """Search plant information only.

    Args:
        query: Free-text query passed to the vector store.
        k: Maximum number of chunks to return.

    Returns:
        Whatever ``vector_store.similarity_search`` returns for chunks whose
        ``doc_type`` metadata is ``"plant_info"``.
    """
    return vector_store.similarity_search(
        query, filter=_metadata_filter(doc_type="plant_info"), k=k
    )


def search_products(query, k=3):
    """Search products only.

    Args:
        query: Free-text query passed to the vector store.
        k: Maximum number of chunks to return.

    Returns:
        Whatever ``vector_store.similarity_search`` returns for chunks whose
        ``doc_type`` metadata is ``"product"``.
    """
    return vector_store.similarity_search(
        query, filter=_metadata_filter(doc_type="product"), k=k
    )


def search_disease_info(query, plant_name=None, k=3):
    """Search plant-info chunks, optionally restricted to a single plant.

    No filter is applied on the section or disease metadata; relevance to a
    disease comes only from the query text.

    Args:
        query: Free-text query passed to the vector store.
        plant_name: Optional plant to restrict results to. Matching is exact
            after trimming and lower-casing; empty or None means no restriction.
        k: Maximum number of chunks to return.

    Returns:
        Whatever ``vector_store.similarity_search`` returns for the matching
        ``"plant_info"`` chunks.
    """
    # Plant names are lower-cased when indexed, so normalise the requested
    # name the same way; a blank name collapses to None (no plant filter).
    normalized_plant = (plant_name or "").strip().lower() or None
    return vector_store.similarity_search(
        query,
        filter=_metadata_filter(doc_type="plant_info", plant_name=normalized_plant),
        k=k,
    )

In [86]:
# Agent 1 output simulation: "apple leaf spot detected"
detected_plant = "apple"
detected_disease = "leaf spot"

# Agent 2: Get plant disease info
print(f"\nAgent 2 - Plant disease info for {detected_plant} {detected_disease}:")
plant_results = search_disease_info(f"{detected_plant} {detected_disease}", detected_plant)
for doc in plant_results[:1]:
    print(f"Plant: {doc.metadata['plant_name']}")
    print(f"Content: {doc.page_content[:400]}...")

# Agent 3: Get product recommendations
print(f"\nAgent 3 - Product recommendations for {detected_disease}:")
product_results = search_products(f"fungicide {detected_disease}")
for doc in product_results[:1]:
    print(f"Product: {doc.metadata['product_name']}")
    print(f"Group: {doc.metadata['group']}")
    print(f"Content: {doc.page_content[:400]}...")


Agent 2 - Plant disease info for apple leaf spot:
Plant: apple
Content: - Symptoms: Bright orange or yellow patches on top side of leaves surrounded by a red band
and small black spots in the center; by mid-summer, cup-like structures called
aecia form on the leaf undersides; these become covered in tubular structures
from which spores are released.  
- Cause: Fungus  
- Comments: Fungus requires two hosts to complete lifecycle; forms galls on Eastern red
cedar and sp...

Agent 3 - Product recommendations for leaf spot:
Product: SCORPIO 250 EC
Group: fungisida
Content: ![Image of product: SCORPIO 250 EC](https://kresna.co.id/sarikresnakimia/wp-content/uploads/2014/11/SCORPIO-1000x1000-360x360.png)  
**Bahan Aktif:** Difenokonazol 250 g/l  
**Deskripsi:** Fungisida sistemik berbentuk pekatan emulsi berwarna kuning bening untuk mengendalikan penyakit bercak daun ( Cercospora sp. ) penyakit busuk apih ( Rhizoctania Solani ) pada tanaman padi penyakit bercak daun ( ...
