In [1]:
import os
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from openai import OpenAI

# Load environment variables from .env
load_dotenv()

QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
COLLECTION_NAME = os.getenv("QDRANT_COLLECTION_NAME")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

print("QDRANT_URL:", QDRANT_URL)
print("COLLECTION_NAME:", COLLECTION_NAME)

QDRANT_URL: https://5d9836f1-dece-45c3-a44b-fd0b1d0c053e.us-west-2-0.aws.cloud.qdrant.io:6333
COLLECTION_NAME: rice_knowledge_chunks


In [2]:
import os
from pathlib import Path

# Load environment variables manually from .env
env_path = Path('.') / '.env'
if env_path.exists():
    with open(env_path) as f:
        for line in f:
            if '=' in line and not line.strip().startswith('#'):
                key, value = line.strip().split('=', 1)
                os.environ[key] = value

QDRANT_URL = os.environ.get("QDRANT_URL")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
COLLECTION_NAME = os.environ.get("QDRANT_COLLECTION_NAME")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

print("QDRANT_URL:", QDRANT_URL)
print("COLLECTION_NAME:", COLLECTION_NAME)

# Print if API keys loaded
print("QDRANT_API_KEY is set:", bool(QDRANT_API_KEY))
print("OPENAI_API_KEY is set:", bool(OPENAI_API_KEY))

QDRANT_URL: https://5d9836f1-dece-45c3-a44b-fd0b1d0c053e.us-west-2-0.aws.cloud.qdrant.io:6333
COLLECTION_NAME: rice_knowledge_chunks
QDRANT_API_KEY is set: True
OPENAI_API_KEY is set: True


In [3]:
# Define retrieval functions (requires qdrant_client and openai installed)
# The following code won't run here because the packages are not installed.
from qdrant_client import QdrantClient
from openai import OpenAI


def get_query_embedding(text: str) -> list:
    """Get OpenAI embedding for a text query."""
    client = OpenAI(api_key=OPENAI_API_KEY)
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=text
    )
    return response.data[0].embedding


def retrieve_chunks(query: str, k: int = 5, threshold: float = 0.35) -> list:
    """Semantic search from Qdrant with full payloads."""
    # Initialize Qdrant client
    client = QdrantClient(
        url=QDRANT_URL,
        api_key=QDRANT_API_KEY
    )
    query_vector = get_query_embedding(query)

    results = client.search(
        collection_name=COLLECTION_NAME,
        query_vector=query_vector,
        limit=k,
        with_payload=True,
        with_vectors=False
    )

    chunks = []
    for r in results:
        payload = r.payload or {}
        chunks.append({
            'score': r.score,
            'content': payload.get('content', ''),
            'title': payload.get('title', ''),
            'summary': payload.get('summary', ''),
            'url': payload.get('url', ''),
            'source': payload.get('source', ''),
            'chunk_id': payload.get('chunk_id', ''),
        })
    # Optionally filter by threshold
    return [c for c in chunks if c['score'] >= threshold]


In [9]:
# Cell 1: Imports and environment setup
import os
from pathlib import Path
from typing import List, Dict
from dotenv import load_dotenv
from qdrant_client import QdrantClient

from langchain.schema import Document
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.retrievers import BaseRetriever
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain import hub

from openai import OpenAI

# Load environment variables
load_dotenv()
env_path = Path('.') / '.env'
if env_path.exists():
    with open(env_path) as f:
        for line in f:
            if '=' in line and not line.strip().startswith('#'):
                key, value = line.strip().split('=', 1)
                os.environ.setdefault(key, value)

QDRANT_URL = os.environ.get("QDRANT_URL")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
COLLECTION_NAME = os.environ.get("QDRANT_COLLECTION_NAME")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Initialize clients
qdrant = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
openai_client = OpenAI(api_key=OPENAI_API_KEY)

# Helper: embed the query
def get_query_embedding(text: str) -> List[float]:
    resp = openai_client.embeddings.create(
        model="text-embedding-3-small",
        input=text
    )
    return resp.data[0].embedding



In [10]:
# Cell 2: Retrieval function using qdrant.search()
def retrieve_chunks(query: str, k: int = 5, threshold: float = 0.35) -> List[Document]:
    """
    Use the deprecated search() API to retrieve the top-k chunks and return them as LangChain Documents.
    Each Document has page_content set to the chunk text and metadata containing score, title, summary, etc.
    """
    query_vec = get_query_embedding(query)
    results = qdrant.search(
        collection_name=COLLECTION_NAME,
        query_vector=query_vec,
        limit=k,
        with_payload=True,
        with_vectors=False,
        score_threshold=threshold
    )

    docs: List[Document] = []
    for r in results:
        payload: Dict = r.payload or {}
        docs.append(
            Document(
                page_content=payload.get("content", ""),
                metadata={
                    "score": r.score,
                    "title": payload.get("title", ""),
                    "summary": payload.get("summary", ""),
                    "url": payload.get("url", ""),
                    "source": payload.get("source", ""),
                    "chunk_id": payload.get("chunk_id", ""),
                }
            )
        )
    return docs



In [11]:
# Cell 3: Custom Retriever for LangChain
class QdrantSearchRetriever(BaseRetriever):
    """A LangChain-compatible retriever that uses Qdrant's search() API."""
    k: int = 5
    threshold: float = 0.35

    def _get_relevant_documents(self, query: str, **kwargs) -> List[Document]:
        return retrieve_chunks(query, k=self.k, threshold=self.threshold)



In [16]:
# from langchain_core.prompts import ChatPromptTemplate
#
#
# # We assume retrieve_chunks() already returns a List[Document] with metadata.
# # If it still returns dictionaries, wrap each dict in a Document first.
#
# def qa_with_sources(
#     question: str,
#     k: int = 5,
#     threshold: float = 0.35,
# ) -> dict:
#     """
#     Retrieve context from Qdrant and generate a detailed answer.
#     Return both the answer text and the list of source documents.
#     """
#     docs = retrieve_chunks(question, k=k, threshold=threshold)  # List[Document]
#     context = "\n\n".join(doc.page_content for doc in docs)
#
#     # Build your detailed prompt
#     detailed_prompt = ChatPromptTemplate.from_template(
#         """
#         You are RiceAI Expert, a trusted agronomist and AI agent trained on sustainable
#         and high-yield rice farming practices.
#
#         Use all relevant information from the context provided, and write a thorough,
#         step-by-step answer that explains the key points and their significance.
#
#         Context:
#         {context}
#
#         Question:
#         {question}
#
#         Detailed Answer:
#         """
#     )
#
#     chat = ChatOpenAI(
#         model="gpt-4o-mini",
#         temperature=0,
#         openai_api_key=OPENAI_API_KEY
#     )
#     answer = chat.invoke(
#         detailed_prompt.format(context=context, question=question)
#     ).content
#
#     return {"answer": answer, "source_documents": docs}
#
# # Example usage:
# result = qa_with_sources("What are the key factors affecting organic fertilizer adoption in rice production?")
# print(result["answer"])
# for doc in result["source_documents"]:
#     print(doc.metadata["title"], doc.metadata["url"])


  results = qdrant.search(


The adoption of organic fertilizers in rice production is influenced by a variety of factors, as highlighted in the provided context. Understanding these factors is crucial for promoting sustainable agricultural practices and improving the financial efficiency of rice farming. Below is a detailed, step-by-step explanation of the key factors affecting organic fertilizer adoption:

### 1. **Education Level**
   - **Significance**: Higher education levels among farmers positively correlate with the adoption of organic fertilizers. Educated farmers are more likely to understand the benefits of organic farming practices and are better equipped to implement them effectively.
   - **Implication**: Educational programs and training on organic farming practices can enhance farmers' knowledge and encourage the use of organic fertilizers.

### 2. **Main Job**
   - **Significance**: Farmers whose primary occupation is rice production are more inclined to use organic fertilizers compared to those e

In [18]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# Step 1: Define your detailed prompt (unchanged)
prompt = ChatPromptTemplate.from_template(
    """
    You are RiceAI Expert, a trusted agronomist and AI agent trained on sustainable and high-yield rice farming practices.

    When answering the question, use all relevant context provided. Explain the answer thoroughly and cover all key details, so that no important information is omitted. Be explicit and comprehensive.

    Context:
    {context}

    Question: {question}

    Detailed Answer:
    """
)

def format_docs(docs: List[Document]) -> str:
    return "\n\n".join(doc.page_content for doc in docs)

# Step 2: Instantiate the retriever (using search)
retriever = QdrantSearchRetriever(k=5, threshold=0.35)

# Step 3: Build the chain to get the answer only
qa_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | ChatOpenAI(model="gpt-4o-mini", temperature=0, openai_api_key=OPENAI_API_KEY)
    | StrOutputParser()
)

# Step 4: When you have a query, call both qa_chain and retriever
query = input("Enter your question: ").strip()

# Get the answer text
answer_text = qa_chain.invoke(query)

# Get the underlying documents with metadata
docs = retriever.invoke(query)

# Step 5: Display answer and formatted sources
print("### Answer:\n")
print(answer_text, "\n")

if docs:
    print("### Retrieved Sources:\n")
    for i, doc in enumerate(docs, 1):
        meta = doc.metadata
        title = meta.get("title", "No title")
        url = meta.get("url", "No URL")
        score = meta.get("score", 0.0)
        summary = meta.get("summary") or doc.page_content[:200] + "..."
        print(f"{i}. **{title}** (score: {score:.3f})")
        print(f"   URL: {url}")
        print(f"   Summary: {summary}\n")
else:
    print("No sources retrieved.")


  results = qdrant.search(
  results = qdrant.search(


### Answer:

Mechanized techniques for water management in rice farming are essential for enhancing efficiency, reducing labor costs, and optimizing water use. Here are several mechanized approaches that can be employed in rice cultivation, particularly in conjunction with practices like Alternate Wetting and Drying (AWD):

### 1. **Laser Land Leveling**
   - **Description**: Laser land leveling involves using laser-guided equipment to create a uniformly leveled field. This technique minimizes water pooling and ensures even water distribution across the field.
   - **Benefits**: A well-leveled field can reduce water requirements by 80-100 mm, as it prevents excess water from accumulating in low spots and ensures that all areas receive adequate irrigation.

### 2. **Automated Irrigation Systems**
   - **Description**: These systems use sensors and timers to automate the irrigation process. Soil moisture sensors can detect when the soil reaches a certain dryness level and trigger irrigat