In [3]:
!pip install sentence-transformers faiss-cpu google-generativeai groq requests

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting groq
  Downloading groq-0.33.0-py3-none-any.whl.metadata (16 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.1

In [4]:
import pickle
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from typing import List, Tuple, Dict, Optional, Any
import json
from datetime import datetime
import os
import warnings

warnings.filterwarnings('ignore')

2025-10-21 13:28:38.706645: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1761053318.903070      37 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1761053318.957570      37 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [5]:
# ============================================================================
# COMPONENT 1: QUERY EMBEDDER
# ============================================================================

class QueryEmbedder:
    """Converts user queries into embeddings using Sentence Transformers"""
    
    def __init__(self, model_name: str = "all-mpnet-base-v2"):
        """
        Initialize the embedding model
        Must use the SAME model that was used to create the vector database
        
        Args:
            model_name: Sentence Transformer model name
        """
        print(f"🔄 Loading embedding model: {model_name}")
        self.model = SentenceTransformer(model_name, trust_remote_code=True)
        self.embedding_dim = self.model.get_sentence_embedding_dimension()
        print(f"✅ Model loaded. Embedding dimension: {self.embedding_dim}")
    
    def embed_query(self, query: str) -> np.ndarray:
        """
        Convert text query to embedding vector
        
        Args:
            query: User's input question
            
        Returns:
            Normalized embedding vector as numpy array
        """
        query = query.strip()
        
        # Generate embedding
        embedding = self.model.encode(query, convert_to_numpy=True)
        
        # Normalize for cosine similarity (important for FAISS L2 index)
        embedding = embedding / np.linalg.norm(embedding)
        
        return embedding.astype('float32')



In [6]:
# ============================================================================
# COMPONENT 2: VECTOR RETRIEVER
# ============================================================================

class VectorRetriever:
    """Retrieves relevant passages from FAISS vector database"""
    
    def __init__(self, index_path: str, metadata_path: str):
        """
        Load FAISS index and metadata
        
        Args:
            index_path: Path to faiss_index.bin
            metadata_path: Path to documents_metadata.pkl
        """
        print(f"\n🔄 Loading FAISS index from: {index_path}")
        self.index = faiss.read_index(index_path)
        print(f"✅ Index loaded with {self.index.ntotal} vectors")
        
        print(f"🔄 Loading metadata from: {metadata_path}")
        with open(metadata_path, 'rb') as f:
            data = pickle.load(f)
            self.documents = data['documents']
            self.metadata = data['metadata']
        print(f"✅ Loaded {len(self.documents)} document chunks")
    
    def retrieve(
        self, 
        query_embedding: np.ndarray, 
        k: int = 5,
        similarity_threshold: float = 0.3
    ) -> Tuple[List[Dict], List[float]]:
        """
        Search vector database for most relevant passages
        
        Args:
            query_embedding: Query embedding vector
            k: Number of top results to retrieve
            similarity_threshold: Minimum similarity score (0-1)
            
        Returns:
            (retrieved_passages, similarity_scores) tuple
        """
        # Reshape for FAISS
        query_embedding = query_embedding.reshape(1, -1)
        
        # Search FAISS index (returns L2 distances)
        distances, indices = self.index.search(query_embedding, k)
        
        # Convert L2 distance to cosine similarity
        # For normalized vectors: similarity = 1 - (distance^2 / 2)
        similarities = 1 - (distances[0] ** 2) / 2
        
        # Filter by threshold and prepare results
        retrieved_passages = []
        filtered_scores = []
        
        for idx, score in zip(indices[0], similarities):
            if score >= similarity_threshold and idx < len(self.documents):
                passage_info = {
                    'text': self.documents[idx],
                    'source': self.metadata[idx]['source'],
                    'type': self.metadata[idx]['type'],
                    'metadata': self.metadata[idx]
                }
                retrieved_passages.append(passage_info)
                filtered_scores.append(float(score))
        
        print(f"📊 Retrieved {len(retrieved_passages)} passages above threshold {similarity_threshold}")
        
        return retrieved_passages, filtered_scores


In [7]:
# ============================================================================
# COMPONENT 3: PROMPT ASSEMBLER
# ============================================================================

class PromptAssembler:
    """Assembles context and query into structured LLM prompts"""
    
    def __init__(self, system_prompt: Optional[str] = None):
        """
        Initialize with custom or default agriculture system prompt
        
        Args:
            system_prompt: Custom system instructions (optional)
        """
        self.system_prompt = system_prompt or self._default_agriculture_prompt()
    
    def _default_agriculture_prompt(self) -> str:
        """Agriculture-specific system prompt"""
        return """You are an expert agricultural advisor with deep knowledge in:
- Crop cultivation, management, and rotation practices
- Soil science, fertility, and conservation
- Pest and disease management (organic and conventional)
- Irrigation and water management
- Sustainable farming practices
- Indian agriculture and regional practices
- Agricultural economics and policy

Your role is to provide accurate, practical, and actionable advice to farmers and agricultural professionals.

IMPORTANT GUIDELINES:
1. Base your answers PRIMARILY on the provided context passages
2. If the context doesn't contain sufficient information, clearly state this
3. Prioritize safety, sustainability, and scientific accuracy
4. Provide practical, implementable solutions
5. Consider Indian agricultural conditions and practices
6. If suggesting chemicals or treatments, mention safety precautions
7. Be clear and avoid unnecessary jargon

Answer comprehensively but concisely."""
    
    def assemble_prompt(
        self, 
        query: str, 
        retrieved_passages: List[Dict],
        similarity_scores: List[float],
        max_context_length: int = 3000
    ) -> Dict[str, str]:
        """
        Create structured prompt with retrieved context
        
        Args:
            query: Original user question
            retrieved_passages: Retrieved document chunks
            similarity_scores: Relevance scores
            max_context_length: Maximum characters for context
            
        Returns:
            Dictionary with 'system_prompt' and 'user_prompt'
        """
        if not retrieved_passages:
            context_text = "[No relevant information found in the knowledge base]"
        else:
            # Build context from passages
            context_parts = []
            total_length = 0
            
            for i, (passage, score) in enumerate(zip(retrieved_passages, similarity_scores), 1):
                text = passage['text']
                source = passage['source']
                
                passage_text = f"""[Context {i}] (Relevance: {score:.2%} | Source: {source})
{text}"""
                
                # Check length limit
                if total_length + len(passage_text) > max_context_length:
                    break
                
                context_parts.append(passage_text)
                total_length += len(passage_text)
            
            context_text = "\n\n" + "\n\n---\n\n".join(context_parts)
        
        # Assemble user prompt
        user_prompt = f"""Context from Agriculture Knowledge Base:
{context_text}

Question: {query}

Please provide a comprehensive answer based on the context above. If the context doesn't fully address the question, clearly state what information is available and what might be missing."""
        
        return {
            "system_prompt": self.system_prompt,
            "user_prompt": user_prompt
        }



In [8]:
# ============================================================================
# COMPONENT 4: LLM GENERATOR (Multi-Provider Support)
# ============================================================================

class LLMGenerator:
    """Handles LLM API calls for answer generation"""
    
    def __init__(
        self, 
        provider: str = "openrouter",
        api_key: Optional[str] = None,
        model_name: Optional[str] = "google/gemini-2.0-flash-exp:free",
        temperature: float = 0.7,
        max_tokens: int = 2000
    ):
        """
        Initialize LLM generator
        
        Args:
            provider: "openrouter", "gemini", or "groq"
            api_key: API key for the provider
            model_name: Specific model name (uses smart defaults if None)
            temperature: Generation temperature (0-1)
            max_tokens: Maximum response length
        """
        self.provider = provider.lower()
        self.temperature = temperature
        self.max_tokens = max_tokens
        
        # Get API key from parameter or environment
        env_var_map = {
            "openrouter": "OPENROUTER_API_KEY",
            "gemini": "GOOGLE_API_KEY",
            "groq": "GROQ_API_KEY"
        }
        self.api_key = api_key or os.getenv(env_var_map.get(self.provider, ""))
        
        # Set model name
        self.model_name = model_name or self._get_default_model()
        
        print(f"\n🤖 LLM Generator initialized")
        print(f"   Provider: {self.provider}")
        print(f"   Model: {self.model_name}")
        print(f"   API Key: {'✅ Configured' if self.api_key else '❌ Missing'}")
    
    def _get_default_model(self) -> str:
        """Smart default models for each provider"""
        defaults = {
            "openrouter": "google/gemini-2.0-flash-exp:free",  # Free tier
            "gemini": "gemini-1.5-flash",  # Fast and good
            "groq": "llama-3.1-70b-versatile"  # Best balance for groq
        }
        return defaults.get(self.provider, "unknown")
    
    def generate_openrouter(self, system_prompt: str, user_prompt: str) -> str:
        """Generate using OpenRouter API"""
        try:
            import requests
            
            url = "https://openrouter.ai/api/v1/chat/completions"
            
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
                "HTTP-Referer": "https://github.com/agriculture-rag",
                "X-Title": "Agriculture RAG System"
            }
            
            payload = {
                "model": self.model_name,
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                "temperature": self.temperature,
                "max_tokens": self.max_tokens
            }
            
            response = requests.post(url, headers=headers, json=payload, timeout=60)
            response.raise_for_status()
            
            result = response.json()
            return result["choices"][0]["message"]["content"]
        
        except Exception as e:
            return f"❌ OpenRouter Error: {str(e)}\n\nTip: Check your API key and model name."
    
    def generate_gemini(self, system_prompt: str, user_prompt: str) -> str:
        """Generate using Google Gemini API"""
        try:
            import google.generativeai as genai
            
            genai.configure(api_key=self.api_key)
            model = genai.GenerativeModel(
                model_name=self.model_name,
                system_instruction=system_prompt
            )
            
            generation_config = {
                "temperature": self.temperature,
                "max_output_tokens": self.max_tokens,
            }
            
            response = model.generate_content(
                user_prompt,
                generation_config=generation_config
            )
            
            return response.text
        
        except ImportError:
            return "❌ Error: 'google-generativeai' not installed. Run: pip install google-generativeai"
        except Exception as e:
            return f"❌ Gemini Error: {str(e)}\n\nTip: Check your API key at https://makersuite.google.com/app/apikey"
    
    def generate_groq(self, system_prompt: str, user_prompt: str) -> str:
        """Generate using Groq API"""
        try:
            from groq import Groq
            
            client = Groq(api_key=self.api_key)
            
            response = client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=self.temperature,
                max_tokens=self.max_tokens
            )
            
            return response.choices[0].message.content
        
        except ImportError:
            return "❌ Error: 'groq' not installed. Run: pip install groq"
        except Exception as e:
            return f"❌ Groq Error: {str(e)}\n\nTip: Get API key at https://console.groq.com/keys"
    
    def generate(self, system_prompt: str, user_prompt: str) -> str:
        """
        Generate response using configured provider
        
        Args:
            system_prompt: System instructions
            user_prompt: User query with context
            
        Returns:
            Generated answer text
        """
        if not self.api_key:
            return """❌ No API key configured!

Please provide an API key:
1. OpenRouter: https://openrouter.ai/keys
2. Gemini: https://makersuite.google.com/app/apikey
3. Groq: https://console.groq.com/keys

Set it as environment variable or pass to RAGPipeline constructor."""
        
        # Route to appropriate provider
        if self.provider == "openrouter":
            return self.generate_openrouter(system_prompt, user_prompt)
        elif self.provider == "gemini":
            return self.generate_gemini(system_prompt, user_prompt)
        elif self.provider == "groq":
            return self.generate_groq(system_prompt, user_prompt)
        else:
            return f"❌ Unknown provider: {self.provider}"


In [9]:
# ============================================================================
# COMPONENT 5: RAG PIPELINE ORCHESTRATOR
# ============================================================================

class AgricultureRAGPipeline:
    """Complete RAG pipeline for agriculture Q&A"""
    
    def __init__(
        self,
        # Vector DB paths
        faiss_index_path: str = "/kaggle/input/rag-vectordb/faiss_index.bin",
        metadata_path: str = "/kaggle/input/rag-vectordb/documents_metadata.pkl",
        
        # Embedding model (must match the one used for vector DB)
        embedding_model: str = "all-mpnet-base-v2",
        
        # LLM configuration
        llm_provider: str = "openrouter",  # "openrouter", "gemini", or "groq"
        llm_api_key: Optional[str] = None,
        llm_model: Optional[str] = None,
        
        # Retrieval settings
        top_k: int = 5,
        similarity_threshold: float = 0.35,
        
        # Generation settings
        temperature: float = 0.7,
        max_tokens: int = 2000,
        
        # Optional custom prompts
        system_prompt: Optional[str] = None,
        
        # Logging
        log_file: Optional[str] = None
    ):
        """
        Initialize complete RAG pipeline
        
        Args:
            faiss_index_path: Path to FAISS index file
            metadata_path: Path to metadata pickle file
            embedding_model: Sentence transformer model name
            llm_provider: LLM provider ("openrouter", "gemini", "groq")
            llm_api_key: API key (or set as environment variable)
            llm_model: Specific model name (uses smart defaults)
            top_k: Number of passages to retrieve
            similarity_threshold: Minimum similarity score
            temperature: LLM temperature
            max_tokens: Maximum response tokens
            system_prompt: Custom system prompt
            log_file: Path to log file
        """
        print("="*70)
        print("🌾 AGRICULTURE RAG PIPELINE INITIALIZATION")
        print("="*70)
        
        # Initialize components
        self.embedder = QueryEmbedder(embedding_model)
        self.retriever = VectorRetriever(faiss_index_path, metadata_path)
        self.prompt_assembler = PromptAssembler(system_prompt)
        self.llm_generator = LLMGenerator(
            provider=llm_provider,
            api_key=llm_api_key,
            model_name=llm_model,
            temperature=temperature,
            max_tokens=max_tokens
        )
        
        # Settings
        self.top_k = top_k
        self.similarity_threshold = similarity_threshold
        self.log_file = log_file
        
        print("\n" + "="*70)
        print("✅ RAG PIPELINE READY!")
        print("="*70)
    
    def query(
        self,
        question: str,
        top_k: Optional[int] = None,
        similarity_threshold: Optional[float] = None,
        verbose: bool = True
    ) -> Dict[str, Any]:
        """
        Process a question through the complete RAG pipeline
        
        Args:
            question: User's agriculture question
            top_k: Override default top_k
            similarity_threshold: Override default threshold
            verbose: Print progress information
            
        Returns:
            Dictionary containing:
                - question: Original question
                - answer: Generated answer
                - retrieved_passages: Context used
                - similarity_scores: Relevance scores
                - metadata: Additional info
        """
        if verbose:
            print(f"\n{'='*70}")
            print(f"❓ QUESTION: {question}")
            print(f"{'='*70}\n")
        
        # Use defaults if not overridden
        k = top_k or self.top_k
        threshold = similarity_threshold or self.similarity_threshold
        
        # Step 1: Embed query
        if verbose:
            print("⚙️  Step 1/4: Embedding query...")
        query_embedding = self.embedder.embed_query(question)
        
        # Step 2: Retrieve relevant passages
        if verbose:
            print(f"⚙️  Step 2/4: Retrieving top-{k} passages (threshold: {threshold})...")
        retrieved_passages, scores = self.retriever.retrieve(
            query_embedding, k, threshold
        )
        
        if verbose and retrieved_passages:
            print(f"\n   📚 Retrieved passages:")
            for i, (passage, score) in enumerate(zip(retrieved_passages, scores), 1):
                print(f"      {i}. {passage['source']} (score: {score:.3f})")
        
        # Step 3: Assemble prompt
        if verbose:
            print("\n⚙️  Step 3/4: Assembling prompt with context...")
        prompts = self.prompt_assembler.assemble_prompt(
            question, retrieved_passages, scores
        )
        
        # Step 4: Generate answer
        if verbose:
            print("⚙️  Step 4/4: Generating answer with LLM...")
        answer = self.llm_generator.generate(
            prompts["system_prompt"],
            prompts["user_prompt"]
        )
        
        # Prepare result
        result = {
            "question": question,
            "answer": answer,
            "retrieved_passages": retrieved_passages,
            "similarity_scores": scores,
            "metadata": {
                "num_passages_retrieved": len(retrieved_passages),
                "avg_similarity": float(np.mean(scores)) if scores else 0.0,
                "top_k": k,
                "threshold": threshold,
                "timestamp": datetime.now().isoformat()
            }
        }
        
        # Log if enabled
        if self.log_file:
            self._log_query(result)
        
        if verbose:
            print(f"\n{'='*70}")
            print("✅ ANSWER GENERATED")
            print(f"{'='*70}\n")
        
        return result
    
    def batch_query(
        self,
        questions: List[str],
        top_k: Optional[int] = None,
        similarity_threshold: Optional[float] = None
    ) -> List[Dict[str, Any]]:
        """
        Process multiple questions in batch
        
        Args:
            questions: List of questions
            top_k: Override default top_k
            similarity_threshold: Override default threshold
            
        Returns:
            List of result dictionaries
        """
        print(f"\n{'='*70}")
        print(f"🔄 BATCH PROCESSING: {len(questions)} questions")
        print(f"{'='*70}\n")
        
        results = []
        for i, question in enumerate(questions, 1):
            print(f"\n📝 Processing question {i}/{len(questions)}...")
            result = self.query(question, top_k, similarity_threshold, verbose=False)
            results.append(result)
            print(f"   ✅ Completed")
        
        print(f"\n{'='*70}")
        print(f"✅ BATCH COMPLETE: {len(results)} answers generated")
        print(f"{'='*70}\n")
        
        return results
    
    def _log_query(self, result: Dict[str, Any]):
        """Log query and response to file"""
        try:
            log_entry = {
                "timestamp": result["metadata"]["timestamp"],
                "question": result["question"],
                "num_passages": result["metadata"]["num_passages_retrieved"],
                "avg_similarity": result["metadata"]["avg_similarity"],
                "answer_length": len(result["answer"])
            }
            
            with open(self.log_file, 'a') as f:
                f.write(json.dumps(log_entry) + "\n")
        except Exception as e:
            print(f"⚠️  Logging error: {str(e)}")
    
    def print_answer(self, result: Dict[str, Any]):
        """Pretty print the result"""
        print("\n" + "="*70)
        print("🌾 AGRICULTURE Q&A RESULT")
        print("="*70)
        
        print(f"\n❓ QUESTION:")
        print(f"   {result['question']}")
        
        print(f"\n💡 ANSWER:")
        print(f"   {result['answer']}")
        
        if result['retrieved_passages']:
            print(f"\n📚 SOURCES ({len(result['retrieved_passages'])} passages):")
            for i, (passage, score) in enumerate(
                zip(result['retrieved_passages'], result['similarity_scores']), 1
            ):
                print(f"\n   [{i}] {passage['source']} (relevance: {score:.1%})")
                print(f"       Type: {passage['type']}")
                print(f"       Preview: {passage['text'][:150]}...")
        
        print("\n" + "="*70)

In [10]:
# ============================================================================
# USAGE EXAMPLES
# ============================================================================

if __name__ == "__main__":
    # Example 1: Using OpenRouter (recommended - works with free tier)
    print("\n" + "🌾"*35)
    print("EXAMPLE 1: OpenRouter (Free Tier Models Available)")
    print("🌾"*35)
    
    rag_openrouter = AgricultureRAGPipeline(
        llm_provider="openrouter",
        llm_api_key="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",  # Get from https://openrouter.ai/keys
        llm_model="google/gemini-2.0-flash-exp:free",  # Free model!
        top_k=3,
        similarity_threshold=0.35
    )
    
    # Test query
    result = rag_openrouter.query(
        "What are the best practices for organic pest control in tomato cultivation?"
    )
    rag_openrouter.print_answer(result)
    
    
    # Example 2: Using Google Gemini
    print("\n\n" + "🌾"*35)
    print("EXAMPLE 2: Google Gemini")
    print("🌾"*35)
    
    rag_gemini = AgricultureRAGPipeline(
        llm_provider="gemini",
        llm_api_key="AIzaSyCnKfBDX6eChKw7V5RHv9E4ni0w5NVXLqc",  # Get from https://makersuite.google.com/app/apikey
        llm_model="gemini-2.5-flash",
        top_k=5,
        similarity_threshold=0.3
    )
    
    result = rag_gemini.query(
        "How to improve soil fertility for wheat cultivation?"
    )
    rag_gemini.print_answer(result)
    
    
    # Example 3: Using Groq
    print("\n\n" + "🌾"*35)
    print("EXAMPLE 3: Groq (Fast Inference)")
    print("🌾"*35)
    
    rag_groq = AgricultureRAGPipeline(
        llm_provider="groq",
        llm_api_key="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",  # Get from https://console.groq.com/keys
        llm_model="llama-3.1-70b-versatile",
        top_k=4,
        similarity_threshold=0.35,
        temperature=0.6
    )
    
    result = rag_groq.query(
        "What is crop rotation and why is it important?"
    )
    rag_groq.print_answer(result)
    
    
    # Example 4: Batch Processing
    print("\n\n" + "🌾"*35)
    print("EXAMPLE 4: Batch Query Processing")
    print("🌾"*35)
    
    questions = [
        "What are the water requirements for rice cultivation?",
        "How to prevent fungal diseases in crops?",
        "What is the best season for sowing wheat in India?"
    ]
    
    batch_results = rag_openrouter.batch_query(questions)
    
    for i, result in enumerate(batch_results, 1):
        print(f"\n{'='*70}")
        print(f"Batch Result {i}/{len(batch_results)}")
        print(f"{'='*70}")
        rag_openrouter.print_answer(result)
    
    
    # Example 5: Using Environment Variables (Production Best Practice)
    print("\n\n" + "🌾"*35)
    print("EXAMPLE 5: Using Environment Variables (Production)")
    print("🌾"*35)
    
    # Set environment variable first:
    # export OPENROUTER_API_KEY="your-key-here"
    # or in Python:
    # os.environ['OPENROUTER_API_KEY'] = 'your-key-here'
    
    rag_production = AgricultureRAGPipeline(
        llm_provider="openrouter",
        # API key will be automatically read from environment variable
        log_file="rag_queries.log"  # Enable logging
    )
    
    result = rag_production.query(
        "What are the benefits of vermicomposting?"
    )
    rag_production.print_answer(result)
    
    
    print("\n\n" + "="*70)
    print("🎉 ALL EXAMPLES COMPLETED!")
    print("="*70)
    print("""
📝 QUICK START GUIDE:

1. Get API Keys:
   • OpenRouter: https://openrouter.ai/keys (Best option - many free models)
   • Gemini: https://makersuite.google.com/app/apikey
   • Groq: https://console.groq.com/keys (Fastest inference)

2. Install Required Packages:
   pip install sentence-transformers faiss-cpu google-generativeai groq requests

3. Basic Usage:
   ```python
   rag = AgricultureRAGPipeline(
       llm_provider="openrouter",
       llm_api_key="your-key-here"
   )
   
   result = rag.query("Your agriculture question here")
   rag.print_answer(result)
   ```

4. Recommended Models:
   • OpenRouter: "google/gemini-2.0-flash-exp:free" (FREE!)
   • Gemini: "gemini-1.5-flash"
   • Groq: "llama-3.1-70b-versatile"

5. Pro Tips:
   • Adjust top_k (3-7) based on question complexity
   • Adjust similarity_threshold (0.3-0.4) to control relevance
   • Lower temperature (0.5-0.7) for factual answers
   • Use batch_query() for multiple questions efficiently
""")


🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾
EXAMPLE 1: OpenRouter (Free Tier Models Available)
🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾🌾
🌾 AGRICULTURE RAG PIPELINE INITIALIZATION
🔄 Loading embedding model: all-mpnet-base-v2


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ Model loaded. Embedding dimension: 768

🔄 Loading FAISS index from: /kaggle/input/rag-vectordb/faiss_index.bin
✅ Index loaded with 99271 vectors
🔄 Loading metadata from: /kaggle/input/rag-vectordb/documents_metadata.pkl
✅ Loaded 99271 document chunks

🤖 LLM Generator initialized
   Provider: openrouter
   Model: google/gemini-2.0-flash-exp:free
   API Key: ✅ Configured

✅ RAG PIPELINE READY!

❓ QUESTION: What are the best practices for organic pest control in tomato cultivation?

⚙️  Step 1/4: Embedding query...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

⚙️  Step 2/4: Retrieving top-3 passages (threshold: 0.35)...
📊 Retrieved 3 passages above threshold 0.35

   📚 Retrieved passages:
      1. agronomy_textbook.pdf (score: 0.767)
      2. basic_agriculture_cbse.pdf (score: 0.690)
      3. agronomy_textbook.pdf (score: 0.656)

⚙️  Step 3/4: Assembling prompt with context...
⚙️  Step 4/4: Generating answer with LLM...

✅ ANSWER GENERATED


🌾 AGRICULTURE Q&A RESULT

❓ QUESTION:
   What are the best practices for organic pest control in tomato cultivation?

💡 ANSWER:
   ❌ OpenRouter Error: 429 Client Error: Too Many Requests for url: https://openrouter.ai/api/v1/chat/completions

Tip: Check your API key and model name.

📚 SOURCES (3 passages):

   [1] agronomy_textbook.pdf (relevance: 76.7%)
       Type: pdf
       Preview: of row crops to reduce the weed menace. No herbicides are
applied as they lead to environmental pollution.
3. Biological pest management: The control ...

   [2] basic_agriculture_cbse.pdf (relevance: 69.0%)
       Type: 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

⚙️  Step 2/4: Retrieving top-5 passages (threshold: 0.3)...
📊 Retrieved 5 passages above threshold 0.3

   📚 Retrieved passages:
      1. basic_agriculture_cbse.pdf (score: 0.712)
      2. agronomy_textbook.pdf (score: 0.703)
      3. agronomy_textbook.pdf (score: 0.687)
      4. agronomy_textbook.pdf (score: 0.670)
      5. agronomy_textbook.pdf (score: 0.660)

⚙️  Step 3/4: Assembling prompt with context...
⚙️  Step 4/4: Generating answer with LLM...

✅ ANSWER GENERATED


🌾 AGRICULTURE Q&A RESULT

❓ QUESTION:
   How to improve soil fertility for wheat cultivation?

💡 ANSWER:
   To improve soil fertility for wheat cultivation, the following practices are recommended based on the provided context:

1.  **Application of Organic Manures:**
    *   Apply Farmyard Manure (FYM) or other organic manures at a rate of 10 tonnes per hectare (t/ha) at the time of sowing. This practice is specifically highlighted as beneficial for long-term fertility maintenance (Context 1).

2.  **Balanced Use o

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

⚙️  Step 2/4: Retrieving top-4 passages (threshold: 0.35)...
📊 Retrieved 4 passages above threshold 0.35

   📚 Retrieved passages:
      1. agronomy_textbook.pdf (score: 0.871)
      2. agronomy_textbook.pdf (score: 0.846)
      3. agronomy_textbook.pdf (score: 0.808)
      4. agronomy_textbook.pdf (score: 0.800)

⚙️  Step 3/4: Assembling prompt with context...
⚙️  Step 4/4: Generating answer with LLM...

✅ ANSWER GENERATED


🌾 AGRICULTURE Q&A RESULT

❓ QUESTION:
   What is crop rotation and why is it important?

💡 ANSWER:
   ❌ Groq Error: Error code: 400 - {'error': {'message': 'The model `llama-3.1-70b-versatile` has been decommissioned and is no longer supported. Please refer to https://console.groq.com/docs/deprecations for a recommendation on which model to use instead.', 'type': 'invalid_request_error', 'code': 'model_decommissioned'}}

Tip: Get API key at https://console.groq.com/keys

📚 SOURCES (4 passages):

   [1] agronomy_textbook.pdf (relevance: 87.1%)
       Type: pdf
    

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

📊 Retrieved 3 passages above threshold 0.35
   ✅ Completed

📝 Processing question 2/3...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

📊 Retrieved 3 passages above threshold 0.35
   ✅ Completed

📝 Processing question 3/3...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

📊 Retrieved 3 passages above threshold 0.35
   ✅ Completed

✅ BATCH COMPLETE: 3 answers generated


Batch Result 1/3

🌾 AGRICULTURE Q&A RESULT

❓ QUESTION:
   What are the water requirements for rice cultivation?

💡 ANSWER:
   ❌ OpenRouter Error: 429 Client Error: Too Many Requests for url: https://openrouter.ai/api/v1/chat/completions

Tip: Check your API key and model name.

📚 SOURCES (3 passages):

   [1] agronomy_textbook.pdf (relevance: 81.3%)
       Type: pdf
       Preview: tion in
low land rice, bunded rainfed rice and forage grasses, where water is stagnated to the required...

   [2] agronomy_textbook.pdf (relevance: 77.9%)
       Type: pdf
       Preview: 396 A T EXTBOOK  OF AGRONOMY
units (irrigation groups). According to the availa bility of irrigation water, stabilized field channels and
group-wise i...

   [3] agronomy_textbook.pdf (relevance: 76.8%)
       Type: pdf
       Preview: ilable for uptake by the crop. Typical field application
efficiencies vary from 60-70% us

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

⚙️  Step 2/4: Retrieving top-5 passages (threshold: 0.35)...
📊 Retrieved 5 passages above threshold 0.35

   📚 Retrieved passages:
      1. agronomy_textbook.pdf (score: 0.499)
      2. agronomy_textbook.pdf (score: 0.493)
      3. agronomy_textbook.pdf (score: 0.489)
      4. agronomy_textbook.pdf (score: 0.458)
      5. agronomy_textbook.pdf (score: 0.447)

⚙️  Step 3/4: Assembling prompt with context...
⚙️  Step 4/4: Generating answer with LLM...

✅ ANSWER GENERATED


🌾 AGRICULTURE Q&A RESULT

❓ QUESTION:
   What are the benefits of vermicomposting?

💡 ANSWER:
   ❌ No API key configured!

Please provide an API key:
1. OpenRouter: https://openrouter.ai/keys
2. Gemini: https://makersuite.google.com/app/apikey
3. Groq: https://console.groq.com/keys

Set it as environment variable or pass to RAGPipeline constructor.

📚 SOURCES (5 passages):

   [1] agronomy_textbook.pdf (relevance: 49.9%)
       Type: pdf
       Preview: to be used as green leaf manure. Crop rotation and intercropping
w

In [12]:
# Option 1: Groq (FASTEST - Recommended)
rag = AgricultureRAGPipeline(
    llm_provider="groq",
    llm_api_key="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",
    llm_model="llama-3.3-70b-versatile",  # ✅ Updated model
    top_k=3,
    similarity_threshold=0.35
)

result = rag.query("What are the best practices for wheat cultivation?")
rag.print_answer(result)

🌾 AGRICULTURE RAG PIPELINE INITIALIZATION
🔄 Loading embedding model: all-mpnet-base-v2
✅ Model loaded. Embedding dimension: 768

🔄 Loading FAISS index from: /kaggle/input/rag-vectordb/faiss_index.bin
✅ Index loaded with 99271 vectors
🔄 Loading metadata from: /kaggle/input/rag-vectordb/documents_metadata.pkl
✅ Loaded 99271 document chunks

🤖 LLM Generator initialized
   Provider: groq
   Model: llama-3.3-70b-versatile
   API Key: ✅ Configured

✅ RAG PIPELINE READY!

❓ QUESTION: What are the best practices for wheat cultivation?

⚙️  Step 1/4: Embedding query...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

⚙️  Step 2/4: Retrieving top-3 passages (threshold: 0.35)...
📊 Retrieved 3 passages above threshold 0.35

   📚 Retrieved passages:
      1. agronomy_textbook.pdf (score: 0.770)
      2. basic_agriculture_cbse.pdf (score: 0.714)
      3. basic_agriculture_cbse.pdf (score: 0.707)

⚙️  Step 3/4: Assembling prompt with context...
⚙️  Step 4/4: Generating answer with LLM...

✅ ANSWER GENERATED


🌾 AGRICULTURE Q&A RESULT

❓ QUESTION:
   What are the best practices for wheat cultivation?

💡 ANSWER:
   Based on the provided context, the best practices for wheat cultivation can be summarized as follows:

1. **Temperature Requirements**: Wheat growth stages have specific temperature requirements:
   - Germination: 20–25°C
   - Tillering: 16–20°C
   - Accelerated growth: 20–23°C
   - Proper grain filling: 23–25°C

2. **Sowing Dates**: The ideal sowing dates for wheat varieties in India are:
   - Indigenous wheat: Last week of October
   - Long duration dwarf wheat: 1st fortnight of November
   - 

In [14]:
# Option 2: Gemini (RELIABLE)
rag = AgricultureRAGPipeline(
    llm_provider="gemini",
    llm_api_key="AIzaSyCnKfBDX6eChKw7V5RHv9E4ni0w5NVXLqc",
    llm_model="gemini-2.5-flash",  # ✅ Fixed model name
    top_k=3,
    similarity_threshold=0.35
)

result = rag.query("What are the best practices for wheat cultivation?")
rag.print_answer(result)

🌾 AGRICULTURE RAG PIPELINE INITIALIZATION
🔄 Loading embedding model: all-mpnet-base-v2
✅ Model loaded. Embedding dimension: 768

🔄 Loading FAISS index from: /kaggle/input/rag-vectordb/faiss_index.bin
✅ Index loaded with 99271 vectors
🔄 Loading metadata from: /kaggle/input/rag-vectordb/documents_metadata.pkl
✅ Loaded 99271 document chunks

🤖 LLM Generator initialized
   Provider: gemini
   Model: gemini-2.5-flash
   API Key: ✅ Configured

✅ RAG PIPELINE READY!

❓ QUESTION: What are the best practices for wheat cultivation?

⚙️  Step 1/4: Embedding query...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

⚙️  Step 2/4: Retrieving top-3 passages (threshold: 0.35)...
📊 Retrieved 3 passages above threshold 0.35

   📚 Retrieved passages:
      1. agronomy_textbook.pdf (score: 0.770)
      2. basic_agriculture_cbse.pdf (score: 0.714)
      3. basic_agriculture_cbse.pdf (score: 0.707)

⚙️  Step 3/4: Assembling prompt with context...
⚙️  Step 4/4: Generating answer with LLM...

✅ ANSWER GENERATED


🌾 AGRICULTURE Q&A RESULT

❓ QUESTION:
   What are the best practices for wheat cultivation?

💡 ANSWER:
   Based on the provided context, here are the best practices for wheat cultivation:

**1. Optimal Temperature Requirements for Growth Stages:**
*   **Germination:** 20–25°C
*   **Tillering:** 16–20°C
*   **Accelerated growth:** 20–23°C
*   **Proper grain filling:** 23–25°C

**2. Land Preparation:**
*   Wheat generally requires a well-pulverized but compact seedbed to ensure good and uniform germination.
*   **For Irrigated Wheat:** After Kharif crops, the field should be ploughed with a disc or mo

Hey its paid

In [15]:
rag = AgricultureRAGPipeline(
    llm_provider="openrouter",
    llm_api_key="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",
    llm_model="anthropic/claude-3.5-haiku",  # ✅ Paid but cheap model
    top_k=3,
    similarity_threshold=0.35
)

result = rag.query("What are the best practices for wheat cultivation?")
rag.print_answer(result)

🌾 AGRICULTURE RAG PIPELINE INITIALIZATION
🔄 Loading embedding model: all-mpnet-base-v2
✅ Model loaded. Embedding dimension: 768

🔄 Loading FAISS index from: /kaggle/input/rag-vectordb/faiss_index.bin
✅ Index loaded with 99271 vectors
🔄 Loading metadata from: /kaggle/input/rag-vectordb/documents_metadata.pkl
✅ Loaded 99271 document chunks

🤖 LLM Generator initialized
   Provider: openrouter
   Model: anthropic/claude-3.5-haiku
   API Key: ✅ Configured

✅ RAG PIPELINE READY!

❓ QUESTION: What are the best practices for wheat cultivation?

⚙️  Step 1/4: Embedding query...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

⚙️  Step 2/4: Retrieving top-3 passages (threshold: 0.35)...
📊 Retrieved 3 passages above threshold 0.35

   📚 Retrieved passages:
      1. agronomy_textbook.pdf (score: 0.770)
      2. basic_agriculture_cbse.pdf (score: 0.714)
      3. basic_agriculture_cbse.pdf (score: 0.707)

⚙️  Step 3/4: Assembling prompt with context...
⚙️  Step 4/4: Generating answer with LLM...

✅ ANSWER GENERATED


🌾 AGRICULTURE Q&A RESULT

❓ QUESTION:
   What are the best practices for wheat cultivation?

💡 ANSWER:
   Based on the provided context, here are the best practices for wheat cultivation:

Sowing Timing:
1. Irrigated Wheat:
- North India: First fortnight of November
- North-east and Central India: Middle of November
- Late-sown conditions: First fortnight of December

2. Rainfed Wheat:
- Second fortnight of October to early November
- Aim to utilize residual soil moisture

Seed and Spacing:
1. Seed Rate:
- Normal conditions: 100 kg/ha
- Late-sown conditions: Increase seed rate by 25%

2. Spacing:
- 