In [1]:
!pip install -q gradio transformers accelerate langchain langchain-community faiss-cpu sentence-transformers pymupdf SpeechRecognition pyttsx3 deep_translator langdetect langchain_huggingface

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m77.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m61.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m81.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.9/32.9 MB[0m [31m56.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m561.5/561.5 kB[0m [31m32.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os
import pickle
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.llms import HuggingFacePipeline
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA

# Paths
DATA_FOLDER = "/kaggle/input/agrollm-data/agrollm_dataset"
INDEX_DIR = "/kaggle/working/AgroRAG/faiss_index"
CHUNKS_PATH = "/kaggle/working/AgroRAG/chunks.pkl"

# 1. Load PDFs from subfolders
def load_documents_from_folders(root_dir):
    all_docs = []
    for subdir, _, files in os.walk(root_dir):
        for file in files:
            if file.endswith(".pdf"):
                path = os.path.join(subdir, file)
                print(f"Loading {path}...")
                loader = PyMuPDFLoader(path)
                docs = loader.load()
                for doc in docs:
                    doc.metadata['source'] = path
                    doc.metadata['topic'] = os.path.basename(subdir)
                all_docs.extend(docs)
    return all_docs

# 2. Split text into chunks
def split_documents(documents, chunk_size=500, chunk_overlap=50):
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return splitter.split_documents(documents)

# 3. Embed and store
def build_or_load_faiss_index(chunks, index_path=INDEX_DIR):
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    if os.path.exists(os.path.join(index_path, "index.faiss")):
        print("Loading existing FAISS index...")
        return FAISS.load_local(index_path, embedder)
    print("Creating new FAISS index...")
    vectordb = FAISS.from_documents(chunks, embedder)
    os.makedirs(index_path, exist_ok=True)
    vectordb.save_local(index_path)
    return vectordb

# 4. Load Mistral-7B-Instruct for Local RAG
def load_local_llm():
    model_id = "mistralai/Mistral-7B-Instruct-v0.2"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512, temperature=0.7)
    return HuggingFacePipeline(pipeline=pipe)

# 5. RAG QA Chain with Local Model
def run_qa_chain(vectorstore, question):
    llm = load_local_llm()
    retriever = vectorstore.as_retriever(search_type="similarity", k=4)
    qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    return qa.run(question)

# Main Pipeline
if __name__ == "__main__":
    if os.path.exists(CHUNKS_PATH):
        print("✅ Loading pre-saved chunks...")
        with open(CHUNKS_PATH, 'rb') as f:
            chunks = pickle.load(f)
    else:
        print("📄 Loading and chunking documents...")
        docs = load_documents_from_folders(DATA_FOLDER)
        chunks = split_documents(docs)
        os.makedirs(os.path.dirname(CHUNKS_PATH), exist_ok=True)
        with open(CHUNKS_PATH, 'wb') as f:
            pickle.dump(chunks, f)

    print("🧠 Building or loading FAISS index...")
    db = build_or_load_faiss_index(chunks)

    print("\n✅ Chunking and indexing complete. You can now use the inference script to ask questions.")

In [None]:
import os
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from transformers import BlipProcessor, BlipForConditionalGeneration, ViTImageProcessor, ViTForImageClassification
from langchain_community.llms import HuggingFacePipeline
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from huggingface_hub import login
import speech_recognition as sr
import numpy as np
from PIL import Image
import gc
import io
import wave
import tempfile
import json
from datetime import datetime

# NEW: Multilingual imports
import re
import warnings
from deep_translator import GoogleTranslator
from langdetect import detect

# Login to Hugging Face
login("YOUR HUGGING FACE TOKEN")

# OPTIMIZATION: Set optimal GPU settings for P100
torch.backends.cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

# Global variables
qa_chain = None
classifier_pipeline = None
your_llm_pipeline = None
recognizer = sr.Recognizer()
blip_model = None
blip_processor = None
vit_model = None
vit_processor = None
INDEX_PATH = "/kaggle/working/AgroRAG/faiss_index"

# NEW: Global conversation history for session-based memory
conversation_history = []
MAX_HISTORY_LENGTH = 10  # Keep last 10 exchanges

# NEW: Multilingual utility functions
def detect_language(text):
    """Detect language of input text"""
    try:
        return detect(text)
    except:
        return 'en'  # Default to English if detection fails

def translate_to_english(text):
    """Translate text to English if it's not already in English"""
    lang = detect_language(text)
    if lang == 'en':
        return text, lang
    try:
        translated = GoogleTranslator(source=lang, target='en').translate(text)
        return translated, lang
    except:
        return text, lang  # Return original if translation fails

def translate_from_english(text, target_lang):
    """Translate text from English to target language"""
    if target_lang == 'en':
        return text
    try:
        return GoogleTranslator(source='en', target=target_lang).translate(text)
    except:
        return text  # Return original if translation fails

def clean_answer(text):
    """Clean answer for better display"""
    text = re.sub(r"https?://\S+", "", text)             # remove URLs
    text = re.sub(r"\[[^\]]*\]", "", text)               # remove citations
    text = re.sub(r"\([^)]*\)", "", text)                # remove parenthesis
    text = re.sub(r"\bn\d+\b", "", text)                 # remove n1/n2
    text = re.sub(r"\s{2,}", " ", text)                  # remove extra spaces
    return text.strip()

def cleanup_memory():
    """Clean up GPU memory"""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

# NEW: Function to add conversation to history
def add_to_history(user_input, bot_response, image_context="", original_lang="en"):
    """Add conversation exchange to history with language info"""
    global conversation_history
    
    exchange = {
        "timestamp": datetime.now().isoformat(),
        "user_input": user_input,
        "bot_response": bot_response,
        "image_context": image_context,
        "language": original_lang  # NEW: Track original language
    }
    
    conversation_history.append(exchange)
    
    # Keep only recent history to manage memory
    if len(conversation_history) > MAX_HISTORY_LENGTH:
        conversation_history = conversation_history[-MAX_HISTORY_LENGTH:]

# NEW: Function to get conversation context
def get_conversation_context():
    """Get formatted conversation history for context"""
    if not conversation_history:
        return ""
    
    context = "\nRecent conversation history:\n"
    for i, exchange in enumerate(conversation_history[-3:], 1):  # Last 3 exchanges
        context += f"{i}. User: {exchange['user_input']}\n"
        context += f"   Assistant: {exchange['bot_response'][:150]}...\n"
        if exchange.get('image_context'):
            context += f"   Image: {exchange['image_context']}\n"
    
    return context

# NEW: Function to check for interactive/greeting inputs
def is_interactive_input(text):
    """Check if input is a greeting or interactive message"""
    if not text or len(text.strip()) < 2:
        return False
    
    text_lower = text.lower().strip()
    
    # Greeting patterns
    greetings = [
        'hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening',
        'greetings', 'howdy', 'hiya', 'sup', 'what\'s up', 'whats up'
    ]
    
    # Interactive patterns
    interactive_patterns = [
        'how are you', 'how do you do', 'nice to meet you', 'pleasure to meet you',
        'thanks', 'thank you', 'bye', 'goodbye', 'see you', 'take care',
        'help me', 'can you help', 'what can you do', 'what do you do'
    ]
    
    # Check exact matches for greetings
    if text_lower in greetings:
        return True
    
    # Check if any interactive pattern is in the text
    for pattern in interactive_patterns:
        if pattern in text_lower:
            return True
    
    return False

# NEW: Function to generate interactive responses
def generate_interactive_response(text):
    """Generate appropriate interactive response"""
    text_lower = text.lower().strip()
    
    # Greeting responses
    if any(greeting in text_lower for greeting in ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening', 'greetings', 'howdy', 'hiya']):
        responses = [
            "Hello! 👋 I'm your AgroRAG assistant, ready to help you with all your agriculture-related questions. How can I assist you today?",
            "Hi there! 🌱 I'm here to help you with farming, crops, livestock, and all things agriculture. What would you like to know?",
            "Greetings! 🚜 I'm your agricultural expert assistant. Feel free to ask me about crops, soil, pests, diseases, or any farming topics!",
            "Hello! 🌾 Welcome to AgroRAG. I'm specialized in agriculture and farming advice. What agricultural topic can I help you with today?"
        ]
        import random
        return random.choice(responses)
    
    # How are you responses
    elif any(phrase in text_lower for phrase in ['how are you', 'how do you do']):
        return "I'm doing great, thank you for asking! 😊 I'm ready and excited to help you with any agriculture-related questions. What farming or crop topic would you like to explore today?"
    
    # Thank you responses
    elif any(phrase in text_lower for phrase in ['thanks', 'thank you']):
        return "You're very welcome! 🌟 I'm always happy to help with agriculture and farming questions. Feel free to ask me anything else about crops, livestock, soil, or farming techniques!"
    
    # Goodbye responses
    elif any(phrase in text_lower for phrase in ['bye', 'goodbye', 'see you', 'take care']):
        return "Goodbye! 👋 Take care and happy farming! Feel free to come back anytime you have agriculture-related questions. Have a great day! 🌱"
    
    # Help/capability responses
    elif any(phrase in text_lower for phrase in ['help me', 'can you help', 'what can you do', 'what do you do']):
        return """I'm your specialized agriculture assistant! 🌾 Here's how I can help you:

🌱 *Crop Management*: Planting, growing, harvesting advice
🦠 *Disease & Pest Control*: Identification and treatment solutions  
🌍 *Soil & Fertilization*: Soil health, nutrients, fertilizer recommendations
💧 *Irrigation*: Water management and irrigation techniques
🐄 *Livestock Care*: Animal husbandry and livestock management
🌿 *Organic Farming*: Sustainable and organic farming practices
🚜 *Equipment & Tools*: Agricultural machinery and tool guidance
📊 *Farm Planning*: Crop rotation, seasonal planning, farm economics

You can ask me questions, upload images of plants or crops for analysis, or even use voice input! What agricultural topic interests you today?"""
    
    # Default friendly response
    else:
        return "Hello! 😊 I'm here to help you with agriculture and farming questions. Whether you want to know about crops, soil, livestock, or farming techniques, I'm ready to assist! What would you like to learn about today? 🌱"

# NEW: Function to check if question relates to conversation history
def relates_to_conversation_history(current_input):
    """Check if current input relates to previous conversation"""
    if not conversation_history:
        return False, ""
    
    # Keywords that suggest reference to previous conversation
    reference_keywords = [
        'this', 'that', 'it', 'more about', 'tell me more', 'expand on',
        'details about', 'explain further', 'what about', 'regarding',
        'continue', 'elaborate', 'additional', 'furthermore'
    ]
    
    current_lower = current_input.lower()
    
    # Check if any reference keywords are present
    has_reference = any(keyword in current_lower for keyword in reference_keywords)
    
    if has_reference and len(conversation_history) > 0:
        # Get the last exchange for context
        last_exchange = conversation_history[-1]
        context = f"Previous question: {last_exchange['user_input']}\nPrevious answer: {last_exchange['bot_response'][:200]}..."
        return True, context
    
    return False, ""

def initialize_speech_recognition():
    """Initialize speech recognition"""
    global recognizer
    
    try:
        recognizer = sr.Recognizer()
        # Adjust for ambient noise
        recognizer.energy_threshold = 4000
        recognizer.dynamic_energy_threshold = True
        print("✅ Speech Recognition initialized successfully")
        return True
        
    except Exception as e:
        print(f"❌ Error initializing speech recognition: {e}")
        return False

def initialize_image_models():
    """Initialize optimized image models"""
    global blip_model, blip_processor, vit_model, vit_processor
    
    try:
        print("Loading optimized BLIP model...")
        blip_model_id = "Salesforce/blip-image-captioning-base"
        blip_processor = BlipProcessor.from_pretrained(blip_model_id)
        blip_model = BlipForConditionalGeneration.from_pretrained(
            blip_model_id,
            torch_dtype=torch.float16,
            device_map="cuda:0"
        ).eval()
        print("✅ BLIP model loaded and optimized")
        
        print("Loading crop disease ViT model...")
        vit_model_id = "wambugu71/crop_leaf_diseases_vit"
        vit_processor = ViTImageProcessor.from_pretrained(vit_model_id)
        vit_model = ViTForImageClassification.from_pretrained(
            vit_model_id,
            torch_dtype=torch.float16,
            device_map="cuda:0"
        ).eval()
        print("✅ ViT model loaded and optimized")
        
        return True
        
    except Exception as e:
        print(f"❌ Failed to load image models: {e}")
        return False

@torch.inference_mode()  # OPTIMIZATION: Disable gradient computation
def analyze_uploaded_image(image):
    """OPTIMIZED image analysis with faster inference"""
    global blip_model, blip_processor, vit_model, vit_processor
    
    if image is None:
        return "", ""
    
    if blip_model is None or vit_model is None:
        return "Image models not initialized", "Unknown"
    
    try:
        # Ensure image is in PIL format and resize for faster processing
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image).convert('RGB')
        else:
            image = image.convert('RGB')
        
        # OPTIMIZATION: Resize image to reduce processing time
        image = image.resize((224, 224), Image.Resampling.LANCZOS)
        
        # BLIP Captioning with optimizations
        blip_inputs = blip_processor(images=image, return_tensors="pt")
        blip_inputs = {k: v.to("cuda:0", dtype=torch.float16) for k, v in blip_inputs.items()}
        
        caption_ids = blip_model.generate(
            **blip_inputs, 
            max_new_tokens=20,  # Reduced from 30
            num_beams=2,        # Reduced from default 5
            do_sample=False,
            early_stopping=True
        )
        caption = blip_processor.decode(caption_ids[0], skip_special_tokens=True)
        
        # ViT Classification with optimizations
        vit_inputs = vit_processor(images=image, return_tensors="pt")
        vit_inputs = {k: v.to("cuda:0", dtype=torch.float16) for k, v in vit_inputs.items()}
        
        outputs = vit_model(**vit_inputs)
        label_idx = outputs.logits.argmax(-1).item()
        label = vit_model.config.id2label[label_idx]
        
        print(f"Fast analysis - Caption: {caption}, Disease: {label}")
        return caption, label
        
    except Exception as e:
        print(f"Error in image analysis: {e}")
        return "Error analyzing image", "Error in classification"

def transcribe_audio(audio):
    """Audio transcription using speech_recognition library"""
    global recognizer
    
    if recognizer is None:
        return "❌ Speech recognition not initialized"
    
    if audio is None:
        return "❌ No audio provided"
    
    try:
        sample_rate, audio_data = audio
        
        # Convert numpy array to audio data that speech_recognition can use
        if audio_data.dtype == np.int16:
            audio_data = audio_data.astype(np.float32) / 32768.0
        elif audio_data.dtype == np.int32:
            audio_data = audio_data.astype(np.float32) / 2147483648.0
        else:
            audio_data = audio_data.astype(np.float32)
        
        # Convert to 16-bit PCM
        audio_data = (audio_data * 32767).astype(np.int16)
        
        # Create a temporary wav file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            # Write WAV file
            with wave.open(tmp_file.name, 'wb') as wav_file:
                wav_file.setnchannels(1 if len(audio_data.shape) == 1 else audio_data.shape[1])
                wav_file.setsampwidth(2)  # 16-bit
                wav_file.setframerate(sample_rate)
                wav_file.writeframes(audio_data.tobytes())
            
            # Use speech_recognition to transcribe
            with sr.AudioFile(tmp_file.name) as source:
                # Adjust for ambient noise
                recognizer.adjust_for_ambient_noise(source, duration=0.5)
                audio = recognizer.record(source)
                
                try:
                    # Use Google Web Speech API (free)
                    text = recognizer.recognize_google(audio)
                    return text
                except sr.UnknownValueError:
                    return "❌ Could not understand audio"
                except sr.RequestError as e:
                    # Fallback to offline recognition if available
                    try:
                        text = recognizer.recognize_sphinx(audio)
                        return text
                    except:
                        return f"❌ Speech recognition service error: {e}"
        
        # Clean up temporary file
        try:
            os.unlink(tmp_file.name)
        except:
            pass
            
    except Exception as e:
        print(f"Error in transcription: {e}")
        return f"❌ Error transcribing audio: {str(e)}"

def initialize_classifier():
    """Use the main Mistral model for classification"""
    global classifier_pipeline
    
    print("✅ Using main Mistral 7B model for LLM-based classification (shared with QA)")
    # We'll use the same pipeline as the QA system for classification
    classifier_pipeline = "mistral_shared"  # Flag to indicate we're using the shared model
    return True 

def classify_question_with_llm(question, conversation_context=""):
    """Pure LLM-based classification for accurate context understanding - MODIFIED to include conversation context"""
    global your_llm_pipeline
    
    if your_llm_pipeline is None:
        print("Warning: LLM not available for classification, defaulting to allow")
        return True  # Default to allowing questions when LLM not available
    
    try:
        # Clean and prepare the question
        question_clean = question.strip()
        if len(question_clean) < 3:
            return False
        
        # Create a detailed classification prompt for LLM - MODIFIED to include conversation context
        classification_prompt = f"""<s>[INST] You are an expert agriculture classifier. Your task is to determine if the given question or statement is related to agriculture, farming, crops, livestock, plants, or rural/agricultural topics.

AGRICULTURE-RELATED TOPICS INCLUDE:
- Crop cultivation, planting, harvesting
- Plant diseases, pests, plant health
- Soil management, fertilizers, nutrients
- Irrigation, water management
- Livestock care, animal husbandry
- Organic farming, sustainable agriculture
- Agricultural tools, equipment, machinery
- Farm management, agricultural economics
- Plant biology, botany (in agricultural context)
- Weather/climate effects on farming
- Food production and processing
- Gardening and horticulture

NON-AGRICULTURE TOPICS INCLUDE:
- General technology, computers, software
- Medicine, human health (unless related to farm safety)
- General science (unless agricultural science)
- Entertainment, sports, politics
- General education topics
- Urban planning, city topics

IMPORTANT: If the current question refers to previous conversation about agriculture (using words like "this", "that", "more about it", "tell me more", etc.), and the conversation history shows agriculture-related topics, then classify it as agriculture-related.

{conversation_context}

Current question to classify: "{question_clean}"

Based on the question above and any conversation history, is this about agriculture, farming, plants, or related rural topics?

Answer with only "YES" if it's agriculture-related, or "NO" if it's not agriculture-related.

Classification: [/INST]"""

        print(f"🧠 Using LLM to classify: '{question_clean[:50]}...'")
        
        # Get classification result from LLM
        try:
            result = your_llm_pipeline(
                classification_prompt,
                max_new_tokens=5,  # Just need YES/NO
                do_sample=False,
                temperature=0.1,
                pad_token_id=your_llm_pipeline.tokenizer.eos_token_id
            )
            
            if result and len(result) > 0:
                response = result[0]['generated_text'].strip().upper()
                print(f"🤖 LLM Classification result: {response}")
                
                # Check for positive indicators
                is_agriculture = "YES" in response or "AGRICULTURE" in response or "FARMING" in response
                print(f"✅ Final classification: {is_agriculture}")
                return is_agriculture
                        
        except Exception as e:
            print(f"❌ LLM classification error: {e}")
            # If LLM fails, default to allowing (better user experience)
            print("⚠ LLM classification failed, defaulting to ALLOW")
            return True
        
        # If we get here, something went wrong - default to allowing
        return True
        
    except Exception as e:
        print(f"❌ Classification error: {e}")
        return True  # Default to allowing questions

def classify_image_with_llm(image_caption, classification_label):
    """Pure LLM-based image classification"""
    global your_llm_pipeline
    
    if your_llm_pipeline is None:
        print("Warning: LLM not available for image classification, defaulting to allow")
        return True
    
    if not image_caption and not classification_label:
        print("No image context available")
        return False
    
    try:
        # Combine image analysis results
        image_context = f"Image caption: {image_caption}\nImage classification: {classification_label}"
        
        # Create LLM prompt for image classification
        image_classification_prompt = f"""<s>[INST] You are an expert agriculture classifier analyzing image descriptions. Your task is to determine if the described image is related to agriculture, farming, crops, livestock, plants, or rural/agricultural topics.

AGRICULTURE-RELATED IMAGES INCLUDE:
- Plants, leaves, flowers, fruits, vegetables
- Crops in fields, gardens, greenhouses  
- Plant diseases, pests, plant health issues
- Farm animals, livestock
- Agricultural tools, machinery, equipment
- Soil, farming landscapes, rural settings
- Seeds, grains, harvest scenes
- Botanical specimens in agricultural context

NON-AGRICULTURE IMAGES INCLUDE:
- Indoor objects, furniture, electronics
- Urban scenes, buildings, roads
- Prepared food, cooked meals, kitchen scenes
- People in non-agricultural settings
- Vehicles (unless farm equipment)
- General household items
- Technology devices, computers

Image description to analyze:
{image_context}

Based on the image description above, does this image show agriculture, farming, plants, crops, or related rural content?

Answer with only "YES" if it shows agriculture-related content, or "NO" if it does not.

Classification: [/INST]"""

        print(f"🖼 Using LLM to classify image: {image_context[:100]}...")
        
        try:
            result = your_llm_pipeline(
                image_classification_prompt,
                max_new_tokens=5,
                do_sample=False,
                temperature=0.1,
                pad_token_id=your_llm_pipeline.tokenizer.eos_token_id
            )
            
            if result and len(result) > 0:
                response = result[0]['generated_text'].strip().upper()
                print(f"🤖 LLM Image Classification result: {response}")
                
                is_agriculture = "YES" in response or "AGRICULTURE" in response or "FARMING" in response
                print(f"✅ Final image classification: {is_agriculture}")
                return is_agriculture
                        
        except Exception as e:
            print(f"❌ LLM image classification error: {e}")
            return True  # Default to allowing
        
        return True
        
    except Exception as e:
        print(f"❌ Image classification error: {e}")
        return True

def load_vectorstore(index_path):
    """Load FAISS vectorstore with optimizations"""
    try:
        embedder = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={'device': 'cuda:0'}  # Force embeddings to GPU
        )
        vectorstore = FAISS.load_local(index_path, embedder, allow_dangerous_deserialization=True)
        print("✅ FAISS vectorstore loaded with GPU embeddings")
        return vectorstore
    except Exception as e:
        print(f"Error loading vectorstore: {e}")
        return None

def build_qa_chain(vectorstore):
    """Build OPTIMIZED QA chain"""
    global your_llm_pipeline
    
    try:
        # OPTIMIZATION: Use smaller Mistral model or consider alternatives
        model_id = "mistralai/Mistral-7B-Instruct-v0.2"
        
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokenizer.pad_token = tokenizer.eos_token
        
        print("Loading Mistral model with optimizations...")
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16,
            device_map="cuda:0",
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            use_cache=True  # Enable KV cache for faster generation
        ).eval()
        
        # OPTIMIZATION: Faster pipeline settings
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=256,    # Reduced from 512
            temperature=0.1,       # Lower temperature for faster, more focused responses
            do_sample=False,       # Disable sampling for speed
            return_full_text=False,
            pad_token_id=tokenizer.eos_token_id,
            batch_size=1
        )
        
        your_llm_pipeline = pipe
        llm = HuggingFacePipeline(pipeline=pipe)
        
        # OPTIMIZATION: Reduce retrieval count
        retriever = vectorstore.as_retriever(search_type="similarity", k=2)  # Reduced from 4
        
        return RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    
    except Exception as e:
        print(f"Error building QA chain: {e}")
        return None

def initialize_system():
    """Initialize the optimized system"""
    global qa_chain
    
    try:
        cleanup_memory()  # Start with clean memory
        
        print("Initializing speech recognition...")
        speech_success = initialize_speech_recognition()
        if not speech_success:
            print("Warning: Speech recognition initialization failed")
        
        print("Initializing optimized image models...")
        image_success = initialize_image_models()
        if not image_success:
            print("Warning: Image models failed to load")
        
        print("Initializing LLM-based classifier...")
        classifier_success = initialize_classifier()
        
        if not os.path.exists(INDEX_PATH):
            return "Error: FAISS index not found"
            
        print("Loading optimized FAISS index...")
        vectordb = load_vectorstore(INDEX_PATH)
        
        if vectordb is None:
            return "Error: Failed to load FAISS index"
        
        print("Loading optimized Mistral model...")
        qa_chain = build_qa_chain(vectordb)
        
        if qa_chain is None:
            return "Error: Failed to build QA chain"
        
        cleanup_memory()  # Clean up after initialization
        
        return "🚀 OPTIMIZED System ready! Speech recognition, image analysis, conversational memory, interactive responses, and multilingual support initialized."
            
    except Exception as e:
        return f"Error initializing system: {str(e)}"

def review_and_refine_answer(initial_answer, original_question, image_context="", conversation_context=""):
    """Review and refine the initial answer using Mistral 7B - MODIFIED to include conversation context"""
    global your_llm_pipeline
    
    if your_llm_pipeline is None:
        print("Warning: Mistral model not available for review, returning original answer")
        return initial_answer
    
    try:
        # Create a comprehensive review prompt - MODIFIED to include conversation context
        review_prompt = f"""<s>[INST] You are an expert agricultural consultant reviewing an AI-generated answer for accuracy, completeness, and clarity.

{conversation_context}

Original Question: "{original_question}"
{f"Image Context: {image_context}" if image_context else ""}

Initial AI Answer:
{initial_answer}

Your task is to review this answer and improve it by:

1. *ACCURACY CHECK*: Verify all agricultural/botanical facts are correct
2. *COMPLETENESS*: Add any important missing information
3. *CLARITY*: Make the explanation clearer and more structured
4. *PRACTICAL VALUE*: Include actionable advice when appropriate
5. *SAFETY*: Ensure recommendations are safe and environmentally sound
6. *CONTEXT AWARENESS*: Consider the conversation history when refining the answer

Guidelines for review:
- If the answer is factually incorrect, correct it
- If important information is missing, add it
- If the answer is unclear, restructure it
- If there are contradictions, resolve them
- If the answer seems incomplete, expand it appropriately
- Keep the tone professional but accessible
- Focus on practical, actionable information
- If this question relates to previous conversation, acknowledge that connection

Provide the REVISED AND IMPROVED answer: [/INST]"""

        print("🔍 Reviewing and refining the initial answer...")
        
        # Get refined answer from Mistral
        refined_result = your_llm_pipeline(
            review_prompt,
            max_new_tokens=400,  # Allow more tokens for comprehensive review
            temperature=0.2,     # Slightly higher for more natural refinement
            do_sample=False,
            return_full_text=False
        )
        
        if refined_result and len(refined_result) > 0:
            refined_answer = refined_result[0]['generated_text'].strip()
            
            # Basic quality check on refined answer
            if len(refined_answer) > 50 and len(refined_answer) < 3000:  # Reasonable length
                print("✅ Answer successfully reviewed and refined")
                return refined_answer
            else:
                print("⚠ Refined answer seems problematic, using original")
                return initial_answer
        else:
            print("⚠ Review process failed, using original answer")
            return initial_answer
            
    except Exception as e:
        print(f"❌ Error in review process: {e}")
        return initial_answer

def answer_question(question, image=None):
    """OPTIMIZED answer function with PURE LLM-based classification, conversation history, interactive responses, and MULTILINGUAL support"""
    global qa_chain
    
    if qa_chain is None:
        return "❌ System not initialized"
    
    if not question.strip() and image is None:
        return "❓ Please enter a question or upload an image"
        
    try:
        question_text = question.strip()
        original_lang = "en"  # Default language
        english_question = question_text
        
        # NEW: Language detection and translation
        if question_text:
            english_question, original_lang = translate_to_english(question_text)
            if original_lang != "en":
                print(f"🌍 Detected language: {original_lang}, translated to English for processing")
        
        # NEW: Check for interactive/greeting inputs first (use English version for consistency)
        if english_question and is_interactive_input(english_question):
            print("🎯 Interactive input detected")
            interactive_response = generate_interactive_response(english_question)
            
            # NEW: Translate interactive response back to original language
            if original_lang != "en":
                interactive_response = translate_from_english(interactive_response, original_lang)
                print(f"🌍 Interactive response translated back to {original_lang}")
            
            # Add to conversation history
            add_to_history(question_text, interactive_response, "", original_lang)
            return interactive_response
        
        # Fast image analysis
        image_caption = ""
        classification_label = ""
        if image is not None:
            image_caption, classification_label = analyze_uploaded_image(image)

        # NEW: Check if question relates to conversation history
        relates_to_history, history_context = relates_to_conversation_history(english_question)
        conversation_context = get_conversation_context()
        
        print(f"🔗 Relates to conversation history: {relates_to_history}")
        
        # PURE LLM-BASED CLASSIFICATION - No rule-based fallbacks
        is_agriculture = False
        
        if image is not None:
            print("🖼 Image uploaded - using PURE LLM-based classification")
            
            # Use LLM to classify image content
            image_is_agriculture = classify_image_with_llm(image_caption, classification_label)
            print(f"🤖 LLM Image classification: {image_is_agriculture}")
            
            if english_question:
                # Use LLM to classify the question (with conversation context)
                question_is_agriculture = classify_question_with_llm(english_question, conversation_context)
                print(f"🤖 LLM Question classification: {question_is_agriculture}")
                
                # Allow if EITHER question OR image is agriculture-related (OR logic)
                if question_is_agriculture or image_is_agriculture:
                    is_agriculture = True
                    if question_is_agriculture and image_is_agriculture:
                        print("✅ LLM Decision: Both question and image are agriculture-related")
                    elif question_is_agriculture and not image_is_agriculture:
                        print("✅ LLM Decision: Question is agriculture-related (image is not)")
                    elif not question_is_agriculture and image_is_agriculture:
                        print("✅ LLM Decision: Image is agriculture-related (question is not)")
                else:
                    is_agriculture = False
                    print("❌ LLM Decision: Neither question nor image is agriculture-related")
            else:
                # No question provided, just check image with LLM
                if image_is_agriculture:
                    is_agriculture = True
                    print("✅ LLM Decision: No question provided but image is agriculture-related")
                else:
                    is_agriculture = False
                    print("❌ LLM Decision: No question provided and image is not agriculture-related")
                
        else:
            # No image, just classify the question with LLM (including conversation context)
            is_agriculture = classify_question_with_llm(english_question, conversation_context)
            print(f"🤖 LLM Text-only classification: {is_agriculture}")

        if not is_agriculture:
            non_agriculture_response = """🚫 *Agriculture Classification Result*:

Based on my AI analysis, your question/image doesn't appear to be related to agriculture, farming, crops, or livestock.

I'm specifically designed to help with agriculture-related topics such as:
- Crop cultivation and management
- Soil health and fertilization  
- Irrigation and water management
- Pest and disease control
- Livestock care and management
- Organic farming practices
- Agricultural equipment and techniques
- Plant biology and botany
- Agricultural economics and planning

Please ask a question related to agriculture or upload an agriculture-related image, and I'll be happy to help! 🌱"""
            
            # NEW: Translate non-agriculture response back to original language
            if original_lang != "en":
                non_agriculture_response = translate_from_english(non_agriculture_response, original_lang)
                print(f"🌍 Non-agriculture response translated back to {original_lang}")
            
            # Add to conversation history even for non-agriculture responses
            add_to_history(question_text, non_agriculture_response, 
                         f"{image_caption} - {classification_label}" if image else "", original_lang)
            return non_agriculture_response
        
        # Combine context for RAG (use English for RAG processing)
        full_context = ""
        if image_caption or classification_label:
            full_context += f"Image: {image_caption}\nIssue: {classification_label}\n"
        
        # NEW: Add conversation context if available
        if relates_to_history:
            full_context += f"Previous context: {history_context}\n"
        
        full_context += f"Question: {english_question}"
        
        print("🤖 Generating initial answer from RAG system...")
        
        # STEP 1: Get initial RAG response (in English)
        initial_result = qa_chain.run(full_context)
        
        print("🔍 Reviewing and refining the answer...")
        
        # STEP 2: Review and refine the answer (in English)
        image_context = ""
        if image_caption or classification_label:
            image_context = f"Image shows: {image_caption}. Analysis: {classification_label}"
        
        final_result = review_and_refine_answer(
            initial_answer=initial_result,
            original_question=english_question,
            image_context=image_context,
            conversation_context=conversation_context
        )
        
        # NEW: Clean and translate final answer back to original language
        cleaned_result = clean_answer(final_result)
        
        if original_lang != "en":
            cleaned_result = translate_from_english(cleaned_result, original_lang)
            print(f"🌍 Final answer translated back to {original_lang}")
        
        # NEW: Add to conversation history with original language info
        add_to_history(question_text, cleaned_result, image_context, original_lang)
        
        # Cleanup after each query
        cleanup_memory()
        
        return f"*LLM-Classified Agriculture Answer* ({original_lang.upper()}):\n\n{cleaned_result}"
            
    except Exception as e:
        cleanup_memory()
        error_response = f"❌ Error: {str(e)}"
        
        # NEW: Translate error response if needed
        if 'original_lang' in locals() and original_lang != "en":
            error_response = translate_from_english(error_response, original_lang)
        
        # Add error to history too
        if 'question_text' in locals():
            add_to_history(question_text, error_response, "", original_lang if 'original_lang' in locals() else "en")
        return error_response

# NEW: Function to clear conversation history
def clear_conversation_history():
    """Clear the conversation history"""
    global conversation_history
    conversation_history = []
    return "🗑 Conversation history cleared! Starting fresh."

# Initialize optimized system
print("🚀 Initializing OPTIMIZED AgroRAG system with PURE LLM Classification, Conversational Memory, and Multilingual Support...")
init_status = initialize_system()
print(init_status)

# Streamlined Gradio interface for speed - MODIFIED to include multilingual support and conversation history management
with gr.Blocks(title="🚀 Fast AgroRAG Assistant - Pure LLM Classification + Memory + Multilingual") as demo:
    gr.Markdown("""
    # 🚀 Fast AgroRAG Assistant (Pure LLM Classification + Memory + 🌍 Multilingual)
    
    *New Features:*
    - 🎤 Speech Recognition using speech_recognition library
    - 🖼 Optimized image processing with resizing
    - 🧠 *PURE LLM-BASED CLASSIFICATION* (No keyword rules - everything decided by AI)
    - ⚡ Flexible classification (allows if EITHER question OR image is agriculture-related)
    - 🔧 GPU memory management and cleanup
    - 💭 *SESSION-BASED CONVERSATIONAL MEMORY* (Remembers previous exchanges!)
    - 🤖 *INTERACTIVE RESPONSES* (Responds to greetings and casual conversation)
    - 🌍 **MULTILINGUAL SUPPORT** (Ask questions in any language - auto-detected and translated!)
    
    ## 🌍 Multilingual Features:
    - **Auto Language Detection**: Automatically detects your language
    - **Real-time Translation**: Questions translated to English for processing, answers translated back
    - **Supported Languages**: Spanish, French, German, Hindi, Chinese, Arabic, Portuguese, and many more!
    - **Conversation Memory**: Remembers conversations in your original language
    
    ## New Interactive Features:
    - Say "Hi", "Hello", "Good morning" in any language for friendly greetings
    - Ask "How are you?" or "What can you do?" in your preferred language
    - Use follow-up questions like "Tell me more about this" or "Can you explain further?"
    - The bot remembers your conversation and can reference previous topics!
    
    ## Sample Questions to Try (in any language):
    - English: "What are the symptoms of tomato blight disease?"
    - Spanish: "¿Cuáles son los síntomas de la enfermedad del tizón del tomate?"
    - French: "Quels sont les symptômes de la maladie du mildiou de la tomate?"
    - Hindi: "टमाटर की झुलसा रोग के लक्षण क्या हैं?"
    - German: "Was sind die Symptome der Tomatenfäule?"
    
    *Note:* All classification decisions are made by the Mistral 7B LLM with conversation and multilingual awareness!
    """)
    
    # NEW: Add conversation history display and clear button
    with gr.Row():
        with gr.Column(scale=3):
            pass  # Empty space for layout
        with gr.Column(scale=1):
            clear_history_btn = gr.Button("🗑 Clear History", variant="secondary")
    
    with gr.Tab("🎤 Voice + Image + 🌍 Multilingual"):        
        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(label="🎤 Record Question (Any Language)", type="numpy")
                transcription_output = gr.Textbox(label="📝 Transcribed Text", lines=2, interactive=True)
                image_input_voice = gr.Image(label="📸 Upload Image (optional)", type="pil")
                submit_button = gr.Button("🚀 Get Answer", variant="primary")
            
            with gr.Column():
                answer_output = gr.Textbox(label="🌾 Answer", lines=8)
        
        def transcribe_only(audio):
            return transcribe_audio(audio) if audio else ""
        
        def get_answer(question_text, image):
            return answer_question(question_text, image)
        
        audio_input.change(fn=transcribe_only, inputs=[audio_input], outputs=[transcription_output])
        submit_button.click(fn=get_answer, inputs=[transcription_output, image_input_voice], outputs=[answer_output])
    
    with gr.Tab("📝 Text + Image + 🌍 Multilingual"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Ask about Agriculture in ANY Language! (Auto-detected)", 
                    placeholder="¡Hola! / Bonjour! / नमस्ते! / What disease is this? / Tell me more about organic farming", 
                    lines=2
                )
                
                # Sample questions buttons - MODIFIED to include multilingual examples
                gr.Markdown("*Quick Sample Questions (Multiple Languages):*")
                with gr.Row():
                    sample_btn1 = gr.Button("👋 Hi there! (EN)", size="sm")
                    sample_btn2 = gr.Button("🌱 ¿Rotación de cultivos? (ES)", size="sm")
                    sample_btn3 = gr.Button("💧 सिंचाई तकनीक? (HI)", size="sm")
                
                image_input = gr.Image(label="📸 Upload Plant Image", type="pil")
                submit_text_button = gr.Button("🚀 Get Answer", variant="primary")
            
            with gr.Column():
                text_output = gr.Textbox(label="🌾 Answer", lines=8)
        
        # NEW: Functions for multilingual sample buttons
        def set_sample_text1():
            return "Hi there! How are you?"
        
        def set_sample_text2():
            return "¿Qué es la rotación de cultivos y cómo ayuda?"
        
        def set_sample_text3():
            return "सब्जियों के लिए सबसे अच्छी सिंचाई तकनीक क्या हैं?"
        
        # NEW: Connect sample buttons
        sample_btn1.click(fn=set_sample_text1, outputs=[text_input])
        sample_btn2.click(fn=set_sample_text2, outputs=[text_input])
        sample_btn3.click(fn=set_sample_text3, outputs=[text_input])
        
        # Add the click event handler for the text submit button
        submit_text_button.click(
            fn=answer_question, 
            inputs=[text_input, image_input], 
            outputs=[text_output]
        )
        
        # Examples section - MODIFIED to include multilingual examples
        examples = gr.Examples(
            examples=[
                ["Hello! How can you help me? 🇺🇸"],
                ["¿Qué es la rotación de cultivos? 🇪🇸"],
                ["Comment fonctionne l'agriculture biologique? 🇫🇷"],
                ["जैविक खेती के क्या फायदे हैं? 🇮🇳"],
                ["Was sind die Vorteile der Tröpfchenbewässerung? 🇩🇪"],
                ["Qual é a importância do pH do solo? 🇧🇷"],
                ["谢谢你的帮助! 🇨🇳"],
                ["Can you tell me more about this? 🇺🇸"],  # Follow-up example
            ],
            inputs=[text_input]
        )
    
    with gr.Tab("📊 Conversation History + 🌍 Languages"):
        gr.Markdown("""
        ## 💭 Your Multilingual Conversation with AgroRAG
        
        This tab shows your recent conversation history in all languages. The bot remembers your previous questions and can reference them in follow-up questions.
        
        *Tips for using multilingual conversational memory:*
        - After asking about a topic in any language, you can say "Tell me more about this" in the same or different language
        - Use phrases like "Can you explain that further?" in your preferred language
        - The bot will understand what "this" and "that" refer to based on previous conversation
        - Switch languages anytime - the bot maintains context across languages!
        
        **Language Coverage**: English, Spanish, French, German, Hindi, Chinese, Arabic, Portuguese, Italian, Russian, Japanese, Korean, and many more!
        """)
        
        history_display = gr.JSON(
            label="Recent Multilingual Conversations",
            value=lambda: conversation_history[-5:] if conversation_history else []  # Show last 5
        )
        
        refresh_history_btn = gr.Button("🔄 Refresh History", variant="secondary")
        
        def refresh_history():
            return conversation_history[-5:] if conversation_history else []
        
        refresh_history_btn.click(fn=refresh_history, outputs=[history_display])
    
    # NEW: Connect clear history button
    clear_history_btn.click(fn=clear_conversation_history, outputs=[])
        

if __name__ == "__main__":
    # Install required packages if not available
    try:
        import speech_recognition as sr
    except ImportError:
        os.system("pip install SpeechRecognition")
        import speech_recognition as sr
    
    try:
        import pocketsphinx
    except ImportError:
        os.system("pip install pocketsphinx")
    
    # NEW: Install multilingual packages
    try:
        from deep_translator import GoogleTranslator
    except ImportError:
        os.system("pip install deep-translator")
        from deep_translator import GoogleTranslator
    
    try:
        from langdetect import detect
    except ImportError:
        os.system("pip install langdetect")
        from langdetect import detect
    
    try:
        demo.launch(share=True, server_name="0.0.0.0")
    except OSError as e:
        if "Cannot find empty port" in str(e):
            demo.launch(share=True)
        else:
            raise e

2025-08-19 06:40:00.158049: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755585600.336514      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755585600.388213      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


🚀 Initializing OPTIMIZED AgroRAG system with PURE LLM Classification, Conversational Memory, and Multilingual Support...
Initializing speech recognition...
✅ Speech Recognition initialized successfully
Initializing optimized image models...
Loading optimized BLIP model...


preprocessor_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/506 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

✅ BLIP model loaded and optimized
Loading crop disease ViT model...


preprocessor_config.json:   0%|          | 0.00/325 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/22.1M [00:00<?, ?B/s]

✅ ViT model loaded and optimized
Initializing LLM-based classifier...
✅ Using main Mistral 7B model for LLM-based classification (shared with QA)
Loading optimized FAISS index...


  embedder = HuggingFaceEmbeddings(


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ FAISS vectorstore loaded with GPU embeddings
Loading optimized Mistral model...


tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Loading Mistral model with optimizations...


config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
  llm = HuggingFacePipeline(pipeline=pipe)


🚀 OPTIMIZED System ready! Speech recognition, image analysis, conversational memory, interactive responses, and multilingual support initialized.
Collecting pocketsphinx
  Downloading pocketsphinx-5.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting sounddevice (from pocketsphinx)
  Downloading sounddevice-0.5.2-py3-none-any.whl.metadata (1.6 kB)
Downloading pocketsphinx-5.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29.2 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 29.2/29.2 MB 40.1 MB/s eta 0:00:00
Downloading sounddevice-0.5.2-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, pocketsphinx
Successfully installed pocketsphinx-5.0.4 sounddevice-0.5.2
* Running on local URL:  http://0.0.0.0:7860
* Running on public URL: https://d6525813249fd8b13e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to dep

🌍 Detected language: sw, translated to English for processing
🎯 Interactive input detected
🌍 Interactive response translated back to sw
🌍 Detected language: fi, translated to English for processing
🎯 Interactive input detected
🌍 Interactive response translated back to fi
🎯 Interactive input detected


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🌍 Detected language: hi, translated to English for processing
🔗 Relates to conversation history: False
🧠 Using LLM to classify: 'What are the best irrigation techniques for vegeta...'
🤖 LLM Classification result: YES. THE QUESTION
✅ Final classification: True
🤖 LLM Text-only classification: True
🤖 Generating initial answer from RAG system...


  initial_result = qa_chain.run(full_context)
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Reviewing and refining the answer...
🔍 Reviewing and refining the initial answer...
✅ Answer successfully reviewed and refined
🌍 Final answer translated back to hi


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔗 Relates to conversation history: False
🧠 Using LLM to classify: 'what is crop rotation...'


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🤖 LLM Classification result: YES. CROP
✅ Final classification: True
🤖 LLM Text-only classification: True
🤖 Generating initial answer from RAG system...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Reviewing and refining the answer...
🔍 Reviewing and refining the initial answer...
✅ Answer successfully reviewed and refined
