In [None]:
#!/usr/bin/env python3
"""
Enhanced Groq Chatbot with Modern UI using Gradio
Specialized for Arabic Land Registry Documents
Supports PDF, Word, Text, CSV, and JSON files
Version: 5.0
"""

import gradio as gr
import requests
import json
import os
import sys
import tempfile
import shutil
import re
from pathlib import Path
from typing import Optional, List, Dict, Any

# Import robust PDF extraction from pdf_utils.py
sys.path.append(".")
from pdf_utils import read_pdf_file as robust_read_pdf_file

# RAG dependencies
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

# Try to import optional file parsers
try:
    import PyPDF2
    PDF_SUPPORT = True
except ImportError:
    PDF_SUPPORT = False

try:
    import docx
    DOCX_SUPPORT = True
except ImportError:
    DOCX_SUPPORT = False

import csv
from io import StringIO

# Model identifiers offered in the UI's "Model" dropdown. The selected value is
# sent unchanged as the "model" field of the Groq chat-completions request.
# NOTE(review): hosted model IDs can be retired by the provider — confirm each
# entry is still served before relying on it.
GROQ_MODELS = [
    "llama-3.1-8b-instant",
    "llama-3.1-70b-versatile",
    "llama-3.3-70b-versatile",
    "mixtral-8x7b-32768",
    "gemma2-9b-it",
    "llama3-groq-70b-8192-tool-use-preview",
    "llama3-groq-8b-8192-tool-use-preview"
]

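# Prompt prefix for full land-registry extraction. The document text is
# appended after the trailing "DOCUMENT CONTENT:" marker, and the model is
# asked to answer with a single JSON object using the keys listed below.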
ARABIC_LAND_REGISTRY_PROMPT = """
You are an expert Arabic document processor. Your task is to meticulously extract information from the provided land registry document and return ONLY a JSON object. Adhere strictly to the specified structure and extraction rules. DO NOT add any explanatory text, comments, or additional characters before or after the JSON.

INSTRUCTIONS FOR EXTRACTION:
- Extract all text exactly as it appears in the document. Do not summarize, interpret, or rephrase.
- If a field is explicitly present but its value is empty in the document, use null for that field.
- If a field is not found or not applicable, use null for that field.
- Pay close attention to Arabic characters, numbers, and symbols (e.g., "م م", "جزء", "دت").

JSON STRUCTURE AND FIELD-SPECIFIC INSTRUCTIONS:

{
  "General Information": {
    "معرف الرسم العقاري": "Extract the full property registration ID, e.g., '63637 أريانة'.",
    "إسم العقار": "Extract the property name, e.g., 'جنان سكرة 16'. If not found, use null.",
    "المساحة": "Extract the area, e.g., '400 م م'. If not found, use null.",
    "التجزئة": "Extract the subdivision details, e.g., '400 جزء'. If not found, use null.",
    "محتوى العقار": "Extract the property content/description, e.g., 'أرض بيضاء'. If not found, use null.",
    "صبغة العقار": "Extract the property nature/type. If not found or empty, use null.",
    "مجموع القطع": "Extract the total number of parcels, e.g., '1'. If not found, use null.",
    "عدد القطع": "Extract the number of pieces, e.g., '15'. If not found, use null.",
    "موقع العقار": "Extract the location, e.g., 'سيدي فرج أريانة'. If not found, use null.",
    "طبيعة الإحداث": "Extract the nature of creation, e.g., 'إستخراج'. If not found, use null.",
    "عدد الحكم بالتسجيل": "Extract the registration judgment number. If not found or empty, use null.",
    "تاريخه": "Extract the date associated with the registration judgment. If not found or empty, use null."
  },
  "سوية المالك (ين) وموضوع الملكية ومراجع ترسيمها": [
    {
      "عدد الرتبي للمالك و الحق": "Extract the sequential number for the owner and right, e.g., '1/1'.",
      "السوية أو التسمية - الجنسية - تاريخ الولادة و مكانها - العنوان - الشكل القانوني - المقر الاجتماعي": "Extract the full descriptive text for the owner, including name, ID, nationality, birth date/place, marital status, etc., as one continuous string. E.g., 'كمال بن حسن بن الصادق الشرعبي صاحب بطاقة تعريف وطنية ع 00810171 دد جنسيته تونسية مولود في 23 مارس 1961 متزوج'.",
      "موضوع الملكية": "Extract the subject of ownership, e.g., '400 جزء'.",
      "مراجع الترسيم": {
        "التاريخ": "Extract the date from 'مراجع الترسيم', e.g., '2014/01/10'.",
        "الإبداع": "Extract the 'الإبداع' (creation/registration place), e.g., 'أريانة'.",
        "المجلد": "Extract the 'المجلد' (volume), e.g., '2014/1'.",
        "العدد": "Extract the 'العدد' (number), e.g., '1/267'."
      },
      "عدد السند المسلم": "Extract the document delivery number. If not found or empty, use null."
    }
    // Add more objects if multiple owners/rights are found.
  ],
  "التحملات و التنديدات المتعلقة بالحقوق المرسمة حسب الأعداد الرتبية لتلك الحقوق": [
    {
      "هوية المستفيد من التحملات": "Extract the beneficiary's identity, e.g., 'بنك الامان'.",
      "نوع التحمل": "Extract the type of encumbrance, e.g., 'رهن اختياري'.",
      "الحق الموظف عليه التحمل": "Extract the right subject to encumbrance, e.g., 'جميع العقار'.",
      "القيمة بالدينار": "Extract the value in dinars, including 'دت' if present, e.g., '250.000.000دت'.",
      "الفائض (%)": "Extract the percentage of surplus/interest. If not found or empty, use null.",
      "مراجع الترسيم": {
        "التاريخ": "Extract the date from 'مراجع الترسيم', e.g., '2017/10/12'.",
        "الإبداع": "Extract the 'الإبداع' (creation/registration place), e.g., 'أريانة'.",
        "المجلد": "Extract the 'المجلد' (volume), e.g., '02/2017'.",
        "العدد": "Extract the 'العدد' (number), e.g., '1/6085'."
      }
    }
    // Add more objects if multiple encumbrances are found.
  ]
}

DOCUMENT CONTENT:
"""

# Module-level state describing the single document currently loaded in the UI.
# Written by handle_file_upload() and read by the extraction and chat handlers.
# NOTE(review): this state is process-wide, so concurrent browser sessions
# would share it — confirm single-user use is intended.
uploaded_file_content = ""  # text extracted from the uploaded document
uploaded_file_name = ""  # base name of the uploaded file
file_type = ""  # label returned by read_file_content(), e.g. "PDF"
retriever = None  # vector store built by prepare_rag_context(), or None

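# Stylesheet passed to gr.Blocks(css=...): right-to-left/Arabic text helpers
# plus styling for the header, buttons, status boxes, and form controls.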
custom_css = """
/* Modern Gradio Styling with Arabic Support */
.gradio-container {
    max-width: 1400px !important;
    margin: 0 auto !important;
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Noto Sans Arabic', sans-serif !important;
}

/* Arabic text support */
.arabic-text {
    direction: rtl;
    text-align: right;
    font-family: 'Noto Sans Arabic', 'Arial Unicode MS', sans-serif;
    line-height: 1.6;
}

/* Header styling */
.gradio-container .app-header {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 24px;
    border-radius: 16px 16px 0 0;
    text-align: center;
    margin-bottom: 0;
}

.gradio-container .app-header h1 {
    margin: 0;
    font-size: 32px;
    font-weight: 700;
}

.gradio-container .app-header p {
    margin: 8px 0 0 0;
    opacity: 0.9;
    font-size: 16px;
}

/* Special styling for land registry mode */
.land-registry-mode {
    background: linear-gradient(135deg, #2d5a87 0%, #4a90a4 100%) !important;
}

.land-registry-mode h1::before {
    content: "🏛️ ";
}

/* JSON output styling */
.json-output {
    background: #1e1e1e;
    color: #d4d4d4;
    font-family: 'Courier New', monospace;
    border-radius: 8px;
    padding: 16px;
    white-space: pre-wrap;
    font-size: 12px;
    line-height: 1.4;
    max-height: 400px;
    overflow-y: auto;
}

/* Enhanced buttons */
.extract-button {
    background: linear-gradient(135deg, #059669 0%, #10b981 100%) !important;
    color: white !important;
    border: none !important;
    border-radius: 8px !important;
    padding: 12px 24px !important;
    font-weight: 600 !important;
    font-size: 14px !important;
}

.extract-button:hover {
    background: linear-gradient(135deg, #047857 0%, #059669 100%) !important;
    transform: translateY(-1px) !important;
    box-shadow: 0 4px 12px rgba(5, 150, 105, 0.3) !important;
}

/* Status indicators */
.status-success {
    background: #d1fae5 !important;
    border-left: 4px solid #10b981 !important;
    color: #065f46 !important;
}

.status-error {
    background: #fee2e2 !important;
    border-left: 4px solid #ef4444 !important;
    color: #991b1b !important;
}

.status-warning {
    background: #fef3c7 !important;
    border-left: 4px solid #f59e0b !important;
    color: #92400e !important;
}

/* Rest of the existing CSS */
.gradio-container .main-content {
    background: white;
    border-radius: 0 0 16px 16px;
    box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1);
    padding: 24px;
}

.gradio-container .file-upload {
    background: #f8fafc;
    border: 2px dashed #cbd5e1;
    border-radius: 12px;
    padding: 24px;
    text-align: center;
    transition: all 0.3s ease;
    margin-bottom: 24px;
}

.gradio-container .file-upload:hover {
    border-color: #667eea;
    background: #f1f5f9;
}

.gradio-container .status-box {
    background: #f0f9ff;
    border-left: 4px solid #0ea5e9;
    padding: 16px;
    border-radius: 8px;
    margin: 16px 0;
}

.gradio-container .preview-box {
    background: #f8fafc;
    border: 1px solid #e2e8f0;
    border-radius: 8px;
    padding: 16px;
    max-height: 200px;
    overflow-y: auto;
    font-family: monospace;
    font-size: 12px;
    line-height: 1.4;
}

.gradio-container .settings-section {
    background: #f8fafc;
    border-radius: 12px;
    padding: 20px;
    margin-bottom: 24px;
}

.gradio-container .settings-section h3 {
    margin: 0 0 16px 0;
    color: #1e293b;
    font-size: 18px;
    font-weight: 600;
}

.gradio-container input[type="text"],
.gradio-container input[type="password"],
.gradio-container select,
.gradio-container textarea {
    border: 1px solid #d1d5db !important;
    border-radius: 8px !important;
    padding: 12px !important;
    font-size: 14px !important;
    transition: border-color 0.2s !important;
}

.gradio-container input[type="text"]:focus,
.gradio-container input[type="password"]:focus,
.gradio-container select:focus,
.gradio-container textarea:focus {
    border-color: #667eea !important;
    box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
    outline: none !important;
}

.gradio-container .gr-button {
    background: #667eea !important;
    color: white !important;
    border: none !important;
    border-radius: 8px !important;
    padding: 12px 24px !important;
    font-weight: 500 !important;
    transition: all 0.2s !important;
    cursor: pointer !important;
}

.gradio-container .gr-button:hover {
    background: #5a67d8 !important;
    transform: translateY(-1px) !important;
    box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3) !important;
}

.gradio-container .response-area {
    background: white;
    border: 1px solid #e2e8f0;
    border-radius: 12px;
    padding: 20px;
    min-height: 200px;
    font-size: 14px;
    line-height: 1.6;
}

.gradio-container .gr-checkbox {
    accent-color: #667eea !important;
}

.gradio-container .gr-slider input[type="range"] {
    accent-color: #667eea !important;
}

@media (max-width: 768px) {
    .gradio-container {
        margin: 10px !important;
    }
}
"""

class ArabicTextProcessor:
    """Text clean-up and JSON validation helpers for Arabic land registry documents."""

    # Control characters that are NOT whitespace. The whitespace controls
    # (tab, newline, \x1c-\x1f, \x85, ...) are left for the whitespace pass so
    # that they still become a single separating space.
    _CONTROL_CHARS = re.compile(r'[\x00-\x08\x0e-\x1b\x7f-\x84\x86-\x9f]')
    _WHITESPACE_RUN = re.compile(r'\s+')

    # Presentation-form glyphs replaced by their base Arabic letters.
    _PRESENTATION_FORMS = str.maketrans({
        'ﻱ': 'ي', 'ﻯ': 'ى', 'ﺍ': 'ا', 'ﺎ': 'ا',
        'ﻪ': 'ه', 'ﺔ': 'ة', 'ﻚ': 'ك',
    })

    # Top-level sections every extraction result must contain.
    _GENERAL_KEY = "General Information"
    _LIST_KEYS = (
        "سوية المالك (ين) وموضوع الملكية ومراجع ترسيمها",
        "التحملات و التنديدات المتعلقة بالحقوق المرسمة حسب الأعداد الرتبية لتلك الحقوق",
    )

    @staticmethod
    def clean_arabic_text(text: str) -> str:
        """Return *text* with control characters removed and whitespace collapsed.

        Every run of whitespace (including newlines) becomes a single space,
        leading/trailing whitespace is dropped, and a small set of Arabic
        presentation-form glyphs is mapped to the base letters. Falsy input
        (``None`` or ``""``) yields ``""``.
        """
        if not text:
            return ""

        # Strip control characters BEFORE collapsing whitespace: removing them
        # afterwards would leave doubled or leading/trailing spaces behind.
        text = ArabicTextProcessor._CONTROL_CHARS.sub('', text)
        text = ArabicTextProcessor._WHITESPACE_RUN.sub(' ', text).strip()

        return text.translate(ArabicTextProcessor._PRESENTATION_FORMS)

    @staticmethod
    def validate_json_structure(json_str: str) -> Optional[Dict[str, Any]]:
        """Parse *json_str* and make sure the three top-level sections exist.

        Missing sections are filled with an empty dict ("General Information")
        or an empty list (the two Arabic-named sections).

        Returns:
            The parsed (and completed) dict, or ``None`` when *json_str* is not
            valid JSON or does not decode to a JSON object.
        """
        try:
            data = json.loads(json_str)
        except (json.JSONDecodeError, TypeError):
            # TypeError covers a None / non-string argument.
            return None

        # A JSON array or scalar cannot carry the required sections.
        if not isinstance(data, dict):
            return None

        data.setdefault(ArabicTextProcessor._GENERAL_KEY, {})
        for key in ArabicTextProcessor._LIST_KEYS:
            data.setdefault(key, [])

        return data

class EnhancedFileProcessor:
    """Readers that turn an uploaded file into plain text for the prompts."""

    # Tried in order. 'utf-8-sig' also accepts BOM-less UTF-8 and drops a BOM
    # if present. 'latin1' maps every byte, so it has to stay last: anything
    # listed after it could never be reached.
    _TEXT_ENCODINGS = ('utf-8-sig', 'utf-16', 'cp1252', 'latin1')

    @staticmethod
    def read_text_file(path):
        """Read *path* as text, trying several encodings, and return cleaned text.

        Raises:
            Exception: if no candidate encoding can decode the file.
        """
        for enc in EnhancedFileProcessor._TEXT_ENCODINGS:
            try:
                with open(path, 'r', encoding=enc) as f:
                    content = f.read()
            except UnicodeError:
                continue
            return ArabicTextProcessor.clean_arabic_text(content)
        raise Exception("Failed decoding file")

    @staticmethod
    def read_pdf_file(path):
        """Extract text from a PDF via ``pdf_utils.read_pdf_file`` and clean it."""
        content = robust_read_pdf_file(path)
        return ArabicTextProcessor.clean_arabic_text(content)

    @staticmethod
    def read_docx_file(path):
        """Return the cleaned text of all non-empty paragraphs of a .docx file.

        Raises:
            RuntimeError: if python-docx is not installed. Raising (instead of
                returning the message) keeps the notice from being treated as
                the document's content by the caller.
        """
        if not DOCX_SUPPORT:
            raise RuntimeError("DOCX support unavailable")
        doc = docx.Document(path)
        parts = [p.text for p in doc.paragraphs if p.text.strip()]
        content = "\n".join(parts)
        return ArabicTextProcessor.clean_arabic_text(content)

    @staticmethod
    def read_csv_file(path):
        """Return the CSV rows as cleaned text, cells joined with ' | '."""
        # newline='' lets the csv module handle quoted line breaks itself, as
        # the csv documentation requires; utf-8-sig drops a BOM if present.
        with open(path, 'r', encoding='utf-8-sig', newline='') as f:
            content = "\n".join(" | ".join(row) for row in csv.reader(f))
        return ArabicTextProcessor.clean_arabic_text(content)

    @staticmethod
    def read_json_file(path):
        """Load a JSON file and return it pretty-printed (non-ASCII kept as-is)."""
        # utf-8-sig: json.load() rejects text that still starts with a BOM.
        with open(path, 'r', encoding='utf-8-sig') as f:
            return json.dumps(json.load(f), indent=2, ensure_ascii=False)

def read_file_content(path):
    """Read *path* with the reader that matches its file extension.

    Returns:
        A ``(text, label)`` tuple where *label* names the detected kind
        ("Text", "PDF", "Word", "CSV", "JSON"). Files with any other extension
        are read as plain text and labelled "Unknown".
    """
    suffix = os.path.splitext(path)[1].lower()
    reader = EnhancedFileProcessor()

    # extension -> (reader method, label shown in the UI)
    dispatch = {
        ".txt": (reader.read_text_file, "Text"),
        ".pdf": (reader.read_pdf_file, "PDF"),
        ".docx": (reader.read_docx_file, "Word"),
        ".doc": (reader.read_docx_file, "Word"),
        ".csv": (reader.read_csv_file, "CSV"),
        ".json": (reader.read_json_file, "JSON"),
    }

    load, label = dispatch.get(suffix, (reader.read_text_file, "Unknown"))
    return load(path), label

# Embedding model, created on first use and then shared by every
# prepare_rag_context() call so it is not reloaded for each upload.
_RAG_EMBEDDINGS = None


def prepare_rag_context(raw_text):
    """Chunk *raw_text*, embed the chunks and return a Chroma vector store.

    Args:
        raw_text: full text of the uploaded document.

    Returns:
        A Chroma vector store holding only the chunks of this document.

    Raises:
        ValueError: if *raw_text* is empty or whitespace-only.
    """
    global _RAG_EMBEDDINGS
    import uuid  # local import: only needed to name the collection

    if not raw_text or not raw_text.strip():
        raise ValueError("No text could be extracted from the document")

    # Separators include Arabic punctuation (comma and semicolon).
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100,
        separators=["\n\n", "\n", ".", "!", "?", "،", "؛", " "]
    )
    docs = splitter.create_documents([raw_text])

    if _RAG_EMBEDDINGS is None:
        _RAG_EMBEDDINGS = HuggingFaceEmbeddings(
            model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
        )

    # Use a fresh collection per document. With the default collection name,
    # repeated in-process calls can end up adding to the same collection, so
    # chunks of a previously uploaded document would show up in later searches.
    return Chroma.from_documents(
        docs,
        embedding=_RAG_EMBEDDINGS,
        collection_name=f"doc-{uuid.uuid4().hex}",
    )

def call_groq_api(prompt, api_key, model, temperature, rag_db=None, is_extraction=False):
    """Send one chat-completion request to Groq and return the reply text.

    Args:
        prompt: user prompt (or the full extraction prompt).
        api_key: Groq API key; surrounding whitespace is ignored.
        model: model identifier sent as the request's "model" field.
        temperature: sampling temperature. In extraction mode it is capped at
            0.1, so a lower value chosen by the caller is honoured.
        rag_db: optional vector store; when given (and not in extraction mode)
            the 4 most similar chunks are prepended to the prompt as context.
        is_extraction: selects the JSON-extraction system prompt and disables
            RAG context.

    Returns:
        The assistant message content, or a string starting with "API Error"
        when the request fails. Callers detect failure by that prefix, so this
        function does not raise.
    """
    if rag_db is not None and not is_extraction:
        docs = rag_db.similarity_search(prompt, k=4)
        context = "\n\n".join([doc.page_content for doc in docs])
        prompt = f"""You are an AI assistant helping analyze Arabic documents.\n\nCONTEXT:\n{context}\n\nQUESTION:\n{prompt}"""

    url = "https://api.groq.com/openai/v1/chat/completions"
    # A key pasted with a trailing newline or space would produce an invalid header.
    token = (api_key or "").strip()
    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

    if is_extraction:
        system_content = "You are an expert at extracting structured information from Arabic land registry documents. Always return valid JSON."
        # Keep extraction near-deterministic, but do not raise a lower value
        # that the caller explicitly asked for.
        temperature = 0.1 if temperature is None else min(temperature, 0.1)
    else:
        system_content = "You are a helpful assistant specialized in Arabic document analysis."

    data = {
        "messages": [
            {"role": "system", "content": system_content},
            {"role": "user", "content": prompt}
        ],
        "model": model,
        "temperature": temperature,
        "max_tokens": 4096,
        "stream": False
    }

    try:
        response = requests.post(url, headers=headers, json=data, timeout=60)
        if response.status_code != 200:
            return f"API Error: {response.status_code} - {response.text}"
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # Boundary handler: network errors and malformed replies are reported
        # through the same "API Error" string protocol the callers check.
        return f"API Error: {str(e)}"

def extract_json_from_response(response: str) -> Optional[str]:
    """Return the first JSON object embedded in a model reply, or ``None``.

    Strategies, tried in order:
      1. the span from the first ``{`` to the last ``}``;
      2. the content of a fenced code block (with or without a ``json`` tag);
      3. a scan that decodes a JSON object starting at each ``{`` in turn,
         which also works when prose containing braces follows the JSON.

    Args:
        response: raw text returned by the model.

    Returns:
        A substring of *response* that parses as JSON, or ``None`` if no such
        substring is found (including when *response* is empty or not a string).
    """
    if not response or not isinstance(response, str):
        return None

    # 1. Whole span between the outermost braces.
    first = response.find('{')
    last = response.rfind('}')
    if first != -1 and last > first:
        candidate = response[first:last + 1]
        try:
            json.loads(candidate)
            return candidate
        except json.JSONDecodeError:
            pass

    # 2. Fenced code blocks.
    for block in re.findall(r'```(?:json)?\s*(.*?)\s*```', response, re.DOTALL):
        try:
            json.loads(block)
            return block
        except json.JSONDecodeError:
            continue

    # 3. Let the JSON decoder find where an object ends, instead of counting
    #    braces (which miscounts braces that appear inside string values).
    decoder = json.JSONDecoder()
    start = first
    while start != -1:
        try:
            parsed, end = decoder.raw_decode(response, start)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(parsed, dict):
                return response[start:end]
        start = response.find('{', start + 1)

    return None

def extract_land_registry_info(api_key, model, temperature):
    """Extract the full structured record from the loaded land registry document.

    Sends the document with ARABIC_LAND_REGISTRY_PROMPT and, if no valid JSON
    comes back, retries once with a stricter "JSON only" wrapper.

    Args:
        api_key: Groq API key; surrounding whitespace is ignored.
        model: model identifier.
        temperature: accepted so the signature matches the UI handler inputs;
            extraction always requests a fixed low temperature (0.05).

    Returns:
        Pretty-printed JSON on success, otherwise a message starting with
        "❌" or the "API Error ..." string returned by call_groq_api().
    """
    if not uploaded_file_content:
        return "❌ Please upload a document first."

    # Normalise once: tolerates None and sends the same stripped key that was validated.
    api_key = (api_key or "").strip()
    if not api_key:
        return "❌ Please enter your Groq API key."

    first_prompt = ARABIC_LAND_REGISTRY_PROMPT + "\n\n" + uploaded_file_content
    # Second attempt: same prompt wrapped in an explicit "JSON only" instruction.
    retry_prompt = (
        "\nCRITICAL: Return ONLY the JSON object. No explanations, no text before or after.\n\n"
        + ARABIC_LAND_REGISTRY_PROMPT
        + "\n\n"
        + uploaded_file_content
        + "\n\nJSON ONLY:"
    )

    response = ""
    try:
        for full_prompt in (first_prompt, retry_prompt):
            response = call_groq_api(full_prompt, api_key, model, 0.05, is_extraction=True)

            # call_groq_api reports failures as a string with this prefix.
            if response.startswith("API Error"):
                return response

            json_content = extract_json_from_response(response)
            if json_content:
                validated_data = ArabicTextProcessor.validate_json_structure(json_content)
                if validated_data:
                    return json.dumps(validated_data, ensure_ascii=False, indent=2)

        return f"❌ Could not extract valid JSON after multiple attempts. Raw response:\n\n{response}"

    except Exception as e:
        return f"❌ Extraction Error: {str(e)}"

def extract_general_info_only(api_key, model, temperature):
    """Extract only the general-information fields of the loaded document.

    Args:
        api_key: Groq API key; surrounding whitespace is ignored.
        model: model identifier.
        temperature: accepted so the signature matches the UI handler inputs;
            extraction always requests a fixed very low temperature (0.01).

    Returns:
        Pretty-printed JSON on success, otherwise a message starting with
        "❌" or the "API Error ..." string returned by call_groq_api().
    """
    if not uploaded_file_content:
        return "❌ Please upload a document first."

    # Normalise once: tolerates None and sends the same stripped key that was validated.
    api_key = (api_key or "").strip()
    if not api_key:
        return "❌ Please enter your Groq API key."

    # Simplified prompt for just general information
    general_prompt = f"""
You are an Arabic document processor. Extract ONLY the general information from this land registry document. Return ONLY the JSON structure. DO NOT add any other text.

INSTRUCTIONS:
- Extract text exactly as it appears.
- If a field is not found or empty, use null.

Extract these specific fields:

{{
  "معرف الرسم العقاري": "Extract the full property registration ID (e.g., '63637 أريانة').",
  "إسم العقار": "Extract the property name (e.g., 'جنان سكرة 16').",
  "المساحة": "Extract the area (e.g., '400 م م').",
  "التجزئة": "Extract the subdivision details (e.g., '400 جزء').",
  "محتوى العقار": "Extract the property content/description (e.g., 'أرض بيضاء').",
  "صبغة العقار": "Extract the property nature/type.",
  "مجموع القطع": "Extract the total number of parcels (e.g., '1').",
  "عدد القطع": "Extract the number of pieces (e.g., '15').",
  "موقع العقار": "Extract the location (e.g., 'سيدي فرج أريانة').",
  "طبيعة الإحداث": "Extract the nature of creation (e.g., 'إستخراج').",
  "عدد الحكم بالتسجيل": "Extract the registration judgment number.",
  "تاريخه": "Extract the date associated with the registration judgment."
}}

DOCUMENT:
{uploaded_file_content}
"""

    try:
        response = call_groq_api(general_prompt, api_key, model, 0.01, is_extraction=True)

        # call_groq_api reports failures as a string with this prefix.
        if response.startswith("API Error"):
            return response

        json_content = extract_json_from_response(response)

        if json_content:
            try:
                # Re-serialise so the output is consistently indented.
                parsed = json.loads(json_content)
                return json.dumps(parsed, ensure_ascii=False, indent=2)
            except ValueError:
                # json.JSONDecodeError is a ValueError; fall through to the
                # "could not extract" message below.
                pass

        return f"❌ Could not extract valid JSON. Raw response:\n\n{response}"

    except Exception as e:
        return f"❌ Extraction Error: {str(e)}"


def handle_file_upload(file):
    """Load an uploaded file into the module-level document state.

    Args:
        file: value delivered by the gr.File component — a path string, an
            object exposing the path as ``.name``, or a falsy value when the
            upload was cleared.

    Returns:
        A ``(status message, preview text, document info)`` tuple for the
        three output widgets.
    """
    global uploaded_file_content, uploaded_file_name, file_type, retriever

    # Drop the previous document first, so a cleared or failed upload can never
    # leave stale text or a stale retriever behind.
    uploaded_file_content = ""
    uploaded_file_name = ""
    file_type = ""
    retriever = None

    if not file:
        return "No file uploaded", "", "📄 No document loaded"

    try:
        # Accept both a plain path string and an object carrying it in .name.
        path = file if isinstance(file, str) else file.name
        content, detected = read_file_content(path)
    except Exception as e:
        return f"❌ Error loading file: {str(e)}", "", "📄 Error loading document"

    if not content or not content.strip():
        return "❌ Error loading file: no text could be extracted", "", "📄 Error loading document"

    uploaded_file_name = os.path.basename(path)
    uploaded_file_content = content
    file_type = detected

    preview = content[:1500] + ("\n\n... (truncated)" if len(content) > 1500 else "")

    # Index for RAG separately: extraction only needs the text, so an indexing
    # failure should not discard a document that was read successfully.
    try:
        retriever = prepare_rag_context(content)
    except Exception as e:
        retriever = None
        status_message = (
            f"⚠️ Loaded {uploaded_file_name} ({len(content):,} characters), "
            f"but RAG indexing failed: {str(e)}"
        )
        file_info = (
            f"📄 Document loaded: {uploaded_file_name} ({file_type}) - "
            "Ready for extraction (RAG chat unavailable)"
        )
        return status_message, preview, file_info

    status_message = f"✅ Successfully loaded: {uploaded_file_name} ({len(content):,} characters)"
    file_info = f"📄 Document loaded: {uploaded_file_name} ({file_type}) - Ready for extraction"

    return status_message, preview, file_info

def groq_interface(prompt, api_key, model, temperature, use_rag):
    """Validate the chat inputs and forward the question to the Groq API.

    Args:
        prompt: the user's question.
        api_key: Groq API key; surrounding whitespace is ignored.
        model: model identifier.
        temperature: sampling temperature.
        use_rag: when true, answer with context retrieved from the uploaded
            document (requires a document to be loaded).

    Returns:
        The model's reply, or a message starting with "❌" when an input is
        missing.
    """
    if not prompt or not prompt.strip():
        return "❌ Please enter a prompt."

    # Normalise once: tolerates None and sends the same stripped key that was validated.
    api_key = (api_key or "").strip()
    if not api_key:
        return "❌ Please enter your Groq API key."

    if use_rag and retriever is None:
        return "❌ Please upload a document to use RAG retrieval."

    return call_groq_api(prompt, api_key, model, temperature, retriever if use_rag else None)

# Script entry point: build the Gradio Blocks UI, wire the widgets to the
# handler functions, then start the web server.
if __name__ == "__main__":
    with gr.Blocks(css=custom_css, title="Arabic Land Registry RAG Chatbot") as app:
        # Page header (styled by the .app-header / .land-registry-mode CSS rules)
        gr.HTML("""
        <div class="app-header land-registry-mode">
            <h1>Arabic Land Registry Document Analyzer</h1>
            <p>Intelligent extraction from Arabic real estate documents with RAG technology</p>
        </div>
        """)
        
        with gr.Row():
            # Left column - file upload, preview and API settings
            with gr.Column(scale=1):
                gr.HTML("<div class='settings-section'><h3>📄 Document Upload</h3></div>")
                
                # type="filepath": the change handler receives the uploaded file's path
                file_upload = gr.File(
                    label="Upload Arabic Land Registry Document",
                    file_types=[".txt", ".pdf", ".docx", ".csv", ".json"],
                    type="filepath",
                    elem_classes=["file-upload"]
                )
                
                # Read-only: filled by handle_file_upload()
                file_status = gr.Textbox(
                    label="Upload Status",
                    interactive=False,
                    elem_classes=["status-box"]
                )
                
                # Read-only: shows the preview text returned by handle_file_upload()
                preview = gr.Textbox(
                    label="Document Preview",
                    lines=8,
                    interactive=False,
                    elem_classes=["preview-box", "arabic-text"]
                )
                
                gr.HTML("<div class='settings-section'><h3>⚙️ API Settings</h3></div>")
                
                api_key = gr.Textbox(
                    label="Groq API Key",
                    type="password",
                    placeholder="Enter your Groq API key"
                )
                
                model = gr.Dropdown(
                    GROQ_MODELS,
                    value="llama-3.1-70b-versatile",  # default selection; original note: better model for Arabic
                    label="Model"
                )
                
                # NOTE(review): the extraction handlers receive this value but
                # call the API with their own fixed temperatures.
                temp = gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.1,
                    value=0.1,
                    label="Temperature (lower for extraction)"
                )
            
            # Right column - extraction controls and document chat
            with gr.Column(scale=2):
                # Read-only: third value returned by handle_file_upload()
                document_info = gr.Textbox(
                    label="Document Status",
                    value="📄 No document loaded",
                    interactive=False
                )
                
                # Land registry extraction section
                gr.HTML("<h3>🏛️ Land Registry Information Extraction</h3>")
                
                with gr.Row():
                    extract_btn = gr.Button(
                        "🔍 Extract Complete Information",
                        variant="primary",
                        elem_classes=["extract-button"],
                        scale=2
                    )
                    
                    extract_general_btn = gr.Button(
                        "📋 Extract General Info Only",
                        variant="secondary",
                        scale=1
                    )
                
                # Shared output box for both extraction buttons
                extraction_output = gr.Textbox(
                    label="Extracted Information (JSON)",
                    lines=15,
                    interactive=False,
                    elem_classes=["json-output"]
                )
                
                # Free-form chat section
                gr.HTML("<h3>💬 Chat with Document</h3>")
                
                use_rag = gr.Checkbox(
                    label="Use RAG Retrieval",
                    value=True
                )
                
                prompt = gr.Textbox(
                    label="Ask a question about your document",
                    lines=3,
                    placeholder="مثال: ما هو معرف الرسم العقاري؟"
                )
                
                response = gr.Textbox(
                    label="AI Response",
                    lines=8,
                    interactive=False,
                    elem_classes=["response-area", "arabic-text"]
                )
                
                send = gr.Button("Send Message", variant="secondary")
        
        # Event handlers
        # Uploading (or clearing) a file reloads the document state and refreshes
        # the status, preview and document-info boxes.
        file_upload.change(
            fn=handle_file_upload,
            inputs=[file_upload],
            outputs=[file_status, preview, document_info]
        )
        
        # Both extraction buttons write into the same JSON output box.
        extract_btn.click(
            fn=extract_land_registry_info,
            inputs=[api_key, model, temp],
            outputs=[extraction_output]
        )
        
        extract_general_btn.click(
            fn=extract_general_info_only,
            inputs=[api_key, model, temp],
            outputs=[extraction_output]
        )
        
        # The Send button and submitting the prompt box trigger the same chat handler.
        send.click(
            fn=groq_interface,
            inputs=[prompt, api_key, model, temp, use_rag],
            outputs=[response]
        )
        
        prompt.submit(
            fn=groq_interface,
            inputs=[prompt, api_key, model, temp, use_rag],
            outputs=[response]
        )

    # NOTE(review): server_name="0.0.0.0" listens on every network interface —
    # confirm that exposure is intended for an app that accepts API keys.
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        inbrowser=True,
        share=False
    )


  from .autonotebook import tqdm as notebook_tqdm


* Running on local URL:  http://0.0.0.0:7860
* To create a public link, set `share=True` in `launch()`.
* To create a public link, set `share=True` in `launch()`.


  embeddings = HuggingFaceEmbeddings(
