In [2]:

# 2. Install Transformers, Datasets, and PEFT
!pip install transformers==4.57.6 datasets==4.3.0 peft==0.18.1

# 4. Install BitsAndBytes and TRL for 4-bit Quantization and Training
!pip install bitsandbytes==0.49.1 trl==0.27.0

# 5. Install Additional Dependencies for Data Handling
!pip install accelerate==1.12.0 pillow==11.3.0 tqdm==4.67.1 RapidFuzz==3.14.3

!pip install ultralytics==8.4.6

!pip install unsloth

!pip install rapidfuzz



Looking in indexes: https://download.pytorch.org/whl/cu118
[31mERROR: Could not find a version that satisfies the requirement torch==2.8.0 (from versions: 2.2.0+cu118, 2.2.1+cu118, 2.2.2+cu118, 2.3.0+cu118, 2.3.1+cu118, 2.4.0+cu118, 2.4.1+cu118, 2.5.0+cu118, 2.5.1+cu118, 2.6.0+cu118, 2.7.0+cu118, 2.7.1+cu118)[0m[31m
[0m[31mERROR: No matching distribution found for torch==2.8.0[0m[31m
Collecting trl==0.27.0
  Using cached trl-0.27.0-py3-none-any.whl.metadata (11 kB)
Using cached trl-0.27.0-py3-none-any.whl (532 kB)
Installing collected packages: trl
  Attempting uninstall: trl
    Found existing installation: trl 0.24.0
    Uninstalling trl-0.24.0:
      Successfully uninstalled trl-0.24.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
unsloth 2026.1.4 requires trl!=0.19.0,<=0.24.0,>=0.18.2, but you have trl 0.27.0 which is incompatible.
unsloth-zoo

In [7]:
import random
import os
import sys
import json
import time
import re
import logging
import numpy as np
import torch
import ast 
import gc
from rapidfuzz import process, fuzz
from PIL import Image, ImageDraw, ImageFont, ImageEnhance
from ultralytics import YOLO
from unsloth import FastVisionModel

# ==========================================
# CONFIGURATION
# ==========================================
UNSLOTH_MODEL_ID = "unsloth/Qwen3-VL-8B-Instruct-bnb-4bit"
VISION_MODEL_PATH = "/kaggle/input/vlm-finetune/best.pt"
DB_FILE = "/kaggle/input/vlm-finetune/tractor_db.json"

# OPTIMIZATION: SEPARATE RESOLUTIONS
YOLO_IMG_SIZE = 1024  # High res for detection accuracy
MAX_VLM_SIZE = 640    # Low res for VLM speed (saves tokens)

# ENABLE BOOTSTRAPPING (Double Check)
ENABLE_BOOTSTRAP = True

BASE_OUTPUT_DIR = "hybrid_output_preds"
DIRS = {
    "crops": os.path.join(BASE_OUTPUT_DIR, "crops"),
    # Debug copies of the exact images handed to the VLM (verifies preprocessing/anchoring)
    "vlm_debug": os.path.join(BASE_OUTPUT_DIR, "vlm_debug"),
    "visuals": os.path.join(BASE_OUTPUT_DIR, "visuals"),
    "logs": os.path.join(BASE_OUTPUT_DIR, "logs")
}

# Lower-case brand names used to strip brand text out of extracted model names.
KNOWN_BRANDS = [
    "mahindra", "swaraj", "sonalika", "massey ferguson", "mf", "tafe",
    "escorts", "john deere", "eicher", "new holland", "kubota", "farmtrac",
    "powertrac", "captain tractors", "force motors", "preet tractors",
    "indo farm", "same deutz fahr", "ace", "vst shakti", "solis", "hav",
    "autonxt", "cellestial", "trakstar", "maxgreen", "marut", "sukoon",
    "montra", "hindustan", "kartar", "field marshall", "ford", "hmt",
    "mahindra gujarat", "vst", "force", "captain"
]

# ==========================================
# 1. SETUP & LOADERS
# ==========================================
# NOTE(review): this is set after `import torch`; confirm the CUDA allocator still
# honours it at this point, otherwise move it above the imports.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"


def setup_directories():
    """Create every output directory listed in DIRS (no error if it already exists)."""
    for directory in DIRS.values():
        os.makedirs(directory, exist_ok=True)


setup_directories()

# DB Loading Removed as per request

print("[INIT] Loading YOLO...", file=sys.stderr)
try:
    vision_model = YOLO(VISION_MODEL_PATH)
except Exception as e:
    # Report the cause before exiting; a silent exit makes a bad weights path undebuggable.
    print(f"[INIT] Failed to load YOLO weights from '{VISION_MODEL_PATH}': {e}", file=sys.stderr)
    sys.exit(1)

print("[INIT] Loading Unsloth Qwen...", file=sys.stderr)
try:
    model, tokenizer = FastVisionModel.from_pretrained(
        UNSLOTH_MODEL_ID,
        load_in_4bit=True,
        use_gradient_checkpointing="unsloth",
    )
    FastVisionModel.for_inference(model)
    print("‚úÖ Model loaded!", file=sys.stderr)
except Exception as e:
    print(f"[INIT] Failed to load '{UNSLOTH_MODEL_ID}': {e}", file=sys.stderr)
    sys.exit(1)


# ==========================================
# 2.5. WARMUP ROUTINE
# ==========================================
def warmup_pipeline():
    """
    Push one throwaway 64x64 black image through the YOLO detector and through a
    single-token Qwen generation.

    The intent is to pay one-off GPU initialisation costs before the first real
    invoice. Any failure is reported on stderr and otherwise ignored, so a failed
    warmup never stops the notebook.
    """
    print("[WARMUP] initializing GPU kernels...", file=sys.stderr)
    try:
        # Detector pass on a blank frame
        yolo_probe = Image.new('RGB', (64, 64), color='black')
        vision_model.predict(yolo_probe, verbose=False)

        # Minimal VLM pass: one image, one word of text, one generated token
        vlm_probe = Image.new('RGB', (64, 64), color='black')
        user_content = [
            {"type": "image", "image": vlm_probe},
            {"type": "text", "text": "test"},
        ]
        chat = [{"role": "user", "content": user_content}]
        prompt = tokenizer.apply_chat_template(chat, add_generation_prompt=True)
        batch = tokenizer(images=[vlm_probe], text=[prompt], padding=True, return_tensors="pt")
        batch = batch.to("cuda")
        with torch.no_grad():
            model.generate(**batch, max_new_tokens=1)

        print("‚úÖ [WARMUP] System Ready!", file=sys.stderr)
    except Exception as warmup_error:
        print(f"‚ö†Ô∏è Warmup skipped: {warmup_error}", file=sys.stderr)

# Models are loaded and the function is defined, so warm up immediately.
warmup_pipeline()

def validate_extraction(data, brands=None):
    """
    Programmatic guardrails for one extraction result.

    Checks the numeric fields and cleans ``data["model_name"]`` IN PLACE
    (brand name, special characters and a two-digit HP tag are removed).

    Args:
        data: dict produced by the VLM. Keys read: "horse_power", "asset_cost",
            "model_name". "model_name" is always (re)written, "" when absent/None.
        brands: optional iterable of brand names to strip. Defaults to the
            module-level KNOWN_BRANDS.

    Returns:
        (is_valid, errors): ``is_valid`` is True when ``errors`` is empty;
        ``errors`` is a list of human-readable messages.
    """
    if not isinstance(data, dict):
        # A list/str from a lenient parser would otherwise fail on .get() below.
        return False, ["Extraction result is not a JSON object."]

    brand_list = KNOWN_BRANDS if brands is None else brands
    errors = []

    # 1. HP check. The unit suffix is removed case-insensitively ("47 hp", "47 H.P.").
    hp_text = re.sub(r'H\.?P\.?', '', str(data.get("horse_power", 0)), flags=re.IGNORECASE).strip()
    try:
        hp = float(hp_text)
    except ValueError:
        hp = None  # Non-numeric HP is not judged here.
    if hp is not None and (hp < 10 or hp > 200):
        errors.append(f"HP {hp} is out of realistic range (10-200).")

    # 2. Cost check
    cost_text = str(data.get("asset_cost", 0)).replace(',', '').strip()
    try:
        cost = float(cost_text)
    except ValueError:
        cost = None
    if cost is not None:
        if cost < 20000 or cost > 2000000:
            # Zero/negative means "no cost extracted" and is deliberately not reported.
            if cost > 0:
                errors.append(f"Asset Cost {cost} seems invalid (Strict Range: 20k - 20L).")
        elif cost < 100000:
            # In range but suspiciously low: flag so the second pass re-checks it.
            errors.append(f"FLAG: Asset Cost {cost} is LOW (< 1L). Verify.")

    # 3. Brand pollution - AUTO FIX (never adds an error, so it cannot trigger Pass 2).
    raw_name = data.get("model_name")
    # None must become "" rather than the truthy string "None".
    m_name = "" if raw_name is None else str(raw_name)
    # Longest brand first so "mahindra gujarat" wins over "mahindra".
    for brand in sorted(brand_list, key=len, reverse=True):
        if not brand:
            continue
        # Letter boundaries: "ace" must not match inside "Space", but "MF" in "MF241" must.
        pattern = re.compile(r'(?<![A-Za-z])' + re.escape(brand) + r'(?![A-Za-z])', re.IGNORECASE)
        if pattern.search(m_name):
            m_name = pattern.sub("", m_name)
            break  # Only the first (longest) matching brand is removed here.

    # 4a. Strip special characters (!@#$%^&*())
    m_name = re.sub(r'[!@#$%^&*()]', '', m_name)

    # 4b. Strip a two-digit HP tag, suffix ("42 HP", "47 H.P.") or prefix ("HP 42").
    #     Exactly two digits, so a model number such as "575" is left alone.
    hp_pattern = re.compile(r'(\b\d{2}\s*H\.?P\b\.?)|(\bH\.?P\.?\s*\d{2}\b)', re.IGNORECASE)
    m_name = hp_pattern.sub("", m_name)

    # Collapse the gaps left behind by the removals.
    data["model_name"] = " ".join(m_name.split())

    return not errors, errors

def final_clean_model_name(model_name, brands=None):
    """
    Last-resort cleanup of a model name, applied regardless of what the VLM returned.

    Removes every known brand, the characters ``!@#$%^&*()`` and a two-digit HP tag
    ("42 HP", "47 H.P.", "HP 42"), then collapses whitespace. Letter case of what
    remains is preserved.

    Args:
        model_name: raw model name; falsy values yield "".
        brands: optional iterable of brand names to strip. Defaults to the
            module-level KNOWN_BRANDS.

    Returns:
        The cleaned model name as a string.
    """
    if not model_name:
        return ""

    brand_list = KNOWN_BRANDS if brands is None else brands
    clean_name = str(model_name)

    # 1. Strip brands, longest first so "mahindra gujarat" is removed as a whole
    #    instead of leaving "Gujarat" behind after "mahindra" matched.
    for brand in sorted(brand_list, key=len, reverse=True):
        if not brand:
            continue
        # Letter boundaries: "ace" must not match inside "Pace", but "MF" in "MF241" must.
        pattern = re.compile(r'(?<![A-Za-z])' + re.escape(brand) + r'(?![A-Za-z])', re.IGNORECASE)
        clean_name = pattern.sub("", clean_name)

    # 2. Strip special characters
    clean_name = re.sub(r'[!@#$%^&*()]', '', clean_name)

    # 3. Strip HP tags (strictly two digits so model numbers like "575" survive).
    #    The optional trailing "." is consumed so "47 H.P." leaves nothing behind.
    hp_pattern = re.compile(r'(\b\d{2}\s*H\.?P\b\.?)|(\bH\.?P\.?\s*\d{2}\b)', re.IGNORECASE)
    clean_name = hp_pattern.sub("", clean_name)

    # Collapse the gaps left by the removals.
    return " ".join(clean_name.split())

def draw_yolo_visuals(image_path, detections, filename_root):
    """
    Save a copy of the image with each detected region outlined and labelled.

    Args:
        image_path: path of the source image.
        detections: mapping of region name -> [x1, y1, x2, y2] or None.
        filename_root: base name used for the output file.

    Returns:
        Path of the PNG written under DIRS["visuals"], or None if anything failed
        (the failure is reported on stderr; visualisation is best-effort).
    """
    try:
        # Close the source file as soon as the RGB copy exists.
        with Image.open(image_path) as src:
            img = src.convert("RGB")
        draw = ImageDraw.Draw(img)
        try:
            font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 20)
        except (OSError, ImportError):
            # Font file or FreeType support missing: fall back to the built-in font.
            font = ImageFont.load_default()

        colors = {"header": "blue", "detail": "green", "stamp": "orange", "signature": "red"}

        for key, bbox in detections.items():
            if bbox:
                color = colors.get(key, "red")
                draw.rectangle(bbox, outline=color, width=4)
                text = key.upper()
                # Label sits just above the box, clamped to the top edge of the image.
                draw.text((bbox[0], max(0, bbox[1]-20)), text, fill=color, font=font)

        save_path = os.path.join(DIRS["visuals"], f"{filename_root}_yolo_viz.png")
        img.save(save_path)
        return save_path
    except Exception as e:
        print(f"[VIZ] Could not render detections for {image_path}: {e}", file=sys.stderr)
        return None

def detect_objects_yolo(image_path):
    """
    Run the YOLO detector and keep the most confident box for each region type.

    Args:
        image_path: path of the invoice image.

    Returns:
        Dict with exactly the keys "header", "detail", "stamp", "signature"; each
        value is [x1, y1, x2, y2] (ints) or None when nothing was detected. On a
        detector failure every value is None and the error is reported on stderr,
        so callers can always index the four keys.
    """
    detections = {"header": None, "detail": None, "stamp": None, "signature": None}
    best_conf = dict.fromkeys(detections, -1.0)

    try:
        results = vision_model.predict(image_path, conf=0.10, iou=0.45, imgsz=YOLO_IMG_SIZE, verbose=False)

        for r in results:
            for box in r.boxes:
                cls_id = int(box.cls[0])
                cls_name = vision_model.names[cls_id].lower()

                # The class name only has to contain the region keyword.
                key = next((k for k in detections if k in cls_name), None)
                if key is None:
                    continue

                conf = float(box.conf[0])
                # Keep the highest-confidence box; previously any later box above
                # 0.5 replaced a better earlier one.
                if conf > best_conf[key]:
                    best_conf[key] = conf
                    detections[key] = [int(x) for x in box.xyxy[0].tolist()]
    except Exception as e:
        print(f"[YOLO] Detection failed for {image_path}: {e}", file=sys.stderr)
        return dict.fromkeys(detections)

    return detections

# Smart Lookup Removed - RAG disabled

def save_log(filename_root, log_content):
    """Write ``log_content`` to ``<filename_root>_log.txt`` in the logs directory (UTF-8, overwrites)."""
    log_file = os.path.join(DIRS["logs"], f"{filename_root}_log.txt")
    with open(log_file, mode="w", encoding="utf-8") as handle:
        handle.write(log_content)

# ==========================================
# 3. UNSLOTH INFERENCE
# ==========================================
def _parse_json_object(out_text):
    """Best-effort extraction of one JSON object from raw model text; {} when none parses."""
    json_str = out_text.strip()

    # Unwrap a fenced block if the model produced one.
    if "```json" in json_str:
        json_str = json_str.split("```json")[1].split("```")[0]
    elif "```" in json_str:
        json_str = json_str.split("```")[1].split("```")[0]

    start = json_str.find('{')
    if start == -1:
        return {}
    json_str = json_str[start:]

    end = json_str.rfind('}')
    if end == -1:
        # Output was cut off before the closing brace. Try closing an open string
        # and the object; if that is not valid JSON, close the object only.
        candidate = json_str + '"}'
        try:
            json.loads(candidate)
            json_str = candidate
        except Exception:
            json_str = json_str + "}"
    else:
        json_str = json_str[:end+1]

    try:
        parsed = json.loads(json_str)
    except Exception:
        # Fall back to Python literal syntax (e.g. single-quoted keys).
        try:
            parsed = ast.literal_eval(json_str)
        except Exception:
            parsed = {}

    # Callers use .get(); anything that is not an object counts as "nothing parsed".
    return parsed if isinstance(parsed, dict) else {}


def run_qwen_inference(pil_images, prompt_text, max_new_tokens=100):
    """
    Run one greedy generation over a list of PIL images plus a text prompt.

    Args:
        pil_images: list of PIL images; any image whose longest side exceeds
            MAX_VLM_SIZE is downscaled (aspect ratio kept) before being sent.
        prompt_text: full prompt text.
        max_new_tokens: generation budget; output that hits the limit is cut
            off and then repaired on a best-effort basis by the parser.

    Returns:
        (json_data, out_text): the parsed JSON object (always a dict, {} when
        nothing could be parsed) and the raw decoded text. On any exception the
        error is printed to stderr and ({}, "") is returned.
    """
    # Collect garbage first so tensors that just became unreachable are freed
    # before the CUDA cache is emptied.
    gc.collect()
    torch.cuda.empty_cache()

    try:
        # Resize for VLM speed if needed
        final_images = []
        for img in pil_images:
            if max(img.size) > MAX_VLM_SIZE:
                ratio = MAX_VLM_SIZE / max(img.size)
                new_size = (int(img.width * ratio), int(img.height * ratio))
                img = img.resize(new_size, Image.Resampling.LANCZOS)
            final_images.append(img)

        # The chat message references the same (resized) images the processor receives.
        messages = [{
            "role": "user",
            "content": [{"type": "image", "image": img} for img in final_images] +
                       [{"type": "text", "text": prompt_text}]
        }]

        text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
        inputs = tokenizer(images=final_images, text=[text], padding=True, return_tensors="pt").to("cuda")

        # Greedy decoding: `temperature` only applies when sampling, so it is not passed.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=False,
                use_cache=True
            )

        # Drop the prompt tokens so only newly generated text is decoded.
        generated_ids = [
            output_ids[len(input_ids):] for input_ids, output_ids in zip(inputs.input_ids, outputs)
        ]
        out_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

        return _parse_json_object(out_text), out_text

    except Exception as e:
        print(f"Inf Error: {e}", file=sys.stderr)
        return {}, ""

# ==========================================
# 4. MAIN PIPELINE (THE GATEKEEPER)
# ==========================================
def process_invoice(image_path):
    """
    Extract dealer, model, HP and asset cost from one invoice image.

    Steps: YOLO region detection -> build VLM inputs (full page with the header
    boxed in red, plus an optional grayscale crop of the detail region) -> Pass 1
    extraction -> programmatic validation / uncertainty scan -> Pass 2 correction
    when Pass 1 is not trusted -> final cleanup -> write a per-document log.

    Args:
        image_path: path of the invoice image.

    Returns:
        Dict with "doc_id", "fields" (dealer_name, model_name, horse_power,
        asset_cost, signature, stamp), "confidence", "processing_time_sec" and
        "cost_estimate_usd".
    """
    start_time = time.perf_counter()
    filename = os.path.basename(image_path)
    root = os.path.splitext(filename)[0]
    
    # 1. YOLO
    detections = detect_objects_yolo(image_path)
    draw_yolo_visuals(image_path, detections, root)
    
    # 2. Image Prep (the file handle is released once the RGB copy exists)
    with Image.open(image_path) as src_img:
        pil_img = src_img.convert("RGB")
    
    # --- VISUAL ANCHORING: Draw Header Box on Full Image ---
    anchored_full_img = pil_img.copy()
    header_bbox = detections.get("header")
    
    if header_bbox:
        draw = ImageDraw.Draw(anchored_full_img)
        # RED BOX for "Look Here"
        draw.rectangle(header_bbox, outline="red", width=5)
    
    # Save for Debugging
    anchored_full_img.save(os.path.join(DIRS["vlm_debug"], f"{root}_vlm_input_full.png"))
    
    # VLM Input List
    vlm_images = [anchored_full_img]

    # Detail Crop Preparation
    # .get() tolerates a detections dict that has no "detail" key at all.
    detail_bbox = detections.get("detail")
    has_detail = False
    
    if detail_bbox:
        x1, y1, x2, y2 = detail_bbox
        # 10 px margin around the box, clamped to the image bounds.
        detail_crop = pil_img.crop((max(0, x1-10), max(0, y1-10), min(pil_img.width, x2+10), min(pil_img.height, y2+10)))
        
        detail_crop.save(os.path.join(DIRS["vlm_debug"], f"{root}_vlm_input_detail.png"))
        # Optimization 4: Grayscale to reduce noise/tokens
        vlm_images.append(detail_crop.convert("L"))
        has_detail = True
    
    brand_string = ", ".join([b.capitalize() for b in KNOWN_BRANDS])
    
    # Pre-compute Dynamic Prompt Strings based on available images
    if has_detail:
        p_img2_desc = "- IMAGE 2 (MODEL ZOOM): LOOK HERE for the MODEL NAME, and HP."
        p_rule2_loc = "IMAGE 2 (Model Zoom)"
        p_rule3_loc = "IMAGE 2"
        p_img_list = "1. Full Invoice (With RED BOX hint)\n2. Model/Detail Zoom"
        p_verify_img2 = "- CHECK IMAGE 2 (Model Zoom) for the Model Name/HP."
    else:
        p_img2_desc = ""
        p_rule2_loc = "IMAGE 1 (Full Invoice)"
        p_rule3_loc = "IMAGE 1"
        p_img_list = "1. Full Invoice (With RED BOX hint FOR HEADER) look for MODEL NAME AND HP IN THE FULL IMAGE"
        p_verify_img2 = ""


    # =========================================================================
    # PASS 1: JUNIOR ANALYST (Structured Anchoring)
    # =========================================================================
    SYSTEM_PROMPT_1 = f"""You are an experienced Invoice Analyst. Extract fields into JSON.

[STRUCTURED ANCHORING]
You have {len(vlm_images)} images. Use them as follows:
- IMAGE 1 (FULL INVOICE): I have drawn a APPROX RED BOX around the Header. LOOK INSIDE THE RED BOX for the DEALER NAME AND STRICTLY OUTPUT VERNACULAR DEALER NAME IF PRESENT DONOT TRANSLITERATE.
 Also observe the OVERALL STRUCTURE LOOK FOR POTENTIAL MODEL NAME AND HP.
{p_img2_desc}

RULES:
1. dealer_name: The Business Name at the top. [use the RED BOX in IMAGE 1.] 
   - If the header is in Hindi/Vernacular (e.g. '‡§ï‡§ø‡§∏‡§æ‡§® ‡§ü‡•ç‡§∞‡•à‡§ï‡•ç‡§ü‡§∞‡•ç‡§∏' , '‡≤≤‡≤ï‡≥ç‡≤∑‡≥ç‡≤Æ‡≤ø ‡≤ü‡≥ç‡≤∞‡≥á‡≤°‡≤∞‡≥ç‡≤∏‡≥ç' , ‡™Ö‡™Æ‡™® ‡™ü‡´ç‡™∞‡´á‡™ï‡´ç‡™ü‡™∞‡´ç‡™∏) Give the exact name as output.
   - If you see a lot of text in languages other than ENGLISH, Search the Header name in the other language.
   - Make sure you do not confuse Dealer name with company names.

2. model_name: Exact Model.
    - Check for suffixes like 'DI', 'RX', 'PLUS', 'XP', 'SUPER', 'PRO' , 'TECH' , 'MAX'.
    - Look for MODEL NAMES near company names like: {brand_string}.
    - EXAMPLE: IF 'Mahindra 575 DI' -> OUTPUT '575 DI'. IF 'Swaraj 744 FE' -> OUTPUT '744 FE'.
    - STRICTLY REMOVE THE BRAND NAME.
    - If ticked in a list, select the ticked row.
    - Model Name Should be in ENGLISH. If you see in any other language TRANSLITERATE. 

3. horse_power: Numeric HP.[STRICT RANGE - (10-200)] 
      - LOOK for fields like e.g "HP : 48" , "55 HP" , "HP = 39"
      - DO NOT infer from MODEL NAME

4. asset_cost: Total Amount (Numeric) [STRICT RANGE - [20,000 - 20,00,000] [If you see many USUALLY consider the highest one].

IMPORTANT - ANALYST NOTES:
- Briefly mention where you found the ASSET COST , Model Name and HP. (e.g., "Found Model 575 DI in Header", "HP inferred is explicitly hand written" ,).
- IF YOU ARE UNSURE about any field, START THE NOTE WITH "FLAG:" followed by the reason.
- IF text is blurry or ambiguous, START THE NOTE WITH "FLAG:".
- DO NOT to INFER HP from MODEL NAME. IF you are INFERRING , then 'FLAG' : HP is ambiguous
- IF ASSET COST IS lower than 100000 STRICTLY FLAG : 'ASSET COST IS LOW , VERIFY'.

OUTPUT FORMAT:
{{
  "dealer_name": "...", [STRICTLY DONOT TRANSLITERATE TO ENGLISH]
  "model_name": "...",  [STRICTLY DONOT PUT company_names/H.P AT THE FRONT]
  "horse_power": "...", [STRICT RANGE - (10 - 150) ]
  "asset_cost": "...", [STRICT RANGE - [50,000 - 20,00,000]
  "analyst_notes": "Found Model in Header. FLAG: HP is ambiguous..." [BE CRISP AND CLEAR DONOT EXPLAIN]
}}"""

    PROMPT_TEXT_1 = f"""{SYSTEM_PROMPT_1}

You are provided with {len(vlm_images)} images: 
{p_img_list}

Analyze them using the Anchoring rules above.
JSON OUTPUT:"""
    
    # We pass PIL images directly now, not paths
    data_pass1, raw_text_1 = run_qwen_inference(vlm_images, PROMPT_TEXT_1)
    if not isinstance(data_pass1, dict):
        # Everything below relies on .get(); treat a non-object result as empty.
        data_pass1 = {}

    print("\n" + "="*40, file=sys.stderr)
    print(f"üïµÔ∏è [DEBUG] JUNIOR ANALYST RAW:\n{raw_text_1}", file=sys.stderr)
    print("="*40 + "\n", file=sys.stderr)
    
    # =========================================================================
    # THE GATEKEEPER 
    # =========================================================================
    notes = str(data_pass1.get("analyst_notes", "")).lower().strip()
    
    # --- PROGRAMMATIC VALIDATION ---
    is_valid_data, validation_errors = validate_extraction(data_pass1)
    
    # Validation Failures = AUTO FAIL
    if not is_valid_data:
         print(f"‚ö†Ô∏è [GATEKEEPER] Validation Failed: {validation_errors}", file=sys.stderr)
         is_confident = False
         junior_notes = notes + " | SYSTEM ALERTS: " + "; ".join(validation_errors)
    else:
        # Keyword Scan for Uncertainty
        danger_words = ["flag:", "unsure", "unclear", "guess", "ambiguous", "illegible", "blur"]
        has_danger = any(w in notes for w in danger_words)
        
        # Missing Critical Fields check
        missing_fields = []
        if not data_pass1.get("dealer_name"): missing_fields.append("dealer_name")
        if not data_pass1.get("model_name"): missing_fields.append("model_name")
        
        if has_danger:
            print(f"‚ö†Ô∏è [GATEKEEPER] Uncertainty detected in notes: '{notes}'", file=sys.stderr)
            is_confident = False
            junior_notes = notes
        elif missing_fields:
            print(f"‚ö†Ô∏è [GATEKEEPER] Missing Critical Fields: {missing_fields}", file=sys.stderr)
            is_confident = False
            junior_notes = f"Missing fields: {missing_fields}. {notes}"
        else:
            is_confident = True
            junior_notes = notes
    
    if not is_confident and data_pass1:
        # =====================================================================
        # PASS 2: SENIOR SUPERVISOR
        # =====================================================================
        # Send same images
        locked_dealer = data_pass1.get("dealer_name")

        # Prepare summary of Pass 1 for Supervisor to respect
        p1_summary = json.dumps({k:v for k,v in data_pass1.items() if k in ['dealer_name', 'model_name', 'horse_power', 'asset_cost']}, ensure_ascii=False)

        PROMPT_TEXT_2 = f"""You are a Senior Supervisor. Your job is to FIX ERRORS flagged by the system, but PRESERVE what is already correct.

[JUNIOR ANALYST FINDINGS]
The first analyst found this:
{p1_summary}

[DETECTED ISSUES]
The Analyst detected these specific findings above:
"{junior_notes}"

[YOUR ORDERS]
1. FIX THE FLAGGED ISSUES IMMEDIATELY:
   - If "HP out of range", FIND THE REAL HP. **LOOK AROUND THE MODEL NAME** in the Detail/Model Zoom image! It is often written nearby (e.g. "47 HP").
2. VERIFY DATA: Double check spelling and digits against the images.
   - CHECK IMAGE 1 (Red Box) for Dealer accuracy.
   {p_verify_img2}
5. FINAL OUTPUT: Must be valid JSON.

OUTPUT FORMAT:
JSON: {{
  "audit_check": "FIXED",
  "dealer_name": "Correct Dealer Name", [STRICTLY DONOT TRANSLITERATE TO ENGLISH]
  "model_name": "Correct Model Name", [LOOK AROUND THE MODEL NAME IN THE DETAIL/MODEL ZOOM IMAGE FOR MORE CLUE]
  "horse_power": [Numeric] [MUST BE INTEGER 10-150. NEVER > 150]
  "asset_cost": [Numeric] [STRICT RANGE - [50,000 - 20,00,000]] LOOK FOR things like "total cost" , 'Net Amount' etc.  
  "confidence_score": [Numeric] [MUST BE BETWEEN 0.90 - 1.00]
}}"""
        
        data_final, raw_text_2 = run_qwen_inference(vlm_images, PROMPT_TEXT_2)
        if not isinstance(data_final, dict):
            data_final = {}

        print("\n" + "="*40, file=sys.stderr)
        print(f"üë®‚Äçüè´ [DEBUG] SENIOR OUTPUT:\n{raw_text_2}", file=sys.stderr)
        print("="*40 + "\n", file=sys.stderr)
        
        # The supervisor may drop the dealer; keep the first-pass value in that case.
        if not data_final.get("dealer_name"): 
             data_final["dealer_name"] = locked_dealer
            
    else:
        print(f"‚ö° [DEBUG] VALIDATION PASSED & NO FLAGS - SKIPPING PASS 2", file=sys.stderr)
        data_final = data_pass1
        raw_text_2 = "Skipped (High Confidence)"

    # 5. Finalize
    # Programmatic cleaning only (No DB Lookup).
    # `or ""` keeps a missing/None model from turning into the literal string "None".
    f_model = final_clean_model_name(str(data_final.get("model_name") or ""))
    
    # FINAL SAFETY NET FOR HP
    try: 
        f_hp = int(data_final.get("horse_power"))
        # If HP is still implausible (e.g. 245), try the first two digits
        if f_hp > 100:
            # Heuristic: 245 -> 24, 4710 -> 47
            s_hp = str(f_hp)
            if len(s_hp) >= 2:
                new_hp = int(s_hp[:2])
                if 10 <= new_hp <= 90:
                    f_hp = new_hp
            
            # Still implausible: report 0 to signal that HP could not be determined.
            if f_hp > 90: f_hp = 0 
    except (TypeError, ValueError): 
        # Not an integer (None, "47 HP", ...): pass the raw value through unchanged.
        f_hp = data_final.get("horse_power")
    
    # Calculate costs
    elapsed = round(time.perf_counter() - start_time, 2)
    t4_cost_per_sec = 0.00006  # Estimated T4 cloud cost
    job_cost = round(elapsed * t4_cost_per_sec, 6)
    

    # Only the supervisor's confidence is used. When it is absent (Pass 2 skipped,
    # or the value is not numeric) the result is reported as fully confident.
    try: 
        if "confidence_score" in data_final:
            final_conf = float(data_final["confidence_score"])
        else:
            final_conf = 1.0 # Implicitly confident if we skipped Pass 2
    except (TypeError, ValueError):
        final_conf = 1.0

    result = {
        "doc_id": root,
        "fields": {
            "dealer_name": data_final.get("dealer_name"),
            "model_name": f_model,
            "horse_power": f_hp,
            "asset_cost": data_final.get("asset_cost"),
            "signature": {"present": bool(detections.get("signature")), "bbox": detections.get("signature")},
            "stamp": {"present": bool(detections.get("stamp")), "bbox": detections.get("stamp")}
        },
        "confidence": final_conf,
        "processing_time_sec": elapsed,
        "cost_estimate_usd": job_cost
    }
    
    full_log = f"=== PASS 1 ===\n{raw_text_1}\n\n=== PASS 2 ===\n{raw_text_2}\n\n=== FINAL ===\n{json.dumps(result, indent=2, ensure_ascii=False)}"
    save_log(root, full_log)
    
    return result

if __name__ == "__main__":
    TARGET_DIR = "/kaggle/input/idfc-data/train"
    SAMPLE_SIZE = 8                               # number of invoices to spot-check
    SAMPLE_SEED = 42                              # set to None for a different sample on every run
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')  # matched case-insensitively

    # Sort before shuffling: os.listdir order is arbitrary, so the seed alone
    # would not make the sample repeatable.
    all_imgs = sorted(f for f in os.listdir(TARGET_DIR) if f.lower().endswith(IMAGE_EXTENSIONS))
    random.Random(SAMPLE_SEED).shuffle(all_imgs)
    sample_imgs = all_imgs[:SAMPLE_SIZE]

    print(f"Processing {len(sample_imgs)} images...")

    for img in sample_imgs:
        print(f"\n--- {img} ---")
        try:
            res = process_invoice(os.path.join(TARGET_DIR, img))
            print(json.dumps(res, indent=2, ensure_ascii=False))
        except Exception as e:
            # One bad invoice must not stop the rest of the sample.
            print(f"Error: {e}")


[INIT] Loading YOLO...
[INIT] Loading Unsloth Qwen...


==((====))==  Unsloth 2026.1.4: Fast Qwen3_Vl patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

‚úÖ Model loaded!
[WARMUP] initializing GPU kernels...
‚úÖ [WARMUP] System Ready!


Processing 8 images...

--- 172936612_3_pg1.png ---



üïµÔ∏è [DEBUG] JUNIOR ANALYST RAW:
{
  "dealer_name": "THE ORISSA AGRO INDUSTRIES CORPORATION LTD.",
  "model_name": "45 F25",
  "horse_power": 49,
  "asset_cost": 873999,
  "analyst_notes": "Found Model 45 F25 in IMAGE 2. HP 49 is explicitly handwritten in IMAGE 2. Asset Cost found in Total Rs. column of IMAGE 1."
}

‚ö° [DEBUG] VALIDATION PASSED & NO FLAGS - SKIPPING PASS 2


{
  "doc_id": "172936612_3_pg1",
  "fields": {
    "dealer_name": "THE ORISSA AGRO INDUSTRIES CORPORATION LTD.",
    "model_name": "45 F25",
    "horse_power": 49,
    "asset_cost": 873999,
    "signature": {
      "present": true,
      "bbox": [
        853,
        1169,
        1201,
        1263
      ]
    },
    "stamp": {
      "present": true,
      "bbox": [
        883,
        1260,
        1181,
        1364
      ]
    }
  },
  "confidence": 1.0,
  "processing_time_sec": 16.42,
  "cost_estimate_usd": 0.000985
}

--- 172679320_3_pg18.png ---



üïµÔ∏è [DEBUG] JUNIOR ANALYST RAW:
{
  "dealer_name": "JAY BHAGWATI TRACTORS",
  "model_name": "STEELTRAC 18",
  "horse_power": 18,
  "asset_cost": 373400,
  "analyst_notes": "Found Model Name and HP in IMAGE 2. Found Asset Cost in Sub Total row of IMAGE 1."
}

‚ö° [DEBUG] VALIDATION PASSED & NO FLAGS - SKIPPING PASS 2


{
  "doc_id": "172679320_3_pg18",
  "fields": {
    "dealer_name": "JAY BHAGWATI TRACTORS",
    "model_name": "STEELTRAC 18",
    "horse_power": 18,
    "asset_cost": 373400,
    "signature": {
      "present": true,
      "bbox": [
        929,
        1135,
        1097,
        1228
      ]
    },
    "stamp": {
      "present": true,
      "bbox": [
        866,
        1123,
        1168,
        1241
      ]
    }
  },
  "confidence": 1.0,
  "processing_time_sec": 14.55,
  "cost_estimate_usd": 0.000873
}

--- 173679851_1_pg30.png ---



üïµÔ∏è [DEBUG] JUNIOR ANALYST RAW:
{
  "dealer_name": "BALAJI TRACTORS",
  "model_name": "445 III",
  "horse_power": 50,
  "asset_cost": 765000,
  "analyst_notes": "Found dealer name in RED BOX of IMAGE 1. Found Model 445 III in IMAGE 2. HP 50 found explicitly in IMAGE 2. Asset Cost found in Total line of IMAGE 1."
}

‚ö° [DEBUG] VALIDATION PASSED & NO FLAGS - SKIPPING PASS 2


{
  "doc_id": "173679851_1_pg30",
  "fields": {
    "dealer_name": "BALAJI TRACTORS",
    "model_name": "445 III",
    "horse_power": 50,
    "asset_cost": 765000,
    "signature": {
      "present": true,
      "bbox": [
        803,
        1222,
        953,
        1280
      ]
    },
    "stamp": {
      "present": false,
      "bbox": null
    }
  },
  "confidence": 1.0,
  "processing_time_sec": 16.16,
  "cost_estimate_usd": 0.00097
}

--- 172658339_1_pg46.png ---



üïµÔ∏è [DEBUG] JUNIOR ANALYST RAW:
{
  "dealer_name": "M/s. HIREMATH TRACTORS",
  "model_name": "480",
  "horse_power": 48,
  "asset_cost": 350000,
  "analyst_notes": "Found Model 480 in Model Zoom Image 2. HP 48 inferred from 'EICHER - 480 - 48 HP' in Image 2. Asset Cost found in

‚ö° [DEBUG] VALIDATION PASSED & NO FLAGS - SKIPPING PASS 2


{
  "doc_id": "172658339_1_pg46",
  "fields": {
    "dealer_name": "M/s. HIREMATH TRACTORS",
    "model_name": "480",
    "horse_power": 48,
    "asset_cost": 350000,
    "signature": {
      "present": true,
      "bbox": [
        852,
        1341,
        1049,
        1443
      ]
    },
    "stamp": {
      "present": true,
      "bbox": [
        764,
        1285,
        940,
        1457
      ]
    }
  },
  "confidence": 1.0,
  "processing_time_sec": 16.64,
  "cost_estimate_usd": 0.000998
}

--- 90018694760_175394784_2_pg34.png ---



üïµÔ∏è [DEBUG] JUNIOR ANALYST RAW:
{
  "dealer_name": "EICHER TRACTORS",
  "model_name": "365",
  "horse_power": 38,
  "asset_cost": 650000,
  "analyst_notes": "Found Model 365 in Model Zoom. HP 38 found in Model Zoom. ASSET COST found in Total amount Rs. 650,000."
}

‚ö° [DEBUG] VALIDATION PASSED & NO FLAGS - SKIPPING PASS 2


{
  "doc_id": "90018694760_175394784_2_pg34",
  "fields": {
    "dealer_name": "EICHER TRACTORS",
    "model_name": "365",
    "horse_power": 38,
    "asset_cost": 650000,
    "signature": {
      "present": true,
      "bbox": [
        917,
        1289,
        1082,
        1401
      ]
    },
    "stamp": {
      "present": true,
      "bbox": [
        877,
        1245,
        1063,
        1419
      ]
    }
  },
  "confidence": 1.0,
  "processing_time_sec": 15.37,
  "cost_estimate_usd": 0.000922
}

--- 90019664623_OTHERS_v1_pg1.png ---



üïµÔ∏è [DEBUG] JUNIOR ANALYST RAW:
{
  "dealer_name": "SHREE SAI AUTOMOBILES",
  "model_name": "PowerMaxx",
  "horse_power": 55,
  "asset_cost": 750000,
  "analyst_notes": "Found Model Name and HP in IMAGE 2. Found Asset Cost in handwritten value section of IMAGE 1."
}

‚ö° [DEBUG] VALIDATION PASSED & NO FLAGS - SKIPPING PASS 2


{
  "doc_id": "90019664623_OTHERS_v1_pg1",
  "fields": {
    "dealer_name": "SHREE SAI AUTOMOBILES",
    "model_name": "PowerMaxx",
    "horse_power": 55,
    "asset_cost": 750000,
    "signature": {
      "present": true,
      "bbox": [
        828,
        1337,
        1020,
        1429
      ]
    },
    "stamp": {
      "present": true,
      "bbox": [
        796,
        1288,
        994,
        1483
      ]
    }
  },
  "confidence": 1.0,
  "processing_time_sec": 13.38,
  "cost_estimate_usd": 0.000803
}

--- 173134085_1_pg14.png ---



üïµÔ∏è [DEBUG] JUNIOR ANALYST RAW:
{
  "dealer_name": "M/s. SHRI RAM TRACTORS",
  "model_name": "DIGITRAC PP 46i",
  "horse_power": 50,
  "asset_cost": 890000,
  "analyst_notes": "Found Model in IMAGE 2. HP found in IMAGE 2. ASSET COST found in the 'Total Value' column in IMAGE 1."
}

‚ö° [DEBUG] VALIDATION PASSED & NO FLAGS - SKIPPING PASS 2


{
  "doc_id": "173134085_1_pg14",
  "fields": {
    "dealer_name": "M/s. SHRI RAM TRACTORS",
    "model_name": "DIGITRAC PP 46i",
    "horse_power": 50,
    "asset_cost": 890000,
    "signature": {
      "present": true,
      "bbox": [
        718,
        1349,
        869,
        1407
      ]
    },
    "stamp": {
      "present": true,
      "bbox": [
        711,
        1358,
        996,
        1403
      ]
    }
  },
  "confidence": 1.0,
  "processing_time_sec": 15.52,
  "cost_estimate_usd": 0.000931
}

--- 172863544_2_pg20.png ---
{
  "doc_id": "172863544_2_pg20",
  "fields": {
    "dealer_name": "SABAR AGROTECH",
    "model_name": "241 DE C42 HP",
    "horse_power": 42,
    "asset_cost": 830000,
    "signature": {
      "present": true,
      "bbox": [
        911,
        1262,
        1144,
        1379
      ]
    },
    "stamp": {
      "present": true,
      "bbox": [
        932,
        1217,
        1149,
        1406
      ]
    }
  },
  "confidence": 1.0,
  "proce


üïµÔ∏è [DEBUG] JUNIOR ANALYST RAW:
{
  "dealer_name": "SABAR AGROTECH",
  "model_name": "241 DE C42 HP",
  "horse_power": 42,
  "asset_cost": 830000,
  "analyst_notes": "Found Model 241 DE C42 HP in IMAGE 2. HP is explicitly mentioned as 'C42 HP' in the model name. Asset Cost found in TOTAL row of the invoice

‚ö° [DEBUG] VALIDATION PASSED & NO FLAGS - SKIPPING PASS 2


In [5]:
import shutil
import os
from IPython.display import FileLink

# Folder to archive and the archive's base name (make_archive appends ".zip")
output_dir = "/kaggle/working/hybrid_output_preds"  # Replace with your folder path
zip_name = "my_folder_archive"  # Output ZIP file name (without .zip extension)

# Report a missing folder; otherwise build the archive from its contents
if not os.path.exists(output_dir):
    print(f"[Error]: Directory '{output_dir}' does not exist.")
else:
    shutil.make_archive(base_name=zip_name, format='zip', root_dir=output_dir)
    print(f"[Success]: ZIP file '{zip_name}.zip' created successfully.")

[Success]: ZIP file 'my_folder_archive.zip' created successfully.


In [None]:
import shutil
import os

# 1. Define Output Folder (removed first if it exists, so every export starts clean)
save_path = "offline_model_pack"
if os.path.exists(save_path):
    shutil.rmtree(save_path)
os.makedirs(save_path, exist_ok=True)

print("üì¶ Saving Model & Tokenizer...")
# `model` and `tokenizer` are the objects created by the model-loading cell;
# that cell must have been run in this kernel before this one.
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

# Some model objects expose a separate `preprocessor`; save it too when present.
if hasattr(model, "preprocessor"):
    try:
        model.preprocessor.save_pretrained(save_path)
        print("   - Saved Preprocessor")
    except Exception as e:
        # Optional artifact: report the failure instead of hiding it, then continue.
        print(f"   - Preprocessor not saved: {e}")

print("‚úÖ Model Artifacts Saved.")

# 2. ZIP IT UP for Easy Download
print("üóúÔ∏è Zipping everything...")
shutil.make_archive("final_offline_model", 'zip', save_path)

print(f"\nüéâ DONE! Download file: 'final_offline_model.zip' from Output.")