In [31]:
import os
import pickle
import numpy as np
import cv2
from pathlib import Path

# ----------------------------------------------------
# PROJECT PATHS
# The project root defaults to D:/Desktop/insurance-claim-checker.
# Set the INSURANCE_CLAIM_CHECKER_BASE environment variable to point the
# notebook at another checkout without editing this cell; an unset or
# empty variable falls back to the default.
# ----------------------------------------------------
DEFAULT_BASE = "D:/Desktop/insurance-claim-checker"
BASE = Path(os.environ.get("INSURANCE_CLAIM_CHECKER_BASE") or DEFAULT_BASE)
DATA_DIR = BASE / "data"

# Pickle files read into ocr_data / nlp_data by the loading cell.
OCR_PATH = DATA_DIR / "ocr_output.pkl"
NLP_PATH = DATA_DIR / "nlp_output.pkl"

# Echo the resolved locations so a wrong root is visible before any file is opened.
print("BASE:", BASE)
print("DATA_DIR:", DATA_DIR)
print("OCR:", OCR_PATH)
print("NLP:", NLP_PATH)

BASE: D:\Desktop\insurance-claim-checker
DATA_DIR: D:\Desktop\insurance-claim-checker\data
OCR: D:\Desktop\insurance-claim-checker\data\ocr_output.pkl
NLP: D:\Desktop\insurance-claim-checker\data\nlp_output.pkl


In [32]:
def _load_pickle(path):
    """Open ``path`` in binary mode and return the unpickled object."""
    # SECURITY NOTE: pickle.load can execute arbitrary code embedded in the
    # file. Only point this at files written by a trusted pipeline.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# ---------- Load OCR ----------
ocr_data = _load_pickle(OCR_PATH)

# ---------- Load NLP ----------
nlp_data = _load_pickle(NLP_PATH)

# Entry counts for both inputs, printed for a quick visual comparison.
print("Loaded OCR entries:", len(ocr_data))
print("Loaded NLP entries:", len(nlp_data))

Loaded OCR entries: 2
Loaded NLP entries: 2


In [33]:
# Project-local model helpers. Of these three, only fuse_features is called
# in the cells shown in this notebook; predict_image_forgery and predict_risk
# are imported but not referenced here.
# NOTE(review): these imports presumably require the project root to be on
# sys.path (or to be the working directory) — confirm before running from
# another location.
from models.cnn_model import predict_image_forgery
from models.ml_model import predict_risk
from models.multimodal import fuse_features

# Reaching this line only shows that the three imports above succeeded.
print("Models loaded successfully.")

Models loaded successfully.


In [34]:
# Keys copied unchanged from the fuse_features() result into each record,
# in the order they appear in the saved dictionaries.
FUSION_KEYS = (
    "fraud_score",
    "tamper_score",
    "forensic_score",
    "risk_label",
    "nlp",  # <-- REQUIRED FOR XAI
)

results = []
bills_dir = DATA_DIR / "bills"

for filename, ocr_record in ocr_data.items():
    print("\nProcessing:", filename)

    ocr_text = ocr_record.get("text", "")

    # An entry without an NLP embedding is skipped.
    nlp_record = nlp_data.get(filename, {})
    text_vector = nlp_record.get("embedding")
    if text_vector is None:
        print("Skipping (no embedding):", filename)
        continue

    # The image is looked up under data/bills using the same key as the
    # OCR entry; entries whose image is absent are skipped as well.
    bill_image = bills_dir / filename
    if not bill_image.exists():
        print("Skipping - Missing image:", bill_image)
        continue

    # ---------- MULTIMODAL FUSION ----------
    fused = fuse_features(ocr_text, str(bill_image))

    record = {
        "filename": filename,
        "clean_text": ocr_text,
        "embedding": text_vector,
        "fields": nlp_record.get("fields", {}),
        "analysis": nlp_record.get("analysis", {}),
    }
    # --- from fusion ----
    record.update((key, fused[key]) for key in FUSION_KEYS)
    results.append(record)

print("\n✅ Completed:", len(results), "files")


Processing: Fraud.jpeg

Processing: Fraud1.jpeg

✅ Completed: 2 files


In [35]:
SAVE_PATH = DATA_DIR / "fusion_output.pkl"

# Serialize completely in memory before touching the file. Opening the
# target with "wb" first would truncate a previous fusion_output.pkl, so a
# pickling error part-way through would leave an empty or partial file.
# With this order, a serialization failure leaves any existing file intact.
payload = pickle.dumps(results)
SAVE_PATH.write_bytes(payload)

print("\nSaved fusion output to:", SAVE_PATH)


Saved fusion output to: D:\Desktop\insurance-claim-checker\data\fusion_output.pkl
