In [None]:
import zipfile
import os

# Where the uploaded model archive lives and which folder it is unpacked into.
zip_path = "/content/my_sida_model(e-10).zip"
extract_path = "sida_model"

# Handle the missing-archive case first; otherwise unpack everything.
if not os.path.exists(zip_path):
    print("‚ùå Zip file not found. Please run Cell 2 again.")
else:
    print(f"Unzipping {zip_path}...")
    with zipfile.ZipFile(zip_path, mode='r') as zip_ref:
        zip_ref.extractall(path=extract_path)
    print(f"‚úÖ Extracted to folder: {extract_path}")

In [None]:
import gradio as gr
import torch
import cv2
import numpy as np
from PIL import Image
from transformers import AutoModelForImageClassification, AutoImageProcessor

# --- 1. SETUP PATHS ---
# MODEL_PATH must be the folder the model archive was extracted into.
MODEL_PATH = "sida_model"
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"‚è≥ Loading model from {MODEL_PATH} on {device}...")

try:
    # Eager attention is requested at load time so the forward pass can return
    # attention weights, which the heatmap code in analyze_image reads.
    model = AutoModelForImageClassification.from_pretrained(
        MODEL_PATH, attn_implementation="eager"
    ).to(device)

    processor = AutoImageProcessor.from_pretrained(MODEL_PATH)

    # Also flag the config so attentions are returned by default.
    model.config.output_attentions = True
    print("‚úÖ Model Loaded Successfully (Heatmaps Enabled)!")

except Exception as load_error:
    # Best effort: report the failure and let the rest of the cell continue.
    print(f"‚ùå Error loading model: {load_error}")
    print("Did you upload and unzip the 'my_sida_model.zip' file?")

# --- 2. DEFINE LOGIC ---
def analyze_image(input_image):
    """Classify an uploaded image and overlay an attention heatmap on it.

    Args:
        input_image: PIL image supplied by the Gradio ``Image`` input, or ``None``
            when nothing was uploaded.

    Returns:
        A ``(overlay, result_text)`` tuple. ``overlay`` is an RGB numpy array: the
        image blended with the heatmap, or the plain image when no usable
        attention map is available. ``result_text`` holds the predicted label and
        confidence. Returns ``(None, "Please upload an image.")`` when
        ``input_image`` is ``None``.
    """
    if input_image is None:
        return None, "Please upload an image."

    # The colour-mapped heatmap has 3 channels, so blend against an RGB copy.
    rgb_image = input_image.convert("RGB")
    base_pixels = np.array(rgb_image)

    # Preprocess
    inputs = processor(images=rgb_image, return_tensors="pt").to(device)

    # Inference
    with torch.no_grad():
        outputs = model(**inputs, output_attentions=True)

    # A. CLASSIFICATION
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    top_prob, top_idx = torch.max(probs, dim=-1)
    class_name = model.config.id2label[top_idx.item()]
    confidence = top_prob.item()

    # B. LOCALIZATION
    overlay = base_pixels
    if outputs.attentions:
        # Last layer, averaged over heads, batch dropped -> (tokens, tokens).
        # Cast to float32 so half-precision weights can be handed to numpy/OpenCV.
        last_layer_attn = outputs.attentions[-1]
        attn_map = torch.mean(last_layer_attn, dim=1)[0].float()

        num_tokens = attn_map.shape[0]
        grid_size = int(np.sqrt(num_tokens))

        if grid_size * grid_size == num_tokens:
            # Case 1: pure patch grid (no CLS) -> average attention received per patch
            patch_attn = torch.mean(attn_map, dim=0)
        else:
            # Case 2: grid + CLS token -> what the CLS token attends to
            grid_size = int(np.sqrt(num_tokens - 1))
            patch_attn = attn_map[0, 1:]

        if grid_size > 0 and patch_attn.numel() == grid_size * grid_size:
            attn_grid = patch_attn.reshape(grid_size, grid_size).detach().cpu().numpy()

            # PIL's .size is (width, height), which is the order cv2.resize expects.
            mask = cv2.resize(attn_grid, rgb_image.size, interpolation=cv2.INTER_CUBIC)

            # Min-max normalise; a flat (or non-finite) map would divide by zero,
            # so it is rendered as an all-zero mask instead of NaNs.
            value_range = float(mask.max() - mask.min())
            if np.isfinite(value_range) and value_range > 0:
                mask = (mask - mask.min()) / value_range
            else:
                mask = np.zeros_like(mask)
            mask = mask ** 0.5  # gamma correction: brightens faint regions

            # Colorize (OpenCV colour maps are BGR; convert for RGB display)
            heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
            heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)

            # Overlay
            overlay = cv2.addWeighted(base_pixels, 0.6, heatmap, 0.4, 0)
        else:
            # Token count is neither N*N nor N*N + 1: no square grid to draw.
            print("Warning: attention tokens do not form a square patch grid; heatmap skipped.")
    else:
        print("‚ö†Ô∏è Warning: No attentions returned by model.")

    # C. OUTPUT TEXT
    result_text = (
        f"PREDICTION: {class_name.upper()}\n"
        f"CONFIDENCE: {confidence:.2%}\n"
    )

    return overlay, result_text

# --- 3. LAUNCH APP ---
# Wire analyze_image into a Gradio UI: one PIL image in, heatmap image + text out.
interface = gr.Interface(
    fn=analyze_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[
        gr.Image(label="SIDA Localization (Heatmap)"),
        gr.Textbox(label="Detection Result"),
    ],
    title="SIDA Deepfake Detector",
    description="Upload an image to detect if it is Real, Synthetic, or Tampered.",
    theme="default",
)

print("üöÄ Launching App...")
# debug=True and share=True are passed straight through to Gradio's launch().
interface.launch(share=True, debug=True)

In [None]:
import os
import zipfile
import torch
import cv2
import numpy as np
import matplotlib.pyplot as plt
import io
from PIL import Image
from transformers import AutoModelForImageClassification, AutoImageProcessor, AutoConfig
from google.colab import files

# --- 1. FORCE "MATH MODE" ---
# Turn off the flash and memory-efficient scaled-dot-product kernels and keep the
# plain math kernel enabled.
torch.backends.cuda.enable_flash_sdp(False)
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_math_sdp(True)
print("‚úÖ Fast Attention disabled (Heatmaps forced on).")

# --- 2. CONFIGURATION ---
ZIP_NAME = "/content/my_sida_model(e-10).zip"
EXTRACT_FOLDER = "sida_model_extracted"
device = "cuda" if torch.cuda.is_available() else "cpu"

# --- 3. CHECK & UNZIP ---
if not os.path.exists(EXTRACT_FOLDER):
    if not os.path.exists(ZIP_NAME):
        print(f"‚¨áÔ∏è Upload '{ZIP_NAME}' now...")
        uploaded = files.upload()

    if os.path.exists(ZIP_NAME):
        print(f"üìÇ Unzipping '{ZIP_NAME}'...")
        with zipfile.ZipFile(ZIP_NAME, 'r') as zip_ref:
            zip_ref.extractall(EXTRACT_FOLDER)
        print("‚úÖ Unzip complete!")
    else:
        # Previously this case was silent and only surfaced as a confusing
        # model-loading error further down.
        print(f"Warning: '{ZIP_NAME}' was not found after upload; nothing was extracted.")

# --- 4. LOAD MODEL ---
print(f"‚è≥ Loading model from {EXTRACT_FOLDER}...")
try:
    # Load the config first so attentions are returned by default.
    config = AutoConfig.from_pretrained(EXTRACT_FOLDER)
    config.output_attentions = True

    # The attention backend is selected through from_pretrained's
    # attn_implementation argument (as the Gradio cell does); assigning a plain
    # attribute on the config is not the documented way to select it.
    model = AutoModelForImageClassification.from_pretrained(
        EXTRACT_FOLDER,
        config=config,
        attn_implementation="eager",
        ignore_mismatched_sizes=True
    ).to(device)

    processor = AutoImageProcessor.from_pretrained(EXTRACT_FOLDER)
    print("‚úÖ Model Loaded Successfully.")
except Exception as e:
    print(f"‚ùå Error loading model: {e}")

# --- 5. PROCESSING FUNCTION ---
def process_image(image_bytes, filename):
    """Classify one uploaded image and plot it next to its attention heatmap.

    Args:
        image_bytes: raw bytes of the uploaded image file.
        filename: name shown in the progress output.

    Returns:
        None. The prediction is printed and a two-panel matplotlib figure is shown.
        If the bytes cannot be opened as an image, an error is printed and the
        function returns early.
    """
    print(f"\n--- Analyzing: {filename} ---")

    try:
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    except Exception as e:
        print(f"‚ùå Failed to open image: {e}")
        return

    # Preprocess
    inputs = processor(images=image, return_tensors="pt").to(device)

    # Inference (explicitly ask for attentions)
    with torch.no_grad():
        outputs = model(**inputs, output_attentions=True)

    # Get Prediction
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    top_conf, top_idx = torch.max(probs, dim=-1)
    pred_label = model.config.id2label[top_idx.item()]
    confidence = top_conf.item()

    print(f"üìä Result: {pred_label.upper()} ({confidence:.1%})")

    # Localization (Heatmap)
    overlay = None
    if outputs.attentions:
        print("üé® Generating heatmap...")
        try:
            # Last layer, averaged over heads, batch dropped -> (tokens, tokens).
            # Cast to float32 so half-precision weights can go to numpy/OpenCV.
            last_layer_attn = outputs.attentions[-1]
            attn_map = torch.mean(last_layer_attn, dim=1)[0].float()

            num_tokens = attn_map.shape[0]
            grid_size = int(np.sqrt(num_tokens))

            if grid_size * grid_size == num_tokens:
                # Pure patch grid: average attention received per patch
                patch_attn = torch.mean(attn_map, dim=0)
            else:
                # Grid + CLS token: what the CLS token attends to
                grid_size = int(np.sqrt(num_tokens - 1))
                patch_attn = attn_map[0, 1:]

            # Reshape & Resize (raises, and is reported below, if not a square grid)
            attn_grid = patch_attn.reshape(grid_size, grid_size).detach().cpu().numpy()
            mask = cv2.resize(attn_grid, image.size, interpolation=cv2.INTER_CUBIC)

            # Min-max normalise; a flat (or non-finite) map would divide by zero,
            # so it is rendered as an all-zero mask instead of NaNs.
            value_range = float(mask.max() - mask.min())
            if np.isfinite(value_range) and value_range > 0:
                mask = (mask - mask.min()) / value_range
            else:
                mask = np.zeros_like(mask)
            mask = mask ** 0.5  # gamma boost

            # Colorize (OpenCV colour maps are BGR; convert for matplotlib)
            heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
            heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)

            # Overlay
            overlay = cv2.addWeighted(np.array(image), 0.6, heatmap, 0.4, 0)
        except Exception as e:
            print(f"‚ö†Ô∏è Heatmap math error: {e}")
    else:
        print("‚ö†Ô∏è CRITICAL: No attentions returned. Force-mode failed.")

    # Display
    plt.figure(figsize=(12, 6))

    plt.subplot(1, 2, 1)
    plt.imshow(image)
    plt.title("Original Upload")
    plt.axis('off')

    plt.subplot(1, 2, 2)
    if overlay is not None:
        plt.imshow(overlay)
        plt.title(f"SIDA Localization\n{pred_label.upper()}")
    else:
        # Fall back to the plain image when no heatmap could be built.
        plt.imshow(image)
        plt.title(f"{pred_label.upper()} (No Heatmap)")

    plt.axis('off')
    plt.tight_layout()
    plt.show()

# --- 6. UPLOAD LOOP ---
# Ask for one or more images, then analyse each (filename -> bytes) entry in turn.
print("\n‚¨áÔ∏è Upload Image Below ‚¨áÔ∏è")
uploaded_imgs = files.upload()

for fname in uploaded_imgs:
    fcontent = uploaded_imgs[fname]
    process_image(fcontent, fname)

In [None]:
# --- 1. INSTALL & SETUP ---
print("‚öôÔ∏è Installing Grad-CAM library...")
import os
import subprocess
import sys

# Run pip through the kernel's own interpreter so packages land in the environment
# this notebook actually imports from, and report a failed install instead of
# silently ignoring the exit status.
_pip_result = subprocess.run(
    [
        sys.executable, "-m", "pip", "install", "-q",
        "grad-cam", "transformers", "torch", "torchvision", "opencv-python", "matplotlib",
    ],
    check=False,
)
if _pip_result.returncode != 0:
    print(f"pip install exited with code {_pip_result.returncode}; the imports below may fail.")

import zipfile
import torch
import cv2
import numpy as np
import matplotlib.pyplot as plt
import io
import sys
from PIL import Image
from transformers import AutoModelForImageClassification, AutoImageProcessor, AutoConfig
from google.colab import files
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
import ipywidgets as widgets
from IPython.display import display, clear_output

# --- 2. CONFIGURATION ---
# Name of the model archive. This assignment used to be commented out, so every
# ZIP_NAME reference below raised NameError whenever EXTRACT_FOLDER did not exist.
# It is checked against the keys returned by files.upload(), so it must equal the
# uploaded file's name.
ZIP_NAME = "my_sida_model.zip"
EXTRACT_FOLDER = "sida_model"
device = "cuda" if torch.cuda.is_available() else "cpu"

# --- 3. CHECK & UNZIP MODEL ---
if not os.path.exists(EXTRACT_FOLDER):
    if not os.path.exists(ZIP_NAME):
        print(f"‚¨áÔ∏è Please upload '{ZIP_NAME}' now...")
        uploaded = files.upload()
        if ZIP_NAME not in uploaded:
            print("‚ùå Error: Zip file missing. Please re-run and upload.")
            sys.exit()

    print(f"üìÇ Unzipping model...")
    with zipfile.ZipFile(ZIP_NAME, 'r') as zip_ref:
        zip_ref.extractall(EXTRACT_FOLDER)
    print("‚úÖ Model unzipped.")

# --- 4. LOAD MODEL ---
print(f"‚è≥ Loading model on {device}...")
try:
    config = AutoConfig.from_pretrained(EXTRACT_FOLDER)
    config.output_attentions = True

    # Load original HF model. The attention backend is selected through
    # from_pretrained's attn_implementation argument; assigning a plain attribute
    # on the config is not the documented way to select it.
    hf_model = AutoModelForImageClassification.from_pretrained(
        EXTRACT_FOLDER,
        config=config,
        attn_implementation="eager",
        ignore_mismatched_sizes=True,
    ).to(device).eval()

    processor = AutoImageProcessor.from_pretrained(EXTRACT_FOLDER)
    print("‚úÖ Model Loaded.")
except Exception as e:
    # Nothing below can work without the model, so stop the cell here.
    print(f"‚ùå Error loading model: {e}")
    sys.exit()

# --- 5. GRAD-CAM SETUP (FIXED) ---

# 5a. The Wrapper (Fixes 'ImageClassifierOutput' error)
class ModelWrapper(torch.nn.Module):
    """Adapter whose ``forward`` returns only the wrapped model's ``.logits``.

    Grad-CAM is given this wrapper instead of the Hugging Face model so that it
    receives a plain tensor rather than a structured output object.
    """

    def __init__(self, model):
        super().__init__()
        # Keep the original model reachable as ``self.model``.
        self.model = model

    def forward(self, x):
        """Run the wrapped model on ``x`` and return its logits."""
        model_output = self.model(x)
        return model_output.logits

# Wrap the Hugging Face model so that calling it yields only the logits tensor
# (ModelWrapper.forward returns `self.model(x).logits`).
wrapped_model = ModelWrapper(hf_model)

# 5b. Reshape Logic
def reshape_transform(tensor, height=14, width=14):
    """Turn a (batch, tokens, channels) activation into (batch, channels, side, side).

    If the token count is not a perfect square, the first token is dropped
    (treated as a CLS token) before reshaping. ``height`` and ``width`` are
    accepted but not used; the grid side is derived from the token count.
    """
    token_count = tensor.shape[1]
    side = int(np.sqrt(token_count))
    has_leading_token = side * side != token_count

    if has_leading_token:
        patches = tensor[:, 1:, :]  # Remove CLS
        side = int(np.sqrt(patches.shape[1]))
    else:
        patches = tensor

    # (batch, tokens, channels) -> (batch, channels, tokens) -> spatial grid
    channels_first = patches.transpose(1, 2)
    return channels_first.reshape(tensor.size(0), channels_first.size(1), side, side)

# 5c. Target Layer Selection
# Candidate locations of the last encoder block's first layer norm on the ORIGINAL
# (inner) model, tried in order. Only "this layout does not exist" errors are
# skipped; the former bare `except:` also swallowed unrelated errors.
_target_layer_getters = (
    lambda m: m.vision_model.encoder.layers[-1].layer_norm1,
    lambda m: m.transformer.encoder.layer[-1].layernorm1,
    lambda m: m.model.encoder.layers[-1].layer_norm1,
)

target_layers = None
for _get_layer in _target_layer_getters:
    try:
        target_layers = [_get_layer(hf_model)]
        break
    except (AttributeError, IndexError, TypeError):
        # Layout not present on this model; try the next candidate.
        continue

if target_layers is None:
    raise AttributeError(
        "Could not locate a Grad-CAM target layer: none of the known encoder "
        "layouts matched the loaded model."
    )

# 5d. Initialize Grad-CAM
# Note: the WRAPPED model is passed here so Grad-CAM sees plain logits.
cam = GradCAM(model=wrapped_model, target_layers=target_layers, reshape_transform=reshape_transform)
print("‚úÖ Grad-CAM Initialized.")

# --- 6. INTERACTIVE UPLOAD & ANALYZE ---
def on_upload_change(change):
    """Handle a new upload: classify the image and plot its Grad-CAM heatmap.

    Args:
        change: change dict delivered by ``uploader.observe``; ``change['new']``
            is the widget's new value.

    Returns:
        None. Results and errors are rendered inside the ``out`` widget. Does
        nothing when the new value is empty.
    """
    new_value = change['new']
    if not new_value:
        return
    out.clear_output()

    with out:
        try:
            # Load Image. Depending on the ipywidgets version the value is a
            # mapping keyed by filename or a sequence (list or tuple) of file-info
            # dicts; the old list-only check sent tuples down the mapping branch.
            if isinstance(new_value, dict):
                file_info = next(iter(new_value.values()))
            else:
                file_info = new_value[0]
            content = file_info['content']
            image = Image.open(io.BytesIO(content)).convert("RGB")
            print("üîç Analyzing...")

            # Preprocess
            inputs = processor(images=image, return_tensors="pt").to(device)

            # Get Prediction Label (original model, no gradients needed)
            with torch.no_grad():
                outputs = hf_model(**inputs)
                probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
                top_conf, top_idx = torch.max(probs, dim=-1)
                pred_label = hf_model.config.id2label[top_idx.item()]
                confidence = top_conf.item()

            # Run Grad-CAM (needs gradients, so it stays outside no_grad).
            # targets=None is passed through to the Grad-CAM library unchanged.
            grayscale_cam = cam(input_tensor=inputs['pixel_values'], targets=None)
            grayscale_cam = grayscale_cam[0, :]

            # Visualization: resize the image to the CAM's own height/width rather
            # than a hard-coded 224x224, so other input resolutions also line up.
            cam_height, cam_width = grayscale_cam.shape[:2]
            img_resized = np.array(image.resize((cam_width, cam_height)))
            img_float = np.float32(img_resized) / 255
            visualization = show_cam_on_image(img_float, grayscale_cam, use_rgb=True)

            # Plot
            fig, ax = plt.subplots(1, 2, figsize=(12, 6))
            ax[0].imshow(image)
            ax[0].set_title("Original")
            ax[0].axis('off')

            ax[1].imshow(visualization)
            ax[1].set_title(f"Grad-CAM Heatmap\nPrediction: {pred_label.upper()} ({confidence:.1%})")
            ax[1].axis('off')
            plt.show()
            plt.close(fig)  # release the figure so repeated uploads do not pile up

        except Exception as e:
            print(f"‚ùå Error: {e}")
            import traceback
            traceback.print_exc()

# Create Widget
# `out` is the area on_upload_change renders into; `uploader` triggers it on change.
out = widgets.Output()
uploader = widgets.FileUpload(
    accept='image/*',
    multiple=False,
    description="Upload Image",
)
uploader.observe(on_upload_change, names='value')

print("\n‚¨áÔ∏è CLICK BUTTON TO TEST ‚¨áÔ∏è")
display(uploader, out)

**Grad-CAM++**

In [None]:
# --- 1. INSTALL & SETUP ---
import os
import subprocess
import sys

# Run pip through the kernel's own interpreter so packages land in the environment
# this notebook actually imports from, and report a failed install instead of
# silently ignoring the exit status.
_pip_result = subprocess.run(
    [
        sys.executable, "-m", "pip", "install", "-q",
        "grad-cam", "transformers", "torch", "torchvision", "opencv-python", "matplotlib",
    ],
    check=False,
)
if _pip_result.returncode != 0:
    print(f"pip install exited with code {_pip_result.returncode}; the imports below may fail.")

import torch
import cv2
import numpy as np
import matplotlib.pyplot as plt
import io
import sys
from PIL import Image
from transformers import AutoModelForImageClassification, AutoImageProcessor, AutoConfig
from google.colab import files
# CRITICAL CHANGE: Importing GradCAMPlusPlus (Better for localization)
from pytorch_grad_cam import GradCAMPlusPlus
from pytorch_grad_cam.utils.image import show_cam_on_image
import ipywidgets as widgets
from IPython.display import display, clear_output

# --- 2. CONFIGURATION ---
# ZIP_NAME = "/content/my_sida_model(e-10).zip"
EXTRACT_FOLDER = "sida_model"
device = "cuda" if torch.cuda.is_available() else "cpu"


# Defined outside the try block: declaring a class cannot fail the way model
# loading can, and it keeps the loading code below easier to read.
class ModelWrapper(torch.nn.Module):
    """Adapter whose ``forward`` returns only the wrapped model's ``.logits``."""

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        return self.model(x).logits


# --- 3. LOAD MODEL ---
print(f"‚è≥ Loading model on {device}...")
if not os.path.exists(EXTRACT_FOLDER):
    print("‚ùå Model folder not found. Please upload/unzip your model first.")
    sys.exit()

try:
    config = AutoConfig.from_pretrained(EXTRACT_FOLDER)
    config.output_attentions = True

    # The attention backend is selected through from_pretrained's
    # attn_implementation argument; assigning a plain attribute on the config is
    # not the documented way to select it.
    hf_model = AutoModelForImageClassification.from_pretrained(
        EXTRACT_FOLDER,
        config=config,
        attn_implementation="eager",
        ignore_mismatched_sizes=True,
    ).to(device).eval()

    # Wrap model to strip complex outputs down to logits for Grad-CAM
    wrapped_model = ModelWrapper(hf_model)
    processor = AutoImageProcessor.from_pretrained(EXTRACT_FOLDER)
    print("‚úÖ Model Loaded.")
except Exception as e:
    # Nothing below can work without the model, so stop the cell here.
    print(f"‚ùå Error: {e}")
    sys.exit()

# --- 4. ADVANCED GRAD-CAM SETUP ---

def reshape_transform(tensor, height=14, width=14):
    """Reshape (batch, tokens, channels) activations to (batch, channels, side, side).

    When the token count is not a perfect square, the first token is removed
    (treated as CLS) and the side is recomputed from what remains. ``height`` and
    ``width`` are accepted but unused.
    """
    batch = tensor.size(0)
    side = int(np.sqrt(tensor.shape[1]))

    if side * side == tensor.shape[1]:
        tokens = tensor
    else:
        tokens = tensor[:, 1:, :]  # Remove CLS
        side = int(np.sqrt(tokens.shape[1]))

    # Channels first, then unfold the token axis into a square grid.
    tokens = tokens.transpose(1, 2)
    return tokens.reshape(batch, tokens.size(1), side, side)

# Target Layer
# Candidate locations of the last encoder block's first layer norm, tried in order.
# Only "this layout does not exist" errors are skipped; the former bare `except:`
# also swallowed unrelated errors.
_target_layer_getters = (
    lambda m: m.vision_model.encoder.layers[-1].layer_norm1,
    lambda m: m.transformer.encoder.layer[-1].layernorm1,
)

target_layers = None
for _get_layer in _target_layer_getters:
    try:
        target_layers = [_get_layer(hf_model)]
        break
    except (AttributeError, IndexError, TypeError):
        # Layout not present on this model; try the next candidate.
        continue

if target_layers is None:
    raise AttributeError(
        "Could not locate a Grad-CAM++ target layer: none of the known encoder "
        "layouts matched the loaded model."
    )

# SWITCH TO GRAD-CAM++ (the wrapped model is passed so the CAM sees plain logits)
cam = GradCAMPlusPlus(model=wrapped_model, target_layers=target_layers, reshape_transform=reshape_transform)
print("‚úÖ Grad-CAM++ Initialized (High Res Mode).")

# --- 5. ANALYZE WITH SHARPENING ---
def on_upload_change(change):
    """Handle a new upload: classify the image and plot a thresholded Grad-CAM++ map.

    Args:
        change: change dict delivered by ``uploader.observe``; ``change['new']``
            is the widget's new value.

    Returns:
        None. Results and errors are rendered inside the ``out`` widget. Does
        nothing when the new value is empty.
    """
    new_value = change['new']
    if not new_value:
        return
    out.clear_output()

    with out:
        try:
            # Load. Depending on the ipywidgets version the value is a mapping
            # keyed by filename or a sequence (list or tuple) of file-info dicts;
            # the old list-only check sent tuples down the mapping branch.
            if isinstance(new_value, dict):
                file_info = next(iter(new_value.values()))
            else:
                file_info = new_value[0]
            image = Image.open(io.BytesIO(file_info['content'])).convert("RGB")
            print("üîç Analyzing...")

            # Predict
            inputs = processor(images=image, return_tensors="pt").to(device)
            with torch.no_grad():
                outputs = hf_model(**inputs)
                probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
                top_conf, top_idx = torch.max(probs, dim=-1)
                pred_label = hf_model.config.id2label[top_idx.item()]

            # Generate Raw Heatmap (needs gradients, so outside no_grad)
            grayscale_cam = cam(input_tensor=inputs['pixel_values'], targets=None)[0, :]

            # --- THRESHOLDING & SHARPENING ---
            # 1. Cut off weak noise (anything below 20% intensity becomes 0)
            threshold = 0.20
            grayscale_cam[grayscale_cam < threshold] = 0

            # 2. Re-normalize after cutting. Skipped when the map is constant,
            #    which would otherwise divide by zero and fill the map with NaNs.
            cam_min = grayscale_cam.min()
            cam_max = grayscale_cam.max()
            if cam_max > cam_min:
                grayscale_cam = (grayscale_cam - cam_min) / (cam_max - cam_min)

            # Visualize: resize the image to the CAM's own height/width rather than
            # a hard-coded 224x224, so other input resolutions also line up.
            cam_height, cam_width = grayscale_cam.shape[:2]
            img_resized = np.array(image.resize((cam_width, cam_height)))
            img_float = np.float32(img_resized) / 255
            visualization = show_cam_on_image(img_float, grayscale_cam, use_rgb=True)

            # Plot
            fig, ax = plt.subplots(1, 2, figsize=(12, 6))
            ax[0].imshow(image)
            ax[0].set_title("Original")
            ax[0].axis('off')

            ax[1].imshow(visualization)
            ax[1].set_title(f"Sharpened Heatmap\n{pred_label.upper()} ({top_conf.item():.1%})")
            ax[1].axis('off')
            plt.show()
            plt.close(fig)  # release the figure so repeated uploads do not pile up

        except Exception as e:
            print(f"‚ùå Error: {e}")

# Widget
# `out` is the area on_upload_change renders into; `uploader` triggers it on change.
out = widgets.Output()
uploader = widgets.FileUpload(
    accept='image/*',
    multiple=False,
    description="Upload Image",
)
uploader.observe(on_upload_change, names='value')

print("\n‚¨áÔ∏è UPLOAD TO TEST GRAD-CAM++ ‚¨áÔ∏è")
display(uploader, out)

# **Mobile Model Run**

In [None]:
# 1. Install Dependencies
!pip install -q onnxruntime numpy transformers pillow

In [None]:
import os
import zipfile
import onnxruntime as ort
import numpy as np
from transformers import AutoImageProcessor

# --- CONFIGURATION ---
# Path to the zip file already in your runtime
ZIP_PATH = "/content/sida_mobile_ready.zip"
EXTRACT_FOLDER = "/content/sida_mobile_quantized"

# --- 1. UNZIP MODEL ---
if os.path.exists(ZIP_PATH):
    print(f"üìÇ Found {ZIP_PATH}. Unzipping...")
    try:
        with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
            zip_ref.extractall(EXTRACT_FOLDER)
        print(f"‚úÖ Model extracted successfully to: {EXTRACT_FOLDER}")
    except Exception as e:
        print(f"‚ùå Error during unzip: {e}")
else:
    print(f"‚ùå Error: File not found at {ZIP_PATH}")
    print("Please check the 'Files' folder on the left to ensure the zip is there.")

# --- 2. LOAD ONNX SESSION ---
ONNX_MODEL_PATH = os.path.join(EXTRACT_FOLDER, "model.onnx")

if os.path.exists(ONNX_MODEL_PATH):
    print(f"‚è≥ Loading ONNX model...")
    try:
        # Load the runtime session
        session = ort.InferenceSession(ONNX_MODEL_PATH)

        # Load the image processor stored alongside the model
        processor = AutoImageProcessor.from_pretrained(EXTRACT_FOLDER)

        print("‚úÖ ONNX Model & Processor loaded successfully!")

        # Get input/output names for later use
        input_name = session.get_inputs()[0].name
        output_name = session.get_outputs()[0].name

    except Exception as e:
        print(f"‚ùå Error loading session: {e}")
else:
    # Report the file name that was actually looked for; the old message named
    # model_quantized.onnx although this cell checks for model.onnx.
    print(f"‚ùå Error: {os.path.basename(ONNX_MODEL_PATH)} not found in {EXTRACT_FOLDER}")

In [None]:
import os
import zipfile
import onnxruntime as ort
import numpy as np
from transformers import AutoImageProcessor

# --- CONFIGURATION ---
ZIP_PATH = "/content/sida_mobile_ready.zip"
EXTRACT_FOLDER = "/content/sida_mobile_quantized"
# Hub model to take the processor config from if it is missing locally
FALLBACK_MODEL_ID = "prithivMLmods/AI-vs-Deepfake-vs-Real-v2.0"

# --- 1. UNZIP MODEL ---
if os.path.exists(ZIP_PATH):
    print(f"üìÇ Found {ZIP_PATH}. Unzipping...")
    try:
        with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
            zip_ref.extractall(EXTRACT_FOLDER)
        print(f"‚úÖ Model extracted successfully to: {EXTRACT_FOLDER}")
    except Exception as e:
        print(f"‚ùå Error during unzip: {e}")
else:
    print(f"‚ùå Error: File not found at {ZIP_PATH}")
    print("Please check the 'Files' folder on the left to ensure the zip is there.")

# --- 2. LOAD ONNX SESSION ---
# Look for the model in this order: <root>/model_quantized.onnx,
# <root>/mobile_export/model.onnx, <root>/model.onnx. If none exists the path
# stays at the first candidate and the "not found" branch below reports it.
ONNX_MODEL_PATH = os.path.join(EXTRACT_FOLDER, "model_quantized.onnx")
if not os.path.exists(ONNX_MODEL_PATH):
    subfolder_path = os.path.join(EXTRACT_FOLDER, "mobile_export", "model.onnx")
    root_onnx = os.path.join(EXTRACT_FOLDER, "model.onnx")
    if os.path.exists(subfolder_path):
        ONNX_MODEL_PATH = subfolder_path
    elif os.path.exists(root_onnx):
        ONNX_MODEL_PATH = root_onnx

if os.path.exists(ONNX_MODEL_PATH):
    print(f"‚è≥ Loading ONNX model from: {ONNX_MODEL_PATH}")
    try:
        # Load the runtime session
        session = ort.InferenceSession(ONNX_MODEL_PATH)

        # Prefer the processor shipped with the model; fall back to the Hub copy.
        # `except Exception` replaces a bare `except:` so that KeyboardInterrupt
        # and SystemExit are no longer swallowed by the fallback.
        try:
            print("Attempting to load processor locally...")
            processor = AutoImageProcessor.from_pretrained(EXTRACT_FOLDER)
        except Exception:
            print(f"‚ö†Ô∏è Local processor config missing. Downloading from {FALLBACK_MODEL_ID}...")
            processor = AutoImageProcessor.from_pretrained(FALLBACK_MODEL_ID)

        print("‚úÖ ONNX Model & Processor loaded successfully!")

        # Get input/output names
        input_name = session.get_inputs()[0].name
        output_name = session.get_outputs()[0].name

    except Exception as e:
        print(f"‚ùå Error loading session: {e}")
else:
    print(f"‚ùå Error: ONNX file not found in {EXTRACT_FOLDER}")
    print("Check the extracted folder structure manually in the files tab.")

In [None]:
from google.colab import files
from PIL import Image
import io
import matplotlib.pyplot as plt

# Class index -> label name (must follow the order used at training time).
id2label = {0: "SYNTHETIC", 1: "TAMPERED", 2: "REAL"}

# 1. Upload Image
print("\n‚¨áÔ∏è Upload a test image to check the model ‚¨áÔ∏è")
uploaded = files.upload()

if not uploaded:
    print("‚ö†Ô∏è No image uploaded.")
else:
    for filename, content in uploaded.items():
        try:
            print(f"\nüîç Analyzing: {filename}")
            image = Image.open(io.BytesIO(content)).convert("RGB")

            # 2. Preprocess with the loaded processor, asking for numpy arrays
            inputs = processor(images=image, return_tensors="np")
            input_data = inputs["pixel_values"]

            # 3. Run the ONNX session and take the first item of the first output
            outputs = session.run([output_name], {input_name: input_data})
            logits = outputs[0][0]

            # 4. Softmax, shifted by the max logit before exponentiating
            exp_logits = np.exp(logits - np.max(logits))
            probs = exp_logits / np.sum(exp_logits)

            # 5. Most probable class, its probability and its label
            pred_id = np.argmax(probs)
            confidence = probs[pred_id]
            label = id2label[pred_id]

            # 6. Show the image with the prediction as its title
            plt.figure(figsize=(6, 6))
            plt.imshow(image)
            plt.axis('off')
            plt.title(f"Prediction: {label} ({confidence:.1%})", fontsize=14, color='green')
            plt.show()

            print(f"Raw Probabilities: {probs}")

        except Exception as e:
            print(f"‚ùå Error processing image: {e}")

**Model Calculation**

In [None]:
# --- 1. INSTALL DEPENDENCIES ---
import os
import subprocess
import sys

print("‚öôÔ∏è Installing libraries...")
# Run pip through the kernel's own interpreter so packages land in the environment
# this notebook actually imports from, and report a failed install instead of
# silently ignoring the exit status.
_pip_result = subprocess.run(
    [
        sys.executable, "-m", "pip", "install", "-q",
        "transformers", "torch", "torchvision", "opencv-python", "matplotlib",
    ],
    check=False,
)
if _pip_result.returncode != 0:
    print(f"pip install exited with code {_pip_result.returncode}; the imports below may fail.")

import torch
import numpy as np
import cv2
import matplotlib.pyplot as plt
import io
from PIL import Image
from transformers import AutoModelForImageClassification, AutoImageProcessor, AutoConfig
from google.colab import files

In [None]:
import zipfile
import os

# Where the uploaded model archive lives and which folder it is unpacked into.
zip_path = "/content/my_sida_model(e-10).zip"
extract_path = "sida_model"

# Handle the missing-archive case first; otherwise unpack everything.
if not os.path.exists(zip_path):
    print("‚ùå Zip file not found. Please run Cell 2 again.")
else:
    print(f"Unzipping {zip_path}...")
    with zipfile.ZipFile(zip_path, mode='r') as zip_ref:
        zip_ref.extractall(path=extract_path)
    print(f"‚úÖ Extracted to folder: {extract_path}")

In [None]:
# --- 2. CONFIGURATION ---
# MODEL_PATH is the local folder the model archive was extracted into
# (`extract_path`). Change it if your model lives somewhere else.
MODEL_PATH = extract_path
device = "cuda" if torch.cuda.is_available() else "cpu"

# --- 3. LOAD MODEL ---
print(f"‚è≥ Loading model: {MODEL_PATH}...")
try:
    config = AutoConfig.from_pretrained(MODEL_PATH)
    config.output_attentions = True

    # Request 'eager' attention so internal attention weights are available.
    # The backend is selected through from_pretrained's attn_implementation
    # argument; assigning a plain attribute on the config is not the documented
    # way to select it.
    model = AutoModelForImageClassification.from_pretrained(
        MODEL_PATH, config=config, attn_implementation="eager"
    ).to(device).eval()

    processor = AutoImageProcessor.from_pretrained(MODEL_PATH)
    print("‚úÖ Model Loaded.")
except Exception as e:
    print(f"‚ùå Error: {e}")

# --- 4. VISUALIZATION FUNCTIONS ---

def visualize_patches(image, patch_size=16):
    """Show the image resized to 224x224 with a grid drawn every ``patch_size`` pixels.

    Args:
        image: PIL image to display.
        patch_size: spacing in pixels between grid lines.
    """
    pixels = np.array(image.resize((224, 224)))
    height, width, _channels = pixels.shape

    fig, ax = plt.subplots(1, 1, figsize=(5, 5))
    ax.imshow(pixels)

    # Vertical lines first, then horizontal ones, one per patch boundary.
    for x_pos in range(0, width, patch_size):
        ax.axvline(x_pos, color='white', linewidth=0.5, alpha=0.5)
    for y_pos in range(0, height, patch_size):
        ax.axhline(y_pos, color='white', linewidth=0.5, alpha=0.5)

    ax.set_title(f"Step 1: Input Patches ({patch_size}x{patch_size})")
    ax.axis('off')
    plt.show()

def visualize_attention(attentions, image, layer_idx=-1):
    """Plot one layer's attention as a heatmap over the image (resized to 224x224).

    Args:
        attentions: sequence of per-layer attention tensors, indexed by ``layer_idx``.
            # assumes each is (batch, heads, tokens, tokens) with batch == 1 - TODO confirm
        image: PIL image the attentions were computed for.
        layer_idx: which layer to show; ``-1`` is the last one.
    """
    attn = attentions[layer_idx]

    # Average across heads and drop the batch axis -> (tokens, tokens).
    # Cast to float32 so half-precision weights can be handed to numpy/OpenCV.
    attn_mean = torch.mean(attn, dim=1).squeeze(0).float()

    num_tokens = attn_mean.shape[0]
    grid_size = int(np.sqrt(num_tokens))

    if grid_size * grid_size == num_tokens:
        # Case A: no CLS token -> average attention each patch receives
        heatmap = torch.mean(attn_mean, dim=0).view(grid_size, grid_size)
    else:
        # Case B: CLS token present -> what token 0 attends to among the patches
        patch_attn = attn_mean[0, 1:]
        grid_size = int(np.sqrt(patch_attn.shape[0]))
        heatmap = patch_attn.view(grid_size, grid_size)

    # Min-max normalise; a flat (or non-finite) map would divide by zero, so it
    # is rendered as an all-zero map instead of NaNs.
    heatmap = heatmap.detach().cpu().numpy()
    value_range = float(heatmap.max() - heatmap.min())
    if np.isfinite(value_range) and value_range > 0:
        heatmap = (heatmap - heatmap.min()) / value_range
    else:
        heatmap = np.zeros_like(heatmap)

    img_resized = np.array(image.resize((224, 224)))
    heatmap = cv2.resize(heatmap, (224, 224), interpolation=cv2.INTER_NEAREST)

    # Overlay. applyColorMap returns BGR while the image and matplotlib are RGB,
    # so convert first; without this the colours are swapped (high attention
    # showed up as blue instead of red).
    colored_map = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
    colored_map = cv2.cvtColor(colored_map, cv2.COLOR_BGR2RGB)
    overlay = cv2.addWeighted(img_resized, 0.6, colored_map, 0.4, 0)

    plt.figure(figsize=(5, 5))
    plt.imshow(overlay)
    plt.title(f"Step 2: Internal Attention (Layer {layer_idx if layer_idx!=-1 else 'Final'})")
    plt.axis('off')
    plt.show()

def visualize_predictions(logits, id2label):
    """Plot raw logits and softmax probabilities side by side as bar charts.

    Args:
        logits: tensor of class scores from the model.
            # assumes shape (1, num_classes) with num_classes > 1 - TODO confirm
        id2label: mapping from class index to label name.
    """
    probs = torch.nn.functional.softmax(logits, dim=-1).squeeze().cpu().detach().numpy()
    logits = logits.squeeze().cpu().detach().numpy()

    labels = [id2label[i] for i in range(len(probs))]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

    # Raw Logits Chart
    ax1.bar(labels, logits, color='gray')
    ax1.set_title("Step 3a: Raw Logits (Model Output)")
    ax1.set_ylabel("Score (Pre-Softmax)")

    # Probabilities Chart: manipulated/generated classes in red, the rest in green.
    # 'synthetic' and 'artificial' are included so those classes are not drawn in
    # the same green as 'real'.
    suspicious_markers = ('fake', 'tamper', 'synthetic', 'artificial')
    colors = [
        'red' if any(marker in lbl.lower() for marker in suspicious_markers) else 'green'
        for lbl in labels
    ]
    ax2.bar(labels, probs, color=colors)
    ax2.set_title("Step 3b: Final Probabilities (Softmax)")
    ax2.set_ylabel("Confidence (0-1)")
    ax2.set_ylim(0, 1.1)

    # Add percentages on top of each bar
    for i, v in enumerate(probs):
        ax2.text(i, v + 0.02, f"{v:.1%}", ha='center', fontweight='bold')

    plt.show()

# --- 5. MASTER EXECUTION FUNCTION ---
def analyze_image_deep_dive(image_bytes):
    """Run the patch, attention and prediction visualisations for one image.

    Args:
        image_bytes: raw bytes of the uploaded image file.

    Returns:
        None. Figures are shown as a side effect; any exception is caught and
        printed instead of being raised.
    """
    try:
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

        # 1. Show Patches
        visualize_patches(image)

        # Preprocess
        inputs = processor(images=image, return_tensors="pt").to(device)

        # Forward pass. Attentions are requested explicitly (as the other cells do)
        # instead of relying only on the flag set on the config at load time.
        with torch.no_grad():
            outputs = model(**inputs, output_attentions=True)

        # 2. Show Attention from the last layer
        if outputs.attentions:
            visualize_attention(outputs.attentions, image, layer_idx=-1)
        else:
            print("‚ö†Ô∏è Attention weights missing.")

        # 3. Show logits and probabilities
        visualize_predictions(outputs.logits, model.config.id2label)

    except Exception as e:
        print(f"‚ùå Error: {e}")

# --- 6. RUN UPLOAD ---
# Ask for one or more images, then run the deep dive on each (filename -> bytes) entry.
print("\n‚¨áÔ∏è Upload an image to see the internal calculations ‚¨áÔ∏è")
uploaded = files.upload()

for fname in uploaded:
    content = uploaded[fname]
    print(f"\nüîç DEEP DIVE ANALYSIS FOR: {fname}")
    analyze_image_deep_dive(content)

In [None]:
# --- 2. THE MATHEMATICAL VISUALIZER ---
def math_deep_dive(image_bytes):
    """Print a step-by-step numeric trace of the classifier's forward pass.

    Walks through preprocessing, patch embedding, CLS/position injection, the
    first encoder layer (unrolled by hand), the full backbone, and finally the
    linear classification head plus softmax, printing shapes and sample values
    at every stage.

    Relies on the notebook globals ``model``, ``processor`` and ``device``, and
    on the model exposing ViT-style submodules (``base_model.embeddings`` with
    ``patch_embeddings``/``cls_token``/``position_embeddings``,
    ``base_model.encoder.layer`` and ``classifier``).

    Args:
        image_bytes: Raw contents of an image file in any format PIL can open.

    Returns:
        None. All results are printed.
    """
    print("\n" + "="*50)
    print("üß† DEEP LEARNING FORENSICS: INSIDE THE MODEL")
    print("="*50)

    # --- STEP 0: RAW INPUT ---
    image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    # Resized copy is used for display only; the processor receives the original.
    img_array = np.array(image.resize((224, 224)))
    print(f"\n[Step 0] Input Image")
    print(f"   ‚îî‚îÄ‚îÄ Shape: {img_array.shape} (Height, Width, Channels)")
    print(f"   ‚îî‚îÄ‚îÄ Pixel Sample (Top-Left): {img_array[0,0]}")

    # --- STEP 1: PREPROCESSING ---
    # Equation: x_norm = (x - mean) / std
    inputs = processor(images=image, return_tensors="pt").to(device)
    pixel_values = inputs.pixel_values

    print(f"\n[Step 1] Preprocessing (Normalization)")
    print(f"   ‚îî‚îÄ‚îÄ Equation: $x_{{norm}} = \\frac{{x - \\mu}}{{\\sigma}}$")
    print(f"   ‚îî‚îÄ‚îÄ Shape: {pixel_values.shape} (Batch, Channels, Height, Width)")
    print(f"   ‚îî‚îÄ‚îÄ Actual Tensor (First 3 pixels, Channel 0):")
    print(f"       {pixel_values[0, 0, 0, :3].detach().cpu().numpy()}")

    # --- STEP 2: PATCH EMBEDDINGS ---
    # Equation: Patch = Conv2d(Kernel=16x16, Stride=16)
    # This splits the 224x224 image into 14x14 = 196 patches
    with torch.no_grad():
        # Access the first layer: Embeddings
        embeddings = model.base_model.embeddings

        # 1. Patch Projection
        patch_embeddings = embeddings.patch_embeddings(pixel_values)
        print(f"\n[Step 2] Patch Embeddings (The Split)")
        print(f"   ‚îî‚îÄ‚îÄ Operation: Convolution (Kernel=16, Stride=16)")
        print(f"   ‚îî‚îÄ‚îÄ Equation: $y = Wx + b$ (Applied to 16x16 pixel blocks)")
        print(f"   ‚îî‚îÄ‚îÄ Shape (Pre-Flatten): {patch_embeddings.shape} (Batch, Hidden_Dim, Grid_H, Grid_W)")

        # Flatten: (B, C, H, W) -> (B, N, C)
        patch_embeddings = patch_embeddings.flatten(2).transpose(1, 2)
        print(f"   ‚îî‚îÄ‚îÄ Shape (Flattened):   {patch_embeddings.shape} (Batch, 196 Patches, 768 Features)")
        print(f"   ‚îî‚îÄ‚îÄ Actual Vector (Patch 1, First 5 features):")
        print(f"       {patch_embeddings[0, 0, :5].detach().cpu().numpy()}")

    # --- STEP 3: CLS TOKEN & POSITION EMBEDDINGS ---
    # Equation: Embedding = Patch_Embed + CLS_Token + Position_Embed
    with torch.no_grad():
        batch_size = patch_embeddings.shape[0]

        # Add CLS Token (The "Classification" placeholder)
        cls_token = embeddings.cls_token.expand(batch_size, -1, -1)
        embeddings_with_cls = torch.cat((cls_token, patch_embeddings), dim=1)

        # Add Position Embeddings (Knowing WHERE the patch is)
        position_embeddings = embeddings.position_embeddings[:, :embeddings_with_cls.shape[1], :]
        final_embeddings = embeddings_with_cls + position_embeddings

        print(f"\n[Step 3] Position & CLS Injection")
        print(f"   ‚îî‚îÄ‚îÄ Equation: $E_{{final}} = [E_{{CLS}}, E_{{patches}}] + E_{{pos}}$")
        print(f"   ‚îî‚îÄ‚îÄ Shape Change: {patch_embeddings.shape} -> {final_embeddings.shape} (Added 1 Token)")
        print(f"   ‚îî‚îÄ‚îÄ Actual Vector (CLS Token, First 5 features):")
        print(f"       {final_embeddings[0, 0, :5].detach().cpu().numpy()}")

    # --- STEP 4: TRANSFORMER ENCODER ---
    # Equation: x = MLP(LN(MSA(LN(x)) + x)) + x
    print(f"\n[Step 4] Transformer Encoder (12 Layers Deep)")
    print(f"   ‚îî‚îÄ‚îÄ This is the 'Brain'. It processes the vectors 12 times.")

    hidden_states = final_embeddings
    encoder = model.base_model.encoder

    # We run just the first layer to show the math
    first_layer = encoder.layer[0]

    # Inference only: no autograd graph is needed for any of the steps below.
    with torch.no_grad():
        # Self Attention
        attention_result = first_layer.attention(first_layer.layernorm_before(hidden_states))
        # Accept either a tuple (output first) or a bare tensor, so the trace
        # does not depend on which form the installed library returns.
        if isinstance(attention_result, tuple):
            attention_output = attention_result[0]
        else:
            attention_output = attention_result
        hidden_states_1 = attention_output + hidden_states # Residual connection

        # MLP (Feed Forward)
        mlp_output = first_layer.intermediate(first_layer.layernorm_after(hidden_states_1))
        layer_output = first_layer.output(mlp_output, hidden_states_1)

    print(f"   ‚îî‚îÄ‚îÄ Layer 1 Output Shape: {layer_output.shape}")
    print(f"   ‚îî‚îÄ‚îÄ Math: Self-Attention allows Patch 1 to 'talk' to Patch 50.")
    print(f"   ‚îî‚îÄ‚îÄ Actual Vector (Patch 1 after Layer 1):")
    print(f"       {layer_output[0, 1, :5].detach().cpu().numpy()}")

    # Fast forward: Run the rest of the model.
    # The backbone is called directly because the classification wrapper's
    # output carries `logits`, not `last_hidden_state`; reading that attribute
    # from `model(**inputs)` raises AttributeError.
    with torch.no_grad():
        outputs = model.base_model(**inputs)
    final_features = outputs.last_hidden_state

    print(f"   ‚îî‚îÄ‚îÄ ... (Repeated 11 more times) ...")
    print(f"   ‚îî‚îÄ‚îÄ Final Feature Map: {final_features.shape}")

    # --- STEP 5: CLASSIFICATION HEAD ---
    # Equation: Logits = W * CLS_Vector + b
    print(f"\n[Step 5] Classification Head (The Decision)")

    # 1. Extract CLS Token (Index 0)
    cls_output = final_features[:, 0, :]
    print(f"   ‚îî‚îÄ‚îÄ Extraction: Take only Index 0 (CLS Token)")
    print(f"   ‚îî‚îÄ‚îÄ CLS Vector Shape: {cls_output.shape} (Batch, 768)")

    # 2. Linear Projection (Classifier)
    # We manually replicate the final layer calculation
    classifier = model.classifier
    weight = classifier.weight
    bias = classifier.bias

    print(f"   ‚îî‚îÄ‚îÄ Equation: $Logits = (W \\times CLS) + b$")
    print(f"   ‚îî‚îÄ‚îÄ Weights Matrix: {weight.shape} (3 Classes, 768 Inputs)")

    with torch.no_grad():
        # Manual Calculation Check
        logits_manual = torch.matmul(cls_output, weight.t()) + bias
        # 3. Softmax (Probability)
        probs = torch.nn.functional.softmax(logits_manual, dim=-1)

    print(f"   ‚îî‚îÄ‚îÄ Calculated Logits: {logits_manual.detach().cpu().numpy()}")

    print(f"\n[Final Output] Probabilities")
    print(f"   ‚îî‚îÄ‚îÄ Equation: $Softmax(z_i) = \\frac{{e^{{z_i}}}}{{\\sum e^{{z_j}}}}$")

    id2label = model.config.id2label
    probs_np = probs[0].detach().cpu().numpy()

    for i, p in enumerate(probs_np):
        print(f"   ‚îî‚îÄ‚îÄ Class {i} ({id2label[i]}): {p:.4f} ({p*100:.2f}%)")

# --- 3. RUN UPLOAD ---
# Interactive driver: opens the Colab upload widget and traces every uploaded file.
print("\n‚¨áÔ∏è Upload an image to start the Deep Dive ‚¨áÔ∏è")
uploaded = files.upload()

# `uploaded` is iterated as filename -> raw bytes; only the bytes are used here.
for fname, content in uploaded.items():
    math_deep_dive(content)

In [None]:
import torch
import numpy as np
import cv2
import matplotlib.pyplot as plt
import io
import os
from PIL import Image
from transformers import AutoModelForImageClassification, AutoImageProcessor, AutoConfig
from google.colab import files

# --- 1. SETUP ---
# NOTE(review): this install runs after torch/transformers were already
# imported at the top of the cell, so it cannot affect the modules in use for
# this run; the exit status of os.system is also ignored.
print("‚öôÔ∏è Installing dependencies...")
os.system("pip install -q transformers torch torchvision")

# Load Model
# MODEL_PATH is a Hugging Face Hub repo id; prefer the GPU when one is available.
MODEL_PATH = "prithivMLmods/AI-vs-Deepfake-vs-Real-v2.0"
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"‚è≥ Loading model: {MODEL_PATH}...")
try:
    # CRITICAL FIX: Enable output_hidden_states=True in config
    config = AutoConfig.from_pretrained(MODEL_PATH)
    config.output_hidden_states = True
    config.output_attentions = True  # Also needed for attention map visualization

    # .eval() switches the model to inference mode before it is used below.
    model = AutoModelForImageClassification.from_pretrained(
        MODEL_PATH, config=config
    ).to(device).eval()

    processor = AutoImageProcessor.from_pretrained(MODEL_PATH)
    print("‚úÖ Model Loaded.")
except Exception as e:
    # Best-effort: the error is only printed. If loading failed, `model` and
    # `processor` stay undefined and later code in this cell raises NameError.
    print(f"‚ùå Error: {e}")

# --- 2. THE MATHEMATICAL VISUALIZER (FIXED) ---
def math_deep_dive(image_bytes):
    """Print a step-by-step numeric trace of the SigLIP classifier's forward pass.

    Covers preprocessing, patch embedding, position injection, the full
    encoder pass and a manual re-computation of the classification head
    (post-layernorm -> mean pooling over tokens -> linear layer -> softmax).

    Relies on the notebook globals ``model``, ``processor`` and ``device``, and
    on the model exposing ``vision_model`` (with ``embeddings`` and
    ``post_layernorm``) and ``classifier``.

    Args:
        image_bytes: Raw contents of an image file in any format PIL can open.

    Returns:
        None. All results are printed.
    """
    print("\n" + "="*60)
    print("üß† DEEP LEARNING FORENSICS: INSIDE THE SIGLIP MODEL")
    print("="*60)

    # --- STEP 0: RAW INPUT ---
    image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    # Resized copy is used for display only; the processor receives the original.
    img_array = np.array(image.resize((224, 224)))
    print(f"\n[Step 0] Input Image")
    print(f"   ‚îî‚îÄ‚îÄ Shape: {img_array.shape} (Height, Width, Channels)")
    print(f"   ‚îî‚îÄ‚îÄ Pixel Sample (Top-Left): {img_array[0,0]}")

    # --- STEP 1: PREPROCESSING ---
    inputs = processor(images=image, return_tensors="pt").to(device)
    pixel_values = inputs.pixel_values

    print(f"\n[Step 1] Preprocessing (Normalization)")
    print(f"   ‚îî‚îÄ‚îÄ Equation: $x_{{norm}} = \\frac{{x - \\mu}}{{\\sigma}}$")
    print(f"   ‚îî‚îÄ‚îÄ Shape: {pixel_values.shape} (Batch, Channels, Height, Width)")
    print(f"   ‚îî‚îÄ‚îÄ Actual Tensor (First 3 pixels, Channel 0):")
    print(f"       {pixel_values[0, 0, 0, :3].detach().cpu().numpy()}")

    # --- STEP 2: PATCH EMBEDDINGS ---
    with torch.no_grad():
        vision_model = model.vision_model
        embeddings = vision_model.embeddings

        patch_embeds = embeddings.patch_embedding(pixel_values)
        print(f"\n[Step 2] Patch Embeddings (The Split)")
        print(f"   ‚îî‚îÄ‚îÄ Operation: Convolution (Kernel=16, Stride=16)")
        print(f"   ‚îî‚îÄ‚îÄ Shape (Pre-Flatten): {patch_embeds.shape} (Batch, Hidden_Dim, Grid_H, Grid_W)")

        # Flatten: (B, C, H, W) -> (B, N, C)
        patch_embeds = patch_embeds.flatten(2).transpose(1, 2)
        print(f"   ‚îî‚îÄ‚îÄ Shape (Flattened):   {patch_embeds.shape} (Batch, 196 Patches, 768 Features)")

    # --- STEP 3: POSITION EMBEDDINGS ---
    with torch.no_grad():
        pos_embeds = embeddings.position_embedding(embeddings.position_ids)
        hidden_states = patch_embeds + pos_embeds

        print(f"\n[Step 3] Position Injection")
        print(f"   ‚îî‚îÄ‚îÄ Equation: $E_{{final}} = E_{{patches}} + E_{{pos}}$")
        print(f"   ‚îî‚îÄ‚îÄ Actual Vector (Patch 1, First 5 features):")
        print(f"       {hidden_states[0, 0, :5].detach().cpu().numpy()}")

    # --- STEP 4: TRANSFORMER ENCODER ---
    print(f"\n[Step 4] Transformer Encoder (12 Layers Deep)")

    # Forward pass with output_hidden_states=True (CRITICAL FIX)
    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)

        # hidden_states[-1] is the raw output of the last encoder block. The
        # classification head consumes it only after the vision model's final
        # LayerNorm, so that norm is applied here to make the manual head
        # computation below reproduce the model's own logits.
        final_features = vision_model.post_layernorm(outputs.hidden_states[-1])

    print(f"   ‚îî‚îÄ‚îÄ Processing complete.")
    print(f"   ‚îî‚îÄ‚îÄ Final Feature Map Shape: {final_features.shape} (Batch, Seq_Len, Hidden_Dim)")

    # --- STEP 5: CLASSIFICATION HEAD ---
    print(f"\n[Step 5] Classification Head (The Decision)")

    # SigLIP pooling strategy for image classification: there is no CLS token;
    # the head averages all patch tokens and feeds the mean to the classifier.
    # (Taking index 0 would just pick the top-left patch.)
    pooled_output = final_features.mean(dim=1)

    print(f"   ‚îî‚îÄ‚îÄ Pooling: Extracting Representative Vector")
    print(f"   ‚îî‚îÄ‚îÄ Pooled Vector Shape: {pooled_output.shape} (Batch, 768)")

    classifier = model.classifier
    weight = classifier.weight
    bias = classifier.bias

    print(f"   ‚îî‚îÄ‚îÄ Equation: $Logits = (W \\times Vector) + b$")
    print(f"   ‚îî‚îÄ‚îÄ Weights Matrix: {weight.shape} (3 Classes, 768 Inputs)")

    with torch.no_grad():
        # Manual Calculation
        logits_manual = torch.matmul(pooled_output, weight.t()) + bias
        # Softmax
        probs = torch.nn.functional.softmax(logits_manual, dim=-1)

    print(f"   ‚îî‚îÄ‚îÄ Calculated Logits: {logits_manual.detach().cpu().numpy()}")

    # Self-check: the hand-computed head should agree with the model's logits.
    # Only reports when it does not, e.g. if the checkpoint pools differently.
    if not torch.allclose(logits_manual, outputs.logits, atol=1e-3):
        print("   NOTE: manual logits differ from the model's own logits; "
              "the classification head may pool differently.")

    print(f"\n[Final Output] Probabilities")
    print(f"   ‚îî‚îÄ‚îÄ Equation: $Softmax(z_i) = \\frac{{e^{{z_i}}}}{{\\sum e^{{z_j}}}}$")

    id2label = model.config.id2label
    probs_np = probs[0].detach().cpu().numpy()

    for i, p in enumerate(probs_np):
        # Fall back to the numeric index when the config carries no label map.
        label_name = id2label[i] if id2label else str(i)
        print(f"   ‚îî‚îÄ‚îÄ Class {i} ({label_name}): {p:.4f} ({p*100:.2f}%)")

# --- 3. RUN UPLOAD ---
# Interactive driver: opens the Colab upload widget and traces every uploaded file.
print("\n‚¨áÔ∏è Upload an image to start the Deep Dive ‚¨áÔ∏è")
uploaded = files.upload()

# `uploaded` is iterated as filename -> raw bytes; only the bytes are used here.
for fname, content in uploaded.items():
    math_deep_dive(content)

In [None]:
# --- 1. SETUP & INSTALL ---
# The install runs before the imports that follow, so `pytorch_grad_cam`
# (from the grad-cam package) is available when it is imported.
# NOTE(review): versions are unpinned and the os.system exit status is ignored.
print("‚öôÔ∏è Installing libraries...")
import os
os.system("pip install -q grad-cam transformers torch torchvision opencv-python matplotlib")

import torch
import cv2
import numpy as np
import matplotlib.pyplot as plt
import io
import zipfile
from PIL import Image
from google.colab import files
from transformers import AutoModelForImageClassification, AutoImageProcessor, AutoConfig
from pytorch_grad_cam import GradCAMPlusPlus
from pytorch_grad_cam.utils.image import show_cam_on_image



In [None]:
# --- 2. CONFIGURATION ---
# We need the PyTorch weights for GradCAM (NOT the quantized ONNX model)
# If the EXTRACT_FOLDER directory does not exist yet, upload the zip named in
# ZIP_NAME; it is unpacked into EXTRACT_FOLDER below.
ZIP_NAME = "/content/my_sida_model(e-10).zip"
EXTRACT_FOLDER = "sida_model_pytorch"
device = "cuda" if torch.cuda.is_available() else "cpu"

# --- 3. LOAD MODEL ---
# Unzip only once: an existing EXTRACT_FOLDER is reused as-is.
if not os.path.exists(EXTRACT_FOLDER):
    if os.path.exists(ZIP_NAME):
        print(f"üìÇ Unzipping {ZIP_NAME}...")
        with zipfile.ZipFile(ZIP_NAME, 'r') as zip_ref:
            zip_ref.extractall(EXTRACT_FOLDER)
    else:
        print(f"‚ö†Ô∏è Zip file '{ZIP_NAME}' not found. Please upload it!")

print(f"‚è≥ Loading model for GradCAM++...")
try:
    config = AutoConfig.from_pretrained(EXTRACT_FOLDER)

    # Force Eager mode for gradients.
    # The attention backend is selected through the `attn_implementation`
    # argument of from_pretrained (the same way the first model-loading cell
    # of this notebook does it); assigning `config.attn_implementation` is
    # not the supported switch.
    # NOTE(review): ignore_mismatched_sizes=True re-initialises any weight
    # whose shape differs from the checkpoint -- confirm this is intended.
    model = AutoModelForImageClassification.from_pretrained(
        EXTRACT_FOLDER,
        config=config,
        ignore_mismatched_sizes=True,
        attn_implementation="eager",
    ).to(device).eval()

    processor = AutoImageProcessor.from_pretrained(EXTRACT_FOLDER)

    # Wrap model (Fixes 'ImageClassifierOutput' error)
    class ModelWrapper(torch.nn.Module):
        """Adapter that makes the classifier return its logits tensor."""

        def __init__(self, m):
            super().__init__()
            self.m = m

        def forward(self, x):
            # The wrapped model returns an output object; only .logits is passed on.
            return self.m(x).logits

    wrapped_model = ModelWrapper(model)
    print("‚úÖ Model Loaded.")
except Exception as e:
    # Best-effort: the error is only printed. If loading failed, `model` and
    # `wrapped_model` stay undefined and the code below raises NameError.
    print(f"‚ùå Error: {e}")

# --- 4. DEFINE GRAD-CAM LOGIC ---
def reshape_transform(tensor, height=14, width=14):
    """Turn a (batch, tokens, channels) sequence into a (batch, channels, g, g) map.

    The grid side ``g`` is inferred from the token count. If the count is
    already a perfect square (e.g. 196 -> 14x14, 256 -> 16x16) every token is
    treated as a patch. Otherwise the first token is assumed to be an extra
    (CLS-style) token and is dropped before reshaping.

    Args:
        tensor: Activations shaped (batch, tokens, channels).
        height: Unused; kept so existing callers that pass it keep working.
        width: Unused; kept so existing callers that pass it keep working.

    Returns:
        A tensor shaped (batch, channels, g, g), where element
        ``[b, c, i, j]`` equals ``tensor[b, offset + i * g + j, c]`` and
        ``offset`` is 1 when a leading token was dropped, else 0.

    Raises:
        RuntimeError: From ``reshape`` if the remaining tokens do not form a
            square grid.
    """
    patches = tensor
    grid_size = int(round(np.sqrt(patches.shape[1])))
    if grid_size * grid_size != patches.shape[1]:
        # Not a square grid: drop the leading non-patch token and re-derive.
        patches = tensor[:, 1:, :]
        grid_size = int(round(np.sqrt(patches.shape[1])))

    # (B, N, C) -> (B, C, N) -> (B, C, g, g)
    return patches.transpose(1, 2).reshape(
        tensor.size(0), patches.size(2), grid_size, grid_size
    )

# Target the last layer
# Specifically: layer_norm1 of the final encoder block of the vision model.
target_layers = [model.vision_model.encoder.layers[-1].layer_norm1]
# GradCAM++ runs on the logits-only wrapper; reshape_transform converts the
# token sequence captured at the target layer into a 2-D spatial map.
cam = GradCAMPlusPlus(model=wrapped_model, target_layers=target_layers, reshape_transform=reshape_transform)

# --- 5. VISUALIZATION FUNCTION ---
def explain_heatmap(image_bytes, filename):
    """Show, in four panels, how the Grad-CAM++ overlay for one image is built.

    Panels: the resized original (titled with the predicted label), the raw
    grayscale CAM, the CAM overlaid on the image, and the overlay after
    zeroing CAM values below 0.2.

    Relies on the notebook globals ``model``, ``processor``, ``device`` and
    ``cam``.

    Args:
        image_bytes: Raw contents of an image file in any format PIL can open.
        filename: Name printed in the header line.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    print(f"\nüîç Deep Dive into: {filename}")
    pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    rgb_224 = np.array(pil_image.resize((224, 224)))
    rgb_unit = np.float32(rgb_224) / 255  # overlay helper is fed floats in [0, 1]

    model_inputs = processor(images=pil_image, return_tensors="pt").to(device)

    # Predicted label, used only for the title of the first panel.
    with torch.no_grad():
        prediction = model(**model_inputs)
        class_probs = torch.nn.functional.softmax(prediction.logits, dim=-1)
        _, best_idx = torch.max(class_probs, dim=-1)
        pred_label = model.config.id2label[best_idx.item()]

    # targets=None leaves the choice of explained class to the CAM library
    # (presumably the top-scoring class -- confirm against its docs).
    raw_cam = cam(input_tensor=model_inputs['pixel_values'], targets=None)[0, :]

    # Cleanup step: suppress weak responses (below 20% of the CAM scale).
    cleaned_cam = raw_cam.copy()
    cleaned_cam[cleaned_cam < 0.2] = 0

    overlay_raw = show_cam_on_image(rgb_unit, raw_cam, use_rgb=True)
    overlay_clean = show_cam_on_image(rgb_unit, cleaned_cam, use_rgb=True)

    # (picture, title, extra imshow keyword arguments), in left-to-right order.
    panels = [
        (rgb_224, f"1. Original\nPred: {pred_label.upper()}", {}),
        (raw_cam, "2. Raw Attention Map\n(White = High Importance)", {"cmap": 'gray'}),
        (overlay_raw, "3. Standard GradCAM\n(Before Cleaning)", {}),
        (overlay_clean, "4. Final Result\n(Noise Removed)", {}),
    ]

    fig, axes = plt.subplots(1, 4, figsize=(20, 5))
    for axis, (picture, title, imshow_kwargs) in zip(axes, panels):
        axis.imshow(picture, **imshow_kwargs)
        axis.set_title(title)
        axis.axis('off')

    plt.tight_layout()
    plt.show()

# --- 6. UPLOAD & RUN ---
# Interactive driver: opens the Colab upload widget and explains every uploaded file.
print("\n‚¨áÔ∏è Upload an image to see how the heatmap is made ‚¨áÔ∏è")
uploaded = files.upload()

# `uploaded` is iterated as filename -> raw bytes.
for fname, content in uploaded.items():
    explain_heatmap(content, fname)

In [None]:
import torch
import numpy as np
import cv2
import matplotlib.pyplot as plt
import io
from PIL import Image
from transformers import AutoModelForImageClassification, AutoImageProcessor, AutoConfig
from google.colab import files

# --- 1. SETUP & LOAD MODEL ---
print("‚è≥ Loading model...")
MODEL_PATH = "sida_model_extracted" # Or your huggingface path
device = "cuda" if torch.cuda.is_available() else "cpu"

try:
    config = AutoConfig.from_pretrained(MODEL_PATH)

    # Force Eager mode so we can manually run .backward().
    # The attention backend is selected through the `attn_implementation`
    # argument of from_pretrained (the same way the first model-loading cell
    # of this notebook does it); assigning `config.attn_implementation` is
    # not the supported switch.
    model = AutoModelForImageClassification.from_pretrained(
        MODEL_PATH,
        config=config,
        attn_implementation="eager",
    ).to(device).eval()

    processor = AutoImageProcessor.from_pretrained(MODEL_PATH)
    print("‚úÖ Model Loaded.")
except Exception as e:
    # Best-effort: the error is only printed. If loading failed, `model` and
    # `processor` stay undefined and the hook registration below raises NameError.
    print(f"‚ùå Error: {e}")

# --- 2. DEFINE HOOKS TO CATCH THE NUMBERS ---
# These variables will store the raw data passing through the model
# They are overwritten on every forward/backward pass through the hooked layer.
gradients = None
activations = None

def backward_hook(module, grad_input, grad_output):
    """Store the gradient w.r.t. the hooked layer's output in ``gradients``."""
    global gradients
    gradients = grad_output[0] # Capture the gradients flowing backward

def forward_hook(module, input, output):
    """Store the hooked layer's output in ``activations``."""
    global activations
    activations = output # Capture the features flowing forward

# Attach hooks to layer_norm1 of the last encoder block of the vision model.
# NOTE(review): the handles returned by the register_* calls are discarded, so
# re-running this cell adds another pair of hooks instead of replacing them.
target_layer = model.vision_model.encoder.layers[-1].layer_norm1
target_layer.register_full_backward_hook(backward_hook)
target_layer.register_forward_hook(forward_hook)

# --- 3. THE MATH EXPLAINER FUNCTION ---
def deep_dive_heatmap(image_bytes):
    """Compute Grad-CAM by hand for one image and print every intermediate step.

    Runs a forward pass, back-propagates the top-class logit, then uses the
    activations and gradients captured by the module-level hooks to build the
    heatmap: per-channel gradient means as weights, a weighted sum of the
    activation maps, ReLU, min-max normalisation, and an overlay on the image.

    Relies on the notebook globals ``model``, ``processor``, ``device`` and on
    the hooks filling ``activations`` / ``gradients``.

    Args:
        image_bytes: Raw contents of an image file in any format PIL can open.

    Returns:
        None. Intermediate values are printed and the figure is shown.
    """
    global gradients, activations

    print("\n" + "="*60)
    print("üßÆ GRAD-CAM MATH: UNDER THE HOOD")
    print("="*60)

    # 1. Prepare Image
    image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    inputs = processor(images=image, return_tensors="pt").to(device)

    # 2. Forward Pass (Get Prediction)
    # Autograd stays enabled here on purpose: the backward pass below needs it.
    model.zero_grad()
    outputs = model(**inputs)
    logits = outputs.logits

    # Get top class
    probs = torch.nn.functional.softmax(logits, dim=-1)
    top_id = torch.argmax(probs, dim=-1).item()
    print(f"üéØ Target Class: {model.config.id2label[top_id]} (Score: {logits[0, top_id].item():.4f})")

    # 3. Backward Pass (Calculate Gradients)
    # We ask: "How much does every pixel in the last layer affect this score?"
    logits[0, top_id].backward()

    # --- THE NUMBERS ---

    # A. Reshape from 1D Sequence to 2D Grid
    # Hooked tensors are (Batch, Tokens, Channels); we want (Channels, Grid, Grid).
    acts = activations.detach().cpu()
    grads = gradients.detach().cpu()

    # The grid side is derived from the token count rather than hard-coded.
    # A non-square count is taken to mean one leading extra (CLS-style) token,
    # which is dropped.
    grid_size = int(round(np.sqrt(acts.shape[1])))
    if grid_size * grid_size != acts.shape[1]:
        acts = acts[:, 1:, :]
        grads = grads[:, 1:, :]
        grid_size = int(round(np.sqrt(acts.shape[1])))
    num_channels = acts.shape[2]

    # Single image: take batch item 0, then (Tokens, Channels) -> (Channels, Grid, Grid)
    acts = acts[0].transpose(0, 1).reshape(num_channels, grid_size, grid_size)
    grads = grads[0].transpose(0, 1).reshape(num_channels, grid_size, grid_size)

    print(f"\n[Step 1] The Raw Data (Feature Maps & Gradients)")
    print(f"   ‚îî‚îÄ‚îÄ Activation Shape: {acts.shape} (768 Feature Maps of size 14x14)")
    print(f"   ‚îî‚îÄ‚îÄ Gradient Shape:   {grads.shape} (How sensitive the score is to each map)")

    # B. Calculate Weights (Global Average Pooling of Gradients)
    # Equation: alpha_k = Mean(Gradients_k)
    weights = torch.mean(grads, dim=(1, 2)) # Average each grid into 1 number

    print(f"\n[Step 2] Calculating Importance Weights")
    print(f"   ‚îî‚îÄ‚îÄ Operation: Average the gradients for each of the 768 channels.")
    print(f"   ‚îî‚îÄ‚îÄ Result: A vector of 768 weights.")
    print(f"   ‚îî‚îÄ‚îÄ Sample Weights (First 5 channels):")
    print(f"       {weights[:5].numpy()}")

    # Find the most important feature channel
    best_channel = torch.argmax(weights).item()
    print(f"   ‚îî‚îÄ‚îÄ Most Important Channel: #{best_channel} (Weight: {weights[best_channel]:.4f})")

    # C. Weighted Combination
    # Equation: Heatmap = sum(Weight_k * Activation_k)
    # Broadcasting multiplies every map by its weight; the sum collapses channels.
    cam = (weights[:, None, None] * acts).sum(dim=0)

    center = grid_size // 2
    print(f"\n[Step 3] Generating the Heatmap")
    print(f"   ‚îî‚îÄ‚îÄ Equation: $L_{{Grad-CAM}} = ReLU(\\sum_k \\alpha_k A^k)$")
    print(f"   ‚îî‚îÄ‚îÄ We sum 768 maps weighted by importance.")
    print(f"   ‚îî‚îÄ‚îÄ Raw Heatmap Value Sample (Center Pixel): {cam[center, center].item():.4f}")

    # D. ReLU (Rectified Linear Unit)
    # We only care about features that *increase* the score (Positive), not decrease it.
    cam = torch.relu(cam)

    # E. Normalize & Visualize
    cam = cam.numpy()
    cam_min = np.min(cam)
    cam_range = np.max(cam) - cam_min
    if cam_range > 0:
        cam = (cam - cam_min) / cam_range # Scale 0 to 1
    else:
        # Constant map (e.g. everything clipped to 0 by the ReLU): dividing by
        # the zero range would yield NaNs, so show an empty heatmap instead.
        cam = np.zeros_like(cam)
    cam = cv2.resize(cam, (224, 224))

    # Plotting
    heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB) # OpenCV colormaps are BGR
    overlay = cv2.addWeighted(np.array(image.resize((224, 224))), 0.6, heatmap, 0.4, 0)

    fig, ax = plt.subplots(1, 2, figsize=(10, 5))
    ax[0].imshow(image)
    ax[0].set_title("Original")
    ax[0].axis('off')

    ax[1].imshow(overlay)
    ax[1].set_title("Manual Grad-CAM Result")
    ax[1].axis('off')
    plt.show()

# --- 4. RUN UPLOAD ---
# Interactive driver: opens the Colab upload widget and runs the manual
# Grad-CAM walkthrough on every uploaded file.
print("\n‚¨áÔ∏è Upload an image to calculate its math ‚¨áÔ∏è")
uploaded = files.upload()

# `uploaded` is iterated as filename -> raw bytes; only the bytes are used here.
for fname, content in uploaded.items():
    deep_dive_heatmap(content)