# We need to get the predictions on a photo with the ground truth public dataset faster rcnn

In [None]:
import re

def extract_id(filename):
    """Return the integer that follows ``aug_`` in *filename*.

    For example ``'shadow-0-aug_124.jpg'`` gives ``124``. When the name has no
    ``aug_<digits>`` part, ``None`` is returned instead.
    """
    found = re.search(r'aug_(\d+)', filename)
    if not found:
        return None
    return int(found.group(1))

# Example usage: pull the augmentation index out of a handful of sample names.
filenames = [
    "shadow-0-aug_124.jpg",
    "sun_flare-0-aug_10.jpg",
    "shadow-0-aug_125.jpg",
    "sun_flare-0-aug_101.jpg",
    "fog-0-aug_99.jpg",
]

ids = list(map(extract_id, filenames))
print(ids)  # Output: [124, 10, 125, 101, 99]


In [None]:
print("Importing libraries...")
import torch
from torch.amp import autocast, GradScaler
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CocoDetection
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torch.utils.data import Dataset
import torchvision.transforms as T
import torchvision.transforms.functional as F
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.ops import MultiScaleRoIAlign
import os
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import shutil
import json
from PIL import Image
import contextlib
import io
import time
import random
from torch.utils.data import Subset
from datetime import datetime
from tqdm import tqdm

class Compose:
    """Chain several paired ``(image, target)`` transforms into one callable."""

    def __init__(self, transforms):
        # Iterable of callables; each takes (image, target) and returns a pair.
        self.transforms = transforms

    def __call__(self, image, target):
        """Run every transform in order and return the final (image, target)."""
        current = (image, target)
        for step in self.transforms:
            new_image, new_target = step(*current)
            current = (new_image, new_target)
        return current

class RandomHorizontalFlip(object):
    """Flip the image and its boxes left-to-right with probability ``prob``.

    Accepts a ``torch.Tensor`` (width taken from the last dimension) or a PIL
    image. Boxes are read from ``target["boxes"]`` as ``[x1, y1, x2, y2]`` rows.

    NOTE: a second class with this same name is defined further down in this
    file; at module level the later definition is the one the name resolves to.
    """

    def __init__(self, prob):
        self.prob = prob

    def __call__(self, image, target):
        """Return ``(image, target)``, flipped together or left untouched.

        Raises:
            TypeError: if ``image`` is neither a tensor nor a PIL image.
        """
        if not isinstance(image, (torch.Tensor, Image.Image)):
            raise TypeError(f"Unsupported image type: {type(image)}. Expected torch.Tensor or PIL.Image.")

        if torch.rand(1) < self.prob:
            if isinstance(image, torch.Tensor):
                width = image.shape[-1]
            else:
                width, _ = image.size
            image = F.hflip(image)

            boxes = target["boxes"]
            # A sample without annotations has no columns to index; skip it.
            if boxes.numel() > 0:
                # Work on a copy so the tensor handed in by the caller is not
                # modified behind its back.
                flipped = boxes.clone()
                # New x1 is width - old x2, new x2 is width - old x1.
                flipped[:, [0, 2]] = width - boxes[:, [2, 0]]
                target["boxes"] = flipped
        return image, target

class RandomBrightnessCont(object):
    """Apply colour jitter to the image with probability ``p``.

    The target is passed through unchanged because colour changes do not move
    any bounding box.
    """

    def __init__(self, brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=0.5):
        # ColorJitter is reached through the ``T`` alias of torchvision.transforms
        # that this file imports; the bare name ``ColorJitter`` is never imported.
        self.color_jitter = T.ColorJitter(brightness, contrast, saturation, hue)
        self.p = p

    def __call__(self, image, target):
        """Return ``(image, target)`` with the image jittered or left as is."""
        if random.random() < self.p:
            image = self.color_jitter(image)
        return image, target

class RandomBlur(object):
    """Apply a Gaussian blur to the image with probability ``p``.

    The target is passed through unchanged.
    """

    def __init__(self, kernel_size=3, p=0.5):
        # GaussianBlur is reached through the ``T`` alias of torchvision.transforms
        # that this file imports; the bare name ``GaussianBlur`` is never imported.
        self.blur = T.GaussianBlur(kernel_size)
        self.p = p

    def __call__(self, image, target):
        """Return ``(image, target)`` with the image blurred or left as is."""
        if random.random() < self.p:
            image = self.blur(image)
        return image, target

import math
import torch
import torchvision.transforms.functional as F
from PIL import Image

class RandomRotate(object):
    """Rotate image and boxes by a random angle with probability ``p``.

    The angle is drawn uniformly from ``[-angle_range, +angle_range]`` degrees.
    The image is rotated with ``expand=True``, so the output canvas grows to
    hold the whole rotated picture. Each box is replaced by the axis-aligned
    box that encloses its four rotated corners, clamped to the new canvas.
    """

    # Target entries that carry one value per box and must follow box removal.
    _PER_BOX_KEYS = ('labels', 'area', 'iscrowd')

    def __init__(self, angle_range=10, p=0.5):
        self.angle_range = angle_range
        self.p = p

    def __call__(self, image, target):
        """Return the (possibly rotated) image together with its target."""
        if random.random() >= self.p:
            return image, target

        angle = random.uniform(-self.angle_range, self.angle_range)
        original_width, original_height = self._get_image_size(image)

        # Rotate through PIL so that expand=True yields the new canvas size.
        image_pil = image if isinstance(image, Image.Image) else F.to_pil_image(image)
        image_pil_rotated = F.rotate(image_pil, angle, expand=True)
        new_width, new_height = image_pil_rotated.size

        # Hand back the same kind of object that came in.
        image = F.to_tensor(image_pil_rotated) if isinstance(image, torch.Tensor) else image_pil_rotated

        boxes = target['boxes']
        if len(boxes) == 0:
            return image, target

        center = (original_width / 2, original_height / 2)

        # Rotating the original frame tells us how far the expanded canvas
        # shifted the origin: the smallest rotated x / y become the new 0.
        frame = torch.tensor([
            [0, 0],
            [original_width, 0],
            [original_width, original_height],
            [0, original_height],
        ], dtype=torch.float32)
        offset = self._rotate_points(frame, -angle, center).min(dim=0).values

        boxes_rotated = []
        kept = []  # indices of the boxes that survive the rotation
        for index, box in enumerate(boxes):
            x1, y1, x2, y2 = (float(v) for v in box)
            corners = torch.tensor(
                [[x1, y1], [x2, y1], [x2, y2], [x1, y2]], dtype=torch.float32
            )
            # The points are rotated by -angle, the opposite sign of the angle
            # given to F.rotate, then moved into the expanded canvas.
            corners_rot = self._rotate_points(corners, -angle, center) - offset

            # Clamp to the new image bounds.
            x_min = max(0.0, corners_rot[:, 0].min().item())
            y_min = max(0.0, corners_rot[:, 1].min().item())
            x_max = min(new_width, corners_rot[:, 0].max().item())
            y_max = min(new_height, corners_rot[:, 1].max().item())

            if x_max > x_min and y_max > y_min:
                boxes_rotated.append([x_min, y_min, x_max, y_max])
                kept.append(index)

        num_boxes = len(boxes)
        if boxes_rotated:
            target['boxes'] = torch.tensor(boxes_rotated, dtype=torch.float32)
        else:
            target['boxes'] = torch.zeros((0, 4), dtype=torch.float32)

        # A dropped box must take its label (and other per-box values) with it,
        # otherwise boxes and labels no longer line up.
        if len(kept) != num_boxes:
            keep_idx = torch.tensor(kept, dtype=torch.long)
            for key in self._PER_BOX_KEYS:
                value = target.get(key)
                if isinstance(value, torch.Tensor) and value.dim() > 0 and value.shape[0] == num_boxes:
                    target[key] = value[keep_idx]
        # NOTE: target['area'] values are not recomputed for the rotated boxes.
        return image, target

    def _rotate_points(self, points, angle, center):
        """Rotate an (N, 2) tensor of x/y points by ``angle`` degrees about ``center``."""
        angle_rad = math.radians(angle)
        cos_theta = math.cos(angle_rad)
        sin_theta = math.sin(angle_rad)
        cx, cy = center

        # Move the rotation centre to the origin, rotate, then move back.
        translated = points - torch.tensor([[cx, cy]])
        x_rot = translated[:, 0] * cos_theta - translated[:, 1] * sin_theta
        y_rot = translated[:, 0] * sin_theta + translated[:, 1] * cos_theta
        return torch.stack([x_rot + cx, y_rot + cy], dim=1)

    def _get_image_size(self, image):
        """Return ``(width, height)`` for a tensor or a PIL image.

        Raises:
            TypeError: for any other image type.
        """
        if isinstance(image, torch.Tensor):
            return image.shape[-1], image.shape[-2]
        elif isinstance(image, Image.Image):
            return image.size
        else:
            raise TypeError("Unsupported image type.")
    
class RandomZoom(object):
    """Crop the image around one randomly chosen box with probability ``p``.

    The crop is centred on the chosen box and measures the box size divided by
    a zoom factor drawn uniformly from ``zoom_range``, so a factor below 1
    gives a crop larger than the box. The crop is clipped to the image.

    The image must offer ``.size`` and ``.crop`` (a PIL image), so this
    transform has to run before any conversion to a tensor.
    """

    # Target entries that carry one value per box and must follow box removal.
    _PER_BOX_KEYS = ('labels', 'area', 'iscrowd')

    def __init__(self, zoom_range=(1.0, 2.0), p=0.5):
        self.zoom_range = zoom_range
        self.p = p

    def __call__(self, image, target):
        """Return the (possibly cropped) image together with its target."""
        if random.random() >= self.p:
            return image, target

        boxes = target['boxes']
        if len(boxes) == 0:
            return image, target

        box_idx = random.randint(0, len(boxes) - 1)
        x1, y1, x2, y2 = boxes[box_idx].tolist()

        width, height = image.size
        center_x, center_y = (x1 + x2) / 2, (y1 + y2) / 2

        zoom_factor = random.uniform(*self.zoom_range)
        crop_width = (x2 - x1) / zoom_factor
        crop_height = (y2 - y1) / zoom_factor

        # Use whole pixels for both the crop and the box shift. Cropping at
        # int(...) while shifting boxes by the float value would leave the
        # boxes up to one pixel off.
        left = int(max(0, center_x - crop_width / 2))
        top = int(max(0, center_y - crop_height / 2))
        right = int(min(width, center_x + crop_width / 2))
        bottom = int(min(height, center_y + crop_height / 2))
        if right <= left or bottom <= top:
            # The crop collapsed to nothing; keep the sample as it was.
            return image, target

        image = image.crop((left, top, right, bottom))

        # Shift into crop coordinates on a copy, so the caller's tensor is kept.
        shifted = boxes.clone()
        shifted[:, [0, 2]] = (shifted[:, [0, 2]] - left).clamp(0, right - left)
        shifted[:, [1, 3]] = (shifted[:, [1, 3]] - top).clamp(0, bottom - top)

        # Boxes that fall outside the crop end up with zero width or height.
        # Drop them together with their per-box values.
        keep = (shifted[:, 2] > shifted[:, 0]) & (shifted[:, 3] > shifted[:, 1])
        num_boxes = len(boxes)
        target['boxes'] = shifted[keep]
        if not bool(keep.all()):
            for key in self._PER_BOX_KEYS:
                value = target.get(key)
                if isinstance(value, torch.Tensor) and value.dim() > 0 and value.shape[0] == num_boxes:
                    target[key] = value[keep]
        # NOTE: target['area'] values are not recomputed for the cropped boxes.
        return image, target

    
def get_augmented_transform(train):
    """Build the paired transform pipeline for training or validation.

    With ``train`` set, the augmentations below are put in front of the final
    tensor conversion. Without it the pipeline is the tensor conversion alone.
    The steps are returned wrapped in a ``Compose``.
    """
    pipeline = []

    if train:
        pipeline += [
            # Mirror left/right half of the time.
            RandomHorizontalFlip(0.5),
            # Colour jitter, applied half of the time.
            RandomBrightnessCont(brightness=0.3, contrast=0.4, saturation=0.5, hue=0.5, p=0.5),
            # Gaussian blur with a 3-pixel kernel, applied half of the time.
            RandomBlur(kernel_size=3, p=0.5),
            # Rotation by an angle in [-50, +50] degrees, applied half of the time.
            RandomRotate(angle_range=50, p=0.5),
            # Crop around a random box with a zoom factor in [0.05, 0.99];
            # p=1 means this step is always attempted.
            RandomZoom(zoom_range=(0.05, 0.99), p=1),
        ]

    # The model consumes tensors, so the conversion always comes last.
    pipeline.append(ToTensor())

    return Compose(pipeline)

def visualize_augmentations(dataset, num_samples=5):
    """Show ``num_samples`` randomly chosen samples, each fetched twice, side by side.

    Every row picks one random index and reads ``dataset[idx]`` two times: the
    first read is drawn under the title 'Original', the second under
    'Augmented'.

    NOTE(review): both reads go through the same dataset, so when the dataset
    applies random transforms the 'Original' column is augmented as well.
    Confirm whether an un-augmented dataset was meant for the left column.
    """
    # squeeze=False keeps ``axes`` two-dimensional even for a single row, so
    # the [row, column] indexing below also works with num_samples=1.
    fig, axes = plt.subplots(num_samples, 2, figsize=(12, 4 * num_samples), squeeze=False)

    def _as_array(img):
        # Tensors are laid out channel-first; imshow wants channel-last.
        if isinstance(img, torch.Tensor):
            return img.permute(1, 2, 0).numpy()
        return np.array(img)

    for row in range(num_samples):
        idx = random.randint(0, len(dataset) - 1)

        first_img, _ = dataset[idx]
        axes[row, 0].imshow(_as_array(first_img))
        axes[row, 0].set_title('Original')

        second_img, _ = dataset[idx]
        axes[row, 1].imshow(_as_array(second_img))
        axes[row, 1].set_title('Augmented')

    plt.tight_layout()
    plt.show()

# Module-level gradient scaler for mixed-precision training. The training
# helpers in this file receive a scaler as an argument.
# NOTE(review): presumably this instance is the one passed in; the call site is not visible here.
scaler = GradScaler()
class HazmatDataset(Dataset):
    """Detection dataset backed by a COCO-style annotation file.

    The JSON file must hold an ``images`` list (entries with ``id`` and
    ``file_name``) and an ``annotations`` list (entries with ``image_id``,
    ``bbox`` as ``[x, y, w, h]``, ``category_id``, ``area`` and ``iscrowd``).
    Image files are read from ``<data_dir>/images/<file_name>``.

    Each item is ``(image, target)``. ``target`` holds ``boxes`` as
    ``[x1, y1, x2, y2]`` rows, ``labels``, ``image_id``, ``area`` and
    ``iscrowd``. When ``transforms`` is given it is called as
    ``transforms(image, target)`` and must return the new pair.
    """

    def __init__(self, data_dir, annotations_file, transforms=None):
        self.data_dir = data_dir
        self.transforms = transforms

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(annotations_file, encoding="utf-8") as f:
            data = json.load(f)

        self.images = {img['id']: img for img in data['images']}
        self.annotations = data['annotations']

        # Group annotations by the image they belong to.
        self.img_to_anns = {}
        for ann in self.annotations:
            self.img_to_anns.setdefault(ann['image_id'], []).append(ann)

        self.ids = list(self.images)

    def _build_target(self, img_id):
        """Assemble the target dict for one image id from its annotations."""
        boxes, labels, areas, iscrowd = [], [], [], []

        for ann in self.img_to_anns.get(img_id, []):
            x_min, y_min, box_w, box_h = ann['bbox'][:4]
            # Convert [x, y, w, h] to [x1, y1, x2, y2].
            x_max = x_min + box_w
            y_max = y_min + box_h

            # Boxes without positive width and height are skipped.
            if x_max > x_min and y_max > y_min:
                boxes.append([x_min, y_min, x_max, y_max])
                labels.append(ann['category_id'])
                areas.append(ann['area'])
                iscrowd.append(ann['iscrowd'])

        return {
            # reshape(-1, 4) keeps the box tensor two-dimensional, shape (0, 4),
            # when the image has no usable annotation. An empty list alone
            # would give shape (0,), which breaks column indexing downstream.
            'boxes': torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            'labels': torch.as_tensor(labels, dtype=torch.int64),
            'image_id': torch.tensor([img_id]),
            'area': torch.as_tensor(areas, dtype=torch.float32),
            'iscrowd': torch.as_tensor(iscrowd, dtype=torch.int64),
        }

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and build its target."""
        img_id = self.ids[idx]
        img_info = self.images[img_id]

        img_path = os.path.join(self.data_dir, 'images', img_info['file_name'])
        img = Image.open(img_path).convert('RGB')

        target = self._build_target(img_id)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of images listed in the annotation file."""
        return len(self.ids)

    
class ToTensor(object):
    """Paired transform: convert the image to a tensor, pass the target through."""

    def __call__(self, image, target):
        # Only the image changes; the target is returned exactly as received.
        return F.to_tensor(image), target

class RandomHorizontalFlip(object):
    """Flip the image and its boxes left-to-right with probability ``prob``.

    This definition comes after an earlier class of the same name in this
    file, so it is the one the module-level name refers to.
    ``get_augmented_transform`` applies the flip before ``ToTensor``, on
    PIL images, so both tensors and PIL images are supported here.
    """

    def __init__(self, prob):
        self.prob = prob

    def __call__(self, image, target):
        """Return ``(image, target)``, flipped together or left untouched."""
        if torch.rand(1) < self.prob:
            if isinstance(image, torch.Tensor):
                # Tensor images keep their width in the last dimension.
                width = image.shape[-1]
            else:
                # PIL images report (width, height).
                width, _ = image.size
            image = F.hflip(image)

            boxes = target["boxes"]
            # A sample without annotations has no columns to index; skip it.
            if boxes.numel() > 0:
                # Work on a copy so the caller's tensor is not modified.
                flipped = boxes.clone()
                # New x1 is width - old x2, new x2 is width - old x1.
                flipped[:, [0, 2]] = width - boxes[:, [2, 0]]
                target["boxes"] = flipped
        return image, target

def get_transform(train):
    """Return the basic transform steps as a plain list.

    The list always starts with the tensor conversion. When ``train`` is true,
    a horizontal flip with probability 0.5 is added after it. The steps are
    not wrapped in a ``Compose``; callers get the list itself.
    """
    steps = [ToTensor()]
    if train:
        # Training-only augmentation.
        steps.append(RandomHorizontalFlip(0.5))
    return steps

def collate_fn(batch):
    """Regroup a list of samples into one tuple per sample field.

    ``[(img1, tgt1), (img2, tgt2)]`` becomes ``((img1, img2), (tgt1, tgt2))``.
    """
    columns = zip(*batch)
    return tuple(columns)

def train_one_epoch(model, optimizer, data_loader, device, scaler):
    """Train ``model`` for one pass over ``data_loader`` with mixed precision.

    Parameters:
    - model: called as ``model(images, targets)``; must return a dict of losses
      containing 'loss_classifier', 'loss_box_reg', 'loss_objectness' and
      'loss_rpn_box_reg'.
    - optimizer: optimizer stepped through ``scaler``.
    - data_loader: yields ``(images, targets)`` batches.
    - device: device the images and target tensors are moved to.
    - scaler: gradient scaler used for the backward pass and optimizer step.

    Returns:
    - tuple of per-batch averages: (total loss, classifier loss, box
      regression loss, objectness loss, RPN box regression loss). All values
      are 0.0 when the loader yields no batches.
    """
    model.train()
    loss_keys = ('loss_classifier', 'loss_box_reg', 'loss_objectness', 'loss_rpn_box_reg')
    total_loss = 0.0
    totals = dict.fromkeys(loss_keys, 0.0)

    # tqdm shows the progress through the epoch.
    progress_bar = tqdm(data_loader, desc="Training", leave=True)

    for images, targets in progress_bar:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Forward pass under autocast.
        # NOTE(review): the device type is fixed to 'cuda' regardless of ``device``.
        with autocast(device_type='cuda'):
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        # Scale the loss before backward, let the scaler perform (or skip) the
        # optimizer step, then update the scale for the next iteration.
        scaler.scale(losses).backward()
        scaler.step(optimizer)
        scaler.update()

        # Read every value from the device once and reuse it for both the
        # running totals and the progress bar.
        batch_loss = losses.item()
        batch_parts = {key: loss_dict[key].item() for key in loss_keys}

        total_loss += batch_loss
        for key, value in batch_parts.items():
            totals[key] += value

        progress_bar.set_postfix({
            "Loss": f"{batch_loss:.4f}",
            "Classifier": f"{batch_parts['loss_classifier']:.4f}",
            "BoxReg": f"{batch_parts['loss_box_reg']:.4f}",
        })

    # Guard against an empty loader, which would otherwise divide by zero.
    num_batches = max(len(data_loader), 1)
    avg_loss = total_loss / num_batches
    avg_classifier_loss = totals['loss_classifier'] / num_batches
    avg_box_reg_loss = totals['loss_box_reg'] / num_batches
    avg_objectness_loss = totals['loss_objectness'] / num_batches
    avg_rpn_box_reg_loss = totals['loss_rpn_box_reg'] / num_batches

    return avg_loss, avg_classifier_loss, avg_box_reg_loss, avg_objectness_loss, avg_rpn_box_reg_loss



# Load the ground-truth validation annotations through the COCO API.
# This runs when the module is executed, so the file must exist at this
# relative path.
coco_val = COCO('data/data_faster_rcnn/val/annotations/instances_val.json')

# Prepare predictions in COCO format
# Assuming you have a function to convert model outputs to COCO format
# Conversion to COCO Format
def convert_to_coco_format(outputs, image_ids):
    """Turn per-image model outputs into a flat list of COCO result dicts.

    Parameters:
    - outputs: one dict per image with tensor entries 'boxes'
      (rows of ``[x1, y1, x2, y2]``), 'scores' and 'labels'.
    - image_ids: the image id for each entry of ``outputs``, in the same order.

    Returns:
    - list of dicts with 'image_id', 'category_id', 'bbox' as
      ``[x, y, width, height]`` and 'score'. The numbers are plain Python
      values, so the list can be written out as JSON directly.
    """
    coco_results = []
    for output, image_id in zip(outputs, image_ids):
        # tolist() yields built-in floats and ints rather than numpy scalars,
        # which the json module refuses to serialise.
        boxes = output['boxes'].cpu().tolist()
        scores = output['scores'].cpu().tolist()
        labels = output['labels'].cpu().tolist()

        for (x1, y1, x2, y2), score, label in zip(boxes, scores, labels):
            coco_results.append({
                'image_id': image_id,
                'category_id': int(label),
                # Corner format to COCO's [x, y, width, height].
                'bbox': [x1, y1, x2 - x1, y2 - y1],
                'score': float(score)
            })
    return coco_results

# Validation Function
def validate_old(model, data_loader, coco_gt, device):
    """Run detection over ``data_loader`` and score it with the COCO bbox evaluator.

    The model is put in eval mode and called on the images only. Its outputs
    are converted with ``convert_to_coco_format`` and evaluated against
    ``coco_gt``. The evaluator's printed summary is captured and discarded.

    Returns ``coco_eval.stats``, or a list of six zeros when the model produced
    no detections at all.
    """
    model.eval()
    detections = []

    progress_bar = tqdm(data_loader, desc="Validation", leave=True)

    with torch.no_grad():
        for images, targets in progress_bar:
            batch = [img.to(device) for img in images]
            predictions = model(batch)

            ids = [tgt['image_id'].item() for tgt in targets]
            detections += convert_to_coco_format(predictions, ids)

            # Show how many detections have been collected so far.
            progress_bar.set_postfix({"Processed": len(detections)})

    if len(detections) == 0:
        print("No predictions generated. Skipping evaluation.")
        return [0.0] * 6  # placeholder metrics when there is nothing to score

    # COCOeval prints its tables to stdout; send them to a throwaway buffer.
    sink = io.StringIO()
    with contextlib.redirect_stdout(sink):
        coco_dt = coco_gt.loadRes(detections)
        coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

    return coco_eval.stats
def validate(model, data_loader, coco_gt, device):
    """Compute COCO bbox metrics and average validation losses in one pass.

    For every batch the model is called twice: once in eval mode to obtain
    detections, and once in train mode (still under ``torch.no_grad()``)
    because that is the mode in which it returns its loss dict.

    Parameters:
    - model: detection model; returns detections in eval mode and a dict with
      'loss_classifier', 'loss_box_reg', 'loss_objectness' and
      'loss_rpn_box_reg' when called with targets in train mode.
    - data_loader: yields ``(images, targets)`` batches; each target needs a
      one-element 'image_id' tensor.
    - coco_gt: COCO ground-truth object the detections are scored against.
    - device: device the images and target tensors are moved to.

    Returns:
    - ``(stats, losses)``: ``stats`` is ``coco_eval.stats`` (or six zeros when
      no detections were produced); ``losses`` is the tuple of per-batch
      averages (total, classifier, box regression, objectness, RPN box
      regression).

    NOTE(review): layers that behave differently in train mode (dropout,
    unfrozen batch norm) would also be active during the loss pass. Confirm
    the model has none that matter.
    """
    model.eval()
    results = []
    loss_keys = ('loss_classifier', 'loss_box_reg', 'loss_objectness', 'loss_rpn_box_reg')
    total_loss = 0.0
    totals = dict.fromkeys(loss_keys, 0.0)

    progress_bar = tqdm(data_loader, desc="Validation", leave=True)

    with torch.no_grad():
        for images, targets in progress_bar:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Detections for mAP (eval mode).
            outputs = model(images)
            image_ids = [t['image_id'].item() for t in targets]
            results.extend(convert_to_coco_format(outputs, image_ids))

            # Losses are only returned in train mode. The finally clause makes
            # sure the model is back in eval mode even if this call raises.
            model.train()
            try:
                loss_dict = model(images, targets)
            finally:
                model.eval()

            # Read each value from the device once.
            batch_loss = sum(loss for loss in loss_dict.values()).item()
            total_loss += batch_loss
            for key in loss_keys:
                totals[key] += loss_dict[key].item()

            progress_bar.set_postfix({
                "Processed": len(results),
                "Val Loss": f"{batch_loss:.4f}"
            })

    # Guard against an empty loader, which would otherwise divide by zero.
    num_batches = max(len(data_loader), 1)
    avg_losses = (total_loss / num_batches,) + tuple(
        totals[key] / num_batches for key in loss_keys
    )

    if not results:
        print("No predictions generated. Skipping evaluation.")
        return [0.0] * 6, avg_losses

    # COCOeval prints its tables to stdout; capture and discard them.
    with contextlib.redirect_stdout(io.StringIO()):
        coco_dt = coco_gt.loadRes(results)
        coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

    return coco_eval.stats, avg_losses

# Custom backbone to return a dictionary of feature maps
class BackboneWithChannels(torch.nn.Module):
    """Wrap a backbone so that its output comes back as a dict keyed ``'0'``."""

    def __init__(self, backbone):
        super().__init__()
        self.backbone = backbone

    def forward(self, x):
        """Run the wrapped backbone and return ``{'0': <its output>}``."""
        features = self.backbone(x)
        return {'0': features}
    
# Function to create a subset of the dataset
def create_subset(dataset, percentage):
    """
    Pick a random fraction of a dataset.

    Parameters:
    - dataset: The full dataset; only its length is inspected here.
    - percentage: Fraction of samples to keep, greater than 0.0 and at most 1.0.

    Returns:
    - subset: A ``Subset`` over ``int(len(dataset) * percentage)`` randomly
      chosen indices.

    Raises:
    - ValueError: If ``percentage`` lies outside that range.
    """
    if not (0.0 < percentage <= 1.0):
        raise ValueError("Percentage must be between 0.0 and 1.0.")

    n_total = len(dataset)
    n_keep = int(n_total * percentage)

    # Shuffle every index in place, then keep the leading slice.
    order = list(range(n_total))
    random.shuffle(order)

    return Subset(dataset, order[:n_keep])

def create_directory(base_path="data/models"):
    """
    Make a timestamped run directory for models and logs.

    The directory is created inside ``base_path`` and is named
    'faster-rcnn-finetuned-' followed by the current local date and time in
    'DD-MM-YYYY HH:MM:SS' form, so the name contains a space and colons.
    An already existing directory is reused without error.

    Parameters:
    - base_path (str): Parent directory of the new run directory.

    Returns:
    - directory_path (str): Path of the created directory.
    """
    current_time = datetime.now().strftime("%d-%m-%Y %H:%M:%S")
    directory_path = os.path.join(base_path, f"faster-rcnn-finetuned-{current_time}")

    # Missing parent directories are created along the way.
    os.makedirs(directory_path, exist_ok=True)

    print(f"Directory created: {directory_path}")
    return directory_path

def train_model_old(directory, model, optimizer, train_loader, device, train_metrics_list, best_val_map, lr_scheduler, val_loader, coco_val, scaler, epoch):
    """Run one epoch of training plus validation (legacy loop, no early stopping).

    The epoch is trained, validated, logged to ``training_log.txt``, and saved
    as ``latest_model.pth``. When its mAP beats ``best_val_map`` the checkpoint
    is also saved as ``best_model.pth``.

    Parameters:
    - directory: folder that receives the log file and checkpoints.
    - model, optimizer, scaler: passed on to ``train_one_epoch``.
    - train_loader, val_loader: data loaders for training and validation.
    - device: device used for training and validation.
    - train_metrics_list: list that gets this epoch's metrics dict appended.
    - best_val_map: best mAP@IoU=0.50:0.95 seen so far.
    - lr_scheduler: stepped once at the end of the epoch.
    - coco_val: COCO ground truth used for validation.
    - epoch: index of the previous epoch; incremented here before use.

    Returns:
    - the updated ``best_val_map``.
    """
    epoch += 1
    start_time = time.time()

    train_loss, train_classifier_loss, train_box_reg_loss, train_objectness_loss, train_rpn_box_reg_loss = train_one_epoch(
        model, optimizer, train_loader, device, scaler)

    # This loop expects the bare list of COCO metrics. validate() in this file
    # returns a (metrics, losses) pair, so the metrics-only validate_old() is
    # the matching call here.
    val_metrics = validate_old(model, val_loader, coco_val, device)
    val_map = val_metrics[0]  # mAP@IoU=0.50:0.95

    elapsed_time = time.time() - start_time
    minutes, seconds = divmod(elapsed_time, 60)

    # Read the learning rate before the scheduler changes it below.
    current_lr = optimizer.param_groups[0]['lr']

    data = {
        "epoch": epoch,
        "time_elapsed": (int(minutes), int(seconds)),
        "learning_rate": current_lr,
        "train_loss": train_loss,
        "classifier_loss": train_classifier_loss,
        "box_reg_loss": train_box_reg_loss,
        "objectness_loss": train_objectness_loss,
        "rpn_box_reg_loss": train_rpn_box_reg_loss,
        "val_metrics": val_metrics,
        # save_epoch_data() formats these two entries; this loop has no early
        # stopping, so they carry neutral values.
        "early_stop": False,
        "epochs_no_improve": 0,
    }

    train_metrics_list.append(data)

    print(f"📊 Epoch {epoch} | ⏳ Time: {int(minutes)}m {int(seconds)}s | 🔄 LR: {current_lr:.6f}")
    print(f"📉 Train Loss: {train_loss:.4f} | 🎯 Classifier: {train_classifier_loss:.4f} | 📦 Box Reg: {train_box_reg_loss:.4f}")
    print(f"🔍 Objectness: {train_objectness_loss:.4f} | 🗂️ RPN Box Reg: {train_rpn_box_reg_loss:.4f}")
    print(f"🧪 mAP | 🟢 mAP@IoU=0.50:0.95: {val_metrics[0]:.4f} | 🔵 mAP@IoU=0.50: {val_metrics[1]:.4f} | 🟣 mAP@IoU=0.75: {val_metrics[2]:.4f}")
    print(f"📏 Small mAP: {val_metrics[3]:.4f} | 📐 Medium mAP: {val_metrics[4]:.4f} | 📏 Large mAP: {val_metrics[5]:.4f}")

    save_epoch_data(directory, data)

    lr_scheduler.step()

    # The checkpoint carries the full metrics history along with the weights.
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'val_map': val_map,
        'train_metrics_list': train_metrics_list
    }
    torch.save(checkpoint, os.path.join(directory, "latest_model.pth"))

    # Keep a separate copy whenever this epoch sets a new best mAP.
    if val_map > best_val_map:
        best_val_map = val_map
        torch.save(checkpoint, os.path.join(directory, "best_model.pth"))

    return best_val_map

def train_model(
    directory, 
    model, 
    optimizer, 
    train_loader, 
    device, 
    train_metrics_list, 
    best_val_map, 
    lr_scheduler, 
    val_loader, 
    coco_val, 
    scaler, 
    epoch, 
    # Early stopping parameters
    patience=5, 
    delta=0.001, 
    monitor='val_map', 
    maximize=True,
    epochs_no_improve=0,
    early_stop=False
):
    """Run one epoch of training and validation with early-stopping bookkeeping.

    The monitored value is the validation mAP when ``monitor`` is 'val_map',
    otherwise the validation loss. It counts as an improvement when it beats
    ``best_val_map`` by more than ``delta`` (upwards if ``maximize``, else
    downwards). Epochs without improvement are counted, and once the count
    reaches ``patience`` the early-stop flag is raised.

    Each call appends a metrics dict to ``train_metrics_list``, writes the
    training log, steps ``lr_scheduler``, saves ``latest_model.pth``, and on
    improvement also saves ``best_model.pth``, all inside ``directory``.

    Returns:
    - ``(best_val_map, epochs_no_improve, early_stop)`` for the next call.
    """
    epoch += 1
    started_at = time.time()

    # --- training -----------------------------------------------------------
    (train_loss, train_classifier_loss, train_box_reg_loss,
     train_objectness_loss, train_rpn_box_reg_loss) = train_one_epoch(
        model, optimizer, train_loader, device, scaler)

    # --- validation ---------------------------------------------------------
    val_metrics, val_losses = validate(model, val_loader, coco_val, device)
    (val_loss, val_classifier_loss, val_box_reg_loss,
     val_objectness_loss, val_rpn_box_reg_loss) = val_losses
    val_map = val_metrics[0]  # mAP@IoU=0.50:0.95

    # --- early-stopping bookkeeping ------------------------------------------
    current_metric = val_loss if monitor != 'val_map' else val_map

    if maximize:
        gain = current_metric - best_val_map
    else:
        gain = best_val_map - current_metric
    improved = gain > delta

    if not improved:
        epochs_no_improve += 1
    else:
        best_val_map = current_metric
        epochs_no_improve = 0

    if epochs_no_improve >= patience:
        early_stop = True
        print(f"🚨 Early stopping triggered at epoch {epoch}!")

    # --- logging ------------------------------------------------------------
    minutes, seconds = divmod(time.time() - started_at, 60)

    # Read the learning rate before the scheduler changes it below.
    current_lr = optimizer.param_groups[0]['lr']

    data = dict(
        epoch=epoch,
        time_elapsed=(int(minutes), int(seconds)),
        learning_rate=current_lr,
        train_loss=train_loss,
        classifier_loss=train_classifier_loss,
        box_reg_loss=train_box_reg_loss,
        objectness_loss=train_objectness_loss,
        rpn_box_reg_loss=train_rpn_box_reg_loss,
        val_loss=val_loss,
        val_classifier_loss=val_classifier_loss,
        val_box_reg_loss=val_box_reg_loss,
        val_objectness_loss=val_objectness_loss,
        val_rpn_box_reg_loss=val_rpn_box_reg_loss,
        val_metrics=val_metrics,
        early_stop=early_stop,
        epochs_no_improve=epochs_no_improve,
    )
    train_metrics_list.append(data)

    print(f"📊 Epoch {epoch} | ⏳ Time: {int(minutes)}m {int(seconds)}s | 🔄 LR: {current_lr:.6f}")
    print(f"📉 Train Loss: {train_loss:.4f} | 🎯 Classifier: {train_classifier_loss:.4f} | 📦 Box Reg: {train_box_reg_loss:.4f}")
    print(f"🔍 Objectness: {train_objectness_loss:.4f} | 🗂️ RPN Box Reg: {train_rpn_box_reg_loss:.4f}")
    print(f"📉 Val Loss: {val_loss:.4f} | 🎯 Val Classifier: {val_classifier_loss:.4f} | 📦 Val Box Reg: {val_box_reg_loss:.4f}")
    print(f"🔍 Val Objectness: {val_objectness_loss:.4f} | 🗂️ Val RPN Box Reg: {val_rpn_box_reg_loss:.4f}")
    print(f"🧪 mAP | 🟢 mAP@IoU=0.50:0.95: {val_metrics[0]:.4f} | 🔵 mAP@IoU=0.50: {val_metrics[1]:.4f} | 🟣 mAP@IoU=0.75: {val_metrics[2]:.4f}")
    print(f"📏 Small mAP: {val_metrics[3]:.4f} | 📐 Medium mAP: {val_metrics[4]:.4f} | 📏 Large mAP: {val_metrics[5]:.4f}")
    print(f"🛑 Early stopping counter: {epochs_no_improve}/{patience}")

    save_epoch_data(directory, data)

    # --- scheduler and checkpoints --------------------------------------------
    lr_scheduler.step()
    snapshot = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'val_map': val_map,
        'train_metrics_list': train_metrics_list
    }
    torch.save(snapshot, os.path.join(directory, "latest_model.pth"))

    # An improved epoch is additionally kept as the best model.
    if improved:
        torch.save(snapshot, os.path.join(directory, "best_model.pth"))

    return best_val_map, epochs_no_improve, early_stop

from datetime import datetime
def save_epoch_data(directory, data):
    """
    Append the statistics of one epoch to ``training_log.txt`` in ``directory``.

    Parameters:
    - directory (str): Path to the directory that holds the log file.
    - data (dict): Must contain 'epoch', 'time_elapsed' (minutes, seconds),
      'learning_rate', 'train_loss', 'classifier_loss', 'box_reg_loss',
      'objectness_loss', 'rpn_box_reg_loss' and 'val_metrics' (at least six
      values). 'early_stop' and 'epochs_no_improve' are optional and are
      logged as False and 0 when absent, so callers that do no early
      stopping can use this function too.
    """
    log_file_path = os.path.join(directory, "training_log.txt")

    # Optional early-stopping fields; fall back to neutral values.
    early_stop = data.get('early_stop', False)
    epochs_no_improve = data.get('epochs_no_improve', 0)

    lines = [
        f"Datetime {datetime.now()}\n",
        f"Epoch {data['epoch']} | Time: {data['time_elapsed'][0]}m {data['time_elapsed'][1]}s | LR: {data['learning_rate']:.10f}\n",
        f"Train Loss: {data['train_loss']:.4f} | Classifier: {data['classifier_loss']:.4f} | Box Reg: {data['box_reg_loss']:.4f}\n",
        f"Objectness: {data['objectness_loss']:.4f} | RPN Box Reg: {data['rpn_box_reg_loss']:.4f}\n",
        f"Validation Metrics: | mAP@IoU=0.50:0.95: {data['val_metrics'][0]:.4f} | mAP@IoU=0.50: {data['val_metrics'][1]:.4f} | mAP@IoU=0.75: {data['val_metrics'][2]:.4f}\n",
        f"Small mAP: {data['val_metrics'][3]:.4f} | Medium mAP: {data['val_metrics'][4]:.4f} | Large mAP: {data['val_metrics'][5]:.4f}\n",
        f"Early Stop: {early_stop} | Epochs no improvement: {epochs_no_improve}\n\n"
    ]
    # Append mode keeps the entries of earlier epochs.
    with open(log_file_path, "a", encoding="utf-8") as log_file:
        log_file.writelines(lines)
# Initialize model
# Everything in this section runs at module level: it builds the network,
# moves it to the GPU, loads a checkpoint from disk and prints its metadata.
num_classes = 2  # hazmat code and background

# Create ResNet-101 backbone with FPN
# NOTE(review): newer torchvision releases prefer a ``weights=`` argument over
# ``pretrained=True`` — confirm against the installed version.
backbone = resnet_fpn_backbone('resnet101', pretrained=True)

# Define anchor generator for FPN: one anchor size per feature level (five
# levels), each with the same three aspect ratios.
anchor_generator = AnchorGenerator(
    sizes=((32,), (64,), (128,), (256,), (512,)),
    aspect_ratios=((0.5, 1.0, 2.0),) * 5
)

# Multi-scale RoI pooling for FPN
# NOTE(review): torchvision's FPN backbone presumably names its extra level
# 'pool' rather than '4' — confirm that the '4' entry is intended.
roi_pooler = MultiScaleRoIAlign(
    featmap_names=['0', '1', '2', '3', '4'],
    output_size=7,
    sampling_ratio=2
)

print("initializing model...")
# Initialize Faster R-CNN with ResNet-101-FPN
model = FasterRCNN(
    backbone=backbone,
    num_classes=num_classes,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler
)
# The first CUDA device is required; there is no CPU fallback here.
device = torch.device('cuda:0')

# Move model to device
model.to(device)
# Load the model
# directory_finetuned_model = os.path.join("data", "models", "faster-rcnn-finetuned-20-02-2025 11:53:54")
directory_finetuned_model = os.path.join("data", "models")
# ``device`` is reassigned here; from this point it names 'cuda' without an index.
device = torch.device('cuda')
# NOTE(review): the values read from this checkpoint are printed below as the
# "best model", yet the file loaded is 'latest_model.pth', the same file as the
# "latest" checkpoint further down. Confirm whether 'best_model.pth' was meant.
model_path = os.path.join(directory_finetuned_model, 'latest_model.pth')
# weights_only=False unpickles arbitrary objects; only load trusted checkpoints.
checkpoint = torch.load(model_path, map_location=device, weights_only=False)
val_map = checkpoint['val_map']
epoch = checkpoint['epoch']
#latest
latest_model_path = os.path.join(directory_finetuned_model, 'latest_model.pth')
checkpoint_latest = torch.load(latest_model_path, map_location=device, weights_only=False)
val_map_latest = checkpoint_latest['val_map']
epoch_latest = checkpoint_latest['epoch']

# The weights actually used for inference come from ``checkpoint`` (first load).
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # Set the model to evaluation mode

print(f"Validation mAP best model: {val_map:.4f}")
print(f"Epoch best model: {epoch}")

print(f"Validation mAP latest model: {val_map_latest:.4f}")
print(f"Epoch latest model: {epoch_latest}")

def load_image(image_path, transforms=None):
    """
    Load an image as RGB and optionally run it through inference transforms.

    Args:
        image_path (str): Path of the image file to open.
        transforms: Either a single callable taking ``(image, target)`` and
            returning ``(image, target)`` (for example a ``Compose`` pipeline),
            or an iterable of such callables applied in order. A falsy value
            skips transformation.

    Returns:
        The RGB image after all transforms; the plain PIL image when no
        transform is applied.
    """
    # The context manager closes the underlying file once convert() has
    # copied the pixel data into a new image object.
    with Image.open(image_path) as source:
        image = source.convert('RGB')
    if not transforms:
        return image
    # A callable pipeline is applied as a whole; anything else is treated as
    # an ordered collection of transforms.
    pipeline = [transforms] if callable(transforms) else transforms
    for transform in pipeline:
        image, _ = transform(image, target=None)  # No target during inference
    return image

# Preprocessing pipeline built with train=False
test_transforms = get_transform(train=False)

# Sample image used for a first prediction
image_path = 'images/hazard_plate.jpg'  # Replace with your image path
image = load_image(image_path, transforms=test_transforms).to(device)

# The detector takes a list of images, so the single tensor is wrapped in one;
# gradients are not needed for inference.
with torch.no_grad():
    predictions = model([image])

import numpy as np
import matplotlib.pyplot as plt
import torch
import random
import time
import os
from PIL import Image
import matplotlib.patches as patches

def get_color_with_opacity(score):
    """
    Map a confidence score to an RGBA tuple used for drawing a detection.

    Scores above 0.75 give pure red with alpha ``min(1.0, 0.3 + score)``;
    every other score gives a randomly drawn RGB colour with alpha
    ``max(0.3, score)``.
    """
    is_confident = score > 0.75
    if not is_confident:
        # Low confidence: three random channels, alpha floored at 0.3
        red, green, blue = random.random(), random.random(), random.random()
        return (red, green, blue, max(0.3, score))
    # High confidence: solid red, alpha capped at 1.0
    return (1, 0, 0, min(1.0, 0.3 + score))

def draw_predictions(image, predictions, threshold=0.5, classes=('background', 'hazmat'), save_path=None):
    """
    Draw the label-1 detections on the image, then save or display the figure.

    Args:
        image: Channels-first image tensor. Values are multiplied by 255 and
            clipped to 0..255 for display (assumes floats in [0, 1] -- TODO confirm).
        predictions: Model output; only ``predictions[0]`` is read and it must
            provide 'boxes', 'labels' and 'scores' tensors.
        threshold (float): Minimum score a detection needs to be drawn.
        classes: Sequence of class names indexed by label id.
        save_path (str, optional): If given, the figure is written to this
            path and closed; otherwise it is displayed.
    """
    # Convert image from a channels-first tensor to an HxWxC uint8 array
    image_np = image.cpu().permute(1, 2, 0).numpy()
    image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8)

    detections = predictions[0]
    boxes = detections['boxes'].cpu().numpy()
    labels = detections['labels'].cpu().numpy()
    scores = detections['scores'].cpu().numpy()

    # Filter predictions based on confidence threshold
    keep = scores >= threshold
    boxes, labels, scores = boxes[keep], labels[keep], scores[keep]

    # Create figure and axis
    fig, ax = plt.subplots(1, figsize=(12, 9))
    ax.imshow(image_np)

    for box, label, score in zip(boxes, labels, scores):
        if label != 1:  # Only plot hazmat codes
            continue
        x1, y1, x2, y2 = box
        color = get_color_with_opacity(score)

        # Draw rectangle with opacity
        rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                 edgecolor=color, facecolor='none')
        ax.add_patch(rect)

        # Add text label with confidence score
        label_name = classes[label]
        ax.text(x1, y1, f'{label_name}: {score:.2f}',
                color='white',
                bbox=dict(facecolor=color[:3], alpha=0.6),
                fontsize=12)

    # Operate on this figure's axes explicitly instead of pyplot's "current" axes
    ax.axis('off')

    # Save or display the image
    if save_path:
        # A bare file name has no directory part; os.makedirs('') would raise,
        # so the directory is only created when there is one.
        output_dir = os.path.dirname(save_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Save with tight layout and no padding
        fig.savefig(save_path, bbox_inches='tight', pad_inches=0, dpi=300)
        plt.close(fig)
        print(f"Image saved to: {save_path}")
    else:
        plt.show()



def predict_image(image_path, model, device, test_transforms, threshold=0.5, save_path=None):
    """
    Load an image, predict using the model, and display or save results.

    Args:
        image_path (str): Path to the input image
        model: The detection model
        device: The computation device (CPU/GPU)
        test_transforms: Transforms to apply to the image
        threshold (float): Confidence threshold for predictions
        save_path (str, optional): Path to save the output image with predictions

    Returns:
        The unfiltered model output for the single-image batch.
    """
    # List of class names
    classes = ['background', 'hazmat']

    # Load the image
    image = load_image(image_path, transforms=test_transforms)
    image = image.to(device)

    # CUDA work is queued asynchronously, so the GPU has to be synchronized
    # before and after the forward pass for the timing to be meaningful.
    target_device = torch.device(device)
    on_cuda = target_device.type == 'cuda' and torch.cuda.is_available()
    if on_cuda:
        torch.cuda.synchronize(target_device)

    # Start timing (perf_counter is monotonic and high resolution)
    start_time = time.perf_counter()

    # Wrap the image in a list as the model expects a batch
    with torch.no_grad():
        predictions = model([image])

    if on_cuda:
        torch.cuda.synchronize(target_device)

    # End timing
    prediction_time = time.perf_counter() - start_time
    print(f"Prediction time: {prediction_time:.4f} seconds")

    # Pull the first (only) result to the CPU for reporting
    boxes = predictions[0]['boxes'].cpu().numpy()
    labels = predictions[0]['labels'].cpu().numpy()
    scores = predictions[0]['scores'].cpu().numpy()

    # Apply threshold filter
    keep = scores >= threshold
    boxes = boxes[keep]
    labels = labels[keep]
    scores = scores[keep]

    # Print the predictions
    if len(boxes) == 0:
        print("No predictions meet the threshold.")
    else:
        print("Predictions:")
        for label, score in zip(labels, scores):
            class_name = classes[label]
            print(f"  {class_name}: {score:.2f}")

    # Display or save the predictions (drawing applies the threshold again)
    draw_predictions(image, predictions, threshold=threshold, classes=classes, save_path=save_path)

    return predictions
# Run the full predict-and-draw pipeline on the sample image; no save_path is
# passed, so the annotated figure is displayed instead of written to disk.
x = predict_image(image_path='images/hazard_plate.jpg', 
              model=model, 
              device=device, 
              test_transforms=test_transforms, 
              threshold=0.5)

# x is the value returned by predict_image (the model's predictions)
print(x)


In [None]:
# Print the predictions returned by the predict_image call above
print(x)

In [None]:
# Draw the ground-truth (GT) boxes on images from the public dataset
import json
import random
import matplotlib.pyplot as plt
import cv2
from collections import defaultdict

def check_images_with_annotations(annotation_file, images_dir, max_images=10):
    """
    Display images with their ground-truth bounding boxes drawn in green.

    Args:
        annotation_file (str): Path to a JSON file with "images" and
            "annotations" lists; each annotation carries "image_id" and a
            "bbox" given as x, y, width, height.
        images_dir (str): Directory that contains the image files.
        max_images (int): Number of leading image entries to consider.
            Entries without annotations or whose file cannot be read are
            skipped and still count toward this limit.
    """
    # Parse the JSON and release the file handle before any plotting starts
    with open(annotation_file) as f:
        data = json.load(f)

    # Create a dictionary mapping image_id to all its annotations
    image_annotations = defaultdict(list)
    for annotation in data["annotations"]:
        image_annotations[annotation["image_id"]].append(annotation)

    # Images are taken in file order (shuffling is intentionally disabled)
    images = data["images"]
    # random.shuffle(images)

    # Process up to max_images
    for image_data in images[:max_images]:
        image_id = image_data["id"]
        file_name = image_data["file_name"]

        # Check if this image has annotations
        if image_id not in image_annotations:
            continue

        # Read the image
        path_im = f"{images_dir}/{file_name}"
        image = cv2.imread(path_im)
        if image is None:
            print(f"Image not found: {path_im}")
            continue

        # Convert BGR (OpenCV format) to RGB (Matplotlib format)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Plot the image
        plt.figure(figsize=(10, 10))
        plt.imshow(image)

        # Draw all annotations for this image
        for annotation in image_annotations[image_id]:
            x, y, w, h = (round(value) for value in annotation["bbox"])

            # Draw the bounding box with thicker lines
            plt.gca().add_patch(plt.Rectangle((x, y), w, h, edgecolor='green', facecolor='none', linewidth=4))

            # Add "Ground Truth" label above each bounding box
            plt.text(x, y-5, "Ground Truth", color='green', fontsize=12,
                     bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', pad=1))

        plt.title(f"Image file: {file_name}", fontsize=12)
        plt.axis("off")
        plt.tight_layout()
        plt.show()

# Usage: show ground-truth boxes for the first 10 image entries of the public
# test split (entries without annotations or with an unreadable file are skipped).
annotations_file = "data/public_dataset/test/annotations/instances_test.json"
images_dir = "data/public_dataset/test/images"
check_images_with_annotations(annotations_file, images_dir, max_images=10)

In [None]:
import json
import random
import matplotlib.pyplot as plt
import cv2
import os
import torch
import numpy as np
import time
from PIL import Image
import matplotlib.patches as patches
from collections import defaultdict

def get_color_with_opacity(score):
    """
    Map a confidence score to an RGBA tuple used for drawing a detection.

    Scores above 0.75 give pure red with alpha ``min(1.0, 0.3 + score)``;
    every other score gives a randomly drawn RGB colour with alpha
    ``max(0.3, score)``.
    """
    is_confident = score > 0.75
    if not is_confident:
        # Low confidence: three random channels, alpha floored at 0.3
        red, green, blue = random.random(), random.random(), random.random()
        return (red, green, blue, max(0.3, score))
    # High confidence: solid red, alpha capped at 1.0
    return (1, 0, 0, min(1.0, 0.3 + score))

def load_image(image_path, transforms=None):
    """
    Load an image as RGB and optionally run it through inference transforms.

    Args:
        image_path (str): Path of the image file to open.
        transforms: Either a single callable taking ``(image, target)`` and
            returning ``(image, target)`` (for example a ``Compose`` pipeline),
            or an iterable of such callables applied in order. A falsy value
            skips transformation.

    Returns:
        The RGB image after all transforms; the plain PIL image when no
        transform is applied.
    """
    # The context manager closes the underlying file once convert() has
    # copied the pixel data into a new image object.
    with Image.open(image_path) as source:
        image = source.convert('RGB')
    if not transforms:
        return image
    # A callable pipeline is applied as a whole; anything else is treated as
    # an ordered collection of transforms.
    pipeline = [transforms] if callable(transforms) else transforms
    for transform in pipeline:
        image, _ = transform(image, target=None)  # No target during inference
    return image

def visualize_ground_truth_and_predictions(annotation_file, images_dir, model, device, test_transforms,
                                          max_images=10, confidence_threshold=0.5,
                                          classes=('background', 'hazmat'), save_dir=None):
    """
    Visualize both ground truth and model predictions on images.

    Ground-truth boxes are drawn in white. Predictions with label 1 and a
    score of at least ``confidence_threshold`` are drawn in a colour that
    depends on the score.

    Args:
        annotation_file (str): Path to the annotations JSON file
        images_dir (str): Directory containing the images
        model: The detection model
        device: The computation device (CPU/GPU)
        test_transforms: Transforms to apply to the image
        max_images (int, optional): Number of shuffled image entries to
            consider; ``None`` means all. Entries without annotations or with
            an unreadable file are skipped and still count toward the limit.
        confidence_threshold (float): Threshold for prediction confidence
        classes: Sequence of class names. Not used by the drawing code at
            present; kept so existing callers keep working.
        save_dir (str, optional): Directory to save output images instead of displaying
    """
    # Parse the annotations and close the file before the long drawing loop
    with open(annotation_file) as f:
        data = json.load(f)

    # Create a dictionary mapping image_id to all its annotations
    image_annotations = defaultdict(list)
    for annotation in data["annotations"]:
        image_annotations[annotation["image_id"]].append(annotation)

    # Get list of images and shuffle them
    images = data["images"]
    random.shuffle(images)

    # The output directory only has to be created once, not once per image
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # Process up to max_images
    for image_data in images[:max_images]:
        image_id = image_data["id"]
        file_name = image_data["file_name"]

        # Check if this image has annotations
        if image_id not in image_annotations:
            continue

        image_path = os.path.join(images_dir, file_name)

        # Load image for CV2/matplotlib display
        cv_image = cv2.imread(image_path)
        if cv_image is None:
            print(f"Image not found: {image_path}")
            continue

        # Convert BGR (OpenCV format) to RGB (Matplotlib format)
        cv_image_rgb = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)

        # Load image for model prediction
        tensor_image = load_image(image_path, transforms=test_transforms)
        tensor_image = tensor_image.to(device)

        # Make predictions
        start_time = time.time()
        with torch.no_grad():
            predictions = model([tensor_image])
        prediction_time = time.time() - start_time
        print(f"Prediction time for {file_name}: {prediction_time:.4f} seconds")

        # Create figure and axis
        fig, ax = plt.subplots(1, figsize=(12, 10))
        ax.imshow(cv_image_rgb)

        # Draw ground truth annotations
        for annotation in image_annotations[image_id]:
            x, y, w, h = (round(value) for value in annotation["bbox"])

            # Draw the bounding box with white lines
            rect = patches.Rectangle((x, y), w, h, linewidth=2, edgecolor='white', facecolor='none')
            ax.add_patch(rect)

            # Add "Ground Truth" label at the lower-right corner of the box
            ax.text(x + w+20, y+h+20, "Ground Truth", color='white', fontsize=12, ha='right',
                    bbox=dict(facecolor='black', alpha=0.7, edgecolor='none', pad=1))

        # Draw model predictions
        # NOTE(review): boxes are in the coordinate system of the transformed
        # tensor but are drawn on the original image; confirm that
        # test_transforms does not resize, otherwise the boxes will not line up.
        boxes = predictions[0]['boxes'].cpu().numpy()
        labels = predictions[0]['labels'].cpu().numpy()
        scores = predictions[0]['scores'].cpu().numpy()

        # Filter predictions based on confidence threshold
        keep = scores >= confidence_threshold
        boxes, labels, scores = boxes[keep], labels[keep], scores[keep]

        for box, label, score in zip(boxes, labels, scores):
            if label != 1:  # Only plot hazmat codes (adjust as needed)
                continue
            x1, y1, x2, y2 = box
            color = get_color_with_opacity(score)

            # Draw rectangle with opacity
            rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                     edgecolor=color, facecolor='none')
            ax.add_patch(rect)

            # Add text label with confidence score
            ax.text(x1, y1-10, f'Prediction: {score:.2f}',
                    color='white',
                    bbox=dict(facecolor=color[:3], alpha=0.6),
                    fontsize=12)

        ax.axis('off')
        fig.tight_layout()

        # Save or display the image
        if save_dir:
            save_path = os.path.join(save_dir, f"gt_pred_{file_name}")
            fig.savefig(save_path, bbox_inches='tight', pad_inches=0, dpi=300)
            plt.close(fig)
            print(f"Image saved to: {save_path}")
        else:
            plt.show()

# Example usage
def run_visualization(annotation_file, images_dir, model, device, test_transforms, save_dir=None,
                      max_images=None, confidence_threshold=0.5):
    """
    Run the ground-truth/prediction visualisation over the dataset.

    Args:
        annotation_file (str): Path to the annotations JSON file
        images_dir (str): Directory containing the images
        model: The detection model
        device: The computation device (CPU/GPU)
        test_transforms: Transforms to apply to the image
        save_dir (str, optional): Directory to save output images instead of displaying
        max_images (int, optional): Limit on the number of image entries;
            ``None`` (default) processes every entry.
        confidence_threshold (float): Minimum score for a prediction to be drawn
    """
    visualize_ground_truth_and_predictions(
        annotation_file=annotation_file,
        images_dir=images_dir,
        model=model,
        device=device,
        test_transforms=test_transforms,
        # None makes the slice images[:max_images] cover the whole list, which
        # replaces the former huge magic number.
        max_images=max_images,
        confidence_threshold=confidence_threshold,
        classes=['background', 'hazmat'],
        save_dir=save_dir
    )

# Run the visualisation on the public test split with the objects created
# earlier in the notebook; results are written to save_dir.

# Example usage:
run_visualization(
    annotation_file="data/public_dataset/test/annotations/instances_test.json",
    images_dir="data/public_dataset/test/images",
    model=model,  # the detection model loaded earlier
    device=device,  # torch.device object
    test_transforms=test_transforms,  # transform pipeline from get_transform(train=False)
    save_dir="output/faster_rcnn_paper_examples_2"  # figures are saved here instead of being shown
)


In [None]:
import json
import random
import matplotlib.pyplot as plt
import cv2
import os
import torch
import numpy as np
import time
from PIL import Image
import matplotlib.patches as patches
from collections import defaultdict

def get_color_with_opacity(score):
    """
    Map a confidence score to an RGBA tuple used for drawing a detection.

    Scores above 0.75 give pure red with alpha ``min(1.0, 0.3 + score)``;
    every other score gives a randomly drawn RGB colour with alpha
    ``max(0.3, score)``.
    """
    is_confident = score > 0.75
    if not is_confident:
        # Low confidence: three random channels, alpha floored at 0.3
        red, green, blue = random.random(), random.random(), random.random()
        return (red, green, blue, max(0.3, score))
    # High confidence: solid red, alpha capped at 1.0
    return (1, 0, 0, min(1.0, 0.3 + score))

def load_image(image_path, transforms=None):
    """
    Load an image as RGB and optionally run it through inference transforms.

    Args:
        image_path (str): Path of the image file to open.
        transforms: Either a single callable taking ``(image, target)`` and
            returning ``(image, target)`` (for example a ``Compose`` pipeline),
            or an iterable of such callables applied in order. A falsy value
            skips transformation.

    Returns:
        The RGB image after all transforms; the plain PIL image when no
        transform is applied.
    """
    # The context manager closes the underlying file once convert() has
    # copied the pixel data into a new image object.
    with Image.open(image_path) as source:
        image = source.convert('RGB')
    if not transforms:
        return image
    # A callable pipeline is applied as a whole; anything else is treated as
    # an ordered collection of transforms.
    pipeline = [transforms] if callable(transforms) else transforms
    for transform in pipeline:
        image, _ = transform(image, target=None)  # No target during inference
    return image

def visualize_ground_truth_and_predictions(annotation_file, images_dir, model, device, test_transforms,
                                          confidence_threshold=0.5,
                                          classes=('background', 'hazmat'), save_dir=None):
    """
    Visualize both ground truth and model predictions on images, ensuring each image is processed only once.

    Ground-truth boxes are drawn in white. Predictions with label 1 and a
    score of at least ``confidence_threshold`` are drawn in a colour that
    depends on the score. Image entries that share a file name are handled
    once, and entries without annotations are skipped.

    Args:
        annotation_file (str): Path to the annotations JSON file
        images_dir (str): Directory containing the images
        model: The detection model
        device: The computation device (CPU/GPU)
        test_transforms: Transforms to apply to the image
        confidence_threshold (float): Threshold for prediction confidence
        classes: Sequence of class names. Not used by the drawing code at
            present; kept so existing callers keep working.
        save_dir (str, optional): Directory to save output images instead of displaying
    """
    # Parse the annotations and close the file before the long drawing loop
    with open(annotation_file) as f:
        data = json.load(f)

    # Create a dictionary mapping image_id to all its annotations
    image_annotations = defaultdict(list)
    for annotation in data["annotations"]:
        image_annotations[annotation["image_id"]].append(annotation)

    # The output directory only has to be created once, not once per image
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # File names already handled, so duplicate entries are drawn only once
    processed_images = set()
    for image_data in data["images"]:
        image_id = image_data["id"]
        file_name = image_data["file_name"]

        # Skip if we've already processed this image or if it has no annotations
        if file_name in processed_images or image_id not in image_annotations:
            continue

        # Mark as processed
        processed_images.add(file_name)

        image_path = os.path.join(images_dir, file_name)

        # Load image for CV2/matplotlib display
        cv_image = cv2.imread(image_path)
        if cv_image is None:
            print(f"Image not found: {image_path}")
            continue

        # Convert BGR (OpenCV format) to RGB (Matplotlib format)
        cv_image_rgb = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)

        # Load image for model prediction
        tensor_image = load_image(image_path, transforms=test_transforms)
        tensor_image = tensor_image.to(device)

        # Make predictions
        start_time = time.time()
        with torch.no_grad():
            predictions = model([tensor_image])
        prediction_time = time.time() - start_time
        print(f"Prediction time for {file_name}: {prediction_time:.4f} seconds")

        # Create figure and axis
        fig, ax = plt.subplots(1, figsize=(12, 10))
        ax.imshow(cv_image_rgb)

        # Draw ground truth annotations
        for annotation in image_annotations[image_id]:
            x, y, w, h = (round(value) for value in annotation["bbox"])

            # Draw the bounding box with white lines
            rect = patches.Rectangle((x, y), w, h, linewidth=2, edgecolor='white', facecolor='none')
            ax.add_patch(rect)

            # Add "Ground Truth" label at the lower-right corner of the box
            ax.text(x + w+20, y+h+20, "Ground Truth", color='white', fontsize=12, ha='right',
                    bbox=dict(facecolor='black', alpha=0.7, edgecolor='none', pad=1))

        # Draw model predictions
        # NOTE(review): boxes are in the coordinate system of the transformed
        # tensor but are drawn on the original image; confirm that
        # test_transforms does not resize, otherwise the boxes will not line up.
        boxes = predictions[0]['boxes'].cpu().numpy()
        labels = predictions[0]['labels'].cpu().numpy()
        scores = predictions[0]['scores'].cpu().numpy()

        # Filter predictions based on confidence threshold
        keep = scores >= confidence_threshold
        boxes, labels, scores = boxes[keep], labels[keep], scores[keep]

        for box, label, score in zip(boxes, labels, scores):
            if label != 1:  # Only plot hazmat codes (adjust as needed)
                continue
            x1, y1, x2, y2 = box
            color = get_color_with_opacity(score)

            # Draw rectangle with opacity
            rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                     edgecolor=color, facecolor='none')
            ax.add_patch(rect)

            # Add text label with confidence score
            ax.text(x1, y1-10, f'Prediction: {score:.2f}',
                    color='white',
                    bbox=dict(facecolor=color[:3], alpha=0.6),
                    fontsize=12)

        ax.axis('off')
        fig.tight_layout()

        # Save or display the image
        if save_dir:
            save_path = os.path.join(save_dir, f"gt_pred_{file_name}")
            fig.savefig(save_path, bbox_inches='tight', pad_inches=0, dpi=300)
            plt.close(fig)
            print(f"Image saved to: {save_path}")
        else:
            plt.show()

# Example usage
def run_visualization(annotation_file, images_dir, model, device, test_transforms, save_dir=None):
    """
    Run the ground-truth/prediction visualisation with the notebook's fixed settings.

    Every argument is forwarded unchanged; the confidence threshold is pinned
    to 0.5 and the class names to background/hazmat. Nothing is returned.
    """
    fixed_settings = {
        "confidence_threshold": 0.5,
        "classes": ['background', 'hazmat'],
    }
    visualize_ground_truth_and_predictions(
        annotation_file=annotation_file,
        images_dir=images_dir,
        model=model,
        device=device,
        test_transforms=test_transforms,
        save_dir=save_dir,
        **fixed_settings,
    )

# Run the visualisation on the public test split with the objects created
# earlier in the notebook; results are written to save_dir.

run_visualization(
    annotation_file="data/public_dataset/test/annotations/instances_test.json",
    images_dir="data/public_dataset/test/images",
    model=model,  # the detection model loaded earlier
    device=device,  # torch.device object
    test_transforms=test_transforms,  # transform pipeline from get_transform(train=False)
    save_dir="output/ground_truth_and_predictions"  # figures are saved here instead of being shown
)


### IDs of images with problems:
#### Backlights
- 85
- 275
#### Duplicate UN number predictions
- 277
#### Small UN numbers are not predicted (false negatives)
- 144
- 135
- 133
#### Red letters on truck plates
- 215
- 154
#### Back of the truck identified as a UN number
- 257
#### Obscured UN numbers are hard to recognize
- 187
#### Yellow UN numbers are harder to recognize
- 256
- 13

In [None]:
# TODO: write a function that returns the author, link, and researcher for a given image id

In [None]:
from ultralytics import YOLO

# Load the YOLO weights. This rebinds `model`, so the Faster R-CNN object
# created earlier in the notebook is no longer reachable under that name.
model = YOLO(r"data/yolo_models/yolo11x_10epoch_augmented.pt")

In [None]:
# Bare expression: the notebook shows the model's repr as the cell output
model

In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import random
def draw_rectangles(image_path, results):
    """
    Draw every detection of a result object on the image and display it.

    Args:
        image_path (str): Path of the image to read with OpenCV.
        results: A single result object exposing ``boxes`` (each box with
            ``xyxy``, ``conf`` and ``cls``) and a ``names`` mapping from class
            id to label.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    # cv2.imread signals failure by returning None instead of raising
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    for box in results.boxes:
        x_min, y_min, x_max, y_max = map(int, box.xyxy[0])  # Convert to integers
        confidence = float(box.conf[0])  # Confidence score
        class_id = int(box.cls[0])  # Class ID
        label = results.names[class_id]  # Class label

        # Draw the bounding box
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

        # Keep the caption inside the frame when the box touches the top edge
        text_y = max(y_min - 10, 25)
        cv2.putText(image, f"{label} {confidence:.2f}", (x_min, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    # Convert BGR image to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Display the image using matplotlib
    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    plt.axis('off')
    plt.show()

In [None]:
from ultralytics import YOLO

# Load the YOLO weights. This rebinds `model`, so the Faster R-CNN object
# created earlier in the notebook is no longer reachable under that name.
model = YOLO(r"data/yolo_models/yolo11x_10epoch_augmented.pt")
import cv2
import matplotlib.pyplot as plt
import numpy as np
import random
def draw_rectangles(image_path, results):
    """
    Draw every detection of a result object on the image and display it.

    Args:
        image_path (str): Path of the image to read with OpenCV.
        results: A single result object exposing ``boxes`` (each box with
            ``xyxy``, ``conf`` and ``cls``) and a ``names`` mapping from class
            id to label.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    # cv2.imread signals failure by returning None instead of raising
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    for box in results.boxes:
        x_min, y_min, x_max, y_max = map(int, box.xyxy[0])  # Convert to integers
        confidence = float(box.conf[0])  # Confidence score
        class_id = int(box.cls[0])  # Class ID
        label = results.names[class_id]  # Class label

        # Draw the bounding box
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

        # Keep the caption inside the frame when the box touches the top edge
        text_y = max(y_min - 10, 25)
        cv2.putText(image, f"{label} {confidence:.2f}", (x_min, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    # Convert BGR image to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Display the image using matplotlib
    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    plt.axis('off')
    plt.show()
# Get predictions for the sample image; the model call returns an iterable of
# results, each of which is printed and drawn on the image at result.path.
results = model("images/hazard_plate.jpg")
for result in results:
    print(result.boxes)
    draw_rectangles(result.path, result)

In [None]:
import json
import random
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np
import time
import matplotlib.patches as patches
from collections import defaultdict
from ultralytics import YOLO

def get_color_with_opacity(score):
    """
    Map a confidence score to an RGBA tuple used for drawing a detection.

    Scores above 0.75 give pure red with alpha ``min(1.0, 0.3 + score)``;
    every other score gives a randomly drawn RGB colour with alpha
    ``max(0.3, score)``.
    """
    is_confident = score > 0.75
    if not is_confident:
        # Low confidence: three random channels, alpha floored at 0.3
        red, green, blue = random.random(), random.random(), random.random()
        return (red, green, blue, max(0.3, score))
    # High confidence: solid red, alpha capped at 1.0
    return (1, 0, 0, min(1.0, 0.3 + score))

def visualize_ground_truth_and_predictions(annotation_file, images_dir, model,
                                          confidence_threshold=0.5,
                                          save_dir=None):
    """
    Visualize both ground truth and YOLO model predictions on images.

    Ground-truth boxes are drawn in white; predictions whose confidence is at
    least ``confidence_threshold`` are drawn in a colour that depends on the
    confidence. Image entries that share a file name are handled once, and
    entries without annotations are skipped.

    Args:
        annotation_file (str): Path to the annotations JSON file
        images_dir (str): Directory containing the images
        model: The YOLO model
        confidence_threshold (float): Threshold for prediction confidence
        save_dir (str, optional): Directory to save output images instead of displaying
    """
    # Parse the annotations and close the file before the long drawing loop
    with open(annotation_file) as f:
        data = json.load(f)

    # Create a dictionary mapping image_id to all its annotations
    image_annotations = defaultdict(list)
    for annotation in data["annotations"]:
        image_annotations[annotation["image_id"]].append(annotation)

    # The output directory only has to be created once, not once per image
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # File names already handled, so duplicate entries are drawn only once
    processed_images = set()
    for image_data in data["images"]:
        image_id = image_data["id"]
        file_name = image_data["file_name"]

        # Skip if we've already processed this image or if it has no annotations
        if file_name in processed_images or image_id not in image_annotations:
            continue

        # Mark as processed
        processed_images.add(file_name)

        image_path = os.path.join(images_dir, file_name)

        # Load image for CV2/matplotlib display
        cv_image = cv2.imread(image_path)
        if cv_image is None:
            print(f"Image not found: {image_path}")
            continue

        # Convert BGR (OpenCV format) to RGB (Matplotlib format)
        cv_image_rgb = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)

        # Make predictions with YOLO (the model reads the file itself)
        start_time = time.time()
        results = model(image_path)
        prediction_time = time.time() - start_time
        print(f"Prediction time for {file_name}: {prediction_time:.4f} seconds")

        # Create figure and axis
        fig, ax = plt.subplots(1, figsize=(12, 10))
        ax.imshow(cv_image_rgb)

        # Draw ground truth annotations (white boxes)
        for annotation in image_annotations[image_id]:
            x, y, w, h = (round(value) for value in annotation["bbox"])

            # Draw the bounding box with white lines
            rect = patches.Rectangle((x, y), w, h, linewidth=2, edgecolor='white', facecolor='none')
            ax.add_patch(rect)

            # Add "Ground Truth" label to the right of the box's top edge
            ax.text(x + w + 5, y + 5, "Ground Truth", color='white', fontsize=12,
                    bbox=dict(facecolor='black', alpha=0.7, edgecolor='none', pad=1))

        # Draw YOLO model predictions (colored boxes based on confidence)
        for result in results:
            for box in result.boxes:
                confidence = float(box.conf[0])

                # Filter predictions based on confidence threshold
                if confidence < confidence_threshold:
                    continue

                x1, y1, x2, y2 = map(int, box.xyxy[0])
                color = get_color_with_opacity(confidence)

                # Draw rectangle with opacity
                rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                         edgecolor=color, facecolor='none')
                ax.add_patch(rect)

                # Add text label with confidence score
                ax.text(x1, y1 - 10, f'Prediction: {confidence:.2f}',
                        color='white',
                        bbox=dict(facecolor=color[:3], alpha=0.6),
                        fontsize=12)

        ax.axis('off')
        fig.tight_layout()

        # Save or display the image
        if save_dir:
            save_path = os.path.join(save_dir, f"gt_pred_{file_name}")
            fig.savefig(save_path, bbox_inches='tight', pad_inches=0, dpi=300)
            plt.close(fig)
            print(f"Image saved to: {save_path}")
        else:
            plt.show()

def run_visualization(annotation_file, images_dir, model_path, save_dir=None,
                      confidence_threshold=0.5):
    """
    Load a YOLO model from disk and visualize it against ground truth.

    Args:
        annotation_file (str): Path to the COCO format annotation file
        images_dir (str): Directory containing images
        model_path (str): Path to YOLO model file
        save_dir (str, optional): Directory to save output images; when left
            as None it is forwarded unchanged to the visualization function
        confidence_threshold (float, optional): Minimum confidence forwarded
            to the visualization function. Defaults to 0.5, the value that
            used to be hard-coded here.
    """
    # Load YOLO model
    model = YOLO(model_path)

    # Delegate the actual drawing; every argument is passed through as-is.
    visualize_ground_truth_and_predictions(
        annotation_file=annotation_file,
        images_dir=images_dir,
        model=model,
        confidence_threshold=confidence_threshold,
        save_dir=save_dir,
    )

# Example usage: this call executes as soon as the cell runs. Because save_dir
# is left commented out, it is passed as None and every figure is displayed
# with plt.show() instead of being written to disk.

run_visualization(
    annotation_file="data/public_dataset/test/annotations/instances_test.json",
    images_dir="data/public_dataset/test/images",
    model_path="data/yolo_models/yolo11x_10epoch_augmented.pt",
    # save_dir="output/ground_truth_and_predictions"  # Optional: directory to save results
)

# Weather Augmentations eval
## YOLO v11

In [None]:
import json
import random
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np
import time
import re
import matplotlib.patches as patches
from collections import defaultdict
from ultralytics import YOLO

def extract_id(filename):
    """Return the integer that follows ``aug_`` in *filename*, or None.

    For example ``'shadow-0-aug_124.jpg'`` yields ``124``. Only the first
    ``aug_<digits>`` occurrence is considered; names without one give None.
    """
    found = re.search(r'aug_(\d+)', filename)
    if not found:
        return None
    return int(found.group(1))

def get_color_with_opacity(score):
    """
    Map a confidence score to an RGBA tuple used for drawing a prediction.

    Scores above 0.75 give pure red; the alpha is min(1.0, 0.3 + score),
    which for such scores always works out to 1.0.
    Any other score gives a freshly drawn random RGB colour whose alpha is
    the score itself, but never less than 0.3.
    """
    if not score > 0.75:
        # Not a high-confidence score: random hue, alpha floored at 0.3.
        red, green, blue = random.random(), random.random(), random.random()
        return (red, green, blue, max(0.3, score))

    # High confidence: solid red.
    return (1, 0, 0, min(1.0, 0.3 + score))

def visualize_ground_truth_and_predictions(annotation_file, images_dir, model,
                                          confidence_threshold=0.5,
                                          save_dir=None):
    """
    Visualize both ground truth and YOLO model predictions on images.

    Every image file found in ``images_dir`` is matched to an image entry of
    the annotation file through the number that follows ``aug_`` in its name
    (see ``extract_id``). The annotation entries are indexed the same way,
    by applying ``extract_id`` to their own ``file_name``; entries whose
    ``file_name`` has no ``aug_<n>`` part are therefore never matched, and if
    two entries share a number the later one wins.

    Files are skipped (with a printed message) when no number can be
    extracted, no annotation-file image matches, the matched image has no
    annotations, or OpenCV cannot decode the file.

    Args:
        annotation_file (str): Path to the annotations JSON file
        images_dir (str): Directory containing the images
        model: The YOLO model; called with the image path
        confidence_threshold (float): Threshold for prediction confidence
        save_dir (str, optional): Directory to save output images instead of displaying

    Returns:
        None. Figures are either saved under ``save_dir`` or shown.
    """
    # Read the annotations and release the file handle right away; nothing
    # in the rendering loop below needs the file to stay open.
    with open(annotation_file) as f:
        data = json.load(f)

    # Create a dictionary mapping image_id to all its annotations
    image_annotations = defaultdict(list)
    for annotation in data["annotations"]:
        image_annotations[annotation["image_id"]].append(annotation)

    # Create a dictionary mapping numeric ID to image data
    # NOTE(review): this relies on the annotation file names containing
    # "aug_<n>" as well - confirm against the dataset being used.
    id_to_image = {}
    for image in data["images"]:
        numeric_id = extract_id(image["file_name"])
        if numeric_id is not None:
            id_to_image[numeric_id] = image

    # Collect image files; the extension check ignores case and the listing
    # is sorted so that runs are reproducible (os.listdir order is arbitrary).
    image_files = sorted(
        name for name in os.listdir(images_dir)
        if name.lower().endswith(('.jpg', '.jpeg', '.png'))
    )

    # Process each image file
    for file_name in image_files:
        # Extract numeric ID from filename
        numeric_id = extract_id(file_name)
        if numeric_id is None:
            print(f"Couldn't extract numeric ID from {file_name}, skipping")
            continue

        # Find the corresponding image data and annotations
        if numeric_id not in id_to_image:
            print(f"No image data found for ID {numeric_id} ({file_name}), skipping")
            continue

        image_id = id_to_image[numeric_id]["id"]

        # Skip if it has no annotations ("in" does not create a defaultdict entry)
        if image_id not in image_annotations:
            print(f"No annotations found for image ID {image_id} ({file_name}), skipping")
            continue

        image_path = os.path.join(images_dir, file_name)

        # Load image for CV2/matplotlib display. The file was just listed, so
        # a None result means it could not be decoded rather than not found.
        cv_image = cv2.imread(image_path)
        if cv_image is None:
            print(f"Failed to read image: {image_path}")
            continue

        # Convert BGR (OpenCV format) to RGB (Matplotlib format)
        cv_image_rgb = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)

        # Make predictions with YOLO; perf_counter is the monotonic clock
        # intended for measuring short intervals.
        start_time = time.perf_counter()
        results = model(image_path)
        prediction_time = time.perf_counter() - start_time
        print(f"Prediction time for {file_name}: {prediction_time:.4f} seconds")

        # Create figure and axis
        fig, ax = plt.subplots(1, figsize=(12, 10))
        ax.imshow(cv_image_rgb)

        # Draw ground truth annotations (white boxes); bbox is [x, y, w, h]
        for annotation in image_annotations[image_id]:
            x, y, w, h = annotation["bbox"]
            x, y, w, h = round(x), round(y), round(w), round(h)

            rect = patches.Rectangle((x, y), w, h, linewidth=2, edgecolor='white', facecolor='none')
            ax.add_patch(rect)

            # Add "Ground Truth" label just right of the box
            ax.text(x + w + 5, y + 5, "Ground Truth", color='white', fontsize=12,
                    bbox=dict(facecolor='black', alpha=0.7, edgecolor='none', pad=1))

        # Draw YOLO model predictions (colored boxes based on confidence)
        for result in results:
            for box in result.boxes:
                confidence = float(box.conf[0])

                # Filter predictions based on confidence threshold
                if confidence >= confidence_threshold:
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    class_id = int(box.cls[0])
                    label = result.names.get(class_id, f"Class {class_id}")

                    color = get_color_with_opacity(confidence)

                    # Draw rectangle with opacity
                    rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                             edgecolor=color, facecolor='none')
                    ax.add_patch(rect)

                    # Add text label with confidence score
                    ax.text(x1, y1 - 10, f'{label}: {confidence:.2f}',
                            color='white',
                            bbox=dict(facecolor=color[:3], alpha=0.6),
                            fontsize=12)

        ax.axis('off')
        fig.tight_layout()

        # Save or display the image
        if save_dir:
            # Ensure directory exists
            os.makedirs(save_dir, exist_ok=True)
            save_path = os.path.join(save_dir, f"gt_pred_{file_name}")
            fig.savefig(save_path, bbox_inches='tight', pad_inches=0, dpi=300)
            print(f"Image saved to: {save_path}")
        else:
            plt.show()

        # Always release the figure; otherwise one figure per image stays
        # registered with pyplot when displaying instead of saving.
        plt.close(fig)

def run_visualization(annotation_file, images_dir, model_path, save_dir=None,
                      confidence_threshold=0.5):
    """
    Load a YOLO model from disk and visualize it against ground truth.

    Args:
        annotation_file (str): Path to the COCO format annotation file
        images_dir (str): Directory containing images
        model_path (str): Path to YOLO model file
        save_dir (str, optional): Directory to save output images; when left
            as None the figures are shown instead of saved
        confidence_threshold (float, optional): Minimum confidence a
            prediction needs to be drawn. Defaults to 0.5, the value that
            used to be hard-coded here.
    """
    # Load YOLO model
    model = YOLO(model_path)

    # Delegate the actual drawing; every argument is passed through as-is.
    visualize_ground_truth_and_predictions(
        annotation_file=annotation_file,
        images_dir=images_dir,
        model=model,
        confidence_threshold=confidence_threshold,
        save_dir=save_dir,
    )

# Example usage: this call executes as soon as the cell runs. It checks the
# images under output/sun_flare (presumably the sun-flare augmentation output)
# against the public test annotations. save_dir is commented out, so figures
# are displayed with plt.show() rather than saved.

run_visualization(
    annotation_file="data/public_dataset/test/annotations/instances_test.json",
    images_dir="output/sun_flare",
    model_path="data/yolo_models/yolo11x_10epoch_augmented.pt",
    # save_dir="output/ground_truth_and_predictions"  # Optional: directory to save results
)

In [None]:
import json
import random
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np
import time
import matplotlib.patches as patches
from collections import defaultdict
from ultralytics import YOLO

def get_color_with_opacity(score):
    """
    Pick the RGBA drawing colour for a prediction from its confidence score.

    Above 0.75 the colour is red with alpha min(1.0, 0.3 + score), which is
    always 1.0 in that range. Otherwise the RGB channels are random and the
    alpha equals the score, clamped from below at 0.3.
    """
    high_confidence = score > 0.75
    if high_confidence:
        rgba = (1, 0, 0, min(1.0, 0.3 + score))
    else:
        # Three independent draws, one per colour channel.
        rgba = (random.random(), random.random(), random.random(), max(0.3, score))
    return rgba

def visualize_ground_truth_and_predictions(annotation_file, images_dir, model,
                                          confidence_threshold=0.5,
                                          save_dir=None):
    """
    Visualize both ground truth and YOLO model predictions on images.

    The loop is driven by the annotation file: for every image ID that has
    at least one annotation, the image path is built as
    ``images_dir + <file_name without extension> + ".jpg"``.

    Args:
        annotation_file (str): Path to the annotations JSON file in COCO format
        images_dir (str): Path *prefix* that is concatenated directly with the
            annotated file's stem, e.g. ``"output/sun_flare/sun_flare-0-aug_"``.
            No path separator is inserted, so a plain directory must end
            with one.
        model: The YOLO model; called with the image path
        confidence_threshold (float): Threshold for prediction confidence
        save_dir (str, optional): Directory to save output images instead of displaying

    Returns:
        None. Figures are either saved under ``save_dir`` or shown.
    """
    # Read the annotations and release the file handle right away; nothing
    # in the rendering loop below needs the file to stay open.
    with open(annotation_file) as f:
        data = json.load(f)

    # Create a dictionary mapping image_id to all its annotations
    image_annotations = defaultdict(list)
    for annotation in data["annotations"]:
        image_annotations[annotation["image_id"]].append(annotation)

    # Create a dictionary mapping image_id to image data (including filename)
    image_data_by_id = {image["id"]: image for image in data["images"]}

    # Process each image defined in the annotations
    for image_id, annotations in image_annotations.items():
        # Skip if we don't have image data for this id
        if image_id not in image_data_by_id:
            print(f"No image data found for image ID {image_id}, skipping")
            continue

        file_name = image_data_by_id[image_id]["file_name"]

        # The original extension is dropped and ".jpg" is always used.
        file_name_no_ext = os.path.splitext(file_name)[0]
        image_path = images_dir + file_name_no_ext + ".jpg"

        # Check if file exists
        if not os.path.exists(image_path):
            print(f"Image not found: {image_path}")
            continue

        # Load image for CV2/matplotlib display
        cv_image = cv2.imread(image_path)
        if cv_image is None:
            print(f"Failed to read image: {image_path}")
            continue

        # Convert BGR (OpenCV format) to RGB (Matplotlib format)
        cv_image_rgb = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)

        # Make predictions with YOLO; perf_counter is the monotonic clock
        # intended for measuring short intervals.
        start_time = time.perf_counter()
        results = model(image_path)
        prediction_time = time.perf_counter() - start_time
        print(f"Prediction time for {file_name} (ID: {image_id}): {prediction_time:.4f} seconds")

        # Create figure and axis
        fig, ax = plt.subplots(1, figsize=(12, 10))
        ax.imshow(cv_image_rgb)

        # Draw ground truth annotations (white boxes); bbox is [x, y, w, h]
        for annotation in annotations:
            x, y, w, h = annotation["bbox"]
            x, y, w, h = round(x), round(y), round(w), round(h)

            rect = patches.Rectangle((x, y), w, h, linewidth=2, edgecolor='white', facecolor='none')
            ax.add_patch(rect)

            # Add "Ground Truth" label just right of the box
            ax.text(x + w + 5, y + 5, "Ground Truth", color='white', fontsize=12,
                    bbox=dict(facecolor='black', alpha=0.7, edgecolor='none', pad=1))

        # Draw YOLO model predictions (colored boxes based on confidence)
        for result in results:
            for box in result.boxes:
                confidence = float(box.conf[0])

                # Filter predictions based on confidence threshold
                if confidence >= confidence_threshold:
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    class_id = int(box.cls[0])
                    label = result.names.get(class_id, f"Class {class_id}")

                    color = get_color_with_opacity(confidence)

                    # Draw rectangle with opacity
                    rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                             edgecolor=color, facecolor='none')
                    ax.add_patch(rect)

                    # Add text label with confidence score
                    ax.text(x1, y1 - 10, f'{label}: {confidence:.2f}',
                            color='white',
                            bbox=dict(facecolor=color[:3], alpha=0.6),
                            fontsize=12)

        # Add image filename and ID as title
        ax.set_title(f"File: {file_name} (ID: {image_id})", fontsize=14)
        ax.axis('off')
        fig.tight_layout()

        # Save or display the image
        if save_dir:
            # Ensure directory exists
            os.makedirs(save_dir, exist_ok=True)
            save_path = os.path.join(save_dir, f"gt_pred_{file_name}")
            fig.savefig(save_path, bbox_inches='tight', pad_inches=0, dpi=300)
            print(f"Image saved to: {save_path}")
        else:
            plt.show()

        # Always release the figure; otherwise one figure per image stays
        # registered with pyplot when displaying instead of saving.
        plt.close(fig)

def run_visualization(annotation_file, images_dir, model_path, save_dir=None,
                      confidence_threshold=0.5):
    """
    Load a YOLO model from disk and visualize it against ground truth.

    Args:
        annotation_file (str): Path to the COCO format annotation file
        images_dir (str): Forwarded unchanged to
            visualize_ground_truth_and_predictions, which in this cell
            concatenates it directly with each annotated file stem (so it
            acts as a path prefix such as "output/sun_flare/sun_flare-0-aug_")
        model_path (str): Path to YOLO model file
        save_dir (str, optional): Directory to save output images; when left
            as None the figures are shown instead of saved
        confidence_threshold (float, optional): Minimum confidence a
            prediction needs to be drawn. Defaults to 0.5, the value that
            used to be hard-coded here.
    """
    # Load YOLO model
    model = YOLO(model_path)

    # Delegate the actual drawing; every argument is passed through as-is.
    visualize_ground_truth_and_predictions(
        annotation_file=annotation_file,
        images_dir=images_dir,
        model=model,
        confidence_threshold=confidence_threshold,
        save_dir=save_dir,
    )

# Example usage

# run_visualization(
#     annotation_file="data/public_dataset/test/annotations/instances_test.json",
#     images_dir="output/sun_flare/sun_flare-0-aug_",
#     model_path="data/yolo_models/yolo11x_10epoch_augmented.pt",
#     # save_dir="output/ground_truth_and_predictions"  # Optional: directory to save results
# )

## Faster RCNN

In [None]:
import json
import random
import matplotlib.pyplot as plt
import cv2
import os
import torch
import numpy as np
import time
from PIL import Image
import matplotlib.patches as patches
from collections import defaultdict

def get_color_with_opacity(score):
    """
    Translate a confidence score into an RGBA tuple for drawing.

    Scores above 0.75 produce red; its alpha, min(1.0, 0.3 + score), is
    always 1.0 for such scores. Every other score produces random RGB
    channels with alpha equal to the score but at least 0.3.
    """
    if not score > 0.75:
        # Low confidence: one random draw per channel, alpha floored at 0.3.
        r, g, b = random.random(), random.random(), random.random()
        return (r, g, b, max(0.3, score))

    # High confidence: solid red.
    return (1, 0, 0, min(1.0, 0.3 + score))

def load_image(image_path, transforms=None):
    """
    Load an image as RGB and optionally run it through inference transforms.

    Args:
        image_path (str): Path of the image file to open with PIL.
        transforms (optional): Either an iterable of callables, applied in
            order, or a single callable pipeline (for example a Compose-style
            object). Each callable is invoked as ``t(image, target=None)``
            and must return an ``(image, target)`` pair. A falsy value
            (None, empty list) skips the transforms.

    Returns:
        The RGB PIL image when no transforms are given, otherwise whatever
        the last transform returned as its image.
    """
    # Convert inside the context manager so the underlying file handle is
    # released as soon as the pixel data has been read.
    with Image.open(image_path) as source:
        image = source.convert('RGB')

    if not transforms:
        return image

    # A single callable pipeline is not iterable, so apply it as one step
    # instead of failing with "object is not iterable".
    pipeline = [transforms] if callable(transforms) else transforms
    for transform in pipeline:
        image, _ = transform(image, target=None)  # No target during inference
    return image

def visualize_ground_truth_and_predictions(annotation_file, images_dir, model, device, test_transforms,
                                             confidence_threshold=0.5, classes=('background', 'hazmat'), save_dir=None):
    """
    Visualize both ground truth and Faster R-CNN model predictions on images.

    The loop is driven by the annotation file: for every image ID that has
    at least one annotation, the image path is built as
    ``images_dir + <file_name without extension> + ".jpg"``.

    Args:
        annotation_file (str): Path to the annotations JSON file (COCO format)
        images_dir (str): Path *prefix* that is concatenated directly with the
            annotated file's stem, e.g. ``"output/sun_flare/sun_flare-0-aug_"``.
            No path separator is inserted, so a plain directory must end
            with one.
        model: The Faster R-CNN model; called with a one-element list holding
            the transformed image
        device: torch.device (CPU/GPU) the transformed image is moved to
        test_transforms: Transforms forwarded to ``load_image``; their result
            must support ``.to(device)``
        confidence_threshold (float): Threshold for prediction confidence
        classes (sequence): Class names indexed by predicted label; labels
            beyond its length are shown as ``"Class <label>"``
        save_dir (str, optional): Directory to save output images instead of displaying

    Returns:
        None. Figures are either saved under ``save_dir`` or shown.
    """
    # Load annotations
    with open(annotation_file) as f:
        data = json.load(f)

    # Map image IDs to annotations
    image_annotations = defaultdict(list)
    for annotation in data["annotations"]:
        image_annotations[annotation["image_id"]].append(annotation)

    # Map image IDs to image data
    image_data_by_id = {image["id"]: image for image in data["images"]}

    for image_id, annotations in image_annotations.items():
        # Skip if image data isn't found
        if image_id not in image_data_by_id:
            print(f"No image data found for image ID {image_id}, skipping")
            continue

        file_name = image_data_by_id[image_id]["file_name"]

        # The original extension is dropped and ".jpg" is always used.
        file_name_no_ext = os.path.splitext(file_name)[0]
        image_path = images_dir + file_name_no_ext + ".jpg"

        if not os.path.exists(image_path):
            print(f"Image not found: {image_path}")
            continue

        # Load image for display via OpenCV
        cv_image = cv2.imread(image_path)
        if cv_image is None:
            print(f"Failed to read image: {image_path}")
            continue
        cv_image_rgb = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)

        # Prepare image for model prediction using PIL and transforms
        tensor_image = load_image(image_path, transforms=test_transforms)
        tensor_image = tensor_image.to(device)

        # Get predictions from Faster R-CNN (wrap in no_grad)
        # NOTE(review): the model is used as passed in; presumably the caller
        # has already switched it to eval mode - confirm.
        start_time = time.perf_counter()
        with torch.no_grad():
            predictions = model([tensor_image])
        prediction_time = time.perf_counter() - start_time
        print(f"Prediction time for {file_name} (ID: {image_id}): {prediction_time:.4f} seconds")

        # Create figure for plotting
        fig, ax = plt.subplots(1, figsize=(12, 10))
        ax.imshow(cv_image_rgb)

        # Draw ground truth annotations (white boxes); bbox is [x, y, w, h]
        for annotation in annotations:
            x, y, w, h = map(round, annotation["bbox"])
            rect = patches.Rectangle((x, y), w, h, linewidth=2, edgecolor='white', facecolor='none')
            ax.add_patch(rect)
            ax.text(x + w + 5, y + 5, "Ground Truth", color='white', fontsize=12,
                    bbox=dict(facecolor='black', alpha=0.7, edgecolor='none', pad=1))

        # Process Faster R-CNN predictions (only one image was submitted)
        pred = predictions[0]
        boxes = pred['boxes'].cpu().numpy()
        labels = pred['labels'].cpu().numpy()
        scores = pred['scores'].cpu().numpy()

        # Filter predictions based on confidence threshold
        keep = scores >= confidence_threshold

        for box, label, score in zip(boxes[keep], labels[keep], scores[keep]):
            # Get class name from the provided classes sequence
            label_name = classes[label] if label < len(classes) else f"Class {label}"
            x1, y1, x2, y2 = box
            color = get_color_with_opacity(score)
            rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                     edgecolor=color, facecolor='none')
            ax.add_patch(rect)
            ax.text(x1, y1 - 10, f'{label_name}: {score:.2f}',
                    color='white',
                    bbox=dict(facecolor=color[:3], alpha=0.6),
                    fontsize=12)

        ax.set_title(f"File: {file_name} (ID: {image_id})", fontsize=14)
        ax.axis('off')
        fig.tight_layout()

        # Save or display the image
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
            save_path = os.path.join(save_dir, f"gt_pred_{file_name}")
            fig.savefig(save_path, bbox_inches='tight', pad_inches=0, dpi=300)
            print(f"Image saved to: {save_path}")
        else:
            plt.show()

        # Always release the figure; otherwise one figure per image stays
        # registered with pyplot when displaying instead of saving.
        plt.close(fig)

def run_visualization(annotation_file, images_dir, model, device, test_transforms,
                      confidence_threshold=0.5, classes=('background', 'hazmat'), save_dir=None):
    """
    Run the visualization for a Faster R-CNN model.

    Thin entry point: every argument is forwarded unchanged to
    visualize_ground_truth_and_predictions.

    Args:
        annotation_file (str): Path to the annotations JSON file
        images_dir (str): Image location; the visualization function in this
            cell concatenates it directly with each annotated file stem, so
            it acts as a path prefix
        model: Loaded Faster R-CNN model
        device: torch.device object (CPU or GPU)
        test_transforms: Transformation pipeline for model input
        confidence_threshold (float): Threshold for displaying predictions
        classes (sequence): Class names indexed by predicted label. The
            default is a tuple so that no mutable object is shared between
            calls.
        save_dir (str, optional): Directory to save output images
    """
    visualize_ground_truth_and_predictions(
        annotation_file=annotation_file,
        images_dir=images_dir,
        model=model,
        device=device,
        test_transforms=test_transforms,
        confidence_threshold=confidence_threshold,
        classes=classes,
        save_dir=save_dir,
    )

# Example usage:
# Assuming you've already loaded your Faster R-CNN model, defined your device (e.g., torch.device("cuda" if torch.cuda.is_available() else "cpu")),
# and set up your test_transforms:

# run_visualization(
#     annotation_file="data/public_dataset/test/annotations/instances_test.json",
#     images_dir="output/sun_flare/sun_flare-0-aug_",
#     model=model,              # Your loaded Faster R-CNN model
#     device=device,            # torch.device instance
#     test_transforms=test_transforms,  # Your transformation pipeline
#     # save_dir="output/ground_truth_and_predictions"  # Optional: directory to save results
# )

In [None]:
# We want to write the same code, but for Faster R-CNN.