# Imports

In [None]:
print("Importing libraries...")
import torch
from torch.cuda.amp import autocast, GradScaler
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torch.utils.data import Dataset
import torchvision.transforms as T
import torchvision.transforms.functional as F
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.ops import MultiScaleRoIAlign
import os
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from PIL import Image
import contextlib
import io
import time
import random
from torch.utils.data import Subset
from datetime import datetime
from tqdm import tqdm
import time
import numpy as np
import matplotlib.pyplot as plt
import albumentations as A



# Functions

In [None]:
# Module-level gradient scaler; it is handed to train_model/train_one_epoch,
# which use it to scale the loss before backward() and to step the optimizer.
scaler = GradScaler()

# Define the dataset class
class HazmatDataset(Dataset):
    """Detection dataset backed by a COCO-style annotation file.

    Parameters:
    - data_dir: Directory that contains an ``images`` sub-directory.
    - annotations_file: Path to a JSON file with ``images`` and ``annotations`` lists.
    - transforms: Optional iterable of callables ``(image, target) -> (image, target)``
      applied in order.

    Each item is an ``(image, target)`` pair where ``target`` holds the keys
    ``boxes`` ([N, 4] in x1, y1, x2, y2 order), ``labels``, ``image_id``,
    ``area`` and ``iscrowd``.
    """

    def __init__(self, data_dir, annotations_file, transforms=None):
        self.data_dir = data_dir
        self.transforms = transforms

        # Load annotations
        with open(annotations_file) as f:
            data = json.load(f)

        self.images = {img['id']: img for img in data['images']}
        self.annotations = data['annotations']

        # Create image_id to annotations mapping
        self.img_to_anns = {}
        for ann in self.annotations:
            self.img_to_anns.setdefault(ann['image_id'], []).append(ann)

        self.ids = list(self.images.keys())

    def _build_target(self, img_id):
        """Build the target dict for ``img_id`` from its annotations."""
        anns = self.img_to_anns.get(img_id, [])

        boxes = []
        labels = []
        areas = []
        iscrowd = []

        for ann in anns:
            bbox = ann['bbox']
            # Convert [x, y, w, h] to [x1, y1, x2, y2]
            boxes.append([
                bbox[0],
                bbox[1],
                bbox[0] + bbox[2],
                bbox[1] + bbox[3]
            ])
            labels.append(ann['category_id'])
            # Fall back to derived/neutral values when the optional keys are absent
            areas.append(ann.get('area', bbox[2] * bbox[3]))
            iscrowd.append(ann.get('iscrowd', 0))

        # reshape(-1, 4) keeps the [N, 4] layout even when the image has no
        # annotations; a bare empty list would otherwise become shape (0,).
        return {
            'boxes': torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            'labels': torch.as_tensor(labels, dtype=torch.int64),
            'image_id': torch.tensor([img_id]),
            'area': torch.as_tensor(areas, dtype=torch.float32),
            'iscrowd': torch.as_tensor(iscrowd, dtype=torch.int64)
        }

    def __getitem__(self, idx):
        img_id = self.ids[idx]
        img_info = self.images[img_id]

        # Load image
        img_path = os.path.join(self.data_dir, 'images', img_info['file_name'])
        img = Image.open(img_path).convert('RGB')

        target = self._build_target(img_id)

        if self.transforms is not None:
            for transform in self.transforms:
                img, target = transform(img, target)

        return img, target

    def __len__(self):
        return len(self.ids)
    
class ToTensor:
    """Paired transform that converts the image to a tensor and passes the target through."""

    def __call__(self, image, target):
        # Only the image is converted; the target is returned untouched
        return F.to_tensor(image), target

class RandomHorizontalFlip(object):
    """Paired transform that mirrors an image tensor and its boxes left-to-right.

    Parameters:
    - prob: Probability of applying the flip to a given sample.

    The image must already be a tensor (its last two dimensions are read as
    height and width). ``target`` may be ``None`` or contain an empty ``boxes``
    tensor; in both cases only the image is flipped.
    """

    def __init__(self, prob):
        self.prob = prob

    def __call__(self, image, target):
        if torch.rand(1) < self.prob:
            width = image.shape[-1]
            # Reversing the last (width) dimension is a horizontal flip
            image = image.flip(-1)
            boxes = None if target is None else target.get("boxes")
            if boxes is not None and boxes.numel() > 0:
                # Work on a copy so the caller's tensor is not modified in place
                flipped = boxes.clone()
                # New x1 is width - old x2, new x2 is width - old x1
                flipped[:, [0, 2]] = width - boxes[:, [2, 0]]
                target["boxes"] = flipped
        return image, target

def get_transform(train):
    """Return the list of paired (image, target) transforms.

    Tensor conversion is always first; a horizontal flip with probability 0.5
    is appended when ``train`` is truthy.
    """
    pipeline = [ToTensor()]
    if train:
        # Training-only augmentation
        pipeline.append(RandomHorizontalFlip(0.5))
    return pipeline

def collate_fn(batch):
    """Transpose a batch of per-sample tuples into a tuple of per-field tuples."""
    columns = zip(*batch)
    return tuple(columns)

def train_one_epoch(model, optimizer, data_loader, device, scaler):
    """Run one training pass over ``data_loader`` and return the mean losses.

    Parameters:
    - model: Detection model that returns a dict of losses when called with
      images and targets in training mode.
    - optimizer: Optimizer stepped once per batch through ``scaler``.
    - data_loader: Iterable yielding ``(images, targets)`` batches.
    - device: Device the images and target tensors are moved to.
    - scaler: Gradient scaler used to scale the loss and step the optimizer.

    Returns:
    - Tuple ``(avg_loss, avg_classifier_loss, avg_box_reg_loss,
      avg_objectness_loss, avg_rpn_box_reg_loss)`` averaged over batches.
      All values are 0.0 when the loader yields no batches.
    """
    model.train()
    loss_keys = ('loss_classifier', 'loss_box_reg', 'loss_objectness', 'loss_rpn_box_reg')
    total_loss = 0.0
    totals = dict.fromkeys(loss_keys, 0.0)

    # Wrap the loader in tqdm to show progress
    progress_bar = tqdm(data_loader, desc="Training", leave=True)

    for images, targets in progress_bar:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Wrap the forward pass in autocast
        with autocast():
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        # Scale the loss and call backward
        scaler.scale(losses).backward()
        # Unscales the gradients and calls or skips optimizer.step()
        scaler.step(optimizer)
        # Updates the scale for next iteration
        scaler.update()

        # Read each scalar once and reuse it for the totals and the progress bar
        batch_loss = losses.item()
        batch_parts = {key: loss_dict[key].item() for key in loss_keys}

        # Accumulate the running totals
        total_loss += batch_loss
        for key, value in batch_parts.items():
            totals[key] += value

        # Update the tqdm bar
        progress_bar.set_postfix({
            "Loss": f"{batch_loss:.4f}",
            "Classifier": f"{batch_parts['loss_classifier']:.4f}",
            "BoxReg": f"{batch_parts['loss_box_reg']:.4f}",
        })

    num_batches = len(data_loader)
    if num_batches == 0:
        # Nothing was trained; avoid dividing by zero
        return 0.0, 0.0, 0.0, 0.0, 0.0

    return (
        total_loss / num_batches,
        totals['loss_classifier'] / num_batches,
        totals['loss_box_reg'] / num_batches,
        totals['loss_objectness'] / num_batches,
        totals['loss_rpn_box_reg'] / num_batches,
    )



# Load ground truth annotations for the validation split; this COCO object is
# later passed to train_model as the reference for COCO evaluation.
coco_val = COCO('data/data_faster_rcnn/val/annotations/instances_val.json')

# Prepare predictions in COCO format
# Assuming you have a function to convert model outputs to COCO format
# Conversion to COCO Format
def convert_to_coco_format(outputs, image_ids):
    """Convert per-image model outputs into a flat list of COCO result dicts.

    Parameters:
    - outputs: Sequence of dicts with tensor entries ``boxes`` (x1, y1, x2, y2),
      ``scores`` and ``labels``.
    - image_ids: Image id for each entry of ``outputs``, in the same order.

    Returns:
    - List of dicts with keys ``image_id``, ``category_id``, ``bbox``
      ([x, y, width, height]) and ``score``. All numeric values are plain
      Python numbers so the list can be serialized with ``json``.
    """
    coco_results = []
    for output, image_id in zip(outputs, image_ids):
        boxes = output['boxes'].cpu().numpy()
        scores = output['scores'].cpu().numpy()
        labels = output['labels'].cpu().numpy()

        for box, score, label in zip(boxes, scores, labels):
            coco_results.append({
                'image_id': image_id,
                'category_id': int(label),
                # Corner format -> [x, y, w, h]; float() drops the NumPy scalar type
                'bbox': [
                    float(box[0]),
                    float(box[1]),
                    float(box[2] - box[0]),
                    float(box[3] - box[1]),
                ],
                'score': float(score)
            })
    return coco_results

# Validation Function
def validate(model, data_loader, coco_gt, device):
    """Run the model over ``data_loader`` and compute COCO bbox metrics.

    Parameters:
    - model: Detection model; it is switched to eval mode.
    - data_loader: Iterable yielding ``(images, targets)`` batches, where each
      target carries an ``image_id`` tensor.
    - coco_gt: ``COCO`` object holding the ground-truth annotations.
    - device: Device the images are moved to.

    Returns:
    - ``coco_eval.stats`` after summarizing, or a list of six zeros when the
      model produced no detections at all.
    """
    model.eval()
    results = []
    evaluated_ids = []

    # Add tqdm
    progress_bar = tqdm(data_loader, desc="Validation", leave=True)

    with torch.no_grad():
        for images, targets in progress_bar:
            images = [image.to(device) for image in images]
            outputs = model(images)

            image_ids = [target['image_id'].item() for target in targets]
            evaluated_ids.extend(image_ids)
            results.extend(convert_to_coco_format(outputs, image_ids))

            # Update tqdm-bar
            progress_bar.set_postfix({"Processed": len(results)})

    if not results:
        print("No predictions generated. Skipping evaluation.")
        return [0.0] * 6  # Return dummy metrics for empty results

    # Suppress COCOeval output
    with contextlib.redirect_stdout(io.StringIO()):
        coco_dt = coco_gt.loadRes(results)
        coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
        # Score only the images that were actually run through the model.
        # Without this, a loader over a subset of the data is evaluated against
        # every ground-truth image and the unseen ones count as misses.
        coco_eval.params.imgIds = sorted(set(evaluated_ids))
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

    return coco_eval.stats


# Custom backbone to return a dictionary of feature maps
class BackboneWithChannels(torch.nn.Module):
    """Wrap a backbone so its output is returned as a one-entry dict keyed ``'0'``."""

    def __init__(self, backbone):
        super().__init__()
        self.backbone = backbone

    def forward(self, x):
        # Single feature map exposed under the name '0'
        return {'0': self.backbone(x)}
    
# Function to create a subset of the dataset
def create_subset(dataset, percentage):
    """
    Pick a random fraction of a dataset.

    Parameters:
    - dataset: The full dataset (must support ``len``).
    - percentage: Fraction of samples to keep, greater than 0.0 and at most 1.0.

    Returns:
    - subset: A ``Subset`` over ``int(len(dataset) * percentage)`` randomly chosen indices.

    Raises:
    - ValueError: If ``percentage`` is outside the accepted range.
    """
    in_range = 0.0 < percentage <= 1.0
    if not in_range:
        raise ValueError("Percentage must be between 0.0 and 1.0.")

    n_total = len(dataset)

    # Shuffle every index, then keep the leading slice
    shuffled = list(range(n_total))
    random.shuffle(shuffled)
    n_keep = int(n_total * percentage)

    return Subset(dataset, shuffled[:n_keep])

def create_directory(base_path="data/models"):
    """
    Make a timestamped output directory for models and logs.

    The directory is named 'faster-rcnn-finetuned-{timestamp}', where the
    timestamp is the current local time formatted as 'DD-MM-YYYY HH:MM:SS'.

    Parameters:
    - base_path (str): Parent directory in which the new directory is created.

    Returns:
    - directory_path (str): Full path to the created directory.
    """
    stamp = datetime.now().strftime("%d-%m-%Y %H:%M:%S")
    directory_path = os.path.join(base_path, f"faster-rcnn-finetuned-{stamp}")

    # exist_ok keeps this from failing if the directory is already there
    os.makedirs(directory_path, exist_ok=True)

    print(f"Directory created: {directory_path}")
    return directory_path

def train_model(directory, model, optimizer, train_loader, device, train_metrics_list, best_val_map, lr_scheduler, val_loader, coco_val, scaler, epoch):
    """Train for one epoch, validate, log the results and write checkpoints.

    Parameters:
    - directory: Output directory for the log file and checkpoints.
    - model, optimizer, lr_scheduler, scaler: Training objects; the scheduler is
      stepped once at the end of the call.
    - train_loader, val_loader: Data loaders for training and validation.
    - device: Device used for training and validation.
    - train_metrics_list: List that receives this epoch's metrics dict (mutated).
    - best_val_map: Best validation mAP seen so far.
    - coco_val: Ground-truth ``COCO`` object for validation.
    - epoch: Zero-based epoch index; it is reported and stored as ``epoch + 1``.

    Returns:
    - The updated best validation mAP.
    """
    epoch += 1
    # Start the timer
    start_time = time.time()

    # Train for one epoch
    train_loss, train_classifier_loss, train_box_reg_loss, train_objectness_loss, train_rpn_box_reg_loss = train_one_epoch(
        model, optimizer, train_loader, device, scaler)

    # Validate and get all COCO-metrics. Convert them to plain Python floats so
    # the metrics stored in the checkpoint do not carry NumPy objects.
    val_metrics = [float(metric) for metric in validate(model, val_loader, coco_val, device)]
    val_map = val_metrics[0]  # mAP@IoU=0.50:0.95

    # Stop the timer
    elapsed_time = time.time() - start_time
    minutes, seconds = divmod(elapsed_time, 60)

    # Obtain the current learning rate (read before the scheduler steps)
    current_lr = optimizer.param_groups[0]['lr']

    # Prepare data for logging
    data = {
        "epoch": epoch,
        "time_elapsed": (int(minutes), int(seconds)),
        "learning_rate": current_lr,
        "train_loss": train_loss,
        "classifier_loss": train_classifier_loss,
        "box_reg_loss": train_box_reg_loss,
        "objectness_loss": train_objectness_loss,
        "rpn_box_reg_loss": train_rpn_box_reg_loss,
        "val_metrics": val_metrics
    }

    # Append current epoch data to metrics list
    train_metrics_list.append(data)

    # Print summary for this epoch
    print(f"📊 Epoch {epoch} | ⏳ Time: {int(minutes)}m {int(seconds)}s | 🔄 LR: {current_lr:.6f}")
    print(f"📉 Train Loss: {train_loss:.4f} | 🎯 Classifier: {train_classifier_loss:.4f} | 📦 Box Reg: {train_box_reg_loss:.4f}")
    print(f"🔍 Objectness: {train_objectness_loss:.4f} | 🗂️ RPN Box Reg: {train_rpn_box_reg_loss:.4f}")
    print(f"🧪 mAP | 🟢 mAP@IoU=0.50:0.95: {val_metrics[0]:.4f} | 🔵 mAP@IoU=0.50: {val_metrics[1]:.4f} | 🟣 mAP@IoU=0.75: {val_metrics[2]:.4f}")
    print(f"📏 Small mAP: {val_metrics[3]:.4f} | 📐 Medium mAP: {val_metrics[4]:.4f} | 📏 Large mAP: {val_metrics[5]:.4f}")

    # Save epoch data to a log file
    save_epoch_data(directory, data)

    # Update learning rate
    lr_scheduler.step()

    # Save the latest checkpoint with all metrics. The scheduler and scaler
    # states are stored as extra keys so training can be resumed from the file.
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'lr_scheduler_state_dict': lr_scheduler.state_dict(),
        'scaler_state_dict': scaler.state_dict(),
        'val_map': val_map,
        'train_metrics_list': train_metrics_list  # Save all metrics
    }
    torch.save(checkpoint, os.path.join(directory, "latest_model.pth"))

    # Save the best model if the val_map is the highest so far
    if val_map > best_val_map:
        best_val_map = val_map
        torch.save(checkpoint, os.path.join(directory, "best_model.pth"))

    return best_val_map
        


def save_epoch_data(directory, data):
    """
    Append the training statistics of one epoch to 'training_log.txt'.

    Parameters:
    - directory (str): Path to the directory that holds the log file.
    - data (dict): Metrics for the epoch with the keys 'epoch', 'time_elapsed'
      (minutes, seconds), 'learning_rate', 'train_loss', 'classifier_loss',
      'box_reg_loss', 'objectness_loss', 'rpn_box_reg_loss' and 'val_metrics'
      (at least six values).
    """
    log_file_path = os.path.join(directory, "training_log.txt")
    metrics = data['val_metrics']

    # The lines contain emoji, so the encoding is fixed to UTF-8 instead of
    # relying on the platform default, which may not be able to encode them.
    with open(log_file_path, "a", encoding="utf-8") as log_file:
        log_file.write(f"📊 Epoch {data['epoch']} | ⏳ Time: {data['time_elapsed'][0]}m {data['time_elapsed'][1]}s | 🔄 LR: {data['learning_rate']:.6f}\n")
        log_file.write(f"📉 Train Loss: {data['train_loss']:.4f} | 🎯 Classifier: {data['classifier_loss']:.4f} | 📦 Box Reg: {data['box_reg_loss']:.4f}\n")
        log_file.write(f"🔍 Objectness: {data['objectness_loss']:.4f} | 🗂️ RPN Box Reg: {data['rpn_box_reg_loss']:.4f}\n")
        log_file.write(f"🧪 Validation Metrics | 🟢 mAP@IoU=0.50:0.95: {metrics[0]:.4f} | 🔵 mAP@IoU=0.50: {metrics[1]:.4f} | 🟣 mAP@IoU=0.75: {metrics[2]:.4f}\n")
        log_file.write(f"📏 Small mAP: {metrics[3]:.4f} | 📐 Medium mAP: {metrics[4]:.4f} | 📏 Large mAP: {metrics[5]:.4f}\n")
        log_file.write("\n")

# Modeling

In [None]:
# Device used for the model and the training/validation batches.
# NOTE(review): hard-coded to CPU although the training code uses GradScaler,
# autocast and pin_memory — confirm whether a CUDA device was intended.
device = torch.device('cpu')
print(f"Training model on {device}")

# Create datasets (training gets the flip augmentation, validation does not)
train_dataset = HazmatDataset(
    data_dir='data/data_faster_rcnn/train',
    annotations_file='data/data_faster_rcnn/train/annotations/instances_train.json',
    transforms=get_transform(train=True)
)

val_dataset = HazmatDataset(
    data_dir='data/data_faster_rcnn/val',
    annotations_file='data/data_faster_rcnn/val/annotations/instances_val.json',
    transforms=get_transform(train=False)
)

# Set the fraction of the training dataset to use (greater than 0 and at most 1)
train_percentage = 1

# Create a subset of the training dataset
train_dataset_subset = create_subset(train_dataset, train_percentage)

# Set the fraction of the validation dataset to use (greater than 0 and at most 1)
val_percentage = 1

# Create a subset of the validation dataset
val_dataset_subset = create_subset(val_dataset, val_percentage)

# Number of DataLoader worker processes
workers = 2

# Create data loaders; collate_fn keeps images and targets as tuples
# instead of stacking them into a single tensor
train_loader = DataLoader(
    train_dataset_subset,
    batch_size=16,
    shuffle=True,
    collate_fn=collate_fn,
    num_workers=workers,
    pin_memory=True
)

val_loader = DataLoader(
    val_dataset_subset,
    batch_size=16,
    shuffle=False,
    collate_fn=collate_fn,
    num_workers=workers,
    pin_memory=True
)

# Initialize model
num_classes = 2  # hazmat code and background

# Create ResNet-101 backbone with FPN
backbone = resnet_fpn_backbone('resnet101', pretrained=True)

# Define anchor generator for FPN: one anchor size per feature level,
# each with three aspect ratios
anchor_generator = AnchorGenerator(
    sizes=((32,), (64,), (128,), (256,), (512,)),
    aspect_ratios=((0.5, 1.0, 2.0),) * 5
)

# Multi-scale RoI pooling for FPN
# NOTE(review): the feature map names must match the keys returned by the
# backbone — confirm '4' is one of them for this torchvision version.
roi_pooler = MultiScaleRoIAlign(
    featmap_names=['0', '1', '2', '3', '4'],
    output_size=7,
    sampling_ratio=2
)

print("initializing model...")
# Initialize Faster R-CNN with ResNet-101-FPN
model = FasterRCNN(
    backbone=backbone,
    num_classes=num_classes,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler
)

# Move model to device
model.to(device)

In [None]:
!nvidia-smi
# Report how many CUDA devices PyTorch can see
num_gpus = torch.cuda.device_count()
print(f"Number of GPUs available: {num_gpus}")

In [None]:
# !kill -9 7710

## Training

In [None]:
# Initialize optimizer and scheduler (only parameters that require gradients
# are optimized; the learning rate is multiplied by 0.1 every 3 epochs)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Training loop state: per-epoch metrics and the best validation mAP so far
num_epochs = 23
train_metrics_map = []
best_val_map = float('-inf')

print("Starting training...")

# Create directory to store models and logs
directory_finetuned_model = create_directory()


# train_model trains, validates, logs and checkpoints one epoch per call and
# returns the updated best validation mAP
for epoch in range(num_epochs):
    best_val_map = train_model(
        directory=directory_finetuned_model, 
        model=model, optimizer=optimizer, train_loader=train_loader, device=device, 
        train_metrics_list=train_metrics_map, best_val_map=best_val_map, lr_scheduler=lr_scheduler, 
        val_loader=val_loader, coco_val=coco_val, scaler=scaler, epoch=epoch
    )



# Evaluation

In [None]:
# Load the model
# NOTE(review): train_model writes its checkpoints into the timestamped
# directory returned by create_directory(); make sure the files were copied
# here, or point this variable at that directory.
directory_finetuned_model = "data/models"
# 'cuda' is the PyTorch device type for NVIDIA GPUs ('gpu:0' is rejected by torch.device)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Best checkpoint (highest validation mAP)
model_path = os.path.join(directory_finetuned_model, 'best_model.pth')
checkpoint = torch.load(model_path, map_location=device)
val_map = checkpoint['val_map']
epoch = checkpoint['epoch']

# Latest checkpoint; the file name matches the one written by train_model
latest_model_path = os.path.join(directory_finetuned_model, 'latest_model.pth')
checkpoint_latest = torch.load(latest_model_path, map_location=device)
val_map_latest = checkpoint_latest['val_map']
epoch_latest = checkpoint_latest['epoch']

model.load_state_dict(checkpoint['model_state_dict'])
# Keep the model on the same device the inference inputs are moved to
model.to(device)
model.eval()  # Set the model to evaluation mode

print(f"Validation mAP best model: {val_map:.4f}")
print(f"Epoch best model: {epoch}")

print(f"Validation mAP latest model: {val_map_latest:.4f}")
print(f"Epoch latest model: {epoch_latest}")


In [None]:
def plot_metrics(checkpoint_path, title="Training and Validation Metrics over Epochs"):
    """
    Plot the training loss and validation mAP metrics stored in a checkpoint.

    Parameters:
    - checkpoint_path (str): Path to the model checkpoint file (e.g., 'latest_model.pth').
      The checkpoint must contain a 'train_metrics_list' entry with one dict per
      epoch holding 'epoch', 'train_loss' and 'val_metrics'.
    - title (str): Title for the plot.
    """
    # Load the checkpoint
    checkpoint = torch.load(checkpoint_path, map_location=device)
    train_metrics_list = checkpoint['train_metrics_list']

    # Extract metrics per epoch
    epochs = [data['epoch'] for data in train_metrics_list]
    train_loss_list = [data['train_loss'] for data in train_metrics_list]

    # Legend label and position inside 'val_metrics' for each validation curve
    val_series = (
        ('Validation mAP (IoU=0.50:0.95)', 0),
        ('Validation mAP (IoU=0.50)', 1),
        ('Validation mAP (IoU=0.75)', 2),
        ('Validation mAP (Small)', 3),
        ('Validation mAP (Medium)', 4),
        ('Validation mAP (Large)', 5),
    )

    # Initialize the plot
    plt.figure(figsize=(14, 10))

    # Plot training loss
    plt.plot(epochs, train_loss_list, label='Training Loss', marker='o')

    # Plot validation mAP metrics
    for label, index in val_series:
        values = [data['val_metrics'][index] for data in train_metrics_list]
        plt.plot(epochs, values, label=label, linestyle='--', marker='x')

    # Tick at the recorded epoch numbers so the ticks line up with the data
    # points even when the stored epochs do not run exactly from 1 to N
    plt.xticks(epochs)

    # Set plot details
    plt.xlabel('Epoch')
    plt.ylabel('Metric Value')
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.show()

In [None]:
# Plot the metric history stored in the latest model checkpoint
latest_model_path = os.path.join(directory_finetuned_model, 'latest_model.pth')
plot_metrics(latest_model_path, "Training and validation over epochs")

In [None]:
def load_image(image_path, transforms=None):
    """Open an image as RGB and run it through the optional paired transforms.

    Each transform is called as ``transform(image, target=None)`` and only the
    returned image is kept.
    """
    image = Image.open(image_path).convert('RGB')
    for transform in (transforms or ()):
        # No target during inference
        image, _ = transform(image, target=None)
    return image

# Define preprocessing transforms (evaluation pipeline, no augmentation)
test_transforms = get_transform(train=False)

# Load the image and move it to the inference device
image_path = 'images/hazard_plate.jpg'  # Replace with your image path
image = load_image(image_path, transforms=test_transforms)
image = image.to(device)
# Wrap the image in a list as the model expects a batch;
# no_grad disables gradient tracking for inference
with torch.no_grad():
    predictions = model([image])

In [None]:
def get_color_with_opacity(score):
    """
    Pick an RGBA colour for a detection from its confidence score.

    Scores above 0.75 get red with alpha ``min(1.0, 0.3 + score)``; all other
    scores get a random RGB colour with alpha ``max(0.3, score)``.
    """
    if not score > 0.75:
        # Low confidence: random colour, alpha never below 0.3
        red, green, blue = random.random(), random.random(), random.random()
        return (red, green, blue, max(0.3, score))

    # High confidence: red, alpha capped at 1.0
    return (1, 0, 0, min(1.0, 0.3 + score))

def draw_predictions(image, predictions, threshold=0.5, classes=['background', 'hazmat']):
    """Show the image with boxes for label-1 detections scoring at least ``threshold``.

    ``image`` is a channel-first tensor scaled to 0..255 for display, and
    ``predictions`` is the model output list; only its first entry is drawn.
    """
    # Tensor (C, H, W) -> uint8 array (H, W, C) for matplotlib
    pixels = image.cpu().permute(1, 2, 0).numpy()
    pixels = np.clip(pixels * 255, 0, 255).astype(np.uint8)

    first = predictions[0]
    boxes, labels, scores = (first[key].cpu().numpy() for key in ('boxes', 'labels', 'scores'))

    # Keep only detections at or above the confidence threshold
    mask = scores >= threshold

    fig, ax = plt.subplots(1, figsize=(12, 9))
    ax.imshow(pixels)

    for box, label, score in zip(boxes[mask], labels[mask], scores[mask]):
        if label != 1:
            # Only plot hazmat codes
            continue

        x1, y1, x2, y2 = box
        color = get_color_with_opacity(score)

        # Outline rectangle; the colour carries its own opacity
        outline = plt.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                edgecolor=color, facecolor='none')
        ax.add_patch(outline)

        # Caption with class name and confidence score
        ax.text(x1, y1, f'{classes[label]}: {score:.2f}',
                color='white',
                bbox=dict(facecolor=color[:3], alpha=0.6),
                fontsize=12)

    plt.axis('off')
    plt.show()


In [None]:
def predict_image(image_path, threshold=0.5):
    """Run the global model on one image file, print the detections and draw them.

    Detections scoring below ``threshold`` are ignored. The elapsed time of the
    forward pass is printed; nothing is drawn when no detection is kept.
    """
    # List of class names
    classes = ['background', 'hazmat']

    image = load_image(image_path, transforms=test_transforms).to(device)

    # Time the forward pass only
    started = time.time()
    with torch.no_grad():
        # The model expects a batch, so wrap the single image in a list
        predictions = model([image])
    prediction_time = time.time() - started
    print(f"Prediction time: {prediction_time:.4f} seconds")

    top = predictions[0]
    boxes = top['boxes'].cpu().numpy()
    labels = top['labels'].cpu().numpy()
    scores = top['scores'].cpu().numpy()

    # Apply threshold filter
    keep = scores >= threshold
    boxes, labels, scores = boxes[keep], labels[keep], scores[keep]

    if len(boxes) == 0:
        print("No predictions meet the threshold.")
        return

    print("Predictions:")
    for label, score in zip(labels, scores):
        print(f"  {classes[label]}: {score:.2f}")
    # Display the predictions
    draw_predictions(image, predictions, threshold=threshold, classes=classes)


def get_color_with_opacity(score):
    """
    Map a confidence score to an RGBA tuple.

    Above 0.75 the colour is red and the alpha is 0.3 + score capped at 1.0;
    otherwise the RGB part is random and the alpha is the score floored at 0.3.
    """
    confident = score > 0.75
    if confident:
        alpha = min(1.0, 0.3 + score)
        rgb = (1, 0, 0)
    else:
        # The three channels are drawn in order before the alpha is computed
        rgb = (random.random(), random.random(), random.random())
        alpha = max(0.3, score)
    return (rgb[0], rgb[1], rgb[2], alpha)

def draw_predictions(image, predictions, threshold=0.5, classes=['background', 'hazmat']):
    """Display ``image`` and overlay the label-1 detections that pass ``threshold``.

    Only ``predictions[0]`` is used. Every kept detection is drawn as an
    outlined rectangle with a caption showing the class name and score.
    """
    # Channel-first tensor -> displayable uint8 array
    canvas = np.clip(image.cpu().permute(1, 2, 0).numpy() * 255, 0, 255).astype(np.uint8)

    result = predictions[0]
    all_boxes = result['boxes'].cpu().numpy()
    all_labels = result['labels'].cpu().numpy()
    all_scores = result['scores'].cpu().numpy()

    # Filter predictions based on confidence threshold
    selected = all_scores >= threshold
    kept = zip(all_boxes[selected], all_labels[selected], all_scores[selected])

    fig, ax = plt.subplots(1, figsize=(12, 9))
    ax.imshow(canvas)

    for box, label, score in kept:
        if label == 1:  # Only plot hazmat codes
            left, top, right, bottom = box
            color = get_color_with_opacity(score)

            # Draw rectangle with opacity
            ax.add_patch(plt.Rectangle((left, top), right - left, bottom - top,
                                       linewidth=2, edgecolor=color, facecolor='none'))

            # Add text label with confidence score
            caption = f'{classes[label]}: {score:.2f}'
            ax.text(left, top, caption,
                    color='white',
                    bbox=dict(facecolor=color[:3], alpha=0.6),
                    fontsize=12)

    plt.axis('off')
    plt.show()


In [None]:
# predict_image('data/data_faster_rcnn/val/images/1690281365_00595.jpg', threshold=0.29)
# Sample images to run through the model, in display order
_DEMO_IMAGE_PATHS = (
    'images/hazard_plate.jpg',
    'images/un_numbers_test/close_up_number.webp',
    'images/un_numbers_test/2.jpg',
    'images/un_numbers_test/3.jpg',
    'images/two_signs_different_distance.jpg',
    'images/un_numbers_test/6.webp',
    'images/no_signs.jpg',
    'images/africalane_closed_off.jpg',
    'images/bikes_get_off.jpg',
    'images/gevaarlijke_stoffen_route.jpg',
    'images/great_britain_nb.jpeg',
    'images/priority-road-sign.webp',
    'images/reflective_un_number_on_truck.jpg',
    'images/traffic signs.jpg',
)

# A threshold of 0 keeps every detection the model returns
for _demo_image_path in _DEMO_IMAGE_PATHS:
    predict_image(_demo_image_path, threshold=0)


## Test set evaluation

In [None]:
# Create test dataset (evaluation transforms only, no augmentation)
test_dataset = HazmatDataset(
    data_dir='data/data_faster_rcnn/test',
    annotations_file='data/data_faster_rcnn/test/annotations/instances_test.json',
    transforms=get_transform(train=False)
)

# Create test data loader
test_loader = DataLoader(
    test_dataset,
    batch_size=16,
    shuffle=False,
    collate_fn=collate_fn,
    num_workers=workers,
    pin_memory=True
)

# Load the best model checkpoint

model_path = os.path.join(directory_finetuned_model, 'best_model.pth')
checkpoint = torch.load(model_path, map_location=device)

# Restore the weights and make sure the model lives on the evaluation device
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)

# Load ground truth annotations for test set
coco_test = COCO('data/data_faster_rcnn/test/annotations/instances_test.json')

# Evaluate on test set (validate switches the model to eval mode itself)
test_metrics = validate(model, test_loader, coco_test, device)

# Print test metrics; indices 0-2 are mAP at IoU 0.50:0.95, 0.50 and 0.75,
# indices 4 and 5 are the medium and large object mAP
print(f"Test Metrics - mAP: {test_metrics[0]:.4f}")
print(f"mAP@0.5: {test_metrics[1]:.4f}, mAP@0.75: {test_metrics[2]:.4f}")
print(f"mAP medium: {test_metrics[4]:.4f}, mAP large: {test_metrics[5]:.4f}")

In [None]:
# /tmp/ipykernel_2090903/2616491437.py:20: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
#   checkpoint = torch.load(best_checkpoint_path)
# loading annotations into memory...
# Done (t=0.00s)
# creating index...
# index created!
# Validation: 100%|██████████| 62/62 [02:38<00:00,  2.56s/it, Processed=1095]
# Test Metrics - mAP: 0.5634
# mAP@0.5: 0.9892, mAP@0.75: 0.4738
# mAP small: -1.0000, mAP medium: 0.4648, mAP large: 0.5724

## Data prep for augmented weather evaluation

In [None]:
# Predict on a random sample of the test images
test_images_path = "data/data_faster_rcnn/test/images"

# List the available frames and shuffle them so the sample is random
test_images_path_list = os.listdir(test_images_path)
random.shuffle(test_images_path_list)

# Predict on the first 20 images of the shuffled list
for count, image_name in enumerate(test_images_path_list[:20]):
    image_path = os.path.join(test_images_path, image_name)
    predict_image(image_path, threshold=0.4)

In [None]:

def visualize(image):
    """Show ``image`` in a new 20x10 inch figure with the axes hidden."""
    plt.figure(figsize=(20, 10))
    # Hide ticks and frame on the current axes before drawing
    plt.gca().axis('off')
    plt.imshow(image)

test_images_path = "data/data_faster_rcnn/test/images"

# Pick one random test frame to preview the weather augmentations on
test_images_path_list = os.listdir(test_images_path)
random.shuffle(test_images_path_list)
path = os.path.join(test_images_path, test_images_path_list[0])
image = cv2.imread(path)
# OpenCV loads images as BGR; convert to RGB for matplotlib
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Rain augmentation, always applied (p=1)
transform = A.Compose(
    [A.RandomRain(brightness_coefficient=0.9, drop_width=1, blur_value=5, p=1)],
)
# NOTE(review): seeds Python's random module; whether this makes the
# augmentation reproducible depends on the albumentations version — confirm.
random.seed(7)
transformed = transform(image=image)
visualize(transformed['image'])

In [None]:
# Sun flare augmentation, always applied (p=1), previewed on the same frame
transform = A.Compose(
    [A.RandomSunFlare(flare_roi=(0, 0, 1,0.5), angle_lower=1, p=1)],
)
random.seed(7)
transformed = transform(image=image)
visualize(transformed['image'])

In [None]:
# Shadow augmentation with exactly one shadow, always applied (p=1)
transform = A.Compose(
    [A.RandomShadow(num_shadows_lower=1, num_shadows_upper=1, shadow_dimension=5, shadow_roi=(0, 0.5, 1, 1), p=1)],
)
random.seed(7)
transformed = transform(image=image)
visualize(transformed['image'])

In [None]:
# Fog augmentation, always applied (p=1), previewed on the same frame
transform = A.Compose(
    [A.RandomFog(fog_coef_lower=0.7, fog_coef_upper=0.8, alpha_coef=0.1, p=1)],
)
random.seed(7)
transformed = transform(image=image)
visualize(transformed['image'])


In [None]:
import os
import random
from PIL import Image
import cv2
import albumentations as A

# frames available
# Directory holding the training images used as augmentation input
train_images_path = "data/data_faster_rcnn/train/images"
train_list = os.listdir(train_images_path)
# Shuffle in place so the files are visited in random order
random.shuffle(train_list)

def add_to_dataset(image, dataset_name):
    """
    Save an image as the next numbered JPEG of an augmented dataset.

    The file is written to ``data/augmented_images/<dataset_name>/<n>.jpg``
    and the directory is created if it does not exist. ``n`` starts at the
    number of entries already in the directory plus one and is advanced past
    any name that is already taken, so an existing file is never overwritten
    (e.g. when earlier files were deleted or unrelated files are present).

    Parameters:
    - image (numpy.ndarray): The image to add (as a NumPy array). It is
      converted from BGR to RGB channel order before saving.
    - dataset_name (str): The name of the dataset to add the image to.
    """
    # Get the dataset directory, creating it if it doesn't exist
    dataset_dir = os.path.join('data/augmented_images', dataset_name)
    os.makedirs(dataset_dir, exist_ok=True)

    # First candidate id is "entry count + 1"; skip ids whose file already exists
    image_id = len(os.listdir(dataset_dir)) + 1
    image_file = os.path.join(dataset_dir, f"{image_id}.jpg")
    while os.path.exists(image_file):
        image_id += 1
        image_file = os.path.join(dataset_dir, f"{image_id}.jpg")

    # Convert NumPy array (BGR) to an RGB PIL image
    image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Save the image
    image_pil.save(image_file)


# One weather transform per augmented set, built once up front instead of
# being re-created on every loop iteration. The key is also the name of the
# dataset folder the result is saved to.
weather_transforms = {
    "rain": A.Compose(
        [A.RandomRain(brightness_coefficient=0.9, drop_width=1, blur_value=5, p=1)],
    ),
    "sunflare": A.Compose(
        [A.RandomSunFlare(flare_roi=(0, 0, 1, 0.5), angle_lower=1, p=1)],
    ),
    "shadow": A.Compose(
        [A.RandomShadow(num_shadows_lower=1, num_shadows_upper=1, shadow_dimension=5, shadow_roi=(0, 0.5, 1, 1), p=1)],
    ),
    "fog": A.Compose(
        [A.RandomFog(fog_coef_lower=0.7, fog_coef_upper=0.8, alpha_coef=0.1, p=1)],
    ),
}
weather_names = list(weather_transforms)

# Loop over train list
for image_name in train_list:
    image_path = os.path.join(train_images_path, image_name)

    # Load image using OpenCV; imread returns None for entries it cannot read
    # (e.g. non-image files in the directory), so skip those instead of crashing
    image = cv2.imread(image_path)
    if image is None:
        print(f"Skipping unreadable file: {image_path}")
        continue

    # Assign the image to one weather set at random
    random_string = random.choice(weather_names)
    transform = weather_transforms[random_string]

    # Apply transformation
    transformed = transform(image=image)

    # Save transformed image
    add_to_dataset(transformed['image'], random_string)


In [None]:
# Go over each augmented set (rain, sunflare, shadow, fog) and call
# predict_image on every image in it

# base path
base_path = "data/augmented_images"

for set_name in ["rain", "sunflare", "shadow", "fog"]:
    set_path = os.path.join(base_path, set_name)

    # Sets are filled by random choice, so a set folder may never have been
    # created; skip it instead of failing in os.listdir
    if not os.path.isdir(set_path):
        print(f"Skipping missing set: {set_path}")
        continue

    for image_name in os.listdir(set_path):
        image_path = os.path.join(set_path, image_name)
        predict_image(image_path, threshold=0.4)

## Model metrics

<img src="data/models/validation.png" alt="validation over epochs" width=1000>



In [None]:
# Print the full contents of the training log
log_file_path = os.path.join("data", "models", "training_log.txt")

with open(log_file_path, "r") as log_file:
    log_text = log_file.read()
print(log_text)

The fine-tuned Faster R-CNN model was trained for 18 epochs and reached the following best values (highest mAP, lowest loss):

- **mAP@IoU=0.50:0.95 (overall mAP)** → Epoch 9, value: 0.5303
- **mAP@IoU=0.50** → Epochs 7 and 12 to 18, value: 0.9792
- **mAP@IoU=0.75** → Epoch 6, value: 0.4170
- **Medium mAP** → Epoch 8, value: 0.4431
- **Large mAP** → Epoch 6, value: 0.5395
- **RPN Box Reg** → Epochs 5 to 18, value: 0.0008
- **Objectness** → Epochs 5 to 18, value: 0.0007
- **Box Reg** → Epoch 18, value: 0.0341
- **Classifier** → Epochs 4 and 5, value: 0.0182
- **Train loss** → Epochs 7, 10, 12, and 18, value: 0.0540


The checkpoint chosen as the best model is the one with the highest mAP@IoU=0.50:0.95 (overall mAP), i.e. the checkpoint from epoch 9.

When evaluating the best model on the test set we get these metrics:
- mAP@IoU=0.50:0.95: 0.5634
- mAP@0.5: 0.9892
- mAP@0.75: 0.4738
- mAP medium: 0.4648
- mAP large: 0.5724

These results are slightly better than those on the validation set.

## Training analysis
Based on the analysis of the training and validation metrics, it can be concluded that additional training with the current configuration (data and hyperparameters) yields diminishing returns. The training loss remains stable at approximately 0.0540 from epoch 7 through epoch 18, indicating little improvement with further training. Additionally, the validation mAP scores have plateaued between epochs 4 and 18, showing no significant change.

Therefore, it can be inferred that training for around 7 epochs provides near-optimal results while minimizing the time spent on training.

## False positives

Predictions:
  - hazmat: 0.88
  - hazmat: 0.15
  - hazmat: 0.14
  - hazmat: 0.06

<br>
<img src="images/predictions/bmw_prediction.png" alt="Model prediction on a BMW licence plate" width=600>


Predictions:
- hazmat: 0.82
- hazmat: 0.81
- hazmat: 0.14
- hazmat: 0.11
- hazmat: 0.06
- hazmat: 0.06
- hazmat: 0.06
- hazmat: 0.06
- hazmat: 0.05
- hazmat: 0.05

<img src="images/predictions/trafficsignpred.png" alt="Model prediction on a traffic sign" width=600>


The model appears to produce some false positives, mistakenly identifying certain objects as hazmat placards when they are not. This issue is particularly prevalent with objects that are square and have colors such as yellow, red, or orange. Expanding the training dataset and applying data augmentation techniques should help mitigate this problem by improving the model's ability to differentiate between actual placards and visually similar objects.

## False negatives

<img src="images/predictions/hazmatclose.png" width=600>

In certain cases, the model failed to detect UN number placards in high-resolution images. This issue likely arises because the training dataset primarily consists of images where UN number placards were captured from a distance. As a result, the model may have overfitted to the assumption that UN numbers appear relatively small in images. To address this, applying targeted data augmentation techniques—such as zooming in on UN numbers, rotating them (e.g., upside down), and varying their orientation—can help the model generalize better to different scales and perspectives.

## Weather Conditions

For this project, it was essential to develop a robust model capable of performing well under various weather conditions. However, our dataset primarily consisted of images captured under different lighting conditions, such as day and night, without significant weather variations.  

To address this limitation, we evaluated the model using augmented images generated with the **Albumentations** library, applying weather-related transformations such as **rain, sunflare, shadow, and fog.** The model performed reasonably well on these augmented images, successfully identifying objects in most cases. However, it is important to note that data augmentation does not always provide a fully realistic representation of real-world weather conditions.  

While the results indicate that the model can likely handle different weather conditions to some extent, further improvements are needed to ensure robust performance in real-life scenarios. Incorporating actual weather-diverse data, along with advanced augmentation techniques that simulate real-world complexity more accurately, would enhance the model's generalization capabilities.

### Predictions on Augmented Images:

<h4> Rain </h4>
<img src="images/predictions/rainpred.png" alt="Augmented image with rain effect" width="400"/>
<h4> Flare </h4>
<img src="images/predictions/flare.png" alt="Augmented image with sunflare effect" width="400"/>
<h4> Shadows </h4>
<img src="images/predictions/shadowspred.png" alt="Augmented image with shadow effect" width="400"/>
<h4> Fog </h4>
<img src="images/predictions/fog_pred.png" alt="Augmented image with fog effect" width="400"/>

## Reflection on Data Mining Goals

### **Primary Data Mining Goal:**
Create and train an object detection model capable of identifying and interpreting UN number hazard plates on freight wagons in real-time.

---

### **Specific Data Mining Goals:**

#### **1. Object Detection and Localization:**
Develop a model that achieves a high AP score for accurately detecting and localizing hazard plates on freight wagons within each video frame.

**Approach and Outcome:**  
To accomplish this goal, we finetuned a Faster R-CNN model, which demonstrated promising results in localizing hazard placards. However, the model is not yet fully robust and occasionally produces false positives. These false detections often occur when objects with similar visual characteristics—such as square shapes and colors resembling hazard placards (e.g., yellow, red, or orange)—are present in the scene.

**Improvement Strategies:**
- **1.1: Expanding the training dataset** with a greater variety of real-world scenarios to improve generalization.
- **1.2: Advanced data augmentation**, such as applying transformations that simulate real-life conditions (e.g., partial occlusion, varying angles, and different lighting conditions).

---

#### **2. Robustness Across Variable Conditions:**
Enhance the model’s robustness by training it on datasets representing diverse lighting and weather conditions, with a goal to maintain high AP scores across these environments.

**Approach and Outcome:**  
While we did not have access to datasets covering a wide range of weather conditions, we leveraged data captured at different times of the day, covering various lighting conditions such as daytime, nighttime, and low-light scenarios. The model demonstrated strong performance across these lighting variations, indicating a certain level of robustness in this aspect.

To evaluate its performance under different weather conditions, we used data augmentation techniques from the **Albumentations** library. Augmentations like rain, sunflare, shadow, and fog were introduced to simulate adverse weather conditions. Although the model performed reasonably well on these augmented images, it is important to acknowledge that synthetic augmentations do not fully replicate real-world conditions.

**Further Improvements:**
To improve the model’s robustness, it would be beneficial to collect and incorporate real-world data by filming freight wagons across different seasons and weather conditions. This would ensure the model can generalize better to practical scenarios. Additional techniques to enhance robustness, such as advanced augmentation strategies, are discussed in sections **1.1** and **1.2**.

---

#### **3. Optimization for Real-Time Processing:**
Implement real-time object detection and OCR capabilities to ensure the model operates at a frame rate suitable for analyzing images from moving trains.

**Approach and Outcome:**  
Currently, the model does not perform in real-time. Inference can take up to **2.8 seconds per frame**, and reading the hazard placard (OCR) requires an additional **2 seconds**, making the total processing time **up to about 4.8 seconds per frame**. This delay is far from real-time performance requirements, which typically demand processing speeds of **30 frames per second (FPS)** or faster, depending on the train's speed and camera setup.

**Optimization Strategies:**
To enhance processing speed, several optimization strategies can be considered:
- **3.1: Model Quantization:** Reducing the precision of model parameters (e.g., from 32-bit floating point to 8-bit integers) to speed up computations with minimal accuracy loss.
- **3.2: Efficient Attention Mechanisms:** Using lightweight attention models to focus computational resources on relevant regions, improving both speed and accuracy.
- **3.3: Model Pruning:** Removing redundant weights and layers to reduce computation overhead.
- **3.4: Hardware Acceleration:** Leveraging GPUs, TPUs, or edge AI devices for faster inference.

#### Data Mining Success Criteria evaluation

- **Object Detection AP**: Achieve a Mean Average Precision (mAP) of at least 0.70 for detecting and localizing hazard plates across varied conditions.  
  **Outcome:** Not achieved. The best overall mAP was 0.5303 on the validation set (0.5634 on the test set).

- **OCR Precision for UN Numbers**: Ensure the Tesseract OCR module achieves high accuracy in reading UN numbers, even under challenging conditions, with a target precision score above 0.95.  
  **Outcome:** Tesseract was unable to consistently recognize codes in difficult conditions, so we switched to using the **idefics2 VLM**, which performed significantly better, even with low-quality images. However, accuracy metrics for idefics2 have not been formally evaluated.

- **Processing Speed**: Ensure the model achieves a processing time per frame under 100 milliseconds to maintain real-time functionality.  
  **Outcome:** Not achieved. The model takes longer to process predictions, and the OCR stage, which involves **idefics2**, also contributes to longer processing times, resulting in a total of several seconds per frame, far above the 100-millisecond target.

- **Environmental Robustness**: Maintain consistent mAP scores across different lighting and weather conditions.  
  **Outcome:** Partly achieved. Lighting conditions were well represented in the dataset, and the model performed consistently across different lighting variations. Weather conditions, however, were not included in the dataset, but the model performed reasonably well when evaluated with augmented data. The mAP score was not measured for augmented weather conditions.