# X-ray Threat Detection - Model Evaluation

This notebook evaluates the trained X-ray threat detection model and analyzes its performance.

In [None]:
## DEPENDENT ON YOUR LAPTOPS/PC SPECS 
## USE "CPU" on untils.py if your machine doesnt have a gpu as torch cuda isnt being torch cuda

# Import necessary libraries
import os
import sys
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import torch
from datetime import datetime

# Add project root to path
sys.path.append('..')

# Import project modules
from config.model_config import *
from src.data.dataset import create_data_loaders
from src.models.faster_rcnn import create_faster_rcnn_model
from src.models.utils import load_model, get_best_checkpoint, get_model_size, get_inference_time
from src.utils.metrics import compute_metrics_for_all_classes, compute_confusion_matrix
from src.utils.visualization import visualize_batch, visualize_feature_maps, visualize_class_activation_map

## 1. Load Model and Data

First, let's load the trained model and the validation data.

In [4]:
# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Create model
model = create_faster_rcnn_model(config=sys.modules['config.model_config'])
model.to(device)

# Load checkpoint
checkpoint_path = get_best_checkpoint(CHECKPOINTS_DIR)
if checkpoint_path:
    model, _, _, _, _ = load_model(model, None, checkpoint_path)
    print(f"Loaded checkpoint: {checkpoint_path}")
else:
    print("No checkpoint found. Using initial model.")

# Set model to evaluation mode
model.eval()

# Create data loaders
data_loaders = create_data_loaders(config=sys.modules['config.model_config'])
val_loader = data_loaders['val']
class_map = data_loaders['class_map']

# Print model information
model_size = get_model_size(model)
print(f"Model size: {model_size:.2f} MB")

# Print dataset information
print(f"Validation set size: {len(val_loader.dataset)} images")
print(f"Class map: {class_map}")

Using device: cpu




No checkpoint found. Using initial model.
Model size: 630.13 MB
Validation set size: 7109 images
Class map: {'Straight_Knife': 1, 'Folding_Knife': 2, 'Utility_Knife': 3, 'Multi-tool_Knife': 4, 'Scissor': 5}


## 2. Evaluate Model Performance

Let's evaluate the model's performance on the validation set.

In [5]:
# Function to evaluate model
def evaluate_model(model, data_loader, device, num_batches=None):
    # Initialize lists for metrics computation
    all_predictions = []
    all_targets = []
    
    # Iterate over batches
    with torch.no_grad():
        for batch_idx, (images, targets, metadata) in enumerate(data_loader):
            if num_batches is not None and batch_idx >= num_batches:
                break
                
            # Move data to device
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            
            # Forward pass
            predictions = model(images)
            
            # Store predictions and targets for metrics computation
            all_predictions.extend(predictions)
            all_targets.extend(targets)
            
            # Print progress
            if batch_idx % 10 == 0:
                print(f"Processed batch {batch_idx}/{len(data_loader)}")
    
    # Compute metrics
    metrics = compute_metrics_for_all_classes(
        all_predictions, all_targets, CLASSES
    )
    
    # Compute confusion matrix
    cm = compute_confusion_matrix(
        all_predictions, all_targets, len(CLASSES)
    )
    
    return metrics, cm, all_predictions, all_targets

# Evaluate model
print("Evaluating model...")
metrics, confusion_matrix, predictions, targets = evaluate_model(model, val_loader, device)

# Print metrics
print(f"\nmAP: {metrics['mAP']:.4f}")
print("\nClass APs:")
for class_name, ap in metrics['class_APs'].items():
    print(f"  {class_name}: {ap:.4f}")

print("\nPrecision, Recall, and F1 Score:")
for class_name in metrics['precision'].keys():
    print(f"  {class_name}:")
    print(f"    Precision: {metrics['precision'][class_name]:.4f}")
    print(f"    Recall: {metrics['recall'][class_name]:.4f}")
    print(f"    F1 Score: {metrics['f1_score'][class_name]:.4f}")

Evaluating model...
Processed batch 0/356
Processed batch 10/356


KeyboardInterrupt: 

## 3. Visualize Confusion Matrix

Let's visualize the confusion matrix to understand the model's performance across different classes.

In [6]:
# Plot confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(
    confusion_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=CLASSES[1:],  # Skip background class
    yticklabels=CLASSES[1:]   # Skip background class
)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.tight_layout()
plt.show()

NameError: name 'confusion_matrix' is not defined

<Figure size 1000x800 with 0 Axes>

## 4. Visualize Detection Examples

Let's visualize some detection examples to better understand the model's performance.

In [7]:
# Get a batch of validation data
images, targets, metadata = next(iter(val_loader))

# Generate predictions
with torch.no_grad():
    predictions = model([img.to(device) for img in images])

# Visualize batch
visualize_batch(
    images, targets, predictions,
    class_names=CLASSES,
    threshold=0.5,
    max_images=min(4, len(images))
)

[array([[[255, 255, 255, 255],
         [255, 255, 255, 255],
         [255, 255, 255, 255],
         ...,
         [255, 255, 255, 255],
         [255, 255, 255, 255],
         [255, 255, 255, 255]],
 
        [[255, 255, 255, 255],
         [255, 255, 255, 255],
         [255, 255, 255, 255],
         ...,
         [255, 255, 255, 255],
         [255, 255, 255, 255],
         [255, 255, 255, 255]],
 
        [[255, 255, 255, 255],
         [255, 255, 255, 255],
         [255, 255, 255, 255],
         ...,
         [255, 255, 255, 255],
         [255, 255, 255, 255],
         [255, 255, 255, 255]],
 
        ...,
 
        [[255, 255, 255, 255],
         [255, 255, 255, 255],
         [255, 255, 255, 255],
         ...,
         [255, 255, 255, 255],
         [255, 255, 255, 255],
         [255, 255, 255, 255]],
 
        [[255, 255, 255, 255],
         [255, 255, 255, 255],
         [255, 255, 255, 255],
         ...,
         [255, 255, 255, 255],
         [255, 255, 255, 255],
    

## 5. Measure Inference Time

Let's measure the inference time of the model.

In [8]:
# Measure inference time
sample_image = images[0].to(device)
inference_time = get_inference_time(model, sample_image.unsqueeze(0), num_runs=100, warmup_runs=10)
print(f"Average inference time: {inference_time:.2f} ms")
print(f"FPS: {1000 / inference_time:.2f}")

KeyboardInterrupt: 