In [1]:
import os
import cv2
import yaml
import json

ModuleNotFoundError: No module named 'cv2'

In [None]:
def convert_yolo_to_fasterrcnn(yolo_data_dir, output_json_path, class_names_list):
    """
    Convert YOLO-format annotations into one JSON file for Faster R-CNN training.

    The dataset is expected to be laid out as ``<yolo_data_dir>/images/<split>`` and
    ``<yolo_data_dir>/labels/<split>`` for the splits 'train', 'valid' and 'test'.
    A split whose image or label directory is missing is skipped with a message.
    Images without a label file, or that cannot be read, are skipped with a warning.

    The JSON is a list with one record per image:
        {"image_id", "file_name", "width", "height", "annotations": [...]}
    Each annotation holds ``bbox`` as absolute pixel [xmin, ymin, xmax, ymax]
    (clamped to the image), ``category_id`` (YOLO class id + 1, because label 0
    is reserved for background), ``area`` and ``iscrowd`` (always 0).

    Label lines that cannot be used are skipped instead of aborting the run:
    lines with fewer than 5 fields, non-numeric fields, class ids outside
    ``class_names_list``, and boxes with no positive width/height after clamping
    (such boxes are rejected by torchvision's Faster R-CNN during training).

    Args:
        yolo_data_dir (str): Path to the root of your Dataset directory (e.g., '../Dataset').
        output_json_path (str): Path where the converted JSON file will be saved.
        class_names_list (list): List of class names, where the index corresponds to the
                                YOLO class_id (e.g., ['wake']). Used to validate class ids;
                                pass an empty list or None to disable that check.
    """
    all_annotations = []
    image_id_counter = 0
    num_classes = len(class_names_list) if class_names_list else None

    for split in ['train', 'valid', 'test']:
        image_dir = os.path.join(yolo_data_dir, 'images', split)
        label_dir = os.path.join(yolo_data_dir, 'labels', split)

        if not os.path.exists(image_dir) or not os.path.exists(label_dir):
            print(f"Skipping {split} split as directories not found: {image_dir}, {label_dir}")
            continue

        # os.listdir order is arbitrary; sort so image_id assignment is reproducible.
        # Extension match is case-insensitive so '.JPG' / '.PNG' files are not dropped.
        image_files = sorted(
            f for f in os.listdir(image_dir)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        )
        print(f"Processing {split} split with {len(image_files)} images...")

        for img_filename in image_files:
            img_path = os.path.join(image_dir, img_filename)
            label_filename = os.path.splitext(img_filename)[0] + '.txt'
            label_path = os.path.join(label_dir, label_filename)

            if not os.path.exists(label_path):
                print(f"Warning: No label file found for {img_filename}. Skipping.")
                continue

            # Read image to get its dimensions (needed to de-normalize the YOLO boxes)
            img = cv2.imread(img_path)
            if img is None:
                print(f"Warning: Could not read image {img_path}. Skipping.")
                continue
            height, width, _ = img.shape

            image_annotations = {
                "image_id": image_id_counter,
                # Path relative to the dataset root, so the JSON stays valid if the root moves
                "file_name": os.path.join('images', split, img_filename),
                "width": width,
                "height": height,
                "annotations": []
            }

            with open(label_path, 'r') as f:
                for line_no, line in enumerate(f, start=1):
                    parts = line.strip().split()
                    if len(parts) < 5:
                        continue  # Skip blank / truncated lines

                    try:
                        class_id = int(parts[0])
                        x_center_norm, y_center_norm, width_norm, height_norm = map(float, parts[1:5])
                    except ValueError:
                        print(f"Warning: Malformed values on line {line_no} of {label_path}. Skipping line.")
                        continue

                    if class_id < 0 or (num_classes is not None and class_id >= num_classes):
                        print(f"Warning: Unknown class id {class_id} on line {line_no} of {label_path}. Skipping line.")
                        continue

                    # Convert normalized YOLO format to absolute [xmin, ymin, xmax, ymax]
                    x_center = x_center_norm * width
                    y_center = y_center_norm * height
                    box_width = width_norm * width
                    box_height = height_norm * height

                    xmin = int(x_center - box_width / 2)
                    ymin = int(y_center - box_height / 2)
                    xmax = int(x_center + box_width / 2)
                    ymax = int(y_center + box_height / 2)

                    # Clamp coordinates to image boundaries
                    xmin = max(0, xmin)
                    ymin = max(0, ymin)
                    xmax = min(width, xmax)
                    ymax = min(height, ymax)

                    # Truncation and clamping can collapse a box to zero width/height;
                    # such boxes make Faster R-CNN training fail, so drop them here.
                    if xmax <= xmin or ymax <= ymin:
                        print(f"Warning: Degenerate box on line {line_no} of {label_path}. Skipping line.")
                        continue

                    area = (xmax - xmin) * (ymax - ymin)

                    # IMPORTANT: Map YOLO class_id (0-indexed) to Faster R-CNN label (1-indexed).
                    # For Faster R-CNN in torchvision, class 0 is BACKGROUND, so shift by +1.
                    fasterrcnn_label = class_id + 1

                    image_annotations["annotations"].append({
                        "bbox": [xmin, ymin, xmax, ymax],
                        "category_id": fasterrcnn_label,
                        "area": area,
                        "iscrowd": 0  # Assuming no crowd annotations
                    })

            all_annotations.append(image_annotations)
            image_id_counter += 1

    # Save to JSON
    with open(output_json_path, 'w') as f:
        json.dump(all_annotations, f, indent=4)
    print(f"Conversion complete. Annotations saved to {output_json_path}")


In [9]:


if __name__ == "__main__":
    # --- Configuration ---
    yaml_path = "../Dataset/vessel_wakes.yaml"
    yolo_dataset_root = "../Dataset"
    output_json = "../Dataset/faster_rcnn_annotations.json"

    # Load class names from the dataset YAML file.
    # 'names' may be a {class_id: name} mapping or a plain list ordered by class id;
    # either way the result is a list whose index is the YOLO class id.
    try:
        with open(yaml_path, 'r') as f:
            yaml_config = yaml.safe_load(f)
        names_entry = yaml_config['names']
        if isinstance(names_entry, dict):
            class_names = [names_entry[i] for i in sorted(names_entry.keys())]
        else:
            class_names = list(names_entry)

    except FileNotFoundError:
        print(f"Error: {yaml_path} not found. Please check the path.")
        exit()
    except KeyError:
        print("Error: 'names' key not found in your YAML config. Ensure it defines your class names.")
        exit()
    except Exception as e:
        print(f"An error occurred while loading YAML: {e}")
        exit()

    # Prepend a '__background__' class to match torchvision's convention
    # (this list is only printed here, to show the resulting label mapping).
    fasterrcnn_class_names = ['__background__'] + class_names
    print(f"Detected class names: {class_names}")
    print(f"Faster R-CNN class mapping (index 0 is background): {fasterrcnn_class_names}")

    # Pass the YOLO-ordered names (without background); the converter writes
    # category_id = class_id + 1 for every box.
    convert_yolo_to_fasterrcnn(yolo_dataset_root, output_json, class_names)

Detected class names: ['wake']
Faster R-CNN class mapping (index 0 is background): ['__background__', 'wake']
Processing train split with 9997 images...
Processing valid split with 3443 images...
Processing test split with 6165 images...
Conversion complete. Annotations saved to ../Dataset/faster_rcnn_annotations.json


In [1]:
!pip install opencv-python

Collecting opencv-python
  Downloading opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (19 kB)
Collecting numpy<2.3.0,>=2 (from opencv-python)
  Downloading numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Downloading opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (67.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m162.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.8/16.8 MB[0m [31m154.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy, opencv-python
[2K  Attempting uninstall: numpy
[2K    Found existing installation: numpy 1.26.4
[2K    Uninstalling numpy-1.26.4:━━━━━━━━━━━━━━━━━━[0m [32m0/2[0m [numpy]
[2K      Successfully uninstalle

In [2]:
!pip install torch

Collecting torch
  Downloading torch-2.6.0-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)


In [3]:
!pip install torchvision

Collecting torchvision
  Downloading torchvision-0.21.0-cp310-cp310-manylinux1_x86_64.whl.metadata (6.1 kB)
Downloading torchvision-0.21.0-cp310-cp310-manylinux1_x86_64.whl (7.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m40.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: torchvision
Successfully installed torchvision-0.21.0


In [4]:
import torch
from torch.utils.data import Dataset, DataLoader
import os
import cv2
import json
from torchvision.transforms import functional as F
from PIL import Image

In [5]:
class WakeDetectionDataset(Dataset):
    """
    Detection dataset backed by the JSON written by the YOLO -> Faster R-CNN converter.

    Each JSON record describes one image ("image_id", "file_name", "annotations", ...).
    Only records whose ``file_name`` lies under ``images/<split>/`` are kept.

    ``__getitem__`` returns ``(image, target)`` where ``image`` is an RGB PIL image
    (or whatever ``transform`` turns it into) and ``target`` is a dict of tensors:
    ``boxes`` (N, 4) float32 as [xmin, ymin, xmax, ymax], ``labels`` (N,) int64,
    ``image_id`` (1,), ``area`` (N,) float32 and ``iscrowd`` (N,) int64.
    Images without usable boxes yield empty tensors with N == 0.

    Args:
        root_dir (str): Dataset root that ``file_name`` entries are relative to.
        annotation_file (str): Path to the converted JSON file.
        split (str): Split to load, e.g. 'train' or 'valid'.
        transform (callable, optional): Called as ``transform(image, target)`` and
            must return the transformed ``(image, target)`` pair.
    """

    def __init__(self, root_dir, annotation_file, split, transform=None):
        self.root_dir = root_dir # '../Dataset'
        self.transform = transform
        self.split = split # 'train' or 'valid'

        with open(annotation_file, 'r') as f:
            all_annotations = json.load(f)

        # The converter builds file_name with os.path.join, so the separator depends on
        # the OS that produced the JSON. Compare on '/' so the file works on any OS.
        expected_split_prefix = f"images/{self.split}/"
        self.annotations = [
            ann for ann in all_annotations
            if ann['file_name'].replace('\\', '/').startswith(expected_split_prefix)
        ]

        # Create a mapping from image_id to its annotations
        self.img_data = {ann['image_id']: ann for ann in self.annotations}
        self.image_ids = list(self.img_data.keys())

    def __len__(self):
        """Number of images in the selected split."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Load the idx-th image of the split and build its detection target dict."""
        img_id = self.image_ids[idx]
        data = self.img_data[img_id]

        # Forward slashes are accepted by every supported OS, so normalize before joining.
        img_path = os.path.join(self.root_dir, data['file_name'].replace('\\', '/'))
        image = Image.open(img_path).convert("RGB") # Use PIL for image loading

        boxes = []
        labels = []
        areas = []
        iscrowd = []

        for annotation in data['annotations']:
            xmin, ymin, xmax, ymax = annotation['bbox']
            # Faster R-CNN requires strictly positive box width and height;
            # drop degenerate boxes instead of failing mid-training.
            if xmax <= xmin or ymax <= ymin:
                continue
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(annotation['category_id']) # 1-indexed labels
            areas.append(annotation['area'])
            iscrowd.append(annotation['iscrowd'])

        if boxes:
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            labels = torch.as_tensor(labels, dtype=torch.int64)
            areas = torch.as_tensor(areas, dtype=torch.float32)
            iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)
        else:
            # No objects: keep the expected ranks, in particular (0, 4) for boxes
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)
            areas = torch.zeros((0,), dtype=torch.float32)
            iscrowd = torch.zeros((0,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = torch.tensor([img_id])
        target["area"] = areas
        target["iscrowd"] = iscrowd

        if self.transform is not None:
            image, target = self.transform(image, target) # Custom transform if it handles target

        return image, target

# Helper function for data collation (required for object detection datasets)
def collate_fn(batch):
    """
    Transpose a batch of samples into per-field tuples.

    A list like ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; nothing is stacked, so images and
    targets may differ in size from sample to sample.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Example of a transform (basic, can be expanded)
class Compose(object):
    """Chain transforms that each take and return an ``(image, target)`` pair."""

    def __init__(self, transforms):
        # Sequence of callables, applied in the order given
        self.transforms = transforms

    def __call__(self, image, target):
        """Feed the pair through every transform in turn and return the final pair."""
        pair = (image, target)
        for step in self.transforms:
            new_image, new_target = step(*pair)
            pair = (new_image, new_target)
        return pair

class ToTensor(object):
    """Pair-aware transform: converts the image with ``F.to_tensor``, leaves the target untouched."""

    def __call__(self, image, target):
        return F.to_tensor(image), target

# You would add more transforms here, e.g., RandomHorizontalFlip, Resize, Normalize
# For object detection, transformations usually need to apply to both image and bounding boxes.
# This requires custom transform classes or libraries like Albumentations.

In [None]:
# train_faster_rcnn
import torch
import torch.nn as nn
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import DataLoader
from torchvision import transforms as T
import os
import datetime
import yaml # Import yaml to load class names

# Import your custom dataset and collate_fn
# from faster_rcnn_dataset import WakeDetectionDataset, collate_fn, Compose, ToTensor

# --- Configuration ---
# Dataset root; passed to WakeDetectionDataset as root_dir.
DATASET_ROOT = "../Dataset"
# JSON annotation file loaded by WakeDetectionDataset.
ANNOTATION_FILE = "../Dataset/faster_rcnn_annotations.json"
NUM_CLASSES = 2 # Placeholder; train() overwrites this global with (number of YAML names + 1 for background)
# Batch size used by both the train and validation DataLoader.
BATCH_SIZE = 4
# Number of passes over the training set in train().
NUM_EPOCHS = 50
# SGD optimizer settings used in train().
LEARNING_RATE = 0.005
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0005
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Parent directory for run folders; each train() call creates "<MODEL_NAME>_<timestamp>" inside it.
OUTPUT_DIR = "runs-fasterrcnn"
MODEL_NAME = "faster_rcnn_resnet50_fpn"

def get_model(num_classes):
    """
    Build a Faster R-CNN (ResNet-50 FPN) created with ``weights='DEFAULT'`` and
    replace its box predictor with a fresh one sized for ``num_classes`` outputs
    (the caller includes background in that count).
    """
    detector = fasterrcnn_resnet50_fpn(weights='DEFAULT')
    roi_heads = detector.roi_heads
    # Reuse the input width of the existing classification layer for the new head
    feature_dim = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(feature_dim, num_classes)
    return detector

def get_transform(is_train):
    """
    Return the (image, target) transform pipeline for a dataset.

    ``is_train`` is accepted so training-only augmentation can be added later, but it
    currently has no effect: both modes only convert the image to a tensor.
    Any augmentation added here must update the bounding boxes along with the image
    (e.g. torchvision's T.RandomHorizontalFlip does NOT flip boxes on its own), so it
    needs custom pair-aware transforms or a library such as Albumentations.
    """
    return Compose([ToTensor()])

def train():
    """
    Fine-tune Faster R-CNN on the wake dataset and save weights under OUTPUT_DIR.

    Steps:
      1. Create a timestamped run directory ``OUTPUT_DIR/<MODEL_NAME>_<timestamp>``.
      2. Read the dataset YAML and set the module-level NUM_CLASSES to the number of
         class names + 1 (background). ``names`` may be a {id: name} mapping or a list.
         On a YAML error a message is printed and ``exit()`` is called.
      3. Build the 'train' and 'valid' datasets and their DataLoaders.
      4. Train with SGD and a StepLR schedule for NUM_EPOCHS epochs. After each epoch
         the average validation loss is computed; weights are saved whenever it
         improves, plus a checkpoint every 5 epochs.

    Returns:
        str: Path to the run directory containing the saved ``.pth`` files.
    """
    global NUM_CLASSES

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    run_name = f"{MODEL_NAME}_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}"
    run_output_path = os.path.join(OUTPUT_DIR, run_name)
    os.makedirs(run_output_path, exist_ok=True)

    print(f"Using device: {DEVICE}")

    # Load class names from your YAML file to determine NUM_CLASSES
    yaml_path = "../Dataset/vessel_wakes.yaml"
    try:
        with open(yaml_path, 'r') as f:
            yaml_config = yaml.safe_load(f)
        class_names = yaml_config['names']
        # 'names' may be a {class_id: name} mapping or a plain list ordered by class id
        if isinstance(class_names, dict):
            raw_class_names = [class_names[i] for i in sorted(class_names.keys())]
        else:
            raw_class_names = list(class_names)
        NUM_CLASSES = len(raw_class_names) + 1 # +1 for background
        print(f"Inferred {len(raw_class_names)} custom classes. Setting NUM_CLASSES for Faster R-CNN to {NUM_CLASSES} (including background).")
    except FileNotFoundError:
        print(f"Error: {yaml_path} not found. Please check the path.")
        exit()
    except KeyError:
        print("Error: 'names' key not found in your YAML config. Ensure it defines your class names.")
        exit()
    except Exception as e:
        print(f"An error occurred while loading YAML: {e}")
        exit()

    # Prepare datasets and dataloaders (train and validation splits are loaded separately)
    train_dataset = WakeDetectionDataset(
        root_dir=DATASET_ROOT,
        annotation_file=ANNOTATION_FILE,
        split='train',
        transform=get_transform(is_train=True)
    )

    val_dataset = WakeDetectionDataset(
        root_dir=DATASET_ROOT,
        annotation_file=ANNOTATION_FILE,
        split='valid',
        transform=get_transform(is_train=False) # No augmentation for validation
    )

    # os.cpu_count() can return None; fall back to loading in the main process (0 workers)
    num_workers = (os.cpu_count() or 0) // 2

    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=num_workers,
        collate_fn=collate_fn
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False, # No need to shuffle validation data
        num_workers=num_workers,
        collate_fn=collate_fn
    )

    print(f"Train dataset size: {len(train_dataset)}")
    print(f"Validation dataset size: {len(val_dataset)}")

    model = get_model(NUM_CLASSES).to(DEVICE)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=LEARNING_RATE,
                                momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    best_val_loss = float('inf')

    print("Starting training...")
    for epoch in range(NUM_EPOCHS):
        model.train()
        total_loss = 0
        for i, (images, targets) in enumerate(train_loader):
            images = list(image.to(DEVICE) for image in images)
            targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

            # In training mode the model returns a dict of loss terms; optimize their sum
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            total_loss += losses.item()

            if (i + 1) % 10 == 0:
                print(f"Epoch: {epoch+1}/{NUM_EPOCHS}, Iter: {i+1}/{len(train_loader)}, Train Loss: {losses.item():.4f}")

        lr_scheduler.step()

        # Validation phase.
        # In eval mode the model returns predictions instead of the loss dict, so the
        # validation loss is computed in train mode. It runs under no_grad and without
        # an optimizer step, so no gradients are computed and no weights are updated.
        # (For mAP-style metrics, use a separate eval loop with e.g. pycocotools.)
        model.train()
        val_loss = 0
        with torch.no_grad():
            for images, targets in val_loader:
                images = list(image.to(DEVICE) for image in images)
                targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                val_loss += losses.item()
        model.eval()

        # An empty loader would otherwise raise ZeroDivisionError after a full epoch;
        # report NaN instead (NaN never compares as an improvement below).
        avg_train_loss = total_loss / len(train_loader) if len(train_loader) else float('nan')
        avg_val_loss = val_loss / len(val_loader) if len(val_loader) else float('nan')

        print(f"Epoch {epoch+1} finished.")
        print(f"Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

        # Save the best model
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            model_save_path = os.path.join(run_output_path, f"best_model_epoch_{epoch+1}.pth")
            torch.save(model.state_dict(), model_save_path)
            print(f"Saved best model to {model_save_path}")

        # Save checkpoint periodically
        if (epoch + 1) % 5 == 0:
            checkpoint_path = os.path.join(run_output_path, f"checkpoint_epoch_{epoch+1}.pth")
            torch.save(model.state_dict(), checkpoint_path)
            print(f"Saved checkpoint to {checkpoint_path}")

    print("Training complete!")
    return run_output_path # Return the path to the run directory

# Start training when this cell (or the equivalent script) is executed directly.
# train() returns the run directory path; it is not captured here.
if __name__ == "__main__":
    train()

Using device: cuda
Inferred 1 custom classes. Setting NUM_CLASSES for Faster R-CNN to 2 (including background).
Train dataset size: 9997
Validation dataset size: 3443
Starting training...
Epoch: 1/50, Iter: 10/2500, Train Loss: 0.2283
Epoch: 1/50, Iter: 20/2500, Train Loss: 0.1431
Epoch: 1/50, Iter: 30/2500, Train Loss: 0.1928
Epoch: 1/50, Iter: 40/2500, Train Loss: 0.1913
Epoch: 1/50, Iter: 50/2500, Train Loss: 0.2420
Epoch: 1/50, Iter: 60/2500, Train Loss: 0.1634
Epoch: 1/50, Iter: 70/2500, Train Loss: 0.2237
Epoch: 1/50, Iter: 80/2500, Train Loss: 0.1444
Epoch: 1/50, Iter: 90/2500, Train Loss: 0.1130
Epoch: 1/50, Iter: 100/2500, Train Loss: 0.1830
Epoch: 1/50, Iter: 110/2500, Train Loss: 0.1909
Epoch: 1/50, Iter: 120/2500, Train Loss: 0.1044
Epoch: 1/50, Iter: 130/2500, Train Loss: 0.1682
Epoch: 1/50, Iter: 140/2500, Train Loss: 0.1290
Epoch: 1/50, Iter: 150/2500, Train Loss: 0.1683
Epoch: 1/50, Iter: 160/2500, Train Loss: 0.1953
Epoch: 1/50, Iter: 170/2500, Train Loss: 0.3509
Epoch

Epoch: 1/50, Iter: 1670/2500, Train Loss: 0.1479
Epoch: 1/50, Iter: 1680/2500, Train Loss: 0.0940
Epoch: 1/50, Iter: 1690/2500, Train Loss: 0.1373
Epoch: 1/50, Iter: 1700/2500, Train Loss: 0.0928
Epoch: 1/50, Iter: 1710/2500, Train Loss: 0.0745
Epoch: 1/50, Iter: 1720/2500, Train Loss: 0.0831
Epoch: 1/50, Iter: 1730/2500, Train Loss: 0.1204
Epoch: 1/50, Iter: 1740/2500, Train Loss: 0.0843
Epoch: 1/50, Iter: 1750/2500, Train Loss: 0.0682
Epoch: 1/50, Iter: 1760/2500, Train Loss: 0.1529
Epoch: 1/50, Iter: 1770/2500, Train Loss: 0.1473
Epoch: 1/50, Iter: 1780/2500, Train Loss: 0.0797
Epoch: 1/50, Iter: 1790/2500, Train Loss: 0.1057
Epoch: 1/50, Iter: 1800/2500, Train Loss: 0.0686
Epoch: 1/50, Iter: 1810/2500, Train Loss: 0.0937
Epoch: 1/50, Iter: 1820/2500, Train Loss: 0.0681
Epoch: 1/50, Iter: 1830/2500, Train Loss: 0.1139
Epoch: 1/50, Iter: 1840/2500, Train Loss: 0.1163
Epoch: 1/50, Iter: 1850/2500, Train Loss: 0.1184
Epoch: 1/50, Iter: 1860/2500, Train Loss: 0.0922
Epoch: 1/50, Iter: 1

Epoch: 2/50, Iter: 830/2500, Train Loss: 0.0554
Epoch: 2/50, Iter: 840/2500, Train Loss: 0.0588
Epoch: 2/50, Iter: 850/2500, Train Loss: 0.1694
Epoch: 2/50, Iter: 860/2500, Train Loss: 0.0825
Epoch: 2/50, Iter: 870/2500, Train Loss: 0.0783
Epoch: 2/50, Iter: 880/2500, Train Loss: 0.0574
Epoch: 2/50, Iter: 890/2500, Train Loss: 0.1271
Epoch: 2/50, Iter: 900/2500, Train Loss: 0.0774
Epoch: 2/50, Iter: 910/2500, Train Loss: 0.1085
Epoch: 2/50, Iter: 920/2500, Train Loss: 0.0858
Epoch: 2/50, Iter: 930/2500, Train Loss: 0.1020
Epoch: 2/50, Iter: 940/2500, Train Loss: 0.1347
Epoch: 2/50, Iter: 950/2500, Train Loss: 0.0722
Epoch: 2/50, Iter: 960/2500, Train Loss: 0.0616
Epoch: 2/50, Iter: 970/2500, Train Loss: 0.0770
Epoch: 2/50, Iter: 980/2500, Train Loss: 0.0877
Epoch: 2/50, Iter: 990/2500, Train Loss: 0.1331
Epoch: 2/50, Iter: 1000/2500, Train Loss: 0.0763
Epoch: 2/50, Iter: 1010/2500, Train Loss: 0.1068
Epoch: 2/50, Iter: 1020/2500, Train Loss: 0.0705
Epoch: 2/50, Iter: 1030/2500, Train L

Epoch 2 finished.
Train Loss: 0.1017, Val Loss: 0.1006
Saved best model to runs-fasterrcnn/faster_rcnn_resnet50_fpn_20250707-145842/best_model_epoch_2.pth
Epoch: 3/50, Iter: 10/2500, Train Loss: 0.0685
Epoch: 3/50, Iter: 20/2500, Train Loss: 0.0438
Epoch: 3/50, Iter: 30/2500, Train Loss: 0.0899
Epoch: 3/50, Iter: 40/2500, Train Loss: 0.0364
Epoch: 3/50, Iter: 50/2500, Train Loss: 0.1644
Epoch: 3/50, Iter: 60/2500, Train Loss: 0.0754
Epoch: 3/50, Iter: 70/2500, Train Loss: 0.0747
Epoch: 3/50, Iter: 80/2500, Train Loss: 0.1166
Epoch: 3/50, Iter: 90/2500, Train Loss: 0.0861
Epoch: 3/50, Iter: 100/2500, Train Loss: 0.0676
Epoch: 3/50, Iter: 110/2500, Train Loss: 0.2648
Epoch: 3/50, Iter: 120/2500, Train Loss: 0.0948
Epoch: 3/50, Iter: 130/2500, Train Loss: 0.0767
Epoch: 3/50, Iter: 140/2500, Train Loss: 0.0228
Epoch: 3/50, Iter: 150/2500, Train Loss: 0.0820
Epoch: 3/50, Iter: 160/2500, Train Loss: 0.1470
Epoch: 3/50, Iter: 170/2500, Train Loss: 0.0738
Epoch: 3/50, Iter: 180/2500, Train Los

Epoch: 3/50, Iter: 1680/2500, Train Loss: 0.0781
Epoch: 3/50, Iter: 1690/2500, Train Loss: 0.1294
Epoch: 3/50, Iter: 1700/2500, Train Loss: 0.1026
Epoch: 3/50, Iter: 1710/2500, Train Loss: 0.0896
Epoch: 3/50, Iter: 1720/2500, Train Loss: 0.1270
Epoch: 3/50, Iter: 1730/2500, Train Loss: 0.1171
Epoch: 3/50, Iter: 1740/2500, Train Loss: 0.0641
Epoch: 3/50, Iter: 1750/2500, Train Loss: 0.1144
Epoch: 3/50, Iter: 1760/2500, Train Loss: 0.0456
Epoch: 3/50, Iter: 1770/2500, Train Loss: 0.0958
Epoch: 3/50, Iter: 1780/2500, Train Loss: 0.0374
Epoch: 3/50, Iter: 1790/2500, Train Loss: 0.0946
Epoch: 3/50, Iter: 1800/2500, Train Loss: 0.0986
Epoch: 3/50, Iter: 1810/2500, Train Loss: 0.0853
Epoch: 3/50, Iter: 1820/2500, Train Loss: 0.1268
Epoch: 3/50, Iter: 1830/2500, Train Loss: 0.0394
Epoch: 3/50, Iter: 1840/2500, Train Loss: 0.1345
Epoch: 3/50, Iter: 1850/2500, Train Loss: 0.0481
Epoch: 3/50, Iter: 1860/2500, Train Loss: 0.1473
Epoch: 3/50, Iter: 1870/2500, Train Loss: 0.0316
Epoch: 3/50, Iter: 1

Epoch: 4/50, Iter: 860/2500, Train Loss: 0.0529
Epoch: 4/50, Iter: 870/2500, Train Loss: 0.1230
Epoch: 4/50, Iter: 880/2500, Train Loss: 0.0678
Epoch: 4/50, Iter: 890/2500, Train Loss: 0.0596
Epoch: 4/50, Iter: 900/2500, Train Loss: 0.0730
Epoch: 4/50, Iter: 910/2500, Train Loss: 0.0755
Epoch: 4/50, Iter: 920/2500, Train Loss: 0.0761
Epoch: 4/50, Iter: 930/2500, Train Loss: 0.0782
Epoch: 4/50, Iter: 940/2500, Train Loss: 0.1571
Epoch: 4/50, Iter: 950/2500, Train Loss: 0.0762
Epoch: 4/50, Iter: 960/2500, Train Loss: 0.0489
Epoch: 4/50, Iter: 970/2500, Train Loss: 0.0887
Epoch: 4/50, Iter: 980/2500, Train Loss: 0.0913
Epoch: 4/50, Iter: 990/2500, Train Loss: 0.1748
Epoch: 4/50, Iter: 1000/2500, Train Loss: 0.0428
Epoch: 4/50, Iter: 1010/2500, Train Loss: 0.1836
Epoch: 4/50, Iter: 1020/2500, Train Loss: 0.1049
Epoch: 4/50, Iter: 1030/2500, Train Loss: 0.1604
Epoch: 4/50, Iter: 1040/2500, Train Loss: 0.0857
Epoch: 4/50, Iter: 1050/2500, Train Loss: 0.0639
Epoch: 4/50, Iter: 1060/2500, Trai

Epoch: 5/50, Iter: 10/2500, Train Loss: 0.0847
Epoch: 5/50, Iter: 20/2500, Train Loss: 0.0913
Epoch: 5/50, Iter: 30/2500, Train Loss: 0.1233
Epoch: 5/50, Iter: 40/2500, Train Loss: 0.0994
Epoch: 5/50, Iter: 50/2500, Train Loss: 0.1025
Epoch: 5/50, Iter: 60/2500, Train Loss: 0.0676
Epoch: 5/50, Iter: 70/2500, Train Loss: 0.0489
Epoch: 5/50, Iter: 80/2500, Train Loss: 0.0496
Epoch: 5/50, Iter: 90/2500, Train Loss: 0.1146
Epoch: 5/50, Iter: 100/2500, Train Loss: 0.1703
Epoch: 5/50, Iter: 110/2500, Train Loss: 0.0786
Epoch: 5/50, Iter: 120/2500, Train Loss: 0.0443
Epoch: 5/50, Iter: 130/2500, Train Loss: 0.0537
Epoch: 5/50, Iter: 140/2500, Train Loss: 0.1122
Epoch: 5/50, Iter: 150/2500, Train Loss: 0.0670
Epoch: 5/50, Iter: 160/2500, Train Loss: 0.0573
Epoch: 5/50, Iter: 170/2500, Train Loss: 0.0896
Epoch: 5/50, Iter: 180/2500, Train Loss: 0.1581
Epoch: 5/50, Iter: 190/2500, Train Loss: 0.1744
Epoch: 5/50, Iter: 200/2500, Train Loss: 0.0483
Epoch: 5/50, Iter: 210/2500, Train Loss: 0.0899
E

Epoch: 5/50, Iter: 1710/2500, Train Loss: 0.1274
Epoch: 5/50, Iter: 1720/2500, Train Loss: 0.0574
Epoch: 5/50, Iter: 1730/2500, Train Loss: 0.0439
Epoch: 5/50, Iter: 1740/2500, Train Loss: 0.1536
Epoch: 5/50, Iter: 1750/2500, Train Loss: 0.1065
Epoch: 5/50, Iter: 1760/2500, Train Loss: 0.0598
Epoch: 5/50, Iter: 1770/2500, Train Loss: 0.1511
Epoch: 5/50, Iter: 1780/2500, Train Loss: 0.0514
Epoch: 5/50, Iter: 1790/2500, Train Loss: 0.0500
Epoch: 5/50, Iter: 1800/2500, Train Loss: 0.1107
Epoch: 5/50, Iter: 1810/2500, Train Loss: 0.0858
Epoch: 5/50, Iter: 1820/2500, Train Loss: 0.0932
Epoch: 5/50, Iter: 1830/2500, Train Loss: 0.2033
Epoch: 5/50, Iter: 1840/2500, Train Loss: 0.1030
Epoch: 5/50, Iter: 1850/2500, Train Loss: 0.0771
Epoch: 5/50, Iter: 1860/2500, Train Loss: 0.2034
Epoch: 5/50, Iter: 1870/2500, Train Loss: 0.1015
Epoch: 5/50, Iter: 1880/2500, Train Loss: 0.1247
Epoch: 5/50, Iter: 1890/2500, Train Loss: 0.0816
Epoch: 5/50, Iter: 1900/2500, Train Loss: 0.0826
Epoch: 5/50, Iter: 1

Epoch: 6/50, Iter: 850/2500, Train Loss: 0.0534
Epoch: 6/50, Iter: 860/2500, Train Loss: 0.1128
Epoch: 6/50, Iter: 870/2500, Train Loss: 0.0796
Epoch: 6/50, Iter: 880/2500, Train Loss: 0.0717
Epoch: 6/50, Iter: 890/2500, Train Loss: 0.0801
Epoch: 6/50, Iter: 900/2500, Train Loss: 0.0788
Epoch: 6/50, Iter: 910/2500, Train Loss: 0.0695
Epoch: 6/50, Iter: 920/2500, Train Loss: 0.0505
Epoch: 6/50, Iter: 930/2500, Train Loss: 0.0783
Epoch: 6/50, Iter: 940/2500, Train Loss: 0.0966
Epoch: 6/50, Iter: 950/2500, Train Loss: 0.0637
Epoch: 6/50, Iter: 960/2500, Train Loss: 0.0684
Epoch: 6/50, Iter: 970/2500, Train Loss: 0.1053
Epoch: 6/50, Iter: 980/2500, Train Loss: 0.0944
Epoch: 6/50, Iter: 990/2500, Train Loss: 0.1028
Epoch: 6/50, Iter: 1000/2500, Train Loss: 0.0769
Epoch: 6/50, Iter: 1010/2500, Train Loss: 0.0820
Epoch: 6/50, Iter: 1020/2500, Train Loss: 0.0884
Epoch: 6/50, Iter: 1030/2500, Train Loss: 0.0761
Epoch: 6/50, Iter: 1040/2500, Train Loss: 0.0832
Epoch: 6/50, Iter: 1050/2500, Train

In [None]:
# Second training run: custom anchors and stricter detection thresholds, which may reduce false positives (FP)

# train_faster_rcnn (modified saving = same names, new folder)
import torch
import torch.nn as nn
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator
from torch.utils.data import DataLoader
from torchvision import transforms as T
import os
import datetime
import yaml


# Use your custom dataset + collate + transforms that handle boxes
# from faster_rcnn_dataset import WakeDetectionDataset, collate_fn, Compose, ToTensor

# --- Configuration ---
# NOTE: these names re-define the constants of the first training cell; only
# OUTPUT_DIR differs in value, so this run writes to a separate folder.
# Dataset root; passed to WakeDetectionDataset as root_dir.
DATASET_ROOT = "../Dataset"
# JSON annotation file loaded by WakeDetectionDataset.
ANNOTATION_FILE = "../Dataset/faster_rcnn_annotations.json"
NUM_CLASSES = 2  # placeholder; train() overwrites this global from the YAML names (+1 for background)
# Batch size used by both the train and validation DataLoader.
BATCH_SIZE = 4
# Epoch count and optimizer hyperparameters (presumably consumed by train() below,
# as in the first training cell — confirm against the rest of that function).
NUM_EPOCHS = 50
LEARNING_RATE = 0.005
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0005
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Parent directory for run folders; each train() call creates "<MODEL_NAME>_<timestamp>" inside it.
OUTPUT_DIR = "runs-fasterrcnn2"
MODEL_NAME = "faster_rcnn_resnet50_fpn"

from torchvision.models.detection.rpn import AnchorGenerator
# Pick the weights passed to get_model() as weights_backbone (ImageNet init for the
# ResNet-50 backbone). If the weights enum cannot be imported or resolved, fall back
# to None — presumably meaning the backbone is then not pretrained; confirm for the
# installed torchvision version.
try:
    from torchvision.models import ResNet50_Weights
    BACKBONE_WEIGHTS = ResNet50_Weights.DEFAULT
except Exception:
    BACKBONE_WEIGHTS = None

def get_model(num_classes):
    """
    Build a Faster R-CNN (ResNet-50 FPN) with custom anchors and detection thresholds,
    then replace its box predictor with one sized for ``num_classes`` outputs
    (the caller includes background in that count).

    Anchors use one size per FPN level (5 levels) and the same set of aspect ratios on
    every level, including very flat/tall ratios meant for long, thin objects.
    """
    level_sizes = tuple((size,) for size in (32, 64, 128, 256, 512))
    ratios = (0.2, 0.33, 0.5, 1.0, 2.0, 3.0, 5.0)  # long/thin-friendly
    anchor_generator = AnchorGenerator(
        sizes=level_sizes,
        aspect_ratios=(ratios,) * len(level_sizes),
    )

    detector_kwargs = dict(
        # Full-detector weights are not loaded: the changed anchors would not match its heads
        weights=None,
        # Only the backbone is initialised (from ImageNet weights)
        weights_backbone=BACKBONE_WEIGHTS,
        rpn_anchor_generator=anchor_generator,
        rpn_nms_thresh=0.5,
        box_score_thresh=0.35,
        box_nms_thresh=0.30,
        box_detections_per_img=75,
        box_fg_iou_thresh=0.60,
        box_bg_iou_thresh=0.40,
        box_batch_size_per_image=512,
        box_positive_fraction=0.20,
        min_size=1000,
        max_size=1800,
    )
    detector = fasterrcnn_resnet50_fpn(**detector_kwargs)

    # Reuse the input width of the existing classification layer for the new head
    roi_heads = detector.roi_heads
    feature_dim = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(feature_dim, num_classes)
    return detector

def get_transform(is_train):
    """
    Return the (image, target) transform pipeline for a dataset.

    ``is_train`` currently has no effect: both modes only convert the image to a
    tensor. Train-time augmentation can be added here provided the transforms
    handle the bounding boxes together with the image.
    """
    return Compose([ToTensor()])

def train():
    """Train the Faster R-CNN detector and return the run's output directory.

    Steps performed:
      1. Create a timestamped run folder under OUTPUT_DIR.
      2. Read the class names from the dataset YAML and overwrite the global
         NUM_CLASSES with (number of classes + 1 for background).
      3. Build the 'train' and 'valid' datasets / data loaders.
      4. Run NUM_EPOCHS epochs of SGD with a StepLR schedule, computing the
         average training and validation loss after each epoch.
      5. Save the weights whenever the validation loss improves, plus a
         periodic checkpoint every 5 epochs.

    Returns:
        str: Path of the run folder that holds the saved ``.pth`` files.

    Raises:
        SystemExit: If the YAML config cannot be read or has no 'names' key
            (an explanatory message is printed first).
    """
    global NUM_CLASSES

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    run_name = f"{MODEL_NAME}_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}"
    run_output_path = os.path.join(OUTPUT_DIR, run_name)
    os.makedirs(run_output_path, exist_ok=True)

    print(f"Using device: {DEVICE}")

    # Load class names from YAML to determine NUM_CLASSES.
    # NOTE: failures raise SystemExit instead of calling exit(); exit() is an
    # interactive-shell helper and is not a reliable way to abort a function
    # from inside a notebook kernel.
    yaml_path = "../Dataset/vessel_wakes.yaml"
    try:
        with open(yaml_path, 'r') as f:
            yaml_config = yaml.safe_load(f)
            class_names = yaml_config['names']
            # 'names' may be either {index: name} or a plain list of names.
            if isinstance(class_names, dict):
                raw_class_names = [class_names[i] for i in sorted(class_names.keys())]
            else:
                raw_class_names = list(class_names)
            NUM_CLASSES = len(raw_class_names) + 1  # +1 for background
            print(f"Inferred {len(raw_class_names)} custom classes. Setting NUM_CLASSES to {NUM_CLASSES}.")
    except FileNotFoundError:
        print(f"Error: {yaml_path} not found. Please check the path.")
        raise SystemExit(1)
    except KeyError:
        print("Error: 'names' key not found in your YAML config. Ensure it defines your class names.")
        raise SystemExit(1)
    except Exception as e:
        print(f"An error occurred while loading YAML: {e}")
        raise SystemExit(1)

    # Datasets and loaders
    train_dataset = WakeDetectionDataset(
        root_dir=DATASET_ROOT,
        annotation_file=ANNOTATION_FILE,
        split='train',
        transform=get_transform(is_train=True)
    )
    val_dataset = WakeDetectionDataset(
        root_dir=DATASET_ROOT,
        annotation_file=ANNOTATION_FILE,
        split='valid',
        transform=get_transform(is_train=False)
    )

    # os.cpu_count() may return None when the CPU count cannot be determined;
    # fall back to a single core (-> 0 workers, i.e. load in the main process).
    loader_workers = (os.cpu_count() or 1) // 2

    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=loader_workers,
        collate_fn=collate_fn
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=loader_workers,
        collate_fn=collate_fn
    )

    print(f"Train dataset size: {len(train_dataset)}")
    print(f"Validation dataset size: {len(val_dataset)}")

    model = get_model(NUM_CLASSES).to(DEVICE)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=LEARNING_RATE, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    # LR is multiplied by 0.1 every 5 epochs.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    best_val_loss = float('inf')

    print("Starting training...")
    for epoch in range(NUM_EPOCHS):
        model.train()
        total_loss = 0.0
        for i, (images, targets) in enumerate(train_loader):
            images = [img.to(DEVICE) for img in images]
            targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

            # With targets supplied, the model returns a dict of loss terms.
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            total_loss += losses.item()

            if (i + 1) % 100 == 0:
                print(f"Epoch: {epoch+1}/{NUM_EPOCHS}, Iter: {i+1}/{len(train_loader)}, Train Loss: {losses.item():.4f}")

        lr_scheduler.step()

        # Validation loss. The model is kept in train mode (without gradients)
        # because that is the mode in which it returns the loss dict.
        # NOTE(review): any layer that behaves differently in train mode will
        # do so here as well - confirm this is acceptable for the metric.
        val_loss = 0.0
        model.train()
        with torch.no_grad():
            for i, (images, targets) in enumerate(val_loader):
                images = [img.to(DEVICE) for img in images]
                targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                val_loss += losses.item()
        model.eval()

        # max(..., 1) keeps an empty split from raising ZeroDivisionError.
        avg_train_loss = total_loss / max(len(train_loader), 1)
        avg_val_loss = val_loss / max(len(val_loader), 1)

        print(f"Epoch {epoch+1} finished.")
        print(f"Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

        # Save a new "best" file each time the validation loss improves.
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            model_save_path = os.path.join(run_output_path, f"best_model_epoch_{epoch+1}.pth")
            torch.save(model.state_dict(), model_save_path)
            print(f"Saved best model to {model_save_path}")

        # Periodic checkpoint, independent of the validation loss.
        if (epoch + 1) % 5 == 0:
            checkpoint_path = os.path.join(run_output_path, f"checkpoint_epoch_{epoch+1}.pth")
            torch.save(model.state_dict(), checkpoint_path)
            print(f"Saved checkpoint to {checkpoint_path}")

    print("Training complete!")
    return run_output_path

# Entry point: start a full training run when this code is executed directly.
if __name__ == "__main__":
    train()


Using device: cuda
Inferred 1 custom classes. Setting NUM_CLASSES to 2.
Train dataset size: 9997
Validation dataset size: 3443
Starting training...
Epoch: 1/50, Iter: 100/2500, Train Loss: 0.0467
Epoch: 1/50, Iter: 200/2500, Train Loss: 0.0593
Epoch: 1/50, Iter: 300/2500, Train Loss: 0.0406
Epoch: 1/50, Iter: 400/2500, Train Loss: 0.0434
Epoch: 1/50, Iter: 500/2500, Train Loss: 0.0291
Epoch: 1/50, Iter: 600/2500, Train Loss: 0.0297
Epoch: 1/50, Iter: 700/2500, Train Loss: 0.0346
Epoch: 1/50, Iter: 800/2500, Train Loss: 0.0365
Epoch: 1/50, Iter: 900/2500, Train Loss: 0.0312
Epoch: 1/50, Iter: 1000/2500, Train Loss: 0.0259
Epoch: 1/50, Iter: 1100/2500, Train Loss: 0.0355
Epoch: 1/50, Iter: 1200/2500, Train Loss: 0.0255
Epoch: 1/50, Iter: 1300/2500, Train Loss: 0.0127
Epoch: 1/50, Iter: 1400/2500, Train Loss: 0.0237
Epoch: 1/50, Iter: 1500/2500, Train Loss: 0.0284
Epoch: 1/50, Iter: 1600/2500, Train Loss: 0.0220
Epoch: 1/50, Iter: 1700/2500, Train Loss: 0.0146
Epoch: 1/50, Iter: 1800/2500

Epoch 6 finished.
Train Loss: 0.0177, Val Loss: 0.0184
Saved best model to runs-fasterrcnn2/faster_rcnn_resnet50_fpn_20250811-005533/best_model_epoch_6.pth
Epoch: 7/50, Iter: 100/2500, Train Loss: 0.0154
Epoch: 7/50, Iter: 200/2500, Train Loss: 0.0167
Epoch: 7/50, Iter: 300/2500, Train Loss: 0.0912
Epoch: 7/50, Iter: 400/2500, Train Loss: 0.0146
Epoch: 7/50, Iter: 500/2500, Train Loss: 0.0302
Epoch: 7/50, Iter: 600/2500, Train Loss: 0.0384
Epoch: 7/50, Iter: 700/2500, Train Loss: 0.0137
Epoch: 7/50, Iter: 800/2500, Train Loss: 0.0144
Epoch: 7/50, Iter: 900/2500, Train Loss: 0.0390
Epoch: 7/50, Iter: 1000/2500, Train Loss: 0.0092
Epoch: 7/50, Iter: 1100/2500, Train Loss: 0.0121
Epoch: 7/50, Iter: 1200/2500, Train Loss: 0.0090
Epoch: 7/50, Iter: 1300/2500, Train Loss: 0.0145
Epoch: 7/50, Iter: 1400/2500, Train Loss: 0.0337
Epoch: 7/50, Iter: 1500/2500, Train Loss: 0.0088
Epoch: 7/50, Iter: 1600/2500, Train Loss: 0.0230
Epoch: 7/50, Iter: 1700/2500, Train Loss: 0.0170
Epoch: 7/50, Iter: 1

In [23]:
!pip install numpy==1.26.4 --force-reinstall

Collecting numpy==1.26.4
  Using cached numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.2.6
    Uninstalling numpy-2.2.6:
      Successfully uninstalled numpy-2.2.6
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.3 which is incompatible.[0m[31m
[0mSuccessfully installed numpy-1.26.4


In [19]:
# post processing
# Keep only predictions with score > 0.6

# Apply Non-Maximum Suppression (NMS) with IoU > 0.9 to remove boxes that almost
# entirely overlap (it keeps the higher score box)

import numpy as np

def postprocess_predictions(boxes, scores, labels, score_thresh=0.6, iou_thresh=0.9):
    """
    Applies greedy, class-agnostic NMS to remove near-duplicate boxes.

    Boxes are visited from highest to lowest score; a box is dropped when its
    IoU with an already kept (higher-scoring) box is greater than `iou_thresh`.

    Score filtering is currently disabled: `score_thresh` is accepted for
    interface compatibility but not applied, so `removed_low_score` is always 0.

    Args:
        boxes (ndarray): shape [N, 4] -> [xmin, ymin, xmax, ymax]
        scores (ndarray): shape [N]
        labels (ndarray): shape [N]
        score_thresh (float): score cutoff (currently unused, see above)
        iou_thresh (float): IoU threshold for removing overlapping boxes

    Returns:
        tuple: (filtered_boxes, filtered_scores, filtered_labels,
                initial_count, removed_low_score, total_removed)
               The filtered arrays are ordered by descending score.
    """
    # Track how many boxes we start with
    initial_count = len(scores)

    # Score filtering is switched off, so nothing is removed at this stage.
    removed_low_score = 0

    # --- Sort boxes by score (highest first) ---
    order = scores.argsort()[::-1]
    boxes = boxes[order]
    scores = scores[order]
    labels = labels[order]

    # --- NMS: remove duplicates with IoU > iou_thresh ---
    # `candidates` holds indices into the *sorted* arrays, so every kept index
    # stays valid when the arrays are sliced once at the end.
    candidates = np.arange(len(boxes))
    keep_indices = []
    removed_nms = 0  # counter for how many boxes we remove due to overlap

    while candidates.size > 0:
        # The first candidate is always the best-scoring box still alive.
        top = candidates[0]
        keep_indices.append(top)

        rest = candidates[1:]
        if rest.size == 0:
            break

        # Compare the kept box with every remaining candidate.
        ious = compute_iou_batch(boxes[top], boxes[rest])

        # Candidates survive when their IoU is <= threshold.
        survivors = rest[ious <= iou_thresh]
        removed_nms += rest.size - survivors.size
        candidates = survivors

    keep_indices = np.asarray(keep_indices, dtype=int)
    boxes = boxes[keep_indices]
    scores = scores[keep_indices]
    labels = labels[keep_indices]

    total_removed = removed_low_score + removed_nms

    return boxes, scores, labels, initial_count, removed_low_score, total_removed


def compute_iou_batch(box, boxes):
    """Vectorized IoU for one box vs. many boxes.

    Args:
        box: a single box [xmin, ymin, xmax, ymax].
        boxes (ndarray): shape [M, 4], same coordinate convention.

    Returns:
        ndarray of shape [M] with the IoU of `box` against each row of `boxes`.
    """
    # Corners of the intersection rectangle.
    xA = np.maximum(box[0], boxes[:, 0])
    yA = np.maximum(box[1], boxes[:, 1])
    xB = np.minimum(box[2], boxes[:, 2])
    yB = np.minimum(box[3], boxes[:, 3])

    # Clamp at zero so disjoint boxes contribute no intersection area.
    inter_w = np.maximum(0, xB - xA)
    inter_h = np.maximum(0, yB - yA)
    inter = inter_w * inter_h

    area1 = (box[2] - box[0]) * (box[3] - box[1])
    area2 = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    # The small epsilon avoids a division by zero for degenerate boxes.
    return inter / (area1 + area2 - inter + 1e-6)


In [35]:
# second post processing
import torch
import torchvision.ops as ops
import numpy as np

def calculate_iou(boxA, boxB):
    """Intersection over union of two boxes given as (x1, y1, x2, y2).

    Returns 0.0 when the union has no area (e.g. both boxes are degenerate),
    instead of dividing by zero.
    """
    # Determine the coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # Compute the area of intersection (zero when the boxes do not overlap)
    interArea = max(0, xB - xA) * max(0, yB - yA)

    # Compute the area of each box
    boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])

    # Guard the division: an empty union means there is nothing to overlap.
    unionArea = boxAArea + boxBArea - interArea
    if unionArea <= 0:
        return 0.0

    # Compute the intersection over union
    iou = interArea / float(unionArea)
    return iou

def postprocess_predictions(boxes, scores, labels, iou_thresh):
    """
    Runs torchvision's Non-Maximum Suppression over the predicted boxes.

    The labels are not passed to NMS, so suppression is class-agnostic; they
    are only filtered with the same indices as the boxes and scores.

    Args:
        boxes (np.array): Predicted boxes in (x1, y1, x2, y2) format.
        scores (np.array): Confidence score of each predicted box.
        labels (np.array): Label of each predicted box.
        iou_thresh (float): IoU threshold handed to ``ops.nms``.

    Returns:
        tuple: (filtered_boxes, filtered_scores, filtered_labels, initial_count, duplicated_removed)
               - filtered_*: the inputs indexed by the indices NMS keeps.
               - initial_count: number of boxes before NMS.
               - duplicated_removed: number of boxes NMS discarded.
               Empty input is returned unchanged with both counts equal to 0.
    """
    num_before = len(boxes)
    if num_before == 0:
        return boxes, scores, labels, num_before, 0

    # ops.nms works on float tensors in (x1, y1, x2, y2) format and returns
    # the indices to keep; convert them once and reuse for all three arrays.
    keep = ops.nms(
        torch.from_numpy(boxes).float(),
        torch.from_numpy(scores).float(),
        iou_thresh,
    ).cpu().numpy()

    kept_boxes = boxes[keep]
    kept_scores = scores[keep]
    kept_labels = labels[keep]

    return kept_boxes, kept_scores, kept_labels, num_before, num_before - len(kept_boxes)


In [37]:
################### (Soft-NMS) ###################
import torch
import torchvision.ops as ops
import numpy as np

# calculate_iou (defined in an earlier cell) can still be used to check IoU values by hand;
# the Soft-NMS post-processing below does not depend on it.

def postprocess_predictions_soft_nms(boxes, scores, labels, iou_thresh, sigma=0.5, score_threshold=0.001):
    """
    Applies Gaussian Soft-Non-Maximum Suppression (Soft-NMS) to the predicted boxes.

    torchvision.ops does not provide a Soft-NMS operator, so the algorithm is
    implemented here with NumPy. Boxes are selected greedily by their current
    (decayed) score. After each selection, every remaining box whose IoU with
    the selected box exceeds `iou_thresh` has its score multiplied by
    ``exp(-iou**2 / sigma)``. Boxes whose score falls below `score_threshold`
    are discarded. Suppression is class-agnostic (labels are only carried along).

    Args:
        boxes (np.array): Predicted boxes in (x1, y1, x2, y2) format, shape [N, 4].
        scores (np.array): Confidence scores for each predicted box, shape [N].
        labels (np.array): Labels for each predicted box, shape [N].
        iou_thresh (float): Boxes overlapping a selected box by more than this
                            IoU have their score decayed; others are untouched.
        sigma (float): Width of the Gaussian penalty; must be > 0.
                       Higher sigma means a "softer" penalty (scores reduce less).
                       Lower sigma means a "harder" penalty, closer to traditional NMS.
        score_threshold (float): Boxes whose (decayed) score is below this
                                 value are removed.

    Returns:
        tuple: (filtered_boxes, filtered_scores, filtered_labels, initial_count, duplicated_removed)
               - filtered_boxes: float32 boxes that survived, in selection order.
               - filtered_scores: float32 decayed scores of the surviving boxes.
               - filtered_labels: int64 labels of the surviving boxes.
               - initial_count: Number of boxes before Soft-NMS.
               - duplicated_removed: Number of boxes removed (score below final threshold).
               Empty input is returned unchanged with both counts equal to 0.
    """
    initial_count = len(boxes)

    if initial_count == 0:
        return boxes, scores, labels, initial_count, 0

    boxes_arr = np.asarray(boxes, dtype=np.float32)
    decayed = np.array(scores, dtype=np.float32)  # working copy; the input is not modified
    labels_arr = np.asarray(labels).astype(np.int64)

    x1, y1, x2, y2 = boxes_arr[:, 0], boxes_arr[:, 1], boxes_arr[:, 2], boxes_arr[:, 3]
    areas = (x2 - x1) * (y2 - y1)

    remaining = np.arange(initial_count)  # indices of boxes not yet selected or dropped
    selected = []

    while remaining.size > 0:
        # Pick the remaining box with the highest current score.
        best_pos = int(np.argmax(decayed[remaining]))
        best = remaining[best_pos]
        if decayed[best] < score_threshold:
            break  # everything left is below the threshold
        selected.append(best)
        remaining = np.delete(remaining, best_pos)
        if remaining.size == 0:
            break

        # IoU of the selected box against every remaining box.
        inter_w = np.maximum(0.0, np.minimum(x2[best], x2[remaining]) - np.maximum(x1[best], x1[remaining]))
        inter_h = np.maximum(0.0, np.minimum(y2[best], y2[remaining]) - np.maximum(y1[best], y1[remaining]))
        inter = inter_w * inter_h
        union = areas[best] + areas[remaining] - inter
        iou = inter / np.maximum(union, 1e-12)

        # Gaussian decay, applied only to boxes that overlap more than iou_thresh.
        penalty = np.where(iou > iou_thresh, np.exp(-(iou ** 2) / sigma), 1.0)
        decayed[remaining] = decayed[remaining] * penalty

        # Drop boxes whose score has decayed below the final threshold.
        remaining = remaining[decayed[remaining] >= score_threshold]

    keep = np.asarray(selected, dtype=int)
    filtered_boxes = boxes_arr[keep]
    filtered_scores = decayed[keep]
    filtered_labels = labels_arr[keep]

    duplicated_removed = initial_count - len(filtered_boxes)

    return filtered_boxes, filtered_scores, filtered_labels, initial_count, duplicated_removed

# In your validation loop:
# boxes, scores, labels, initial_count, duplicated_removed = postprocess_predictions_soft_nms(
#     boxes, scores, labels,
#     iou_thresh=0.5, # Adjust based on your findings (maybe 0.3-0.5)
#     sigma=0.5,      # Tune this. Higher = less aggressive, lower = more aggressive
#     score_threshold=0.001 # A very low threshold to keep most boxes unless suppressed significantly
# )

In [25]:
# ---------- NEW: IoS + WBF helpers ----------
import numpy as np

def _area(b):  # b = [x1,y1,x2,y2]
    return max(0.0, b[2]-b[0]) * max(0.0, b[3]-b[1])

def _inter(a,b):
    x1 = max(a[0], b[0]); y1 = max(a[1], b[1])
    x2 = min(a[2], b[2]); y2 = min(a[3], b[3])
    return max(0.0, x2-x1) * max(0.0, y2-y1)

def _ios(a,b):
    """Intersection over the smaller box's area (IoS) of two [x1,y1,x2,y2] boxes.

    A tiny epsilon in the denominator keeps zero-area boxes from dividing by zero.
    """
    smaller_area = min(_area(a), _area(b))
    return _inter(a, b) / (smaller_area + 1e-9)

def _ios_suppress_single_class(boxes, scores, ios_thr=0.90):
    """Keep high-score boxes; drop others if IoS>=thr to any kept box."""
    order = np.argsort(scores)[::-1]
    keep = []
    for idx_m, i in enumerate(order):
        keep_it = True
        for j in keep:  # j are indices in original array
            if _ios(boxes[i], boxes[j]) >= ios_thr:
                keep_it = False
                break
        if keep_it:
            keep.append(i)
    keep = np.array(keep, dtype=int)
    return boxes[keep], scores[keep], keep

def _wbf_single_class(boxes, scores, ios_thr=0.90, p=2.0, score_fuse='max', strategy='fuse'):
    """
    Cluster boxes by IoS and either fuse them (weighted mean) or keep the 'best' one.
    boxes: (N,4) np.array [x1,y1,x2,y2]
    scores: (N,) np.array
    """
    n = len(scores)
    if n == 0:
        return np.empty((0,4)), np.empty((0,))
    used = np.zeros(n, dtype=bool)
    order = np.argsort(scores)[::-1]
    fused_boxes, fused_scores = [], []

    for i in order:
        if used[i]:
            continue
        # form a cluster around i
        cluster_idx = [i]
        used[i] = True
        for j in order:
            if used[j]:
                continue
            if _ios(boxes[i], boxes[j]) >= ios_thr:
                cluster_idx.append(j)
                used[j] = True

        b = boxes[cluster_idx]
        s = scores[cluster_idx]

        if strategy == 'best':           # keep highest-score box only
            k = int(np.argmax(s))
            fused = b[k]
            fused_score = float(s[k])

        elif strategy == 'min_area':     # keep smallest area box in cluster
            areas = (b[:,2]-b[:,0]) * (b[:,3]-b[:,1])
            k = int(np.argmin(areas))
            fused = b[k]
            fused_score = float(s[k])

        else:                            # 'fuse' (weighted average of coords)
            w = (s ** p)[:, None]
            fused = (w * b).sum(axis=0) / w.sum()
            fused_score = float(s.max() if score_fuse == 'max' else s.mean())

        fused_boxes.append(fused)
        fused_scores.append(fused_score)

    return np.vstack(fused_boxes), np.array(fused_scores)

def postprocess_ios_only(boxes, scores, labels, ios_thr=0.90):
    """Per-class IoS containment suppression.

    Each class is processed independently with `_ios_suppress_single_class`;
    the survivors of all classes are then merged and sorted by descending score.

    Returns:
        (boxes, scores, labels, initial_count, duplicated_removed).
        Empty input is returned unchanged with both counts equal to 0.
    """
    initial_count = len(boxes)
    if initial_count == 0:
        return boxes, scores, labels, 0, 0

    per_class = []  # one (boxes, scores, labels) triple per class
    for cls in np.unique(labels):
        mask = labels == cls
        cls_boxes, cls_scores = boxes[mask], scores[mask]
        if len(cls_boxes) == 0:
            continue
        kept_b, kept_s, _ = _ios_suppress_single_class(cls_boxes, cls_scores, ios_thr=ios_thr)
        per_class.append((kept_b, kept_s, np.full(len(kept_s), cls, dtype=labels.dtype)))

    if per_class:
        boxes_f = np.concatenate([group[0] for group in per_class], axis=0)
        scores_f = np.concatenate([group[1] for group in per_class], axis=0)
        labels_f = np.concatenate([group[2] for group in per_class], axis=0)
        # Present the merged result best-first.
        ranking = np.argsort(scores_f)[::-1]
        boxes_f = boxes_f[ranking]
        scores_f = scores_f[ranking]
        labels_f = labels_f[ranking]
    else:
        boxes_f, scores_f, labels_f = boxes, scores, labels

    return boxes_f, scores_f, labels_f, initial_count, initial_count - len(boxes_f)

def postprocess_ios_wbf(boxes, scores, labels, ios_thr=0.90, p=2.0, score_fuse='max', strategy='fuse'):
    """Per-class IoS clustering followed by fusion/selection of each cluster.

    Each class is reduced independently with `_wbf_single_class` (which receives
    `ios_thr`, `p`, `score_fuse` and `strategy` unchanged); the per-class
    results are then merged and sorted by descending score.

    Returns:
        (boxes, scores, labels, initial_count, duplicated_removed).
        Empty input is returned unchanged with both counts equal to 0.
    """
    initial_count = len(boxes)
    if initial_count == 0:
        return boxes, scores, labels, 0, 0

    per_class = []  # one (boxes, scores, labels) triple per class
    for cls in np.unique(labels):
        mask = labels == cls
        cls_boxes, cls_scores = boxes[mask], scores[mask]
        if len(cls_boxes) == 0:
            continue
        fused_b, fused_s = _wbf_single_class(
            cls_boxes, cls_scores,
            ios_thr=ios_thr, p=p, score_fuse=score_fuse, strategy=strategy,
        )
        per_class.append((fused_b, fused_s, np.full(len(fused_s), cls, dtype=labels.dtype)))

    if per_class:
        boxes_f = np.concatenate([group[0] for group in per_class], axis=0)
        scores_f = np.concatenate([group[1] for group in per_class], axis=0)
        labels_f = np.concatenate([group[2] for group in per_class], axis=0)
        # Present the merged result best-first.
        ranking = np.argsort(scores_f)[::-1]
        boxes_f = boxes_f[ranking]
        scores_f = scores_f[ranking]
        labels_f = labels_f[ranking]
    else:
        boxes_f, scores_f, labels_f = boxes, scores, labels

    return boxes_f, scores_f, labels_f, initial_count, initial_count - len(boxes_f)




In [29]:
# __________________________ draw FNs caused by post-processing _____________________

from PIL import Image, ImageDraw
import numpy as np
import os

def draw_debug_pil(img_np, gt_boxes, pre_boxes, post_boxes, became_fn, save_path):
    """Draw ground-truth and predicted boxes on an image and save it to disk.

    Args:
        img_np (np.ndarray): Image array; uint8 is used as-is, any other dtype
            is treated as floats in [0, 1] and scaled to 0-255.
        gt_boxes: Iterable of (x1, y1, x2, y2) ground-truth boxes (thin red).
        pre_boxes: Predictions before post-processing (yellow).
        post_boxes: Predictions after post-processing (green, thicker).
        became_fn: Ground-truth boxes that turned into false negatives after
            post-processing (thick red highlight, drawn last).
        save_path (str): Output path. A ``.jpg``/``.jpeg`` extension is replaced
            by ``.png`` so the debug image is stored losslessly. The parent
            directory is created when the path contains one.
    """
    # expect img_np in [0,1] float or uint8
    if img_np.dtype != np.uint8:
        img = Image.fromarray((img_np * 255).clip(0,255).astype(np.uint8))
    else:
        img = Image.fromarray(img_np)
    draw = ImageDraw.Draw(img)

    # Layers are drawn in this order, so later ones sit on top of earlier ones.
    layers = (
        (gt_boxes, (255, 0, 0), 2),      # GT (red, thin)
        (pre_boxes, (255, 255, 0), 2),   # pre-postprocessing predictions (yellow)
        (post_boxes, (0, 255, 0), 3),    # post-postprocessing predictions (green, thicker)
        (became_fn, (255, 0, 0), 5),     # became-FN GTs (thick red highlight)
    )
    for layer_boxes, colour, line_width in layers:
        for (x1,y1,x2,y2) in layer_boxes:
            draw.rectangle([x1,y1,x2,y2], outline=colour, width=line_width)

    # os.makedirs("") raises, so only create the folder when there is one.
    out_dir = os.path.dirname(save_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # prefer lossless: swap a JPEG extension for PNG
    lowered = save_path.lower()
    for jpeg_ext in (".jpeg", ".jpg"):
        if lowered.endswith(jpeg_ext):
            save_path = save_path[:-len(jpeg_ext)] + ".png"
            break
    img.save(save_path)



In [6]:
!pip install pycocotools

Collecting pycocotools
  Downloading pycocotools-2.0.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)
Downloading pycocotools-2.0.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (455 kB)
Installing collected packages: pycocotools
Successfully installed pycocotools-2.0.10


In [7]:
!pip install torch



In [8]:
!pip install torchvision



In [9]:
import torch
import torch.nn as nn
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import DataLoader
from torchvision import transforms as T
import os
import json
import yaml
from collections import defaultdict
import numpy as np
import datetime
from collections import defaultdict

# pycocotools for evaluation
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# For confusion matrix
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/traitlets/config/application.

AttributeError: _ARRAY_API not found

In [15]:
def get_model_normal(num_classes, model_path):
    """Load a trained Faster R-CNN that uses the default anchor configuration.

    Args:
        num_classes (int): Number of classes of the box predictor head.
        model_path (str): Path to a state dict saved with ``torch.save``.

    Returns:
        The model with the weights loaded, moved to DEVICE and in eval mode.
    """
    # Build the architecture without detection weights; the checkpoint supplies them.
    model = fasterrcnn_resnet50_fpn(weights=None)

    # The head must have the checkpoint's class count before loading.
    head_in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    state_dict = torch.load(model_path, map_location=DEVICE)
    model.load_state_dict(state_dict)

    return model.to(DEVICE).eval()

from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator
import torch

# Optional (for newer torchvision); safe to keep. Use the default ResNet-50
# weights enum when it can be imported, otherwise fall back to None.
BACKBONE_WEIGHTS = None
try:
    from torchvision.models import ResNet50_Weights
    BACKBONE_WEIGHTS = ResNet50_Weights.DEFAULT
except Exception:
    # Any failure while resolving the enum leaves the None fallback in place.
    pass

def get_model_7anchor(num_classes, ckpt_path=None, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    """Build the custom-anchor Faster R-CNN for evaluation and optionally load weights.

    The anchor layout and detector settings here have to stay identical to the
    ones used when the checkpoint was trained.

    Args:
        num_classes (int): Number of classes of the box predictor head.
        ckpt_path (str | None): State-dict file to load; skipped when None.
        device (torch.device): Device the model is moved to (the default is
            resolved once, when the function is defined).

    Returns:
        The model on `device`, in eval mode.
    """
    # ==== MUST MATCH TRAINING ====
    # One anchor scale per FPN level (5 levels), seven aspect ratios on each.
    level_scales = (32, 64, 128, 256, 512)
    ratio_set = (0.2, 0.33, 0.5, 1.0, 2.0, 3.0, 5.0)
    anchor_generator = AnchorGenerator(
        sizes=tuple((scale,) for scale in level_scales),
        aspect_ratios=tuple(ratio_set for _ in level_scales),
    )

    detector_kwargs = dict(
        weights=None,                       # do NOT load a COCO head (shapes won't match)
        weights_backbone=BACKBONE_WEIGHTS,  # backbone initialisation (None when unavailable)
        rpn_anchor_generator=anchor_generator,
        rpn_nms_thresh=0.5,
        box_score_thresh=0.35,
        box_nms_thresh=0.30,
        box_detections_per_img=75,
        box_fg_iou_thresh=0.60,
        box_bg_iou_thresh=0.40,
        box_batch_size_per_image=512,
        box_positive_fraction=0.20,
        min_size=1000,
        max_size=1800,
    )
    model = fasterrcnn_resnet50_fpn(**detector_kwargs)

    # The class head must match the dataset's class count.
    head_in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    if ckpt_path is not None:
        state = torch.load(ckpt_path, map_location='cpu')
        # strict=False tolerates key mismatches; the counts are printed so that
        # a silently incomplete load is at least visible.
        missing_keys, unexpected_keys = model.load_state_dict(state, strict=False)
        print("Loaded checkpoint. Missing keys:", len(missing_keys), "Unexpected keys:", len(unexpected_keys))

    model.to(device)
    model.eval()
    return model


def get_transform(is_train=False):
    """Return the evaluation preprocessing pipeline (tensor conversion only).

    Args:
        is_train (bool): Accepted and ignored. It keeps this definition
            call-compatible with code that passes ``is_train=...``, since this
            cell re-binds the name `get_transform` used elsewhere in the notebook.
    """
    return Compose([
        ToTensor(),
        # No augmentation for evaluation
    ])

def create_coco_annotations(dataset, output_file_path, class_names_list):
    """
    Creates a COCO format annotation file from the WakeDetectionDataset.
    This is needed for pycocotools.

    Args:
        dataset: Object exposing `image_ids` (iterable of image ids) and
            `img_data` (dict: image id -> dict with 'file_name', 'width',
            'height' and 'annotations'). Each annotation provides 'bbox' as
            [xmin, ymin, xmax, ymax], 'category_id', 'area' and 'iscrowd'.
        output_file_path (str): Where the JSON file is written.
        class_names_list (list): Class names with the background entry at
            index 0; that entry is skipped and the rest get ids 1, 2, ...

    Returns:
        dict: The COCO structure that was written to `output_file_path`.
    """
    now = datetime.datetime.now()
    coco_format = {
        "info": {
            "description": "Wake Detection Dataset - Faster R-CNN Evaluation",
            "version": "1.0",
            "year": now.year, # Current year
            "contributor": "Your Name/Organization",
            "date_created": now.strftime("%Y/%m/%d")
        },
        "licenses": [
            {
                "id": 1,
                "name": "Unknown License",
                "url": ""
            }
        ],
        "images": [],
        "annotations": [],
        "categories": []
    }

    # Categories (start from 1, class 0 is background)
    for i, class_name in enumerate(class_names_list[1:]): # Skip '__background__' at index 0
        coco_format['categories'].append({
            "id": i + 1, # COCO category IDs start from 1
            "name": class_name,
            "supercategory": "none"
        })

    # Annotation ids start at 1, not 0: pycocotools' COCOeval stores the id of
    # the matched ground-truth annotation and tests it for truthiness, so a
    # ground truth with id 0 would never be counted as matched.
    ann_id_counter = 1
    # Per-image records kept by the dataset (image_id -> annotation dict)
    image_data_map = dataset.img_data

    # Populate images and annotations
    for img_id in dataset.image_ids:
        data = image_data_map[img_id]
        image_info = {
            "id": img_id,
            "file_name": data['file_name'],
            "width": data['width'],
            "height": data['height']
        }
        coco_format['images'].append(image_info)

        for ann_item in data['annotations']:
            # Convert corner format to COCO's [xmin, ymin, width, height].
            xmin, ymin, xmax, ymax = ann_item['bbox']
            width = xmax - xmin
            height = ymax - ymin
            bbox_coco = [xmin, ymin, width, height]

            annotation_info = {
                "id": ann_id_counter,
                "image_id": img_id,
                "category_id": ann_item['category_id'],
                "bbox": bbox_coco,
                "area": ann_item['area'],
                "iscrowd": ann_item['iscrowd']
            }
            coco_format['annotations'].append(annotation_info)
            ann_id_counter += 1

    with open(output_file_path, 'w') as f:
        json.dump(coco_format, f, indent=4)
    print(f"COCO format annotations saved to {output_file_path}")
    return coco_format

In [20]:
# --- Configuration (Match with Training) ---
DATASET_ROOT = "../Dataset"
ANNOTATION_FILE = "../Dataset/faster_rcnn_annotations.json"
NUM_CLASSES = 2 # Placeholder, will be determined from YAML
BATCH_SIZE = 4 # Can be higher for evaluation if memory allows
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# --- Path to Trained Model ---
# first train
# TRAINED_MODEL_PATH = "runs-fasterrcnn/faster_rcnn_resnet50_fpn_20250707-145842/best_model_epoch_7.pth"
# second train
anchor_model = True
TRAINED_MODEL_PATH = "runs-fasterrcnn2/faster_rcnn_resnet50_fpn_20250811-005533/best_model_epoch_20.pth"
EVAL_OUTPUT_DIR = os.path.dirname(TRAINED_MODEL_PATH) # Save eval results in the same run directory

# --- Post-processing mode ---
POSTPROC_MODE = "none"      # options: "none", "ios", "ios_wbf"
IOS_THR = 0.92
WBF_P = 3.0
WBF_SCORE_FUSE = "max"     # "max" or "mean"
# Must be one of the documented options. The previous value "fused" is not one
# of them and selected the fuse behaviour only via _wbf_single_class's fallback branch.
WBF_STRATEGY = "fuse"   # options: "fuse", "best", "min_area"
SCORE_THRESH = 0.5
MATCH_IOU_THR = 0.5

# How the strategies show up in the debug images:
#   strategy='fuse' → green is a weighted average of the clustered yellows.
#   strategy='best' → green is exactly one of the yellows (the top-score in the cluster).


SAVE_FN_DEBUG = True # save the images that became FN after post processing
fn_debug_dir = os.path.join(EVAL_OUTPUT_DIR, "debug_fn_due_to_postproc")
if SAVE_FN_DEBUG:
    os.makedirs(fn_debug_dir, exist_ok=True)


In [21]:
def evaluate():
    """Evaluate the trained Faster R-CNN checkpoint on the validation split.

    Pipeline:
      1. Read the class names from the dataset YAML and set the module-level
         ``NUM_CLASSES`` to ``len(names) + 1`` (the extra slot is background).
      2. Build the ``'valid'`` split dataset/loader and write its ground truth to
         ``val_gt_coco.json`` via ``create_coco_annotations``.
      3. Load the checkpoint at ``TRAINED_MODEL_PATH`` (7-anchor variant when
         ``anchor_model`` is true) and run it over the loader, applying the
         post-processing selected by ``POSTPROC_MODE``.
      4. When ``SAVE_FN_DEBUG`` is set, save debug images for every ground-truth
         box that was matched before post-processing but not after.
      5. Dump the predictions to ``predictions_coco.json``, run COCOeval (bbox),
         print simple count statistics and greedy IoU-matching TP/FP/FN with
         precision/recall/F1, and save the COCO summary to
         ``evaluation_summary.txt``.

    All outputs are written under ``EVAL_OUTPUT_DIR`` (debug images under
    ``fn_debug_dir``). Configuration is read from module-level constants.

    Returns:
        None. Results are printed and written to disk.

    Raises:
        SystemExit: with status 1 if the YAML config cannot be loaded.
    """
    global NUM_CLASSES

    os.makedirs(EVAL_OUTPUT_DIR, exist_ok=True)

    # Load class names from the YAML file to determine NUM_CLASSES.
    # SystemExit is raised explicitly (instead of calling exit()) so a failure
    # stops the cell without shutting down the notebook kernel and reports a
    # non-zero status when run as a script.
    yaml_path = "../Dataset/vessel_wakes.yaml"
    try:
        with open(yaml_path, 'r') as f:
            yaml_config = yaml.safe_load(f)
        class_names_dict = yaml_config['names']
        raw_class_names = [class_names_dict[i] for i in sorted(class_names_dict.keys())]
        NUM_CLASSES = len(raw_class_names) + 1 # +1 for background
        fasterrcnn_class_names = ['__background__'] + raw_class_names # For indexing
        print(f"Inferred {len(raw_class_names)} custom classes. Setting NUM_CLASSES for Faster R-CNN to {NUM_CLASSES} (including background).")
    except FileNotFoundError:
        print(f"Error: {yaml_path} not found. Please check the path.")
        raise SystemExit(1)
    except KeyError:
        print("Error: 'names' key not found in your YAML config. Ensure it defines your class names.")
        raise SystemExit(1)
    except Exception as e:
        print(f"An error occurred while loading YAML: {e}")
        raise SystemExit(1)

    # Create validation dataset and dataloader
    val_dataset = WakeDetectionDataset(
        root_dir=DATASET_ROOT,
        annotation_file=ANNOTATION_FILE,
        split='valid', # metrics are reported for the validation split
        transform=get_transform()
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        # os.cpu_count() may return None when the count cannot be determined.
        num_workers=(os.cpu_count() or 1) // 2,
        collate_fn=collate_fn
    )
    print(f"Validation dataset size: {len(val_dataset)}")

    # Create COCO ground truth file.
    # fasterrcnn_class_names includes the background entry at index 0.
    gt_coco_file = os.path.join(EVAL_OUTPUT_DIR, "val_gt_coco.json")
    create_coco_annotations(val_dataset, gt_coco_file, fasterrcnn_class_names)

    # Load model
    print(f"Loading model from {TRAINED_MODEL_PATH}...")
    if anchor_model:
        model = get_model_7anchor(NUM_CLASSES, TRAINED_MODEL_PATH)
    else:
        model = get_model(NUM_CLASSES, TRAINED_MODEL_PATH)
    print("Model loaded.")

    def _iou(a, b):
        """Return the IoU of two boxes given as [x1, y1, x2, y2].

        Uses continuous coordinates (no "+1" pixel-inclusive term), which is
        consistent with the [xmin, ymin, xmax - xmin, ymax - ymin] boxes handed
        to COCOeval below. The small epsilon avoids division by zero for
        degenerate boxes.
        """
        x1 = max(a[0], b[0]); y1 = max(a[1], b[1])
        x2 = min(a[2], b[2]); y2 = min(a[3], b[3])
        inter = max(0, x2 - x1) * max(0, y2 - y1)
        area_a = max(0, a[2] - a[0]) * max(0, a[3] - a[1])
        area_b = max(0, b[2] - b[0]) * max(0, b[3] - b[1])
        return inter / (area_a + area_b - inter + 1e-9)

    # --- Generate Predictions ---
    predictions = []
    all_gt_labels = []   # ground-truth class IDs (1-indexed) of every GT object
    all_pred_labels = [] # predicted class IDs of every detection
    all_pred_scores = [] # scores aligned with all_pred_labels (for thresholding)

    total_removed_low_score = 0  # kept for the summary; no current post-processing path updates it
    total_duplicated_removed = 0
    total_initial_count = 0
    gt_boxes_by_image = defaultdict(list)
    pred_boxes_by_image = defaultdict(list)
    with torch.no_grad():
        for images, targets in val_loader:
            images = [image.to(DEVICE) for image in images]
            outputs = model(images)

            for img_idx, output in enumerate(outputs):
                img_id = targets[img_idx]['image_id'].item()
                gt_boxes = targets[img_idx]['boxes'].cpu().detach().numpy()
                boxes = output['boxes'].cpu().numpy()
                labels = output['labels'].cpu().numpy()
                scores = output['scores'].cpu().numpy()

                # Keep a copy BEFORE post-processing so the FN debug below can
                # compare against the raw detections.
                pre_boxes_all = boxes.copy()
                pre_scores_all = scores.copy()

                # Choose post-processing. (Earlier experiments used
                # postprocess_predictions / postprocess_predictions_soft_nms here.)
                if POSTPROC_MODE == "ios":
                    boxes, scores, labels, initial_count, duplicated_removed = postprocess_ios_only(
                        boxes, scores, labels, ios_thr=IOS_THR
                    )
                elif POSTPROC_MODE == "ios_wbf":
                    boxes, scores, labels, initial_count, duplicated_removed = postprocess_ios_wbf(
                        boxes, scores, labels,
                        ios_thr=IOS_THR, p=WBF_P, score_fuse=WBF_SCORE_FUSE, strategy=WBF_STRATEGY
                    )
                else:
                    # no extra post-processing
                    initial_count = len(boxes)
                    duplicated_removed = 0

                total_duplicated_removed += duplicated_removed
                total_initial_count += initial_count

                if SAVE_FN_DEBUG:
                    # Only detections at or above SCORE_THRESH take part in matching.
                    pre_boxes_kept = pre_boxes_all[pre_scores_all >= SCORE_THRESH]
                    post_boxes_kept = boxes[scores >= SCORE_THRESH]

                    # GT boxes that were matched before post-processing but not after.
                    became_fn = [
                        g for g in gt_boxes
                        if any(_iou(g, p) >= MATCH_IOU_THR for p in pre_boxes_kept)
                        and not any(_iou(g, p) >= MATCH_IOU_THR for p in post_boxes_kept)
                    ]

                    if became_fn:
                        # Build the plotting image only when something will be drawn;
                        # permute moves the first axis last and values are clipped to [0, 1].
                        img_np = images[img_idx].detach().cpu().permute(1, 2, 0).numpy()
                        img_np = img_np.clip(0, 1)

                        draw_debug_pil(img_np, gt_boxes, pre_boxes_kept, post_boxes_kept,
                                       became_fn, os.path.join(fn_debug_dir, f"{img_id}_became_FN.png"))

                        # Second image showing only the became-FN boxes with thick red outlines.
                        fig, ax = plt.subplots(figsize=(12, 6))
                        ax.imshow(img_np)
                        for (x1, y1, x2, y2) in became_fn:
                            ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, lw=3.0, edgecolor='r'))
                        ax.axis('off')
                        fig.savefig(os.path.join(fn_debug_dir, f"{img_id}_became_FN_only.jpg"), bbox_inches='tight', dpi=150)
                        plt.close(fig)

                # Collect GT labels / boxes for the statistics below
                gt_labels_for_image = targets[img_idx]['labels'].cpu().numpy()
                all_gt_labels.extend(gt_labels_for_image)
                gt_boxes_by_image[img_id].extend(gt_boxes)

                # Prepare predictions for COCO evaluation
                for box, label, score in zip(boxes, labels, scores):
                    xmin, ymin, xmax, ymax = box
                    # COCO format requires [xmin, ymin, width, height]
                    bbox_coco = [float(xmin), float(ymin), float(xmax - xmin), float(ymax - ymin)]
                    predictions.append({
                        "image_id": img_id,
                        "category_id": int(label), # Use the 1-indexed label
                        "bbox": bbox_coco,
                        "score": float(score)
                    })
                    if score >= SCORE_THRESH:
                        pred_boxes_by_image[img_id].append(box)

                all_pred_labels.extend(labels)
                all_pred_scores.extend(scores)

    print(f"\n--- Postprocess Summary ---")
    print("\n total_initial_count = ", total_initial_count)
    print("\n total_removed_low_score = ", total_removed_low_score)
    print("\n total_duplicated_removed = ", total_duplicated_removed)

    pred_coco_file = os.path.join(EVAL_OUTPUT_DIR, "predictions_coco.json")
    with open(pred_coco_file, 'w') as f:
        json.dump(predictions, f, indent=4)
    print(f"Predictions saved to {pred_coco_file}")

    # --- COCO Evaluation ---
    print("\n--- Running COCO Evaluation ---")
    cocoGt = COCO(gt_coco_file)
    cocoDt = cocoGt.loadRes(pred_coco_file)

    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    # --- Confusion Matrix Generation (Simplified) ---
    # These are plain counts of ground-truth labels vs. predicted labels above
    # the score threshold; no box matching is involved in this part.
    print("\n--- Generating Confusion Matrix (Simplified) ---")
    pred_labels_filtered = [
        label for label, score in zip(all_pred_labels, all_pred_scores)
        if score >= SCORE_THRESH # Confidence threshold for CM.
    ]

    # Total actual wakes (GT labels == 1)
    num_gt_wakes = sum(1 for label in all_gt_labels if label == 1)

    # Total predicted wakes (pred labels == 1 above threshold)
    num_pred_wakes = sum(1 for label in pred_labels_filtered if label == 1)

    print(f"Total Ground Truth Objects (Wake): {num_gt_wakes}")
    # Report the threshold actually used instead of a hard-coded 0.5.
    print(f"Total Predicted Objects (Wake, score >= {SCORE_THRESH}): {num_pred_wakes}")
    print("\nNote: A detailed object detection confusion matrix (like YOLO's) requires IoU matching, which is not directly implemented here. Use COCOeval metrics for primary evaluation.")

    # --- Matching-based evaluation ---
    # Greedy matching: each prediction (in stored order) claims the first
    # still-unmatched GT box whose IoU reaches the threshold. The same IoU
    # helper and threshold as the FN debug above are used so both agree.
    TP = 0
    FP = 0
    FN = 0
    iou_threshold = MATCH_IOU_THR

    for img_id in gt_boxes_by_image:
        gt = gt_boxes_by_image[img_id]
        pred = pred_boxes_by_image.get(img_id, [])

        matched_gt = set()
        matched_pred = set()

        for i, pbox in enumerate(pred):
            for j, gbox in enumerate(gt):
                if j in matched_gt:
                    continue
                if _iou(pbox, gbox) >= iou_threshold:
                    TP += 1
                    matched_pred.add(i)
                    matched_gt.add(j)
                    break

        FP += len(pred) - len(matched_pred)
        FN += len(gt) - len(matched_gt)

    # Calculate metrics (guarding against empty denominators)
    precision = TP / (TP + FP) if (TP + FP) else 0
    recall = TP / (TP + FN) if (TP + FN) else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0

    print(f"\n--- Matching-based Metrics (IoU \u2265 {iou_threshold}) ---")
    print(f"TP: {TP}, FP: {FP}, FN: {FN}")
    print(f"Precision: {precision:.3f}, Recall: {recall:.3f}, F1 Score: {f1:.3f}")

    # Save evaluation results. cocoEval.summarize() prints to stdout, so its
    # output is captured by redirecting stdout into the file; redirect_stdout
    # restores stdout even if summarize() raises.
    import contextlib
    summary_path = os.path.join(EVAL_OUTPUT_DIR, "evaluation_summary.txt")
    with open(summary_path, 'w') as f:
        f.write("COCO Evaluation Summary:\n")
        with contextlib.redirect_stdout(f):
            cocoEval.summarize()
    print(f"Evaluation summary saved to {summary_path}")

if __name__ == "__main__":
    # Ensure you update TRAINED_MODEL_PATH before running!
    if TRAINED_MODEL_PATH == "PATH_TO_YOUR_BEST_MODEL.pth":
        print("WARNING: TRAINED_MODEL_PATH not updated. Please set it to your actual trained model path.")
        # Fallback: pick a best_model_* checkpoint from the latest run directory.
        # This is a hack; a better way is to pass the path from the train script.
        # No `global` statement is needed here: assignments at module level are
        # already global, and declaring `global` after the name was assigned in
        # the same module would be a SyntaxError.
        runs_root = "runs-fasterrcnn"
        try:
            run_dirs = sorted(
                (d for d in os.listdir(runs_root) if os.path.isdir(os.path.join(runs_root, d))),
                reverse=True,
            )
            latest_run_path = os.path.join(runs_root, run_dirs[0])
            best_model_files = [
                f for f in os.listdir(latest_run_path)
                if f.startswith("best_model") and f.endswith(".pth")
            ]
            if best_model_files:
                # os.listdir order is arbitrary, so choose deterministically: the most
                # recently written checkpoint. NOTE(review): assumes a later-saved
                # best_model_* supersedes earlier ones - confirm against the training loop.
                newest_best = max(
                    best_model_files,
                    key=lambda name: os.path.getmtime(os.path.join(latest_run_path, name)),
                )
                TRAINED_MODEL_PATH = os.path.join(latest_run_path, newest_best)
                EVAL_OUTPUT_DIR = latest_run_path
                # Keep the FN-debug directory in sync with the new output directory.
                fn_debug_dir = os.path.join(EVAL_OUTPUT_DIR, "debug_fn_due_to_postproc")
                if SAVE_FN_DEBUG:
                    os.makedirs(fn_debug_dir, exist_ok=True)
                print(f"Attempting to use latest best model: {TRAINED_MODEL_PATH}")
            else:
                print("Could not find a 'best_model' in the latest run directory. Exiting.")
                # SystemExit is not an Exception subclass, so the handler below does not swallow it.
                raise SystemExit(1)
        except Exception as e:
            print(f"Error finding latest model: {e}. Please manually set TRAINED_MODEL_PATH.")
            raise SystemExit(1)

    evaluate()

Inferred 1 custom classes. Setting NUM_CLASSES for Faster R-CNN to 2 (including background).
Validation dataset size: 3443
COCO format annotations saved to runs-fasterrcnn2/faster_rcnn_resnet50_fpn_20250811-005533/val_gt_coco.json
Loading model from runs-fasterrcnn2/faster_rcnn_resnet50_fpn_20250811-005533/best_model_epoch_20.pth...
Loaded checkpoint. Missing keys: 0 Unexpected keys: 0
Model loaded.

--- Postprocess Summary ---

 total_initial_count =  4107

 total_removed_low_score =  0

 total_duplicated_removed =  0
Predictions saved to runs-fasterrcnn2/faster_rcnn_resnet50_fpn_20250811-005533/predictions_coco.json

--- Running COCO Evaluation ---
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.02s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.96s).
Accumulating evaluation results...
DONE (t=0.16s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | ar

In [37]:
!pip install --upgrade --force-reinstall --no-cache-dir scipy

Collecting scipy
  Downloading scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting numpy<2.5,>=1.23.5 (from scipy)
  Downloading numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Downloading scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.7/37.7 MB[0m [31m214.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.8/16.8 MB[0m [31m251.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy, scipy
[2K  Attempting uninstall: numpy
[2K    Found existing installation: numpy 2.2.6
[2K    Uninstalling numpy-2.2.6:
[2K      Successfully uninstalled numpy-2.2.6
[2K  Attempting uninstall: scipy━━━━━━━━━━━━━━━━━━━[0m [32m0/2[0m [numpy]
[2K    Fo

In [38]:
!pip install --upgrade --force-reinstall --no-cache-dir scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.7.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Collecting numpy>=1.22.0 (from scikit-learn)
  Downloading numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting scipy>=1.8.0 (from scikit-learn)
  Downloading scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.7.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m154.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading joblib-1.5.1-py3-none-any.whl (307 kB)
Downloading numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_

In [14]:
!pip install --upgrade --force-reinstall numpy scipy scikit-learn

Collecting numpy
  Using cached numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting scipy
  Downloading scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.7.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Using cached numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)
Downloading scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.7/37.7 MB[0m [31m140.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading scikit_learn-1.7.1-cp310-cp310-manylinux2014_x86_64.many

In [39]:
import numpy
import scipy

# Show the versions of the modules as imported by this kernel, one per line.
print(numpy.__version__, scipy.__version__, sep="\n")

1.26.4
1.15.2
