# Task 3: Object Recognition in Surgical Video
## download dataset
Obtain a publicly available surgical video (e.g., from YouTube or any open-source dataset). For brevity, focus on a small number of frames.

In [19]:
# !pip install pytube pytubefix opencv-python

from pytubefix import YouTube
from pytubefix.cli import on_progress
import cv2
import os

video_url = 'https://www.youtube.com/watch?v=a4dBgu3a968'
download_path = r'C:\Users\giles\Github\vesselFM\surgical_video'
video_filename = 'Making_an_Incision.mp4'

#check if video already exists
video_file_path = os.path.join(download_path, video_filename)
if os.path.exists(video_file_path):
    print(f"Video already exists at {video_file_path}. Skipping download.")
else:
    print(f"Attempting to download video from: {video_url}")
    try:
        stream = YouTube(video_url, on_progress_callback=on_progress).streams.get_highest_resolution()
        if stream:
            if not os.path.exists(download_path):
                os.makedirs(download_path)
                print(f"Created directory: {download_path}")

            video_file_path = os.path.join(download_path, video_filename)
            print(f"Downloading to: {video_file_path}")
            stream.download(output_path=download_path, filename=video_filename)
            print(f"Video downloaded successfully to {video_file_path}")
        else:
            print("No suitable MP4 stream found for download.")
            video_file_path = None
    except Exception as e:
        print(f"Error downloading video: {e}")
        video_file_path = None

# ----------------------- 2. Extract frames from the video
output_frames_dir = r'C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames'

#check if the frames in the output directory already exist
if os.path.exists(output_frames_dir):
    print(f"Output frames directory already exists at {output_frames_dir}. Skipping frame extraction.")
else:
    if video_file_path and os.path.exists(video_file_path):
        if not os.path.exists(output_frames_dir):
            os.makedirs(output_frames_dir)
            print(f"Created directory: {output_frames_dir}")

        print(f"Extracting frames from {video_file_path} to {output_frames_dir}")
        cap = cv2.VideoCapture(video_file_path)

        frame_count = 0
        saved_frame_count = 0

        # Limit the number of frames to extract for brevity
        # For example, save 1 frame every 30 frames, up to a maximum of 50 saved frames.
        # max_frames_to_save = 50
        frames_interval = 15 # Save one frame every 'frames_interval' frames

        while cap.isOpened(): # and saved_frame_count < max_frames_to_save:
            ret, frame = cap.read()
            if not ret:
                print("Reached end of video or error reading frame.")
                break

            if frame_count % frames_interval == 0:
                frame_filename = os.path.join(output_frames_dir, f"frame_{saved_frame_count:04d}.png")
                cv2.imwrite(frame_filename, frame)
                # print(f"Saved {frame_filename}")
                saved_frame_count += 1

            frame_count += 1
            if frame_count % 100 == 0: # Print progress periodically
                print(f"Processed {frame_count} frames, saved {saved_frame_count} frames...")

        cap.release()
        print(f"Finished extracting frames. Total {saved_frame_count} frames saved in '{output_frames_dir}'.")
    else:
        if not video_file_path:
            print("Video download failed or was skipped. Frame extraction cannot proceed.")
        elif not os.path.exists(video_file_path):
            print(f"Video file not found at {video_file_path}. Frame extraction skipped.")

Video already exists at C:\Users\giles\Github\vesselFM\surgical_video\Making_an_Incision.mp4. Skipping download.
Output frames directory already exists at C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames. Skipping frame extraction.


## Next Steps: Object Annotation

Now that the frames are extracted, the next step is to perform object annotation on these frames to identify surgical instruments like scalpels for the training and validation of the model.


In [20]:
# Blade action frames:

# # side view - diffrent holds - single frame
# C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0140.png
# C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0148.png
# C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0156.png

# along left to right
# C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0167.png
# to
# C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0178.png

# blade on view - thin
# C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0194.png
# to
# C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0206.png

# horizontal view
# C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0235.png
# to
# C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0255.png

#tip down close
# C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0337.png
# to
# C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0347.png

# Lets use the first single hold frames, and a few from each action shot (say 5 frames each)

# Create list of frames to use

import os

output_frames_dir = r'C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames'

selected_frames_paths = []

# Helper function to generate frame paths
def get_frame_path(frame_number):
    return os.path.join(output_frames_dir, f"frame_{frame_number:04d}.png")

# Single frames
single_frames = [140, 148, 156]
for frame_num in single_frames:
    selected_frames_paths.append(get_frame_path(frame_num))

# Frame ranges (select 5 frames from each range, evenly spaced if possible, or just the first 5)
# For simplicity, we'll take the first 5 frames from each specified sequence.
# If the sequence is shorter than 5, we take all available.

# Range 1: along left to right (0167 to 0178)
start_frame = 167
end_frame = 178
count = 0
for i in range(start_frame, end_frame + 1):
    if count < 5:
        selected_frames_paths.append(get_frame_path(i))
        count += 1
    else:
        break

# Range 2: blade on view - thin (0194 to 0206)
start_frame = 194
end_frame = 206
count = 0
for i in range(start_frame, end_frame + 1):
    if count < 5:
        selected_frames_paths.append(get_frame_path(i))
        count += 1
    else:
        break

# Range 3: horizontal view (0235 to 0255)
start_frame = 235
end_frame = 255
count = 0
for i in range(start_frame, end_frame + 1):
    if count < 5:
        selected_frames_paths.append(get_frame_path(i))
        count += 1
    else:
        break
        
# Range 4: tip down close (0337 to 0347)
start_frame = 337
end_frame = 347
count = 0
for i in range(start_frame, end_frame + 1):
    if count < 5:
        selected_frames_paths.append(get_frame_path(i))
        count += 1
    else:
        break

# Remove duplicates if any and print the list
selected_frames_paths = sorted(list(set(selected_frames_paths)))
print(f"Selected {len(selected_frames_paths)} frame paths:")
for path in selected_frames_paths:
    print(path)

# You can verify if these files exist
for path in selected_frames_paths:
    if not os.path.exists(path):
        print(f"MISSING: {path}")

Selected 23 frame paths:
C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0140.png
C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0148.png
C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0156.png
C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0167.png
C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0168.png
C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0169.png
C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0170.png
C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0171.png
C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0194.png
C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0195.png
C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0196.png
C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0197.png
C:\Users\giles\Github\vesselFM\surgical

## Data annotation - interactive tool using matplotlib

In [None]:
# Ensure you have an interactive matplotlib backend for the best experience.
%matplotlib tk

In [22]:
import cv2
import matplotlib.pyplot as plt
from matplotlib.widgets import RectangleSelector, Button
import matplotlib.patches as patches
import os
import json
import shutil

# --- Configuration ---
# Ensure 'selected_frames_paths' is available from a previous cell.
# Example: selected_frames_paths = ['path/to/image1.png', 'path/to/image2.png']

annotated_base_dir = r'C:\Users\giles\Github\vesselFM\surgical_video\annotated'
annotated_images_dir = os.path.join(annotated_base_dir, 'images')
annotations_file_path = os.path.join(annotated_base_dir, 'annotations.json')
class_name = "scalpel"

#check if annotations file already exists
annotations_file_path = r'C:\Users\giles\Github\vesselFM\surgical_video\annotated\annotations.json'
if os.path.exists(annotations_file_path):
    print(f"Annotations file already exists at {annotations_file_path}. Skipping creation.")
else:
    # --- Global State for Annotation UI ---
    all_annotations_data = {} # Stores final annotations: {filename: [{"class": ..., "bbox_xyxy": ...}]}
    current_image_index = 0
    # _temp_bbox_coords_for_current_image will store the [xmin, ymin, xmax, ymax] for the image currently being annotated.
    # It's updated by RectangleSelector or loaded from all_annotations_data when an image is displayed.
    _temp_bbox_coords_for_current_image = None

    fig, ax = None
    rs = None # RectangleSelector instance
    current_image_display = None # Matplotlib image artist
    current_bbox_display_patch = None # Patch to show existing/drawn bbox

    prev_button = None
    next_button = None

    def get_image_data_for_ui(index):
        if not selected_frames_paths or index < 0 or index >= len(selected_frames_paths):
            return None, None, None
        image_path = selected_frames_paths[index]
        image_filename = os.path.basename(image_path)
        img_bgr = cv2.imread(image_path)
        if img_bgr is None:
            print(f"Warning: Could not read image {image_path}")
            return None, image_filename, image_path # Return filename and path for error messages
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        return img_rgb, image_filename, image_path

    def onselect_callback(eclick, erelease):
        global _temp_bbox_coords_for_current_image, current_bbox_display_patch, ax
        x1, y1 = int(eclick.xdata), int(eclick.ydata)
        x2, y2 = int(erelease.xdata), int(erelease.ydata)
        # Store the newly drawn coordinates
        _temp_bbox_coords_for_current_image = [min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)]
        print(f"  Box drawn/updated via selector: {_temp_bbox_coords_for_current_image}")

        # If a static patch was displaying an old annotation, remove it.
        # The RectangleSelector itself will display the new selection.
        if current_bbox_display_patch:
            current_bbox_display_patch.remove()
            current_bbox_display_patch = None
        fig.canvas.draw_idle()

    def update_display():
        global current_image_index, ax, rs, current_image_display, _temp_bbox_coords_for_current_image
        global prev_button, next_button, selected_frames_paths, current_bbox_display_patch

        img_rgb, image_filename, _ = get_image_data_for_ui(current_image_index)

        if img_rgb is None:
            ax.set_title(f"Error loading image: {image_filename}")
            if current_image_display: current_image_display.set_data(numpy.zeros((100,100,3), dtype=numpy.uint8)) # Blank image
            fig.canvas.draw_idle()
            return

        # Reset temp bbox for the new image; it will be populated if there's saved data or by new drawing
        _temp_bbox_coords_for_current_image = None

        ax.clear() # Clear axis for new image and selector
        current_image_display = ax.imshow(img_rgb)
        ax.set_title(f"Annotate: {image_filename} ({current_image_index + 1}/{len(selected_frames_paths)})")

        current_bbox_display_patch = None # Clear old patch reference

        # Check if there's an existing annotation for this image
        if image_filename in all_annotations_data and all_annotations_data[image_filename]:
            # Load existing annotation into temp storage and display it
            bbox = all_annotations_data[image_filename][0]['bbox_xyxy'] # Assuming one annotation per image
            _temp_bbox_coords_for_current_image = bbox
            rect_patch = patches.Rectangle((bbox[0], bbox[1]), bbox[2]-bbox[0], bbox[3]-bbox[1],
                                        linewidth=2, edgecolor='lime', facecolor='none', label='Saved Annotation')
            current_bbox_display_patch = ax.add_patch(rect_patch)

        # Re-initialize RectangleSelector for the current image
        # Important: pass a new onselect_callback instance or ensure it uses the correct global context
        rs = RectangleSelector(ax, lambda eclick, erelease: onselect_callback(eclick, erelease),
                            useblit=True, button=[1], minspanx=5, minspany=5,
                            spancoords='pixels', interactive=True)

        # Update button states
        if prev_button: prev_button.ax.set_visible(current_image_index > 0)
        if next_button:
            next_button.ax.set_visible(True)
            if current_image_index == len(selected_frames_paths) - 1:
                next_button.label.set_text("Finish & Save")
            else:
                next_button.label.set_text("Next")

        fig.canvas.draw_idle()

    def save_current_annotation():
        global _temp_bbox_coords_for_current_image, all_annotations_data, current_image_index

        _, image_filename, image_path = get_image_data_for_ui(current_image_index)
        if not image_filename: return # Error getting image data

        if _temp_bbox_coords_for_current_image:
            all_annotations_data[image_filename] = [{
                "class": class_name,
                "bbox_xyxy": _temp_bbox_coords_for_current_image
            }]
            # Copy image to annotated directory
            destination_image_path = os.path.join(annotated_images_dir, image_filename)
            if not os.path.exists(destination_image_path): # Copy only if not already there
                try:
                    shutil.copy(image_path, destination_image_path)
                    print(f"  Image {image_filename} copied to {annotated_images_dir}")
                except Exception as e:
                    print(f"  Error copying image {image_filename}: {e}")
            print(f"  Annotation for {image_filename} saved/updated: {_temp_bbox_coords_for_current_image}")
        else:
            # If there was an annotation and user cleared it (e.g. by not drawing one after it was loaded)
            # and _temp_bbox_coords_for_current_image is None, we might want to remove it from all_annotations_data
            if image_filename in all_annotations_data:
                del all_annotations_data[image_filename]
                print(f"  Annotation for {image_filename} cleared.")


    def next_button_on_clicked(event):
        global current_image_index, selected_frames_paths
        save_current_annotation() # Save annotation for the image we are leaving

        current_image_index += 1
        if current_image_index >= len(selected_frames_paths):
            save_all_annotations_to_file()
            print("Annotation finished. All data saved.")
            plt.close(fig)
            return
        update_display()

    def prev_button_on_clicked(event):
        global current_image_index
        save_current_annotation() # Save annotation for the image we are leaving

        current_image_index -= 1
        # Bounds check already handled by button visibility, but good practice
        if current_image_index < 0:
            current_image_index = 0
        update_display()

    def save_all_annotations_to_file():
        global all_annotations_data, annotations_file_path
        try:
            with open(annotations_file_path, 'w') as f:
                json.dump(all_annotations_data, f, indent=4)
            print(f"\nAll annotations successfully saved to: {annotations_file_path}")
        except Exception as e:
            print(f"\nError saving annotations to JSON file: {e}")

    # --- Main Execution ---
    if 'selected_frames_paths' not in globals() or not selected_frames_paths:
        print("Error: 'selected_frames_paths' is not defined or is empty. Please run the previous cell to generate it.")
    else:
        os.makedirs(annotated_images_dir, exist_ok=True)
        print(f"Annotated images will be saved in: {annotated_images_dir}")
        print(f"Annotations JSON will be saved to: {annotations_file_path}")
        print(f"Starting annotation for {len(selected_frames_paths)} images...")
        print("Draw a box for the scalpel. Use Next/Previous buttons. Close window if you want to stop early (unsaved changes on current image might be lost).")

        fig, ax = plt.subplots(1, figsize=(12, 9))
        plt.subplots_adjust(bottom=0.15) # Make space for buttons

        ax_prev = plt.axes([0.7, 0.03, 0.1, 0.055]) # [left, bottom, width, height]
        prev_button = Button(ax_prev, 'Previous')
        prev_button.on_clicked(prev_button_on_clicked)

        ax_next = plt.axes([0.81, 0.03, 0.1, 0.055])
        next_button = Button(ax_next, 'Next')
        next_button.on_clicked(next_button_on_clicked)

        # Initial display
        update_display()
        plt.show() # Blocks until window is closed

        # If window closed before "Finish & Save", save what we have
        if plt.fignum_exists(fig.number): # Check if closed by button or manually
            print("Window closed manually.")
        # The final save is triggered by the "Finish & Save" button logic or if an error occurs and plt.show() exits.
        # Consider an explicit save here if not closed via "Finish & Save", though current logic saves on nav.
        # if all_annotations_data:
        #     print("Attempting to save any pending annotations if window was closed manually...")
        #     save_all_annotations_to_file()


    print("\nAnnotation process cell finished.")
    #could add to put the same starting box on next frame to speed up annotation

Annotations file already exists at C:\Users\giles\Github\vesselFM\surgical_video\annotated\annotations.json. Skipping creation.


## Dataset creation and dataloader initialization

In [None]:
# dataloader
import torch
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms
from PIL import Image
import json
import os
import numpy as np

# --- Configuration for Dataset ---
annotated_base_dir = r'C:\Users\giles\Github\vesselFM\surgical_video\annotated'
annotated_images_dir = os.path.join(annotated_base_dir, 'images')
annotations_file_path = os.path.join(annotated_base_dir, 'annotations.json')

IMG_SIZE = 640 # Standard YOLO image size
batch_size = 64

class ScalpelDetectionDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None):
        self.img_dir = img_dir
        self.transform = transform

        with open(annotations_file, 'r') as f:
            self.annotations = json.load(f)

        self.image_files = []
        self.image_annotations = []

        for img_filename, ann_list in self.annotations.items():
            if not ann_list:
                continue
            full_img_path = os.path.join(self.img_dir, img_filename)
            if os.path.exists(full_img_path):
                self.image_files.append(img_filename)
                self.image_annotations.append(ann_list[0])  # One annotation per image
        print(f"Initialized dataset with {len(self.image_files)} images.")

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_filename = self.image_files[idx]
        img_path = os.path.join(self.img_dir, img_filename)
        image = Image.open(img_path).convert("RGB")

        annotation = self.image_annotations[idx]
        bbox = annotation['bbox_xyxy']
        boxes = torch.tensor([bbox], dtype=torch.float32)
        labels = torch.tensor([1], dtype=torch.int64)  # Class 1 = scalpel

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([idx])
        }

        if self.transform:
            image = self.transform(image)

        return image, target

# --------------------------- Data augmentation ------------------------
# Define transformations - cant move the pixel values around due to the bounding boxes
transform = transforms.Compose([
    # transforms.Resize((IMG_SIZE, IMG_SIZE)), # Resize to fixed size - done as part of the GeneralizedRCNNTransform
    # Color-based augmentations
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
    # Gaussian blur
    transforms.GaussianBlur(kernel_size=5, sigma=(0.1, 2.0)),
    # Random grayscale
    transforms.RandomGrayscale(p=0.1),
    transforms.ToTensor(), # Converts to [C, H, W] and scales to [0, 1]
    # Random Erasing (applied on tensor)
    transforms.RandomErasing(p=0.2, scale=(0.02, 0.2), ratio=(0.3, 3.3), value=0),
    # Normalize if model was pretrained with ImageNet normalization
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])# done as part of the GeneralizedRCNNTransform
])

# -------------------- Initialize Dataset and DataLoaders ------------------------

dataset = ScalpelDetectionDataset(annotations_file_path, annotated_images_dir, transform)
val_split = 0.2
val_size = int(len(dataset) * val_split)
train_size = len(dataset) - val_size

train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

#show example batch shape

print("Example batch shapes:")
for images, targets in train_loader:
    print(f"Images batch shape: {images[0].shape}")  # Each image in the batch
    print(f"Targets batch length: {len(targets)}")  # Number of targets in the batch
    print(f"First target boxes shape: {targets[0]['boxes'].shape}")  # Shape of boxes for the first target
    print(f"First target labels shape: {targets[0]['labels'].shape}")  # Shape of labels for the first target
    break  # Just show one example batch

# Initialized dataset with 23 images.
# Example batch shapes:
# Images batch shape: torch.Size([3, 360, 640])
# Targets batch length: 19
# First target boxes shape: torch.Size([1, 4])
# First target labels shape: torch.Size([1])

Initialized dataset with 23 images.
Example batch shapes:
Images batch shape: torch.Size([3, 360, 640])
Targets batch length: 19
First target boxes shape: torch.Size([1, 4])
First target labels shape: torch.Size([1])


## Faster-RCNN Model definition

In [24]:
import torch
from torchvision.models.detection import fasterrcnn_mobilenet_v3_large_fpn

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model: https://docs.ultralytics.com/yolov5/tutorials/pytorch_hub_model_loading/#project-status
model = fasterrcnn_mobilenet_v3_large_fpn(
    weights_backbone='IMAGENET1K_V2',  # or None
    num_classes=2  # background + scalpel
)
model.to(device)


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): FrozenBatchNorm2d(16, eps=1e-05)
        (2): Hardswish()
      )
      (1): InvertedResidual(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
            (1): FrozenBatchNorm2d(16, eps=1e-05)
            (2): ReLU(inplace=True)
          )
          (1): Conv2dNormActivation(
            (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d(16, eps=1e-05)
          )
        )
      )
      (2): InvertedResidual(
        (block):

## Model training

In [25]:
from torch.amp import autocast #for mixed precision training
from torchmetrics.detection.mean_ap import MeanAveragePrecision

def train_epoch(model, loader, optimizer, scaler, device, print_freq=10, clip_grad_norm=None):
    """Runs a single training epoch.

    Args:
        model (nn.Module): The YOLOv5 model (DetectMultiBackend instance).
        loader (DataLoader): DataLoader for the training data.
        criterion: The loss function (ComputeLoss instance).
        optimizer: The optimizer.
        scaler (GradScaler): Gradient scaler for mixed precision.
        device (torch.device): The device to run training on (CPU or CUDA).
        print_freq (int): How often to print batch loss.

    Returns:
        float: The average training loss for the epoch.
    """

    model.train()
    total_loss = 0.0
    num_batches = len(loader)
    if num_batches == 0:
        print("Warning: DataLoader is empty.")
        return float('nan')

    for i, (images, targets) in enumerate(loader):
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()

        # Mixed precision
        with autocast(device_type=device.type, enabled=(device.type == 'cuda')):
            loss_dict = model(images, targets)
            #when the model is in training mode (model.train()),
            # and you pass both images and targets to it, the model directly returns a dictionary of losses.
            loss = sum(loss for loss in loss_dict.values())


        # Check loss before backward pass
        if not torch.isfinite(loss):
            print(f"Warning: Non-finite loss detected ({loss.item()}) before backward pass at step {i}. Skipping gradient update for this batch.")
            # Continue to the next iteration without updating weights for this batch
            continue

        scaler.scale(loss).backward()

        # gradient clipping (optional)
        if clip_grad_norm is not None: # Clip gradients
            scaler.unscale_(optimizer) # Unscale gradients before clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=clip_grad_norm)

        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()

        # Prevent division by zero
        # print_step = max(1, num_batches // print_freq) if print_freq > 0 else num_batches
        # if print_freq > 0 and (i + 1) % print_step == 0: # Print progress based on calculated print_step
            # print(f'Step [{i+1}/{num_batches}], Batch Loss: {loss.item():.8f}')

    return total_loss / num_batches

def validate_epoch(model, loader, device):
    """Runs a single validation epoch.

    Args:
        model (nn.Module): The model.
        loader (DataLoader): DataLoader for the validation data.
        criterion: The loss function (ComputeLoss instance).
        device (torch.device): The device to run validation on (CPU or CUDA).

    Returns:
        float: The average validation loss for the epoch.
    """
    model.eval()

    total_loss = 0.0
    num_batches = len(loader)
    if num_batches == 0:
        print("Warning: Validation DataLoader is empty.")
        return float('nan')
    
    MeanAveragePrecisionMetric = MeanAveragePrecision()  # Initialize mAP metric

    with torch.no_grad():
        for i, (images, targets) in enumerate(loader):
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # --- Loss Calculation ---
            # Temporarily switch to train() mode to get the loss dictionary from the model,
            # as torchvision detection models typically return losses only in train() mode.
            model.train()
            with autocast(device_type=device.type, enabled=(device.type == 'cuda')):
                loss_dict = model(images, targets)
                loss = sum(loss for loss in loss_dict.values())
            model.eval() # Switch back to eval mode immediately
            total_loss += loss.item()

            # Prediction for metrics (optional)
            with autocast(device_type=device.type, enabled=(device.type == 'cuda')):
                preds = model(images, targets)

            # Move targets and preds to CPU for metrics
            preds_cpu = [{k: v.cpu() for k, v in p.items()} for p in preds]
            targets_cpu = [{k: v.cpu() for k, v in t.items()} for t in targets]

            MeanAveragePrecisionMetric.update(preds_cpu, targets_cpu)

            # Optionally print components for validation too
            # if i == 0: # e.g. for the first batch
            #     print(f'Validation First Batch Loss Components: (Box: {loss_components[0].item():.4f}, Obj: {loss_components[1].item():.4f}, Cls: {loss_components[2].item():.4f})')
        # Compute final metrics
        MeanAveragePrecisionMetrics = MeanAveragePrecisionMetric.compute()

        avg_loss = total_loss / num_batches
    return avg_loss, MeanAveragePrecisionMetrics

In [None]:
import torch.optim as optim
from torch.amp import GradScaler

# Hyperparameters
num_epochs = 100
learning_rate = 1e-4  # Adjust as needed
clip_grad_norm = None # Gradient clipping value, set to None to disable
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
# cosine annealing scheduler
# scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=0.000001) #Had better results with a constant learning rate

PRINT_FREQ = 10  # Print every 10 batches # For use with larger datasets where multiple batches are processed
scaler = GradScaler(enabled=(device.type == "cuda")) # For mixed precision acceleration

In [None]:
import matplotlib.pyplot as plt

print("Starting fine-tuning...")
train_losses = []
val_losses = []
val_metrics = []
for epoch in range(num_epochs):
    avg_train_loss = train_epoch(
        model=model,
        loader=train_loader,
        # criterion=criterion, # not needed as Faster R-CNN returns loss dict directly
        optimizer=optimizer,
        scaler=scaler,
        device=device,
        print_freq=PRINT_FREQ,
    )
    # scheduler.step()  # Step the scheduler after each epoch # commented out as we are not using a CosineAnnealing scheduler here

    avg_val_loss, val_MeanAveragePrecisionMetrics = validate_epoch(
        model=model,
        loader=val_loader,
        # criterion=criterion,
        device=device
    )
    
    # https://lightning.ai/docs/torchmetrics/stable/detection/mean_average_precision.html
    # Extract metrics for clarity, using .item() to get float values
    # Using .get() with a default tensor in case a key is unexpectedly missing
    map50_metric = val_MeanAveragePrecisionMetrics.get('map_50', torch.tensor(0.0)).item()
    map50_95_metric = val_MeanAveragePrecisionMetrics.get('map', torch.tensor(0.0)).item() # Mean Average Precision (mAP) averaged across IoU thresholds from 0.50 to 0.95
    #P and R reported by YOLO are simple point estimates (often at IoU=0.5 and a fixed confidence threshold) while map, map_50, and mar_100 are more comprehensive "average" metrics.
    map_75_metric = val_MeanAveragePrecisionMetrics.get('map_75', torch.tensor(0.0)).item() # mAP at IoU=0.75

    train_losses.append(avg_train_loss)
    val_losses.append(avg_val_loss)
    val_metrics.append({
        'map50': map50_metric,
        'map50_95': map50_95_metric,
        'map_75': map_75_metric
    })

    print(
        f"Epoch [{epoch + 1}/{num_epochs}], Train Loss: {avg_train_loss:.8f}, Validation [Loss: {avg_val_loss:.8f}, "
        f"Box(mAP50={map50_metric:.4f} mAP50-95={map50_95_metric:.4f} mAP75={map_75_metric:.4f})]"
    )

#plot train and validation losses and val mAP metrics over epochs
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Train Loss', color='blue')
plt.plot(val_losses, label='Validation Loss', color='orange')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot([m['map50'] for m in val_metrics], label='mAP@0.50', color='green')
plt.plot([m['map50_95'] for m in val_metrics], label='mAP@0.50-0.95', color='red')
plt.plot([m['map_75'] for m in val_metrics], label='mAP@0.75', color='purple')
plt.xlabel('Epoch')
plt.ylabel('mAP')
plt.title('Validation mAP Metrics')
plt.legend()
plt.tight_layout()
plt.show()

# # --- Save the Fine-tuned Model ---
FINE_TUNED_MODEL_SAVE_PATH = r'C:\Users\giles\Github\vesselFM\surgical_video\fine_tuned_fasterrcnn_mobilenet_v3_large.pth'
torch.save(model.state_dict(), FINE_TUNED_MODEL_SAVE_PATH)
print("Fine-tuned model saved.")

Starting fine-tuning...
Epoch [1/100], Train Loss: 1.45191312, Validation [Loss: 0.83017886, Box(mAP50=0.0000 mAP50-95=0.0000 mAP75=0.0000)]
Epoch [2/100], Train Loss: 0.85469329, Validation [Loss: 0.88885933, Box(mAP50=0.0000 mAP50-95=0.0000 mAP75=0.0000)]
Epoch [3/100], Train Loss: 0.90174586, Validation [Loss: 0.90260053, Box(mAP50=0.0000 mAP50-95=0.0000 mAP75=0.0000)]
Epoch [4/100], Train Loss: 0.87481534, Validation [Loss: 0.87441546, Box(mAP50=0.0000 mAP50-95=0.0000 mAP75=0.0000)]
Epoch [5/100], Train Loss: 0.86152369, Validation [Loss: 0.88281322, Box(mAP50=0.0208 mAP50-95=0.0056 mAP75=0.0000)]
Epoch [6/100], Train Loss: 0.87549686, Validation [Loss: 0.79900378, Box(mAP50=0.0235 mAP50-95=0.0115 mAP75=0.0105)]
Epoch [7/100], Train Loss: 0.81516021, Validation [Loss: 0.76517904, Box(mAP50=0.0331 mAP50-95=0.0126 mAP75=0.0036)]
Epoch [8/100], Train Loss: 0.79270136, Validation [Loss: 0.76471531, Box(mAP50=0.0513 mAP50-95=0.0151 mAP75=0.0000)]
Epoch [9/100], Train Loss: 0.77853721, V

## Video inference
And inference timing.

In [None]:
import torch
import torchvision.transforms as T
from torchvision.models.detection import fasterrcnn_mobilenet_v3_large_fpn
from PIL import Image
import cv2
import os
import numpy as np
import time # Import time module

# --- Configuration ---
# Ensure this path matches where you saved your model
# FINE_TUNED_MODEL_SAVE_PATH = r'C:\Users\giles\Github\vesselFM\surgical_video\fasterrcnn_mobilenet_scalpel_finetuned.pth'
# video_file_path = r'C:\Users\giles\Github\vesselFM\surgical_video\Making_an_Incision.mp4'
# Output video path
output_video_dir = r'C:\Users\giles\Github\vesselFM\surgical_video\inference_output'
os.makedirs(output_video_dir, exist_ok=True)
output_video_filename = 'Making_an_Incision_annotated_FasterRCNN.mp4'
output_video_path = os.path.join(output_video_dir, output_video_filename)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
NUM_CLASSES = 2  # background + scalpel
CONFIDENCE_THRESHOLD = 0.5 # Minimum score to display a detection - same as YOLO

# --- Load Model ---
print(f"Loading model from {FINE_TUNED_MODEL_SAVE_PATH}...")
model_infer = fasterrcnn_mobilenet_v3_large_fpn(weights_backbone=None, num_classes=NUM_CLASSES)
if not os.path.exists(FINE_TUNED_MODEL_SAVE_PATH):
    print(f"ERROR: Model file not found at {FINE_TUNED_MODEL_SAVE_PATH}. Please ensure the path is correct and the model was saved.")
else:
    model_infer.load_state_dict(torch.load(FINE_TUNED_MODEL_SAVE_PATH, map_location=device))
    model_infer.to(device)
    model_infer.eval()
    print("Model loaded successfully and set to evaluation mode.")

    # --- Inference Transforms ---
    # The torchvision Faster R-CNN models internally handle resizing and normalization if not done by transforms.
    infer_transform = T.Compose([T.ToTensor()])

    # --- Video Processing ---
    if not os.path.exists(video_file_path):
        print(f"ERROR: Input video file not found at {video_file_path}. Please ensure 'video_file_path' is correct.")
    else:
        print(f"Processing video: {video_file_path}")
        cap = cv2.VideoCapture(video_file_path)
        if not cap.isOpened():
            print("Error: Could not open video.")
        else:
            # Get video properties
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            total_frames_from_video = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            # Define the codec and create VideoWriter object
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out_video = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
            print(f"Output video will be saved to: {output_video_path}")

            frame_num = 0
            total_inference_time_seconds = 0.0
            frames_processed_for_timing = 0

            # Optional: Limit frames for quicker testing of annotation + timing
            # max_frames_to_process = 300 # Process only 300 frames
            # print(f"Limiting processing to {max_frames_to_process} frames for this run.")


            while cap.isOpened():
                # if max_frames_to_process and frame_num >= max_frames_to_process:
                #     print(f"Reached max_frames_to_process ({max_frames_to_process}). Stopping.")
                #     break
                ret, frame_bgr = cap.read()
                if not ret:
                    print("Reached end of video or error reading frame.")
                    break

                frame_num += 1
                if frame_num % (int(fps if fps > 0 else 30) * 2) == 0: # Print progress every ~2 seconds
                    print(f"Processing frame {frame_num}/{total_frames_from_video if total_frames_from_video > 0 else 'N/A'}...")
                # Convert frame to PIL Image
                frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
                pil_img = Image.fromarray(frame_rgb)
                # Apply transforms and add batch dimension
                img_tensor = infer_transform(pil_img).unsqueeze(0).to(device)

                # Time the inference step
                inference_start_time = time.perf_counter()
                with torch.no_grad():
                    predictions = model_infer(img_tensor)
                inference_end_time = time.perf_counter()

                current_frame_inference_time = inference_end_time - inference_start_time
                total_inference_time_seconds += current_frame_inference_time
                frames_processed_for_timing += 1

                # Process predictions
                # `predictions` is a list of dicts, one per image in the batch.
                # Each dict has 'boxes', 'labels', 'scores'.
                if predictions and len(predictions) > 0:
                    pred = predictions[0]
                    boxes = pred['boxes'].cpu().numpy()
                    labels = pred['labels'].cpu().numpy()
                    scores = pred['scores'].cpu().numpy()

                    for box, label, score in zip(boxes, labels, scores):
                        if score >= CONFIDENCE_THRESHOLD:
                            if label == 1: # 1 is 'scalpel'
                                xmin, ymin, xmax, ymax = map(int, box)
                                cv2.rectangle(frame_bgr, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
                                label_text = f"Scalpel: {score:.2f}"
                                cv2.putText(frame_bgr, label_text, (xmin, ymin - 10),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                out_video.write(frame_bgr)

            # Release everything
            cap.release()
            out_video.release()
            cv2.destroyAllWindows()# Should not be strictly necessary in a notebook but good practice
            print(f"Finished processing. Annotated video saved to {output_video_path}")

            if frames_processed_for_timing > 0:
                avg_inference_time_ms = (total_inference_time_seconds / frames_processed_for_timing) * 1000
                fps_inference_only = frames_processed_for_timing / total_inference_time_seconds
                print(f"\n--- Faster R-CNN Inference Timing Results ---")
                print(f"Total frames processed and timed: {frames_processed_for_timing}")
                print(f"Total inference-only time: {total_inference_time_seconds:.4f} seconds")
                print(f"Average inference-only time per frame: {avg_inference_time_ms:.2f} ms")
                print(f"Inference-only FPS: {fps_inference_only:.2f}")
            else:
                print("\nNo frames were processed for timing.")

Loading model from C:\Users\giles\Github\vesselFM\surgical_video\fine_tuned_fasterrcnn_mobilenet_v3_large.pth...
Model loaded successfully and set to evaluation mode.
Processing video: C:\Users\giles\Github\vesselFM\surgical_video\Making_an_Incision.mp4
Output video will be saved to: C:\Users\giles\Github\vesselFM\surgical_video\inference_output\Making_an_Incision_annotated_FasterRCNN_timed.mp4
Processing frame 50/5430...
Processing frame 100/5430...
Processing frame 150/5430...
Processing frame 200/5430...
Processing frame 250/5430...
Processing frame 300/5430...
Processing frame 350/5430...
Processing frame 400/5430...
Processing frame 450/5430...
Processing frame 500/5430...
Processing frame 550/5430...
Processing frame 600/5430...
Processing frame 650/5430...
Processing frame 700/5430...
Processing frame 750/5430...
Processing frame 800/5430...
Processing frame 850/5430...
Processing frame 900/5430...
Processing frame 950/5430...
Processing frame 1000/5430...
Processing frame 1050/

## YOLOv11 model
Requires ultralytics

Convert dataset to YOLO format

In [29]:
import os
import json
import shutil
from sklearn.model_selection import train_test_split
from PIL import Image

# --- Configuration ---
annotated_base_dir = r'C:\Users\giles\Github\vesselFM\surgical_video\annotated'
source_annotations_file = os.path.join(annotated_base_dir, 'annotations.json')
source_images_dir = os.path.join(annotated_base_dir, 'images')

yolo_base_dir = r'C:\Users\giles\Github\vesselFM\surgical_video\yolo_converted'
yolo_images_dir = os.path.join(yolo_base_dir, 'images')
yolo_labels_dir = os.path.join(yolo_base_dir, 'labels')
yolo_train_images_dir = os.path.join(yolo_images_dir, 'train')
yolo_val_images_dir = os.path.join(yolo_images_dir, 'val')
yolo_train_labels_dir = os.path.join(yolo_labels_dir, 'train')
yolo_val_labels_dir = os.path.join(yolo_labels_dir, 'val')

# Class name and index (YOLO uses 0-indexed classes)
class_names = ["scalpel"] # Your single class
class_map = {name: i for i, name in enumerate(class_names)}

# Validation split percentage
val_split_ratio = 0.2

# --- Helper function to convert bbox_xyxy to YOLO format ---
def convert_to_yolo_format(bbox_xyxy, img_width, img_height):
    """
    Converts [xmin, ymin, xmax, ymax] to YOLO format
    [class_index, x_center_norm, y_center_norm, width_norm, height_norm]
    """
    xmin, ymin, xmax, ymax = bbox_xyxy
    dw = 1.0 / img_width
    dh = 1.0 / img_height

    x_center = (xmin + xmax) / 2.0
    y_center = (ymin + ymax) / 2.0
    width = xmax - xmin
    height = ymax - ymin

    x_center_norm = x_center * dw
    y_center_norm = y_center * dh
    width_norm = width * dw
    height_norm = height * dh

    return x_center_norm, y_center_norm, width_norm, height_norm

# --- Main Conversion Script ---
def create_yolo_dataset():
    print(f"Creating YOLO dataset in: {yolo_base_dir}")

    # Create directories
    os.makedirs(yolo_train_images_dir, exist_ok=True)
    os.makedirs(yolo_val_images_dir, exist_ok=True)
    os.makedirs(yolo_train_labels_dir, exist_ok=True)
    os.makedirs(yolo_val_labels_dir, exist_ok=True)
    print("Created YOLO directory structure.")

    # Load original annotations
    if not os.path.exists(source_annotations_file):
        print(f"ERROR: Source annotations file not found at {source_annotations_file}")
        return
    with open(source_annotations_file, 'r') as f:
        annotations_data = json.load(f)
    
    if not annotations_data:
        print("ERROR: No annotations found in the source file.")
        return

    image_filenames = list(annotations_data.keys())
    
    # Split data into training and validation sets
    if len(image_filenames) < 2 : # Need at least one for train and one for val if splitting
        print("Warning: Not enough images to create a validation split. All images will be used for training.")
        train_files = image_filenames
        val_files = []
    elif val_split_ratio > 0:
        train_files, val_files = train_test_split(image_filenames, test_size=val_split_ratio, random_state=42)
    else:
        train_files = image_filenames
        val_files = []

    print(f"Total images: {len(image_filenames)}")
    print(f"Training images: {len(train_files)}")
    print(f"Validation images: {len(val_files)}")

    # Process files for train and val sets
    for split_name, file_list, img_dest_dir, lbl_dest_dir in [
        ('train', train_files, yolo_train_images_dir, yolo_train_labels_dir),
        ('val', val_files, yolo_val_images_dir, yolo_val_labels_dir)
    ]:
        if not file_list:
            print(f"No files for {split_name} set.")
            continue
        
        print(f"\nProcessing {split_name} set...")
        for img_filename in file_list:
            source_img_path = os.path.join(source_images_dir, img_filename)
            
            if not os.path.exists(source_img_path):
                print(f"  Warning: Image file {img_filename} not found at {source_img_path}. Skipping.")
                continue

            # Get image dimensions
            try:
                with Image.open(source_img_path) as img:
                    img_width, img_height = img.size
            except Exception as e:
                print(f"  Warning: Could not read image {img_filename} to get dimensions: {e}. Skipping.")
                continue

            # Copy image
            dest_img_path = os.path.join(img_dest_dir, img_filename)
            shutil.copy(source_img_path, dest_img_path)

            # Create label file
            label_filename_base = os.path.splitext(img_filename)[0]
            label_file_path = os.path.join(lbl_dest_dir, f"{label_filename_base}.txt")
            
            with open(label_file_path, 'w') as lf:
                img_annotations = annotations_data.get(img_filename)
                if not img_annotations:
                    print(f"  Warning: No annotations found for {img_filename} in JSON. Creating empty label file.")
                    continue # Creates an empty .txt file, which is correct for YOLO if no objects

                for ann in img_annotations: # Expecting a list of annotations per image
                    class_label = ann.get("class")
                    bbox_xyxy = ann.get("bbox_xyxy")

                    if class_label not in class_map:
                        print(f"  Warning: Unknown class '{class_label}' in {img_filename}. Skipping this annotation.")
                        continue
                    
                    class_idx = class_map[class_label]
                    
                    x_c, y_c, w_norm, h_norm = convert_to_yolo_format(bbox_xyxy, img_width, img_height)
                    lf.write(f"{class_idx} {x_c:.6f} {y_c:.6f} {w_norm:.6f} {h_norm:.6f}\n")
            # print(f"  Processed and copied: {img_filename}, Label: {label_file_path}")

    # Create dataset.yaml file
    yaml_content = f"""
path: {os.path.abspath(yolo_base_dir)}  # dataset root dir
train: images/train  # train images (relative to 'path')
val: images/val  # val images (relative to 'path')
# test: # test images (optional)

# Classes
names:
"""
    for i, name in enumerate(class_names):
        yaml_content += f"  {i}: {name}\n"

    dataset_yaml_path = os.path.join(yolo_base_dir, 'dataset.yaml')
    with open(dataset_yaml_path, 'w') as yf:
        yf.write(yaml_content)
    print(f"\nCreated dataset.yaml at: {dataset_yaml_path}")
    print("YOLO dataset creation complete.")

#Check yolo_base_dir doesnt exist or is empty before running
if not (os.path.exists(yolo_base_dir) and os.listdir(yolo_base_dir)):
    # Run the conversion
    create_yolo_dataset()

Creating YOLO dataset in: C:\Users\giles\Github\vesselFM\surgical_video\yolo_converted
Created YOLO directory structure.
Total images: 23
Training images: 18
Validation images: 5

Processing train set...

Processing val set...

Created dataset.yaml at: C:\Users\giles\Github\vesselFM\surgical_video\yolo_converted\dataset.yaml
YOLO dataset creation complete.


In [45]:
# !pip install -U ultralytics
from ultralytics import YOLO
model = YOLO("yolo11x.pt")

# Train the model - use string literal for the dataset path
results = model.train(data="C:/Users/giles/Github/vesselFM/surgical_video/yolo_converted/dataset.yaml", epochs=100, imgsz=640,
                    deterministic=False, device=0, batch=64, workers=4, save=True, save_period=10, project="runs/train", name="yolo11x_scalpel_AdamW_e-5",
                    lr0=1e-5, optimizer="AdamW",
                    )

Ultralytics 8.3.146  Python-3.9.21 torch-2.7.0+cu128 CUDA:0 (NVIDIA GeForce RTX 3090 Ti, 24564MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=64, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:/Users/giles/Github/vesselFM/surgical_video/yolo_converted/dataset.yaml, degrees=0.0, deterministic=False, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=1e-05, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11x.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolo11x_scalpel_AdamW_e-5, nbs=64, nms=False, opset=None, optimize=False, optim

[34m[1mtrain: [0mScanning C:\Users\giles\Github\vesselFM\surgical_video\yolo_converted\labels\train.cache... 18 images, 0 backgrounds, 0 corrupt: 100%|██████████| 18/18 [00:00<?, ?it/s]


[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 1131.4123.7 MB/s, size: 190.2 KB)


[34m[1mval: [0mScanning C:\Users\giles\Github\vesselFM\surgical_video\yolo_converted\labels\val.cache... 5 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5/5 [00:00<?, ?it/s]


Plotting labels to runs\train\yolo11x_scalpel_AdamW_e-5\labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=1e-05, momentum=0.937) with parameter groups 167 weight(decay=0.0), 174 weight(decay=0.0005), 173 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 4 dataloader workers
Logging results to [1mruns\train\yolo11x_scalpel_AdamW_e-5[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100      17.8G      1.168      3.999      1.405         41        640: 100%|██████████| 1/1 [00:02<00:00,  2.50s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  8.30it/s]

                   all          5          5    0.00333          1      0.111      0.063






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      2/100      17.8G      1.265      4.129      1.694         43        640: 100%|██████████| 1/1 [00:00<00:00,  1.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  6.83it/s]

                   all          5          5    0.00333          1      0.106     0.0583






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      3/100      17.8G     0.8786      3.698      1.382         40        640: 100%|██████████| 1/1 [00:00<00:00,  1.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  9.17it/s]

                   all          5          5    0.00333          1     0.0908     0.0484






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      4/100      17.8G      1.073      4.047      1.596         41        640: 100%|██████████| 1/1 [00:00<00:00,  1.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.42it/s]

                   all          5          5    0.00333          1     0.0906     0.0494






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      5/100      17.8G       1.02      3.715      1.492         48        640: 100%|██████████| 1/1 [00:00<00:00,  1.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.13it/s]

                   all          5          5    0.00333          1     0.0881     0.0485






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      6/100      17.8G      1.041      4.258      1.423         42        640: 100%|██████████| 1/1 [00:00<00:00,  1.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.09it/s]

                   all          5          5    0.00333          1     0.0845      0.046






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      7/100      17.8G     0.9765      3.905      1.407         45        640: 100%|██████████| 1/1 [00:00<00:00,  1.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.52it/s]

                   all          5          5     0.0894        0.8      0.174      0.136






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      8/100      18.2G     0.7585      2.389      1.175         47        640: 100%|██████████| 1/1 [00:00<00:00,  1.25it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.61it/s]

                   all          5          5      0.734        0.6      0.658      0.558






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      9/100      18.2G     0.8388       1.57      1.212         35        640: 100%|██████████| 1/1 [00:00<00:00,  1.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.71it/s]

                   all          5          5       0.96        0.6      0.683      0.614






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     10/100      18.2G     0.6843      1.178      1.149         43        640: 100%|██████████| 1/1 [00:00<00:00,  1.46it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 15.63it/s]

                   all          5          5        0.7        0.6      0.613      0.489






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     11/100      18.2G     0.8913      1.107      1.309         43        640: 100%|██████████| 1/1 [00:00<00:00,  1.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 15.50it/s]

                   all          5          5      0.231        0.6      0.213       0.15






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     12/100      18.2G     0.5058      0.721     0.9888         35        640: 100%|██████████| 1/1 [00:00<00:00,  1.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.24it/s]

                   all          5          5      0.433        0.2      0.142     0.0643






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     13/100      18.2G      0.538     0.7416      1.046         44        640: 100%|██████████| 1/1 [00:00<00:00,  1.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.18it/s]

                   all          5          5      0.896        0.4      0.429     0.0963






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     14/100      18.2G      0.775     0.8192      1.127         43        640: 100%|██████████| 1/1 [00:00<00:00,  1.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 15.15it/s]

                   all          5          5      0.661      0.393      0.294      0.183






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     15/100      18.2G     0.9186     0.8178      1.324         40        640: 100%|██████████| 1/1 [00:00<00:00,  1.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 15.63it/s]

                   all          5          5      0.731      0.554      0.451      0.227






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     16/100      18.2G     0.7248     0.7575      1.103         50        640: 100%|██████████| 1/1 [00:00<00:00,  1.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.99it/s]

                   all          5          5      0.638      0.365      0.399      0.303






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     17/100      18.2G     0.5627     0.4866     0.9579         45        640: 100%|██████████| 1/1 [00:00<00:00,  1.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.99it/s]

                   all          5          5      0.249        0.4      0.271      0.215






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     18/100      18.2G     0.7947     0.9287      1.216         31        640: 100%|██████████| 1/1 [00:00<00:00,  1.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.89it/s]

                   all          5          5     0.0545        0.2     0.0428     0.0344






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     19/100      18.2G     0.5595      0.637      1.079         42        640: 100%|██████████| 1/1 [00:00<00:00,  1.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 15.50it/s]

                   all          5          5    0.00985        0.8     0.0166     0.0108






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     20/100      18.2G     0.7031     0.6739      1.033         42        640: 100%|██████████| 1/1 [00:00<00:00,  1.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.82it/s]

                   all          5          5    0.00704        0.6    0.00677    0.00437






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     21/100      18.2G     0.7401     0.7254      1.171         34        640: 100%|██████████| 1/1 [00:00<00:00,  1.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 15.50it/s]

                   all          5          5     0.0086        0.8    0.00788    0.00431






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     22/100      18.2G     0.6196     0.5229      1.072         43        640: 100%|██████████| 1/1 [00:00<00:00,  1.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.05it/s]

                   all          5          5    0.00823        0.8     0.0077    0.00371






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     23/100      18.2G     0.6119     0.5119      0.962         43        640: 100%|██████████| 1/1 [00:00<00:00,  1.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.56it/s]

                   all          5          5    0.00862        0.8    0.00782    0.00308






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     24/100      18.2G     0.5096     0.5793     0.9396         48        640: 100%|██████████| 1/1 [00:00<00:00,  1.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.51it/s]

                   all          5          5    0.00665        0.6    0.00554    0.00143






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     25/100      18.2G     0.7899     0.8219      1.094         42        640: 100%|██████████| 1/1 [00:00<00:00,  1.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.07it/s]

                   all          5          5    0.00452        0.4    0.00399    0.00135






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     26/100      18.2G      0.543     0.4503     0.9338         47        640: 100%|██████████| 1/1 [00:00<00:00,  1.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.82it/s]

                   all          5          5    0.00465        0.4     0.0037    0.00104






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     27/100      18.2G     0.5534     0.5243      1.038         44        640: 100%|██████████| 1/1 [00:00<00:00,  1.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.70it/s]

                   all          5          5    0.00472        0.4    0.00361    0.00138






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     28/100      18.2G     0.4247      0.441     0.9649         39        640: 100%|██████████| 1/1 [00:00<00:00,  1.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.70it/s]

                   all          5          5    0.00482        0.4    0.00377    0.00122






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     29/100      18.2G     0.7251     0.6883      1.105         41        640: 100%|██████████| 1/1 [00:00<00:00,  1.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.25it/s]

                   all          5          5    0.00504        0.4     0.0041    0.00139






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     30/100      18.2G     0.6998     0.4821      1.079         38        640: 100%|██████████| 1/1 [00:00<00:00,  1.34it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.12it/s]

                   all          5          5    0.00541        0.4    0.00465    0.00184






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     31/100      18.2G     0.5055     0.4365     0.9432         42        640: 100%|██████████| 1/1 [00:00<00:00,  1.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.89it/s]

                   all          5          5    0.00557        0.4     0.0047     0.0016






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     32/100      18.2G     0.5628     0.4454     0.9788         41        640: 100%|██████████| 1/1 [00:00<00:00,  1.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.58it/s]

                   all          5          5    0.00554        0.4     0.0237    0.00826






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     33/100      18.2G     0.4779     0.5041     0.9561         50        640: 100%|██████████| 1/1 [00:00<00:00,  1.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.33it/s]

                   all          5          5      0.214        0.2      0.207     0.0374






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     34/100      18.2G     0.6966     0.6044      1.053         40        640: 100%|██████████| 1/1 [00:00<00:00,  1.15it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.42it/s]

                   all          5          5      0.881        0.4      0.397      0.154






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     35/100      18.2G     0.6074      0.494      1.019         34        640: 100%|██████████| 1/1 [00:00<00:00,  1.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.82it/s]

                   all          5          5          1      0.366      0.397      0.277






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     36/100      18.2G      0.435     0.3819     0.8776         42        640: 100%|██████████| 1/1 [00:00<00:00,  1.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.99it/s]

                   all          5          5          1      0.376      0.397      0.337






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     37/100      18.2G     0.5175     0.4542     0.9564         51        640: 100%|██████████| 1/1 [00:00<00:00,  1.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.16it/s]

                   all          5          5          1      0.399        0.4      0.319






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     38/100      18.2G     0.6422     0.5643       1.02         42        640: 100%|██████████| 1/1 [00:00<00:00,  1.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.16it/s]

                   all          5          5      0.922        0.4      0.433       0.32






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     39/100      18.2G     0.5342     0.4699      1.011         46        640: 100%|██████████| 1/1 [00:00<00:00,  1.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.09it/s]

                   all          5          5      0.931        0.4      0.548      0.369






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     40/100      18.2G     0.5995     0.6319      1.066         42        640: 100%|██████████| 1/1 [00:00<00:00,  1.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.24it/s]

                   all          5          5          1      0.546      0.598      0.432






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     41/100      18.2G     0.4919     0.5145     0.9536         45        640: 100%|██████████| 1/1 [00:00<00:00,  1.25it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.42it/s]

                   all          5          5          1      0.562       0.61      0.429






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     42/100      18.2G     0.6106     0.5476      1.049         43        640: 100%|██████████| 1/1 [00:00<00:00,  1.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.89it/s]

                   all          5          5          1      0.595      0.667      0.439






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     43/100      18.2G      0.402     0.3835     0.8949         49        640: 100%|██████████| 1/1 [00:00<00:00,  1.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.42it/s]

                   all          5          5      0.963        0.6      0.739      0.463






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     44/100      18.2G     0.6595     0.5805      1.068         40        640: 100%|██████████| 1/1 [00:00<00:00,  1.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.16it/s]

                   all          5          5      0.959        0.6      0.795      0.496






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     45/100      18.2G     0.4841     0.3819     0.8775         50        640: 100%|██████████| 1/1 [00:00<00:00,  1.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.36it/s]

                   all          5          5      0.961        0.6      0.817      0.529






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     46/100      18.2G      0.533     0.4826     0.9723         50        640: 100%|██████████| 1/1 [00:00<00:00,  1.24it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.25it/s]

                   all          5          5      0.965        0.6      0.845      0.557






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     47/100      18.2G      0.508     0.4398     0.9611         44        640: 100%|██████████| 1/1 [00:00<00:00,  1.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.89it/s]

                   all          5          5      0.967        0.6      0.845      0.577






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     48/100      18.2G     0.4497     0.3397     0.9193         37        640: 100%|██████████| 1/1 [00:00<00:00,  1.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.16it/s]

                   all          5          5      0.968        0.6      0.853      0.573






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     49/100      18.2G     0.5475     0.4673      1.007         47        640: 100%|██████████| 1/1 [00:00<00:00,  1.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.16it/s]

                   all          5          5      0.969        0.6      0.881      0.589






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     50/100      18.2G     0.4951     0.4241     0.9801         34        640: 100%|██████████| 1/1 [00:00<00:00,  1.23it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.84it/s]

                   all          5          5      0.971        0.6      0.881      0.602






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     51/100      18.2G     0.4927     0.4154     0.9375         42        640: 100%|██████████| 1/1 [00:00<00:00,  1.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.42it/s]

                   all          5          5      0.971        0.6      0.881      0.617






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     52/100      18.2G     0.4657     0.4119     0.9415         49        640: 100%|██████████| 1/1 [00:00<00:00,  1.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.35it/s]

                   all          5          5      0.972        0.6      0.898      0.642






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     53/100      18.2G     0.4555     0.4194     0.9736         44        640: 100%|██████████| 1/1 [00:00<00:00,  1.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.63it/s]

                   all          5          5      0.972        0.6      0.898      0.651






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     54/100      18.2G     0.5145      0.402      0.983         47        640: 100%|██████████| 1/1 [00:00<00:00,  1.25it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.16it/s]

                   all          5          5          1      0.773      0.962      0.667






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     55/100      18.2G     0.6528     0.5788     0.8922         40        640: 100%|██████████| 1/1 [00:00<00:00,  1.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.20it/s]

                   all          5          5          1      0.782      0.962      0.697






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     56/100      18.2G     0.4728     0.4415     0.9633         43        640: 100%|██████████| 1/1 [00:00<00:00,  1.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.82it/s]

                   all          5          5          1      0.789      0.962       0.71






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     57/100      18.2G     0.5565     0.4722     0.9919         44        640: 100%|██████████| 1/1 [00:00<00:00,  1.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.66it/s]

                   all          5          5          1      0.961      0.995      0.735






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     58/100      18.2G     0.3545     0.3123     0.9014         36        640: 100%|██████████| 1/1 [00:00<00:00,  1.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.16it/s]

                   all          5          5          1      0.994      0.995      0.735






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     59/100      18.2G     0.4884     0.4315     0.9236         31        640: 100%|██████████| 1/1 [00:00<00:00,  1.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.16it/s]

                   all          5          5      0.987          1      0.995      0.785






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     60/100      18.2G     0.4644     0.3629     0.9093         41        640: 100%|██████████| 1/1 [00:00<00:00,  1.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.74it/s]

                   all          5          5      0.984          1      0.995      0.792






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     61/100      18.2G     0.5922     0.4627     0.9445         42        640: 100%|██████████| 1/1 [00:00<00:00,  1.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.27it/s]

                   all          5          5      0.984          1      0.995      0.804






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     62/100      18.2G     0.4832      0.315     0.9889         43        640: 100%|██████████| 1/1 [00:00<00:00,  1.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.82it/s]

                   all          5          5      0.985          1      0.995      0.804






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     63/100      18.2G     0.4143     0.3911      0.922         46        640: 100%|██████████| 1/1 [00:00<00:00,  1.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.63it/s]

                   all          5          5      0.986          1      0.995      0.777






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     64/100      18.2G     0.4333     0.3444     0.9487         43        640: 100%|██████████| 1/1 [00:00<00:00,  1.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.42it/s]

                   all          5          5      0.988          1      0.995      0.764






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     65/100      18.2G     0.3535      0.392     0.9124         32        640: 100%|██████████| 1/1 [00:00<00:00,  1.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.25it/s]

                   all          5          5      0.988          1      0.995      0.765






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     66/100      18.2G     0.4981     0.3863      1.012         40        640: 100%|██████████| 1/1 [00:00<00:00,  1.24it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.42it/s]

                   all          5          5      0.989          1      0.995      0.792






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     67/100      18.2G     0.4224     0.3848     0.9697         46        640: 100%|██████████| 1/1 [00:00<00:00,  1.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.07it/s]

                   all          5          5      0.989          1      0.995      0.793






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     68/100      18.2G     0.3791     0.3493     0.8701         47        640: 100%|██████████| 1/1 [00:00<00:00,  1.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.99it/s]

                   all          5          5       0.99          1      0.995      0.793






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     69/100      18.2G     0.3715     0.3558     0.9274         41        640: 100%|██████████| 1/1 [00:00<00:00,  1.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.05it/s]

                   all          5          5       0.99          1      0.995      0.793






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     70/100      18.2G     0.4397     0.4072     0.9246         45        640: 100%|██████████| 1/1 [00:00<00:00,  1.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.82it/s]

                   all          5          5       0.99          1      0.995      0.798






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     71/100      18.2G     0.4778     0.3393     0.9861         47        640: 100%|██████████| 1/1 [00:00<00:00,  1.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.76it/s]

                   all          5          5       0.99          1      0.995      0.828






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     72/100      18.2G     0.4391     0.3625     0.9438         40        640: 100%|██████████| 1/1 [00:00<00:00,  1.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 10.58it/s]

                   all          5          5       0.99          1      0.995      0.811






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     73/100      18.2G     0.4254     0.3398     0.9344         46        640: 100%|██████████| 1/1 [00:00<00:00,  1.06it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.42it/s]

                   all          5          5       0.99          1      0.995      0.855






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     74/100      18.2G     0.5654     0.3653      1.002         44        640: 100%|██████████| 1/1 [00:00<00:00,  1.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.83it/s]

                   all          5          5       0.99          1      0.995      0.875






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     75/100      18.2G     0.3774     0.3389     0.8716         44        640: 100%|██████████| 1/1 [00:00<00:00,  1.06it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 10.69it/s]

                   all          5          5       0.99          1      0.995       0.88






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     76/100      18.2G     0.4683     0.3543     0.9629         43        640: 100%|██████████| 1/1 [00:00<00:00,  1.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 10.53it/s]

                   all          5          5       0.99          1      0.995      0.861






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     77/100      18.2G     0.4322     0.4014     0.9852         42        640: 100%|██████████| 1/1 [00:00<00:00,  1.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.63it/s]

                   all          5          5       0.99          1      0.995      0.854






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     78/100      18.2G     0.3698     0.2755     0.9368         38        640: 100%|██████████| 1/1 [00:00<00:00,  1.09it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 10.99it/s]

                   all          5          5      0.991          1      0.995      0.854






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     79/100      18.2G     0.3913      0.343     0.9712         37        640: 100%|██████████| 1/1 [00:00<00:00,  1.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.60it/s]

                   all          5          5       0.99          1      0.995      0.854






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     80/100      18.2G     0.3166     0.2979     0.8902         47        640: 100%|██████████| 1/1 [00:00<00:00,  1.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.82it/s]

                   all          5          5      0.991          1      0.995      0.855






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     81/100      18.2G      0.462     0.3922     0.9145         42        640: 100%|██████████| 1/1 [00:00<00:00,  1.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.05it/s]

                   all          5          5       0.99          1      0.995      0.875






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     82/100      18.2G     0.3669     0.3004     0.8833         37        640: 100%|██████████| 1/1 [00:00<00:00,  1.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.71it/s]

                   all          5          5       0.99          1      0.995      0.855






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     83/100      18.2G     0.5724     0.4052     0.9938         39        640: 100%|██████████| 1/1 [00:01<00:00,  1.04s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 10.53it/s]

                   all          5          5       0.99          1      0.995      0.866






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     84/100      18.2G      0.444     0.3304     0.9435         43        640: 100%|██████████| 1/1 [00:00<00:00,  1.06it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.11it/s]

                   all          5          5       0.99          1      0.995      0.843






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     85/100      18.2G      0.328     0.2799     0.9123         39        640: 100%|██████████| 1/1 [00:00<00:00,  1.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 10.75it/s]

                   all          5          5       0.99          1      0.995      0.844






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     86/100      18.2G     0.3335     0.2551     0.9119         29        640: 100%|██████████| 1/1 [00:00<00:00,  1.05it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 10.93it/s]

                   all          5          5       0.99          1      0.995      0.843






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     87/100      18.2G     0.3387     0.3047     0.8897         42        640: 100%|██████████| 1/1 [00:00<00:00,  1.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.11it/s]

                   all          5          5      0.991          1      0.995      0.843






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     88/100      18.2G     0.3369     0.3044       0.91         41        640: 100%|██████████| 1/1 [00:00<00:00,  1.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.43it/s]

                   all          5          5      0.991          1      0.995      0.847






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     89/100      18.2G     0.4011     0.3367      0.916         43        640: 100%|██████████| 1/1 [00:00<00:00,  1.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.20it/s]

                   all          5          5      0.991          1      0.995      0.868






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     90/100      18.2G     0.3692     0.3789     0.9817         32        640: 100%|██████████| 1/1 [00:00<00:00,  1.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 10.81it/s]

                   all          5          5      0.991          1      0.995      0.868





Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     91/100      18.2G     0.1845     0.1818     0.7883         18        640: 100%|██████████| 1/1 [00:03<00:00,  3.45s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  9.76it/s]

                   all          5          5      0.991          1      0.995      0.868






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     92/100      18.2G      0.267     0.2073     0.8419         18        640: 100%|██████████| 1/1 [00:00<00:00,  1.01it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 10.10it/s]

                   all          5          5      0.991          1      0.995      0.868






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     93/100      18.2G     0.2794     0.2286     0.7914         18        640: 100%|██████████| 1/1 [00:00<00:00,  1.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  8.70it/s]

                   all          5          5      0.991          1      0.995      0.867






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     94/100      18.2G     0.2129     0.2013     0.7774         18        640: 100%|██████████| 1/1 [00:00<00:00,  1.05it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.05it/s]

                   all          5          5      0.991          1      0.995      0.867






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     95/100      18.2G     0.1858     0.1924     0.8169         18        640: 100%|██████████| 1/1 [00:00<00:00,  1.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.83it/s]

                   all          5          5      0.991          1      0.995      0.867






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     96/100      18.2G     0.1801      0.154     0.7694         18        640: 100%|██████████| 1/1 [00:00<00:00,  1.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.58it/s]

                   all          5          5      0.991          1      0.995      0.868






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     97/100      18.2G     0.2088      0.227     0.8225         18        640: 100%|██████████| 1/1 [00:00<00:00,  1.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.90it/s]

                   all          5          5      0.991          1      0.995      0.868






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     98/100      18.2G     0.1717     0.1609     0.7171         18        640: 100%|██████████| 1/1 [00:00<00:00,  1.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.12it/s]

                   all          5          5      0.991          1      0.995      0.868






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     99/100      18.2G     0.2106     0.1676     0.7188         18        640: 100%|██████████| 1/1 [00:00<00:00,  1.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.49it/s]

                   all          5          5      0.991          1      0.995      0.868






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    100/100      18.2G     0.1849     0.1636     0.7489         18        640: 100%|██████████| 1/1 [00:00<00:00,  1.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 10.70it/s]

                   all          5          5      0.991          1      0.995      0.868






100 epochs completed in 0.084 hours.
Optimizer stripped from runs\train\yolo11x_scalpel_AdamW_e-5\weights\last.pt, 114.4MB
Optimizer stripped from runs\train\yolo11x_scalpel_AdamW_e-5\weights\best.pt, 114.4MB

Validating runs\train\yolo11x_scalpel_AdamW_e-5\weights\best.pt...
Ultralytics 8.3.146  Python-3.9.21 torch-2.7.0+cu128 CUDA:0 (NVIDIA GeForce RTX 3090 Ti, 24564MiB)
YOLO11x summary (fused): 190 layers, 56,828,179 parameters, 0 gradients, 194.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.05it/s]


                   all          5          5       0.99          1      0.995       0.88
Speed: 0.1ms preprocess, 9.5ms inference, 0.0ms loss, 1.6ms postprocess per image
Results saved to [1mruns\train\yolo11x_scalpel_AdamW_e-5[0m


YOLO inference

In [46]:
from PIL import Image
# Predict with the model
#give it a sample frame
results = model("C:/Users\giles/Github/vesselFM/surgical_video/extracted_frames/frame_0175.png")  # predict on an image

# Access the results
for result in results:
    xywh = result.boxes.xywh  # center-x, center-y, width, height
    xywhn = result.boxes.xywhn  # normalized
    xyxy = result.boxes.xyxy  # top-left-x, top-left-y, bottom-right-x, bottom-right-y
    xyxyn = result.boxes.xyxyn  # normalized
    names = [result.names[cls.item()] for cls in result.boxes.cls.int()]  # class name of each box
    confs = result.boxes.conf  # confidence score of each box

# Visualize the results
for i, r in enumerate(results):
    # Plot results image
    im_bgr = r.plot()  # BGR-order numpy array
    im_rgb = Image.fromarray(im_bgr[..., ::-1])  # RGB-order PIL image

    # Show results to screen (in supported environments)
    r.show()

    # Save results to disk
    # r.save(filename=f"results{i}.jpg")


image 1/1 C:\Users\giles\Github\vesselFM\surgical_video\extracted_frames\frame_0175.png: 384x640 1 scalpel, 24.0ms
Speed: 1.0ms preprocess, 24.0ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)


YOLO video annotation

In [47]:
from ultralytics import YOLO
import os

model = YOLO(r'C:\Users\giles\Github\vesselFM\runs\train\yolo11x_scalpel_AdamW_e-5\weights\best.pt') # Or use last.pt

video_file_path = r'C:\Users\giles\Github\vesselFM\surgical_video\Making_an_Incision.mp4'

# Define the main directory for YOLO inference outputs
yolo_inference_project_dir = r'C:\Users\giles\Github\vesselFM\surgical_video\inference_output'
# Define a specific name for this inference run (a subdirectory will be created)
yolo_inference_run_name = 'scalpel_detection_run_ADAMW_e-5'

if 'video_file_path' not in globals() or not os.path.exists(video_file_path):
    print(f"ERROR: 'video_file_path' is not defined or the file does not exist.")
    print(f"Please ensure the video path is correct. Expected: C:\\Users\\giles\\Github\\vesselFM\\surgical_video\\Making_an_Incision.mp4")
else:
    print(f"Performing inference on video: {video_file_path}")
    print(f"Annotated video will be saved in a subdirectory within: {os.path.join(yolo_inference_project_dir, yolo_inference_run_name)}")

    results = model.predict(
        source=video_file_path,
        save=True,
        project=yolo_inference_project_dir,
        name=yolo_inference_run_name,
        exist_ok=True,
        conf=0.5  # Adjust confidence threshold as needed (0.0 to 1.0)
    )

    # The Ultralytics library will print the exact path where results are saved.
    # The annotated video will have the same name as the input video,
    # located inside yolo_inference_project_dir/yolo_inference_run_name/
    
    output_video_location = os.path.join(yolo_inference_project_dir, yolo_inference_run_name, os.path.basename(video_file_path))
    print(f"\nVideo inference complete.")
    if os.path.exists(output_video_location):
        print(f"Annotated video should be saved at: {output_video_location}")
    else:
        print(f"Annotated video was expected at: {output_video_location} but not found. Please check the console output from Ultralytics for the exact save path.")

Performing inference on video: C:\Users\giles\Github\vesselFM\surgical_video\Making_an_Incision.mp4
Annotated video will be saved in a subdirectory within: C:\Users\giles\Github\vesselFM\surgical_video\inference_output\scalpel_detection_run_ADAMW_e-5

inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/5430) C:\Users\giles\Github\vesselFM\surgical_video\Making_an_Incision.mp4: 384x640 (no detections), 25.3ms
video 1/1 (frame 2/5430) C:\Users\giles\Github\vesselFM\surgical_video\Making_an_Incision.

## Summary of Results

This notebook explored two deep learning approaches for detecting scalpels in surgical video footage: a custom-trained Faster R-CNN model (with a MobileNetV3 Large backbone) and a custom-trained YOLOv11x model.

### 1. Faster R-CNN (MobileNetV3 Large FPN)

*   **Approach**:
    *   A Faster R-CNN model with a MobileNetV3 Large FPN backbone, pre-trained on ImageNet, was fine-tuned on a custom dataset of scalpel images extracted from the surgical video.
    *   The dataset was manually annotated, and standard data augmentation techniques were applied during training.
*   **Accuracy (Validation Set)**:
    *   The model's (last epoch) performance on the validation set achieved the following metrics:
        *   mAP@0.50: 1.0000
        *   mAP@0.50-0.95: 0.8257
        *   mAP@0.75: 1.0000
    *   Training and validation loss curves, along with mAP progression, can be seen in the plots generated in `Faster RCNN training curves.png`.
*   **Runtime (Video Inference)**:
    *   The inference speed on the full video was: 108.9159 seconds for 5430 frames.
        *   Average inference-only time per frame: 20.06 ms
        *   Inference-only FPS: 49.85

### 2. YOLO (e.g., YOLOv11x)

*   **Approach**:
    *   A YOLO model (specifically, `yolo11x.pt` as the base) was fine-tuned on the same custom dataset, converted to the YOLO annotation format.
    *   Training was performed using the Ultralytics library with specified hyperparameters (e.g., 100 epochs, AdamW optimizer, lr0=1e-5).
*   **Accuracy (Validation Set)**:
    *   The Ultralytics training process outputs validation metrics, including mAP50 and mAP50-95. Refer to the console output or the saved run directory (`runs/train/yolo11x_scalpel_AdamW_e-5/results.csv` or plots) for these values:
        *   Precision: 0.99
        *   Recall: 1
        *   mAP50: 0.995
        *   mAP50-95: 0.88
*   **Runtime (Video Inference)**:
    *   The inference speed using the Ultralytics `model.predict()` method on the full video was: 178.9 seconds for 5430 frames.
        *   Average inference-only time per frame (from `result.speed['inference']`): 25.8ms inference
        *   Inference-only FPS (based on `result.speed['inference']`): 38.76 FPS
        *   Full output: Speed: 1.4ms preprocess, 25.8ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

### Conclusion

Both models were successfully trained to detect scalpels. 
The Faster R-CNN model demonstrated notably faster inference speeds compared to the YOLO model, 
making it more suitable for real-time applications if its accuracy is acceptable. 
Alternative backbones or further optimizations could potentially improve Faster R-CNN's performance, especially in terms of inference speed.

However the YOLOv11 model is a more recent architecture and generally offers a good balance between speed and accuracy, especially in real-time applications. 
As the YOLOv11 xl model varient was used, experimentation with smaller variants (like YOLOv11l (~half the size) or YOLOv11m) could yield faster inference times while maintaining good accuracy especially with larger datasets.

The performance metrics on the validation set indicate that the models are capable of detecting scalpels with high precision and recall. 
Even with the extreamly small dataset, the models achieved high mAP scores, indicating that they can generalize well to the task of scalpel detection in surgical videos.

The choice between them would depend on the specific requirements for accuracy versus speed for the target application, 
as well as the level of control and customization needed for the model and its training.

Further hyperparameter tuning, dataset augmentation, or using different model backbones/sizes could potentially improve the performance of both approaches.

Qualitative analysis of the model outputs on the video frames shows that both models effectively detect scalpels, with bounding boxes accurately placed around the instruments. 
However with the Faster R-CNN model, the bounding boxes are more erratic and temporally inconsistent, while the YOLO model provides more stable and consistent detections across frames 
as well as being less prone to false positives that could be very important for surgical applications.