### Begin
Select kernel `segment-anything`

Run this cell to import the necessary packages and initialise the SAM model, the mask generator, and the mask predictor.

In [5]:
import os # for file operations

# Set this before torch is imported: the MPS backend may only consult the flag when torch is loaded,
# so setting it after `import torch` can have no effect (restart the kernel if torch is already imported).
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # since mps does not support all the operations, we need to enable fallback to cpu for some operations

import numpy as np # for operations on masks 

import matplotlib.pyplot as plt # for plotting images
import cv2 # for image processing
from scipy import ndimage # for image processing
import base64 # for encoding images

import shutil # for file operations
import glob # for file operations
import pickle # for data serialization
import json # for reading json files

import torch # for deep learning
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor # for the SAM model
from sklearn.model_selection import train_test_split # for splitting the dataset
from IPython.display import clear_output # for clearing the output
import yaml # for creating yaml file for YOLO dataset

from ultralytics import YOLO # for YOLO model

from jupyter_bbox_widget import BBoxWidget # for creating bounding box widget
import ipywidgets as widgets # for creating widgets

from bboxidea_functions import * # for functions used in the notebook

sam_checkpoint = "../models/sam_vit_b_01ec64.pth" # Path to the checkpoint file
model_type = 'vit_b' # Model type

# Pick the device: CUDA GPU first, then Apple MPS, and plain CPU when neither is available
# (previously "mps" was chosen unconditionally whenever CUDA was missing).
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

sam = sam_model_registry[model_type](checkpoint=sam_checkpoint) # Load the model
sam.to(device=device) # Move the model to the device

mask_generator = SamAutomaticMaskGenerator(sam) # Create a mask generator
mask_predictor = SamPredictor(sam) # Create a mask predictor

### Generate dataset

In [6]:
frame_interval = 900 # frame interval used when extracting frames from the videos

# folders of the data pipeline, in the order the data flows through them
vid_path = '../1-source/' # source videos
img_path = '../2-source-extracted/' # frames extracted from the videos
ds_path = '../3-dataset/' # train and valid sets built from the extracted frames

# Extract frames from the videos, store them in the extracted folder and split them into train/valid sets.
# NOTE(review): the saved output of this cell is "NameError: name 'os' is not defined" although `os` is
# imported in the first cell - presumably raised inside bboxidea_functions; confirm that module imports os.
load_images_from_video(img_path, vid_path, ds_path, frame_interval)

NameError: name 'os' is not defined

### Annotate training set

In [42]:
classes = ['tiger'] # class names; a label's id is its position in this list
path = '../3-dataset/train/images/' # folder holding the training images

images = os.listdir(path)
images.sort() # browse the images in name order

# state shared with the widget callbacks
cur_img_idx = 0 # position in `images` of the image currently shown
annotations = {} # image name -> bounding boxes drawn for that image
data = {} # image name -> list of rows [label_id, x1, y1, x2, y2, ...]

Initialise the widget

In [43]:
# bounding-box widget, opened on the current image
first_image_file = os.path.join(path, images[cur_img_idx])
w_bbox = BBoxWidget(image=encode_image(first_image_file), classes=classes)

# progress bar sized to the number of images to annotate
w_progress = widgets.IntProgress(value=0, max=len(images), description='Progress')

# navigation buttons; update_image tells them apart by their description
button_next = widgets.Button(description="Next")
button_prev = widgets.Button(description="Previous")

# stack everything vertically: progress bar, navigation buttons, then the bbox widget
w_container = widgets.VBox(children=[w_progress, button_prev, button_next, w_bbox])

# function that updates the image when the buttons are clicked so that the next or previous image is shown
def update_image(change):
    """Store the boxes of the image being left, step to another image and redraw the widget.

    `change` is the object handed over by the button click; its `description`
    ("Next" or "Previous") picks the direction, wrapping around at both ends of
    `images`. Any other description leaves the index untouched.
    """
    global cur_img_idx

    leaving = images[cur_img_idx]
    annotations[leaving] = w_bbox.bboxes # keep the boxes drawn so far for the image we are leaving
    data.setdefault(leaving, []) # first visit: make sure the image has an entry

    step = {"Next": 1, "Previous": -1}.get(change.description)
    if step is not None:
        cur_img_idx = (cur_img_idx + step) % len(images)
    w_progress.value = cur_img_idx # keep the progress bar in sync

    arriving = images[cur_img_idx]
    segments = data.setdefault(arriving, []) # unseen images start with no segments

    # restore previously drawn boxes, or start empty when the image has none yet
    w_bbox.bboxes = annotations.get(arriving, [])
    w_bbox.image = encode_image_existing_mask(os.path.join(path, arriving), segments)
# both navigation buttons share one click handler; update_image reads the button description to pick the direction
for nav_button in (button_next, button_prev):
    nav_button.on_click(update_image)

# defines what happens when the submit button is clicked, which is to run SAM with the bounding boxes specified by the user and to display the result and save the result to the data dictionary for conversion to YOLO format later
@w_bbox.on_submit
def submit():
    """Segment the current image from the drawn boxes and store the result in `data`.

    The entry of the current image is rebuilt from scratch on every submit. Each stored
    row is the class index followed by the flattened polygon coordinates, with values at
    even positions divided by the image width and values at odd positions by the height.
    """
    image_name = images[cur_img_idx]
    image_file = os.path.join(path, image_name)
    data[image_name] = [] # a re-submit replaces whatever was stored before

    if len(w_bbox.bboxes) == 0:
        # nothing drawn: show the plain image and leave the image unannotated
        w_bbox.image = encode_image(image_file)
        data[image_name] = []
        return

    masked_image, poly_coords_list, img_h, img_w = encode_image_mask(image_file, w_bbox.bboxes)
    w_bbox.image = masked_image # show the image returned together with the polygons

    for box_idx, polygon_coords in enumerate(poly_coords_list):
        # polygons are matched to the drawn boxes by position
        class_id = classes.index(w_bbox.bboxes[box_idx]['label'])
        segment = numpy_to_list(polygon_coords)

        # normalise in place: even positions by the width, odd positions by the height
        segment[0::2] = [value / img_w for value in segment[0::2]]
        segment[1::2] = [value / img_h for value in segment[1::2]]

        data[image_name].append([class_id] + segment)

  filled_mask = ndimage.morphology.binary_fill_holes(largest_mask)
  filled_mask = ndimage.morphology.binary_fill_holes(largest_mask)
  filled_mask = ndimage.morphology.binary_fill_holes(largest_mask)
  filled_mask = ndimage.morphology.binary_fill_holes(largest_mask)
  filled_mask = ndimage.morphology.binary_fill_holes(largest_mask)
  filled_mask = ndimage.morphology.binary_fill_holes(largest_mask)
  filled_mask = ndimage.morphology.binary_fill_holes(largest_mask)
  filled_mask = ndimage.morphology.binary_fill_holes(largest_mask)
  filled_mask = ndimage.morphology.binary_fill_holes(largest_mask)
  filled_mask = ndimage.morphology.binary_fill_holes(largest_mask)
  filled_mask = ndimage.morphology.binary_fill_holes(largest_mask)
  filled_mask = ndimage.morphology.binary_fill_holes(largest_mask)
  filled_mask = ndimage.morphology.binary_fill_holes(largest_mask)
  filled_mask = ndimage.morphology.binary_fill_holes(largest_mask)


In [44]:
clear_output() # clear anything this cell has already printed before rendering the widget
w_container # display the widget (bare last expression, so the notebook renders it as the cell output)

VBox(children=(BBoxWidget(classes=['tiger'], colors=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#…

In [47]:
train_labels_dir = '../3-dataset/train/labels/'
train_images_dir = '../3-dataset/train/images/'

# write the collected annotations as txt label files for YOLO training
output_to_txt(data, train_labels_dir)
# then delete empty labels (together with their images, going by the helper's name)
delete_empty_labels_and_images(train_labels_dir, train_images_dir)

### Annotate validation set

In [48]:
path = '../3-dataset/valid/images/' # path to validation images
images = sorted(os.listdir(path))

annotations = {} # dictionary with key = image name, value = corresponding bbox
data = {} # dictionary with key = image name, value = list of list containing: [label_id, x1, y1, x2, y2, ...]
cur_img_idx = 0 # current image index
classes = ['tiger'] # list of classes

# The widget objects created for the training set are reused for the validation set. Without a refresh
# they keep showing the last training image and the training-set progress range, so the first submit
# would attach boxes drawn on a stale training image to the first validation image.
# The guard keeps this cell runnable when the widget cell has not been executed yet.
if 'w_bbox' in globals() and 'w_progress' in globals():
    w_progress.value = 0 # reset before shrinking the range so the value always stays valid
    w_progress.max = len(images)
    w_bbox.bboxes = [] # drop boxes left over from the training image
    if images:
        w_bbox.image = encode_image(os.path.join(path, images[cur_img_idx]))

In [50]:
clear_output() # clear anything this cell has already printed before rendering the widget
w_container # display the widget (the same `w_container` object built in the widget initialisation cell)

VBox(children=(BBoxWidget(classes=['tiger'], colors=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#…

In [51]:
valid_labels_dir = '../3-dataset/valid/labels/'
valid_images_dir = '../3-dataset/valid/images/'

# write the collected annotations as txt label files for YOLO training
output_to_txt(data, valid_labels_dir)
# then delete empty labels (together with their images, going by the helper's name)
delete_empty_labels_and_images(valid_labels_dir, valid_images_dir)

### Clean up

In [52]:
# move the data collected in 3-dataset to the actual dataset folder for use in YOLO training:
# (source images, source labels, destination images, destination labels) for each split
for src_images, src_labels, dst_images, dst_labels in (
    ('../3-dataset/train/images/', '../3-dataset/train/labels/', '../dataset/train/images/', '../dataset/train/labels/'),
    ('../3-dataset/valid/images/', '../3-dataset/valid/labels/', '../dataset/valid/images/', '../dataset/valid/labels/'),
):
    move_files(src_images, src_labels, dst_images, dst_labels)

# move the videos from 1-source to dataset/source for documentation/backup
move_source_vid('../1-source/', '../dataset/source/')

In [53]:
# destructive step: only proceed when the user types exactly 'y'
answer = input("Are you sure you want to clear the source videos, extracted images, and dataset? (y/n): ")
if answer == 'y':
    for working_dir in ('../1-source/', '../2-source-extracted/', '../3-dataset/'):
        clear_directory(working_dir)

### YOLO Training

In [54]:
dataset_dir = '../dataset/'
abspath_ds = os.path.abspath(dataset_dir) # absolute path of the dataset folder
output_path = '../dataset/data.yaml' # where the data.yaml file is written

# create the data.yaml file in the dataset folder; `classes` comes from the annotation cells above
create_yaml(classes, abspath_ds, output_path)

In [59]:
# YOLO Training
model = YOLO('../models/yolov8n-seg.pt') # start from the segmentation checkpoint in the models folder

# training settings gathered in one place so they are easy to tune
train_settings = dict(
    data='../dataset/data.yaml',
    epochs=10,
    imgsz=640,
    batch=3,
    device='mps',
)
model.train(**train_settings)

New https://pypi.org/project/ultralytics/8.0.117 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.114 🚀 Python-3.8.0 torch-2.0.1 MPS
[34m[1myolo/engine/trainer: [0mtask=segment, mode=train, model=../models/yolov8n-seg.pt, data=../dataset/data.yaml, epochs=10, patience=50, batch=3, imgsz=640, save=True, save_period=-1, cache=False, device=mps, workers=8, project=None, name=None, exist_ok=False, pretrained=False, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_ma

In [4]:
# NOTE(review): this loads the same '../models/yolov8n-seg.pt' checkpoint the training cell starts from,
# not weights written by the training run - confirm this is the intended model.
trained_model = YOLO('../models/yolov8n-seg.pt')

# NOTE(review): the saved output of this cell warns about potential out-of-memory errors for large
# sources or long videos and suggests `stream=True`; consider it for long videos.
video_source = '../dataset/source/Unique Tigers Collection 8K HDR 60FPS ULTRA HD.mp4'
results = trained_model.predict(source=video_source, show=True)



    causing potential out-of-memory errors for large sources or long-running streams/videos.

    Usage:
        results = model(source=..., stream=True)  # generator of Results objects
        for r in results:
            boxes = r.boxes  # Boxes object for bbox outputs
            masks = r.masks  # Masks object for segment masks outputs
            probs = r.probs  # Class probabilities for classification outputs



KeyboardInterrupt: 