## Setting Up Your Environment - Install Required Libraries

In [1]:
# Install the Ultralytics YOLOv8 package and other requirements
!pip install ultralytics
!pip install -q opencv-python matplotlib


Collecting ultralytics
  Downloading ultralytics-8.3.153-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

In [2]:
# Import required libraries
import torch
from ultralytics import YOLO
import matplotlib.pyplot as plt
import cv2
import numpy as np
from google.colab.patches import cv2_imshow  # For displaying images in Colab

# Check if GPU is available
print(f"Is CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"Current device: {torch.cuda.get_device_name(0)}")
else:
    print("Running on CPU. This will be much slower!")

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Is CUDA available: True
Current device: Tesla T4


## Download a Pre-trained YOLO Segmentation Model

In [3]:
from ultralytics import YOLO
# Load the YOLOv8 nano segmentation model
model = YOLO('yolov8n-seg.pt')
# Print model summary
print("Model summary:")
model.info()  # This prints directly to output


Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n-seg.pt to 'yolov8n-seg.pt'...


100%|██████████| 6.74M/6.74M [00:00<00:00, 134MB/s]


Model summary:
YOLOv8n-seg summary: 151 layers, 3,409,968 parameters, 0 gradients, 12.8 GFLOPs


(151, 3409968, 0, 12.772710400000001)

## Download and Prepare the Segmentation Dataset

In [4]:
# Download the COCO128-seg dataset (a subset of COCO with segmentation annotations)
!yolo task=segment mode=train data=coco128-seg.yaml epochs=1

print("\nDataset information:")
print("====================")
print("Number of classes: 80 (COCO dataset)")
print("Number of images: 128")
print("Annotations include both bounding boxes and segmentation masks")

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-seg.pt to 'yolo11n-seg.pt'...
100% 5.90M/5.90M [00:00<00:00, 101MB/s]
Ultralytics 8.3.153 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=coco128-seg.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=1, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n-seg.pt, momentum=0.937, mosaic=1.0, multi_scale=False, n

## Train Your YOLO Segmentation Model

In [5]:
# Train the model
print("Starting training...")
results = model.train(
    data='coco128-seg.yaml',  # Dataset
    epochs=10,                # Number of training epochs
    imgsz=640,                # Image size
    batch=8,                  # Batch size (smaller than detection due to higher memory usage)
    patience=3,               # Early stopping patience
    verbose=True              # Print detailed information
)

print("Training complete!")


Starting training...
Ultralytics 8.3.153 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=coco128-seg.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n-seg.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train2, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=3, perspective=0.0, plots=Tru

[34m[1mtrain: [0mScanning /content/datasets/coco128-seg/labels/train2017.cache... 126 images, 2 backgrounds, 0 corrupt: 100%|██████████| 128/128 [00:00<?, ?it/s]


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 842.6±439.3 MB/s, size: 52.5 KB)


[34m[1mval: [0mScanning /content/datasets/coco128-seg/labels/train2017.cache... 126 images, 2 backgrounds, 0 corrupt: 100%|██████████| 128/128 [00:00<?, ?it/s]


Plotting labels to runs/segment/train2/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 66 weight(decay=0.0), 77 weight(decay=0.0005), 76 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/segment/train2[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       1/10      1.63G      1.322      3.008      2.016      1.258         44        640: 100%|██████████| 16/16 [00:05<00:00,  2.84it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 8/8 [00:02<00:00,  3.44it/s]

                   all        128        929      0.572      0.558      0.598      0.439      0.573      0.478      0.547      0.363






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       2/10      2.11G      1.312      2.855      1.754       1.29         42        640: 100%|██████████| 16/16 [00:04<00:00,  3.94it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 8/8 [00:01<00:00,  4.52it/s]

                   all        128        929      0.627      0.522      0.594      0.436       0.56      0.483      0.534      0.351






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       3/10      2.11G      1.237      2.729      1.639      1.227         62        640: 100%|██████████| 16/16 [00:03<00:00,  5.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 8/8 [00:01<00:00,  4.68it/s]

                   all        128        929      0.584      0.546      0.601       0.43      0.536      0.512      0.531      0.348






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       4/10      2.11G      1.243      2.711      1.701      1.262         52        640: 100%|██████████| 16/16 [00:04<00:00,  3.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 8/8 [00:01<00:00,  4.53it/s]

                   all        128        929      0.594      0.593      0.609      0.435      0.554      0.518       0.54      0.351
[34m[1mEarlyStopping: [0mTraining stopped early as no improvement observed in last 3 epochs. Best results observed at epoch 1, best model saved as best.pt.
To update EarlyStopping(patience=3) pass a new patience value, i.e. `patience=300` or use `patience=0` to disable EarlyStopping.






4 epochs completed in 0.008 hours.
Optimizer stripped from runs/segment/train2/weights/last.pt, 7.1MB
Optimizer stripped from runs/segment/train2/weights/best.pt, 7.1MB

Validating runs/segment/train2/weights/best.pt...
Ultralytics 8.3.153 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
YOLOv8n-seg summary (fused): 85 layers, 3,404,320 parameters, 0 gradients, 12.6 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95):   0%|          | 0/8 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95):  12%|█▎        | 1/8 [00:00<00:01,  4.16it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95):  25%|██▌       | 2/8 [00:00<00:02,  2.51it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 8/8 [00:04<00:00,  1.76it/s]


                   all        128        929      0.572      0.558      0.598      0.435      0.573      0.478      0.544      0.361
                person         61        254      0.781      0.681      0.769      0.538      0.805      0.626      0.729      0.395
               bicycle          3          6      0.387      0.333      0.364       0.23      0.302      0.167      0.349      0.202
                   car         12         46      0.525      0.196      0.246      0.142      0.563      0.169      0.187     0.0929
            motorcycle          4          5      0.748          1      0.928      0.735      0.829      0.975      0.928      0.485
              airplane          5          6      0.746      0.981      0.913      0.653      0.606      0.776      0.727      0.534
                   bus          5          7      0.459      0.714       0.72      0.571      0.505      0.714       0.72      0.574
                 train          3          3      0.572      0.907   

## Evaluate Your Segmentation Model's Performance

In [6]:
# Validate the model on the validation set
val_results = model.val()

# Print the validation results
print("\nValidation Results:")
print("==================")
print(f"Box mAP@0.5 (detection metric): {val_results.box.map50:.4f}")
print(f"Mask mAP@0.5 (segmentation metric): {val_results.seg.map50:.4f}")
print(f"Mask mAP@0.5:0.95 (COCO metric): {val_results.seg.map:.4f}")
print(f"Mask precision: {val_results.seg.mp:.4f}")
print(f"Mask recall: {val_results.seg.mr:.4f}")

Ultralytics 8.3.153 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
YOLOv8n-seg summary (fused): 85 layers, 3,404,320 parameters, 0 gradients, 12.6 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1508.0±578.8 MB/s, size: 53.4 KB)


[34m[1mval: [0mScanning /content/datasets/coco128-seg/labels/train2017.cache... 126 images, 2 backgrounds, 0 corrupt: 100%|██████████| 128/128 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95):   0%|          | 0/16 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95):   6%|▋         | 1/16 [00:00<00:05,  2.75it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95):  12%|█▎        | 2/16 [00:00<00:05,  2.48it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:04<00:00,  3.84it/s]


                   all        128        929      0.611      0.525      0.595      0.437      0.575      0.489      0.543      0.357
                person         61        254      0.834      0.651      0.769      0.537      0.807      0.626       0.73      0.393
               bicycle          3          6      0.281      0.167      0.272      0.219      0.293      0.167      0.266      0.178
                   car         12         46       0.64      0.194      0.245      0.145      0.566      0.171      0.188     0.0873
            motorcycle          4          5      0.832       0.99      0.928      0.752       0.83       0.98      0.928      0.485
              airplane          5          6      0.738      0.945       0.87      0.649      0.607      0.776       0.71      0.532
                   bus          5          7      0.502      0.714      0.719      0.571      0.504      0.714      0.719      0.574
                 train          3          3      0.445      0.667   