In [1]:
# Install Ultralytics and Roboflow
!pip install -q roboflow ultralytics

# Verify the installation and check for GPU
import ultralytics
ultralytics.checks()


Ultralytics 8.4.19 🚀 Python-3.12.12 torch-2.10.0+cu128 CUDA:0 (Tesla T4, 14913MiB)
Setup complete ✅ (2 CPUs, 12.7 GB RAM, 43.6/112.6 GB disk)


# YOLOv8 Instance Segmentation Training Pipeline
This notebook handles:
1. Downloading an Instance Segmentation dataset from Roboflow in `yolov8` format.
2. Cleaning the dataset of mixed annotation types (removing stray bounding boxes).
3. Training a pretrained `yolov8n-seg.pt` model.
4. Maximizing T4 GPU VRAM utilization by forcing batch size and switching to SGD optimizer.
5. Zipping and downloading the full results folder locally.

## Download the Dataset from Roboflow

In [2]:
import os
from getpass import getpass

from roboflow import Roboflow

# Never hardcode the API key in the notebook: it ends up in version control and
# in every shared copy. Read it from the ROBOFLOW_API_KEY environment variable
# and fall back to an interactive, non-echoing prompt.
# NOTE(review): the key that was previously committed in this cell should be
# treated as leaked and rotated in the Roboflow dashboard.
roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY") or getpass("Roboflow API key: ")

# Initialize Roboflow and fetch version 1 of the project in `yolov8` format
rf = Roboflow(api_key=roboflow_api_key)
project = rf.workspace("computer-vision-yyh42").project("segmentation_model-bdcct")
version = project.version(1)
dataset = version.download("yolov8")

# Extract the path to the data.yaml file needed for training
data_yaml_path = os.path.join(dataset.location, "data.yaml")
print(f"\nDataset fully downloaded and ready at: {dataset.location}")
print(f"Data configuration file: {data_yaml_path}")


loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Segmentation_model-1 to yolov8:: 100%|██████████| 12177/12177 [00:00<00:00, 14771.41it/s]





Extracting Dataset Version Zip to Segmentation_model-1 in yolov8:: 100%|██████████| 682/682 [00:00<00:00, 1607.55it/s]


Dataset fully downloaded and ready at: /content/Segmentation_model-1
Data configuration file: /content/Segmentation_model-1/data.yaml





## Dataset Cleaning Strategy
**The Issue:** If YOLOv8 detects a mixed dataset format (e.g. some polygons, some standard bounding boxes), it will immediately drop *all* segmentation masks and attempt to train as an Object Detection model, ultimately causing an index crash when calculating the segmentation loss.
**The Fix:** This script ensures only valid polygons (>5 values) remain in the `.txt` files.

In [3]:
import os
import glob

def clean_yolo_segmentation_labels(dataset_path):
    """Remove non-polygon rows from every YOLO label file under ``dataset_path``.

    Every ``*.txt`` file inside any ``labels`` directory below ``dataset_path``
    is scanned. Rows with more than 5 whitespace-separated values are kept;
    rows with 1-5 values (e.g. ``class x_center y_center width height``
    bounding boxes) are removed and counted. Blank lines are dropped as well,
    but are *not* counted, so the printed total reflects real annotations only.
    A file is rewritten in place only if at least one of its lines was dropped.

    Args:
        dataset_path: Root directory of the downloaded dataset.

    Returns:
        None. A summary line with the number of removed annotations is printed.
    """
    # Find all .txt files in every `labels` folder below the dataset root
    label_files = glob.glob(f"{dataset_path}/**/labels/*.txt", recursive=True)

    removed_lines = 0
    for file_path in label_files:
        with open(file_path, 'r') as f:
            lines = f.readlines()

        clean_lines = []
        for line in lines:
            parts = line.split()
            if not parts:
                # Blank line: not an annotation, so drop it without inflating
                # the "removed bounding boxes" count.
                continue
            # Standard bounding boxes have exactly 5 values
            # (class, x_center, y_center, width, height); polygon rows are longer.
            # NOTE(review): a row with exactly 6 values passes this threshold even
            # though a polygon needs class + 3 (x, y) pairs = 7 values — confirm
            # whether such rows can occur in the export.
            if len(parts) > 5:
                clean_lines.append(line)
            else:
                removed_lines += 1

        # Overwrite the file only when something was actually dropped
        if len(clean_lines) != len(lines):
            with open(file_path, 'w') as f:
                f.writelines(clean_lines)

    print(f"Dataset Cleaned! Removed {removed_lines} stray bounding box annotations.")

# Clean the dataset that the Roboflow download cell just fetched. Using
# `dataset.location` (instead of a hardcoded /content/... path) keeps this cell
# correct if the dataset version or the working directory changes.
clean_yolo_segmentation_labels(dataset.location)


Dataset Cleaned! Removed 22 stray bounding box annotations.


## Training Optimization (Maximum GPU Utilization)
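When `AutoBatch` is enabled (`batch=-1`), it targets only ~60% VRAM utilization. To push the Colab T4 GPU closer to its 15GB limit, we set the batch size manually instead:
1. **`optimizer="SGD"`**: `AdamW` keeps two state tensors per parameter, whereas `SGD` with momentum keeps one, so switching to `SGD` reduces optimizer-state memory and leaves a little more VRAM for the batch.
2. **`batch=48`**: We force a large static batch size instead of letting it be chosen automatically.
3. **`workers=8` & `cache=True`**: `cache=True` caches dataset images in Colab RAM for fast loads; `workers=8` requests up to 8 dataloader worker processes.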

In [4]:
from ultralytics import YOLO

# Start from the pretrained nano segmentation checkpoint
model = YOLO('yolov8n-seg.pt')

results = model.train(
    # --- Dataset and schedule ---
    data=data_yaml_path,
    imgsz=640,
    epochs=50,

    # --- Throughput / memory settings ---
    batch=48,                   # fixed batch size instead of an automatically chosen one
    optimizer='SGD',            # explicit optimizer choice (less optimizer state than AdamW)
    cache=True,                 # keep dataset images cached between epochs
    workers=8,                  # requested number of dataloader workers

    # --- Output location and reporting ---
    project="Weld_Segmentation",
    name="yolo_seam_seg_fast",
    plots=True,
)


[KDownloading https://github.com/ultralytics/assets/releases/download/v8.4.0/yolov8n-seg.pt to 'yolov8n-seg.pt': 100% ━━━━━━━━━━━━ 6.7MB 134.5MB/s 0.1s
Ultralytics 8.4.19 🚀 Python-3.12.12 torch-2.10.0+cu128 CUDA:0 (Tesla T4, 14913MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, angle=1.0, augment=False, auto_augment=randaugment, batch=48, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/Segmentation_model-1/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, end2end=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=

## Zip and Download the Full Results Folder

In [7]:
import os
import shutil
from google.colab import files

# 1. Locate the folder the training run actually wrote to.
# The training log shows exist_ok=False, so re-running training is expected to
# save into a new, suffixed run folder rather than reuse this one; a hardcoded
# path would then zip a stale run. Prefer the trainer's own save_dir and fall
# back to the original path when no trained `model` is available in this kernel
# (e.g. after a restart, with the results still on disk).
default_results_folder = "runs/segment/Weld_Segmentation/yolo_seam_seg_fast"
trainer = getattr(globals().get("model"), "trainer", None)
results_folder = str(trainer.save_dir) if trainer is not None else default_results_folder
zip_name = "yolo_seam_seg_fast_results"

if os.path.exists(results_folder):
    print(f"Compressing the results folder: {results_folder} ...")

    # 2. Create a zip archive of the entire directory
    shutil.make_archive(zip_name, 'zip', results_folder)
    print(f"Compression complete! Created {zip_name}.zip")

    # 3. Trigger the browser download
    print("Downloading to your local machine...")
    files.download(f"{zip_name}.zip")
else:
    print(f"Error: Could not find the folder at {results_folder}. Did the training finish successfully?")


Compressing the results folder: runs/segment/Weld_Segmentation/yolo_seam_seg_fast ...
Compression complete! Created yolo_seam_seg_fast_results.zip
Downloading to your local machine...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>