# Ultralytics Object Detection

In [None]:
import os

from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so that
# get_token() can find them (presumably python-dotenv's default lookup — confirm).
load_dotenv()


def get_token(token_name: str) -> str:
    """Look up a required token in the process environment.

    Args:
        token_name (str): Name of the environment variable holding the token.

    Returns:
        str: The value stored under ``token_name`` (may be an empty string).

    Raises:
        ValueError: If no environment variable named ``token_name`` is set.
    """
    value = os.getenv(token_name)
    if value is not None:
        return value
    # Fail loudly so a missing credential is noticed before any API call is made
    raise ValueError(f"{token_name} not found in environment variables")


In [None]:
from glob import glob
import cv2
import numpy as np
import matplotlib.pyplot as plt
import torch

In [None]:
import os

# Connect to Google Drive (uses the Colab-specific `google.colab` package)
from google.colab import drive

# Mount only when the mount point is not already present, so re-running this
# cell does not attempt a second mount.
if not os.path.exists('/content/drive'):
  drive.mount('/content/drive')

In [None]:
# Root folder for this notebook's outputs. Update as needed.
base_dir = os.getcwd()

# Fail fast on a mistyped base directory instead of silently creating a new tree
if not os.path.isdir(base_dir):
    raise FileNotFoundError(f'Base directory {base_dir} does not exist')

# Path where Ultralytics will store the final model and training metrics
output_dir = os.path.join(base_dir, 'export')

# Create the output folder on first use rather than aborting: it is a
# destination for results, so it is not expected to exist before training.
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Install the training library and the Roboflow client into the running kernel
# (-q keeps pip's output quiet), then import the names used by later cells.
%pip install -q ultralytics roboflow
import ultralytics
from roboflow import Roboflow
from ultralytics import YOLO
# NOTE(review): presumably prints an environment/installation sanity report — confirm in the Ultralytics docs
ultralytics.checks()

## YOLO11 Model Variants

YOLO11 builds on the versatility of YOLOv8 and supports multiple **computer vision tasks** using different **prefixes** and **sizes**.

**Model Variants and Tasks** <br>

| **Model**      | **Filename Prefix**   | **Task**                     |
|---------------|---------------------|------------------------------|
| YOLO11       | `yolo11`             | Object Detection             |
| YOLO11-seg   | `yolo11-seg`         | Instance Segmentation        |
| YOLO11-pose  | `yolo11-pose`        | Pose Estimation (Keypoints)  |
| YOLO11-obb   | `yolo11-obb`         | Oriented Object Detection    |
| YOLO11-cls   | `yolo11-cls`         | Image Classification         |

**Available Model Sizes** <br>

Each model type comes in multiple sizes for different performance needs:

- **n** (Nano) → Smallest, optimized for low-power devices  
- **s** (Small) → Balanced for speed and accuracy  
- **m** (Medium) → More accurate, moderate speed  
- **l** (Large) → High accuracy, requires more computing power  
- **x** (Extra Large) → Highest accuracy, most computationally expensive  

**Example Usage** <br>

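To use a specific model, insert the size letter directly after `yolo11` in the filename prefix and append `.pt` (e.g. `yolo11s-seg.pt`):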
```python
from ultralytics import YOLO

model = YOLO("yolo11s-seg.pt")  # Load the small version of YOLO11 for instance segmentation
```

## Download Dataset from Roboflow

In [None]:
# Base folder for downloaded data; datasets live under {HOME}/datasets
HOME = '/content'
# Start from an empty datasets folder: anything already in it is deleted (rm -rf)
!rm -rf {HOME}/datasets
!mkdir -p {HOME}/datasets
# Switch the working directory to the datasets folder before downloading.
# NOTE(review): presumably Roboflow downloads into the current working directory — confirm.
%cd {HOME}/datasets

# Roboflow credentials and project coordinates are read from environment
# variables through get_token(), which raises ValueError when one is missing.
# Set ROBOFLOW_WORKSPACE_ID, ROBOFLOW_PROJECT_ID, ROBOFLOW_API_KEY and
# ROBOFLOW_PROJECT_VERSION in the environment instead of hard-coding them here.
WORKSPACE_ID = get_token("ROBOFLOW_WORKSPACE_ID")
PROJECT_ID = get_token("ROBOFLOW_PROJECT_ID")
API_KEY = get_token("ROBOFLOW_API_KEY")
PROJECT_VERSION = get_token("ROBOFLOW_PROJECT_VERSION")

# Resolve workspace -> project -> version, then download that dataset version.
# `dataset.location` is used by the training and evaluation cells to find data.yaml.
rf = Roboflow(api_key=API_KEY)
project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)
version = project.version(PROJECT_VERSION)
dataset = version.download("yolov11")

## Training the YOLO Model with Ultralytics

### General Information


**Epochs**

The `epochs` parameter defines how many complete passes the model makes through the entire training dataset. Just like in real life, repeating the practice (processing the data) helps the model learn and improve. However, performance gains diminish after a certain point, and too many epochs can lead to overfitting.

**Data Augmentation**

Data augmentation artificially increases the diversity of your training data, helping the model generalize better. While services like Roboflow can apply augmentations when you prepare your dataset, the Ultralytics API (`model.train()`) also has built-in augmentation capabilities.

You can find the full list of augmentations controllable via the Ultralytics training configuration here: [Ultralytics Configuration Docs](https://docs.ultralytics.com/usage/cfg/) (Check the augmentation section).

**Our Augmentation Strategy & Cautions:**

* **Mosaic:** We will utilize the `mosaic` augmentation (`mosaic=1.0` in parameters). It's a powerful technique that combines multiple images, improving detection of objects in various contexts and scales.
* **`fliplr` (Flip Left-Right):** **Be very careful with this!** `fliplr` mirrors the image horizontally and mirrors the annotation coordinates with it, *but it does not change the class labels*. If your task involves directionality (e.g., detecting *left* vs. *right* lane lines), the model will therefore see images in which the visually right lane is still labeled as left, and vice versa, and will learn the wrong association. **For such tasks, set `fliplr=0.0` to disable it.**
* **Combined Augmentations:** Be mindful if you've already applied augmentations via Roboflow (or another service). Applying heavy augmentations *both* during dataset preparation *and* during Ultralytics training compounds their effects, potentially making images unrecognizable or confusing for the model. Adjust the settings to avoid overly strong combined effects.

| **Metric / Loss**  | **Explanation** |
|-------------------|---------------|
| **Box Loss (`box_loss`)** | Measures how well the model's predicted bounding boxes match the ground truth. Lower is better. |
| **Classification Loss (`cls_loss`)** | Evaluates how accurately the model assigns the correct class to detected objects. Lower is better. |
| **Distribution Focal Loss (`dfl_loss`)** | Helps refine bounding box locations by improving precision at the edges. Lower is better. |
| **Precision (`P`)** | The proportion of predicted objects that are correct (True Positives / (True Positives + False Positives)). Higher means fewer false positives. |
| **Recall (`R`)** | The proportion of actual objects that were detected (True Positives / (True Positives + False Negatives)). Higher means fewer false negatives. |
| **mAP50 (`Mean Average Precision @ IoU 0.5`)** | Measures how well predicted boxes match ground truth at **Intersection over Union (IoU) ≥ 0.5**. Higher means better accuracy. |
| **mAP50-95 (`Mean Average Precision @ IoU 0.5:0.95`)** | A stricter evaluation, averaging mAP over IoU thresholds from **0.5 to 0.95** (harder to score high). Higher means better performance. |

### Train

In [None]:
# Number of CUDA devices PyTorch reports
gpu_count = torch.cuda.device_count()
# Device spec handed to Ultralytics: "0", "0,1", ... when GPUs exist, otherwise "cpu"
device_str = ",".join(str(i) for i in range(gpu_count)) if gpu_count > 0 else "cpu"

print(f"🧠 Training on: {device_str} ({gpu_count} GPU(s) detected)")

# Alternative (nano) checkpoint: model = YOLO("yolo11n-seg.pt")
model = YOLO("yolo11s-seg.pt") # Load the small version of YOLO11 for instance segmentation

# Train on the downloaded dataset; `results` holds whatever model.train() returns.
results = model.train(
    data=f"{dataset.location}/data.yaml",  # dataset config inside the downloaded dataset folder
    epochs=100,  # complete passes over the training set
    imgsz=640,
    batch=8 * max(gpu_count, 1), # Scale batch size with GPU count (8 per GPU, 8 on CPU)
    workers=2,
    device=device_str, # Multi-GPU support via comma-separated string
    fliplr=0.5,  # horizontal flip; set to 0.0 to disable when classes encode left/right direction
    mosaic=1.0,  # mosaic augmentation enabled
    # Further geometric augmentations — see the Ultralytics configuration docs for exact semantics
    degrees=10.0,
    translate=0.1,
    scale=0.5,
    shear=2.0,
    perspective=0.001,
    project=output_dir + '/runs/segment', # Set project to create the 'runs/segment' subfolders
    name='train'  # run folder name; the Evaluate cell loads <project>/train/weights/best.pt
)

### Evaluate

In [None]:
# Reload the `best.pt` weights from the training run folder. The location mirrors
# the `project` (output_dir + '/runs/segment') and `name` ('train') given to model.train().
# NOTE(review): if training is re-run, Ultralytics may write to a new run folder
# (e.g. train2) — confirm this path points at the intended run.
model_path = output_dir + '/runs/segment/train/weights/best.pt'
model = YOLO(model_path)

# Score the reloaded model on the dataset's test split
metrics = model.val(data=f"{dataset.location}/data.yaml", split='test')

### Export as ONNX

In [None]:
# Export the current `model` to ONNX format.
# NOTE(review): the output location is chosen by Ultralytics — presumably next to the loaded weights; confirm.
model.export(format="onnx")