**Important information**

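This notebook was written to run on Google Colab so that training can use a GPU. It relies on Colab-specific features (`google.colab` secrets and file download, paths under `/content`), so it will need to be refactored before it can run on a local machine.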

## Imports and initial checks

In [None]:
# Print the NVIDIA driver/GPU status of the current runtime.
!nvidia-smi

Mon Apr  7 09:13:43 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   35C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
import os
# HOME is the working directory at the moment this cell runs; later cells build
# every dataset/run path from it.
# NOTE(review): a later cell changes directory with %cd, so re-running this cell
# afterwards would capture a different HOME -- run it only on a fresh kernel.
HOME = os.getcwd()
print(HOME)

/content


In [None]:
# Install the required packages; ultralytics is capped at 8.3.40 while
# supervision and roboflow are left unpinned.
%pip install "ultralytics<=8.3.40" supervision roboflow
import ultralytics
# Print the Ultralytics environment summary (versions, device, resources).
ultralytics.checks()

Ultralytics 8.3.40 🚀 Python-3.11.11 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Setup complete ✅ (2 CPUs, 12.7 GB RAM, 41.2/112.6 GB disk)


## Get datasets

In [None]:
!mkdir {HOME}/datasets
%cd {HOME}/datasets

from google.colab import userdata
from roboflow import Roboflow

ROBOFLOW_API_KEY = userdata.get('ROBOFLOW_API_KEY')
rf = Roboflow(api_key=ROBOFLOW_API_KEY)

# Dataset 1: https://universe.roboflow.com/new-workspace-fkiyn/pig-detection-cne8r
project = rf.workspace("new-workspace-fkiyn").project("pig-detection-cne8r")
version = project.version(3)
dataset = version.download("yolov11")

# Dataset 2: https://universe.roboflow.com/li-yao-tseng/pig-tilpu
project = rf.workspace("li-yao-tseng").project("pig-tilpu")
version = project.version(2)
dataset = version.download("yolov11")

# Dataset 3: https://universe.roboflow.com/plp1/deteccao-porcos/dataset/2
project = rf.workspace("plp1").project("deteccao-porcos")
version = project.version(2)
dataset = version.download("yolov11")


/content/datasets
loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Pig-detection-3 to yolov11:: 100%|██████████| 58531/58531 [00:01<00:00, 39052.71it/s]





Extracting Dataset Version Zip to Pig-detection-3 in yolov11:: 100%|██████████| 2006/2006 [00:00<00:00, 4872.44it/s]

loading Roboflow workspace...





loading Roboflow project...


Downloading Dataset Version Zip in Pig-2 to yolov11:: 100%|██████████| 58694/58694 [00:02<00:00, 21042.32it/s]





Extracting Dataset Version Zip to Pig-2 in yolov11:: 100%|██████████| 2600/2600 [00:00<00:00, 7104.37it/s]

loading Roboflow workspace...





loading Roboflow project...


Downloading Dataset Version Zip in Deteccao-Porcos-2 to yolov11:: 100%|██████████| 553393/553393 [00:08<00:00, 68659.47it/s]





Extracting Dataset Version Zip to Deteccao-Porcos-2 in yolov11:: 100%|██████████| 2892/2892 [00:02<00:00, 1155.46it/s]


## Combine datasets

### Create new directory

In [None]:
# Show the working directory; the relative dataset folder names used in the
# following cells are resolved against it.
%pwd

'/content/datasets'

In [None]:
!mkdir {HOME}/datasets/combined_datasets

### Split the Pig-detection-3 dataset into train, valid and test

In [None]:
import random
import shutil
import os


def split_dataset(base_folder, train_ratio=0.70, valid_ratio=0.20, test_ratio=0.10, seed=42):
    """Split the ``train`` split of a dataset folder into train/valid/test.

    Images are read from ``<base_folder>/train/images`` and labels from
    ``<base_folder>/train/labels`` (a label has the same base name as its image,
    with a ``.txt`` extension). Files are copied into ``train_``, ``valid`` and
    ``test`` sub-folders of ``base_folder``; once every copy has succeeded the
    original ``train`` folder is deleted and ``train_`` is renamed to ``train``.

    Args:
        base_folder: Dataset root containing ``train/images`` and ``train/labels``.
        train_ratio: Fraction of the images that stays in the train split.
        valid_ratio: Fraction of the images that goes to the valid split.
        test_ratio: Kept for interface compatibility; it is not used in the
            computation. The test split receives every image left over after
            the train and valid shares are taken.
        seed: Seed passed to ``random.seed`` before shuffling.

    Note:
        The function is not idempotent: calling it again re-splits the new
        ``train`` folder and adds files to the existing ``valid``/``test``.
    """
    random.seed(seed)

    image_folder = os.path.join(base_folder, 'train', 'images')
    label_folder = os.path.join(base_folder, 'train', 'labels')

    # os.listdir returns entries in arbitrary order, so sort before shuffling to
    # make the seeded split reproducible. Sub-directories are ignored because
    # they cannot be copied as image files.
    images = sorted(
        name for name in os.listdir(image_folder)
        if os.path.isfile(os.path.join(image_folder, name))
    )
    random.shuffle(images)  # shuffle for random split

    total = len(images)
    train_end = int(train_ratio * total)
    valid_end = train_end + int(valid_ratio * total)

    # 'train_' is a temporary name: the real 'train' folder is still the source.
    splits = {
        'train_': images[:train_end],
        'valid': images[train_end:valid_end],
        'test': images[valid_end:]
    }

    for split, files in splits.items():
        img_dst = os.path.join(base_folder, split, 'images')
        lbl_dst = os.path.join(base_folder, split, 'labels')
        os.makedirs(img_dst, exist_ok=True)
        os.makedirs(lbl_dst, exist_ok=True)

        for img_file in files:
            # Copy image
            shutil.copy(os.path.join(image_folder, img_file), os.path.join(img_dst, img_file))

            # Copy label (label has the same base name as the image). An image
            # without a label file is copied alone instead of aborting the
            # split halfway through.
            label_file = os.path.splitext(img_file)[0] + '.txt'
            label_src = os.path.join(label_folder, label_file)
            if os.path.isfile(label_src):
                shutil.copy(label_src, os.path.join(lbl_dst, label_file))

    # Delete the original train folder (images and labels included) only after
    # every copy above has succeeded.
    shutil.rmtree(os.path.join(base_folder, 'train'))

    # Rename the temporary folder inside base_folder, not in the current
    # working directory.
    os.rename(os.path.join(base_folder, 'train_'), os.path.join(base_folder, 'train'))

    print("Dataset successfully split into train, valid, and test.")

# Split Pig-detection-3; the folder name is resolved relative to the current
# working directory.
split_dataset('Pig-detection-3')

Dataset successfully split into train, valid, and test.


### Concatenate folders

In [None]:
def concatenate_folders(
    destination_folder,
    datasets=('Deteccao-Porcos-2', 'Pig-2', 'Pig-detection-3'),
    splits=('test', 'train', 'valid'),
):
    """Copy the images and labels of several datasets into one folder tree.

    For every dataset and split, each file found in
    ``<dataset>/<split>/images`` and ``<dataset>/<split>/labels`` is copied to
    the same ``<split>/images`` / ``<split>/labels`` location under
    ``destination_folder``. Files that share a name across datasets overwrite
    one another, the last dataset listed winning.

    Args:
        destination_folder: Root of the merged dataset; created if missing.
        datasets: Dataset folders to merge. Relative names are resolved against
            the current working directory. Defaults to the three datasets this
            notebook downloads.
        splits: Split sub-folders expected inside every dataset.

    Raises:
        FileNotFoundError: If a dataset lacks one of the expected
            ``<split>/images`` or ``<split>/labels`` folders.
    """
    os.makedirs(destination_folder, exist_ok=True)

    for dataset in datasets:
        for split in splits:
            for folder in ('images', 'labels'):
                source_dir = os.path.join(dataset, split, folder)
                target_dir = os.path.join(destination_folder, split, folder)
                os.makedirs(target_dir, exist_ok=True)
                for file in os.listdir(source_dir):
                    shutil.copy(os.path.join(source_dir, file), os.path.join(target_dir, file))

# Merge the downloaded datasets into combined_datasets (resolved relative to
# the current working directory).
concatenate_folders('combined_datasets')

### Create yaml file

In [None]:
import yaml


def create_data_yaml(combined_dataset_folder, num_classes, class_names):
    """Write a ``data.yaml`` dataset description into the combined dataset folder.

    The file lists the image folder of each split together with the number of
    classes and their names.

    Args:
        combined_dataset_folder: Existing folder in which ``data.yaml`` is
            written (an existing file is overwritten).
        num_classes: Value stored under the ``nc`` key.
        class_names: List of class names stored under the ``names`` key.
    """
    # Split image folders, written in the '../<split>/images' form.
    # NOTE(review): presumably resolved by the training tool relative to the
    # dataset folder -- confirm before moving data.yaml elsewhere.
    data_yaml = {
        'train': '../train/images',
        'val': '../valid/images',
        'test': '../test/images',
        'nc': num_classes,
        'names': class_names
    }

    # Write to the YAML file
    yaml_path = os.path.join(combined_dataset_folder, 'data.yaml')
    with open(yaml_path, 'w') as yaml_file:
        yaml.dump(data_yaml, yaml_file, default_flow_style=False)

    # Report the actual file name written (data.yaml, not data.yml).
    print("data.yaml file created in", combined_dataset_folder)

# Folder that receives data.yaml (relative to the current working directory).
combined_dataset_folder = 'combined_datasets'
# The merged dataset is described with a single class named 'Pig'.
num_classes = 1
class_names = ['Pig']

create_data_yaml(combined_dataset_folder, num_classes, class_names)

data.yml file created in combined_datasets


In [None]:
# Absolute path of the merged dataset, derived from HOME instead of a
# hard-coded '/content' prefix so it follows wherever the notebook was started.
path_to_combined_datasets = os.path.join(HOME, 'datasets', 'combined_datasets')

## Train new model

In [None]:
!yolo task=detect mode=train model=yolo11s.pt data={path_to_combined_datasets}/data.yaml epochs=6 imgsz=832 plots=True batch=32

New https://pypi.org/project/ultralytics/8.3.103 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.40 🚀 Python-3.11.11 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11s.pt, data=/content/datasets/combined_datasets/data.yaml, epochs=6, time=None, patience=100, batch=32, imgsz=832, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train2, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, emb

In [None]:
# Evaluate the best checkpoint with the validation mode of the yolo CLI.
# The weights path assumes the training run was saved under runs/detect/train2.
!yolo task=detect mode=val model={HOME}/datasets/runs/detect/train2/weights/best.pt data={path_to_combined_datasets}/data.yaml

Ultralytics 8.3.40 🚀 Python-3.11.11 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
YOLO11s summary (fused): 238 layers, 9,413,187 parameters, 0 gradients, 21.3 GFLOPs
[34m[1mval: [0mScanning /content/datasets/combined_datasets/valid/labels.cache... 744 images, 0 backgrounds, 0 corrupt: 100% 744/744 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% 47/47 [00:18<00:00,  2.56it/s]
                   all        744       7203      0.917      0.911      0.962      0.612
Speed: 1.1ms preprocess, 12.4ms inference, 0.0ms loss, 1.8ms postprocess per image
Results saved to [1mruns/detect/val[0m
💡 Learn more at https://docs.ultralytics.com/modes/val


In [None]:
!yolo task=detect mode=test model={HOME}/datasets/runs/detect/train2/weights/best.pt data={path_to_combined_datasets}/data.yaml

Traceback (most recent call last):
  File "/usr/local/bin/yolo", line 8, in <module>
    sys.exit(entrypoint())
             ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ultralytics/cfg/__init__.py", line 911, in entrypoint
    raise ValueError(f"Invalid 'mode={mode}'. Valid modes are {MODES}.\n{CLI_HELP_MSG}")
ValueError: Invalid 'mode=test'. Valid modes are {'val', 'train', 'export', 'benchmark', 'predict', 'track'}.

    Arguments received: ['yolo', 'task=detect', 'mode=test', 'model=/content/datasets/runs/detect/train2/weights/best.pt', 'data=/content/datasets/combined_datasets/data.yaml']. Ultralytics 'yolo' commands use the following syntax:

        yolo TASK MODE ARGS

        Where   TASK (optional) is one of {'classify', 'segment', 'obb', 'pose', 'detect'}
                MODE (required) is one of {'val', 'train', 'export', 'benchmark', 'predict', 'track'}
                ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override

In [None]:
from google.colab import files
# Download the best checkpoint to the local machine. The path is built from
# HOME, like the validation cells, instead of a hard-coded '/content' prefix.
files.download(os.path.join(HOME, 'datasets', 'runs', 'detect', 'train2', 'weights', 'best.pt'))

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>