In [1]:
# Step 1: Setup Environment
# Install dependencies (if needed)
!pip install ultralytics
!pip install torch torchvision torchaudio

import torch
import os
import shutil
import random
import numpy as np
from sklearn.model_selection import KFold
from ultralytics import YOLO

Collecting ultralytics
  Downloading ultralytics-8.3.97-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nv

In [2]:

# Check GPU availability
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

Using device: cuda


In [3]:
!pip install ultralytics opencv-python matplotlib

import torch
import os
import shutil
import random
import numpy as np
import glob
from sklearn.model_selection import KFold
from ultralytics import YOLO
import kagglehub




In [4]:

# Step 2: Download & Load Dataset
path = kagglehub.dataset_download("karanwxlia/underwater-trash-detection")
print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/karanwxlia/underwater-trash-detection?dataset_version_number=1...


100%|██████████| 213M/213M [00:11<00:00, 19.2MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/karanwxlia/underwater-trash-detection/versions/1


In [5]:

dataset_path = "/root/.cache/kagglehub/datasets/karanwxlia/underwater-trash-detection/versions/1"
yolo_path = os.path.join(dataset_path, "utd2.v8i.yolov8")


In [6]:

# Define paths for consolidated dataset
all_images_path = "/kaggle/working/all_images"
all_labels_path = "/kaggle/working/all_labels"
os.makedirs(all_images_path, exist_ok=True)
os.makedirs(all_labels_path, exist_ok=True)

In [7]:
# Move images and labels from train, valid, and test to single folders
for split in ["train", "valid", "test"]:
    split_images = os.path.join(yolo_path, split, "images")
    split_labels = os.path.join(yolo_path, split, "labels")

    for img in glob.glob(os.path.join(split_images, "*.jpg")):
        shutil.move(img, all_images_path)
    for lbl in glob.glob(os.path.join(split_labels, "*.txt")):
        shutil.move(lbl, all_labels_path)

print("Merged all images and labels into all_images/ and all_labels/.")


Merged all images and labels into all_images/ and all_labels/.


In [8]:

# Step 3: Prepare 5-Fold Cross Validation
num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)


In [9]:

# Get all images
all_images = sorted(glob.glob(os.path.join(all_images_path, "*.jpg")))
random.shuffle(all_images)

assert len(all_images) > 0, "No images found! Check dataset path."

total_images = len(all_images)
print(f"Total images: {total_images}")

Total images: 9576


In [10]:

# Step 4: Perform Cross Validation
results = []
for fold, (train_idx, val_idx) in enumerate(kf.split(all_images)):
    print(f"\nFold {fold + 1}/{num_folds}...")

    fold_path = f"/kaggle/working/fold_{fold + 1}"
    os.makedirs(fold_path, exist_ok=True)

    fold_train = os.path.join(fold_path, "train")
    fold_val = os.path.join(fold_path, "val")
    os.makedirs(os.path.join(fold_train, "images"), exist_ok=True)
    os.makedirs(os.path.join(fold_train, "labels"), exist_ok=True)
    os.makedirs(os.path.join(fold_val, "images"), exist_ok=True)
    os.makedirs(os.path.join(fold_val, "labels"), exist_ok=True)

    # Move training data
    for idx in train_idx:
        img_path = all_images[idx]
        shutil.copy(img_path, os.path.join(fold_train, "images"))
        lbl_path = img_path.replace("all_images", "all_labels").replace(".jpg", ".txt")
        if os.path.exists(lbl_path):
            shutil.copy(lbl_path, os.path.join(fold_train, "labels"))

    # Move validation data
    for idx in val_idx:
        img_path = all_images[idx]
        shutil.copy(img_path, os.path.join(fold_val, "images"))
        lbl_path = img_path.replace("all_images", "all_labels").replace(".jpg", ".txt")
        if os.path.exists(lbl_path):
            shutil.copy(lbl_path, os.path.join(fold_val, "labels"))



Fold 1/5...

Fold 2/5...

Fold 3/5...

Fold 4/5...

Fold 5/5...


In [21]:

data_yaml = "/content/data.yaml"

model = YOLO("yolov10n.pt")

results.append(model.train(
        data=data_yaml,
        epochs=25,
        imgsz=640,
        batch=16,
        device=device
    ))

Ultralytics 8.3.97 🚀 Python-3.11.11 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov10n.pt, data=/content/data.yaml, epochs=25, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=cuda, workers=8, project=None, name=train6, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, sho

[34m[1mtrain: [0mScanning /kaggle/working/fold_1/train/labels.cache... 7660 images, 0 backgrounds, 0 corrupt: 100%|██████████| 7660/7660 [00:00<?, ?it/s]






[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /kaggle/working/fold_1/val/labels.cache... 1916 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1916/1916 [00:00<?, ?it/s]


Plotting labels to runs/detect/train6/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001429, momentum=0.9) with parameter groups 95 weight(decay=0.0), 108 weight(decay=0.0005), 107 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train6[0m
Starting training for 25 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/25      4.92G      2.858      7.451      3.141         35        640: 100%|██████████| 479/479 [02:38<00:00,  3.03it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:16<00:00,  3.64it/s]


                   all       1916       2817      0.274      0.237      0.187     0.0931

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/25      4.92G      3.162      5.094      3.305         31        640: 100%|██████████| 479/479 [02:34<00:00,  3.11it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:15<00:00,  3.83it/s]


                   all       1916       2817      0.559      0.143      0.156     0.0682

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/25      4.92G      3.187      4.586      3.319         24        640: 100%|██████████| 479/479 [02:36<00:00,  3.06it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:16<00:00,  3.59it/s]

                   all       1916       2817      0.437      0.426      0.372       0.19






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/25      4.92G       3.11      4.315      3.268         33        640: 100%|██████████| 479/479 [02:33<00:00,  3.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:16<00:00,  3.55it/s]


                   all       1916       2817       0.45      0.361      0.346      0.176

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/25      4.92G       3.01      3.989       3.16         32        640: 100%|██████████| 479/479 [02:38<00:00,  3.02it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:16<00:00,  3.64it/s]

                   all       1916       2817       0.54      0.466      0.468      0.258






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/25      4.92G       2.96      3.812      3.154         56        640: 100%|██████████| 479/479 [02:41<00:00,  2.96it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:17<00:00,  3.42it/s]

                   all       1916       2817       0.66       0.52      0.578      0.344






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/25      4.92G      2.886      3.577      3.102         25        640: 100%|██████████| 479/479 [02:47<00:00,  2.87it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:17<00:00,  3.35it/s]


                   all       1916       2817      0.584      0.528      0.549      0.326

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/25      4.92G      2.827      3.437      3.031         25        640: 100%|██████████| 479/479 [02:48<00:00,  2.85it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:17<00:00,  3.45it/s]

                   all       1916       2817      0.661      0.561      0.619      0.385






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/25      4.92G      2.787      3.263      3.012         37        640: 100%|██████████| 479/479 [02:42<00:00,  2.94it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:17<00:00,  3.37it/s]

                   all       1916       2817      0.666      0.554      0.601      0.384






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/25      4.92G      2.748      3.145      2.983         24        640: 100%|██████████| 479/479 [02:46<00:00,  2.87it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:18<00:00,  3.24it/s]

                   all       1916       2817      0.689      0.574      0.649      0.416






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/25      4.92G      2.734      3.081      2.967         36        640: 100%|██████████| 479/479 [02:46<00:00,  2.88it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:17<00:00,  3.47it/s]

                   all       1916       2817      0.721      0.619      0.684      0.433






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/25      4.92G      2.676      2.934      2.931         34        640: 100%|██████████| 479/479 [02:43<00:00,  2.92it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:16<00:00,  3.54it/s]

                   all       1916       2817      0.741      0.617      0.701      0.459






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/25      4.92G      2.631      2.859      2.917         31        640: 100%|██████████| 479/479 [02:43<00:00,  2.93it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:18<00:00,  3.31it/s]

                   all       1916       2817      0.748      0.664       0.73      0.488






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/25      4.92G      2.607       2.79      2.885         33        640: 100%|██████████| 479/479 [02:51<00:00,  2.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:16<00:00,  3.65it/s]

                   all       1916       2817       0.76      0.659      0.742      0.497






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/25      4.92G      2.573      2.661      2.849         34        640: 100%|██████████| 479/479 [02:52<00:00,  2.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:19<00:00,  3.05it/s]

                   all       1916       2817      0.749      0.706      0.768      0.526





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/25      4.92G      2.546      2.156       2.94         12        640: 100%|██████████| 479/479 [02:33<00:00,  3.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:16<00:00,  3.60it/s]

                   all       1916       2817      0.787      0.691      0.771      0.532






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/25      4.92G       2.49      2.005      2.893         14        640: 100%|██████████| 479/479 [02:29<00:00,  3.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:16<00:00,  3.59it/s]

                   all       1916       2817      0.797      0.696      0.787      0.544






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/25      4.92G      2.448      1.949      2.854         21        640: 100%|██████████| 479/479 [02:28<00:00,  3.23it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:16<00:00,  3.65it/s]

                   all       1916       2817      0.817      0.742      0.817      0.581






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/25      4.92G      2.378      1.825      2.809         16        640: 100%|██████████| 479/479 [02:26<00:00,  3.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:16<00:00,  3.61it/s]

                   all       1916       2817      0.808      0.729      0.808      0.575






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/25      4.92G      2.338       1.73       2.76         14        640: 100%|██████████| 479/479 [02:41<00:00,  2.96it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:18<00:00,  3.19it/s]

                   all       1916       2817      0.825      0.756      0.832      0.598






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/25      4.92G      2.309      1.652       2.74         16        640: 100%|██████████| 479/479 [02:43<00:00,  2.92it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:16<00:00,  3.58it/s]

                   all       1916       2817      0.844      0.768      0.855       0.62






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/25      4.92G      2.251      1.584      2.693         18        640: 100%|██████████| 479/479 [02:43<00:00,  2.93it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:16<00:00,  3.58it/s]

                   all       1916       2817      0.829       0.77       0.85      0.622






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/25      4.92G      2.209      1.523      2.668         15        640: 100%|██████████| 479/479 [02:40<00:00,  2.99it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:21<00:00,  2.81it/s]

                   all       1916       2817      0.838      0.792      0.866       0.64






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/25      4.92G      2.167      1.468       2.63         17        640: 100%|██████████| 479/479 [02:43<00:00,  2.93it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:19<00:00,  3.00it/s]

                   all       1916       2817      0.868       0.78      0.868      0.645






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/25      4.92G      2.145      1.419      2.614         17        640: 100%|██████████| 479/479 [02:45<00:00,  2.89it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:17<00:00,  3.52it/s]

                   all       1916       2817      0.869      0.786      0.875      0.654






25 epochs completed in 1.257 hours.
Optimizer stripped from runs/detect/train6/weights/last.pt, 5.7MB
Optimizer stripped from runs/detect/train6/weights/best.pt, 5.7MB

Validating runs/detect/train6/weights/best.pt...
Ultralytics 8.3.97 🚀 Python-3.11.11 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
YOLOv10n summary (fused): 125 layers, 2,695,586 parameters, 0 gradients, 8.2 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 60/60 [00:22<00:00,  2.71it/s]


                   all       1916       2817       0.87      0.783      0.875      0.654
                   Bio        403        557      0.877      0.772      0.868      0.662
                   Rov        341        427      0.815      0.787      0.862      0.706
                 Trash       1541       1833      0.919      0.791      0.896      0.593
Speed: 0.2ms preprocess, 3.1ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/train6[0m


In [23]:
import pandas as pd
results = pd.read_csv("/content/runs/detect/train6/results.csv")
results.head()

Unnamed: 0,epoch,time,train/box_loss,train/cls_loss,train/dfl_loss,metrics/precision(B),metrics/recall(B),metrics/mAP50(B),metrics/mAP50-95(B),val/box_loss,val/cls_loss,val/dfl_loss,lr/pg0,lr/pg1,lr/pg2
0,1,221.285,2.85768,7.45068,3.14114,0.27427,0.23709,0.18729,0.09309,3.37263,5.65464,3.64729,0.000475,0.000475,0.000475
1,2,391.917,3.16191,5.09425,3.30478,0.55932,0.14276,0.15643,0.06821,3.65047,5.62289,3.9322,0.000914,0.000914,0.000914
2,3,565.979,3.18683,4.58588,3.31923,0.43743,0.4257,0.37248,0.19031,3.35558,4.02886,3.57356,0.001315,0.001315,0.001315
3,4,736.999,3.11037,4.31507,3.26765,0.44952,0.36078,0.34644,0.17562,3.35186,4.18081,3.6095,0.001259,0.001259,0.001259
4,5,912.584,3.00955,3.98928,3.16003,0.53954,0.46585,0.46764,0.25786,3.20248,3.47741,3.39744,0.001203,0.001203,0.001203


In [24]:
import pandas as pd

# Load results CSV
results = pd.read_csv("/content/runs/detect/train6/results.csv")

# Extract the last epoch's metrics
final_metrics = results.iloc[-1]  # Get last row

# Print all relevant metrics
print("\nFinal Cross-Validation Results:")
print(f"Epoch: {int(final_metrics['epoch'])}")
print(f"Training Box Loss: {final_metrics['train/box_loss']:.4f}")
print(f"Training Class Loss: {final_metrics['train/cls_loss']:.4f}")
print(f"Training DFL Loss: {final_metrics['train/dfl_loss']:.4f}")
print(f"Validation Box Loss: {final_metrics['val/box_loss']:.4f}")
print(f"Validation Class Loss: {final_metrics['val/cls_loss']:.4f}")
print(f"Validation DFL Loss: {final_metrics['val/dfl_loss']:.4f}")
print(f"Precision: {final_metrics['metrics/precision(B)']:.4f}")
print(f"Recall: {final_metrics['metrics/recall(B)']:.4f}")
print(f"mAP50: {final_metrics['metrics/mAP50(B)']:.4f}")
print(f"mAP50-95: {final_metrics['metrics/mAP50-95(B)']:.4f}")



Final Cross-Validation Results:
Epoch: 25
Training Box Loss: 2.1446
Training Class Loss: 1.4193
Training DFL Loss: 2.6138
Validation Box Loss: 2.2352
Validation Class Loss: 1.3197
Validation DFL Loss: 2.5484
Precision: 0.8689
Recall: 0.7861
mAP50: 0.8749
mAP50-95: 0.6539
