# Classification with YOLOv8
#### Variant Testing: Choosing whether to use YOLOV8N, YOLOV8S or YOLOV8M.

In [13]:
import os
import shutil
from pathlib import Path
from sklearn.model_selection import train_test_split
from ultralytics import YOLO
import torch
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# setting random seed
random_seed = 42
custom_epochs = 20

# source folder and classes
source_dir = Path("/home/shared-data/corrosion_images")
classes = ["corrosion", "no_corrosion"]

# new folder structure to be created
output_dir = Path("/home/liva/dataset_variants")
train_dir = output_dir / "train"
val_dir = output_dir / "val"

## 1. Create Train/Val/Test Dataset

In [None]:
# creating the directories
for split in [train_dir, val_dir]:
    for cls in classes:
        (split / cls).mkdir(parents=True, exist_ok=True)

# split and copy images for each class
for cls in classes:
    cls_path = source_dir / cls
    images = list(cls_path.glob("*.jpg")) + list(cls_path.glob("*.png")) + list(cls_path.glob("*.jpeg"))
    
    # 80/20 for training and validation
    train_imgs, val_imgs = train_test_split(
        images, 
        test_size=0.2, 
        random_state=random_seed,
        shuffle=True
    )
    
    # copying to training
    for img in train_imgs:
        shutil.copy2(img, train_dir / cls / img.name)
    
    # copying to validation
    for img in val_imgs:
        shutil.copy2(img, val_dir / cls / img.name)

print(f"Train set: {len(list(train_dir.rglob('*.*')))} images")
print(f"Val set: {len(list(val_dir.rglob('*.*')))} images")


Yolov8 has multiple models. They range from nano, to small, to medium and to others (not experimented with in this project).
I will check out these three and evaluate their results in the project resport.

Below: set random seed for training

In [None]:
random_seed = 42
name = f"seed{random_seed}"

## Nano YOLOv8

In [None]:
random_seed = 999
model_nano = YOLO("yolov8n-cls.pt")
results = model_nano.train(
    data=output_dir,
    epochs=custom_epochs,
    imgsz=256,
    batch=64,
    device='0',
    optimizer='AdamW',
    lr0=0.001,
    lrf=0.1,
    momentum=0.9,
    weight_decay=0.0005,
    dropout=0.2,
    name=f"{name}_{custom_epochs}epochs_yoloN",
    seed=random_seed
)

In [32]:
# get metrics
metrics_nano = model_nano.val()

Ultralytics 8.3.217 üöÄ Python-3.10.12 torch-2.9.0+cu128 CUDA:0 (Tesla T4, 14916MiB)
YOLOv8n-cls summary (fused): 30 layers, 1,437,442 parameters, 0 gradients, 3.3 GFLOPs
[34m[1mtrain:[0m /home/liva/dataset/train... found 3199 images in 2 classes ‚úÖ 
[34m[1mval:[0m /home/liva/dataset/val... found 800 images in 2 classes ‚úÖ 
[34m[1mtest:[0m None...
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 2997.7¬±1343.5 MB/s, size: 1310.6 KB)
[K[34m[1mval: [0mScanning /home/liva/dataset/val... 800 images, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 800/800 666.4Kit/s 0.0s
[34m[1mval: [0m/home/liva/dataset/val/corrosion/6074246.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/home/liva/dataset/val/no_corrosion/16137439.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/home/liva/dataset/val/no_corrosion/6067519.jpg: corrupt JPEG restored and saved
[K               classes   top1_acc   top5_acc: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 50/

In [None]:
print("Accuracy of YOLOv8N", metrics_nano.top1)
# plot_train_val_loss("runs/classify/seed42_20epochs_yoloN/results.csv", title="Training vs Validation Loss, YOLOv8n", save_path="output/seed42_20epochs_yoloN_AAA")

Accuracy of YOLOv8n on Seed 42: 0.987500011920929


## Small YOLOv8S

In [None]:
model_small = YOLO("yolov8s-cls.pt") # small
results1 = model_small.train(
    data=output_dir,
    epochs=custom_epochs,
    imgsz=256,
    batch=64,
    device='0',
    optimizer='AdamW',
    lr0=0.001,
    lrf=0.1,
    momentum=0.9,
    weight_decay=0.0005,
    dropout=0.2,
    name=f"{name}_{custom_epochs}epochs_yoloS",
    seed=random_seed
)

In [35]:
# get metrics
metrics_small = model_small.val()

Ultralytics 8.3.217 üöÄ Python-3.10.12 torch-2.9.0+cu128 CUDA:0 (Tesla T4, 14916MiB)
YOLOv8s-cls summary (fused): 30 layers, 5,077,762 parameters, 0 gradients, 12.4 GFLOPs
[34m[1mtrain:[0m /home/liva/dataset/train... found 3199 images in 2 classes ‚úÖ 
[34m[1mval:[0m /home/liva/dataset/val... found 800 images in 2 classes ‚úÖ 
[34m[1mtest:[0m None...
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 4462.7¬±1409.5 MB/s, size: 1310.6 KB)
[K[34m[1mval: [0mScanning /home/liva/dataset/val... 800 images, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 800/800 966.4Kit/s 0.0s
[34m[1mval: [0m/home/liva/dataset/val/corrosion/6074246.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/home/liva/dataset/val/no_corrosion/16137439.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/home/liva/dataset/val/no_corrosion/6067519.jpg: corrupt JPEG restored and saved
[K               classes   top1_acc   top5_acc: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 50

In [None]:
print("Accuracy of YOLOv8S:", metrics_small.top1)
# plot_train_val_loss("runs/classify/seed42_20epochs_yoloS/results.csv", title="Training vs Validation Loss, YOLOv8s", save_path="output/seed42_20epochs_yoloS")

Accuracy of YOLOv8s on Seed 42: 0.987500011920929


In [37]:
# plot_train_val_loss("runs/classify/seed42_20epochs_yoloS/results.csv", title="Training vs Validation Loss, YOLOv8s", save_path="output/seed42_20epochs_yoloS")

## Medium YOLOv8

In [None]:
model_medium = YOLO("yolov8m-cls.pt") # medium
results2 = model_medium.train(
    data=output_dir,
    epochs=custom_epochs,
    imgsz=256,
    batch=64,
    device='0',
    optimizer='AdamW',
    lr0=0.001,
    lrf=0.1,
    momentum=0.9,
    weight_decay=0.0005,
    dropout=0.2,
    name=f"{name}_{custom_epochs}epochs_yoloM",
    seed=random_seed
)

In [39]:
# get metrics
metrics_medium = model_medium.val()

Ultralytics 8.3.217 üöÄ Python-3.10.12 torch-2.9.0+cu128 CUDA:0 (Tesla T4, 14916MiB)
YOLOv8m-cls summary (fused): 42 layers, 15,765,218 parameters, 0 gradients, 41.6 GFLOPs
[34m[1mtrain:[0m /home/liva/dataset/train... found 3199 images in 2 classes ‚úÖ 
[34m[1mval:[0m /home/liva/dataset/val... found 800 images in 2 classes ‚úÖ 
[34m[1mtest:[0m None...
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 4700.6¬±1268.1 MB/s, size: 1310.6 KB)
[K[34m[1mval: [0mScanning /home/liva/dataset/val... 800 images, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 800/800 926.2Kit/s 0.0s
[34m[1mval: [0m/home/liva/dataset/val/corrosion/6074246.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/home/liva/dataset/val/no_corrosion/16137439.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/home/liva/dataset/val/no_corrosion/6067519.jpg: corrupt JPEG restored and saved
[K               classes   top1_acc   top5_acc: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 5

In [None]:
print("Accuracy of YOLOv8M:", metrics_medium.top1)
# plot_train_val_loss("runs/classify/seed42_20epochs_yoloM/results.csv", title="Training vs Validation Loss, YOLOv8m", save_path="output/seed42_20epochs_yoloM")

Accuracy of YOLOv8m on Seed 42: 0.9900000095367432


The best result is with the model YOLOv8S; small. Further explanations are found in the project report.