## Fine-tuning YOLOv8

### Setup

In [None]:
!pip install datasets ultralytics

In [2]:
import os
import yaml
import torch
from datasets import load_dataset
from ultralytics import YOLO
from PIL import Image

In [None]:
# Select the compute device: CUDA when PyTorch reports it as available,
# otherwise the CPU. The trailing bare expression displays the choice.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

### Dataset

In [None]:
# Load the plastic-in-river dataset; no split is requested here, and the
# individual splits are selected by name further down.
dataset = load_dataset(path="kili-technology/plastic_in_river")

In [5]:
# Create the on-disk layout for the exported data: an images folder and a
# labels folder for each of the train and validation splits.
for directory in (
    "datasets/images/train",
    "datasets/images/validation",
    "datasets/labels/train",
    "datasets/labels/validation",
):
    os.makedirs(directory, exist_ok=True)

In [6]:
def dump_images_and_labels(data, split):
    """Export one split of ``data`` to disk as images plus per-image label files.

    For every example in ``data[split]`` this writes
    ``datasets/labels/{split}/{i}.txt`` with one
    ``"<label> <b0> <b1> <b2> <b3>"`` line per annotated object, and saves the
    example's image to ``datasets/images/{split}/{i}.png``. ``i`` is the
    example's position within the split. Output directories are created when
    missing, so the function can be called without any prior setup.

    Args:
        data: Mapping from split name to an iterable of examples. Each example
            provides ``example["image"]`` (an object with a ``save(path)``
            method) and ``example["litter"]`` holding parallel ``"label"`` and
            ``"bbox"`` sequences.
        split: Name of the split to export, e.g. ``"train"``.
    """
    import os  # local import keeps the function self-contained

    image_dir = f"datasets/images/{split}"
    label_dir = f"datasets/labels/{split}"
    # Without these the first open()/save() raises FileNotFoundError whenever
    # the caller has not prepared the folders beforehand.
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(label_dir, exist_ok=True)

    for i, example in enumerate(data[split]):
        litter = example["litter"]
        # NOTE(review): box values are written verbatim; this assumes the
        # dataset already stores them in the layout the trainer expects.
        lines = [
            f"{label} {box[0]} {box[1]} {box[2]} {box[3]}\n"
            for label, box in zip(litter["label"], litter["bbox"])
        ]
        # An example without annotations still gets an (empty) label file.
        with open(f"{label_dir}/{i}.txt", "w", encoding="utf-8") as f:
            f.writelines(lines)
        example["image"].save(f"{image_dir}/{i}.png")

In [None]:
# Export the two splits used for fine-tuning: train first, then validation.
for split_name in ("train", "validation"):
    dump_images_and_labels(dataset, split_name)

### Train

In [None]:
# Dataset description handed to the trainer; it is serialized to plastic.yaml.
config = {
    # Absolute dataset root. Ultralytics resolves a relative root against its
    # configured datasets directory rather than the working directory, so "."
    # only works when that setting happens to be ./datasets.
    "path": os.path.abspath("datasets"),
    # Image folders, relative to "path".
    "train": "images/train",
    "val": "images/validation",
    # Class index -> class name; indices match the labels in the label files.
    "names": {
        0: "PLASTIC_BAG",
        1: "PLASTIC_BOTTLE",
        2: "OTHER_PLASTIC_WASTE",
        3: "NOT_PLASTIC_WASTE",
    },
}

with open("plastic.yaml", "w", encoding="utf-8") as f:
    yaml.dump(config, f)

In [None]:
# Start from the pretrained yolov8m.pt weights and fine-tune them on the
# dataset described by plastic.yaml.
model = YOLO("yolov8m.pt")
model.train(
    data="plastic.yaml",
    epochs=20,
    # Training accepts a single integer image size. A (w, h) pair is not
    # supported here: Ultralytics warns and falls back to the larger value, so
    # pass that value explicitly.
    imgsz=1280,
    batch=4,
    optimizer="Adam",
    lr0=1e-3,
)

### Predict

In [None]:
# Run the fine-tuned weights on the first test image, draw the detections and
# save the annotated picture to output.png.
img = dataset["test"][0]["image"]
model = YOLO("runs/detect/train/weights/best.pt")

first_result = model.predict(img)[0]
annotated = first_result.plot(line_width=1)
# Reverse the channel axis before handing the array to PIL
# (presumably BGR -> RGB; confirm against the plot() documentation).
annotated_reversed = annotated[:, :, ::-1]

res = Image.fromarray(annotated_reversed)
res.save("output.png")
res