# Train YOLOv8 with Data Augmentation

In [1]:
import os
import argparse
import logging
import matplotlib.pyplot as plt
from ultralytics import YOLO
from IPython.display import display, Image
from PIL import Image
import pandas as pd
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader, Dataset
import cv2

# Custom Dataset Class

In [2]:
class CustomDataset(Dataset):
    """Dataset pairing ``.jpg`` images with whitespace-delimited label files.

    Every ``<name>.jpg`` found in ``image_dir`` is matched with
    ``<name>.txt`` in ``label_dir``.  Indexing returns ``(image, boxes)``:
    the image converted from BGR to RGB and the parsed label rows, both
    passed through ``transform`` when one is given.

    NOTE(review): label rows are handed to ``transform`` unchanged as
    ``bboxes``; confirm their column order matches what the transform's
    bounding-box parameters expect.
    """

    # Column count of the placeholder returned for an empty label file.
    # Assumes YOLO rows ``class x_center y_center width height`` -- TODO confirm.
    _LABEL_COLUMNS = 5

    def __init__(self, image_dir, label_dir, transform=None):
        """Record the directories and index the ``.jpg`` files by sorted name.

        Args:
            image_dir: Directory that is listed for ``.jpg`` images.
            label_dir: Directory holding one ``.txt`` label file per image.
            transform: Optional callable invoked as
                ``transform(image=..., bboxes=...)`` that returns a mapping
                with ``'image'`` and ``'bboxes'`` keys.
        """
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transform = transform
        self.images = sorted(
            name for name in os.listdir(image_dir) if name.endswith('.jpg')
        )

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load the image and label rows at position ``idx``.

        Raises:
            OSError: If OpenCV cannot read the image file.
        """
        img_name = self.images[idx]
        img_path = os.path.join(self.image_dir, img_name)
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None rather than
            # raising; fail here with the offending path instead of inside
            # cvtColor with an opaque assertion message.
            raise OSError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Swap only the extension; str.replace would also rewrite a '.jpg'
        # that appears earlier in the file name.
        label_name = os.path.splitext(img_name)[0] + '.txt'
        label_path = os.path.join(self.label_dir, label_name)
        try:
            boxes = pd.read_csv(label_path, header=None, sep=r'\s+').values
        except pd.errors.EmptyDataError:
            # An empty label file means the image has no annotated objects.
            boxes = pd.DataFrame(
                columns=range(self._LABEL_COLUMNS), dtype=float
            ).values

        if self.transform:
            augmented = self.transform(image=image, bboxes=boxes)
            image = augmented['image']
            boxes = augmented['bboxes']

        return image, boxes

NameError: name 'Dataset' is not defined

In [2]:
def train_yolo(model_name, data_path, total_epochs, img_size, log_dir):
    """Train a YOLO model and return the training results.

    Args:
        model_name: Weights file or model name passed to ``YOLO``.
        data_path: Dataset definition forwarded as ``data`` to ``model.train``.
        total_epochs: Number of epochs forwarded as ``epochs``.
        img_size: Training image size forwarded as ``imgsz``.
        log_dir: Directory handed to ``setup_logging``.

    Returns:
        The object returned by ``model.train``.
    """
    # Load model
    model = YOLO(model_name)

    # Setup logging.  ``setup_logging`` is defined outside this block;
    # presumably it configures log output under ``log_dir`` -- verify.
    setup_logging(log_dir)

    # Train model for all epochs and hand the results back to the caller
    # instead of discarding them.
    return model.train(
        data=data_path,
        epochs=total_epochs,
        imgsz=img_size,
        save=True,
        plots=True,
    )

In [4]:
def parse_args():
    """Parse the YOLOv8 training options from the command line.

    Returns:
        argparse.Namespace with ``model``, ``data``, ``epochs``,
        ``img_size`` and ``log_dir`` attributes.  ``--data`` is required.
    """
    cli = argparse.ArgumentParser(description="Train YOLOv8 model.")

    # (flag, keyword arguments) pairs, registered in declaration order.
    option_specs = (
        ('--model', {'type': str, 'default': 'yolov8s.pt',
                     'help': 'Pre-trained model path'}),
        ('--data', {'type': str, 'required': True,
                    'help': 'Path to dataset'}),
        ('--epochs', {'type': int, 'default': 25,
                      'help': 'Number of epochs to train'}),
        ('--img_size', {'type': int, 'default': 800,
                        'help': 'Image size for training'}),
        ('--log_dir', {'type': str, 'default': './logs',
                       'help': 'Directory to save logs and plots'}),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    return cli.parse_args()

def parse_args_notebook():
    """Return fixed training settings for use inside a notebook.

    Stands in for command-line parsing by building the namespace directly
    from hard-coded values.
    """
    settings = {
        'model': 'yolov8s.pt',
        'data': '/home/jupyter/ee_tree_counting/Data/Dataset 348-17-15/data.yaml',
        'epochs': 100,
        'img_size': 640,
        'batch': 16,
        'log_dir': './logs',
        'plots': True,
    }
    return argparse.Namespace(**settings)

# Data Augmentation: Geometric Augmentation Mainly

### Plan: First check whether geometric augmentations improve results, then build a larger dataset. Apply the augmentations and use the augmented data for further training. Finally, integrate annotated satellite images with seasonal augmentation, enhanced contrast, etc.

In [None]:
# Run training with the hard-coded notebook settings.
args = parse_args_notebook()
train_yolo(
    model_name=args.model,
    data_path=args.data,
    total_epochs=args.epochs,
    img_size=args.img_size,
    log_dir=args.log_dir,
)