In [2]:
%pip install pandas opencv-python scikit-learn numpy

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [23]:
import os
import cv2
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from typing import Tuple

In [None]:
# Root folder of the dataset; every other path in this notebook is derived from it.
data_folder = "./dataset"

# Annotation workbook that sits directly in the dataset root.
orig_ann_file = os.path.join(data_folder, "annotations.xlsx")
# Annotation workbook stored inside the "images/augmented" sub-folder.
aug_ann_file = os.path.join(os.path.join(data_folder, "images", "augmented"), "annotations.xlsx")

# Load in annotations

Before the YOLO model can use the images, both the annotations and the images need to be preprocessed.

In [25]:
# The first annotation workbook is mandatory; reading it fails loudly if it is missing.
orig_df = pd.read_excel(orig_ann_file)

# The second workbook is optional: fall back to an empty frame with the same columns
# so that the later concatenation works either way.
if os.path.exists(aug_ann_file):
    aug_df = pd.read_excel(aug_ann_file)
else:
    aug_df = pd.DataFrame(columns=orig_df.columns)

In [26]:
# Tag every row with the annotation set it came from; the tag is used later to
# locate the matching image folder.
orig_df['source'] = 'original'
aug_df['source'] = 'augmented'

# Stack both annotation sets into one frame with a fresh 0..n-1 index.
data = pd.concat(
    [orig_df, aug_df],
    ignore_index=True,
)
print(f"Loaded {len(orig_df)} original and {len(aug_df)} augmented annotations, total = {len(data)}.")

Loaded 198 original and 585 augmented annotations, total = 783.


Before the YOLO model can use the images, the image annotations first need to be converted to box centres and sizes and then normalized. The class also needs to be set to 0 because there is only one class: sheep.

In [None]:
# Single-class dataset: every annotation gets class id 0.
data['class_id'] = 0

# Box centre = box origin plus half of the box extent along each axis.
data['x_center'] = data['bbox_x'].add(data['bbox_width'].div(2.0))
data['y_center'] = data['bbox_y'].add(data['bbox_height'].div(2.0))

# Box size as floats, still in pixel units at this point.
data['width'] = data['bbox_width'].astype('float64')
data['height'] = data['bbox_height'].astype('float64')

The YOLO model expects 640x640 images, but our images are 640x512, so the images need to be padded.

In [None]:
def pad_image_to_square(img: np.ndarray, target_size: int = 640, pad_color: Tuple[int,int,int] = (0,0,0)) -> np.ndarray:
    """Pad an image with a constant colour to a target_size x target_size square.

    The image is centred: the missing rows/columns are split between both sides,
    with the extra pixel (for odd differences) going to the bottom/right.

    Args:
        img: Image array whose first two dimensions are (height, width).
        target_size: Side length of the square output in pixels.
        pad_color: Colour used for the added border.

    Returns:
        The padded image, or ``img`` itself (not a copy) when it already has
        the target size.

    Raises:
        ValueError: If ``img`` is None (e.g. a failed ``cv2.imread``) or if the
            image is larger than ``target_size`` in either dimension. This
            function only pads; it never crops or resizes.
    """
    if img is None:
        raise ValueError("pad_image_to_square received None instead of an image (failed to read the file?).")

    h, w = img.shape[:2]
    if h > target_size or w > target_size:
        # Negative border sizes would otherwise surface as an opaque OpenCV assertion.
        raise ValueError(
            f"Image of size {w}x{h} does not fit into a {target_size}x{target_size} square."
        )
    if h == target_size and w == target_size:
        return img  # already the requested square, nothing to pad

    # Split the missing pixels between both sides of each axis.
    top = (target_size - h) // 2
    bottom = target_size - h - top
    left = (target_size - w) // 2
    right = target_size - w - left
    return cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=pad_color)

In [None]:
original_width = 640
original_height = 512
target_size = 640

# Offsets introduced by centring the original image inside the padded square
# (same floor-division formula that pad_image_to_square uses for top/left).
top_padding = (target_size - original_height) // 2
left_padding = (target_size - original_width) // 2

# Everything below is derived from the raw bbox_* columns rather than from the
# previous values of x_center / y_center / width / height. That keeps this cell
# idempotent: re-running it does not shift or divide the coordinates a second time.

# Box centre in padded-image pixel coordinates, normalized to the padded size.
data['x_center'] = (data['bbox_x'] + data['bbox_width'] / 2.0 + left_padding) / target_size
data['y_center'] = (data['bbox_y'] + data['bbox_height'] / 2.0 + top_padding) / target_size

# Box size normalized to the padded size (padding does not change box sizes).
data['width'] = data['bbox_width'].astype(float) / target_size
data['height'] = data['bbox_height'].astype(float) / target_size

Now that the dataset is ready, we can split it into training, validation and test sets.

In [28]:
# Split on unique image names so that all annotations of one image end up in the same split.
# NOTE(review): augmented images are split independently of the originals; if an augmented
# file is derived from an original image, the two can land in different splits — confirm
# whether that leakage matters for the evaluation.
unique_images = data['image_name'].unique()
train_imgs, temp_imgs = train_test_split(unique_images, test_size=0.3, random_state=42)   # 70% train, 30% temp
val_imgs, test_imgs = train_test_split(temp_imgs, test_size=0.5, random_state=42)         # split remaining 30% equally into val/test

# Hash sets give constant-time membership tests; `name in ndarray` scans the whole
# array on every call, which makes the row-wise assignment below quadratic.
_train_img_set = set(train_imgs)
_val_img_set = set(val_imgs)

def assign_split(image_name: str) -> str:
    """Return the split ('train', 'val' or 'test') an image belongs to.

    Any name that is in neither the training nor the validation set is
    reported as 'test'.
    """
    if image_name in _train_img_set:
        return 'train'
    if image_name in _val_img_set:
        return 'val'
    return 'test'

data['split'] = data['image_name'].map(assign_split)
# value_counts() counts annotation rows per split, not distinct images.
print("Split images:", data['split'].value_counts())

Split images: split
train    558
val      122
test     103
Name: count, dtype: int64


In [31]:
# Create <data_folder>/images/<split> and <data_folder>/labels/<split> for every split;
# folders that already exist are left untouched.
splits = ['train', 'val', 'test']
for kind in ('images', 'labels'):
    for split in splits:
        target_dir = os.path.join(data_folder, kind, split)
        os.makedirs(target_dir, exist_ok=True)

In [33]:
# Export one padded image and one YOLO label file per (image, split) pair.
for (image_name, split), group in data.groupby(['image_name', 'split']):
    # Determine source folder ("enhanced" for original images, "augmented" for augmented images)
    source_folder = "augmented" if group['source'].iloc[0] == 'augmented' else "enhanced"
    src_path = os.path.join(data_folder, "images", source_folder, image_name)
    dst_img_path = os.path.join(data_folder, "images", split, image_name)
    dst_lbl_path = os.path.join(data_folder, "labels", split, os.path.splitext(image_name)[0] + ".txt")

    if not os.path.exists(src_path):
        print(f"⚠️ Source image not found: {src_path} (skipping)")
        continue

    # cv2.imread signals an unreadable/corrupt file by returning None instead of raising.
    img = cv2.imread(src_path)
    if img is None:
        print(f"⚠️ Could not read image: {src_path} (skipping)")
        continue

    # Pad to square and make sure the image was really written before emitting
    # its label file, so that no label exists without a matching image.
    padded_img = pad_image_to_square(img, target_size=640)
    if not cv2.imwrite(dst_img_path, padded_img):
        print(f"⚠️ Could not write image: {dst_img_path} (skipping)")
        continue

    # Write label file in YOLO format: "<class> <x_center> <y_center> <width> <height>" per box
    label_columns = ['class_id', 'x_center', 'y_center', 'width', 'height']
    with open(dst_lbl_path, 'w') as f:
        for class_id, x_c, y_c, w, h in group[label_columns].itertuples(index=False, name=None):
            f.write(f"{int(class_id)} {x_c:.6f} {y_c:.6f} {w:.6f} {h:.6f}\n")
print("✅ Dataset images and labels have been exported to train/val/test splits.")

✅ Dataset images and labels have been exported to train/val/test splits.
