In [1]:
import os
import glob
import shutil
import random

# Both dataset locations live under the same datasets root, relative to the
# directory the notebook is run from.
_DATASETS_ROOT = '../.local/datasets'
# Input: the original dataset folder.
original_dataset_dir = f'{_DATASETS_ROOT}/IoT_ObjectDetection'
# Output: the restructured (YOLO layout) dataset folder.
new_dataset_dir = f'{_DATASETS_ROOT}/IoT_ObjectDetection_Yolo'

In [2]:
import numpy as np

# Names of all sub-directories of the original dataset, one per class.
# Sorting keeps the class order (and thus any index derived from it) stable.
class_folders = sorted(
    entry
    for entry in os.listdir(original_dataset_dir)
    if os.path.isdir(os.path.join(original_dataset_dir, entry))
)

# write a function that counts the ids for a given folder and removes the ones that are not the most common
def remove_uncommon_ids(class_folder):
    """Keep only the most frequent class ID in every label file of a folder.

    Scans all ``*.txt`` files directly inside ``class_folder``, takes the first
    whitespace-separated token of each non-blank line as its class ID and
    determines which ID occurs most often. Every label file is then rewritten
    in place so that only lines carrying that ID remain. A label file that ends
    up without any line is deleted, together with the ``.jpg`` file of the same
    base name if one exists.

    WARNING: this modifies and deletes files inside ``class_folder``.

    Args:
        class_folder: Path of the directory containing the label files.

    Returns:
        None. Progress information is printed.
    """
    label_files = glob.glob(os.path.join(class_folder, '*.txt'))

    print(f"Class: {class_folder}, Number of label files: {len(label_files)}")

    # Collect the class ID of every annotation line in the folder.
    class_ids = []
    for label_file in label_files:
        with open(label_file, 'r') as f:
            for line in f:
                parts = line.split()
                if parts:
                    class_ids.append(parts[0])

    if not class_ids:
        # np.argmax raises on an empty array, so stop before reaching it.
        print("No class IDs found, nothing to clean up")
        return

    # Most common class ID; ties resolve to the first ID in np.unique's sorted order.
    unique_ids, id_counts = np.unique(class_ids, return_counts=True)
    common_class_id = str(unique_ids[np.argmax(id_counts)])
    print(f"Common class ID: {common_class_id}")

    for label_file in label_files:
        with open(label_file, 'r') as f:
            lines = f.readlines()

        new_lines = []
        for line in lines:
            parts = line.split()
            if parts and parts[0] == common_class_id:
                new_lines.append(line)
            elif parts:
                # Blank lines are dropped silently; only real annotations are logged.
                print(f"Removing line: {line}")

        if not new_lines:
            # Nothing left to annotate: drop the label and its image (if any).
            os.remove(label_file)
            # Swap only the extension; str.replace would also touch '.txt'
            # occurring elsewhere in the path (e.g. in a directory name).
            image_file = os.path.splitext(label_file)[0] + '.jpg'
            if os.path.exists(image_file):
                os.remove(image_file)
            print(f"Empty label file: {label_file}")
            continue

        if new_lines != lines:
            with open(label_file, 'w') as f:
                f.writelines(new_lines)

# Clean the Leubot folder first. Note that this rewrites/deletes files inside
# the original dataset directory.
remove_uncommon_ids(original_dataset_dir + '/Leubot')

# Report, per class folder, which class IDs appear in its label files and how often.
for class_name in class_folders:
    class_folder = os.path.join(original_dataset_dir, class_name)
    label_files = glob.glob(os.path.join(class_folder, '*.txt'))

    # First token of every non-blank line is the class ID.
    class_ids = []
    for label_file in label_files:
        with open(label_file, 'r') as f:
            class_ids.extend(tokens[0] for tokens in map(str.split, f) if tokens)

    print(f"Class: {class_name}, Number of different class IDs: {np.unique(class_ids, return_counts=True)}")


Class: ../.local/datasets/IoT_ObjectDetection/Leubot, Number of label files: 290
Common class ID: 1
Class: DeskBulb, Number of different class IDs: (array(['3'], dtype='<U1'), array([61]))
Class: HueLamp, Number of different class IDs: (array(['10', '11', '7', '8', '9'], dtype='<U2'), array([132, 215,   1, 112, 164]))
Class: HueLampHalf, Number of different class IDs: (array(['3'], dtype='<U1'), array([113]))
Class: Leubot, Number of different class IDs: (array(['1'], dtype='<U1'), array([328]))
Class: RoboticArm, Number of different class IDs: (array(['0', '1'], dtype='<U1'), array([310, 477]))
Class: Tractorbot, Number of different class IDs: (array(['0'], dtype='<U1'), array([677]))


In [3]:
# Show the raw content of the label file "Tractorbot_116 (2).txt".
sample_label_path = f"{original_dataset_dir}/Tractorbot/Tractorbot_116 (2).txt"
with open(sample_label_path, 'r') as f:
    for line in f:
        # Each line keeps its trailing newline, so print() leaves a blank line after it.
        print(line)

0 0.318981 0.560677 0.180556 0.107813

0 0.422685 0.316406 0.173148 0.102604



In [4]:
# Number of images per class that go into the validation split.
VAL_IMAGES_PER_CLASS = 10
# Fixed seed so the train/val split is the same on every run.
SPLIT_SEED = 42


def _copy_sample(img_path, class_folder, split, class_index):
    """Copy one image and its re-indexed label file into the given split.

    Args:
        img_path: Path of the source ``.jpg`` image.
        class_folder: Folder that holds the image and its ``.txt`` label file.
        split: Target split sub-directory, ``'train'`` or ``'val'``.
        class_index: Class index written as the first token of every label line.
    """
    image_name = os.path.basename(img_path)
    base_name = os.path.splitext(image_name)[0]
    shutil.copy(img_path, os.path.join(new_dataset_dir, split, 'images', image_name))

    # Images without a label file are copied without one.
    label_file = os.path.join(class_folder, f"{base_name}.txt")
    if not os.path.exists(label_file):
        return

    new_label_path = os.path.join(new_dataset_dir, split, 'labels', f"{base_name}.txt")
    with open(label_file, 'r') as f_in, open(new_label_path, 'w') as f_out:
        for line in f_in:
            parts = line.strip().split()
            if parts:
                # YOLO format: class_index x_center y_center width height.
                # The original class index is replaced by the folder-derived one.
                f_out.write(f"{class_index} {' '.join(parts[1:])}\n")


# Rebuild the train/val folders from scratch: files left over from an earlier
# run (possibly with a different split) must not end up in both splits.
for split in ('train', 'val'):
    split_dir = os.path.join(new_dataset_dir, split)
    if os.path.isdir(split_dir):
        shutil.rmtree(split_dir)
    for sub_dir in ('images', 'labels'):
        os.makedirs(os.path.join(split_dir, sub_dir), exist_ok=True)

# Collect all class folders
class_folders = [d for d in os.listdir(original_dataset_dir) if os.path.isdir(os.path.join(original_dataset_dir, d))]
class_folders.sort()  # Sort class folders to maintain consistent class index

# Create a mapping from class name to class index
class_mapping = {class_name: idx for idx, class_name in enumerate(class_folders)}
print(class_mapping)

# Dedicated RNG: reproducible sampling without touching the global random state.
split_rng = random.Random(SPLIT_SEED)

for class_name in class_folders:
    class_folder = os.path.join(original_dataset_dir, class_name)
    # glob order is filesystem dependent; sort so the seeded sample is stable.
    image_files = sorted(glob.glob(os.path.join(class_folder, '*.jpg')))
    print(f"Class: {class_name}, Number of images: {len(image_files)}")

    # Randomly sample the validation images (a set gives O(1) membership tests).
    val_samples = set(split_rng.sample(image_files, min(VAL_IMAGES_PER_CLASS, len(image_files))))

    for img_path in image_files:
        split = 'val' if img_path in val_samples else 'train'
        _copy_sample(img_path, class_folder, split, class_mapping[class_name])

print("Dataset reorganized with random samples for validation and class indices adjusted!")

# NOTE(review): machine-specific absolute path; consider making it configurable.
dir_path = "/home/tobias/Desktop/Uni/IMP/datasets/IoT_ObjectDetection_Yolo"
# if the path exists, remove it
if os.path.exists(dir_path):
    shutil.rmtree(dir_path)

# copy the new dataset to the path
shutil.copytree(new_dataset_dir, dir_path)

{'DeskBulb': 0, 'HueLamp': 1, 'HueLampHalf': 2, 'Leubot': 3, 'RoboticArm': 4, 'Tractorbot': 5}
Class: DeskBulb, Number of images: 61
Class: HueLamp, Number of images: 623
Class: HueLampHalf, Number of images: 113
Class: Leubot, Number of images: 290
Class: RoboticArm, Number of images: 680
Class: Tractorbot, Number of images: 370
Dataset reorganized with random samples for validation and class indices adjusted!


'/home/tobias/Desktop/Uni/IMP/datasets/IoT_ObjectDetection_Yolo'

In [None]:
import yaml

# Class names are the sorted sub-directory names of the original dataset.
classes = sorted(
    entry
    for entry in os.listdir(original_dataset_dir)
    if os.path.isdir(os.path.join(original_dataset_dir, entry))
)

# Dataset description: split locations, number of classes and their names.
data_dict = dict(
    train="../IoT_ObjectDetection_Yolo/train",
    val="../IoT_ObjectDetection_Yolo/val",
    nc=len(classes),
    names=classes,
)
print(classes)

# Write the description to the dataset's data.yaml file.
dataset_yaml_path = '/home/tobias/Desktop/Uni/IMP/IMP_Magical_Gaze-based_Device_Control/.local/datasets/IoT_ObjectDetection_Yolo/data.yaml'
with open(dataset_yaml_path, 'w') as f:
    yaml.dump(data_dict, f)

print("data.yaml file created!")

In [None]:
from ultralytics import YOLO

TRAINED_MODEL_PATH = '../.local/models/object_detection/yolov11s_trained.pt'

# Start from the 'yolo11s.pt' checkpoint.
model = YOLO('yolo11s.pt')

# Training configuration, passed to model.train() as keyword arguments.
train_options = {
    'data': dataset_yaml_path,
    'epochs': 50,
    'imgsz': 640,
    'batch': 16,
    'augment': True,    # Enable augmentations
    'hsv_h': 0.015,     # Hue variation
    'hsv_s': 0.7,       # Saturation variation
    'hsv_v': 0.4,       # Value (brightness) variation
    'degrees': 10.0,    # Random rotation degrees
    'translate': 0.1,   # Random translation
    'scale': 0.5,       # Scale variation
    'shear': 2.0,       # Shear variation
    # 'mixup': 0.1,     # Mixup augmentation (disabled)
}

# Train the model
model.train(**train_options)

print("Training completed!")

In [None]:
# Save the trained model
TRAINED_MODEL_PATH = '../.local/models/object_detection/yolov11s_trained.pt'

# Create the target directory first so the save cannot fail on a missing
# folder after training has already finished.
os.makedirs(os.path.dirname(TRAINED_MODEL_PATH), exist_ok=True)

model.save(TRAINED_MODEL_PATH)

In [None]:
from ultralytics import YOLO

# Reload the model from the saved weights file.
model = YOLO(TRAINED_MODEL_PATH)

# Validate on the dataset described by data.yaml and save the results as JSON.
val_options = {'data': dataset_yaml_path, 'save_json': True}
results = model.val(**val_options)

In [None]:
import os
import glob
import yaml
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt

# Load the trained YOLO model from the saved weights file.
model = YOLO(TRAINED_MODEL_PATH)

# NOTE(review): machine-specific absolute path; consider making it configurable.
dataset_dir = "/home/tobias/Desktop/Uni/IMP/datasets/"

# Read the validation split location from the dataset description.
# NOTE(review): this opens 'data.yaml' from the current working directory, not
# dataset_yaml_path - confirm that both describe the same dataset.
with open('data.yaml') as f:
    # safe_load only builds plain Python objects, which is all this file needs.
    data = yaml.safe_load(f)
print(data['val'])

val_image_dir = os.path.join(dataset_dir, data['val'], 'images')
# Sorted so the images are shown in a stable order.
val_images = sorted(glob.glob(os.path.join(val_image_dir, '*.jpg')))

if not val_images:
    # Make an empty match visible instead of silently doing nothing.
    print(f"No validation images found in {val_image_dir}")

# Predict on every validation image and display the annotated result.
for img_path in val_images:
    results = model(img_path)
    for result in results:
        result.show()  # display to screen
    