## Data preparation: splitting into training and validation sets

In [1]:
import os
import shutil
import random

# Paths: the raw images and labels sit directly in `source_folder`; the
# train/val sub-trees are created inside it.
source_folder = "dataset"
train_folder = os.path.join(source_folder, "train")
val_folder = os.path.join(source_folder, "val")
for split_root in (train_folder, val_folder):
    for kind in ("images", "labels"):
        os.makedirs(os.path.join(split_root, kind), exist_ok=True)

# Fixed seed so that Restart & Run All on the same raw data always yields the
# same train/val partition.
SPLIT_SEED = 42

# Get all images. os.listdir() returns entries in arbitrary order, so sort
# first; otherwise the seeded shuffle would still depend on the filesystem.
images = sorted(f for f in os.listdir(source_folder) if f.endswith(".jpg"))
random.Random(SPLIT_SEED).shuffle(images)  # Deterministic shuffle

# Split 80% train, 20% val
split_index = int(len(images) * 0.8)
train_images = images[:split_index]
val_images = images[split_index:]

def move_files(image_list, dest_folder, src_folder=None):
    """Move image/label pairs into ``dest_folder/images`` and ``dest_folder/labels``.

    Args:
        image_list: Image file names (not paths) located in the source folder.
        dest_folder: Destination root; its ``images`` and ``labels``
            sub-folders are created if they do not exist yet.
        src_folder: Folder currently holding the files. Defaults to the
            notebook-level ``source_folder``.

    Raises:
        FileNotFoundError: If any image in ``image_list`` has no matching
            ``.txt`` label file. The check runs before anything is moved, so
            a failed call leaves the source folder untouched.
    """
    if src_folder is None:
        src_folder = source_folder

    # Swap only the final extension; str.replace would also rewrite a ".jpg"
    # that happens to appear in the middle of a file name.
    pairs = [(img, os.path.splitext(img)[0] + ".txt") for img in image_list]

    # Validate every pair up front so a missing label cannot leave the
    # dataset half-moved (image relocated, label lookup failing afterwards).
    missing = [
        txt for _, txt in pairs
        if not os.path.isfile(os.path.join(src_folder, txt))
    ]
    if missing:
        raise FileNotFoundError(
            "Missing label file(s) in {!r}: {}".format(src_folder, ", ".join(missing))
        )

    images_dir = os.path.join(dest_folder, "images")
    labels_dir = os.path.join(dest_folder, "labels")
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    for img, txt in pairs:
        shutil.move(os.path.join(src_folder, img), os.path.join(images_dir, img))
        shutil.move(os.path.join(src_folder, txt), os.path.join(labels_dir, txt))

# Relocate each subset (training first, then validation) into its folder
for subset, target in ((train_images, train_folder), (val_images, val_folder)):
    move_files(subset, target)

print("Dataset reorganized successfully!")


Dataset reorganized successfully!


## Dataset YAML configuration file for training

In [8]:
import yaml

# Class names; presumably the list position is the class id used in the
# label files (0 and 1) - confirm against the annotations.
class_names = ["Odometer", "LCD"]

# Dataset description written to data.yaml for training.
data = {
    "train": "dataset/train/images",
    "val": "dataset/val/images",
    "nc": len(class_names),  # Number of classes, derived so it cannot drift from `names`
    "names": class_names,
}

with open("data.yaml", "w", encoding="utf-8") as file:
    yaml.dump(data, file, default_flow_style=False)

print("data.yaml created successfully!")


data.yaml created successfully!
