# Preprocessing Notebook for Number Plate Detection
This notebook prepares your dataset by splitting it into training, validation, and test sets, and creating a corresponding YAML file for training the YOLO model.

In [None]:
# Step 1: Import required libraries
from sklearn.model_selection import train_test_split
import cv2
import os
import yaml

In [None]:
# Step 2: Define paths and helper function
# Root folder of the raw dataset; the "images" and "labels" sub-folders are
# read from underneath it.
root_dir = "datasets/car-number-plate/"

# Accepted file extensions. Order matters: the notebook slices this list by
# position, taking the first three entries as image formats and the last
# entry as the label format.
valid_formats = [
    ".jpg",
    ".jpeg",
    ".png",
    ".txt",
]

def file_paths(root, valid_formats):
    """Recursively collect files under *root* that have an accepted extension.

    Args:
        root: Directory to walk; every sub-directory is visited as well.
        valid_formats: Collection of lower-case extensions including the
            leading dot (e.g. ``[".jpg", ".png"]``). File extensions are
            lower-cased before the comparison, so ``IMG.JPG`` matches
            ``".jpg"``.

    Returns:
        A list of matching paths (``os.path.join(dirpath, filename)``),
        sorted so the result is the same on every run and filesystem. The
        list is empty when nothing matches or *root* does not exist.
    """
    found = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            extension = os.path.splitext(filename)[1].lower()
            if extension in valid_formats:
                found.append(os.path.join(dirpath, filename))

    # os.walk yields entries in arbitrary, filesystem-dependent order. Sorting
    # makes a seeded train/val/test split reproducible and keeps separately
    # collected image and label lists in a consistent order.
    found.sort()
    return found

In [None]:
# Step 3: Collect image and label paths
# Build the sub-directory paths with os.path.join rather than string
# concatenation, so the lookup still works if root_dir is written without a
# trailing slash.
# The first three entries of valid_formats are the image extensions; the last
# entry is the label extension.
image_paths = file_paths(os.path.join(root_dir, "images"), valid_formats[:3])
label_paths = file_paths(os.path.join(root_dir, "labels"), [valid_formats[-1]])

In [None]:
# Step 4: Split the dataset
# First cut: keep 70% of the samples for training and hold out 30%.
# NOTE(review): images and labels are paired purely by list position here, so
# both lists must have the same length and matching order — confirm the
# dataset guarantees one label file per image.
X_train, X_val_test, y_train, y_val_test = train_test_split(
    image_paths,
    label_paths,
    test_size=0.3,
    random_state=42,
)

# Second cut: divide the held-out 30% into validation and test parts.
# NOTE(review): test_size=0.7 gives roughly 9% validation / 21% test of the
# full dataset — confirm this ratio is intended.
X_val, X_test, y_val, y_test = train_test_split(
    X_val_test,
    y_val_test,
    test_size=0.7,
    random_state=42,
)

In [None]:
# Step 5: Define function to copy images and labels
def write_to_file(images_path, labels_path, X, labels_root=None):
    """Copy each image in *X*, and its label file when present, into a split folder.

    Files are copied byte-for-byte. Images are not decoded and re-encoded, so
    no quality is lost and a file that OpenCV cannot decode no longer aborts
    the run with an opaque error.

    Args:
        images_path: Destination directory for the images (created if missing).
        labels_path: Destination directory for the labels (created if missing).
        X: Iterable of source image paths.
        labels_root: Directory searched for ``<image stem>.txt``. Defaults to
            ``os.path.join(root_dir, "labels")`` using the module-level
            ``root_dir``.

    Raises:
        OSError: If an image in *X* cannot be read or a destination cannot be
            written.
    """
    # Imported locally so this cell does not depend on an edit to the
    # notebook's import cell.
    import shutil

    if labels_root is None:
        labels_root = os.path.join(root_dir, "labels")

    os.makedirs(images_path, exist_ok=True)
    os.makedirs(labels_path, exist_ok=True)

    for img_path in X:
        img_file = os.path.basename(img_path)
        img_name = os.path.splitext(img_file)[0]

        shutil.copyfile(img_path, os.path.join(images_path, img_file))

        # The label is looked up by image stem directly inside labels_root.
        label_file = f"{img_name}.txt"
        label_src = os.path.join(labels_root, label_file)
        if os.path.isfile(label_src):
            shutil.copyfile(label_src, os.path.join(labels_path, label_file))
        else:
            # Best effort: the image is still copied, only the label is skipped.
            print(f"Label file not found for image: {img_name}")

In [None]:
# Step 6: Execute copying
# Materialise each split on disk: images under datasets/images/<split> and
# the matching labels under datasets/labels/<split>.
write_to_file(
    images_path="datasets/images/train",
    labels_path="datasets/labels/train",
    X=X_train,
)
write_to_file(
    images_path="datasets/images/valid",
    labels_path="datasets/labels/valid",
    X=X_val,
)
write_to_file(
    images_path="datasets/images/test",
    labels_path="datasets/labels/test",
    X=X_test,
)

In [None]:
# Step 7: Create YAML file
# Dataset description for the trainer. The train/val/test entries use the same
# "images/<split>" folder names that the copy step writes under "datasets/".
# NOTE(review): "path" is presumably resolved by the YOLO tooling relative to
# its own working/dataset directory — confirm "../datasets" is correct for
# where training is launched.
data = dict(
    path="../datasets",
    train="images/train",
    val="images/valid",
    test="images/test",
    names=["number plate"],
)

# yaml.dump sorts keys alphabetically by default, so the order above does not
# affect the written file.
with open("number-plate.yaml", "w") as f:
    yaml.dump(data, f)