<a href="https://colab.research.google.com/github/Synaptic-rayan/Machine-Learning/blob/main/PreProcessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import cv2
import os
import yaml
from sklearn.model_selection import train_test_split

# Root folder of the dataset.
root_dir = "/content/trafic_data"

# Accepted file extensions: the three image formats first, the label format
# last (later code slices this list with [:3] and [-1:]).
valid_formats = [
    ".jpg",
    ".jpeg",
    ".png",
    ".txt",
]

# Source folders holding the original training images and their labels.
image_path = f"{root_dir}/train/images"
label_path = f"{root_dir}/train/labels"

def file_paths(root, valid_formats):
    """
    Get the full path to each image/label in the dataset.

    Walks ``root`` recursively and keeps every file whose extension
    (compared in lower case) is contained in ``valid_formats``.

    Args:
        root: Directory to search. A missing directory yields an empty list.
        valid_formats: Collection of lower-case extensions including the
            leading dot, e.g. ``[".jpg", ".png"]``.

    Returns:
        A sorted list of matching file paths.
    """
    matches = []

    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            extension = os.path.splitext(filename)[1].lower()
            if extension in valid_formats:
                matches.append(os.path.join(dirpath, filename))

    # os.walk returns entries in arbitrary filesystem order; sorting makes the
    # result (and any seeded split built on top of it) reproducible.
    return sorted(matches)

def write_image_label(images_path, labels_path, img_path, src_labels_path=None):
    """
    Export one image and its label file into the destination directories.

    The image is decoded with OpenCV and re-encoded as ``<name>.jpg`` inside
    ``images_path``. The label ``<name>.txt`` is looked up by the image's base
    name in the source label directory and copied into ``labels_path``.

    Args:
        images_path: Destination directory for the re-encoded image.
        labels_path: Destination directory for the copied label file.
        img_path: Path of the source image.
        src_labels_path: Directory containing the source label files. When
            omitted, the module-level ``label_path`` is used.

    Problems are reported with ``print`` instead of raising: an unreadable
    image, a failed image write, or a missing source label.
    """
    if src_labels_path is None:
        src_labels_path = label_path

    img_name = os.path.splitext(os.path.basename(img_path))[0]

    img = cv2.imread(img_path)
    if img is None:
        # Without an image there is nothing for the label to describe, so do
        # not leave an orphan label behind.
        print(f"Error: Could not read image '{img_path}'")
        return

    out_image_path = os.path.join(images_path, f"{img_name}.jpg")
    # cv2.imwrite signals failure through its return value.
    if not cv2.imwrite(out_image_path, img):
        print(f"Error: Could not write image '{out_image_path}'")
        return

    label_file_path = os.path.join(src_labels_path, f"{img_name}.txt")
    # Read the source label first: opening the destination in "w" mode before
    # knowing the source exists would create (or truncate to) an empty label.
    try:
        with open(label_file_path, "r") as original_label_file:
            label_text = original_label_file.read()
    except FileNotFoundError:
        print(f"Warning: Label file '{label_file_path}' not found.")
        return

    with open(os.path.join(labels_path, f"{img_name}.txt"), "w") as label_file:
        label_file.write(label_text)

def write_to_file(images_path, labels_path, X):
    """
    Export every image in X, along with its label, to the given directories.

    Both target directories are created first if they do not exist yet.
    """
    for target_dir in (images_path, labels_path):
        os.makedirs(target_dir, exist_ok=True)

    for source_image in X:
        write_image_label(images_path, labels_path, source_image)

# Source directories of the original training images and labels.
train_images_dir = os.path.join(root_dir, "train/images")
train_labels_dir = os.path.join(root_dir, "train/labels")

# Debug: Print the paths being used
print("Image directory path:", train_images_dir)
print("Label directory path:", train_labels_dir)

image_paths = file_paths(train_images_dir, valid_formats[:3])
label_paths = file_paths(train_labels_dir, valid_formats[-1:])

# Debug: Print the collected image and label paths
print("Collected image paths:", image_paths)
print("Collected label paths:", label_paths)

# Check if data is available for splitting
if not image_paths or not label_paths:
    print("Error: No data available for splitting.")
else:
    # Pair each image with the label file sharing its base name. The two
    # directory walks above are independent, so pairing them by list position
    # would be arbitrary, and train_test_split raises ValueError when the two
    # lists differ in length.
    paired_label_paths = [
        os.path.join(
            train_labels_dir,
            os.path.splitext(os.path.basename(path))[0] + ".txt",
        )
        for path in image_paths
    ]

    # Split data into train, validation, and test sets.
    # First hold out 30% for testing, then 30% of the remainder for
    # validation: roughly 49% train / 21% validation / 30% test, so the
    # training set stays the largest of the three.
    X_train_val, X_test, y_train_val, y_test = train_test_split(
        image_paths, paired_label_paths, test_size=0.3, random_state=42
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val, y_train_val, test_size=0.3, random_state=42
    )

    # Labels are located by image name inside write_image_label, so only the
    # image lists are handed over.
    write_to_file("datasets/images/train", "datasets/labels/train", X_train)
    write_to_file("datasets/images/valid", "datasets/labels/valid", X_val)
    write_to_file("datasets/images/test", "datasets/labels/test", X_test)

    # Dataset description written next to the exported data.
    data = {
        "path": "../datasets",
        "train": "images/train",
        "val": "images/valid",
        "test": "images/test",
        "names": ["number plate"]
    }

    with open("number-plate.yaml", "w") as f:
        yaml.dump(data, f)



Image directory path: /content/trafic_data/train/images
Label directory path: /content/trafic_data/train/labels
Collected image paths: ['/content/trafic_data/train/images/Dipto_532_JPG.rf.b15555ac5da422d0b1ebd467d625f45e.jpg', '/content/trafic_data/train/images/Pias--123-_jpg.rf.a44d26f68b65aaeba4ab80e0e08fe320.jpg', '/content/trafic_data/train/images/Pias--466-_jpg.rf.0430763a1f2aad84f6f6ba8b8072fd01.jpg', '/content/trafic_data/train/images/121_jpg.rf.12663332346e88d4786511c0dcc0515e.jpg', '/content/trafic_data/train/images/Dipto_553_jpg.rf.76b65201836d8f6200d3274571b867c1.jpg', '/content/trafic_data/train/images/Dipto_616_jpg.rf.118758f6f747c79091dad90cae6485b9.jpg', '/content/trafic_data/train/images/Numan_-239-_jpg.rf.1386b2c3736a7c37f2d957d79b21d081.jpg', '/content/trafic_data/train/images/Pias--504-_jpg.rf.4049832e129b278c6ebf5f5c3114770b.jpg', '/content/trafic_data/train/images/Navid_158_jpg.rf.510a183d33917399477af4e460740cde.jpg', '/content/trafic_data/train/images/Navid_337_j