## Image Dataset Creation 

In [None]:
import os
import cv2
import random
from glob import glob

# Input dataset root. Each split folder listed in `splits` is read from
# "<base_dir>/<split>/images" with matching label files in
# "<base_dir>/<split>/labels".
base_dir = r"G:\My Documents\Digital Neuron Labtech\Detection Dataset"

# Output directory for 200 OCR crops
output_dir = r"G:\My Documents\Digital Neuron Labtech\Extra"
os.makedirs(output_dir, exist_ok=True)

# Collect one cropped image array per labelled box across all splits
splits = ['train', 'valid', 'test']
image_exts = (".jpg", ".jpeg", ".png")
all_crops = []

for split in splits:
    img_dir = os.path.join(base_dir, split, "images")
    label_dir = os.path.join(base_dir, split, "labels")

    # A dataset may not ship every split; skip an absent one instead of
    # letting os.listdir abort the whole run.
    if not os.path.isdir(img_dir):
        print(f"Skipping split '{split}': {img_dir} not found")
        continue

    for file in os.listdir(img_dir):
        # Case-insensitive match so names such as "IMG_1.JPG" are not dropped.
        if not file.lower().endswith(image_exts):
            continue

        img_path = os.path.join(img_dir, file)
        label_path = os.path.join(label_dir, os.path.splitext(file)[0] + ".txt")

        # Images without a label file contribute no crops.
        if not os.path.exists(label_path):
            continue

        # cv2.imread returns None (rather than raising) for unreadable files.
        image = cv2.imread(img_path)
        if image is None:
            continue

        h, w = image.shape[:2]

        with open(label_path, 'r') as f:
            for line in f:
                # Each usable line holds five whitespace-separated numbers:
                # a leading value that is ignored, then the box centre (x, y)
                # and box size (width, height) as fractions of the image size.
                parts = line.split()
                if len(parts) != 5:
                    continue

                try:
                    _, x_c, y_c, bw, bh = map(float, parts)
                except ValueError:
                    # Non-numeric field: skip the line like any other
                    # malformed entry.
                    continue

                # Scale the fractional box to pixel units.
                x_c *= w
                y_c *= h
                bw *= w
                bh *= h

                # Convert centre/size to corner coordinates, clamped to the
                # image bounds.
                x1 = int(max(0, x_c - bw / 2))
                y1 = int(max(0, y_c - bh / 2))
                x2 = int(min(w, x_c + bw / 2))
                y2 = int(min(h, y_c + bh / 2))

                crop = image[y1:y2, x1:x2]

                # Degenerate boxes (zero width or height) yield an empty slice.
                if crop.size == 0:
                    continue

                # The slice is a view onto `image`; storing it directly would
                # keep every full-size source image alive in memory. Copy so
                # only the crop's pixels are retained.
                all_crops.append(crop.copy())

# Shuffle in place and keep at most `num_crops` crops (fewer if the dataset
# produced fewer boxes).
num_crops = 200
random.shuffle(all_crops)
selected_crops = all_crops[:num_crops]

# Save cropped images as 1.jpg, 2.jpg, ... numbered by position in the selection
saved_count = 0
for i, crop in enumerate(selected_crops, start=1):
    out_path = os.path.join(output_dir, f"{i}.jpg")
    # cv2.imwrite signals failure through its boolean return value, so check
    # it to keep the final count honest.
    if cv2.imwrite(out_path, crop):
        saved_count += 1
    else:
        print(f"Failed to write {out_path}")

print(f"Saved {saved_count} cropped plate images to {output_dir}")

## Dataset Annotation Helper

In [None]:
import os
import cv2

import csv

# Folder holding the plate crops to annotate; labels.csv is written next to them.
img_dir = r"G:\My Documents\Digital Neuron Labtech\Recognition Dataset"
output_csv = os.path.join(img_dir, "labels.csv")

# NOTE: sorted() orders names lexicographically, so "10.jpg" comes before "2.jpg".
images = sorted([f for f in os.listdir(img_dir) if f.endswith('.jpg')])
labels = []

# Only the first five images (in the order above) are annotated per run.
for img_name in images[:5]:
    img_path = os.path.join(img_dir, img_name)
    img = cv2.imread(img_path)

    # cv2.imread returns None for unreadable files; cv2.imshow would then
    # raise and discard every label entered so far.
    if img is None:
        print(f"Skipping unreadable image: {img_name}")
        continue

    cv2.imshow("Image", img)
    # Give the GUI a moment to process events so the window is drawn before
    # input() blocks.
    cv2.waitKey(1)

    label = input(f"Enter plate text for {img_name}: ")
    labels.append((img_name, label))
    cv2.destroyAllWindows()

# Save to CSV. UTF-8 is set explicitly so plate text containing non-ASCII
# characters is written correctly regardless of the platform default encoding.
with open(output_csv, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(["filename", "words"])
    writer.writerows(labels)
