# Image Preprocessing


In [5]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# locations of the dataset on disk: the image folder and the label CSV both
# live directly under the dataset root
dataset_dir = "dataset"
images_dir, labels_file = (
    os.path.join(dataset_dir, leaf) for leaf in ("license-plate", "labels.csv")
)

# load the label table
# CSV has columns: 'images' (image file names) and 'labels' (class labels)
labels_df = pd.read_csv(filepath_or_buffer=labels_file)

# load and preprocess images
def load_and_preprocess_image(image_name, target_size=(32, 32)):
    """Load one image from ``images_dir`` and return it as a scaled array.

    Args:
        image_name: file name of the image, relative to ``images_dir``.
        target_size: size passed to ``load_img`` so the image is resized on
            load; defaults to ``(32, 32)``.

    Returns:
        The array produced by ``img_to_array`` with every value divided by
        255.0 (presumably mapping 8-bit pixel values into [0, 1] -- confirm
        against the source images).
    """
    resized = load_img(os.path.join(images_dir, image_name), target_size=target_size)
    pixels = img_to_array(resized)
    return pixels / 255.0

# load every image listed in the CSV and stack the results into one numpy array
x_data = np.array(list(map(load_and_preprocess_image, labels_df['images'])))

# give each distinct label an integer index, assigned in sorted label order
label_mapping = {
    label_name: class_index
    for class_index, label_name in enumerate(sorted(labels_df['labels'].unique()))
}

# translate labels to their indices, then one-hot encode them
# NOTE(review): the recorded cell output shows 19976 label columns for 20000
# samples, i.e. almost every label is its own class -- confirm this is intended
y_data = to_categorical(
    labels_df['labels'].map(label_mapping).values,
    num_classes=len(label_mapping),
)

# split the dataset: 70% training, with the held-out 30% halved into
# validation and test; the fixed random_state keeps the splits reproducible
x_train, x_temp, y_train, y_temp = train_test_split(
    x_data, y_data, test_size=0.3, random_state=42
)
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=42
)

# report the shape of each split
print("Training set:", x_train.shape, y_train.shape)
print("Validation set:", x_val.shape, y_val.shape)
print("Test set:", x_test.shape, y_test.shape)


Training set: (14000, 32, 32, 3) (14000, 19976)
Validation set: (3000, 32, 32, 3) (3000, 19976)
Test set: (3000, 32, 32, 3) (3000, 19976)


In [None]:
# optional; only to view the dataset
import matplotlib.pyplot as plt

# function to display a grid of images with their labels
def display_images_grid(images, labels, label_mapping, title, grid_size=(10, 10)):
    """Plot the leading images in a grid, each titled with its decoded label.

    Args:
        images: indexable, sized collection of images accepted by
            ``Axes.imshow``.
        labels: indexable, sized collection of one-hot label vectors aligned
            with ``images``; each is decoded with ``np.argmax``.
        label_mapping: dict mapping label name -> integer class index.
        title: text used as the figure title.
        grid_size: ``(rows, cols)`` of the grid; defaults to ``(10, 10)``.

    If fewer images (or labels) than grid cells are supplied, the remaining
    cells are left blank instead of raising ``IndexError``. A class index that
    is missing from ``label_mapping`` is shown as its number.
    """
    # squeeze=False always yields a 2D array of axes, so ravel() also works
    # for single-row, single-column and 1x1 grids
    fig, axes = plt.subplots(
        grid_size[0], grid_size[1], figsize=(15, 15), squeeze=False
    )
    fig.suptitle(title, fontsize=16)
    axes = axes.ravel()  # flatten the 2D grid into a 1D array of axes

    # invert the mapping once rather than scanning it for every image;
    # setdefault keeps the first name seen for an index, matching a
    # first-match linear search over label_mapping.items()
    index_to_label = {}
    for name, index in label_mapping.items():
        index_to_label.setdefault(index, name)

    # never index past the data that was actually supplied
    n_shown = min(len(axes), len(images), len(labels))

    for i, ax in enumerate(axes):
        if i < n_shown:
            ax.imshow(images[i])  # show the image
            label_index = int(np.argmax(labels[i]))  # decode one-hot encoded label
            label_name = index_to_label.get(label_index, str(label_index))
            ax.set_title(label_name, fontsize=8)  # add the label as title
        ax.axis('off')  # hide the axes, including those of unused cells

    plt.tight_layout(rect=[0, 0, 1, 0.95])  # leave room at the top for the title
    plt.show()

# show the first grid's worth of training images (100 with the default 10x10 grid)
print("Training Images and Labels:")
display_images_grid(
    images=x_train,
    labels=y_train,
    label_mapping=label_mapping,
    title="Training Images (100 Examples)",
)

# show the first grid's worth of test images (100 with the default 10x10 grid)
print("Test Images and Labels:")
display_images_grid(
    images=x_test,
    labels=y_test,
    label_mapping=label_mapping,
    title="Test Images (100 Examples)",
)


## Step 1: Apply Gaussian Smoothing 

## Step 2: Grayscale Conversion

## Step 3: Canny Edge Detection

# License Plate Detection

## Step 4: Image Binarization

## Step 5: Morphological Transformations

## Step 6: Contour Detection

## Step 7: Filter Contours

# License Plate Number Recognition

## Step 8: Text Recognition