In [1]:
import os
import shutil

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import imutils
import cv2
import easyocr

# Helper Functions

## Number plate localization algorithm

### YOLO model

In [2]:
# Get the names of the output layers
def getOutputsNames(net):
    """Return the names of the network's output layers.

    The output layers are the layers with unconnected outputs, as reported by
    ``net.getUnconnectedOutLayers()``. The indices it returns are 1-based and,
    depending on the OpenCV version, arrive either as a flat sequence or as an
    ``N x 1`` array, so they are flattened and cast to ``int`` before indexing.

    Args:
        net: Object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (e.g. a ``cv2.dnn`` network).

    Returns:
        list: Names of the output layers, in the order reported by the network.
    """
    # Get the names of all the layers in the network
    layer_names = net.getLayerNames()
    # Normalise the index container to 1-D so both return shapes are handled
    output_indices = np.asarray(net.getUnconnectedOutLayers()).reshape(-1)
    # Indices are 1-based, hence the "- 1"
    return [layer_names[int(index) - 1] for index in output_indices]

In [3]:
# Draw the predicted bounding box
def drawPred(classId, conf, left, top, right, bottom):
    """Draw a green bounding box for one detection on the global ``frame``.

    A text label (confidence, prefixed by the class name when the global
    ``classes`` list is set) is also built and measured, but rendering it is
    currently disabled, so only the rectangle is visible.

    Args:
        classId: Index into the global ``classes`` list.
        conf: Confidence of the detection.
        left, top, right, bottom: Box corners in pixel coordinates.
    """
    box_colour = (0, 255, 0)
    cv2.rectangle(frame, (left, top), (right, bottom), box_colour, 3)

    # Build the label: confidence alone, or "<class name>: <confidence>"
    conf_text = '%.2f' % conf
    if classes:
        assert(classId < len(classes))
        caption = '%s: %s' % (classes[classId], conf_text)
    else:
        caption = conf_text

    # Measure the label and keep its anchor inside the image
    text_size, base_line = cv2.getTextSize(
        caption, cv2.FONT_HERSHEY_SIMPLEX, 1, 1)
    top = max(top, text_size[1])
    # Label rendering is disabled; the original drawing calls were:
    # cv2.rectangle(frame, (left, top - round(1.7*labelSize[1])), (left + round(
    #    1.3*labelSize[0]), top + baseLine), (255, 0, 255), cv2.FILLED)
    # cv2.putText(frame, label, (left, top),
    #            cv2.FONT_HERSHEY_SIMPLEX, 1.3, (255, 255, 255), 2)

In [4]:
# Remove the bounding boxes with low confidence using non-maxima suppression
def postprocess(frame, outs, verbose=False):
    """Turn raw network outputs into boxes and filter them with NMS.

    Every detection row is read as ``[cx, cy, w, h, <col 4>, score_0, ...]``
    where the first four values are scaled by the frame size to obtain pixel
    coordinates and the class with the highest score becomes the label.
    Thresholds come from the module-level ``confThreshold`` and
    ``nmsThreshold``.

    Args:
        frame: Image array; only ``frame.shape[0]`` (height) and
            ``frame.shape[1]`` (width) are used.
        outs: Iterable of 2-D arrays produced by the network's output layers.
        verbose: When True, print the debug information about output shapes
            and above-threshold rows (off by default because it floods the
            notebook when looping over a whole dataset).

    Returns:
        tuple: ``(indices, boxes, classIds, confidences)`` where ``indices``
        is a flat integer array of the entries kept by non-maximum suppression
        and ``boxes`` holds ``[left, top, width, height]`` lists.
    """
    frameHeight, frameWidth = frame.shape[:2]

    classIds = []
    confidences = []
    boxes = []
    # Scan through all the bounding boxes output from the network and keep only
    # the ones with high confidence scores. The box's class label is the class
    # with the highest score.
    for out in outs:
        if verbose:
            print("out.shape : ", out.shape)
        for detection in out:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if verbose and detection[4] > confThreshold:
                print(detection[4], " - ", scores[classId],
                      " - th : ", confThreshold)
                print(f"detection: {detection}")
            if confidence > confThreshold:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])

    if not boxes:
        # Nothing passed the confidence threshold; skip NMS entirely
        return np.empty(0, dtype=int), boxes, classIds, confidences

    # Perform non maximum suppression to eliminate redundant overlapping boxes
    # with lower confidences.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    # Depending on the OpenCV version the result is flat, N x 1 or an empty
    # tuple; flatten it so callers can always use ``boxes[i]`` directly.
    indices = np.asarray(indices, dtype=int).reshape(-1)
    return indices, boxes, classIds, confidences

### OpenCV Algorithm

In [5]:
def opencv_license_plates_localization(file_path, min_aspect_ratio=2, max_aspect_ratio=5):
    """Locate a license-plate-like quadrilateral in an image using contours.

    The image is converted to grayscale, denoised with a bilateral filter and
    edge-detected with Canny. The 30 largest contours are approximated by
    polygons; the first 4-vertex polygon whose bounding-box aspect ratio
    (width / height) lies in ``[min_aspect_ratio, max_aspect_ratio]`` is
    returned.

    Args:
        file_path: Path of the image file to analyse.
        min_aspect_ratio: Smallest accepted width / height ratio.
        max_aspect_ratio: Largest accepted width / height ratio.

    Returns:
        The approximated 4-point contour (as returned by
        ``cv2.approxPolyDP``), or ``None`` when the image cannot be read or no
        suitable quadrilateral is found.
    """
    img = cv2.imread(file_path)
    if img is None:
        # cv2.imread reports a missing/undecodable file by returning None;
        # without this guard cvtColor fails with an opaque assertion error.
        print(f"Could not read image: {file_path}")
        return None

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Make the image gray
    bfilter = cv2.bilateralFilter(gray, 11, 17, 17)  # Noise reduction
    edged = cv2.Canny(bfilter, 30, 200)  # Edge detection

    keypoints = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(keypoints)  # Version-independent unpacking
    # Keep the 30 largest contours by area
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:30]

    # Find the position of the license plate (rectangle search)
    for contour in contours:
        # Fixed approximation tolerance of 10 pixels, closed curve
        approx = cv2.approxPolyDP(contour, 10, True)
        if len(approx) != 4:
            continue
        _, _, w, h = cv2.boundingRect(approx)
        aspect_ratio = float(w) / h
        # Accept the first quadrilateral with a plate-like aspect ratio
        if min_aspect_ratio <= aspect_ratio <= max_aspect_ratio:
            return approx

    return None

## License plate alignment

In [29]:
def align_license_plate(license_plate_image, max_skew_deg=45.0):
    """Deskew a license-plate crop using its dominant near-horizontal lines.

    Line segments are detected with the probabilistic Hough transform on the
    Canny edges of the crop. Segment angles are treated as undirected (folded
    into ``[-90, 90)`` degrees) and only segments within ``max_skew_deg`` of
    horizontal contribute to the median, so vertical character strokes and
    segments reported with reversed endpoints cannot dominate the estimate.
    The image is then rotated about its centre by the median angle.

    Args:
        license_plate_image: Image array, either BGR (3 channels) or already
            grayscale (2-D).
        max_skew_deg: Largest absolute segment angle, in degrees, that is
            still considered a "horizontal" plate edge.

    Returns:
        The rotated image (same size as the input), or the input itself when
        it is empty or no usable line is found.
    """
    if license_plate_image is None or license_plate_image.size == 0:
        # Nothing to analyse; cvtColor would raise on an empty array
        return license_plate_image

    # Convert the license plate image to grayscale (skip if it already is)
    if license_plate_image.ndim == 2:
        gray_license_plate = license_plate_image
    else:
        gray_license_plate = cv2.cvtColor(license_plate_image, cv2.COLOR_BGR2GRAY)

    # Perform edge detection on the grayscale image
    edges = cv2.Canny(gray_license_plate, 50, 150, apertureSize=3)

    # Find lines in the edge-detected image
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=25, minLineLength=10, maxLineGap=10)

    # Check if any lines were found
    if lines is None:
        print('lines is None')
        return license_plate_image  # No lines found, return the original image

    # One row per segment: x1, y1, x2, y2
    segments = np.asarray(lines, dtype=np.float64).reshape(-1, 4)
    angles_deg = np.degrees(
        np.arctan2(segments[:, 3] - segments[:, 1], segments[:, 2] - segments[:, 0])
    )
    # A segment has no direction: fold angles into [-90, 90) so that e.g.
    # 180 degrees (endpoints swapped) counts as 0 degrees.
    angles_deg = (angles_deg + 90.0) % 180.0 - 90.0

    # Only near-horizontal segments describe the plate's skew
    horizontal_angles = angles_deg[np.abs(angles_deg) <= max_skew_deg]
    if horizontal_angles.size == 0:
        return license_plate_image

    # Calculate the median angle to align the license plate
    median_angle_deg = float(np.median(horizontal_angles))

    # Rotate the license plate image about its centre by the median angle
    height, width = license_plate_image.shape[:2]
    M = cv2.getRotationMatrix2D((width / 2, height / 2), median_angle_deg, 1)
    aligned_license_plate = cv2.warpAffine(license_plate_image, M, (width, height))

    return aligned_license_plate

# Move the LongPlate files to the 1_chosen_data folder

## Train files

In [14]:
# Locate the original train split and prepare the destination folder.
source_folder = 'data/Vehicle Registration Plates.v1-original-images.yolov8/train'
destination_folder = 'data/1_chosen_data/train'

# exist_ok makes the cell safe to re-run and avoids the check-then-create race
os.makedirs(destination_folder, exist_ok=True)

train_images_folder = source_folder + '/images'
train_labels_folder = source_folder + '/labels'
# os.listdir returns entries in arbitrary order; sort so that previews and any
# later pairing of images with labels are reproducible.
train_images = sorted(os.listdir(train_images_folder))
train_labels = sorted(os.listdir(train_labels_folder))

print(f"len(train_images) = {len(train_images)}")
print(f"len(train_labels) = {len(train_labels)}")

len(train_images) = 6176
len(train_labels) = 6176


In [21]:
# Preview: print the first 10 file names that start with 'CarLongPlate'.
counter = 0
long_plate_names = (name for name in train_images if name.startswith('CarLongPlate'))
for filename in long_plate_names:
    print(filename)
    counter += 1
    if counter == 10:
        break

CarLongPlate0_jpg.rf.68e0b9ccbdf0663f58876e02d480971e.jpg
CarLongPlate100_jpg.rf.6067d4cebf8aa075fc1c9b317f824c84.jpg
CarLongPlate102_jpg.rf.3c1ac339e94f9a48376022e310a8e5c7.jpg
CarLongPlate103_jpg.rf.344c8ec636c3fac0667537ff778ffa3a.jpg
CarLongPlate104_jpg.rf.870fdbc5cf820aede6c9b2dfd8319b7a.jpg
CarLongPlate106_jpg.rf.5561c4ab0b29b8e8ba22b23381bfbe1d.jpg
CarLongPlate107_jpg.rf.f672816f379274ac85026b64c585c948.jpg
CarLongPlate108_jpg.rf.534e54495e2a4dfd7ea8c1df359381bc.jpg
CarLongPlate109_jpg.rf.d8b0fcae17838a8f950f0a60fb3d8f3b.jpg
CarLongPlate10_jpg.rf.1125e4c8cd5267366d855b201cdd7951.jpg


In [10]:
# Keep only the train files whose name starts with 'CarLongPlate'.
train_car_long_plate_images = [
    candidate
    for candidate in train_images
    if candidate.startswith('CarLongPlate')
]
train_car_long_plate_labels = [
    candidate
    for candidate in train_labels
    if candidate.startswith('CarLongPlate')
]

print(f"len(train_car_long_plate_images) = {len(train_car_long_plate_images)}")
print(f"len(train_car_long_plate_labels) = {len(train_car_long_plate_labels)}")

len(train_car_long_plate_images) = 3436
len(train_car_long_plate_labels) = 3436


In [None]:
# Copy the selected train images and labels into the destination split.
images_destination_folder = destination_folder + '/images'
labels_destination_folder = destination_folder + '/labels'

# exist_ok makes the cell safe to re-run
os.makedirs(images_destination_folder, exist_ok=True)
os.makedirs(labels_destination_folder, exist_ok=True)

# Images and labels are copied independently: each file is copied under its
# own name, and zipping the two listings would silently drop the tail of the
# longer one if their lengths ever differed.
for long_plate_image in train_car_long_plate_images:
    source_image_path = os.path.join(train_images_folder, long_plate_image)
    destination_image_path = os.path.join(images_destination_folder, long_plate_image)
    shutil.copy(source_image_path, destination_image_path)

for long_plate_label in train_car_long_plate_labels:
    source_label_path = os.path.join(train_labels_folder, long_plate_label)
    destination_label_path = os.path.join(labels_destination_folder, long_plate_label)
    shutil.copy(source_label_path, destination_label_path)

print("Copy process completed")

In [19]:
# Sanity check: count what ended up in the destination folders.
long_plate_images, long_plate_labels = (
    os.listdir(folder)
    for folder in (images_destination_folder, labels_destination_folder)
)

print(f"len(long_plate_images) = {len(long_plate_images)}")
print(f"len(long_plate_labels) = {len(long_plate_labels)}")

len(long_plate_images) = 3436
len(long_plate_labels) = 3436


## Valid files

In [20]:
# Locate the original valid split and prepare the destination folder.
source_folder = 'data/Vehicle Registration Plates.v1-original-images.yolov8/valid'
destination_folder = 'data/1_chosen_data/valid'

# exist_ok makes the cell safe to re-run and avoids the check-then-create race
os.makedirs(destination_folder, exist_ok=True)

valid_images_folder = source_folder + '/images'
valid_labels_folder = source_folder + '/labels'
# os.listdir returns entries in arbitrary order; sort for reproducibility
valid_images = sorted(os.listdir(valid_images_folder))
valid_labels = sorted(os.listdir(valid_labels_folder))

print(f"len(valid_images) = {len(valid_images)}")
print(f"len(valid_labels) = {len(valid_labels)}")

len(valid_images) = 1765
len(valid_labels) = 1765


In [23]:
# Keep only the valid files whose name starts with 'CarLongPlate'.
valid_car_long_plate_images = [
    candidate
    for candidate in valid_images
    if candidate.startswith('CarLongPlate')
]
valid_car_long_plate_labels = [
    candidate
    for candidate in valid_labels
    if candidate.startswith('CarLongPlate')
]

print(f"len(valid_car_long_plate_images) = {len(valid_car_long_plate_images)}")
print(f"len(valid_car_long_plate_labels) = {len(valid_car_long_plate_labels)}")

len(valid_car_long_plate_images) = 978
len(valid_car_long_plate_labels) = 978


In [25]:
# Copy the selected valid images and labels into the destination split.
images_destination_folder = destination_folder + '/images'
labels_destination_folder = destination_folder + '/labels'

# exist_ok makes the cell safe to re-run
os.makedirs(images_destination_folder, exist_ok=True)
os.makedirs(labels_destination_folder, exist_ok=True)

# Images and labels are copied independently: each file is copied under its
# own name, and zipping the two listings would silently drop the tail of the
# longer one if their lengths ever differed.
for long_plate_image in valid_car_long_plate_images:
    source_image_path = os.path.join(valid_images_folder, long_plate_image)
    destination_image_path = os.path.join(images_destination_folder, long_plate_image)
    shutil.copy(source_image_path, destination_image_path)

for long_plate_label in valid_car_long_plate_labels:
    source_label_path = os.path.join(valid_labels_folder, long_plate_label)
    destination_label_path = os.path.join(labels_destination_folder, long_plate_label)
    shutil.copy(source_label_path, destination_label_path)

print("Copy process completed")


Copy process completed


In [26]:
# Sanity check: count what ended up in the valid destination folders.
long_plate_images, long_plate_labels = (
    os.listdir(folder)
    for folder in (images_destination_folder, labels_destination_folder)
)

print(f"len(long_plate_images) = {len(long_plate_images)}")
print(f"len(long_plate_labels) = {len(long_plate_labels)}")

len(long_plate_images) = 978
len(long_plate_labels) = 978


## Test files

In [27]:
# Locate the original test split and prepare the destination folder.
source_folder = 'data/Vehicle Registration Plates.v1-original-images.yolov8/test'
destination_folder = 'data/1_chosen_data/test'

# exist_ok makes the cell safe to re-run and avoids the check-then-create race
os.makedirs(destination_folder, exist_ok=True)

test_images_folder = source_folder + '/images'
test_labels_folder = source_folder + '/labels'
# os.listdir returns entries in arbitrary order; sort for reproducibility
test_images = sorted(os.listdir(test_images_folder))
test_labels = sorted(os.listdir(test_labels_folder))

print(f"len(test_images) = {len(test_images)}")
print(f"len(test_labels) = {len(test_labels)}")

len(test_images) = 882
len(test_labels) = 882


In [28]:
# Keep only the test files whose name starts with 'CarLongPlate'.
test_car_long_plate_images = [
    candidate
    for candidate in test_images
    if candidate.startswith('CarLongPlate')
]
test_car_long_plate_labels = [
    candidate
    for candidate in test_labels
    if candidate.startswith('CarLongPlate')
]

print(f"len(test_car_long_plate_images) = {len(test_car_long_plate_images)}")
print(f"len(test_car_long_plate_labels) = {len(test_car_long_plate_labels)}")

len(test_car_long_plate_images) = 490
len(test_car_long_plate_labels) = 490


In [31]:
# Copy the selected test images and labels into the destination split.
images_destination_folder = destination_folder + '/images'
labels_destination_folder = destination_folder + '/labels'

# exist_ok makes the cell safe to re-run
os.makedirs(images_destination_folder, exist_ok=True)
os.makedirs(labels_destination_folder, exist_ok=True)

# Images and labels are copied independently: each file is copied under its
# own name, and zipping the two listings would silently drop the tail of the
# longer one if their lengths ever differed.
for long_plate_image in test_car_long_plate_images:
    source_image_path = os.path.join(test_images_folder, long_plate_image)
    destination_image_path = os.path.join(images_destination_folder, long_plate_image)
    shutil.copy(source_image_path, destination_image_path)

for long_plate_label in test_car_long_plate_labels:
    source_label_path = os.path.join(test_labels_folder, long_plate_label)
    destination_label_path = os.path.join(labels_destination_folder, long_plate_label)
    shutil.copy(source_label_path, destination_label_path)

print("Copy process completed")

Copy process completed


In [32]:
# Sanity check: count what ended up in the test destination folders.
long_plate_images, long_plate_labels = (
    os.listdir(folder)
    for folder in (images_destination_folder, labels_destination_folder)
)

print(f"len(long_plate_images) = {len(long_plate_images)}")
print(f"len(long_plate_labels) = {len(long_plate_labels)}")

len(long_plate_images) = 490
len(long_plate_labels) = 490


# Change the contents of the labels folder to match the contents of the images after removing the broken images

## Train files

In [5]:
# NOTE(review): the copy cells above write to 'data/1_chosen_data', while this
# section reads 'data/1_selected_data' - presumably the folder was renamed by
# hand after the broken images were removed; confirm before a fresh run.
images_folder = 'data/1_selected_data/train/images'
labels_folder = 'data/1_selected_data/train/labels'

# os.listdir returns entries in arbitrary order. The cells below compare the
# first entry of each listing, so sort both to make that check reproducible.

# List all image files in the 'images' folder
image_files = sorted(os.listdir(images_folder))

# List all label files in the 'labels' folder
label_files = sorted(os.listdir(labels_folder))

print(f"len(train_images) = {len(image_files)}")
print(f"len(train_labels) = {len(label_files)}")

len(train_images) = 2715
len(train_labels) = 3436


In [7]:
# Derive the image file name expected for the first label file:
# same base name, '.jpg' extension.
test_label = label_files[0]
label_stem, _label_ext = os.path.splitext(test_label)
image_filename = label_stem + '.jpg'
image_filename

'CarLongPlate0_jpg.rf.68e0b9ccbdf0663f58876e02d480971e.jpg'

In [None]:
# Does the derived name match the first listed image? (Depends on listing order.)
image_filename == image_files[0]

True

In [10]:
# Delete every label file whose corresponding '.jpg' image no longer exists.
# A set gives O(1) membership tests; scanning the list for each label made the
# loop O(labels x images).
existing_images = set(image_files)

for label_file in label_files:
    if not label_file.endswith('.txt'):
        continue

    # Expected image name: label base name with a '.jpg' extension
    image_filename = os.path.splitext(label_file)[0] + '.jpg'

    # If the image does not exist, delete the label file
    if image_filename not in existing_images:
        label_file_path = os.path.join(labels_folder, label_file)
        os.remove(label_file_path)

print("Deletion process completed")

Deletion process completed.


In [11]:
# Re-list the train folders to compare the counts after the cleanup.
images_folder = 'data/1_selected_data/train/images'
labels_folder = 'data/1_selected_data/train/labels'

# image_files: content of the 'images' folder; label_files: content of 'labels'
image_files, label_files = (
    os.listdir(folder) for folder in (images_folder, labels_folder)
)

print(f"len(train_images) = {len(image_files)}")
print(f"len(train_labels) = {len(label_files)}")

len(train_images) = 2715
len(train_labels) = 2715


## Valid files

In [12]:
# List the valid folders before the cleanup.
images_folder = 'data/1_selected_data/valid/images'
labels_folder = 'data/1_selected_data/valid/labels'

# image_files: content of the 'images' folder; label_files: content of 'labels'
image_files, label_files = (
    os.listdir(folder) for folder in (images_folder, labels_folder)
)

print(f"len(valid_images) = {len(image_files)}")
print(f"len(valid_labels) = {len(label_files)}")

len(valid_images) = 796
len(valid_labels) = 978


In [13]:
# Delete every label file whose corresponding '.jpg' image no longer exists.
# A set gives O(1) membership tests; scanning the list for each label made the
# loop O(labels x images).
existing_images = set(image_files)

for label_file in label_files:
    if not label_file.endswith('.txt'):
        continue

    # Expected image name: label base name with a '.jpg' extension
    image_filename = os.path.splitext(label_file)[0] + '.jpg'

    # If the image does not exist, delete the label file
    if image_filename not in existing_images:
        label_file_path = os.path.join(labels_folder, label_file)
        os.remove(label_file_path)

print("Deletion process completed")

Deletion process completed


In [14]:
# Re-list the valid folders to compare the counts after the cleanup.
images_folder = 'data/1_selected_data/valid/images'
labels_folder = 'data/1_selected_data/valid/labels'

# image_files: content of the 'images' folder; label_files: content of 'labels'
image_files, label_files = (
    os.listdir(folder) for folder in (images_folder, labels_folder)
)

print(f"len(valid_images) = {len(image_files)}")
print(f"len(valid_labels) = {len(label_files)}")

len(valid_images) = 796
len(valid_labels) = 796


## Test files

In [15]:
# List the test folders before the cleanup.
images_folder = 'data/1_selected_data/test/images'
labels_folder = 'data/1_selected_data/test/labels'

# image_files: content of the 'images' folder; label_files: content of 'labels'
image_files, label_files = (
    os.listdir(folder) for folder in (images_folder, labels_folder)
)

print(f"len(test_images) = {len(image_files)}")
print(f"len(test_labels) = {len(label_files)}")

len(test_images) = 394
len(test_labels) = 490


In [16]:
# Delete every label file whose corresponding '.jpg' image no longer exists.
# A set gives O(1) membership tests; scanning the list for each label made the
# loop O(labels x images).
existing_images = set(image_files)

for label_file in label_files:
    if not label_file.endswith('.txt'):
        continue

    # Expected image name: label base name with a '.jpg' extension
    image_filename = os.path.splitext(label_file)[0] + '.jpg'

    # If the image does not exist, delete the label file
    if image_filename not in existing_images:
        label_file_path = os.path.join(labels_folder, label_file)
        os.remove(label_file_path)

print("Deletion process completed")

# Re-list both folders to confirm that the counts now match
images_folder = 'data/1_selected_data/test/images'
labels_folder = 'data/1_selected_data/test/labels'

# List all image files in the 'images' folder
image_files = os.listdir(images_folder)

# List all label files in the 'labels' folder
label_files = os.listdir(labels_folder)

print(f"len(test_images) = {len(image_files)}")
print(f"len(test_labels) = {len(label_files)}")

Deletion process completed
len(test_images) = 394
len(test_labels) = 394


# Filter images using an improved algorithm from Homework_9

## Rewrite the algorithm from Homework_9

In [6]:
# --- Detection parameters -------------------------------------------------
confThreshold = 0.5  # Confidence threshold
nmsThreshold = 0.4   # Non-maximum suppression threshold

# Size of the network's input image (608 was the alternative tried)
inpWidth = 416
inpHeight = 416

# --- Class names: one name per line in the names file ----------------------
classesFile = "yolo-license-plate-detection/model/classes.names"

classes = None
with open(classesFile, 'rt') as f:
    names_text = f.read()
    classes = names_text.rstrip('\n').split('\n')

# --- Network: Darknet configuration + weights, run with OpenCV on the CPU --
modelConfiguration = "yolo-license-plate-detection/model/config/darknet-yolov3.cfg"
modelWeights = "yolo-license-plate-detection/model/weights/model.weights"

net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

In [None]:
# Filter the train images: keep an image only if a license plate is localized
# (YOLO first, OpenCV contour search as a fallback) AND easyOCR reads at least
# 3 characters on the aligned plate crop. Kept images go to `output_dir`, the
# corresponding plate crops to `output_plates_dir`.
image_dir = 'data/1_selected_data/train/images'

output_dir = 'data/2_filtered_data/train/images'
output_plates_dir = 'data/2_filtered_data/train/plates'

if not os.path.isdir(image_dir):
    # Fail fast with a clear message instead of letting os.listdir fail below
    raise FileNotFoundError(f"Input image dir {image_dir} doesn't exist")

# Both output folders must exist before writing: previously only `output_dir`
# was created, so plate crops could not be written when the plates folder was
# missing (cv2.imwrite does not create directories).
os.makedirs(output_dir, exist_ok=True)
os.makedirs(output_plates_dir, exist_ok=True)

title = ""
reader = easyocr.Reader(['en'])


def _show_rejected_image(image, image_name):
    """Display a rejected BGR image with its file name as the title."""
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title(image_name)
    plt.show()


def _crop_yolo_plate(frame, indices, boxes, classIds):
    """Return the crop of the first kept 'License Plate' box, or None."""
    frame_height, frame_width = frame.shape[:2]
    # Flatten so that both flat and N x 1 index containers are accepted
    for i in np.asarray(indices, dtype=int).reshape(-1):
        if classes[classIds[i]] != 'License Plate':
            continue
        left, top, width, height = boxes[i]
        print(f"left, top, width, height = {left, top, width, height}")
        # Clip the box to the frame on all four sides. The right/bottom edges
        # are computed before clamping so a negative left/top does not widen
        # the crop, and a negative `top` no longer wraps around when slicing.
        x1, y1 = max(left, 0), max(top, 0)
        x2 = min(left + width, frame_width)
        y2 = min(top + height, frame_height)
        if x2 <= x1 or y2 <= y1:
            continue  # Box lies completely outside the frame
        return frame[y1:y2, x1:x2]
    return None


def _crop_opencv_plate(image, image_path):
    """Return the crop found by the OpenCV contour search, or None."""
    location = opencv_license_plates_localization(file_path=image_path, min_aspect_ratio=2)
    if location is None:
        return None
    # The bounding rectangle of the contour covers the same pixel extent as
    # the filled-contour mask used before, without re-reading the image.
    left, top, width, height = cv2.boundingRect(location)
    return image[top:top + height, left:left + width]


# license_plates_dict = {'image_name': [], 'x': [], 'y': [], 'width': [], 'height': [] }
# Sorted so that the processing order is reproducible between runs
image_names = sorted(k for k in os.listdir(image_dir) if 'out_py' not in k)

for image_name in image_names:
    image_path = os.path.join(image_dir, image_name)
    print(f"Image_path = {image_path}")

    output_img_path = os.path.join(output_dir, image_name)
    output_plate_path = os.path.join(output_plates_dir, image_name)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for missing or undecodable files
        print(f"\nCOULD NOT READ THE IMAGE: {image_name}!!!\n")
        continue
    frame = image.copy()

    # Create a 4D blob from the frame
    blob = cv2.dnn.blobFromImage(frame, 1/255, (inpWidth, inpHeight), [0, 0, 0], 1, crop=False)

    # Set input to the network
    net.setInput(blob)

    # Run forward pass and get output
    outs = net.forward(getOutputsNames(net))

    # Remove bounding boxes with low confidence
    indices, boxes, classIds, confidences = postprocess(frame, outs)

    # YOLO first; fall back to the OpenCV algorithm whenever YOLO yields no
    # usable plate crop (no boxes at all, or no box of class 'License Plate')
    license_plate = _crop_yolo_plate(frame, indices, boxes, classIds)
    if license_plate is not None:
        title = "YOLO"
    else:
        license_plate = _crop_opencv_plate(image, image_path)
        if license_plate is not None:
            title = "OpenCV"

    if license_plate is None:
        print(f"\nNO LICENSE PLATE DETECTED IN THIS IMAGE: {image_name}!!!\n")
        _show_rejected_image(image, image_name)
        continue

    # Check if the detected object is a license plate by trying to read some
    # symbols on it using easyOCR
    license_plate = align_license_plate(license_plate.copy())
    result = reader.readtext(license_plate)
    if not result:
        print(f"\nNO TEXT WAS DETECTED ON THE SELECTED PART OF THE IMAGE: {image_name}!!!\n")
        _show_rejected_image(image, image_name)
        continue

    # Text of the first OCR result
    text = result[0][-2]
    if len(text) < 3:
        print(f"\nNO TEXT WAS DETECTED OR IT IS TOO SHORT ON THE SELECTED PART OF THE IMAGE: {image_name}!!!")
        print(f"\nTHE DETECTED TEXT IS: '{text}' AND ITS LENGTH IS {len(text)}.\n")
        _show_rejected_image(image, image_name)
        continue

    # cv2.imwrite signals failure through its return value, not an exception
    image_saved = cv2.imwrite(output_img_path, image)
    plate_saved = cv2.imwrite(output_plate_path, license_plate)
    if not (image_saved and plate_saved):
        print(f"\nFAILED TO SAVE THE RESULTS FOR THE IMAGE: {image_name}!!!\n")