In [1]:
from ultralytics import YOLO
import cv2
from sort.sort import *
import string
import easyocr
import torch


In [2]:
# Verify the GPU is available
print(f"GPU available: {torch.cuda.is_available()}")
print(f"Actual device: {torch.device('cuda' if torch.cuda.is_available() else 'cpu')}")

GPU available: True
Actual device: cuda


In [3]:
# Initialize an empty dictionay in which it will be saved the data
results = {}

# Load yolov8n for car detection, and the custom license plate detector
vehicle_detector = YOLO('yolov8n')
plate_detector = YOLO('./model/plate_detector.pt')

# Initialize the OCR reader
reader = easyocr.Reader(['en'], gpu=True)

#Initialize the motion tracker
mot_tracker = Sort()

# Load the video
video = cv2.VideoCapture('./video/cars.mp4')

total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

  return torch.load(file, map_location='cpu'), file  # load
  net.load_state_dict(copyStateDict(torch.load(trained_model, map_location=device)))
  model.load_state_dict(torch.load(model_path, map_location=device))


In [4]:


# We know the format of license plates we are working with, so, we know in which positions we expect numbers or letters. For a OCR is complicated to
# distinguishe perfectly some pairs of characters, for example, letter S and number 5. With these two dictionaries, we map the corresponding 
# letter-number in positions in which we know is a missclassification

dict_char_to_int = {'O': '0',
                    'I': '1',
                    'J': '3',
                    'A': '4',
                    'G': '6',
                    'S': '5'}

dict_int_to_char = {'0': 'O',
                    '1': 'I',
                    '3': 'J',
                    '4': 'A',
                    '6': 'G',
                    '5': 'S'}


# This function save in a csv file the information obtained in the video for post-processing.

def write_csv(results, output_path):
    """
    Write the results to a CSV file.

    Args:
        results (dict): Dictionary containing the results.
        output_path (str): Path to the output CSV file.
    """
    with open(output_path, 'w') as f:
        f.write('{},{},{},{},{},{},{}\n'.format('frame_nmr', 'car_id', 'car_bbox',
                                                'license_plate_bbox', 'license_plate_bbox_score', 'license_number',
                                                'license_number_score'))

        for frame_nmr in results.keys():
            for car_id in results[frame_nmr].keys():
                print(results[frame_nmr][car_id])
                if 'car' in results[frame_nmr][car_id].keys() and \
                   'license_plate' in results[frame_nmr][car_id].keys() and \
                   'text' in results[frame_nmr][car_id]['license_plate'].keys():
                    f.write('{},{},{},{},{},{},{}\n'.format(frame_nmr,
                                                            car_id,
                                                            '[{} {} {} {}]'.format(
                                                                results[frame_nmr][car_id]['car']['bbox'][0],
                                                                results[frame_nmr][car_id]['car']['bbox'][1],
                                                                results[frame_nmr][car_id]['car']['bbox'][2],
                                                                results[frame_nmr][car_id]['car']['bbox'][3]),
                                                            '[{} {} {} {}]'.format(
                                                                results[frame_nmr][car_id]['license_plate']['bbox'][0],
                                                                results[frame_nmr][car_id]['license_plate']['bbox'][1],
                                                                results[frame_nmr][car_id]['license_plate']['bbox'][2],
                                                                results[frame_nmr][car_id]['license_plate']['bbox'][3]),
                                                            results[frame_nmr][car_id]['license_plate']['bbox_score'],
                                                            results[frame_nmr][car_id]['license_plate']['text'],
                                                            results[frame_nmr][car_id]['license_plate']['text_score'])
                            )
        f.close()



# This function verifies that the text obtained from the license plate has the  expected format AA77AAA (two letters, two numbers, three letter).
# It takes into account missclassifications explained previously
def license_complies_format(text):
    """
    Check if the license plate text complies with the required format.

    Args:
        text (str): License plate text.

    Returns:
        bool: True if the license plate complies with the format, False otherwise.
    """
    if len(text) != 7:
        return False

    if (text[0] in string.ascii_uppercase or text[0] in dict_int_to_char.keys()) and \
       (text[1] in string.ascii_uppercase or text[1] in dict_int_to_char.keys()) and \
       (text[2] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[2] in dict_char_to_int.keys()) and \
       (text[3] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[3] in dict_char_to_int.keys()) and \
       (text[4] in string.ascii_uppercase or text[4] in dict_int_to_char.keys()) and \
       (text[5] in string.ascii_uppercase or text[5] in dict_int_to_char.keys()) and \
       (text[6] in string.ascii_uppercase or text[6] in dict_int_to_char.keys()):
        return True
    else:
        return False


# This function gives the desired format to the text, eliminating spaces and correcting missclassifications letter-number
def format_license(text):
    """
    Format the license plate text by converting characters using the mapping dictionaries.

    Args:
        text (str): License plate text.

    Returns:
        str: Formatted license plate text.
    """
    license_plate_ = ''
    mapping = {0: dict_int_to_char, 1: dict_int_to_char, 4: dict_int_to_char, 5: dict_int_to_char, 6: dict_int_to_char,
               2: dict_char_to_int, 3: dict_char_to_int}
    for j in [0, 1, 2, 3, 4, 5, 6]:
        if text[j] in mapping[j].keys():
            license_plate_ += mapping[j][text[j]]
        else:
            license_plate_ += text[j]

    return license_plate_


# For every frame of the video, multiple cars are detected. This function finds the specific car a plate belongs to.
def get_car(plate, track_ids):
    x1, y1, x2, y2, score, class_id = plate
    foundIt = False
    for j in range(len(track_ids)):
        xcar1, ycar1, xcar2, ycar2, car_id = track_ids[j]
        if x1 > xcar1 and y1 > ycar1 and x2 < xcar2 and y2 < ycar2:
            car_index = j
            foundIt = True
            break
    if foundIt:
        return track_ids[car_index]
    return -1, -1, -1, -1, -1


# This function reads the image of the plate
def read_license_plate(plate_crop):
    detections = reader.readtext(plate_crop)
    for detection in detections:
        bbox, text, score = detection
        text = text.upper().replace(' ','')

        if license_complies_format(text):
            return format_license(text), score
    return None, None





    

In [5]:
# In the YOLO detector, these are the id's of a car, bike, bus and truck, respectively.
vehicle_ids = [2, 3, 5, 7]

frame_num = -1
ret = True

while ret:# and frame_num<200:
    frame_num += 1
    ret, frame = video.read()
    #ret is False when we are at the end of the video
    if ret:
        results[frame_num] = {}
        vehicles = vehicle_detector(frame)[0]
        vehicles_boxes = []
        
        for vehicle in vehicles.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = vehicle
            
            if int(class_id) in vehicle_ids:
                vehicles_boxes.append([x1, y1, x2, y2, score])
        track_ids = mot_tracker.update(np.asarray(vehicles_boxes))

        plates = plate_detector(frame)[0]
        
        for plate in plates.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = plate

            # Now we will assign a car to every license plate detected
            xcar1, ycar1, xcar2, ycar2, car_id = get_car(plate, track_ids)

            if car_id != -1:

                # Crop the license plate
                plate_crop = frame[int(y1):int(y2),int(x1):int(x2),:]

                #Process license plate
                plate_crop_gray = cv2.cvtColor(plate_crop, cv2.COLOR_BGR2GRAY)
                _, plate_crop_tresh = cv2.threshold(plate_crop_gray, 64, 255,cv2.THRESH_BINARY_INV)


                plate_text, plate_text_score = read_license_plate(plate_crop_gray)

                if plate_text is not None:
                    results[frame_num][car_id] = {'car': {'bbox': [xcar1, ycar1, xcar2, ycar2]},
                                                'license_plate': {'bbox': [x1, y1, x2, y2],
                                                                    'text': plate_text,
                                                                    'bbox_score': score,
                                                                    'text_score': plate_text_score}}

          
write_csv(results,'./info.csv');





0: 384x640 21 cars, 1 bus, 2 trucks, 138.2ms
Speed: 10.1ms preprocess, 138.2ms inference, 286.5ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 3 plates, 24.1ms
Speed: 0.0ms preprocess, 24.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 22 cars, 1 bus, 2 trucks, 24.1ms
Speed: 7.0ms preprocess, 24.1ms inference, 7.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 3 plates, 24.1ms
Speed: 0.0ms preprocess, 24.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 23 cars, 1 bus, 2 trucks, 22.0ms
Speed: 7.0ms preprocess, 22.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 plates, 9.0ms
Speed: 8.0ms preprocess, 9.0ms inference, 7.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 24 cars, 1 bus, 1 truck, 17.1ms
Speed: 7.0ms preprocess, 17.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 plates, 9.0ms
Speed: 8.0ms preprocess,

{'car': {'bbox': [752.2359008789062, 1369.70556640625, 1428.825927734375, 1982.8436279296875]}, 'license_plate': {'bbox': [941.2796630859375, 1759.831787109375, 1226.74951171875, 1869.125244140625], 'text': 'NA13NRU', 'bbox_score': 0.3996046483516693, 'text_score': 0.37268832079691244}}
{'car': {'bbox': [749.8249741339441, 1374.8580982738074, 1424.2456094762706, 1991.2738457249372]}, 'license_plate': {'bbox': [937.056640625, 1761.627685546875, 1235.5413818359375, 1874.8319091796875], 'text': 'NA13NRU', 'bbox_score': 0.287248432636261, 'text_score': 0.2691320637405827}}
{'car': {'bbox': [749.4789804213835, 1379.4000166102037, 1431.53463254117, 2000.3142846943651]}, 'license_plate': {'bbox': [947.8624877929688, 1789.257568359375, 1201.591796875, 1861.302978515625], 'text': 'NA13NRU', 'bbox_score': 0.6281622648239136, 'text_score': 0.4069816811850475}}
{'car': {'bbox': [742.0248962039786, 1385.7630673495541, 1429.7043639554558, 2011.3290988702147]}, 'license_plate': {'bbox': [943.37677001

In [6]:
import csv
import numpy as np
from scipy.interpolate import interp1d


def interpolate_bounding_boxes(data):
    # Extract necessary data columns from input data
    frame_numbers = np.array([int(row['frame_nmr']) for row in data])
    car_ids = np.array([int(float(row['car_id'])) for row in data])
    
    car_bboxes = np.array([list(map(float, row['car_bbox'][1:-1].split())) for row in data])
    license_plate_bboxes = np.array([list(map(float, row['license_plate_bbox'][1:-1].split())) for row in data])

    interpolated_data = []
    unique_car_ids = np.unique(car_ids)
    for car_id in unique_car_ids:

        frame_numbers_ = [p['frame_nmr'] for p in data if int(float(p['car_id'])) == int(float(car_id))]
        print(frame_numbers_, car_id)

        # Filter data for a specific car ID
        car_mask = car_ids == car_id
        car_frame_numbers = frame_numbers[car_mask]

        
        
        car_bboxes_interpolated = []
        license_plate_bboxes_interpolated = []

        first_frame_number = car_frame_numbers[0]
        last_frame_number = car_frame_numbers[-1]

        for i in range(len(car_bboxes[car_mask])):
            frame_number = car_frame_numbers[i]
            car_bbox = car_bboxes[car_mask][i]
            license_plate_bbox = license_plate_bboxes[car_mask][i]

            if i > 0:
                prev_frame_number = car_frame_numbers[i-1]
                prev_car_bbox = car_bboxes_interpolated[-1]
                prev_license_plate_bbox = license_plate_bboxes_interpolated[-1]

                if frame_number - prev_frame_number > 1:
                    # Interpolate missing frames' bounding boxes
                    frames_gap = frame_number - prev_frame_number
                    x = np.array([prev_frame_number, frame_number])
                    x_new = np.linspace(prev_frame_number, frame_number, num=frames_gap, endpoint=False)
                    interp_func = interp1d(x, np.vstack((prev_car_bbox, car_bbox)), axis=0, kind='linear')
                    interpolated_car_bboxes = interp_func(x_new)
                    interp_func = interp1d(x, np.vstack((prev_license_plate_bbox, license_plate_bbox)), axis=0, kind='linear')
                    interpolated_license_plate_bboxes = interp_func(x_new)

                    car_bboxes_interpolated.extend(interpolated_car_bboxes[1:])
                    license_plate_bboxes_interpolated.extend(interpolated_license_plate_bboxes[1:])

            car_bboxes_interpolated.append(car_bbox)
            license_plate_bboxes_interpolated.append(license_plate_bbox)

        for i in range(len(car_bboxes_interpolated)):
            frame_number = first_frame_number + i
            row = {}
            row['frame_nmr'] = str(frame_number)
            row['car_id'] = str(car_id)
            row['car_bbox'] = ' '.join(map(str, car_bboxes_interpolated[i]))
            row['license_plate_bbox'] = ' '.join(map(str, license_plate_bboxes_interpolated[i]))

            if str(frame_number) not in frame_numbers_:
                # Imputed row, set the following fields to '0'
                row['license_plate_bbox_score'] = '0'
                row['license_number'] = '0'
                row['license_number_score'] = '0'
            else:
                # Original row, retrieve values from the input data if available
                original_row = [p for p in data if int(p['frame_nmr']) == frame_number and int(float(p['car_id'])) == int(float(car_id))][0]
                row['license_plate_bbox_score'] = original_row['license_plate_bbox_score'] if 'license_plate_bbox_score' in original_row else '0'
                row['license_number'] = original_row['license_number'] if 'license_number' in original_row else '0'
                row['license_number_score'] = original_row['license_number_score'] if 'license_number_score' in original_row else '0'

            interpolated_data.append(row)

    return interpolated_data


# Load the CSV file
with open('info.csv', 'r') as file:
    reader = csv.DictReader(file)
    data = list(reader)

# Interpolate missing data
interpolated_data = interpolate_bounding_boxes(data)

# Write updated data to a new CSV file
header = ['frame_nmr', 'car_id', 'car_bbox', 'license_plate_bbox', 'license_plate_bbox_score', 'license_number', 'license_number_score']
with open('info_interpolated.csv', 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=header)
    writer.writeheader()
    writer.writerows(interpolated_data)

['94', '95', '102', '104', '106', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '123', '124', '125', '126', '128', '129', '131', '132', '133', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '152', '155', '157', '161', '163', '165', '173', '174', '179', '182', '183', '185', '186', '187', '188', '189', '190', '191', '192', '193', '196', '197', '198', '199', '200', '201', '203', '204', '205', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218'] 1
['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '13', '14', '15', '16', '17', '18', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '32', '33'] 2
['4', '13', '23', '29', '41', '42', '43', '44', '59', '61', '66', '69'] 5
['23', '25', '28', '29', '30', '32', '33', '34', '35', '36', '38', '39', '40', '41', '42', '44', '45', '46', '47', '48', '49', '50', '51', '52', '54', '55', '56', '57', '59', '60', '61', '62',

In [7]:
import ast

import cv2
import numpy as np
import pandas as pd


def draw_border(img, top_left, bottom_right, color=(0, 255, 0), thickness=10, line_length_x=200, line_length_y=200):
    x1, y1 = top_left
    x2, y2 = bottom_right

    cv2.line(img, (x1, y1), (x1, y1 + line_length_y), color, thickness)  #-- top-left
    cv2.line(img, (x1, y1), (x1 + line_length_x, y1), color, thickness)

    cv2.line(img, (x1, y2), (x1, y2 - line_length_y), color, thickness)  #-- bottom-left
    cv2.line(img, (x1, y2), (x1 + line_length_x, y2), color, thickness)

    cv2.line(img, (x2, y1), (x2 - line_length_x, y1), color, thickness)  #-- top-right
    cv2.line(img, (x2, y1), (x2, y1 + line_length_y), color, thickness)

    cv2.line(img, (x2, y2), (x2, y2 - line_length_y), color, thickness)  #-- bottom-right
    cv2.line(img, (x2, y2), (x2 - line_length_x, y2), color, thickness)

    return img


results = pd.read_csv('info_interpolated.csv')

#print(results.columns)

# load video
video_path = './video/cars.mp4'
cap = cv2.VideoCapture(video_path)

fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Specify the codec
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter('./video/cars_processed.mp4', fourcc, fps, (width, height))

license_plate = {}
for car_id in np.unique(results['car_id']):
    max_ = np.amax(results[results['car_id'] == car_id]['license_number_score'])
    license_plate[car_id] = {'license_crop': None,
                             'license_plate_number': results[(results['car_id'] == car_id) &
                                                             (results['license_number_score'] == max_)]['license_number'].iloc[0]}
    cap.set(cv2.CAP_PROP_POS_FRAMES, results[(results['car_id'] == car_id) &
                                             (results['license_number_score'] == max_)]['frame_nmr'].iloc[0])
    ret, frame = cap.read()

    x1, y1, x2, y2 = ast.literal_eval(results[(results['car_id'] == car_id) &
                                              (results['license_number_score'] == max_)]['license_plate_bbox'].iloc[0].replace('[ ', '[').replace('   ', ' ').replace('  ', ' ').replace(' ', ','))

    license_crop = frame[int(y1):int(y2), int(x1):int(x2), :]
    license_crop = cv2.resize(license_crop, (int((x2 - x1) * 400 / (y2 - y1)), 400))

    license_plate[car_id]['license_crop'] = license_crop


frame_nmr = -1

cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

# read frames
ret = True
while ret:# and frame_nmr < 50:
    ret, frame = cap.read()
    frame_nmr += 1
    
    if ret:
        df_ = results[results['frame_nmr'] == frame_nmr]
        for row_indx in range(len(df_)):
            # draw car
            car_x1, car_y1, car_x2, car_y2 = ast.literal_eval(df_.iloc[row_indx]['car_bbox'].replace('[ ', '[').replace('   ', ' ').replace('  ', ' ').replace(' ', ','))
            draw_border(frame, (int(car_x1), int(car_y1)), (int(car_x2), int(car_y2)), (0, 255, 0), 25,line_length_x=200, line_length_y=200)

            #cv2.rectangle(frame, (int(car_x1), int(car_y1)), (int(car_x2), int(car_y2)), (0, 255, 0), 12)

            # draw license plate
            x1, y1, x2, y2 = ast.literal_eval(df_.iloc[row_indx]['license_plate_bbox'].replace('[ ', '[').replace('   ', ' ').replace('  ', ' ').replace(' ', ','))
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 12)

            # crop license plate
            license_crop = license_plate[df_.iloc[row_indx]['car_id']]['license_crop']

            H, W, _ = license_crop.shape

            try:
                frame[int(car_y1) - H - 100:int(car_y1) - 100,
                      int((car_x2 + car_x1 - W) / 2):int((car_x2 + car_x1 + W) / 2), :] = license_crop

                frame[int(car_y1) - H - 400:int(car_y1) - H - 100,
                      int((car_x2 + car_x1 - W) / 2):int((car_x2 + car_x1 + W) / 2), :] = (255, 255, 255)

                (text_width, text_height), _ = cv2.getTextSize(
                    license_plate[df_.iloc[row_indx]['car_id']]['license_plate_number'],
                    cv2.FONT_HERSHEY_SIMPLEX,
                    4.3,
                    17)

                cv2.putText(frame,
                            license_plate[df_.iloc[row_indx]['car_id']]['license_plate_number'],#+"-"+str(df_.iloc[row_indx]['car_id']),
                            (int((car_x2 + car_x1 - text_width) / 2), int(car_y1 - H - 250 + (text_height / 2))),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            4.3,
                            (0, 0, 0),
                            17)

            except:
                pass

        out.write(frame)
        frame = cv2.resize(frame, (1280, 720))

print("Complete")

out.release()
cap.release()

Complete
