In [1]:
from ultralytics import YOLO
import cv2
import csv
import numpy as np
from sort.sort import *

# Your custom utility functions
from util import get_car, read_license_plate, write_csv

# Real-time licence-plate recognition from the default camera.
#
# For every captured frame the script:
#   1. detects objects with the COCO model and keeps the vehicle classes,
#   2. feeds those boxes to the SORT tracker to obtain per-vehicle ids,
#   3. detects licence plates and matches each plate to a tracked vehicle,
#   4. reads the plate text, then records it in `results` and `output.csv`,
#   5. draws the boxes/text on the frame and shows it in a window.
# Press 'q' in the window to stop.

# Initialize variables
# Recognition results keyed as results[frame_nmr][car_id].
# NOTE: an entry is added for every frame, so this grows for as long as the
# camera loop runs.
results = {}
mot_tracker = Sort()
coco_model = YOLO('yolov8m.pt')
license_plate_detector = YOLO('/Users/malik/Downloads/license_plate_detector.pt')

# Class ids kept from the COCO detector output (presumably car, motorcycle,
# bus and truck in the standard COCO label map -- confirm against the model).
vehicles = [2, 3, 5, 7]
frame_nmr = -1
ret = True

# Change this line to use the default camera
cap = cv2.VideoCapture(0)

try:
    # The `with` block guarantees the CSV is flushed and closed even if the
    # loop is interrupted (e.g. KeyboardInterrupt from the notebook).
    with open('output.csv', 'w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(['frame_nmr', 'car_id', 'car_bbox', 'license_bbox', 'license_text', 'license_bbox_score', 'license_text_score'])

        # Main loop
        while ret:
            frame_nmr += 1
            ret, frame = cap.read()
            if not ret:
                # No frame could be grabbed; stop processing.
                break

            results[frame_nmr] = {}

            # Keep only vehicle detections as [x1, y1, x2, y2, score] rows.
            detections = coco_model(frame)[0]
            detections_ = [
                [x1, y1, x2, y2, score]
                for x1, y1, x2, y2, score, class_id in detections.boxes.data.tolist()
                if int(class_id) in vehicles
            ]

            # The tracker must be updated on every frame, including frames
            # without vehicles. np.array([]) would have shape (0,), so pass an
            # explicit empty (0, 5) array to keep the expected 2-D layout.
            if detections_:
                tracker_input = np.array(detections_)
            else:
                tracker_input = np.empty((0, 5))
            track_ids = mot_tracker.update(tracker_input)

            license_plates = license_plate_detector(frame)[0]

            for license_plate in license_plates.boxes.data.tolist():
                x1, y1, x2, y2, score, class_id = license_plate

                # get_car reports car_id == -1 when the plate could not be
                # assigned to any tracked vehicle.
                xcar1, ycar1, xcar2, ycar2, car_id = get_car(license_plate, track_ids)
                if car_id == -1:
                    continue

                license_plate_crop = frame[int(y1):int(y2), int(x1):int(x2), :]
                license_plate_text, license_plate_text_score = read_license_plate(license_plate_crop)
                if license_plate_text is None:
                    # Plate text could not be read; nothing to record.
                    continue

                car_bbox = [xcar1, ycar1, xcar2, ycar2]
                plate_bbox = [x1, y1, x2, y2]

                results[frame_nmr][car_id] = {
                    'car': {'bbox': car_bbox},
                    'license_plate': {
                        'bbox': plate_bbox,
                        'text': license_plate_text,
                        'bbox_score': score,
                        'text_score': license_plate_text_score,
                    },
                }

                # Write to CSV
                csv_writer.writerow([frame_nmr, car_id, car_bbox, plate_bbox, license_plate_text, score, license_plate_text_score])

                # Draw bounding boxes and text on the frame
                cv2.rectangle(frame, (int(xcar1), int(ycar1)), (int(xcar2), int(ycar2)), (0, 255, 0), 2)
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
                cv2.putText(frame, license_plate_text, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

            # Show the frame in a window
            cv2.imshow('Real-time License Plate Recognition', frame)

            # Press 'q' to quit
            if (cv2.waitKey(1) & 0xFF) == ord('q'):
                break
finally:
    # Release the camera and close the preview window no matter how the loop
    # ended, so the device is not left locked after an error or interrupt.
    cap.release()
    cv2.destroyAllWindows()


Using CPU. Note: This module is much faster with a GPU.

0: 384x640 1 person, 276.1ms
Speed: 8.2ms preprocess, 276.1ms inference, 12.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 61.8ms
Speed: 2.3ms preprocess, 61.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 bottles, 1 chair, 1 couch, 252.1ms
Speed: 1.9ms preprocess, 252.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 57.5ms
Speed: 1.7ms preprocess, 57.5ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 bottles, 1 chair, 1 couch, 1 bed, 216.6ms
Speed: 1.3ms preprocess, 216.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 47.3ms
Speed: 1.5ms preprocess, 47.3ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 bottles, 1 chair, 1 couch, 218.8ms
Speed: 1.5ms preprocess, 218.8ms inf

Speed: 1.7ms preprocess, 42.2ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 bottles, 211.8ms
Speed: 1.2ms preprocess, 211.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 40.7ms
Speed: 1.5ms preprocess, 40.7ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 bottles, 209.3ms
Speed: 1.2ms preprocess, 209.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 44.3ms
Speed: 1.8ms preprocess, 44.3ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 bottles, 219.4ms
Speed: 1.3ms preprocess, 219.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 44.2ms
Speed: 1.5ms preprocess, 44.2ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 bottles, 202.7ms
Speed: 1.3ms preprocess, 202.7ms inference, 0.

KeyboardInterrupt: 