Imports

In [1]:
import torch
print("CUDA Available:", torch.cuda.is_available())
print("GPU Name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU")

CUDA Available: True
GPU Name: NVIDIA GeForce RTX 3050 Laptop GPU


In [2]:
import pandas as pd
import numpy as np
import os
from PIL import Image
import random
import matplotlib.pyplot as plt

In [3]:
import cv2
from ultralytics import YOLO
from sort.sort import Sort
import easyocr

In [4]:
from datasets import Dataset
from transformers import TrOCRProcessor
from transformers import VisionEncoderDecoderModel
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Loading pretrained YOLOv8 model
model = YOLO('yolov8n.pt') 

# Testing on an image
results = model("D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\Screenshot 2025-07-08 162015.png")
results[0].show()


image 1/1 D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\Screenshot 2025-07-08 162015.png: 384x640 19 cars, 3 trucks, 83.1ms
Speed: 6.5ms preprocess, 83.1ms inference, 193.1ms postprocess per image at shape (1, 3, 384, 640)


In [9]:
video_path = "data/your_video.mp4"
cap = cv2.VideoCapture(r"D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\videos\2103099-uhd_3840_2160_30fps.mp4")

if not cap.isOpened():
    print("Error: Could not open video.")
    exit()

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Running YOLOv8 detection
    results = model(frame)

    # Plotting results on frame
    annotated_frame = results[0].plot()

    # Displaying the frame
    display_frame = cv2.resize(annotated_frame, (960, 540))
    cv2.imshow("YOLOv8 Detection", display_frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


0: 384x640 21 cars, 1 bus, 2 trucks, 37.8ms
Speed: 6.0ms preprocess, 37.8ms inference, 3.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 23 cars, 1 bus, 2 trucks, 37.4ms
Speed: 5.0ms preprocess, 37.4ms inference, 3.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 cars, 1 bus, 2 trucks, 37.5ms
Speed: 4.5ms preprocess, 37.5ms inference, 4.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 23 cars, 1 bus, 2 trucks, 37.2ms
Speed: 4.9ms preprocess, 37.2ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)


In [17]:
tracker = Sort()

cap = cv2.VideoCapture(r"D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\videos\2103099-uhd_3840_2160_30fps.mp4")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame)

    # Parseing detections for SORT: (x1,y1,x2,y2,score)
    dets = []
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
            conf = box.conf[0].cpu().numpy()
            dets.append([x1, y1, x2, y2, conf])

    dets = np.array(dets)
    
    tracks = tracker.update(dets)

    # Drawing tracked objects
    for track in tracks:
        x1, y1, x2, y2, track_id = track
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 2)
        cv2.putText(frame, f'ID: {int(track_id)}', (x1, y1-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,255,0), 2)

    display_frame = cv2.resize(frame, (960, 540))
    cv2.imshow("YOLOv8 + SORT Tracking", display_frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


0: 384x640 21 cars, 1 bus, 2 trucks, 37.6ms
Speed: 5.8ms preprocess, 37.6ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 23 cars, 1 bus, 2 trucks, 37.2ms
Speed: 4.9ms preprocess, 37.2ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 cars, 1 bus, 2 trucks, 37.5ms
Speed: 4.5ms preprocess, 37.5ms inference, 3.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 23 cars, 1 bus, 2 trucks, 36.9ms
Speed: 4.7ms preprocess, 36.9ms inference, 3.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 24 cars, 1 bus, 2 trucks, 36.8ms
Speed: 4.7ms preprocess, 36.8ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 cars, 1 bus, 2 trucks, 36.8ms
Speed: 4.3ms preprocess, 36.8ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)


Speed Estimation
- assumptions :
  - fixed camera angle (no moving camera)
  - Speed is estimated relatively unless calibrated with real-world scale (e.g. distance per pixel)
- Implementation Strategy :
  - Storing previous positions of each tracked ID
  - Calculating pixel distance moved per frame
  - Convert to real-world speed using:
     - Speed (km/h) = pixel distance × scale factor × fps × 3.6 / 1000
  - scale factor: meters per pixel (needs calibration)

In [27]:
tracker = Sort()
plate_model = YOLO("path_to_plate_detection_model.pt")
ocr_reader = easyocr.Reader(['en'])

cap = cv2.VideoCapture(r"D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\videos\2103099-uhd_3840_2160_30fps.mp4")

prev_positions = {}
fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 30  # fallback
meters_per_pixel = 0.05  # example scale factor
speed_history = {}  # To store recent speeds per track
history_len = 5     # Number of frames to average over

while True:
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame)

    # Parsing detections for SORT: (x1,y1,x2,y2,score)
    dets = []
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
            conf = box.conf[0].cpu().numpy()
            dets.append([x1, y1, x2, y2, conf])

    dets = np.array(dets)
    
    tracks = tracker.update(dets)

    for track in tracks:
        x1, y1, x2, y2, track_id = track
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        
        center_x = (x1 + x2) / 2
        center_y = (y1 + y2) / 2
        
        speed = 0.0
        
        if track_id in prev_positions:
            prev_x, prev_y = prev_positions[track_id]
            pixel_distance = np.sqrt((center_x - prev_x)**2 + (center_y - prev_y)**2)
            instant_speed = pixel_distance * meters_per_pixel * fps * 3.6
            
            # Update speed history
            if track_id not in speed_history:
                speed_history[track_id] = []
            speed_history[track_id].append(instant_speed)
            
            # Keep only recent N values
            if len(speed_history[track_id]) > history_len:
                speed_history[track_id].pop(0)
            
            # Calculate average speed
            speed = np.mean(speed_history[track_id])
        
        prev_positions[track_id] = (center_x, center_y)
        
        # Draw bounding box, ID, and smoothed speed
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 2)
        cv2.putText(frame, f'ID: {int(track_id)}', (x1, y1-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,255,0), 2)
        cv2.putText(frame, f'Speed: {speed:.1f} km/h', (x1, y2+20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255,0,0), 2)
    
        
        # Updating previous position
        prev_positions[track_id] = (center_x, center_y)
        
        # Drawing bounding box and ID
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 2)
        cv2.putText(frame, f'ID: {int(track_id)}', (x1, y1-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,255,0), 2)

        # Cropping vehicle region
        vehicle_crop = frame[y1:y2, x1:x2]
        
        # Detecting number plate within vehicle
        results = plate_model(vehicle_crop)
        plates = results[0].boxes.xyxy.cpu().numpy()
        
        for plate in plates:
            px1, py1, px2, py2 = map(int, plate)
            plate_crop = vehicle_crop[py1:py2, px1:px2]
            
            # OCR
            ocr_result = ocr_reader.readtext(plate_crop)
            plate_text = ocr_result[0][1] if ocr_result else "Unknown"
            
            # Draw plate box and text
            cv2.rectangle(vehicle_crop, (px1, py1), (px2, py2), (0,0,255), 2)
            cv2.putText(frame, f'Plate: {plate_text}', (x1, y2+40),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)

    display_frame = cv2.resize(frame, (960, 540))
    cv2.imshow("YOLOv8 + SORT Tracking", display_frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

FileNotFoundError: [Errno 2] No such file or directory: 'path_to_plate_detection_model.pt'

Checking Dataset

In [6]:
csv_path = r"D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\groundtruth.csv"
images_dir = r"D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\usimages"

df = pd.read_csv(csv_path, header=None, names=['image_name', 'info', 'label'])

print("Sample entries:\n", df.head())

# Checking missing images
missing = []
for img_name in df['image_name']:
    img_path = os.path.join(images_dir, img_name)
    if not os.path.exists(img_path):
        missing.append(img_name)

if missing:
    print(f"Missing {len(missing)} images:", missing[:5])
else:
    print("All images found")

samples = df.sample(5)
for idx, row in samples.iterrows():
    img_path = os.path.join(images_dir, row['image_name'])
    img = Image.open(img_path)
    plt.imshow(img)
    plt.title(f"Label: {row['label']}")
    plt.axis('off')
    plt.show()

Sample entries:
    image_name info    label
0  ak1165.png   ak   FUW999
1   ak399.png   ak   FGJ235
2   ak721.png   ak   FHG521
3   ak848.png   ak   FPJ331
4  al1156.png   al  6A5730A
All images found


EasyOCR Baseline 

In [7]:
reader = easyocr.Reader(['en'], gpu=True)

# Evaluating on a few samples
correct = 0
total = 10  

samples = df.sample(total)

for idx, row in samples.iterrows():
    img_path = os.path.join(images_dir, row['image_name'])
    
    img_bgr = cv2.imread(img_path)
    img_gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    result = reader.readtext(img_gray, detail=0)
    
    print(f"GT: {row['label']}, Prediction: {result}")
    
    # Check if GT label is in prediction list (basic exact match logic)
    if any(row['label'] in pred for pred in result):
        correct += 1

print(f"EasyOCR baseline accuracy on {total} samples: {correct/total*100:.2f}%")

GT: XSTASY, Prediction: ['WISCONSIN', 'XSTASY', "America's Dairyland"]
GT: BJL833, Prediction: ["'GEORGIA:gov", 'BJL6833', 'TDEC', 'JACKSON']
GT: 845CTC, Prediction: ['EXPLORE', 'Minnesota comn', '845-CTC', 'JUD -', '10.000 lakes', '10']
GT: ZT714, Prediction: ['Rhode Island -', 'ZT-714', 'Ocean State', '@cU']
GT: 47A8D4C, Prediction: ['Stars Fell On', '47A8D4C', 'Nov]', 'Alabama', '20062']
GT: 326XLH, Prediction: ['IowA', '326 XLH', 'POTTAWATTAMIE']
GT: PLTFRM, Prediction: ['Sllinois', 'PLT FRM']
GT: 0JW695, Prediction: ['Webrska', 'I1o}', 'OJK 695']
GT: MNW235, Prediction: ['SuncIDAHO', 'MNR 235', 'FAMOUS POTATOE', '08 !']
GT: FAE6137, Prediction: ['NEW YORK', 'FAE.6137', 'EMPIRE STATE']
EasyOCR baseline accuracy on 10 samples: 20.00%


TrOCR Fine-tuning

In [8]:
df = pd.read_csv('D:/projects/Automated Vehicle Speed & Number Plate Detection System/data/groundtruth.csv', names=['image_name', 'info', 'label'])

df = df[['image_name', 'label']]

# Adding full image path column
images_dir = 'D:/projects/Automated Vehicle Speed & Number Plate Detection System/data/usimages'
df['image_path'] = df['image_name'].apply(lambda x: os.path.join(images_dir, x))

# Converting to Hugging Face Dataset
dataset = Dataset.from_pandas(df)

In [9]:
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [10]:
def preprocess(examples):
    images = [Image.open(path).convert("RGB") for path in examples['image_path']]
    pixel_values = processor(images=images, return_tensors="pt").pixel_values
    labels = processor.tokenizer(examples['label'], padding="max_length", max_length=16, truncation=True).input_ids
    return {"pixel_values": pixel_values.squeeze(), "labels": torch.tensor(labels)}

In [11]:
dataset = dataset.map(preprocess, batched=True, remove_columns=['image_name', 'image_path', 'label'])

Map: 100%|████████████████████████████████████████████████████████████████████| 751/751 [00:13<00:00, 55.12 examples/s]


In [12]:
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
model.to('cuda' if torch.cuda.is_available() else 'cpu')

Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-handwritten and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


VisionEncoderDecoderModel(
  (encoder): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=False)
              (key): Linear(in_features=768, out_features=768, bias=False)
              (value): Linear(in_features=768, out_features=768, bias=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (i

In [14]:
model.config.decoder_start_token_id = processor.tokenizer.bos_token_id
model.config.pad_token_id = processor.tokenizer.pad_token_id
model.config.vocab_size = model.config.decoder.vocab_size

In [15]:
training_args = Seq2SeqTrainingArguments(
    output_dir="./trocr_numberplate_finetuned",
    per_device_train_batch_size=4,
    num_train_epochs=3,
    logging_steps=10,
    save_steps=500
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset
)

trainer.train()

`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
10,4.1919
20,1.1769
30,1.1229
40,0.9715
50,0.9829
60,0.9048
70,0.8146
80,0.7996
90,0.7666
100,0.7835


TrainOutput(global_step=564, training_loss=0.5393171388629481, metrics={'train_runtime': 7345.8243, 'train_samples_per_second': 0.307, 'train_steps_per_second': 0.077, 'total_flos': 1.685886535313916e+18, 'train_loss': 0.5393171388629481, 'epoch': 3.0})

In [17]:
model.save_pretrained('./trocr_numberplate_finetuned')
processor.save_pretrained('./trocr_numberplate_finetuned')

[]

In [18]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Loading fine-tuned model
model = VisionEncoderDecoderModel.from_pretrained('./trocr_numberplate_finetuned').to(device)
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')

# Evaluating on 10 random samples
model.eval()
correct = 0
total = 10

samples = df.sample(total)

for idx, row in samples.iterrows():
    img_path = os.path.join(images_dir, row['image_name'])
    image = Image.open(img_path).convert("RGB")
    
    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
    
    # Prediction
    generated_ids = model.generate(pixel_values)
    prediction = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    
    print(f"GT: {row['label']}, Prediction: {prediction}")
    
    # Checking if prediction matches GT
    if row['label'] in prediction.replace(" ", "").replace("-", ""):
        correct += 1

print(f"TrOCR fine-tuned model accuracy on {total} samples: {correct/total*100:.2f}%")

GT: UAA47C, Prediction: UU47
GT: BT000, Prediction: BT
GT: 615KPI, Prediction: 615PIPI
GT: 547KPE, Prediction: 555547PE
GT: 0, Prediction: 0
GT: 704BBJ, Prediction: 704BB
GT: G0LFING, Prediction: GG0LING
GT: KAB277, Prediction: KK277
GT: DIS6847, Prediction: DIS47
GT: 299YX, Prediction: 299X
TrOCR fine-tuned model accuracy on 10 samples: 10.00%


In [20]:
from fuzzywuzzy import fuzz

threshold = 80  
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Loading fine-tuned model
model = VisionEncoderDecoderModel.from_pretrained('./trocr_numberplate_finetuned').to(device)
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')

# Evaluating on 10 random samples
model.eval()
correct = 0
total = 10

samples = df.sample(total)

for idx, row in samples.iterrows():
    img_path = os.path.join(images_dir, row['image_name'])
    image = Image.open(img_path).convert("RGB")
    
    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
    
    # Prediction
    generated_ids = model.generate(pixel_values)
    prediction = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    
    print(f"GT: {row['label']}, Prediction: {prediction}")
    
    # Checking if prediction matches GT
    if row['label'] in prediction.replace(" ", "").replace("-", ""):
        correct += 1

    score = fuzz.ratio(row['label'], prediction.replace(" ", "").replace("-", ""))
    print(f"GT: {row['label']}, Prediction: {prediction}, Similarity: {score}%")
    
    if score >= threshold:
        correct += 1
        
print(f"TrOCR fine-tuned model accuracy on {total} samples: {correct/total*100:.2f}%")
print(f"Fuzzy match accuracy on {total} samples: {correct/total*100:.2f}%")

GT: 696PAI, Prediction: 696PA
GT: 696PAI, Prediction: 696PA, Similarity: 91%
GT: A000956, Prediction: AA956
GT: A000956, Prediction: AA956, Similarity: 67%
GT: NRM331, Prediction: NR331
GT: NRM331, Prediction: NR331, Similarity: 91%
GT: 713731, Prediction: 7137
GT: 713731, Prediction: 7137, Similarity: 80%
GT: W17K, Prediction: WWK
GT: W17K, Prediction: WWK, Similarity: 57%
GT: CX4887, Prediction: CC48
GT: CX4887, Prediction: CC48, Similarity: 60%
GT: HNB773, Prediction: HH773
GT: HNB773, Prediction: HH773, Similarity: 73%
GT: 1410502, Prediction: 14502
GT: 1410502, Prediction: 14502, Similarity: 83%
GT: 5415791, Prediction: 5415191
GT: 5415791, Prediction: 5415191, Similarity: 86%
GT: 1WG011, Prediction: 11WG
GT: 1WG011, Prediction: 11WG, Similarity: 60%
TrOCR fine-tuned model accuracy on 10 samples: 50.00%
Fuzzy match accuracy on 10 samples: 50.00%


In [21]:
processor = TrOCRProcessor.from_pretrained('./trocr_numberplate_finetuned')
model = VisionEncoderDecoderModel.from_pretrained('./trocr_numberplate_finetuned').to('cuda' if torch.cuda.is_available() else 'cpu')

def read_number_plate(image_path):
    image = Image.open(image_path).convert('RGB')
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(model.device)

    generated_ids = model.generate(pixel_values)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return generated_text

sample_img = "D:/projects/Automated Vehicle Speed & Number Plate Detection System/data/usimages/ak1165.png"

predicted_plate = read_number_plate(sample_img)
print("Predicted number plate:", predicted_plate)

Predicted number plate: FU999


Dataset for number plate detection fine-tuning

In [22]:
data_dir = r"D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\number plate detector data"
output_labels_dir = os.path.join(data_dir, "labels_yolo")
os.makedirs(output_labels_dir, exist_ok=True)

# Process each txt label file
for file in os.listdir(data_dir):
    if file.endswith(".txt"):
        txt_path = os.path.join(data_dir, file)
        
        with open(txt_path, "r") as f:
            line = f.readline().strip()
            parts = line.split("\t")
            img_name, x, y, w, h, plate = parts
            
            # Read image to get dimensions
            img_path = os.path.join(data_dir, img_name)
            img = cv2.imread(img_path)
            img_h, img_w = img.shape[:2]
            
            # Converting to YOLO format
            x, y, w, h = map(float, [x, y, w, h])
            x_center = (x + w/2) / img_w
            y_center = (y + h/2) / img_h
            w_norm = w / img_w
            h_norm = h / img_h
            
            yolo_line = f"0 {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}\n"
            
            # Writing to new label file (same filename but in output dir)
            out_label_path = os.path.join(output_labels_dir, file)
            with open(out_label_path, "w") as out_f:
                out_f.write(yolo_line)

print("Conversion to YOLO format completed")

Conversion to YOLO format completed


In [23]:
import shutil

In [24]:
labels_dir = os.path.join(data_dir, "labels_yolo")

output_base = os.path.join(data_dir, "yolo_dataset")
images_train_dir = os.path.join(output_base, "images", "train")
images_val_dir = os.path.join(output_base, "images", "val")
labels_train_dir = os.path.join(output_base, "labels", "train")
labels_val_dir = os.path.join(output_base, "labels", "val")

# Creating directories
for d in [images_train_dir, images_val_dir, labels_train_dir, labels_val_dir]:
    os.makedirs(d, exist_ok=True)

# Getting all image names
all_images = [f for f in os.listdir(data_dir) if f.endswith(".jpg")]

# Shuffle and split
random.shuffle(all_images)
split_idx = int(len(all_images) * 0.9)
train_imgs = all_images[:split_idx]
val_imgs = all_images[split_idx:]

def move_files(img_list, img_dest, label_dest):
    for img_name in img_list:
        # Move image
        shutil.copy(os.path.join(data_dir, img_name), os.path.join(img_dest, img_name))
        
        # Move label
        label_name = img_name.replace(".jpg", ".txt")
        shutil.copy(os.path.join(labels_dir, label_name), os.path.join(label_dest, label_name))

# Moving train and val data
move_files(train_imgs, images_train_dir, labels_train_dir)
move_files(val_imgs, images_val_dir, labels_val_dir)

print(f"Dataset organized: {len(train_imgs)} train images, {len(val_imgs)} val images")

Dataset organized: 199 train images, 23 val images


In [None]:
model = YOLO('yolov8n.yaml')

# Training the model
results = model.train(
    data=r"D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\number plate detector data\yolo_dataset\data.yaml",
    epochs=50,
    imgsz=640,
    batch=16,
    device=0 
)


model.save("number_plate_detector_yolov8n.pt")

New https://pypi.org/project/ultralytics/8.3.163 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.162  Python-3.11.0 torch-2.5.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\number plate detector data\yolo_dataset\data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=tr

100%|███████████████████████████████████████████████████████████████████████████████| 755k/755k [00:00<00:00, 1.38MB/s]

Overriding model.yaml nc=80 with nc=1

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics




 22        [15, 18, 21]  1    751507  ultralytics.nn.modules.head.Detect           [1, [64, 128, 256]]           
YOLOv8n summary: 129 layers, 3,011,043 parameters, 3,011,027 gradients, 8.2 GFLOPs

Freezing layer 'model.22.dfl.conv.weight'
[34m[1mAMP: [0mrunning Automatic Mixed Precision (AMP) checks...
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt'...


100%|█████████████████████████████████████████████████████████████████████████████| 5.35M/5.35M [00:01<00:00, 3.36MB/s]


[34m[1mAMP: [0mchecks passed 
[34m[1mtrain: [0mFast image access  (ping: 0.10.1 ms, read: 156.5146.4 MB/s, size: 90.9 KB)


[34m[1mtrain: [0mScanning D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\number plate detector data\yol[0m

[34m[1mtrain: [0mNew cache created: D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\number plate detector data\yolo_dataset\labels\train.cache





In [7]:
model = YOLO('yolov8n.yaml')

results = model.train(
    data=r"D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\number plate detector data\yolo_dataset\data.yaml",  
    epochs=50,       
    imgsz=640,       
    batch=4,         
    device=0,       
    workers=0,      
    optimizer='AdamW',  
    verbose=True
)

model.save("number_plate_detector_yolov8n.pt")

New https://pypi.org/project/ultralytics/8.3.163 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.162  Python-3.11.0 torch-2.5.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\number plate detector data\yolo_dataset\data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=tra

[34m[1mtrain: [0mScanning D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\number plate detector data\yol[0m

[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 604.9298.6 MB/s, size: 79.6 KB)



[34m[1mval: [0mScanning D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\number plate detector data\yolo_[0m

Plotting labels to runs\detect\train3\labels.jpg... 





[34m[1moptimizer:[0m AdamW(lr=0.01, momentum=0.937) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns\detect\train3[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50     0.646G      4.802      7.408       3.72          5        640: 100%|██████████| 50/50 [00:09<00:00,  5.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/50     0.646G      3.812      3.838      2.462          3        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/50     0.646G      3.273      3.633      2.341          4        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23    0.00234     0.0435    0.00124   0.000494






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/50     0.646G      3.345      3.419      2.168          4        640: 100%|██████████| 50/50 [00:07<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.104      0.087     0.0268     0.0115






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/50     0.646G      3.007      3.185      2.014          3        640: 100%|██████████| 50/50 [00:07<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23    0.00972      0.217     0.0133    0.00365






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/50     0.646G      2.848      2.964      1.846          5        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.142       0.13     0.0906      0.039






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/50     0.646G      2.717      2.675      1.916          4        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23   0.000276     0.0435   8.94e-05   5.35e-05






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/50     0.646G      2.595      2.508      1.692          8        640: 100%|██████████| 50/50 [00:07<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.573      0.261      0.301      0.134






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/50     0.646G      2.429      2.275      1.682          4        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.576      0.087     0.0892     0.0464






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/50     0.646G       2.32      2.117      1.632          4        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.724      0.348      0.386      0.163






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/50     0.646G      2.367      2.071      1.595          6        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.237      0.217      0.174     0.0683






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/50     0.646G      2.281      2.002      1.607          9        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23       0.41      0.609      0.471      0.212






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/50     0.646G      2.156       1.86      1.498          7        640: 100%|██████████| 50/50 [00:07<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.492      0.391      0.445      0.241






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/50     0.646G      2.062      1.723      1.495          3        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.732      0.239       0.39      0.213






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/50     0.646G          2      1.736      1.458          3        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23       0.69      0.522      0.539      0.265






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/50     0.646G      1.947      1.679      1.456          5        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.559      0.565      0.595      0.371






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/50     0.646G      1.895      1.598      1.427          5        640: 100%|██████████| 50/50 [00:10<00:00,  4.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23       0.52      0.304      0.335      0.147






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/50     0.646G       2.07      1.767      1.493          9        640: 100%|██████████| 50/50 [00:08<00:00,  5.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.874      0.565       0.68      0.361






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/50     0.646G      1.853      1.521      1.392          5        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23          1      0.608      0.777      0.468






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/50     0.646G      1.792      1.485      1.405          3        640: 100%|██████████| 50/50 [00:07<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.631      0.609      0.585      0.308






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/50     0.646G      1.789      1.474      1.367          7        640: 100%|██████████| 50/50 [00:07<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.891      0.609      0.713      0.444






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/50     0.646G       1.75      1.424      1.361          6        640: 100%|██████████| 50/50 [00:07<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.792      0.652      0.697      0.416






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/50     0.646G      1.669      1.433      1.338          9        640: 100%|██████████| 50/50 [00:07<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.558      0.348        0.4      0.255






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/50     0.646G      1.686      1.302      1.283          5        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.903      0.696      0.835      0.505






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/50     0.646G      1.707      1.333      1.364          6        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.881      0.652       0.79      0.473






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      26/50     0.646G       1.62      1.256      1.255          6        640: 100%|██████████| 50/50 [00:09<00:00,  5.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.876      0.612      0.771      0.452






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      27/50     0.646G      1.519      1.197      1.219          8        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23          1      0.641      0.773      0.436






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      28/50     0.646G      1.663      1.211       1.25          9        640: 100%|██████████| 50/50 [00:07<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23       0.79      0.826      0.843      0.539






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      29/50     0.646G      1.498      1.069      1.232          5        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23       0.89      0.706      0.848      0.512






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      30/50     0.646G       1.61      1.157      1.241          6        640: 100%|██████████| 50/50 [00:09<00:00,  5.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.981      0.739      0.867      0.544






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      31/50     0.646G      1.541      1.122      1.226          5        640: 100%|██████████| 50/50 [00:09<00:00,  5.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.948      0.799      0.902      0.586






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      32/50     0.646G      1.491      1.107       1.21         12        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23          1      0.647      0.806       0.48






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      33/50     0.646G      1.472      1.153      1.238          9        640: 100%|██████████| 50/50 [00:07<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.994      0.652       0.81      0.514






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      34/50     0.646G      1.426      1.043      1.168          6        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23          1      0.814      0.888      0.571






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      35/50     0.646G      1.401      1.011      1.147          0        640: 100%|██████████| 50/50 [00:08<00:00,  5.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.964      0.826      0.911      0.564






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      36/50     0.646G      1.418      1.031      1.189          7        640: 100%|██████████| 50/50 [00:08<00:00,  5.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.962      0.739      0.865      0.557






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      37/50     0.646G      1.334          1      1.171          5        640: 100%|██████████| 50/50 [00:10<00:00,  4.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.964      0.739      0.877      0.547






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      38/50     0.646G      1.379     0.9782       1.15          6        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.867      0.826      0.908      0.571






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      39/50     0.646G      1.364      1.034      1.168          4        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.912      0.907       0.94      0.598






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      40/50     0.646G      1.356     0.9448       1.13         10        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.928       0.87       0.94      0.573





Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      41/50     0.646G      1.194     0.9214      1.114          3        640: 100%|██████████| 50/50 [00:07<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23       0.94      0.739      0.915      0.574






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      42/50     0.646G      1.243     0.9124      1.131          3        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.872      0.913      0.925      0.609






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      43/50     0.646G      1.231      0.893       1.14          3        640: 100%|██████████| 50/50 [00:10<00:00,  4.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.871       0.87      0.928      0.615






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      44/50     0.646G      1.186     0.8603      1.099          3        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23          1      0.772      0.925      0.613






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      45/50     0.646G      1.201     0.8919      1.098          3        640: 100%|██████████| 50/50 [00:07<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.951       0.85       0.93      0.615






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      46/50     0.646G      1.154     0.8581      1.105          3        640: 100%|██████████| 50/50 [00:08<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23          1      0.849      0.945      0.623






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      47/50     0.646G      1.131     0.8192      1.094          3        640: 100%|██████████| 50/50 [00:07<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.967       0.87      0.953      0.617






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      48/50     0.646G      1.184     0.8289      1.123          3        640: 100%|██████████| 50/50 [00:09<00:00,  5.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.955      0.929      0.955      0.589






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      49/50     0.646G      1.151      0.791      1.085          3        640: 100%|██████████| 50/50 [00:08<00:00,  5.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.956      0.957       0.96       0.65






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      50/50     0.646G      1.099     0.7881      1.089          3        640: 100%|██████████| 50/50 [00:07<00:00,  6.
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0

                   all         23         23      0.962      0.913      0.958      0.668






50 epochs completed in 0.127 hours.
Optimizer stripped from runs\detect\train3\weights\last.pt, 6.2MB
Optimizer stripped from runs\detect\train3\weights\best.pt, 6.2MB

Validating runs\detect\train3\weights\best.pt...
Ultralytics 8.3.162  Python-3.11.0 torch-2.5.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)
YOLOv8n summary (fused): 72 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<0


                   all         23         23      0.962      0.913      0.958      0.668
Speed: 0.3ms preprocess, 3.0ms inference, 0.0ms loss, 1.8ms postprocess per image
Results saved to [1mruns\detect\train3[0m


In [15]:
model = YOLO("runs/detect/train3/weights/best.pt")

# Testing on a sample image
img_path = r"D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\number plate detector data\0b86cecf-67d1-4fc0-87c9-b36b0ee228bb.jpg"
results = model(img_path, show=True)
results[0].show()


image 1/1 D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\number plate detector data\0b86cecf-67d1-4fc0-87c9-b36b0ee228bb.jpg: 384x640 1 number_plate, 35.2ms
Speed: 3.1ms preprocess, 35.2ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)


In [17]:
# Loading models
plate_detector = YOLO("runs/detect/train3/weights/best.pt")
processor = TrOCRProcessor.from_pretrained('./trocr_numberplate_finetuned', use_fast=True)
trocr_model = VisionEncoderDecoderModel.from_pretrained('./trocr_numberplate_finetuned').to('cuda' if torch.cuda.is_available() else 'cpu')

# Detecting plate
img_path = r"D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\number plate detector data\0b86cecf-67d1-4fc0-87c9-b36b0ee228bb.jpg"
results = plate_detector(img_path)

# Reading plate text
for box in results[0].boxes:
    x1, y1, x2, y2 = map(int, box.xyxy[0])
    plate_img = Image.open(img_path).crop((x1, y1, x2, y2))
    
    pixel_values = processor(images=plate_img, return_tensors="pt").pixel_values.to(trocr_model.device)
    generated_ids = trocr_model.generate(pixel_values)
    plate_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    
    print(f"Detected Plate Text: {plate_text}")


image 1/1 D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\number plate detector data\0b86cecf-67d1-4fc0-87c9-b36b0ee228bb.jpg: 384x640 1 number_plate, 13.9ms
Speed: 2.6ms preprocess, 13.9ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)
Detected Plate Text: YYGX


Final validation script

In [24]:
# Loading models
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

vehicle_model = YOLO("yolov8n.pt")
plate_model = YOLO("number_plate_detector_yolov8n.pt")

processor = TrOCRProcessor.from_pretrained('./trocr_numberplate_finetuned')
trocr_model = VisionEncoderDecoderModel.from_pretrained('./trocr_numberplate_finetuned').to(device)

tracker = Sort()

# Speed Calculation
def estimate_speed(p1, p2, fps, ppm=8.8):
    dist_pixels = np.linalg.norm(np.array(p2) - np.array(p1))
    dist_meters = dist_pixels / ppm
    speed = (dist_meters * fps) * 3.6
    return speed

cap = cv2.VideoCapture(r"D:\projects\Automated Vehicle Speed & Number Plate Detection System\data\videos\2103099-uhd_3840_2160_30fps.mp4")
fps = cap.get(cv2.CAP_PROP_FPS)
vehicle_positions = {}

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    results = vehicle_model(frame)
    dets = []

    for box in results[0].boxes:
        cls_id = int(box.cls[0].cpu().numpy())
        if cls_id in [2, 3, 5, 7]:
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
            conf = box.conf[0].cpu().numpy()
            dets.append([x1, y1, x2, y2, conf])

    dets = np.array(dets)
    tracks = tracker.update(dets)

    for track in tracks:
        x1, y1, x2, y2, track_id = track
        cx = int((x1 + x2) / 2)
        cy = int((y1 + y2) / 2)

        if track_id in vehicle_positions:
            prev_pos = vehicle_positions[track_id]
            speed = estimate_speed(prev_pos, (cx, cy), fps)
        else:
            speed = 0

        vehicle_positions[track_id] = (cx, cy)

        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0,255,0), 2)
        cv2.putText(frame, f'ID:{int(track_id)} Speed:{int(speed)}km/h', (int(x1), int(y1)-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,0), 2)

        vehicle_crop = frame[int(y1):int(y2), int(x1):int(x2)]
        plate_results = plate_model(vehicle_crop)

        for pbox in plate_results[0].boxes:
            px1, py1, px2, py2 = pbox.xyxy[0].cpu().numpy()
            plate_crop = vehicle_crop[int(py1):int(py2), int(px1):int(px2)]

            plate_rgb = cv2.cvtColor(plate_crop, cv2.COLOR_BGR2RGB)
            inputs = processor(images=plate_rgb, return_tensors="pt").to(device)
            outputs = trocr_model.generate(**inputs)
            plate_text = processor.batch_decode(outputs, skip_special_tokens=True)[0]

            cv2.rectangle(vehicle_crop, (int(px1), int(py1)), (int(px2), int(py2)), (255,0,0), 2)
            cv2.putText(vehicle_crop, plate_text, (int(px1), int(py1)-10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 2)

    frame_resized = cv2.resize(frame, (1280, int(frame.shape[0] * 1280 / frame.shape[1])))
    cv2.imshow("Vehicle Speed & Plate Detection", frame_resized)

    if cv2.waitKey(1) == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


0: 384x640 21 cars, 1 bus, 2 trucks, 13.5ms
Speed: 2.2ms preprocess, 13.5ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 640x640 (no detections), 15.3ms
Speed: 2.6ms preprocess, 15.3ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640)

0: 576x640 (no detections), 11.5ms
Speed: 2.3ms preprocess, 11.5ms inference, 1.1ms postprocess per image at shape (1, 3, 576, 640)

0: 640x640 (no detections), 13.5ms
Speed: 3.8ms preprocess, 13.5ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 288x640 (no detections), 12.2ms
Speed: 1.9ms preprocess, 12.2ms inference, 0.9ms postprocess per image at shape (1, 3, 288, 640)

0: 640x608 (no detections), 13.1ms
Speed: 3.1ms preprocess, 13.1ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 608)

0: 352x640 (no detections), 15.7ms
Speed: 1.8ms preprocess, 15.7ms inference, 1.0ms postprocess per image at shape (1, 3, 352, 640)

0: 640x608 (no detections), 14.6ms
Speed: 2.9ms preprocess,