# Car License Plate Detection and Recognition System

## Project Overview

This project implements an automated car license plate detection and recognition system using computer vision and deep learning techniques. The system is designed to detect license plates from vehicle images or video streams, extract the plate region, and recognize the characters for further processing.

### Team Members
- **Ahmed Al-duais**
- **Abulkareem Thiab**
- **Ayman Mrwan**

### Project Goals
1. Develop an accurate license plate detection system using YOLO
2. Implement OCR-based text recognition for Arabic and English characters
3. Create user-friendly GUI applications for real-time processing
4. Compare different model architectures and select the optimal one

### Technology Stack
- **Deep Learning**: YOLOv8 (Ultralytics)
- **OCR**: EasyOCR
- **GUI Framework**: PySide6 (Qt)
- **Computer Vision**: OpenCV
- **Data Processing**: NumPy, Pandas
- **Visualization**: Matplotlib, Seaborn


In [None]:
# Import necessary libraries
import os
import sys
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Plot appearance: apply the 'seaborn-v0_8' style sheet and the "husl" colour palette
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Notebook-wide figure defaults, applied in a single rcParams update
plt.rcParams.update({'figure.figsize': (12, 8), 'font.size': 12})

# Confirm the imports worked and report the library versions in use
print(
    "Libraries imported successfully!",
    f"OpenCV version: {cv2.__version__}",
    f"NumPy version: {np.__version__}",
    f"Pandas version: {pd.__version__}",
    sep="\n",
)


## 1. Dataset Analysis and Exploration

### Dataset Structure

Our project utilizes two main datasets for training and validation:

1. **Primary Dataset**: `plat number car yemen.v1i.yolov8`
   - Single class: "private" 
   - 80 training images, 16 validation images, 8 test images
   - Focus on private vehicle license plates

2. **Extended Dataset**: `yemen-plate`
   - Three classes: "city", "number", "text"
   - 52 training images, 52 validation images, 52 test images
   - More detailed annotation for different plate components

Let's explore the dataset structure and characteristics.


In [None]:
# Dataset exploration
def explore_dataset(dataset_path, dataset_name):
    """Print a summary of a YOLO-style dataset directory.

    For each of the ``train``, ``valid`` and ``test`` splits the number of
    image files (``.jpg``/``.jpeg``/``.png``, case-insensitive) found in
    ``<split>/images`` is printed, followed by the number of ``.txt`` files
    in every ``<split>/labels`` directory that exists and, when present, the
    contents of ``data.yaml``.

    Args:
        dataset_path: Root directory of the dataset.
        dataset_name: Display name used in the report header.

    Returns:
        None. The report is written to standard output only.
    """
    print(f"\n=== {dataset_name} Dataset Analysis ===")

    # Check if dataset exists
    if not os.path.exists(dataset_path):
        print(f"Dataset not found at: {dataset_path}")
        return

    splits = ['train', 'valid', 'test']
    image_extensions = ('.jpg', '.jpeg', '.png')

    # Count images in each split
    total_images = 0
    for split in splits:
        split_path = os.path.join(dataset_path, split, 'images')
        # isdir rather than exists: a stray *file* called "images" is then
        # reported as missing instead of making os.listdir raise.
        if os.path.isdir(split_path):
            image_count = sum(
                1 for f in os.listdir(split_path)
                if f.lower().endswith(image_extensions)
            )
            print(f"{split.capitalize()} images: {image_count}")
            total_images += image_count
        else:
            print(f"{split.capitalize()} images: 0 (directory not found)")

    print(f"Total images: {total_images}")

    # Check for labels (splits without a labels directory are skipped silently)
    for split in splits:
        label_path = os.path.join(dataset_path, split, 'labels')
        if os.path.isdir(label_path):
            label_count = sum(1 for f in os.listdir(label_path) if f.endswith('.txt'))
            print(f"{split.capitalize()} labels: {label_count}")

    # Read data.yaml if exists
    yaml_path = os.path.join(dataset_path, 'data.yaml')
    if os.path.isfile(yaml_path):
        print(f"\nDataset configuration found: {yaml_path}")
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default encoding; undecodable bytes are replaced rather
        # than aborting what is only an informational dump.
        with open(yaml_path, 'r', encoding='utf-8', errors='replace') as f:
            print(f.read())

# Explore both datasets
# `datasets` maps a display name to the dataset's root directory (relative
# path). It is a notebook-level name that the sample-visualisation cell
# iterates over as well.
datasets = {
    "Primary Dataset": "src/data/plat number car yemen.v1i.yolov8",
    "Extended Dataset": "src/data/yemen-plate"
}

# Print the per-split image/label statistics for every registered dataset
for name, path in datasets.items():
    explore_dataset(path, name)


In [None]:
# Visualize sample images from the dataset
def visualize_sample_images(dataset_path, dataset_name, num_samples=4):
    """Show a grid of randomly chosen training images from a dataset.

    Args:
        dataset_path: Dataset root; images are read from ``train/images``.
        dataset_name: Display name used in the printed header.
        num_samples: Maximum number of images to display (default 4). Fewer
            are shown when the directory holds fewer images.

    Returns:
        None. Displays a matplotlib figure, or prints a message and returns
        early when the directory is missing or contains no images.
    """
    print(f"\n=== Sample Images from {dataset_name} ===")

    # Get sample images from training set
    train_images_path = os.path.join(dataset_path, 'train', 'images')
    if not os.path.exists(train_images_path):
        print(f"Training images not found at: {train_images_path}")
        return

    image_files = [
        f for f in os.listdir(train_images_path)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    ]
    if not image_files:
        print("No images found in training directory")
        return

    # Select random samples
    import random
    sample_files = random.sample(image_files, min(num_samples, len(image_files)))

    # Size the grid from the number of samples (two columns) instead of a
    # fixed 2x2 layout, so that num_samples > 4 is honoured. With four
    # samples this yields the same 2x2 / 15x10-inch figure as before.
    n_cols = 2
    n_rows = max(1, (len(sample_files) + n_cols - 1) // n_cols)
    # squeeze=False always returns a 2-D array of axes, even for a single row
    _, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)
    axes = axes.ravel()

    for i, filename in enumerate(sample_files):
        img_path = os.path.join(train_images_path, filename)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # rather than raising; skip it instead of crashing in cvtColor.
            print(f"Could not read image: {img_path}")
            axes[i].set_title(f"Sample {i+1}: unreadable")
            axes[i].axis('off')
            continue

        # OpenCV loads BGR; matplotlib expects RGB
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Only add an ellipsis when the filename was actually truncated
        shown_name = filename if len(filename) <= 30 else f"{filename[:30]}..."
        axes[i].imshow(img_rgb)
        axes[i].set_title(f"Sample {i+1}: {shown_name}")
        axes[i].axis('off')

    # Hide unused subplots
    for ax in axes[len(sample_files):]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Visualize samples from both datasets
# (`datasets` is the name -> path mapping defined in the exploration cell)
for name, path in datasets.items():
    visualize_sample_images(path, name)


## 2. Model Development and Training

### YOLO Model Architecture Selection

We tested different YOLO model architectures to find the best balance between detection accuracy and inference speed:

1. **YOLOv8n (Nano)**: Initial attempt with lightweight model
   - Fast inference but insufficient accuracy
   - Results stored in `runs/detect/detect_nano/`

2. **YOLOv8s (Small)**: Final selected model
   - Better accuracy while maintaining reasonable speed
   - 50 epochs training with comprehensive evaluation
   - Results stored in `runs/detect/yolov8n14/`

### Training Configuration

The final model was trained with the following parameters:
- **Model**: YOLOv8s (small)
- **Epochs**: 50
- **Image Size**: 640x640
- **Batch Size**: 9
- **Optimizer**: Auto (AdamW)
- **Learning Rate**: 0.01 (with cosine annealing)
- **Data Augmentation**: Enabled (mosaic, mixup, cutmix)


In [None]:
# Load and analyze training results
def load_training_results(results_path):
    """Load the per-epoch metrics file written by a YOLO training run.

    Args:
        results_path: Run directory expected to contain ``results.csv``.

    Returns:
        A ``pandas.DataFrame`` with one row per logged epoch and
        whitespace-stripped column names, or ``None`` when ``results.csv``
        does not exist (a message is printed in that case).
    """
    results_file = os.path.join(results_path, 'results.csv')

    if not os.path.exists(results_file):
        print(f"Results file not found: {results_file}")
        return None

    # Load results
    df = pd.read_csv(results_file)

    # Strip whitespace around the column names so that lookups such as
    # df['metrics/mAP50(B)'] do not depend on how the CSV header was padded.
    df.columns = [str(col).strip() for col in df.columns]

    print(f"Training completed in {len(df)} epochs")

    # Report the elapsed time only when it is available: the 'time' column
    # may be missing, and a header-only file has no last row to read.
    if 'time' in df.columns and not df.empty:
        total_seconds = df['time'].iloc[-1]
        print(f"Total training time: {total_seconds:.2f} seconds ({total_seconds/3600:.2f} hours)")

    return df

# Load training results
# NOTE(review): the run directory is named "yolov8n14" although the notebook
# text describes the final model as YOLOv8s — confirm this is the intended run.
results_path = "runs/detect/yolov8n14"
# DataFrame of per-epoch metrics, or None when results.csv was not found;
# the plotting cell reads this name as well.
training_results = load_training_results(results_path)

if training_results is not None:
    # .iloc[-1] picks the last logged row, i.e. the values of the final epoch
    print("\n=== Training Results Summary ===")
    print(f"Final mAP50: {training_results['metrics/mAP50(B)'].iloc[-1]:.4f}")
    print(f"Final mAP50-95: {training_results['metrics/mAP50-95(B)'].iloc[-1]:.4f}")
    print(f"Final Precision: {training_results['metrics/precision(B)'].iloc[-1]:.4f}")
    print(f"Final Recall: {training_results['metrics/recall(B)'].iloc[-1]:.4f}")
    print(f"Final Box Loss: {training_results['train/box_loss'].iloc[-1]:.4f}")
    print(f"Final Class Loss: {training_results['train/cls_loss'].iloc[-1]:.4f}")


In [None]:
# Visualize training metrics
def plot_training_metrics(df):
    """Draw a 2x2 dashboard of the training history.

    Panels, in row-major order: training losses, validation losses,
    precision/recall, and mAP. Every curve is plotted against ``df['epoch']``.

    Args:
        df: DataFrame holding an ``epoch`` column plus every loss/metric
            column named in the panel table below.
    """
    # One entry per panel:
    #   (title, y-axis label, [(column, legend label, colour), ...])
    panels = [
        ('Training Losses', 'Loss', [
            ('train/box_loss', 'Box Loss', 'blue'),
            ('train/cls_loss', 'Class Loss', 'red'),
            ('train/dfl_loss', 'DFL Loss', 'green'),
        ]),
        ('Validation Losses', 'Loss', [
            ('val/box_loss', 'Val Box Loss', 'blue'),
            ('val/cls_loss', 'Val Class Loss', 'red'),
            ('val/dfl_loss', 'Val DFL Loss', 'green'),
        ]),
        ('Precision and Recall', 'Score', [
            ('metrics/precision(B)', 'Precision', 'purple'),
            ('metrics/recall(B)', 'Recall', 'orange'),
        ]),
        ('Mean Average Precision', 'mAP', [
            ('metrics/mAP50(B)', 'mAP@0.5', 'red'),
            ('metrics/mAP50-95(B)', 'mAP@0.5:0.95', 'blue'),
        ]),
    ]

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # axes.flat walks the grid row by row, matching the order of `panels`
    for ax, (title, y_label, curves) in zip(axes.flat, panels):
        for column, legend_label, colour in curves:
            ax.plot(df['epoch'], df[column], label=legend_label, color=colour)
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.legend()
        ax.grid(True)

    plt.tight_layout()
    plt.show()

# Plot the curves only when results.csv was found and loaded
if training_results is not None:
    plot_training_metrics(training_results)


## 3. Model Implementation and Architecture

### Core Components

Our license plate recognition system consists of three main components:

1. **Plate Detector**: YOLO-based object detection for locating license plates
2. **Plate Recognizer**: OCR-based text extraction using EasyOCR
3. **GUI Application**: PySide6-based user interface for real-time processing

### Plate Detection Module

The `PlateDetector` class handles the detection of license plates in images:

```python
class PlateDetector:
    """Locate license plates in an image with a YOLO model."""

    def __init__(self, model_path):
        """Load the YOLO weights found at ``model_path``."""
        self.model = YOLO(model_path)

    def detect(self, image, conf_threshold=0.5):
        """Return every detection whose confidence reaches the threshold.

        Args:
            image: Image handed to the model; it is also sliced as
                ``image[y1:y2, x1:x2]`` to produce each crop.
            conf_threshold: Minimum confidence (inclusive) for a box to be kept.

        Returns:
            A list of dicts with the keys ``'bbox'`` (``(x1, y1, x2, y2)`` as
            ints), ``'confidence'`` (float) and ``'crop'`` (the image region
            inside the box).
        """
        plates = []

        for result in self.model(image):
            for candidate in result.boxes:
                score = float(candidate.conf[0])
                if score >= conf_threshold:
                    # Box corners come back as floats; truncate to pixel indices
                    corners = candidate.xyxy[0].cpu().numpy()
                    left, top, right, bottom = (int(value) for value in corners)
                    plates.append({
                        'bbox': (left, top, right, bottom),
                        'confidence': score,
                        'crop': image[top:bottom, left:right],
                    })

        return plates
```

### Text Recognition Module

The `PlateRecognizer` class handles OCR for Arabic and English text:

```python
class PlateRecognizer:
    """Read the text on a cropped license plate with EasyOCR."""

    def __init__(self, languages=None):
        """Create the OCR reader.

        Args:
            languages: Language codes passed to ``easyocr.Reader``.
                Defaults to ``['en', 'ar']`` when omitted.
        """
        # A None sentinel replaces the former list-literal default, so the
        # default list is created per call instead of being one shared
        # mutable object.
        if languages is None:
            languages = ['en', 'ar']
        self.reader = easyocr.Reader(languages)

    def recognize(self, image):
        """Recognize text from a license plate image.

        Args:
            image: Plate image handed to ``reader.readtext``.

        Returns:
            The recognized text fragments joined with single spaces, or the
            canonical phrase below when one of its keywords was detected.
        """
        ocr_results = self.reader.readtext(image)
        # Element 1 of each OCR result is the recognized string
        text_detected = " ".join(res[1] for res in ocr_results)

        # Fix common misread words
        # NOTE(review): this replaces the *entire* recognized string — any
        # other recognized text included — with the canonical phrase; confirm
        # that is intended.
        if "خصوصي" in text_detected or "نقل" in text_detected:
            text_detected = "خصوصي نقل اجرة"

        return text_detected
```


In [None]:
# Test the model on sample images
def test_model_on_samples():
    """Run detection and OCR on one test image and display the outcome.

    Loads the trained YOLO weights and an EasyOCR reader, picks the
    alphabetically first image of the primary dataset's test split, shows the
    original next to an annotated copy, and prints the OCR text of every
    detected plate.

    Returns:
        None. Progress and results are printed; a message is printed and the
        function returns early when the libraries, the model file, the test
        directory or a readable test image are missing.
    """
    try:
        from ultralytics import YOLO
        import easyocr

        # Load the trained model
        model_path = "src/models/yolov8s14/weights/best.pt"
        if not os.path.exists(model_path):
            print(f"Model not found at: {model_path}")
            return

        model = YOLO(model_path)
        print("Model loaded successfully!")

        # Load OCR
        reader = easyocr.Reader(['en', 'ar'])
        print("OCR reader initialized!")

        # Test on sample images
        test_images_path = "src/data/plat number car yemen.v1i.yolov8/test/images"
        if not os.path.exists(test_images_path):
            print("Test images directory not found")
            return

        # os.listdir order is arbitrary; sort so the "first" image is the
        # same one on every run and platform.
        image_files = sorted(
            f for f in os.listdir(test_images_path)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        )
        if not image_files:
            print("No test images found")
            return

        # Test on first image
        test_image = os.path.join(test_images_path, image_files[0])
        print(f"\nTesting on: {image_files[0]}")

        # Load and process image
        img = cv2.imread(test_image)
        if img is None:
            # cv2.imread returns None for unreadable files instead of raising
            print(f"Could not read image: {test_image}")
            return
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        height, width = img.shape[:2]

        # Detect plates
        results = model(img)

        # Process results
        detections = []
        for r in results:
            for box in r.boxes:
                conf = float(box.conf[0])
                if conf >= 0.5:  # Confidence threshold
                    x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
                    # Clamp to the image bounds: a negative index would make
                    # the slice below wrap around and return the wrong region.
                    x1, y1 = max(x1, 0), max(y1, 0)
                    x2, y2 = min(x2, width), min(y2, height)
                    detections.append({
                        'bbox': (x1, y1, x2, y2),
                        'confidence': conf,
                        'crop': img[y1:y2, x1:x2]
                    })

        print(f"Found {len(detections)} license plates")

        # Display results
        fig, axes = plt.subplots(1, 2, figsize=(15, 6))

        # Original image
        axes[0].imshow(img_rgb)
        axes[0].set_title("Original Image")
        axes[0].axis('off')

        # Annotated image (drawn on a copy so the original panel stays clean)
        annotated = img_rgb.copy()
        for i, det in enumerate(detections):
            x1, y1, x2, y2 = det['bbox']
            cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(annotated, f"Plate {i+1}: {det['confidence']:.2f}",
                        (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        axes[1].imshow(annotated)
        axes[1].set_title("Detection Results")
        axes[1].axis('off')

        plt.tight_layout()
        plt.show()

        # Test OCR on detected plates
        for i, det in enumerate(detections):
            if det['crop'].size == 0:
                # Degenerate (zero-area) box: nothing to read
                continue
            try:
                ocr_results = reader.readtext(det['crop'])
                text_detected = " ".join(res[1] for res in ocr_results)
                print(f"Plate {i+1} text: {text_detected}")
            except Exception as e:
                # Best effort: one failing crop must not stop the others
                print(f"OCR failed for plate {i+1}: {e}")

    except ImportError as e:
        print(f"Required libraries not available: {e}")
        print("Please install ultralytics and easyocr to test the model")

# Test the model: run the detection + OCR check defined above on one test image
test_model_on_samples()
