1. Training Set 1: This set comprises 900 images, with each image featuring a single car along with its corresponding license plate. The provided annotations include the coordinates of the bounding box (ymin, xmin, ymax, xmax) that outlines the license plate in each image.

In [1]:
import cv2
import os
import pandas as pd

# Paths (update to your actual paths)
csv_file = r"C:\Users\naven\Desktop\Navendar\Licplatesdetection_train.csv"  # Path to CSV file
image_dir = r"C:\Users\naven\Desktop\Navendar\Data\license_plates_detection_train"           # Folder with images
output_dir = r"C:\Users\naven\Desktop\Navendar\Annoted_images"               # Folder for annotated images

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Load the annotation table; nothing below can run without it, so any
# failure here aborts the script with a non-zero status.
try:
    df = pd.read_csv(csv_file)
except Exception as e:
    print(f"Error reading CSV file: {e}")
    # `exit` is a helper injected by the `site` module for interactive
    # sessions; raising SystemExit directly does not depend on it.
    raise SystemExit(1) from e

# Every row must provide the image name and the four box coordinates.
required_columns = ["img_id", "ymin", "xmin", "ymax", "xmax"]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    print(f"CSV file must contain columns: {required_columns}")
    raise SystemExit(1)

def draw_bounding_box(image_path, bbox, output_path):
    """Draw a bounding box on an image and save the annotated copy.

    Args:
        image_path: Path of the image to annotate.
        bbox: Four numbers ordered ``(ymin, xmin, ymax, xmax)``; each value
            is converted with ``int`` before drawing.
        output_path: Destination path for the annotated image.

    Returns:
        None. Success and failure are both reported with ``print``.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Could not load {image_path}")
        return

    ymin, xmin, ymax, xmax = map(int, bbox)
    # OpenCV takes corner points as (x, y); the colour tuple is BGR, so
    # (0, 0, 255) draws a red outline.
    cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)

    # imwrite signals failure through its boolean result, so only report
    # success when the file was actually written.
    if not cv2.imwrite(output_path, img):
        print(f"Error: Could not save {output_path}")
        return
    print(f"Saved to {output_path}")

# Annotate every image listed in the CSV: walk the required columns in
# lockstep, one (image name, box coordinates) record per row.
for image_name, *bbox in zip(
    df["img_id"], df["ymin"], df["xmin"], df["ymax"], df["xmax"]
):
    # bbox is [ymin, xmin, ymax, xmax], the order draw_bounding_box expects
    image_path = os.path.join(image_dir, image_name)
    output_path = os.path.join(output_dir, f"annotated_{image_name}")
    draw_bounding_box(image_path, bbox, output_path)

Saved to C:\Users\naven\Desktop\Navendar\Annoted_images\annotated_1.jpg
Saved to C:\Users\naven\Desktop\Navendar\Annoted_images\annotated_10.jpg
Saved to C:\Users\naven\Desktop\Navendar\Annoted_images\annotated_100.jpg
Saved to C:\Users\naven\Desktop\Navendar\Annoted_images\annotated_101.jpg
Saved to C:\Users\naven\Desktop\Navendar\Annoted_images\annotated_102.jpg
Saved to C:\Users\naven\Desktop\Navendar\Annoted_images\annotated_103.jpg
Saved to C:\Users\naven\Desktop\Navendar\Annoted_images\annotated_104.jpg
Saved to C:\Users\naven\Desktop\Navendar\Annoted_images\annotated_105.jpg
Saved to C:\Users\naven\Desktop\Navendar\Annoted_images\annotated_106.jpg
Saved to C:\Users\naven\Desktop\Navendar\Annoted_images\annotated_107.jpg
Saved to C:\Users\naven\Desktop\Navendar\Annoted_images\annotated_108.jpg
Saved to C:\Users\naven\Desktop\Navendar\Annoted_images\annotated_109.jpg
Saved to C:\Users\naven\Desktop\Navendar\Annoted_images\annotated_11.jpg
Saved to C:\Users\naven\Desktop\Navendar\A

2. Training Set 2: Consisting of another 900 images, this set focuses solely on license plates. Each image in this set contains a license plate, and the provided annotations contain the characters present on each license plate.

In [3]:
import cv2
import os
import pandas as pd
from ultralytics import YOLO
import pytesseract
from PIL import Image
import numpy as np

# Paths (update to your actual paths)
image_dir = r"C:\Users\naven\Desktop\Navendar\Data\license_plates_recognition_train"            # Folder with the input images to run detection + OCR on
output_excel = r"C:\Users\naven\Desktop\Navendar\results.xlsx"   # Output Excel for results
output_dir = r"C:\Users\naven\Desktop\Navendar\Annoted"      # Folder for annotated images

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Initialize YOLOv8 model
# NOTE(review): the recorded run output for this cell reports "(no detections)"
# for the images shown, so this checkpoint presumably does not recognise
# license plates -- confirm and swap in a plate-specific model.
model = YOLO("yolov8n.pt")  # Replace with a license plate-specific model if available

# Tell pytesseract where the Tesseract binary lives (Windows install path)
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Initialize results list: one {"image", "license_text"} dict per processed
# image, exported to output_excel at the end of the cell
results = []

def detect_license_plate(image_path, conf_threshold=0.5):
    """Detect a license plate with the YOLO model and crop it out.

    Args:
        image_path: Path of the image to analyse.
        conf_threshold: A detection must score strictly above this
            confidence to be considered. Defaults to 0.5.

    Returns:
        A ``(plate_img, bbox)`` tuple where ``plate_img`` is the cropped
        region of the loaded image and ``bbox`` is
        ``[ymin, xmin, ymax, xmax]``. Returns ``(None, None)`` when the image
        cannot be loaded or no usable detection is found.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Could not load {image_path}")
        return None, None

    # One image in, so only the first result object is relevant.
    detections = model(image_path)[0]

    # Keep the most confident box above the threshold (one plate per image
    # is assumed) instead of whichever qualifying box happens to come first.
    best_box, best_conf = None, conf_threshold
    for box in detections.boxes:
        conf = float(box.conf[0])
        if conf > best_conf:
            best_box, best_conf = box, conf

    if best_box is not None:
        height, width = img.shape[:2]
        xmin, ymin, xmax, ymax = (int(v) for v in best_box.xyxy[0])
        # Clamp to the image: a negative index would wrap around in the
        # slice below and silently yield the wrong region.
        xmin, xmax = max(0, xmin), min(width, xmax)
        ymin, ymax = max(0, ymin), min(height, ymax)
        # A degenerate box would produce an empty crop that breaks the
        # downstream OpenCV calls, so treat it as "not detected".
        if xmax > xmin and ymax > ymin:
            return img[ymin:ymax, xmin:xmax], [ymin, xmin, ymax, xmax]

    print(f"No license plate detected in {image_path}")
    return None, None

def preprocess_plate(plate_img):
    """Prepare a cropped plate image for OCR.

    The image is converted to grayscale, binarised with Gaussian adaptive
    thresholding and then passed through non-local-means denoising.

    Args:
        plate_img: Plate crop as an image array (BGR, or already
            single-channel), or ``None``.

    Returns:
        The processed single-channel image, or ``None`` when ``plate_img``
        is ``None`` or contains no pixels.
    """
    # An empty crop (zero width or height) cannot be processed by OpenCV;
    # report it the same way as a missing plate.
    if plate_img is None or plate_img.size == 0:
        return None

    # A two-dimensional array is already single-channel; converting it with
    # COLOR_BGR2GRAY would raise.
    if plate_img.ndim == 2:
        gray = plate_img
    else:
        gray = cv2.cvtColor(plate_img, cv2.COLOR_BGR2GRAY)

    # Adaptive threshold: 11-pixel neighbourhood, constant 2 subtracted from
    # the weighted mean.
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )

    # Optional: Denoise
    denoised = cv2.fastNlMeansDenoising(thresh)

    return denoised

def extract_text(plate_img):
    """Run OCR on a cropped plate image and return the recognised text.

    Args:
        plate_img: Cropped plate image, or ``None`` when detection failed.

    Returns:
        The stripped OCR text; ``"No plate detected"`` when there is no
        usable image; ``"No text detected"`` when OCR yields an empty string.
    """
    prepared = None if plate_img is None else preprocess_plate(plate_img)
    if prepared is None:
        return "No plate detected"

    # pytesseract is fed a PIL image; PSM 8 treats the crop as a single word.
    ocr_text = pytesseract.image_to_string(
        Image.fromarray(prepared), config=r'--oem 3 --psm 8'
    ).strip()

    return ocr_text or "No text detected"

def draw_bounding_box(image_path, bbox, text, output_path):
    """Draw a bounding box with a text label on an image and save it.

    Args:
        image_path: Path of the image to annotate.
        bbox: ``(ymin, xmin, ymax, xmax)`` of the box, or ``None``.
        text: Label to render next to the box.
        output_path: Destination path for the annotated image.

    Returns:
        None. Success and failure are both reported with ``print``.
    """
    img = cv2.imread(image_path)
    if img is None or bbox is None:
        print(f"Error: Could not process {image_path}")
        return

    ymin, xmin, ymax, xmax = map(int, bbox)
    # Draw rectangle (red) and text (green); colours are BGR tuples.
    cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)

    font, scale, thickness = cv2.FONT_HERSHEY_SIMPLEX, 0.9, 2
    (_, text_height), _ = cv2.getTextSize(text, font, scale, thickness)
    # putText anchors at the text's bottom-left corner. Prefer the spot just
    # above the box; when the box sits too close to the top edge for the
    # label to fit there, draw it just inside the box instead so it stays
    # visible.
    text_y = ymin - 10
    if text_y - text_height < 0:
        text_y = ymin + text_height + 10
    cv2.putText(img, text, (xmin, text_y), font, scale, (0, 255, 0), thickness)

    # imwrite signals failure through its boolean result, so only report
    # success when the file was actually written.
    if not cv2.imwrite(output_path, img):
        print(f"Error: Could not save {output_path}")
        return
    print(f"Saved annotated image to {output_path}")

# Run detection + OCR over every supported image file in the input folder
for image_name in os.listdir(image_dir):
    # Skip anything that is not a JPEG/PNG by extension
    if not image_name.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    image_path = os.path.join(image_dir, image_name)

    # Locate the plate, then read it (extract_text copes with a None crop)
    plate_img, bbox = detect_license_plate(image_path)
    license_text = extract_text(plate_img)

    # An annotated copy is only written when a box was found
    output_path = os.path.join(output_dir, f"annotated_{image_name}")
    if bbox:
        draw_bounding_box(image_path, bbox, license_text, output_path)

    # Every image gets a result row, detected or not
    results.append({"image": image_name, "license_text": license_text})

# Export the collected rows to the Excel workbook
results_df = pd.DataFrame(results)
results_df.to_excel(output_excel, index=False)
print(f"Saved results to {output_excel}")


image 1/1 C:\Users\naven\Desktop\Navendar\Data\license_plates_recognition_train\0.jpg: 160x640 (no detections), 119.4ms
Speed: 2.8ms preprocess, 119.4ms inference, 0.8ms postprocess per image at shape (1, 3, 160, 640)
No license plate detected in C:\Users\naven\Desktop\Navendar\Data\license_plates_recognition_train\0.jpg

image 1/1 C:\Users\naven\Desktop\Navendar\Data\license_plates_recognition_train\1.jpg: 128x640 (no detections), 114.8ms
Speed: 2.1ms preprocess, 114.8ms inference, 1.0ms postprocess per image at shape (1, 3, 128, 640)
No license plate detected in C:\Users\naven\Desktop\Navendar\Data\license_plates_recognition_train\1.jpg

image 1/1 C:\Users\naven\Desktop\Navendar\Data\license_plates_recognition_train\10.jpg: 576x640 (no detections), 269.8ms
Speed: 6.2ms preprocess, 269.8ms inference, 1.1ms postprocess per image at shape (1, 3, 576, 640)
No license plate detected in C:\Users\naven\Desktop\Navendar\Data\license_plates_recognition_train\10.jpg

image 1/1 C:\Users\naven\

3. Test Set: This set consists of 201 images and is structured similarly to the first training set. In this set, your task is twofold: you need to detect the license plates within the images and recognize the characters on those plates. 

In [5]:
import cv2
import numpy as np
import pytesseract
import pandas as pd
import os
import imutils  # Correct import for grab_contours

# Path to Tesseract executable
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  # Windows example

# Directories
image_dir = r'C:\Users\naven\Desktop\Navendar\Data\test'
output_excel = r'C:\Users\naven\Desktop\Navendar\Recognized_License_Plates.xlsx'
debug_dir = 'debug_images'  # Directory to save intermediate images for debugging

# Create debug directory if it doesn't exist. exist_ok=True makes this a
# single call with no window between checking for the directory and
# creating it.
os.makedirs(debug_dir, exist_ok=True)

# Create a list to store results (one dict per processed image)
results = []

def preprocess_image(image):
    """Derive the grayscale, binary and edge images used by later stages.

    Args:
        image: BGR image array.

    Returns:
        A ``(gray, thresh, edged)`` tuple: the blurred grayscale image, the
        inverted adaptive-threshold image after morphological closing, and
        the Canny edge map computed from the blurred grayscale image.
    """
    # Grayscale, then a 5x5 Gaussian blur to suppress noise
    blurred = cv2.GaussianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (5, 5), 0)

    # Inverted adaptive threshold copes with uneven lighting; closing with a
    # 3x3 rectangle fills small gaps in the resulting foreground
    binary = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
    )
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel)

    edges = cv2.Canny(blurred, 50, 200)

    # Debug dumps use fixed file names, so each call overwrites the previous
    # call's files
    debug_stages = (
        ('gray.png', blurred),
        ('thresh.png', binary),
        ('edged.png', edges),
    )
    for file_name, stage in debug_stages:
        cv2.imwrite(os.path.join(debug_dir, file_name), stage)

    return blurred, binary, edges

def detect_license_plate(image, edged):
    """Search the edge map for a plate-shaped contour.

    Args:
        image: Original BGR image; used only for the debug overlay.
        edged: Edge map in which contours are searched.

    Returns:
        The approximated polygon of the first qualifying contour, or
        ``None`` when no candidate qualifies.
    """
    # grab_contours normalises the return tuple of findContours
    contours = imutils.grab_contours(
        cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    )

    # Only the 15 largest contours by area are examined, biggest first
    candidates = sorted(contours, key=cv2.contourArea, reverse=True)[:15]

    screenCnt = None
    for contour in candidates:
        perimeter = cv2.arcLength(contour, True)
        polygon = cv2.approxPolyDP(contour, 0.02 * perimeter, True)

        # 4-6 vertices: roughly rectangular, tolerating slight distortion
        if not 4 <= len(polygon) <= 6:
            continue

        # Width/height of the upright bounding rectangle must look like a plate
        _, _, w, h = cv2.boundingRect(polygon)
        if 1.5 <= w / float(h) <= 5.0:
            screenCnt = polygon
            break

    if screenCnt is None:
        return None

    # Dump the accepted outline on top of the original image for debugging
    debug_image = image.copy()
    cv2.drawContours(debug_image, [screenCnt], -1, (0, 255, 0), 3)
    cv2.imwrite(os.path.join(debug_dir, 'contour.png'), debug_image)

    return screenCnt

def extract_text(image, screenCnt, gray, thresh):
    """Crop the detected plate region and read its characters with Tesseract.

    Args:
        image: Original image. Kept for interface compatibility; not used.
        screenCnt: Contour of the detected plate, or ``None``.
        gray: Grayscale image; only its shape is used, to size the mask.
        thresh: Thresholded image from which the plate region is cropped
            and passed to OCR.

    Returns:
        ``(text, None)`` on success, otherwise ``(None, reason)`` where
        ``reason`` is ``"No plate detected"``, ``"Empty mask"`` or
        ``"No text recognized"``.
    """
    if screenCnt is None:
        return None, "No plate detected"

    # Rasterise the filled contour so its pixel extent can be measured.
    mask = np.zeros(gray.shape, np.uint8)
    cv2.drawContours(mask, [screenCnt], 0, 255, -1)

    # np.where yields (row indices, column indices) of the masked pixels.
    rows, cols = np.where(mask == 255)
    if rows.size == 0:
        return None, "Empty mask"
    top, bottom = rows.min(), rows.max()
    left, right = cols.min(), cols.max()
    cropped = thresh[top:bottom + 1, left:right + 1]

    # Save cropped image for debugging
    cv2.imwrite(os.path.join(debug_dir, 'cropped.png'), cropped)

    # PSM 8 treats the crop as a single word; the whitelist limits output to
    # digits and upper-case letters.
    # NOTE(review): when `thresh` comes from preprocess_image it was built
    # with THRESH_BINARY_INV; presumably Tesseract works better on dark text
    # over a light background -- confirm whether the crop should be inverted.
    custom_config = r'--oem 3 --psm 8 -c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    text = pytesseract.image_to_string(cropped, config=custom_config)

    # Keep alphanumeric characters only (this also drops all whitespace).
    text = ''.join(char for char in text if char.isalnum())

    if not text:
        return None, "No text recognized"

    return text, None

def process_images(image_dir):
    """Run plate detection and OCR on every image in a directory.

    One row per image is appended to the module-level ``results`` list, and
    the accumulated list is then written to ``output_excel``.

    Args:
        image_dir: Directory whose ``.png``/``.jpg``/``.jpeg`` files are
            processed.
    """
    for image_name in os.listdir(image_dir):
        # Only files with a supported image extension are considered
        if not image_name.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        image = cv2.imread(os.path.join(image_dir, image_name))
        if image is None:
            # Unreadable file: record the failure and move on
            results.append({'Image': image_name, 'License Plate': 'Not Detected', 'Error': 'Failed to load image'})
            print(f"Failed to load image: {image_name}")
            continue

        # Pipeline: preprocess -> locate plate contour -> OCR
        gray, thresh, edged = preprocess_image(image)
        plate_contour = detect_license_plate(image, edged)
        plate_text, error = extract_text(image, plate_contour, gray, thresh)

        shown_plate = plate_text or 'Not Detected'
        results.append({
            'Image': image_name,
            'License Plate': shown_plate,
            'Error': error or 'None',
        })
        print(f"Processed {image_name}: {shown_plate}")

    # Export everything collected so far to the Excel workbook
    pd.DataFrame(results).to_excel(output_excel, index=False)
    print(f"Results saved to {output_excel}")

if __name__ == "__main__":
    # Require an actual directory: a path that exists but is a regular file
    # would pass an existence check and then fail inside os.listdir.
    if not os.path.isdir(image_dir):
        print(f"Image directory {image_dir} does not exist.")
    else:
        process_images(image_dir)

Processed 1000.jpg: Not Detected
Processed 1001.jpg: UIFS79176
Processed 1002.jpg: SN
Processed 1003.jpg: 2F514
Processed 1004.jpg: PELXET
Processed 1005.jpg: WBG
Processed 1006.jpg: SEAN
Processed 1007.jpg: 28E579
Processed 1008.jpg: Not Detected
Processed 1009.jpg: SES
Processed 1010.jpg: Not Detected
Processed 1011.jpg: Not Detected
Processed 1012.jpg: RE
Processed 1013.jpg: 2
Processed 1014.jpg: SS
Processed 1015.jpg: Not Detected
Processed 1016.jpg: Not Detected
Processed 1017.jpg: W825SH9
Processed 1018.jpg: RE
Processed 1019.jpg: Not Detected
Processed 1020.jpg: 93SF1903
Processed 1021.jpg: Not Detected
Processed 1022.jpg: 151752980951
Processed 1023.jpg: SU5
Processed 1024.jpg: Not Detected
Processed 1025.jpg: I
Processed 1026.jpg: P
Processed 1027.jpg: Not Detected
Processed 1028.jpg: TO327390
Processed 1029.jpg: BHEEY
Processed 1030.jpg: RY
Processed 1031.jpg: SS
Processed 1032.jpg: Not Detected
Processed 1033.jpg: M167525590F
Processed 1034.jpg: Not Detected
Processed 1035.j