# 🎨 SYNTHETIC ALGERIAN LICENSE PLATE GENERATOR

Generate synthetic Algerian license plates for OCR training.
**Format:** 11 digits grouped as `XXXXXX XXX XX`

In [None]:
# Install dependencies
!pip install Pillow numpy matplotlib tqdm

In [None]:
import os
import random
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageEnhance
from tqdm import tqdm
import matplotlib.pyplot as plt
from pathlib import Path

In [None]:
# Configuration
# Output layout: one sub-directory per dataset split under BASE_DIR.
BASE_DIR = 'synthetic_plates'
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    os.path.join(BASE_DIR, split) for split in ('train', 'validation', 'test')
)

# Number of plate images to generate for each split.
NUM_TRAIN = 40_000
NUM_VAL = 5_000
NUM_TEST = 5_000

# Plate canvas size in pixels and the nominal font size for the digits.
PLATE_WIDTH, PLATE_HEIGHT = 520, 110
FONT_SIZE = 70

# RGB colours; WHITE_PROBABILITY is the chance that a plate gets the white
# background (the remaining plates get the yellow one).
WHITE_BG = (255, 255, 255)
YELLOW_BG = (255, 235, 100)
TEXT_COLOR = (0, 0, 0)
BORDER_COLOR = (0, 0, 0)
WHITE_PROBABILITY = 0.7

# Create the output directories up front so later cells can write into them.
for _split_dir in (TRAIN_DIR, VAL_DIR, TEST_DIR):
    os.makedirs(_split_dir, exist_ok=True)
print(f'Total to generate: {NUM_TRAIN + NUM_VAL + NUM_TEST:,}')

In [None]:
def generate_plate_number():
    """Return a random plate number as a string of 11 decimal digits.

    Every digit is drawn independently with ``random.randint(0, 9)``, so
    leading zeros can occur and the value must stay a string.
    """
    digits = (str(random.randint(0, 9)) for _ in range(11))
    return ''.join(digits)

def format_plate_text(number):
    """Split a plate number into its 6/3/2 digit groups for rendering.

    The groups are ``number[:6]``, ``number[6:9]`` and ``number[9:11]``,
    joined with two spaces so the gaps are clearly visible on the plate.
    """
    groups = (number[:6], number[6:9], number[9:11])
    return '  '.join(groups)

def get_background_color():
    """Randomly choose the plate background colour.

    Returns WHITE_BG with probability WHITE_PROBABILITY and YELLOW_BG
    otherwise.
    """
    if random.random() < WHITE_PROBABILITY:
        return WHITE_BG
    return YELLOW_BG

def add_noise(image, intensity=0.02):
    """Return a new image with zero-mean Gaussian noise added to every pixel.

    Args:
        image: PIL image to perturb; it is converted to an array, not
            modified in place.
        intensity: Noise standard deviation as a fraction of 255.

    Returns:
        A PIL image built from the noisy values, clipped to 0-255 and cast
        to uint8.
    """
    pixels = np.array(image, dtype=np.float32)
    sigma = intensity * 255
    noisy = pixels + np.random.normal(0, sigma, pixels.shape)
    noisy = np.clip(noisy, 0, 255).astype(np.uint8)
    return Image.fromarray(noisy)

def add_slight_blur(image):
    """Apply a mild Gaussian blur to roughly half of the images.

    With probability 0.5 the image is returned untouched; otherwise it is
    blurred with a radius drawn uniformly from [0.3, 0.8].
    """
    if random.random() >= 0.5:
        return image
    radius = random.uniform(0.3, 0.8)
    return image.filter(ImageFilter.GaussianBlur(radius=radius))

def adjust_brightness(image):
    """Return the image with brightness scaled by a random factor in [0.85, 1.15]."""
    factor = random.uniform(0.85, 1.15)
    enhancer = ImageEnhance.Brightness(image)
    return enhancer.enhance(factor)

def adjust_contrast(image):
    """Return the image with contrast scaled by a random factor in [0.85, 1.15]."""
    factor = random.uniform(0.85, 1.15)
    enhancer = ImageEnhance.Contrast(image)
    return enhancer.enhance(factor)

print('Helper functions loaded!')

In [None]:
def load_font(size=FONT_SIZE):
    """Load a bold monospaced TrueType font, falling back to Pillow's default.

    The candidate paths cover common Linux, macOS and Windows locations and
    are tried in order; the first one that loads is returned.

    Args:
        size: Requested font size in pixels.

    Returns:
        A Pillow font object. If none of the candidates can be loaded, the
        built-in default font is returned (at the requested size when the
        installed Pillow supports sizing the default font).
    """
    font_paths = [
        '/usr/share/fonts/truetype/dejavu/DejaVuSansMono-Bold.ttf',
        '/usr/share/fonts/truetype/liberation/LiberationMono-Bold.ttf',
        '/System/Library/Fonts/Courier.dfont',
        'C:\\Windows\\Fonts\\courbd.ttf',
    ]
    for font_path in font_paths:
        try:
            font = ImageFont.truetype(font_path, size)
        except (OSError, ImportError):
            # OSError: file missing/unreadable on this platform.
            # ImportError: Pillow was built without FreeType support.
            # Either way this candidate is unusable; try the next one.
            continue
        print(f'Font loaded: {font_path}')
        return font

    print('Using default font')
    try:
        # Newer Pillow releases accept a size for the default font; without
        # it the fallback is a tiny fixed-size bitmap font.
        return ImageFont.load_default(size=size)
    except (TypeError, ImportError):
        # Older Pillow (no `size` parameter) or no FreeType available.
        return ImageFont.load_default()

font = load_font(FONT_SIZE)

In [None]:
# Smallest font size the auto-fit logic will shrink to.
_MIN_FONT_SIZE = 8

# id(base font) -> (base font, fitted variant). Remembering the fitted
# variant avoids re-opening the font file for every generated plate; the base
# font is stored alongside so its id cannot be reused by another object.
_FITTED_FONTS = {}


def _fit_text(draw, text, base_font, max_width, max_height):
    """Return ``(font, bbox)`` where ``text`` fits in the given box if possible.

    Starts from ``base_font`` (or a previously fitted variant of it) and
    shrinks it until the rendered text is at most ``max_width`` x
    ``max_height`` pixels. Fonts that cannot be resized (no ``font_variant``)
    are returned unchanged. ``bbox`` is the ``textbbox`` of ``text`` at
    origin (0, 0) for the returned font.
    """
    cached = _FITTED_FONTS.get(id(base_font))
    candidate = cached[1] if cached and cached[0] is base_font else base_font

    while True:
        bbox = draw.textbbox((0, 0), text, font=candidate)
        width = bbox[2] - bbox[0]
        height = bbox[3] - bbox[1]
        if width <= max_width and height <= max_height:
            break
        size = getattr(candidate, 'size', None)
        if not hasattr(candidate, 'font_variant') or not size or size <= _MIN_FONT_SIZE:
            break  # Cannot shrink any further; draw as-is.
        # Jump straight to the proportionally scaled size, but always drop by
        # at least one so the loop is guaranteed to terminate.
        scale = min(max_width / max(width, 1), max_height / max(height, 1))
        new_size = max(_MIN_FONT_SIZE, min(size - 1, int(size * scale)))
        candidate = candidate.font_variant(size=new_size)

    _FITTED_FONTS[id(base_font)] = (base_font, candidate)
    return candidate, bbox


def create_license_plate(plate_number, add_variations=True):
    """Render one synthetic plate image for ``plate_number``.

    Args:
        plate_number: Digit string; it is grouped with ``format_plate_text``
            before drawing.
        add_variations: When true, apply a small random position jitter and
            random noise / blur / brightness / contrast augmentations.

    Returns:
        An RGB PIL image of size PLATE_WIDTH x PLATE_HEIGHT.
    """
    bg_color = get_background_color()
    image = Image.new('RGB', (PLATE_WIDTH, PLATE_HEIGHT), bg_color)
    draw = ImageDraw.Draw(image)

    # Border
    margin = 5
    border_width = 3
    draw.rectangle([(margin, margin), (PLATE_WIDTH - margin, PLATE_HEIGHT - margin)],
                   outline=BORDER_COLOR, width=border_width)

    # Text: shrink the font if needed so every digit in the label is actually
    # visible inside the border. The extra padding leaves room for the jitter.
    formatted_text = format_plate_text(plate_number)
    inset = margin + border_width + 6
    text_font, bbox = _fit_text(
        draw, formatted_text, font,
        max(1, PLATE_WIDTH - 2 * inset), max(1, PLATE_HEIGHT - 2 * inset),
    )
    text_width = bbox[2] - bbox[0]
    text_height = bbox[3] - bbox[1]

    x_offset = random.randint(-3, 3) if add_variations else 0
    y_offset = random.randint(-2, 2) if add_variations else 0
    # textbbox reports where the ink starts relative to the drawing origin
    # (bbox[0], bbox[1] are usually non-zero); subtract that so the visible
    # glyphs, not the origin, end up centred on the plate.
    x = (PLATE_WIDTH - text_width) // 2 - bbox[0] + x_offset
    y = (PLATE_HEIGHT - text_height) // 2 - bbox[1] + y_offset

    draw.text((x, y), formatted_text, fill=TEXT_COLOR, font=text_font)

    # Variations
    if add_variations:
        if random.random() < 0.6:
            image = add_noise(image, random.uniform(0.01, 0.03))
        image = add_slight_blur(image)
        if random.random() < 0.5:
            image = adjust_brightness(image)
        if random.random() < 0.5:
            image = adjust_contrast(image)

    return image

print('Plate generation function ready!')

In [None]:
# Test generation: render six freshly generated plates in a 2x3 grid so the
# output can be checked by eye before the full datasets are written.
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(15, 6))
fig.suptitle('Sample Synthetic License Plates', fontsize=16, fontweight='bold')

for i, ax in enumerate(axes.ravel()):
    plate_num = generate_plate_number()
    plate_img = create_license_plate(plate_num, add_variations=True)
    ax.imshow(plate_img)
    # The title shows the ground-truth digits for the rendered plate.
    ax.set_title(f'Label: {plate_num}', fontsize=10)
    ax.set_axis_off()

plt.tight_layout()
plt.show()

In [None]:
def generate_dataset(output_dir, num_images, add_variations=True):
    """Generate ``num_images`` unique plates and save them as JPEG files.

    Each image is written to ``output_dir`` as ``<plate_number>.jpg``, so the
    file name doubles as the label. Plate numbers are unique within one call
    only; nothing is checked against other directories or earlier runs.

    Args:
        output_dir: Destination directory; created if it does not exist.
        num_images: Number of distinct plates to generate.
        add_variations: Forwarded to ``create_license_plate``.
    """
    print(f'Generating {num_images:,} plates → {output_dir}')
    # Do not rely on another cell having created the directory already.
    os.makedirs(output_dir, exist_ok=True)
    generated_numbers = set()

    with tqdm(total=num_images, desc='Generating', unit='plates') as pbar:
        # The set size is the number of images written so far.
        while len(generated_numbers) < num_images:
            plate_number = generate_plate_number()
            if plate_number in generated_numbers:
                continue  # Duplicate draw; try again.
            generated_numbers.add(plate_number)

            plate_image = create_license_plate(plate_number, add_variations)
            filepath = os.path.join(output_dir, f'{plate_number}.jpg')
            plate_image.save(filepath, quality=95)

            pbar.update(1)

    print(f'✅ {len(generated_numbers):,} plates saved')

print('Batch generation function ready!')

In [None]:
# Generate training set: NUM_TRAIN plates written to TRAIN_DIR with random augmentations enabled
generate_dataset(TRAIN_DIR, NUM_TRAIN, add_variations=True)

In [None]:
# Generate validation set: NUM_VAL plates written to VAL_DIR with random augmentations enabled
generate_dataset(VAL_DIR, NUM_VAL, add_variations=True)

In [None]:
# Generate test set: NUM_TEST plates written to TEST_DIR with random augmentations enabled
generate_dataset(TEST_DIR, NUM_TEST, add_variations=True)

In [None]:
# Dataset statistics
def get_stats(directory):
    """Summarise the ``*.jpg`` files directly inside ``directory``.

    Returns:
        ``None`` when the directory holds no JPEG files, otherwise a tuple
        ``(file_count, total_size_in_mebibytes)``.
    """
    sizes = [jpg.stat().st_size for jpg in Path(directory).glob('*.jpg')]
    if not sizes:
        return None
    bytes_per_mb = 1024 * 1024
    return len(sizes), sum(sizes) / bytes_per_mb

# Print one line per split that actually contains images; splits with no
# JPEG files (get_stats returns None) are skipped.
print('\n' + '='*60)
print('DATASET SUMMARY')
print('='*60)
for name, directory in (('Train', TRAIN_DIR), ('Val', VAL_DIR), ('Test', TEST_DIR)):
    result = get_stats(directory)
    if result is None:
        continue
    count, size_mb = result
    print(f'{name}: {count:,} images, {size_mb:.1f} MB')
print('='*60)

In [None]:
# Show random samples
def show_samples(directory, num=9):
    """Display up to ``num`` randomly chosen plate images from ``directory``.

    Images are laid out three per row; the number of rows follows the number
    of samples actually shown, so any ``num`` is honoured. Each subplot is
    titled with the file stem, which is the plate label.

    Args:
        directory: Folder containing ``*.jpg`` plate images.
        num: Maximum number of images to show. Nothing is drawn when this is
            not positive or the folder has no JPEG files.
    """
    files = list(Path(directory).glob('*.jpg'))
    if not files or num <= 0:
        return
    samples = random.sample(files, min(num, len(files)))

    cols = 3
    rows = -(-len(samples) // cols)  # Ceiling division.
    # squeeze=False keeps `axes` two-dimensional even for a single row.
    fig, axes = plt.subplots(rows, cols, figsize=(15, 3 * rows), squeeze=False)
    fig.suptitle(f'Samples from {Path(directory).name}/', fontsize=16)

    cells = list(axes.flat)
    for ax, img_path in zip(cells, samples):
        # Close the file handle as soon as the pixels have been handed over.
        with Image.open(img_path) as img:
            ax.imshow(img)
        ax.set_title(f'Label: {img_path.stem}', fontsize=9)
        ax.axis('off')

    # Hide the unused cells in the last row.
    for ax in cells[len(samples):]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Preview a random selection from each split, in train / validation / test order.
for _samples_dir in (TRAIN_DIR, VAL_DIR, TEST_DIR):
    show_samples(_samples_dir)