# Automated License Plate Detection using ViT
Vision Transformer (ViT) architectures are explored here through a
Transformer-based OCR model (TrOCR). A ViT-based encoder models global
context across the entire license plate image, which improves robustness
to variations in character spacing and to image distortions.

**Execution Environment:** Google Colab (GPU-enabled)



In [None]:
def load_trocr_model(model_name="microsoft/trocr-base-printed"):
    """
    Fetch the pretrained TrOCR processor/model pair used for plate OCR.

    Args:
        model_name: Hugging Face model identifier handed to both
            ``from_pretrained`` calls.

    Returns:
        Tuple of (processor, model)
    """
    print("[INFO] Loading TrOCR model and processor...")

    # NOTE: no device placement happens in this function; the model is
    # returned exactly as ``from_pretrained`` produced it, so moving it to a
    # GPU (if desired) is left to the caller.
    return (
        TrOCRProcessor.from_pretrained(model_name),
        VisionEncoderDecoderModel.from_pretrained(model_name),
    )
def extract_license_plate_text(image_path, processor, model, device, *,
                               num_beams=5, max_length=64):
    """
    Extract license plate text with a confidence score.

    The image is loaded, preprocessed by ``processor``, and decoded by
    ``model.generate`` using beam search. Generation settings are passed
    per call, so the shared ``model.config`` is left untouched.

    Args:
        image_path: Path of the image file to read.
        processor: Object providing the image preprocessing call and
            ``batch_decode`` (e.g. the processor from ``load_trocr_model``).
        model: Model exposing ``generate``; it is assumed to already live
            on ``device`` (this function only moves the input tensor).
        device: Target device for the preprocessed pixel tensor.
        num_beams: Beam width used for generation (default 5).
        max_length: Maximum generated sequence length (default 64).

    Returns:
        Tuple of (text, confidence_score). ``(None, 0.0)`` is returned when
        nothing readable was decoded or when any step fails; failures are
        reported on stdout rather than raised.
    """
    try:
        # Image.open is lazy and keeps the file open; the context manager
        # releases the handle as soon as convert() has copied the pixels.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        # Preprocess image and place it on the requested device
        pixel_values = processor(images=image, return_tensors="pt").pixel_values
        pixel_values = pixel_values.to(device)

        # Beam search with scores returned so a confidence can be derived.
        # Settings go to generate() directly instead of mutating model.config,
        # which would silently change every later use of the same model.
        with torch.no_grad():
            outputs = model.generate(
                pixel_values,
                num_beams=num_beams,
                max_length=max_length,
                early_stopping=True,
                output_scores=True,
                return_dict_in_generate=True,
            )

        decoded_texts = processor.batch_decode(
            outputs['sequences'],
            skip_special_tokens=True
        )
        if not decoded_texts:
            return None, 0.0

        text = str(decoded_texts[0]).strip()
        if not text:
            # No readable text: report zero confidence, as documented,
            # instead of pairing None with the score of an empty sequence.
            return None, 0.0

        sequence_scores = (
            outputs['sequences_scores'] if 'sequences_scores' in outputs else None
        )
        if sequence_scores is not None:
            # The score is treated as a log-probability of the best sequence.
            confidence = torch.exp(sequence_scores[0]).item()
        else:
            confidence = 0.5  # Default confidence if scores are not available

        return text, confidence

    except Exception as e:
        # Deliberate best-effort: one unreadable image must not abort a batch.
        print(f"[ERROR] Failed to extract text from {image_path}: {e}")
        return None, 0.0
# Ordered (label, regex) pairs; the first matching entry decides the label.
# Kept as plain strings so defining this table needs no imports.
_PLATE_FORMAT_PATTERNS = (
    # Indian format: AA 00 AB 0000
    ('Indian', r'^[A-Z]{2}[\s-]?\d{1,2}[\s-]?[A-Z]{1,2}[\s-]?\d{4}$'),

    # UK format: AA00 AAA or AAA 000A
    ('UK', r'^[A-Z]{2}\d{2}[\s-]?[A-Z]{3}$|^[A-Z]{3}[\s-]?\d{3}[A-Z]?$'),

    # US format: AAA 0000 or 000 AAA
    ('US', r'^[A-Z]{2,3}[\s-]?\d{3,4}$|^\d{3}[\s-]?[A-Z]{2,3}$'),

    # European format: AA 000 AA or AA-000-AA
    ('European', r'^[A-Z]{1,3}[\s-]?\d{3,4}[\s-]?[A-Z]{0,3}$'),

    # Generic: a run of 2+ letters and a run of 2+ digits, 4-12 chars total
    ('Generic', r'^(?=.*[A-Z]{2,})(?=.*\d{2,})[A-Z0-9\s-]{4,12}$'),
)


def validate_license_plate_format(plate_text: str) -> Tuple[bool, str]:
    """
    Validate license plate format for multiple countries/formats.

    The text is trimmed, upper-cased, and has whitespace runs collapsed to a
    single space before being checked against the known format patterns in
    order. If none matches, a plate is still accepted when it contains at
    least two letters and two digits and at most 12 alphanumerics.

    Args:
        plate_text: Raw plate text (e.g. OCR output). ``None`` is tolerated
            and reported as empty, since OCR may yield no text at all.

    Returns:
        Tuple of (is_valid, message) where message names the matched format
        or the reason for rejection.
    """
    if plate_text is None:
        return False, "Empty plate text"

    # Clean the text
    plate_text = plate_text.strip().upper()
    plate_text = re.sub(r'\s+', ' ', plate_text)

    if not plate_text:
        return False, "Empty plate text"

    if len(plate_text) < 4:
        return False, "Text too short"

    # Keep only ASCII letters/digits for the length and mix checks
    clean_text = re.sub(r'[^A-Z0-9]', '', plate_text)

    if len(clean_text) < 4:
        return False, "Not enough alphanumeric characters"

    # re.ASCII restricts \d to 0-9 so that non-ASCII Unicode digits are not
    # accepted as plate digits, consistent with clean_text above.
    for format_type, pattern in _PLATE_FORMAT_PATTERNS:
        if re.match(pattern, plate_text, re.ASCII):
            return True, f"Valid ({format_type} format)"

    # No pattern matched: fall back to a reasonable mix of letters/numbers
    letter_count = sum(c.isalpha() for c in clean_text)
    digit_count = sum(c.isdigit() for c in clean_text)

    if letter_count >= 2 and digit_count >= 2 and len(clean_text) <= 12:
        return True, "Valid (Generic format)"

    return False, f"Invalid format: {plate_text}"
