# Automated License Plate Detection using Tesseract OCR

This implementation performs license plate recognition using an
OCR-first, heuristic-based approach built on Tesseract OCR.

The text is extracted from the entire image and candidate
license plate strings are inferred through preprocessing, confidence
filtering, and pattern validation.

**Execution Environment:** Google Colab (Tesseract is computationally lightweight and does not require GPU resources)

In [None]:

class LicensePlateRecognition:
    def __init__(self):
        self.database = []
        self.invalid_entries = []
        self.redundant_entries = []
        self.total_images = 0

    def calculate_image_quality_score(self, image):
        """Score an image on a 0-100 scale from four grayscale measurements.

        The sub-scores are blended with fixed weights: sharpness 35%,
        brightness 25%, contrast 25% and noise 15%.

        Args:
            image: Image array that ``cv2.cvtColor`` can convert with
                ``COLOR_BGR2GRAY`` (presumably a BGR image as returned by
                ``cv2.imread`` -- confirm against callers).

        Returns:
            The weighted quality score, rounded to two decimal places.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # 1. Sharpness: variance of the Laplacian, saturating at 500 -> 100.
        laplacian_var = cv2.Laplacian(gray, cv2.CV_64F).var()
        sharpness_score = min(100, (laplacian_var / 500) * 100)

        # 2. Brightness: best at a mean of 127, falling off linearly.
        # Clamp at 0 because a mean above 254 would otherwise yield a
        # slightly negative score (128 / 1.27 > 100).
        mean_brightness = np.mean(gray)
        brightness_score = max(0, 100 - abs(mean_brightness - 127) / 1.27)

        # 3. Contrast: standard deviation of intensities, saturating at 80.
        contrast = gray.std()
        contrast_score = min(100, (contrast / 80) * 100)

        # 4. Noise: mean absolute difference between the image and a denoised
        # copy; a smaller difference gives a higher score. Both arrays are
        # cast to float before subtracting so the difference can be negative.
        denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
        noise_diff = np.abs(gray.astype(float) - denoised.astype(float)).mean()
        noise_score = max(0, 100 - (noise_diff * 2))

        # Weighted average of the four sub-scores.
        quality_score = (
            sharpness_score * 0.35 +
            brightness_score * 0.25 +
            contrast_score * 0.25 +
            noise_score * 0.15
        )

        return round(quality_score, 2)

    def preprocess_image(self, image):
        """Produce a binarised, cleaned-up version of ``image`` for OCR.

        Pipeline: grayscale conversion, bilateral filtering, Gaussian adaptive
        thresholding, then a morphological close with a 3x3 kernel.
        """
        grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Bilateral filtering smooths noise while keeping edges sharp.
        smoothed = cv2.bilateralFilter(grayscale, 11, 17, 17)

        binary = cv2.adaptiveThreshold(
            smoothed,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            11,
            2,
        )

        # Morphological close with a 3x3 kernel to clean up the binary image.
        closing_kernel = np.ones((3, 3), np.uint8)
        return cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel)

    def clean_plate_text(self, text):
        """Normalise raw OCR text into a candidate plate string.

        Whitespace is stripped, the text is upper-cased, and every character
        outside ``A-Z`` / ``0-9`` is dropped. Returns the cleaned string when
        it is 4 to 12 characters long, otherwise ``None`` (also for empty or
        falsy input).
        """
        if not text:
            return None

        without_whitespace = re.sub(r'[\n\r\t\s]', '', text)
        candidate = re.sub(r'[^A-Z0-9]', '', without_whitespace.upper())

        # Plates are expected to have between 4 and 12 characters.
        if 4 <= len(candidate) <= 12:
            return candidate
        return None

    def extract_plate_number(self, image_path):
        """Extract license plate number from image using Tesseract OCR with confidence"""
        try:
            # Read image
            image = cv2.imread(image_path)
            if image is None:
                return None, 0, 0, "Failed to read image"

            # Calculate image quality score
            quality_score = self.calculate_image_quality_score(image)

            # Preprocess image
            processed = self.preprocess_image(image)

            # Apply OCR with custom config for alphanumeric recognition
            custom_config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'

            # Get detailed OCR data including confidence scores
            ocr_data = pytesseract.image_to_data(processed, config=custom_config, output_type=pytesseract.Output.DICT)

            # Extract text and calculate average confidence
            text_parts = []
            confidences = []

            for i, conf in enumerate(ocr_data['conf']):
                if int(conf) > 0:  # Valid confidence score
                    text = ocr_data['text'][i]
                    if text.strip():
                        text_parts.append(text)
                        confidences.append(int(conf))

            # Calculate average confidence
            avg_confidence = round(np.mean(confidences), 2) if confidences else 0

            # Combine text parts
            full_text = ''.join(text_parts)

            # Clean the detected text
            plate_number = self.clean_plate_text(full_text)

            return plate_number, avg_confidence, quality_score, None

        except Exception as e:
            return None, 0, 0, str(e)

    def is_valid_plate(self, plate_number):
        """Check a plate string against the accepted format patterns.

        The patterns are examples and should be customised per region.

        Args:
            plate_number: Candidate plate text; ``None`` or an empty string is
                rejected.

        Returns:
            ``True`` when the whole string matches at least one pattern,
            otherwise ``False``.
        """
        if not plate_number:
            return False

        # Patterns carry no anchors: re.fullmatch requires the entire string
        # to match. The previous ``re.match`` with ``$`` also accepted a
        # string that ended in a newline.
        patterns = (
            r'[A-Z]{2}\d{1,2}[A-Z]{1,3}\d{1,4}',  # Indian format
            r'[A-Z]{3}\d{3,4}',  # US format
            r'[A-Z]{2,3}\d{4}[A-Z]{0,2}',  # Generic format
            r'[A-Z0-9]{6,10}',  # General alphanumeric
        )

        return any(re.fullmatch(pattern, plate_number) for pattern in patterns)

    