# Automated License Plate Detection using PaddleOCR
PaddleOCR is employed as a general-purpose OCR baseline.
The framework performs text detection using DBNet followed
by sequence-based text recognition using a CTC-trained model.

While PaddleOCR effectively recognizes alphanumeric text,
it does not explicitly localize license plates and may detect
non-plate text present in vehicle images.


**Execution Environment:** Google Colab (GPU-enabled)


In [None]:
class PaddleOCRRecognizer:
    """License-plate text reader built on PaddleOCR's default English models.

    Each image is OCR'd twice (as given and after contrast/denoise/sharpen
    preprocessing); every recognized string is normalized with
    :meth:`clean_text` and the highest-confidence candidate is returned.

    Attributes:
        ocr: The underlying ``PaddleOCR`` engine.
        valid_patterns: Regex strings used by :meth:`is_valid_format`.
        recognition_times: Elapsed seconds of every :meth:`recognize_plate`
            call, successful or not.
    """

    # Candidates at or below this recognition confidence are ignored.
    MIN_CONFIDENCE = 0.3
    # Cleaned strings shorter than this are not treated as plate candidates.
    MIN_TEXT_LENGTH = 4

    # Letters that OCR commonly produces in place of a visually similar digit.
    _DIGIT_LOOKALIKES = {
        'O': '0', 'I': '1', 'L': '1', 'Z': '2',
        'S': '5', 'B': '8', 'G': '6', 'Q': '0',
    }

    def __init__(self):
        print("Initializing PaddleOCR...")
        # No custom model paths are passed, so PaddleOCR uses its default models.
        self.ocr = PaddleOCR(
            use_angle_cls=True,
            lang='en'
        )

        # Valid license plate patterns
        self.valid_patterns = [
            r'^[A-Z]{2}\d{2}[A-Z]{1,2}\d{4}$',  # KA01AB1234
            r'^[A-Z]{2}\d{2}[A-Z]{2}\d{4}$',    # DL01AB1234
            r'^[A-Z]{3}\d{4}$',                  # ABC1234
            r'^[A-Z]{2}\d{4}$',                  # AB1234
            r'^[A-Z]\d{3}[A-Z]{3}$',             # A123ABC
            r'^[A-Z]{2}\d{1,2}[A-Z]{1,3}\d{1,4}$', # Flexible pattern
        ]

        self.recognition_times = []
        print("PaddleOCR initialized successfully!")

    def preprocess_image(self, image):
        """Return a contrast-enhanced, denoised and sharpened copy of ``image``.

        Args:
            image: Image array accepted by OpenCV. A 3-channel input is
                converted with ``COLOR_BGR2GRAY`` (so BGR channel order is
                assumed); a 2-D input is treated as already grayscale.

        Returns:
            A 3-channel image produced by converting the processed grayscale
            result back with ``COLOR_GRAY2BGR``.
        """
        # A 2-D array has no colour channels; cvtColor(BGR2GRAY) would reject it.
        if getattr(image, "ndim", None) == 2:
            gray = image
        else:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Apply CLAHE for local contrast enhancement
        clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Denoise
        denoised = cv2.fastNlMeansDenoising(enhanced, h=10)

        # Sharpen: the kernel sums to 1, so overall brightness is preserved.
        kernel = np.array([[-1, -1, -1],
                           [-1,  9, -1],
                           [-1, -1, -1]])
        sharpened = cv2.filter2D(denoised, -1, kernel)

        return cv2.cvtColor(sharpened, cv2.COLOR_GRAY2BGR)

    def _extract_candidates(self, result):
        """Collect ``(cleaned_text, confidence)`` pairs from one OCR result.

        Entries are read as ``word_info[1][0]`` (text) and ``word_info[1][1]``
        (confidence). Low-confidence and too-short strings are dropped.
        """
        candidates = []
        for line in result or []:
            # Skip empty/None entries instead of letting them raise mid-loop.
            if not line:
                continue
            for word_info in line:
                text, confidence = word_info[1][0], word_info[1][1]
                if confidence <= self.MIN_CONFIDENCE:
                    continue
                cleaned = self.clean_text(text)
                if len(cleaned) >= self.MIN_TEXT_LENGTH:
                    candidates.append((cleaned, confidence))
        return candidates

    def recognize_plate(self, image):
        """Recognize license plate text using PaddleOCR.

        Runs OCR on the original image and on a preprocessed copy, then picks
        the candidate with the highest confidence across both passes. The
        elapsed time is appended to ``recognition_times`` on every call.

        Args:
            image: Image array passed to ``self.ocr.ocr`` and
                :meth:`preprocess_image`.

        Returns:
            ``(text, confidence)`` for the best candidate, or ``(None, 0.0)``
            when nothing usable was recognized.
        """
        start_time = time.perf_counter()
        candidates = []

        try:
            # Pass 1: original image
            candidates.extend(self._extract_candidates(self.ocr.ocr(image)))

            # Pass 2: preprocessed image
            preprocessed = self.preprocess_image(image)
            candidates.extend(self._extract_candidates(self.ocr.ocr(preprocessed)))
        except Exception as e:
            # Best-effort: never raise to the caller, but surface the error so
            # a failure is distinguishable from "no text found". Candidates
            # gathered before the failure are still used below.
            print(f"PaddleOCR recognition error: {e}")
        finally:
            self.recognition_times.append(time.perf_counter() - start_time)

        if not candidates:
            return None, 0.0

        # max() keeps the first of equally-confident candidates.
        return max(candidates, key=lambda x: x[1])

    def clean_text(self, text):
        """Normalize raw OCR text into an upper-case alphanumeric string.

        Non-alphanumeric characters are removed and the rest upper-cased.
        A digit look-alike letter (O, I, L, Z, S, B, G, Q) is converted to its
        digit only when the characters on BOTH sides are digits; a letter that
        merely touches one digit is left alone, so the letter groups of a
        plate such as ``DL01AB1234`` are not corrupted.

        Args:
            text: Raw recognized string; empty or ``None`` yields ``''``.

        Returns:
            The cleaned string (possibly empty).
        """
        if not text:
            return ''

        # Remove special characters and spaces
        cleaned = ''.join(c for c in text if c.isalnum()).upper()

        last = len(cleaned) - 1
        result = []
        for i, char in enumerate(cleaned):
            surrounded_by_digits = (
                0 < i < last
                and cleaned[i - 1].isdigit()
                and cleaned[i + 1].isdigit()
            )
            if surrounded_by_digits:
                result.append(self._DIGIT_LOOKALIKES.get(char, char))
            else:
                result.append(char)

        return ''.join(result)

    def is_valid_format(self, plate_text):
        """Check whether ``plate_text`` looks like a license plate.

        Returns ``True`` when the text matches any regex in
        ``valid_patterns``, or, failing that, when it is 4-12 characters long
        and contains at least one letter and one digit.

        Args:
            plate_text: Cleaned plate string; falsy or shorter than 4
                characters is rejected.

        Returns:
            ``True`` if the text is accepted, ``False`` otherwise.
        """
        if not plate_text or len(plate_text) < 4:
            return False

        # Check against the known plate layouts first
        if any(re.match(pattern, plate_text) for pattern in self.valid_patterns):
            return True

        # Fallback: accept a reasonable mix of letters and numbers
        has_letters = any(c.isalpha() for c in plate_text)
        has_numbers = any(c.isdigit() for c in plate_text)
        reasonable_length = 4 <= len(plate_text) <= 12

        return has_letters and has_numbers and reasonable_length

    def get_avg_recognition_time(self):
        """Return the mean of ``recognition_times`` in seconds, or 0.0 if empty."""
        if not self.recognition_times:
            return 0.0
        # float() keeps the return type a plain float in both branches.
        return float(np.mean(self.recognition_times))