# Task
Set up a project environment for a number plate recognition system, prepare a dataset (or simulate one), implement number plate detection, perform optical character recognition (OCR), post-process OCR results for accuracy, and finally, summarize the implemented system's performance.

In [None]:
# Install the third-party Python packages this notebook relies on.
# NOTE(review): this cell installs Python packages only. A later cell points
# pytesseract at '/usr/bin/tesseract', so the Tesseract program itself is
# presumably expected to be present already — confirm for your runtime.
print("Installing essential Python libraries: OpenCV, NumPy, Pillow, and pytesseract...")
# The leading `!` is notebook (IPython/Colab) shell-escape syntax, not plain Python.
!pip install opencv-python numpy Pillow pytesseract
# This message is printed unconditionally; it does not check pip's exit status.
print("Python libraries installed successfully.")

In [None]:
import os
import cv2
import matplotlib.pyplot as plt

# Input assets for the detection stage: the Haar cascade definition and the
# photo to run it on. Both are looked up relative to the working directory.
haar_cascade_path = 'haarcascade_russian_plate_number.xml'
example_image_path = 'car_with_plate.jpg'

# Report on each asset in turn. This cell only informs the user; it never
# raises, so later cells still perform their own load checks.
print(
    f"Haar cascade file '{haar_cascade_path}' found."
    if os.path.exists(haar_cascade_path)
    else f"Error: Haar cascade file '{haar_cascade_path}' not found. Please upload it to your Colab environment."
)

print(
    f"Example image file '{example_image_path}' found."
    if os.path.exists(example_image_path)
    else f"Error: Example image file '{example_image_path}' not found. Please upload it to your Colab environment."
)

# Printed regardless of the outcome above, as a reminder.
print("Please ensure both files are present to proceed.")


In [None]:
import cv2
import matplotlib.pyplot as plt
import os

# Detect number plates in the example image with a Haar cascade, draw a box
# around each detection, and save every detected region as its own image in
# 'detected_plates' for the OCR stage.
#
# Names left in the notebook namespace on success: img, plate_cascade, gray,
# plates, img_with_boxes, cropped_plates_dir, saved_count.

# Paths are redefined here so the cell can run without the previous one.
haar_cascade_path = 'haarcascade_russian_plate_number.xml'
example_image_path = 'car_with_plate.jpg'

# 1. Load the example image (cv2.imread returns None instead of raising on failure)
img = cv2.imread(example_image_path)

if img is None:
    print(f"Error: Could not load image from '{example_image_path}'. Please check the path and file integrity.")
else:
    print(f"Image '{example_image_path}' loaded successfully.")

    # 2. Initialize the Haar Cascade classifier
    plate_cascade = cv2.CascadeClassifier(haar_cascade_path)

    # A missing or invalid XML yields an empty classifier rather than an exception
    if plate_cascade.empty():
        print(f"Error: Could not load Haar cascade from '{haar_cascade_path}'. Please check the path and file integrity.")
    else:
        print(f"Haar cascade classifier '{haar_cascade_path}' loaded successfully.")

        # 3. The cascade is applied to a single-channel image
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # 4. Apply the classifier to the grayscale image to detect number plates
        #    scaleFactor: how much the image size is reduced at each image scale.
        #    minNeighbors: how many neighbors each candidate rectangle needs to be kept.
        #    minSize: candidates smaller than this (width, height) are ignored.
        plates = plate_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(25, 25))

        print(f"Detected {len(plates)} number plates.")

        # Draw on a copy so the crops below come from the unannotated image
        img_with_boxes = img.copy()
        cropped_plates_dir = 'detected_plates'
        os.makedirs(cropped_plates_dir, exist_ok=True)

        # Remove crops written by an earlier run of this cell. The OCR stage
        # reads every image in this directory, so leftovers from a previous
        # image would otherwise be reported as plates of the current one.
        for stale_name in os.listdir(cropped_plates_dir):
            if stale_name.startswith('plate_') and stale_name.endswith('.jpg'):
                os.remove(os.path.join(cropped_plates_dir, stale_name))

        # 5-7. Box, crop and save each detection
        saved_count = 0
        for i, (x, y, w, h) in enumerate(plates):
            # Colour is given in BGR order, so (255, 0, 0) is blue
            cv2.rectangle(img_with_boxes, (x, y), (x + w, y + h), (255, 0, 0), 2)

            # Crop the number plate ROI from the original image (rows = y, columns = x)
            roi = img[y:y+h, x:x+w]

            cropped_plate_path = os.path.join(cropped_plates_dir, f'plate_{i}.jpg')
            # cv2.imwrite signals failure through its boolean result, so check
            # it instead of assuming the file was written.
            if cv2.imwrite(cropped_plate_path, roi):
                saved_count += 1
                print(f"Cropped plate {i} saved to {cropped_plate_path}")
            else:
                print(f"Warning: Could not save cropped plate {i} to {cropped_plate_path}")

        # 8. Display the image with the drawn bounding boxes
        #    (matplotlib expects RGB, OpenCV stores BGR)
        plt.figure(figsize=(10, 8))
        plt.imshow(cv2.cvtColor(img_with_boxes, cv2.COLOR_BGR2RGB))
        plt.title('Detected Number Plates')
        plt.axis('off')
        plt.show()

        if saved_count == len(plates):
            print(f"All detected number plates saved in the '{cropped_plates_dir}' directory.")
        else:
            print(f"Warning: Only {saved_count} of {len(plates)} detected number plates were saved in the '{cropped_plates_dir}' directory.")


In [None]:
import pytesseract
import os
import cv2
import matplotlib.pyplot as plt

# Run Tesseract OCR over every cropped plate image in 'detected_plates' and
# collect the alphanumeric-only readings in `recognized_plate_texts`.

# Tell pytesseract which Tesseract executable to launch.
# The hard-coded location is the one this notebook expects on Colab.
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

# Folder holding the cropped plate images
cropped_plates_dir = 'detected_plates'

# One entry per successfully loaded image, in sorted-filename order
recognized_plate_texts = []

# Proceed only when the folder exists and holds at least one entry
if os.path.exists(cropped_plates_dir) and len(os.listdir(cropped_plates_dir)) > 0:
    print(f"Processing images from '{cropped_plates_dir}'...")

    for filename in sorted(os.listdir(cropped_plates_dir)):
        # Anything that is not a .jpg/.jpeg/.png file is ignored
        if not filename.endswith(('.jpg', '.jpeg', '.png')):
            continue

        image_path = os.path.join(cropped_plates_dir, filename)
        cropped_img = cv2.imread(image_path)

        # Unreadable files are reported and skipped; they add nothing to the results
        if cropped_img is None:
            print(f"Warning: Could not load image '{filename}'. Skipping.")
            continue

        # Keep an untouched colour copy for the figure shown below
        original_display_img = cropped_img.copy()

        # Preprocess: grayscale, then Otsu binarisation (the threshold argument
        # of 0 is a placeholder; Otsu picks the level itself)
        gray_img = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2GRAY)
        _, thresh_img = cv2.threshold(gray_img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

        # OCR settings: page segmentation mode 8 (treat the image as a single
        # word) and OCR engine mode 3 (Tesseract's default engine selection)
        config_str = '--psm 8 --oem 3'
        text = pytesseract.image_to_string(thresh_img, config=config_str).strip()

        # Drop everything except letters and digits (spaces, punctuation, newlines)
        cleaned_text = ''.join(filter(str.isalnum, text))
        recognized_plate_texts.append(cleaned_text)

        # Show the colour crop with its reading as the title
        plt.figure(figsize=(6, 4))
        plt.imshow(cv2.cvtColor(original_display_img, cv2.COLOR_BGR2RGB))  # BGR -> RGB for matplotlib
        plt.title(f"Recognized Text: {cleaned_text}")
        plt.axis('off')
        plt.show()
        print(f"Processed '{filename}'. Recognized text: '{cleaned_text}'")

    print("\n--- OCR Summary ---")
    for i, plate_text in enumerate(recognized_plate_texts):
        print(f"Plate {i+1}: {plate_text}")
else:
    print(f"Error: Directory '{cropped_plates_dir}' not found or empty. Please ensure number plate detection was successful.")


In [None]:
import re

# `recognized_plate_texts` normally comes from the OCR cell. When this cell is
# run on its own the name does not exist yet, so fall back to a small set of
# sample readings for demonstration.
try:
    recognized_plate_texts
except NameError:
    recognized_plate_texts = [
        "AB12C34",
        "0S5E7G9",
        "I_L1K34",
        "BH01AL9999",
        "NOPLATE",
        "MH 04 AB 1234",
        "G_J 05 F 8976",
    ]

print(f"Original recognized plate texts: {recognized_plate_texts}")

# Look-alike characters OCR tends to swap. Which direction to correct in
# depends on whether the plate layout expects a letter or a digit at that spot.
_LETTER_TO_DIGIT = str.maketrans('OISB', '0158')
_DIGIT_TO_LETTER = str.maketrans('0158', 'OISB')

# Example layout used throughout this notebook: two letters, two digits, one or
# two letters, four digits (e.g. 'MH04AB1234'). Swap in another pattern and
# matching layouts for a different country's format.
_PLATE_PATTERN = re.compile(r'([A-Z]{2})(\d{2})([A-Z]{1,2})(\d{4})')
# 'L' marks a letter slot, 'D' a digit slot. Longest layout first so a
# ten-character reading is tried as a full plate before a nine-character one.
_PLATE_LAYOUTS = ('LLDDLLDDDD', 'LLDDLDDDD')
_MAX_PLATE_LENGTH = 10


def _coerce_to_layout(text, layout):
    """Return *text* with look-alike characters swapped to fit each slot of *layout*."""
    return ''.join(
        ch.translate(_DIGIT_TO_LETTER if slot == 'L' else _LETTER_TO_DIGIT)
        for ch, slot in zip(text, layout)
    )


def post_process_plate_text(text):
    """Normalise one raw OCR reading into a plate string.

    Steps:
      1. Upper-case the text, drop every character that is not A-Z or 0-9,
         and keep at most the first 10 characters.
      2. If the result already starts with a plate in the expected layout,
         return that plate (trailing characters are dropped).
      3. Otherwise try to repair look-alike confusions (O/0, I/1, S/5, B/8)
         position by position: letters are turned into digits only where the
         layout expects a digit, and digits into letters only where it expects
         a letter. The repaired text is returned if it then fits the layout.
      4. If no layout fits, return the cleaned text unchanged.

    Substitutions are deliberately never applied blindly: replacing every
    'B' with '8' (and so on) turns a correctly read 'MH04AB1234' into
    'MH04A81234'.

    Args:
        text: Raw string produced by the OCR step.

    Returns:
        The cleaned, and where possible layout-corrected, plate string.
        May be empty if *text* holds no alphanumeric characters.
    """
    cleaned_text = re.sub(r'[^A-Z0-9]', '', text.upper())[:_MAX_PLATE_LENGTH]

    # Already well-formed: nothing to correct.
    match = _PLATE_PATTERN.match(cleaned_text)
    if match:
        return match.group(0)

    # Attempt a position-aware repair against each known layout.
    for layout in _PLATE_LAYOUTS:
        if len(cleaned_text) < len(layout):
            continue
        candidate = _coerce_to_layout(cleaned_text[:len(layout)], layout)
        if _PLATE_PATTERN.fullmatch(candidate):
            return candidate

    # Unknown format: leave the reading as OCR produced it rather than guess.
    return cleaned_text

# Refined plate strings, index-aligned with `recognized_plate_texts`
final_plate_numbers = []

# Refine each OCR reading and echo the before/after pair as it is produced
print("\n--- Post-processing Results ---")
for i, original_text in enumerate(recognized_plate_texts):
    processed_text = post_process_plate_text(original_text)
    final_plate_numbers.append(processed_text)
    print(f"Original Plate {i+1}: {original_text}\nProcessed Plate {i+1}: {processed_text}\n")

# Compact side-by-side recap of every reading and its refined form
print("\n--- Summary Comparison ---")
for raw_text, refined_text in zip(recognized_plate_texts, final_plate_numbers):
    print(f"Original: {raw_text} -> Processed: {refined_text}")

print("\nPost-processing complete. Refined plate numbers stored in 'final_plate_numbers'.")

## Final Task

### Subtask:
Provide a comprehensive summary of the implemented Number Plate Recognition system, highlighting the successful integration of detection and OCR components. This step will also include a discussion of the system's performance and potential areas for future extensions or improvements.
