<a href="https://colab.research.google.com/github/MichaelTj02/FinalProject_Group1/blob/main/LicensePlate_Tesseract.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **License plate character detection using Tesseract OCR**

# Convert label and image files

**Convert XML labels to .box for Tesseract compatibility**



In [1]:
import os
import xml.etree.ElementTree as ET

# Define function to convert XML to .box
def convert_xml_to_box(xml_path, image_name, output_dir, image_height=None):
    """Convert one XML annotation file into a Tesseract ``.box`` file.

    Each ``<object>`` element contributes one line to
    ``<output_dir>/<image_name>.box`` in the form
    ``<char> <left> <bottom> <right> <top> <page>``.

    The XML bounding boxes (``xmin``/``ymin``/``xmax``/``ymax``) are treated as
    measured from the top-left corner of the image, whereas Tesseract box files
    measure from the bottom-left corner. The y coordinates are therefore
    flipped using the image height.

    Args:
        xml_path: Path of the XML annotation to read.
        image_name: Base name (without extension) for the generated box file.
        output_dir: Existing directory that receives the box file.
        image_height: Image height in pixels. When omitted it is read from the
            annotation's ``<size><height>`` element (assumes Pascal VOC-style
            annotations; confirm against the dataset).

    Raises:
        ValueError: If no positive image height is available, since the
            y-axis flip cannot be computed without it.
    """
    root = ET.parse(xml_path).getroot()

    if image_height is None:
        height_text = root.findtext("size/height")
        if height_text is None:
            raise ValueError(
                f"{xml_path}: no <size><height> element; pass image_height explicitly"
            )
        image_height = height_text
    # Tolerate heights/coordinates serialised as floats (e.g. "64.0").
    image_height = int(float(image_height))
    if image_height <= 0:
        raise ValueError(f"{xml_path}: image height must be positive, got {image_height}")

    # Build every line before touching the output file so that a malformed
    # annotation does not leave a truncated .box file behind.
    lines = []
    for obj in root.findall("object"):
        character = obj.find("name").text
        bndbox = obj.find("bndbox")
        xmin, ymin, xmax, ymax = (
            int(float(bndbox.find(tag).text))
            for tag in ("xmin", "ymin", "xmax", "ymax")
        )

        # Flip the y axis: the XML's lower edge (ymax) becomes the box "bottom".
        bottom = image_height - ymax
        top = image_height - ymin

        # Trailing 0 is the page number.
        lines.append(f"{character} {xmin} {bottom} {xmax} {top} 0\n")

    box_path = os.path.join(output_dir, image_name + ".box")
    with open(box_path, "w", encoding="utf-8") as box_file:
        box_file.writelines(lines)

# Process all XML files in train and test folders
def process_labels(label_dir, output_dir):
    """Convert the XML labels of the ``train`` and ``test`` splits to ``.box`` files.

    For each split, ``<output_dir>/<split>`` is created if needed and every
    file ending in ``.xml`` inside ``<label_dir>/<split>`` is handed to
    ``convert_xml_to_box``, using the file name without its extension as the
    image name.

    Args:
        label_dir: Directory containing the ``train`` and ``test`` label folders.
        output_dir: Directory under which the per-split output folders are made.
    """
    for subset in ("train", "test"):
        source_dir = os.path.join(label_dir, subset)
        target_dir = os.path.join(output_dir, subset)
        os.makedirs(target_dir, exist_ok=True)

        # Only XML annotations are converted; anything else is ignored.
        xml_names = [name for name in os.listdir(source_dir) if name.endswith(".xml")]
        for xml_name in xml_names:
            stem, _ = os.path.splitext(xml_name)
            convert_xml_to_box(os.path.join(source_dir, xml_name), stem, target_dir)

# Dataset locations: absolute paths under a specific Windows user profile,
# so they must be adjusted before running on another machine.
label_dir = r"C:\Users\mtj02\Desktop\FinalProject_Group1\Datasets\LicensePlateDataset\labels"
output_dir = r"C:\Users\mtj02\Desktop\FinalProject_Group1\Datasets\LicensePlateDataset\labels(box)"

# Convert the XML labels of both splits into .box files.
process_labels(label_dir=label_dir, output_dir=output_dir)

**Convert images to TIFF**

In [2]:
import os
from PIL import Image

def convert_images_to_tiff(input_dir, output_dir):
    """Convert every PNG/JPEG file in ``input_dir`` to an RGB TIFF in ``output_dir``.

    The output directory is created if it does not exist. Each converted image
    keeps its base name and gets a ``.tif`` extension, and one progress line is
    printed per file. File extensions are matched case-insensitively, so
    ``IMG.PNG`` or ``scan.JPG`` are converted as well.

    Args:
        input_dir: Directory containing the source images.
        output_dir: Directory that receives the ``.tif`` files.
    """
    image_suffixes = (".png", ".jpg", ".jpeg")
    os.makedirs(output_dir, exist_ok=True)  # Ensure the output directory exists

    for filename in os.listdir(input_dir):
        # Lower-case only for the comparison; the original name is kept for
        # the paths and the log line.
        if not filename.lower().endswith(image_suffixes):
            continue

        image_path = os.path.join(input_dir, filename)
        output_path = os.path.join(output_dir, os.path.splitext(filename)[0] + ".tif")

        # Open the image, normalise it to RGB, and save it as TIFF.
        with Image.open(image_path) as img:
            img.convert("RGB").save(output_path, format="TIFF")
            print(f"Converted {filename} to {output_path}")

# Convert both dataset splits. The paths are absolute locations under a
# specific Windows user profile and must be adjusted on another machine.
convert_images_to_tiff(
    input_dir=r"C:\Users\mtj02\Desktop\FinalProject_Group1\Datasets\LicensePlateDataset\images\train",
    output_dir=r"C:\Users\mtj02\Desktop\FinalProject_Group1\Datasets\LicensePlateDataset\images(tiff)\train",
)
convert_images_to_tiff(
    input_dir=r"C:\Users\mtj02\Desktop\FinalProject_Group1\Datasets\LicensePlateDataset\images\test",
    output_dir=r"C:\Users\mtj02\Desktop\FinalProject_Group1\Datasets\LicensePlateDataset\images(tiff)\test",
)


Converted 0000.png to C:\Users\mtj02\Desktop\FinalProject_Group1\Datasets\LicensePlateDataset\images(tiff)\train\0000.tif
Converted 0001.png to C:\Users\mtj02\Desktop\FinalProject_Group1\Datasets\LicensePlateDataset\images(tiff)\train\0001.tif
Converted 0003.png to C:\Users\mtj02\Desktop\FinalProject_Group1\Datasets\LicensePlateDataset\images(tiff)\train\0003.tif
Converted 0004.png to C:\Users\mtj02\Desktop\FinalProject_Group1\Datasets\LicensePlateDataset\images(tiff)\train\0004.tif
Converted 0005.png to C:\Users\mtj02\Desktop\FinalProject_Group1\Datasets\LicensePlateDataset\images(tiff)\train\0005.tif
Converted 0006.png to C:\Users\mtj02\Desktop\FinalProject_Group1\Datasets\LicensePlateDataset\images(tiff)\train\0006.tif
Converted 0008.png to C:\Users\mtj02\Desktop\FinalProject_Group1\Datasets\LicensePlateDataset\images(tiff)\train\0008.tif
Converted 0009.png to C:\Users\mtj02\Desktop\FinalProject_Group1\Datasets\LicensePlateDataset\images(tiff)\train\0009.tif
Converted 0010.png to C: