<a href="https://colab.research.google.com/github/MarouaHY/yolo11/blob/main/ocr_and_wer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import shutil

# Dataset layout: raw files sit directly under dataset_path and are sorted
# into the two sub-folders below.
dataset_path = "/content/drive/MyDrive/ocr"
images_dir = os.path.join(dataset_path, "images")
annotations_dir = os.path.join(dataset_path, "annotations")

# Extensions are compared case-insensitively so that files such as
# "IMG_01.JPG" or "plate.jpeg" are not silently left behind in the root.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")
ANNOTATION_EXTENSIONS = (".xml",)

# Create the destination folders if they do not exist yet.
os.makedirs(images_dir, exist_ok=True)
os.makedirs(annotations_dir, exist_ok=True)

# Move every regular file to the folder matching its extension.
for file in os.listdir(dataset_path):
    file_path = os.path.join(dataset_path, file)

    # Skip sub-directories (this includes the two destination folders).
    if not os.path.isfile(file_path):
        continue

    extension = os.path.splitext(file)[1].lower()
    if extension in IMAGE_EXTENSIONS:  # Images
        shutil.move(file_path, os.path.join(images_dir, file))
    elif extension in ANNOTATION_EXTENSIONS:  # Annotations
        shutil.move(file_path, os.path.join(annotations_dir, file))

print("✅ Tri des fichiers terminé : images et annotations séparées !")


✅ Tri des fichiers terminé : images et annotations séparées !


In [None]:
import os
import xml.etree.ElementTree as ET

# Paths
xml_dir = "/content/drive/MyDrive/ocr/annotations"
image_dir = "/content/drive/MyDrive/ocr/images"
ocr_labels_dir = "/content/drive/MyDrive/ocr/ocr labels dir"

# Create output folder
os.makedirs(ocr_labels_dir, exist_ok=True)

# Convert each XML annotation file into a one-line text file of the form
#   "<image path> x1,y1,x2,y2,x3,y3,x4,y4,<label> x1,y1,...,<label> ..."
for xml_file in os.listdir(xml_dir):
    if not xml_file.endswith(".xml"):
        continue

    xml_path = os.path.join(xml_dir, xml_file)
    root = ET.parse(xml_path).getroot()

    # findtext() returns None/"" instead of raising when the element is
    # missing or empty, so one incomplete file cannot abort the whole run.
    image_name = root.findtext("filename")
    if not image_name:
        print(f"⚠️ Skipping file (no <filename> element): {xml_file}")
        continue

    # First field is the image path; one extra field is appended per object.
    fields = [os.path.join(image_dir, image_name)]

    for obj in root.findall("object"):
        label = obj.findtext("name")
        bbox = obj.find("bndbox")
        if not label or bbox is None:
            print(f"⚠️ Skipping object (missing <name> or <bndbox>) in: {xml_file}")
            continue

        try:
            # Go through float() so that values written as "12.0" are accepted;
            # plain integer strings give exactly the same result as int().
            xmin, ymin, xmax, ymax = (
                int(float(bbox.findtext(tag)))
                for tag in ("xmin", "ymin", "xmax", "ymax")
            )
        except (TypeError, ValueError):
            print(f"⚠️ Skipping object (invalid bounding box) in: {xml_file}")
            continue

        # Axis-aligned box -> four corners, clockwise from the top-left:
        # (xmin,ymin) (xmax,ymin) (xmax,ymax) (xmin,ymax)
        fields.append(
            f"{xmin},{ymin},{xmax},{ymin},{xmax},{ymax},{xmin},{ymax},{label}"
        )

    # Swap only the extension; str.replace would also rewrite any ".xml"
    # occurring earlier in the file name.
    label_stem = os.path.splitext(xml_file)[0]
    ocr_label_path = os.path.join(ocr_labels_dir, label_stem + ".txt")
    with open(ocr_label_path, "w", encoding="utf-8") as f:
        f.write(" ".join(fields) + "\n")

print("✅ Conversion to OCR format completed!")


✅ Conversion to OCR format completed!


In [None]:
import os

# Path to the folder containing OCR annotation text files
annotation_folder = "/content/drive/MyDrive/ocr/ocr labels dir"  # Change to your actual path
image_folder = "/content/drive/MyDrive/ocr/images"  # Change to your actual image folder
output_file = "/content/path/to/ocr_annotations/ocr_labels.txt"  # Output file

# Check if the annotation folder exists
if not os.path.isdir(annotation_folder):
    print(f"❌ Error: The path '{annotation_folder}' is not a valid directory!")
else:
    print(f"📁 Processing OCR annotations from: {annotation_folder}")

    # open(..., "w") does not create missing parent folders, and no visible
    # cell creates this one, so make sure it exists before writing.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    # Write one "<image path> <plate text>" line per annotation file.
    with open(output_file, "w", encoding="utf-8") as outfile:
        # Sorted so that the line order of the output file is reproducible.
        for filename in sorted(os.listdir(annotation_folder)):
            file_path = os.path.join(annotation_folder, filename)

            # Process only text files
            if not (file_path.endswith(".txt") and os.path.isfile(file_path)):
                continue

            try:
                # Only the first line is read: "<image path> <box,label> ..."
                with open(file_path, "r", encoding="utf-8") as infile:
                    parts = infile.readline().strip().split()

                if len(parts) < 2:
                    print(f"⚠️ Skipping file (invalid annotation): {filename}")
                    continue

                image_filename = parts[0]  # Image path
                annotations = parts[1:]  # Annotations (bounding boxes + label)

                # Keep only the label (9th comma-separated value) of every
                # well-formed annotation, concatenated in file order.
                license_plate_text = "".join(
                    coords[8]
                    for coords in (annotation.split(",") for annotation in annotations)
                    if len(coords) == 9
                )

                # Construct the full image path
                image_path = os.path.join(image_folder, os.path.basename(image_filename))

                # Write to the output file in the desired format
                outfile.write(f"{image_path} {license_plate_text}\n")

                print(f"✅ Processed: {filename} -> {license_plate_text}")

            except Exception as e:
                # Best effort: report the failing file and carry on.
                print(f"❌ Error processing {filename}: {e}")

    print(f"✅ OCR labels saved to: {output_file}")


📁 Processing OCR annotations from: /content/drive/MyDrive/ocr/ocr labels dir
✅ Processed: syn_1038_jpg.rf.37d156c0dd84769ef31c6afb936004c4.txt -> Z0VH5T9C
✅ Processed: syn_1135_jpg.rf.38bbaa07b8b7ab73b9374f7cb88fe479.txt -> 034NCSKUR
✅ Processed: syn_1057_jpg.rf.9deda12f7a3b84a3455740df6e172585.txt -> CBMBV53
✅ Processed: syn_1030_jpg.rf.869479638c5f632e588905cef32b4c35.txt -> 6D5RHXT
✅ Processed: syn_1026_jpg.rf.c6c6cc5255e4dbfb28bc7864ed014b27.txt -> 1V6GCKN0C
✅ Processed: syn_3122_jpg.rf.bebce8633815c3a99aa58612b8a45413.txt -> BT8KGGPVA
✅ Processed: syn_3086_jpg.rf.46b500efca8732b12794f36a2c468a0c.txt -> 2TASEUE2
✅ Processed: syn_3058_jpg.rf.78a1c7c2f61f2048a24de6299c9e847b.txt -> RXBKCKGY0
✅ Processed: syn_3375_jpg.rf.6ee6f815e6ce02db613f1e445621a019.txt -> D5V050N1S
✅ Processed: syn_3094_jpg.rf.c6fa69a6a57156edaf3d5796f465e6c3.txt -> U45CC83
✅ Processed: syn_3126_jpg.rf.3b23c247b93b0b4ee51f81fe41c52695.txt -> MMKHPG8DU
✅ Processed: syn_3182_jpg.rf.74f5a01dd44e99afc8099acae31601d3.

# Partie Paddle

In [None]:
!pip install paddlepaddle paddleocr


Collecting paddlepaddle
  Downloading paddlepaddle-2.6.2-cp311-cp311-manylinux1_x86_64.whl.metadata (8.6 kB)
Collecting paddleocr
  Downloading paddleocr-2.9.1-py3-none-any.whl.metadata (8.5 kB)
Collecting astor (from paddlepaddle)
  Downloading astor-0.8.1-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting opt-einsum==3.3.0 (from paddlepaddle)
  Downloading opt_einsum-3.3.0-py3-none-any.whl.metadata (6.5 kB)
Collecting pyclipper (from paddleocr)
  Downloading pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting lmdb (from paddleocr)
  Downloading lmdb-1.6.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Collecting rapidfuzz (from paddleocr)
  Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting python-docx (from paddleocr)
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting fire>=0.3.0 (from paddleocr)
  Download

In [None]:
# Smoke test: check that the paddleocr package installed above can be imported
# before running the evaluation.
from paddleocr import PaddleOCR
print("Importation réussie ✅")


Importation réussie ✅


# Tester paddle sur tout le dataset

In [None]:
!pip install python-Levenshtein


Collecting python-Levenshtein
  Downloading python_Levenshtein-0.26.1-py3-none-any.whl.metadata (3.7 kB)
Collecting Levenshtein==0.26.1 (from python-Levenshtein)
  Downloading levenshtein-0.26.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.2 kB)
Downloading python_Levenshtein-0.26.1-py3-none-any.whl (9.4 kB)
Downloading levenshtein-0.26.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.7/162.7 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Levenshtein, python-Levenshtein
Successfully installed Levenshtein-0.26.1 python-Levenshtein-0.26.1


# Évaluer l'OCR avec le WER

In [None]:
import os
import re
import paddleocr
import Levenshtein

# Load PaddleOCR (English models, angle-classifier option enabled)
ocr = paddleocr.PaddleOCR(use_angle_cls=True, lang='en')

# Ground-truth file: one "<image path> <plate text>" entry per line
gt_file_path = "/content/path/to/ocr_annotations/ocr_labels.txt"
ground_truths = {}

# Read the ground truths into a dict keyed by image file name
with open(gt_file_path, "r", encoding="utf-8") as f:
    for line in f:
        # Split on the LAST run of whitespace only: the plate text never
        # contains spaces, but the image path may. A plain split() would cut
        # such a path in two and store a fragment of it as the plate text.
        parts = line.strip().rsplit(maxsplit=1)
        if len(parts) == 2:
            image_path, plate_text = parts
            image_name = os.path.basename(image_path)  # Keep the file name only
            ground_truths[image_name] = plate_text

# Sanity check: show the first few loaded entries
print("✅ Exemple de correspondance GT :")
for k, v in list(ground_truths.items())[:5]:
    print(f"{k} -> {v}")

# Helper that reduces an OCR result to the bare plate string
def extract_plate_text(ocr_result):
    """Concatenate the recognised text fragments into one alphanumeric string.

    Args:
        ocr_result: Iterable of detections in which each item exposes its
            recognised text at ``item[1][0]``, or ``None`` when nothing was
            detected.

    Returns:
        The fragments, upper-cased, stripped of every character outside
        ``A-Z0-9`` and joined in iteration order. ``""`` when ``ocr_result``
        is ``None``.
    """
    # Guard clause: indexing into None would raise a TypeError.
    if ocr_result is None:
        return ""

    return "".join(
        re.sub(r"[^A-Z0-9]", "", detection[1][0].upper())
        for detection in ocr_result
    )

# Folder containing the images to evaluate
image_folder = "/content/drive/MyDrive/ocr/images"
wer_scores = []  # One score per image that has a ground truth

# NOTE(review): the score reported as "WER" is a character-level Levenshtein
# distance divided by the ground-truth length, i.e. closer to a character
# error rate than to a word error rate. Confirm that this is the intended metric.

# Process every image (sorted so that the printed report is reproducible)
for image_name in sorted(os.listdir(image_folder)):
    image_path = os.path.join(image_folder, image_name)

    # os.listdir also returns sub-directories (e.g. checkpoint folders);
    # a directory is not an image and cannot be OCR'd, so skip it.
    if not os.path.isfile(image_path):
        continue

    # Run OCR; only the first element of the result is used
    ocr_result = ocr.ocr(image_path, cls=True)
    extracted_plate = extract_plate_text(ocr_result[0] if ocr_result else None)

    # Look up the matching ground truth ("N/A" marks a missing entry)
    ground_truth = ground_truths.get(image_name, "N/A")

    # Score only the images that have a ground truth
    if ground_truth != "N/A":
        # max(..., 1) avoids a division by zero on an empty ground truth
        wer = Levenshtein.distance(ground_truth, extracted_plate) / max(len(ground_truth), 1)
        wer_scores.append(wer)
    else:
        wer = None

    # Per-image report
    print(f"🖼 Image: {image_name}")
    print(f"✅ Ground Truth: {ground_truth}")
    print(f"🔍 OCR Result: {extracted_plate}")
    print(f"⚡ WER: {wer:.4f}" if wer is not None else "⚠️ WER: N/A (pas de GT disponible)")
    print("-" * 50)

# Mean score over all images that were scored
if wer_scores:
    wer_mean = sum(wer_scores) / len(wer_scores)
    print("\n📊 WER Moyen sur toutes les images :", round(wer_mean, 4))
else:
    print("\n⚠️ Impossible de calculer le WER moyen (aucun GT valide).")


[2025/02/24 20:15:43] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/root/.paddleocr/whl/det/en/en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/root/.paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_text_length=25, rec_c