# IMPORTS

In [None]:
# Mount Google Drive at /drive; the dataset archive is unzipped from
# /drive/MyDrive in the next cell. force_remount=True is passed so the mount is
# redone even when the cell is re-run.
from google.colab import drive
drive.mount('/drive',force_remount=True)

In [None]:
!unzip /drive/MyDrive/data.zip -d ../


In [None]:
!pip install ultralytics

In [None]:
from os import path, listdir
import cv2
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
from torchvision import models, transforms
import torch.nn as nn
from torch.nn import Module, Sequential, Conv2d, ReLU, MaxPool2d, LSTM, Linear
from torch.optim import Adam
from ultralytics import YOLO
import matplotlib.pyplot as plt
import torch.nn.functional as F
import glob
import numpy as np
from tqdm import tqdm

In [14]:
# Size (in pixels) every cropped plate is resized to; also the input size the
# model is built for.
IMAGE_HEIGHT = 128
IMAGE_WIDTH = 256
# Fixed length of an encoded label: labels are padded to this many positions and
# the model emits this many character predictions per plate.
PLATE_SIZE = 12

# PreProcessing

In [None]:
!pip install unidecode

## Fonctions de preprocess

Dans cette étape, nous prétraitons les images, c'est-à-dire que nous extrayons les plaques d'immatriculation grâce aux détections (fichier detections.csv, qui contient les coordonnées des plaques extraites par le modèle YOLO). Ensuite, nous redimensionnons les plaques afin qu'elles aient toutes la même taille.

In [16]:
import os
import pandas as pd
import cv2
import torch
from unidecode import unidecode
from tqdm import tqdm
from torchvision import transforms
from concurrent.futures import ThreadPoolExecutor

def process_image(image_path, row, output_size):
    """Crop one licence plate out of an image and turn it into a tensor.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.
        row: Mapping describing the detection for this image. It must provide
            the bounding box under "X_min", "X_max", "Y_min", "Y_max" and the
            plate text under "Plaque".
        output_size: ``(height, width)`` handed to ``transforms.Resize``.

    Returns:
        ``(plate_tensor, label)`` where ``plate_tensor`` is the resized crop as
        a float tensor in RGB channel order and ``label`` is the plate text,
        ASCII-folded with ``unidecode`` and upper-cased.
        ``(None, None)`` when the image cannot be read, when the bounding box
        is empty once clamped to the image, or when any step raises (the error
        is printed, not propagated).
    """
    try:
        image = cv2.imread(image_path)
        if image is None:
            return None, None

        x_min, x_max, y_min, y_max = map(int, [row["X_min"], row["X_max"], row["Y_min"], row["Y_max"]])

        # Clamp the box to the image: a negative coordinate would otherwise be
        # interpreted by NumPy as "count from the end" and crop the wrong area.
        img_h, img_w = image.shape[:2]
        x_min, x_max = max(0, x_min), min(img_w, x_max)
        y_min, y_max = max(0, y_min), min(img_h, y_max)
        if x_max <= x_min or y_max <= y_min:
            print(f"Error processing {image_path}: empty bounding box")
            return None, None

        # OpenCV loads images as BGR, whereas ToPILImage, the ImageNet
        # pre-trained backbone and matplotlib all assume RGB.
        cropped = cv2.cvtColor(image[y_min:y_max, x_min:x_max], cv2.COLOR_BGR2RGB)

        transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(output_size),
            transforms.ToTensor(),
            # NOTE(review): ToTensor already scales uint8 pixels to [0, 1], so
            # this second division squeezes values into roughly [0.001, 0.005].
            # It is kept as-is because the display cell further down undoes
            # this exact scale; revisit both together.
            transforms.Lambda(lambda x: x / 255.0 + 1e-3)
        ])
        processed_plate = transform(cropped)

        # Normalise the label: strip accents, then upper-case.
        plate = row["Plaque"]
        label = unidecode(plate).upper()

        return processed_plate, label
    except Exception as e:
        # Best effort: one bad sample must not abort the whole extraction.
        print(f"Error processing {image_path}: {e}")
        return None, None

def extract_plates_and_labels(images_dir, csv_path, output_size):
    """Build the plate tensors and labels for every detected image of a folder.

    Args:
        images_dir: Directory scanned (non-recursively) for .jpg/.jpeg/.png files.
        csv_path: CSV file with a "Filename" column used to look up the
            detection row of each image.
        output_size: Forwarded unchanged to ``process_image``.

    Returns:
        ``(plates, labels)``: two parallel lists holding, for every image that
        has a detection row and was processed successfully, the value pair
        returned by ``process_image``.
    """
    detections_by_file = pd.read_csv(csv_path).set_index("Filename")

    # Keep only files whose extension looks like an image (case-insensitive).
    candidates = []
    for entry in os.listdir(images_dir):
        if entry.lower().endswith(('.jpg', '.jpeg', '.png')):
            candidates.append(os.path.join(images_dir, entry))

    # A plain dict keyed by file name is cheaper to query than the DataFrame.
    lookup = detections_by_file.to_dict(orient='index')

    def _load_one(path):
        detection = lookup.get(os.path.basename(path))
        if detection is None:
            return None, None
        return process_image(path, detection, output_size)

    # Images are processed concurrently; executor.map preserves input order.
    with ThreadPoolExecutor() as pool:
        outcomes = list(tqdm(pool.map(_load_one, candidates), total=len(candidates)))

    # Drop every sample that failed or had no detection.
    kept = [
        (plate, text)
        for plate, text in outcomes
        if plate is not None and text is not None
    ]
    plates = [plate for plate, _ in kept]
    labels = [text for _, text in kept]

    return plates, labels


In [17]:
def encode_labels(labels, char_to_idx, padding_value=0, size=50):
    """Encode label strings as fixed-length tensors of character indices.

    Args:
        labels: Iterable of label strings.
        char_to_idx: Mapping from a single character to its integer index.
        padding_value: Index appended after the last character to reach ``size``.
        size: Exact length of every returned tensor.

    Returns:
        A list with one 1-D ``torch.long`` tensor of length ``size`` per label.

    Raises:
        ValueError: If a label has more than ``size`` characters. Such a label
            cannot be padded to the common length and would otherwise only fail
            later, when samples of different lengths are batched together.
        KeyError: If a label contains a character absent from ``char_to_idx``.
    """
    tensor_labels = []
    for label in labels:
        if len(label) > size:
            raise ValueError(
                f"Label {label!r} has {len(label)} characters, more than size={size}"
            )
        try:
            encoded = [char_to_idx[char] for char in label]
        except KeyError as err:
            raise KeyError(
                f"Label {label!r} contains a character missing from char_to_idx: {err}"
            ) from err

        # Right-pad so that every tensor has exactly `size` entries.
        encoded.extend([padding_value] * (size - len(encoded)))
        tensor_labels.append(torch.tensor(encoded, dtype=torch.long))

    return tensor_labels


# Vocabulary used to encode plate labels.
# Index 0 is the padding entry (decoded as the empty string), 'A'-'Z' map to
# 1-26, '0'-'9' map to 27-36, followed by the three separator characters.
# The digit offset has to be 27: with 26, '0' shares index 26 with 'Z', the
# reverse mapping below loses 'Z' and index 36 is never used.
char_to_idx = {
    '': 0,
    **{chr(code): idx for idx, code in enumerate(range(ord('A'), ord('Z') + 1), start=1)},
    **{str(digit): digit + 27 for digit in range(10)},
    '-': 37,
    ' ': 38,
    '.': 39
}
# Reverse lookup used to turn predicted indices back into characters.
idx_to_char = {v: k for k, v in char_to_idx.items()}

In [18]:
class PlateDataset(Dataset):
    """In-memory dataset pairing pre-processed plate images with their labels.

    ``images`` and ``labels`` are indexed in parallel; nothing is copied,
    validated or transformed.
    """

    def __init__(self, images,labels):
        self.images, self.labels = images, labels

    def __len__(self):
        """Return the number of samples, taken from the image sequence."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``.

        Both elements are returned exactly as they were given to the
        constructor.
        """
        image = self.images[idx]
        label = self.labels[idx]
        return image, label

In [None]:
# Detections file shared by the three splits.
csv_path = "/data/detections.csv"

# Crop and resize the plates of each split; X_* hold the image tensors and
# Y_* the label strings.
X_train, Y_train = extract_plates_and_labels(
    "/data/train", csv_path, (IMAGE_HEIGHT, IMAGE_WIDTH)
)
X_val, Y_val = extract_plates_and_labels(
    "/data/val", csv_path, (IMAGE_HEIGHT, IMAGE_WIDTH)
)
X_test, Y_test = extract_plates_and_labels(
    "/data/test", csv_path, (IMAGE_HEIGHT, IMAGE_WIDTH)
)

# Encode the label strings as index tensors padded with 0 up to PLATE_SIZE.
Y_train_encoded, Y_val_encoded, Y_test_encoded = (
    encode_labels(split_labels, char_to_idx, 0, PLATE_SIZE)
    for split_labels in (Y_train, Y_val, Y_test)
)



In [None]:
# Sanity check: number of samples per split and the shape of the first image
# tensor of each split (raises IndexError if a split is empty).
print(f"NB train samples : {len(Y_train_encoded)}, X_train shape : {X_train[0].shape}")
print(f"NB val samples : {len(Y_val_encoded)}, X_val shape : {X_val[0].shape}")
print(f"NB test samples : {len(Y_test_encoded)}, X_test shape : {X_test[0].shape}")

# Model

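Modèle : actuellement, un MobileNetV3-Large pré-entraîné sur ImageNet extrait les features de l'image, puis un encodeur Transformer les transforme en une séquence de `PLATE_SIZE` prédictions de caractères.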

In [26]:
import torch
import torch.nn as nn
import torchvision.models as models

class CRNNWithTransformer(nn.Module):
    """MobileNetV3 feature extractor followed by a Transformer encoder.

    The CNN feature map is flattened and cut into ``plate_size`` equal chunks.
    Each chunk is projected to ``transformer_dim``, passed through the
    Transformer encoder and classified, giving one class prediction per
    character position.

    Args:
        num_classes: Size of the per-position output layer.
        plate_size: Number of character positions predicted per image.
        image_height: Height of the input images the model is built for.
        image_width: Width of the input images the model is built for.
        transformer_dim: Model dimension of the Transformer encoder.
        num_heads: Number of attention heads per encoder layer.
        num_layers: Number of encoder layers.
        dropout_rate: Dropout used inside the encoder layers.

    Raises:
        ValueError: If the flattened CNN feature map for the given image size
            cannot be split evenly into ``plate_size`` chunks.
    """

    def __init__(self, num_classes, plate_size, image_height, image_width,
                 transformer_dim=64, num_heads=8, num_layers=2, dropout_rate=0.2):
        super(CRNNWithTransformer, self).__init__()
        self.plate_size = plate_size

        # Pre-trained MobileNetV3-Large, convolutional part only.
        mobilenet_v3 = models.mobilenet_v3_large(weights=models.MobileNet_V3_Large_Weights.IMAGENET1K_V1)
        self.features = mobilenet_v3.features

        OUTPUT_FEATURES = 960  # Channels of MobileNetV3-Large's last convolutional layer

        # Learnable 1x1 convolution applied on top of the backbone features
        # (keeps the channel count unchanged).
        self.conv1x1 = nn.Conv2d(OUTPUT_FEATURES, OUTPUT_FEATURES, kernel_size=1)

        conv_h, conv_w = self._conv_output_size(image_height, image_width)
        self.feature_size = conv_h * conv_w * OUTPUT_FEATURES
        if self.feature_size % plate_size != 0:
            raise ValueError(
                f"CNN output of {self.feature_size} values ({conv_h}x{conv_w}x{OUTPUT_FEATURES}) "
                f"cannot be split evenly into plate_size={plate_size} chunks"
            )

        # Transformer Encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=transformer_dim,
            nhead=num_heads,
            dim_feedforward=transformer_dim * 2,
            dropout=dropout_rate,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Projection from one chunk of the CNN feature map to the transformer input dimension
        self.feature_projection = nn.Linear(self.feature_size // plate_size, transformer_dim)

        # Per-position classifier
        self.fc = nn.Linear(transformer_dim, num_classes)

    def _conv_output_size(self, height, width):
        """Return the (height, width) of the backbone output for one input size.

        The probe runs in eval mode: in training mode the dummy all-zero batch
        would be folded into the BatchNorm running statistics of the
        pre-trained backbone. The previous mode is restored afterwards.
        """
        was_training = self.features.training
        self.features.eval()
        try:
            with torch.no_grad():
                probe = self.features(torch.zeros(1, 3, height, width))
        finally:
            self.features.train(was_training)
        return probe.size(2), probe.size(3)

    def forward(self, x):
        """Predict class scores for every character position.

        Args:
            x: Image batch of shape (batch_size, 3, H, W). H and W are expected
                to match the size given at construction, since the projection
                layer was sized from it.

        Returns:
            Tensor of shape (batch_size, plate_size, num_classes) with
            unnormalised class scores.
        """
        batch_size = x.size(0)

        x = self.features(x)  # Shape: (batch_size, 960, conv_h, conv_w)
        x = self.conv1x1(x)  # Shape: (batch_size, 960, conv_h, conv_w)

        # Flatten and cut into plate_size chunks for the transformer
        x = x.permute(0, 2, 3, 1).contiguous()  # Shape: (batch_size, conv_h, conv_w, 960)
        x = x.view(batch_size, self.plate_size, -1)  # Shape: (batch_size, plate_size, feature_size // plate_size)
        x = self.feature_projection(x)  # Shape: (batch_size, plate_size, transformer_dim)

        x = self.transformer(x)  # Shape: (batch_size, plate_size, transformer_dim)

        outputs = self.fc(x)  # Shape: (batch_size, plate_size, num_classes)
        return outputs


In [27]:
BATCH_SIZE = 16

# Wrap each split in a dataset and a dataloader.
dataset_train = PlateDataset(X_train,Y_train_encoded)
dataset_val = PlateDataset(X_val,Y_val_encoded)
dataset_test = PlateDataset(X_test,Y_test_encoded)
dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
# Validation is not shuffled: the epoch validation loss is a mean of per-batch
# means, so reshuffling changes which samples land in the (smaller) last batch
# and makes the value used for early stopping fluctuate for no reason.
dataloader_val = DataLoader(dataset_val, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
# The test loader stays shuffled because the sample-visualisation cell takes
# the first batches it yields as its "random" selection.
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

In [None]:
# The output layer needs one class per label index, i.e. indices
# 0..max(idx_to_char). Deriving the count from the largest index (rather than
# from the number of entries) stays correct whatever the vocabulary contains.
model = CRNNWithTransformer(dropout_rate=0.3,num_classes=max(idx_to_char) + 1,plate_size=PLATE_SIZE,image_height=IMAGE_HEIGHT,image_width=IMAGE_WIDTH)

In [None]:
import gc

# Force a garbage-collection pass before training; presumably meant to release
# memory left over from preprocessing — TODO confirm it is still needed.
gc.collect()



# Train Model

In [None]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from tqdm import tqdm

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Loss and Optimizer.
# Gradient clipping is applied inside the training loop, after backward():
# calling it here, before any gradient exists, has no effect.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Move the model to the appropriate device
model = model.to(device)

EPOCHS = 100
PATIENCE = 5  # Stop if validation loss doesn't improve for 5 epochs
min_val_loss = float('inf')
patience_counter = 0
train_loss_values = []
val_loss_values = []

for epoch in range(EPOCHS):
    model.train()
    running_train_loss = 0.0

    # Training loop
    for images, labels in tqdm(dataloader_train, desc=f"Epoch {epoch+1}/{EPOCHS} - Training"):
        images = images.to(device)
        labels = labels.to(device)  # Ensure labels are on the same device

        # Forward pass
        output = model(images)  # (batch_size, sequence_length, num_classes)
        # CrossEntropyLoss expects the class dimension right after the batch one
        output = output.permute(0, 2, 1)  # (batch_size, num_classes, sequence_length)

        # Compute loss
        loss = criterion(output, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
        optimizer.step()

        running_train_loss += loss.item()

    # Epoch loss = mean of the per-batch losses
    epoch_train_loss = running_train_loss / len(dataloader_train)
    train_loss_values.append(epoch_train_loss)

    # Validation loop
    model.eval()
    running_val_loss = 0.0

    with torch.no_grad():
        for images, labels in tqdm(dataloader_val, desc=f"Epoch {epoch+1}/{EPOCHS} - Validation"):
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            output = model(images)
            output = output.permute(0, 2, 1)

            # Compute loss
            loss = criterion(output, labels)
            running_val_loss += loss.item()

    epoch_val_loss = running_val_loss / len(dataloader_val)
    val_loss_values.append(epoch_val_loss)

    print(f"Epoch [{epoch + 1}/{EPOCHS}], Train Loss: {epoch_train_loss:.4f}, Validation Loss: {epoch_val_loss:.4f}")

    # Early stopping logic: checkpoint on improvement, otherwise count towards PATIENCE
    if epoch_val_loss < min_val_loss:
        min_val_loss = epoch_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), 'best_model.pth')  # Save the best model
        print(f"Validation loss improved. Model saved.")
    else:
        patience_counter += 1
        print(f"No improvement in validation loss. Patience: {patience_counter}/{PATIENCE}")

    if patience_counter >= PATIENCE:
        print("Early stopping triggered. Training stopped.")
        break

# Plot the loss graph for training and validation
plt.figure(figsize=(10, 6))
plt.plot(range(1, len(train_loss_values) + 1), train_loss_values, label="Training Loss")
plt.plot(range(1, len(val_loss_values) + 1), val_loss_values, label="Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training and Validation Loss Over Epochs")
plt.legend()
plt.grid()
plt.show()

print("Training complete. Best model saved as 'best_model.pth'.")


# Evaluate Model

In [None]:
# Reload the weights of the checkpoint written by the training cell (the epoch
# with the lowest validation loss), which may differ from the last-epoch weights.
model.load_state_dict(torch.load('best_model.pth', map_location=device))
model = model.to(device)
model.eval()  # Put the model in evaluation mode.



In [None]:
nbtot = 0
nb_good = 0

# Counters for the character-level accuracy
total_characters = 0
correct_characters = 0

sequence_accuracies = []

# Inference only: no_grad avoids building the autograd graph for every batch.
with torch.no_grad():
    for images, labels in dataloader_test:
        images = images.to(device)
        labels = labels.to(device)
        output = model(images)  # Output shape: (batch_size, sequence_length, num_classes)
        predicted_labels = output.argmax(dim=2)  # Shape: (batch_size, sequence_length)

        # Compare the predictions with the ground-truth labels
        for i in range(len(predicted_labels)):
            # .get(..., "") keeps decoding from crashing if the model predicts a
            # class index that has no character in idx_to_char.
            predicted_text = "".join(idx_to_char.get(idx, "") for idx in predicted_labels[i].tolist())
            ground_truth = "".join(idx_to_char.get(idx, "") for idx in labels[i].tolist())

            # Exact-match accuracy (whole decoded plate); mismatches are printed
            if ground_truth == predicted_text:
                nb_good += 1
            else :
                print(f"{ground_truth:^{40}} -- {predicted_text}")
            # Character-level accuracy: position-wise comparison of the decoded
            # strings, normalised by the ground-truth length
            correct_characters += sum(1 for a, b in zip(predicted_text, ground_truth) if a == b)
            total_characters += len(ground_truth)

            nbtot += 1

# Final metrics
exact_match_accuracy = nb_good / nbtot if nbtot > 0 else 0
character_accuracy = correct_characters / total_characters if total_characters > 0 else 0

print(f'Exact Match Accuracy: {exact_match_accuracy:.2%}')
print(f'Character-level Accuracy: {character_accuracy:.2%}')
# Save a copy of the evaluated weights with both scores in the file name
torch.save(model.state_dict(), f'model_MobileNet_transfo_char={character_accuracy:.2%}_word={exact_match_accuracy:.2%}.pth')


In [None]:

# Collect 10 samples: the first ones yielded by the test loader, whose order
# depends on how the loader was configured.
sampled_images = []
sampled_predictions = []
sampled_ground_truths = []

# Inference only: no_grad avoids building the autograd graph.
with torch.no_grad():
    for images, labels in dataloader_test:
        images = images.to(device)
        labels = labels.to(device)
        output = model(images)  # Output shape: (batch_size, sequence_length, num_classes)
        predicted_labels = output.argmax(dim=2)  # Shape: (batch_size, sequence_length)

        for i in range(len(images)):
            if len(sampled_images) >= 10:
                break
            sampled_images.append(images[i].detach().cpu())
            # .get(..., "") keeps decoding from crashing on a class index that
            # has no character in idx_to_char.
            predicted_text = "".join(idx_to_char.get(idx, "") for idx in predicted_labels[i].tolist())
            ground_truth = "".join(idx_to_char.get(idx, "") for idx in labels[i].tolist())
            sampled_predictions.append(predicted_text)
            sampled_ground_truths.append(ground_truth)

        if len(sampled_images) >= 10:
            break


for i,image in enumerate(sampled_images) :
    # (C, H, W) -> (H, W, C), then undo the preprocessing scale
    # (x / 255 + 1e-3) so values are back in [0, 1] as imshow expects for floats.
    image = ((image.permute(1, 2, 0) - 1e-3) * 255).clamp(0.0, 1.0)
    plt.imshow(image.numpy())
    plt.title(f"GT: {sampled_ground_truths[i]}\nPred: {sampled_predictions[i]}")
    plt.axis('off')

    plt.tight_layout()
    plt.show()


In [None]:
from collections import Counter
# Flatten every training label into a single list of characters.
letters = [char for label in Y_train for char in label]

# Count how many times each character occurs.
label_counts = Counter(letters)

# Print the counts in sorted character order.
for label, count in sorted(label_counts.items()):
    print(f"Caractère {label}: {count} occurrences")
