# Project detection math expression

## Imports

In [1]:
from skimage import io, transform
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms, utils
from collections import Counter
from PIL import Image
from tqdm import tqdm
from pathlib import Path

import os
import torch
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import torchvision.models.detection as detection
# Install pix2text robustly using the current Python executable
import sys, subprocess
try:
    import pix2text
except Exception:
    print('pix2text not found ‚Äî installing via python -m pip')
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pix2text>=1.1'])
    import pix2text
print('pix2text is available')
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

plt.ion()  

  from .autonotebook import tqdm as notebook_tqdm
 


  from .autonotebook import tqdm as notebook_tqdm
 


pix2text is available


<contextlib.ExitStack at 0x7970861ab250>

## Pre treatement

In [9]:
class CROHMEDataset(Dataset):
    """
    Dataset pour les expressions compl√®tes (PNG + LG).
    Chaque sample retourne :
        - image : Tensor CxHxW
        - target : dict contenant "boxes" et "labels"
    """

    def __init__(self, root, transform=None, meta_classes=True):
        """
        root : chemin du dossier contenant les PNG + LG
        transform : transform PyTorch (augmentations, ToTensor, Resize‚Ä¶)
        meta_classes : si True, map chaque label vers une m√©ta-classe
        """
        self.root = root
        self.transform = transform
        self.meta_classes = meta_classes

        # liste des fichiers PNG / LG
        self.images = [f for f in os.listdir(root) if f.endswith(".png")]
        self.images.sort()

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = self.images[idx]
        img_path = os.path.join(self.root, img_name)

        lg_name = img_name.replace(".png", ".lg")
        lg_path = os.path.join(self.root, lg_name)

        image = Image.open(img_path).convert("RGB")

        boxes = []
        labels = []

        with open(lg_path, "r", encoding='utf-8', errors='ignore') as f:
            for line in f:
                parts = [p.strip() for p in line.strip().split(",") if p.strip() != ""]
                if len(parts) < 6:
                    # fallback to whitespace splitting if commas are not reliable
                    parts = [p.strip() for p in line.strip().split() if p.strip() != ""]

                if len(parts) < 6:
                    continue

                label = parts[1]
                try:
                    xmin_s, ymin_s, xmax_s, ymax_s = parts[-4:]
                    xmin = float(xmin_s)
                    ymin = float(ymin_s)
                    xmax = float(xmax_s)
                    ymax = float(ymax_s)
                except Exception:
                    continue

                if xmax <= xmin or ymax <= ymin:
                    warnings.warn(
                            f"Found invalid bbox in '{lg_path}': [xmin={xmin}, ymin={ymin}, xmax={xmax}, ymax={ymax}]. These boxes will be skipped.")
                    continue

                boxes.append([xmin, ymin, xmax, ymax])
                labels.append(self.map_label(label))

        # Convert to tensors; ensure correct shapes even when empty
        if len(boxes) == 0:
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)
        else:
            boxes = torch.tensor(boxes, dtype=torch.float32)
            labels = torch.tensor(labels, dtype=torch.int64)

        target = {"boxes": boxes, "labels": labels}

        if self.transform:
            image = self.transform(image)

        return image, target

    def map_label(self, label):
        raw = label.split("_")[0].strip()
        if raw.isalpha():
            return 0

        if raw.isdigit():
            return 1

        if raw in {"+", "-", "=", "/", "*", "√ó", "√∑", "^"}:
            return 2
        return 3

    def raw_label_to_id(self, raw):
        if not hasattr(self, "raw_vocab"):
            self.raw_vocab = {}
        if raw not in self.raw_vocab:
            self.raw_vocab[raw] = len(self.raw_vocab)
        return self.raw_vocab[raw]

    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
    ])

In [10]:
root = '../datas/FullExpressions/CROHME2019_train_png/'

In [11]:
dataset = CROHMEDataset(
    root=root,
    transform=transforms.ToTensor(),
    meta_classes=True
)

image, target = dataset[0]
print("Image : ", image.size())
print("Target : ", target)

dataset_len = len(dataset)
train_len = int(0.8 * dataset_len)
val_len = int(0.1 * dataset_len)
test_len = dataset_len - train_len - val_len

train, val, test = torch.utils.data.random_split(
    dataset, [train_len, val_len, test_len], generator=torch.Generator().manual_seed(42)
)
print(f"Dataset sizes -> total: {dataset_len}, train: {train_len}, val: {val_len}, test: {test_len}")

Image :  torch.Size([3, 119, 500])
Target :  {'boxes': tensor([[ 10.,  40.,  39., 108.],
        [270.,  46., 320.,  72.],
        [340.,  47., 371.,  71.],
        [467.,  10., 489.,  38.],
        [121.,  38., 166.,  63.],
        [226.,  21., 265.,  65.],
        [399.,  10., 452.,  76.]]), 'labels': tensor([0, 0, 2, 1, 2, 0, 0])}
Dataset sizes -> total: 9993, train: 7994, val: 999, test: 1000


## Functions for visualization and evaluation

In [12]:
def load_image(image_path):
    """Load an image from file."""
    image = Image.open(image_path).convert("RGB")
    return image

def prepare_image(image, transform=None):
    """Prepare the image for model input."""
    if transform:
        image = transform(image)
    return image.unsqueeze(0)  # Add batch dimension

def visualize_predictions(image, boxes, labels, scores, threshold=0.4):
    """Visualize the bounding boxes and labels on the image."""
    plt.figure(figsize=(12, 8))
    plt.imshow(image.permute(1, 2, 0).numpy())

    # Filter out boxes and labels below the threshold
    for box, label, score in zip(boxes, labels, scores):
        if score >= threshold:
            x_min, y_min, x_max, y_max = box
            plt.gca().add_patch(plt.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min,
                                                fill=False, edgecolor='red', linewidth=3))
            plt.text(x_min, y_min, f'{label.item()}: {score:.2f}', fontsize=12, color='red')

    plt.axis('off')
    plt.show()

## Training loop

In [14]:
# Hyperparameters
num_epochs = 3
learning_rate =0.0008
batch_size = 3
# Keep val_size if you want an absolute val count fallback, but we'll use dynamic splits
val_size = 10

val_err_array = np.array([])
train_err_array = np.array([])
nb_sample_array = np.array([])
train_loss_classifier_array = np.array([])
train_loss_objectness_array = np.array([])

# Early stopping parameters
patience =5
epochs_without_improvement = 0

# Use the Subset objects created earlier by random_split: `train`, `val`, `test`.
# If `train` or `val` don't exist yet (cell not executed), compute splits here as a fallback.
try:
    train_subset = train
    val_subset = val
except NameError:
    dataset_len = len(dataset)
    train_len = int(0.8 * dataset_len)
    val_len = int(0.1 * dataset_len)
    test_len = dataset_len - train_len - val_len
    train_subset, val_subset, _ = torch.utils.data.random_split(dataset, [train_len, val_len, test_len], generator=torch.Generator().manual_seed(42))

# Create DataLoaders for training and validation
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

# Load a pretrained Faster R-CNN model
#model = detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
#model = detection.ssd300_vgg16(weights="DEFAULT")
model = detection.fasterrcnn_mobilenet_v3_large_fpn(weights="DEFAULT")

# Set the requires_grad attribute of all the backbone parameters to False
for param in model.backbone.parameters():
    param.requires_grad = False
print("Backbone frozen. Only the RPN and heads will be trained.")

# Modify the model for the number of classes
num_classes = 5  # 20 classes + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

# Move the model to GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Set up the optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=learning_rate, momentum=0.9, weight_decay=0.0005)

# Function for validation
def validate(model, val_loader):
    val_loss = 0.0
    with torch.no_grad():
        for images, targets in val_loader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            val_loss += losses.item()

    return val_loss / len(val_loader)

# Training loop
best_val_loss = float('inf')  # Initialize best validation loss
for epoch in range(num_epochs):
    epoch_loss = 0.0
    epoch_loss_classifier = 0.0
    epoch_loss_objectness = 0.0
    model.train()  # Set the model to training mode
    nb_used_sample = 0 # Initialize the number of samples used in this epoch

    for images, targets in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        # Move images and targets to the device (GPU or CPU)
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        loss_dict = model(images, targets)

        # Compute total loss
        losses = sum(loss for loss in loss_dict.values())

        # Backward pass
        losses.backward()

        # Update the weights
        optimizer.step()

        # Accumulate loss
        epoch_loss += losses.item()
        # Use .get to avoid KeyError if a particular loss term is missing
        epoch_loss_classifier += loss_dict.get('loss_classifier', torch.tensor(0.0)).item()
        epoch_loss_objectness += loss_dict.get('loss_objectness', torch.tensor(0.0)).item()
        nb_used_sample += len(images)


################ FOR VGG16 ###############
    #     # Accumulate loss - Use keys appropriate for SSD
    #     epoch_loss += losses.item()
    #     # Assuming loss_dict for SSD contains 'classification' and 'bbox_regression'
    #     if 'classification' in loss_dict:
    #         epoch_loss_classifier += loss_dict['classification'].item()
    #     if 'bbox_regression' in loss_dict:
    #          epoch_loss_objectness += loss_dict['bbox_regression'].item() # Using objectness for regression loss here
    #     nb_used_sample += batch_size


    # # Calculate average training loss for the epoch
    # train_err = epoch_loss / len(train_loader)
    # # Calculate average for classifier and regression losses only if they were accumulated
    # train_loss_classifier = epoch_loss_classifier / len(train_loader) if 'classification' in loss_dict else 0
    # train_loss_objectness = epoch_loss_objectness / len(train_loader) if 'bbox_regression' in loss_dict else 0
###########################################


    # Calculate average training loss for the epoch
    train_err = epoch_loss / len(train_loader)
    train_loss_classifier = epoch_loss_classifier / len(train_loader) if len(train_loader) > 0 else 0.0
    train_loss_objectness = epoch_loss_objectness / len(train_loader) if len(train_loader) > 0 else 0.0

    # Print epoch loss
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {train_err:.4f}, Classifier Loss: {train_loss_classifier:.4f}, Objectness Loss: {train_loss_objectness:.4f}")

    # Validate after each epoch
    val_loss = validate(model, val_loader)
    print(f"Validation Loss: {val_loss:.4f}")
    train_err_array = np.append(train_err_array, train_err)
    val_err_array = np.append(val_err_array, val_loss)
    nb_sample_array = np.append(nb_sample_array, nb_used_sample)
    train_loss_classifier_array = np.append(train_loss_classifier_array, train_loss_classifier)
    train_loss_objectness_array = np.append(train_loss_objectness_array, train_loss_objectness)

    # Save the model weights if validation loss has improved
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'faster_rcnn_voc_best.pth')
        print(f"Model weights saved. New best validation loss: {best_val_loss:.4f}")
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1

    if epochs_without_improvement >= patience:
        print(f"Early stopping after {patience} epochs without improvement.")
        break

# Final message
print("Training complete.")

Backbone frozen. Only the RPN and heads will be trained.


Epoch 1/3:  27%|‚ñà‚ñà‚ñã       | 709/2665 [01:48<04:58,  6.55it/s]


FileNotFoundError: [Errno 2] No such file or directory: '../datas/FullExpressions/CROHME2019_train_png/128_em_525.png'

## Visualise

In [None]:
# Load a single image for inference
image_path = full_train_dataset.image_dir + '/2007_000423.jpg'  # Replace with your image path
image = load_image(image_path)

model.load_state_dict(torch.load('faster_rcnn_voc_best.pth', weights_only=True))

print(image)
# Prepare the model for inference
model.eval()  # Set the model to evaluation mode
with torch.no_grad():
    # Prepare the image
    input_image_ = prepare_image(image, transform)
    input_image = input_image_.to(device)

    # Run inference
    predictions = model(input_image)

# Extract boxes, labels, and scores from predictions
boxes = predictions[0]['boxes']
labels = predictions[0]['labels']
scores = predictions[0]['scores']

boxes = boxes.to('cpu')
labels = labels.to('cpu')
scores = scores.to('cpu')

print(boxes)
print(labels)
print(scores)

# Visualize the results
visualize_predictions(input_image_[0], boxes, labels, scores)

In [2]:
# IoU et mAP qui viennent d'Object_Segmentation

def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
    """
    Calculates intersection over union

    Parameters:
        boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
        boxes_labels (tensor): Correct Labels of Boxes (BATCH_SIZE, 4)
        box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)

    Returns:
        tensor: Intersection over union for all examples
    """

    # Slicing idx:idx+1 in order to keep tensor dimensionality
    # Doing ... in indexing if there would be additional dimensions
    # Like for Yolo algorithm which would have (N, S, S, 4) in shape
    if box_format == "midpoint":
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2

    elif box_format == "corners":
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4]
        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]

    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # Need clamp(0) in case they do not intersect, then we want intersection to be 0
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))

    return intersection / (box1_area + box2_area - intersection + 1e-6)

def mean_average_precision(
    pred_boxes, true_boxes, iou_threshold=0.5, box_format="corners", num_classes=20
):
    """
    Calculates mean average precision

    Parameters:
        pred_boxes (list): list of lists containing all bboxes with each bboxes
        specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
        true_boxes (list): Similar as pred_boxes except all the correct ones
        iou_threshold (float): threshold where predicted bboxes is correct
        box_format (str): "midpoint" or "corners" used to specify bboxes
        num_classes (int): number of classes

    Returns:
        float: mAP value across all classes given a specific IoU threshold
    """

    # list storing all AP for respective classes
    average_precisions = []

    # used for numerical stability later on
    epsilon = 1e-6

    for c in range(num_classes):
        detections = []
        ground_truths = []

        # Go through all predictions and targets,
        # and only add the ones that belong to the
        # current class c
        for detection in pred_boxes:
            if detection[1] == c:
                detections.append(detection)

        for true_box in true_boxes:
            if true_box[1] == c:
                ground_truths.append(true_box)

        # find the amount of bboxes for each training example
        # Counter here finds how many ground truth bboxes we get
        # for each training example, so let's say img 0 has 3,
        # img 1 has 5 then we will obtain a dictionary with:
        # amount_bboxes = {0:3, 1:5}
        amount_bboxes = Counter([gt[0] for gt in ground_truths])

        # We then go through each key, val in this dictionary
        # and convert to the following (w.r.t same example):
        # ammount_bboxes = {0:torch.tensor[0,0,0], 1:torch.tensor[0,0,0,0,0]}
        for key, val in amount_bboxes.items():
            amount_bboxes[key] = torch.zeros(val)

        # sort by box probabilities which is index 2
        detections.sort(key=lambda x: x[2], reverse=True)
        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))
        total_true_bboxes = len(ground_truths)

        # If none exists for this class then we can safely skip
        if total_true_bboxes == 0:
            continue

        for detection_idx, detection in enumerate(detections):
            # Only take out the ground_truths that have the same
            # training idx as detection
            ground_truth_img = [
                bbox for bbox in ground_truths if bbox[0] == detection[0]
            ]

            num_gts = len(ground_truth_img)
            best_iou = 0

            for idx, gt in enumerate(ground_truth_img):
                iou = intersection_over_union(
                    torch.tensor(detection[3:]),
                    torch.tensor(gt[3:]),
                    box_format=box_format,
                )

                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            if best_iou > iou_threshold:
                # only detect ground truth detection once
                if amount_bboxes[detection[0]][best_gt_idx] == 0:
                    # true positive and add this bounding box to seen
                    TP[detection_idx] = 1
                    amount_bboxes[detection[0]][best_gt_idx] = 1
                else:
                    FP[detection_idx] = 1

            # if IOU is lower then the detection is a false positive
            else:
                FP[detection_idx] = 1

        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon)
        precisions = torch.cat((torch.tensor([1]), precisions))
        recalls = torch.cat((torch.tensor([0]), recalls))
        # torch.trapz for numerical integration
        average_precisions.append(torch.trapz(precisions, recalls))

    return sum(average_precisions) / len(average_precisions)

In [None]:
def get_predictions_and_ground_truths(model, val_loader, device):
    model.eval()
    pred_boxes = []  # To store predictions
    true_boxes = []  # To store ground truths

    with torch.no_grad():
        for images, targets in val_loader:
            images = [img.to(device) for img in images]
            outputs = model(images)

            for i, output in enumerate(outputs):
                # Get the predicted boxes, scores, and labels
                pred_boxes_list = output['boxes'].cpu().numpy()
                pred_scores_list = output['scores'].cpu().numpy()
                pred_labels_list = output['labels'].cpu().numpy()

                # Filter out predictions with low scores
                for j in range(len(pred_boxes_list)):
                    if pred_scores_list[j] >= 0.05:  # Score threshold
                        pred_boxes.append([
                            i,  # image index
                            pred_labels_list[j],
                            pred_scores_list[j],
                            *pred_boxes_list[j]
                        ])

                # Get ground truth boxes and labels
                gt_boxes = targets[i]['boxes'].cpu().numpy()
                gt_labels = targets[i]['labels'].cpu().numpy()

                for k in range(len(gt_boxes)):
                    true_boxes.append([
                        i,  # image index
                        gt_labels[k],
                        1.0,  # Assuming ground truth boxes have a score of 1.0
                        *gt_boxes[k]
                    ])

    return pred_boxes, true_boxes

In [None]:
pred_boxes, true_boxes = get_predictions_and_ground_truths(model, val_loader, device)
mAP = mean_average_precision(pred_boxes, true_boxes, iou_threshold=0.5, box_format="corners", num_classes=num_classes)
print(f"Mean Average Precision (mAP): {mAP:.4f}")

# YOLO v8

In [3]:
from ultralytics import YOLO

# Charger un mod√®le pr√©-entra√Æn√©
model = YOLO('yolov8s.pt')

# Entra√Æner
results = model.train(
    data='YOLO_dataset/data.yaml',
    epochs=10,
    imgsz=256,
    batch=16,
    name='math_symbols_detector'
)

results = model.val()

New https://pypi.org/project/ultralytics/8.3.235 available üòÉ Update with 'pip install -U ultralytics'
Ultralytics 8.3.234 üöÄ Python-3.11.5 torch-2.9.1+cu128 CUDA:0 (NVIDIA GeForce RTX 3050 Ti Laptop GPU, 3769MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=YOLO_dataset/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=256, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, n

In [None]:
results = model.predict('YOLO_dataset/images/train/001-equation002.png', save=True, conf=0.5)
result = results[0]

boxes = result.boxes
print("Boxes (xyxy):", boxes.xyxy)
print("Boxes (xywhn):", boxes.xywhn)
print("Confidence:", boxes.conf)
print("Classes:", boxes.cls)

for i, box in enumerate(boxes):
    xyxy = box.xyxy[0].tolist()
    conf = box.conf[0].item() 
    cls_id = int(box.cls[0].item()) 
    cls_name = result.names[cls_id] 
    
    print(f"Box {i}: {cls_name} (conf: {conf:.2f}) -> [{xyxy[0]:.1f}, {xyxy[1]:.1f}, {xyxy[2]:.1f}, {xyxy[3]:.1f}]")

In [6]:
from ultralytics import YOLO

def evaluate_yolo_model_simple(model, data_yaml_path, imgsz=640, conf=0.25):
    metrics = model.val(
        data=data_yaml_path,
        imgsz=imgsz,
        conf=conf,
        split='val',
        plots=False 
    )
    

    results = {
        'mAP50': metrics.box.map50,       # mAP √† IoU=0.5
        'mAP50-95': metrics.box.map,      # mAP moyenne sur 0.5:0.95
        'precision': metrics.box.p,       # Pr√©cision globale
        'recall': metrics.box.r,          # Rappel global
        'fitness': metrics.box.fitness    # Score de fitness
    }
    
    print('=== R√âSULTATS D\'√âVALUATION SIMPLIFI√âE ===')
    print("mAP@0.5: ",results['mAP50'])
    print("mAP@0.5:0.95: ",results['mAP50-95'])
    print("Pr√©cision (P):",results['precision'])
    print("Rappel (R):",results['recall'])
    
    return results


data_config = 'YOLO_dataset/data.yaml' 

metrics = evaluate_yolo_model_simple(model, data_config, conf=0.25)

Ultralytics 8.3.234 üöÄ Python-3.11.5 torch-2.9.1+cu128 CUDA:0 (NVIDIA GeForce RTX 3050 Ti Laptop GPU, 3769MiB)
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 35.1¬±23.3 MB/s, size: 1.8 KB)
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 35.1¬±23.3 MB/s, size: 1.8 KB)
[K[34m[1mval: [0mScanning /home/raclax/Documents/M2/Part2/DL2/Project/DL_project_marie_clara/YOLO_dataset/labels/val.cache... 1999 images, 0 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 1999/1999 1.8Mit/s 0.0s0s
[K[34m[1mval: [0mScanning /home/raclax/Documents/M2/Part2/DL2/Project/DL_project_marie_clara/YOLO_dataset/labels/val.cache... 1999 images, 0 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 1999/1999 1.8Mit/s 0.0s0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 125/125 7.4it/s 16.9s0.2s
[K                 Class     Images  Instances      Box(P

# RTDETR

In [None]:
from ultralytics import RTDETR

model = RTDETR('rtdetr-l.pt')

results = model.train(
    data='YOLO_dataset/data.yaml',
    epochs=5,
    imgsz=640,
    batch=4,
    name='math_rtdetr'
)

metrics = model.val()

predictions = model.predict('YOLO_dataset/images/001-equation001.png', conf=0.5)

In [None]:
results = model.predict('YOLO_dataset/images/train/001-equation002.png', save=True, conf=0.5)
# Acc√©der au premier r√©sultat
result = results[0]

# ===== BOUNDING BOXES =====
boxes = result.boxes

print("Boxes (xyxy):", boxes.xyxy)
print("Boxes (xywhn):", boxes.xywhn)

# Confiance de chaque pr√©diction
print("Confidence:", boxes.conf)

print("Classes:", boxes.cls)

# ===== D√âTAILS COMPLETS =====
for i, box in enumerate(boxes):
    xyxy = box.xyxy[0].tolist()
    conf = box.conf[0].item() 
    cls_id = int(box.cls[0].item()) 
    cls_name = result.names[cls_id] 
    
    print(f"Box {i}: {cls_name} (conf: {conf:.2f}) -> [{xyxy[0]:.1f}, {xyxy[1]:.1f}, {xyxy[2]:.1f}, {xyxy[3]:.1f}]")

In [8]:
from ultralytics import RTDETR
model = RTDETR('rtdetr-l.pt') 

data_config = 'YOLO_dataset/data.yaml' 

metrics = evaluate_yolo_model_simple(model, data_config, conf=0.25)

Ultralytics 8.3.234 üöÄ Python-3.11.5 torch-2.9.1+cu128 CUDA:0 (NVIDIA GeForce RTX 3050 Ti Laptop GPU, 3769MiB)
rt-detr-l summary: 294 layers, 32,148,140 parameters, 0 gradients, 103.8 GFLOPs
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 97.2¬±40.9 MB/s, size: 2.5 KB)
[K[34m[1mval: [0mScanning /home/raclax/Documents/M2/Part2/DL2/Project/DL_project_marie_clara/YOLO_dataset/labels/val.cache... 1999 images, 0 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 1999/1999 1.9Mit/s 0.0s0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 125/125 1.7it/s 1:140.6sss
                   all       1999      19394      0.119    0.00122     0.0605      0.024
                person       1183       5242      0.107    0.00229     0.0543     0.0177
               bicycle       1087       4518          0          0          0          0
                   car       1154       2721

# Pix2text : bonus

In [None]:
from pix2text import Pix2Text

# Initialize Pix2Text: try GPU first, fall back to CPU if ONNX Runtime doesn't expose CUDAExecutionProvider
try:
    p2t = Pix2Text.from_config(device='cuda')  # attempt GPU/ONNXRuntime CUDAExecutionProvider
    print('Pix2Text initialized on CUDA device')
except Exception as e:
    # Common failure: onnxruntime not built with GPU support -> ValueError about CUDAExecutionProvider
    print('GPU initialization failed (will fall back to CPU):', e)
    print('Initializing Pix2Text on CPU...')
    p2t = Pix2Text.from_config(device='cpu')
    print('Pix2Text initialized on CPU')

img_path = '../datas/FullExpressions/CROHME2019_train_png/001-equation000.png'
try:
    res = p2t.recognize_formula(img_path, return_text=True)
    print(res)
except Exception as e:
    print('Error running recognition:', e)