In [1]:
!pip install fsspec==2025.3.2



In [2]:
!pip install -q transformers datasets torchvision pycocotools

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m78.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m75.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m39.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

# Dataset

In [3]:
!unzip -q /content/data.zip -d /content/custom_data_raw

In [4]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Répertoire d'origine pour les images et annotations
raw_data_path_images = "/content/custom_data_raw/images"  # Images sont dans ce dossier
raw_data_path_labels = "/content/custom_data_raw/obj_train_data"  # Annotations dans ce dossier

# Obtenir toutes les images (jpg/png) présentes dans le dossier "images"
images = [f for f in os.listdir(raw_data_path_images) if f.endswith(('.jpg', '.png'))]

# Split train/val (80% train, 20% val)
train_imgs, val_imgs = train_test_split(images, test_size=0.2, random_state=42)

# Création des dossiers cible pour images et labels
base_path = "/content/custom_data"


os.makedirs(base_path + "/images/train", exist_ok=True)
os.makedirs(base_path + "/images/val", exist_ok=True)
os.makedirs(base_path + "/labels/train", exist_ok=True)
os.makedirs(base_path + "/labels/val", exist_ok=True)

def move_data(image_list, split):
    for img_name in image_list:
        # Déplacer l'image vers le bon dossier
        src_img = os.path.join(raw_data_path_images, img_name)
        dst_img = os.path.join(base_path, f"images/{split}", img_name)
        shutil.copy(src_img, dst_img)

        # Vérifier et déplacer l'annotation .txt correspondante
        txt_name = img_name.rsplit('.', 1)[0] + '.txt'
        txt_src = os.path.join(raw_data_path_labels, txt_name)
        txt_dst = os.path.join(base_path, f"labels/{split}", txt_name)

        if os.path.exists(txt_src):
            shutil.copy(txt_src, txt_dst)
        else:
            print(f"Pas d'annotation pour {img_name}")

# Appliquer le déplacement aux deux splits : train et val
move_data(train_imgs, "train")
move_data(val_imgs, "val")


In [5]:
import os
import cv2
import json
from tqdm import tqdm

tile_size = 320
overlap = 0
visualize = True  # Active la visualisation des tiles avec boxes
splits = ["train", "val"]

input_root = "/content/custom_data"
output_root = "/content/split_custom_data"
visu_dir = "/content/visu_tiles"

categories = []

def yolo_to_bbox(x_center, y_center, w, h, img_w, img_h):
    x1 = (x_center - w / 2) * img_w
    y1 = (y_center - h / 2) * img_h
    w *= img_w
    h *= img_h
    return x1, y1, w, h

for split in splits:
    image_id = 0
    annotation_id = 0

    input_img_dir = os.path.join(input_root, f"images/{split}")
    input_lbl_dir = os.path.join(input_root, f"labels/{split}")
    output_img_dir = os.path.join(output_root, f"{split}/images")
    output_ann_path = os.path.join(output_root, f"{split}/instances_{split}.json")
    output_visu_dir = os.path.join(visu_dir, split)
    os.makedirs(output_img_dir, exist_ok=True)
    if visualize:
        os.makedirs(output_visu_dir, exist_ok=True)

    coco_dict = {
        "images": [],
        "annotations": [],
        "categories": [],
    }

    label_set = set()

    for filename in tqdm(os.listdir(input_img_dir), desc=f"{split}"):
        if not filename.lower().endswith(('.jpg', '.png')):
            continue

        basename = os.path.splitext(filename)[0]
        img_path = os.path.join(input_img_dir, filename)
        label_path = os.path.join(input_lbl_dir, f"{basename}.txt")

        img = cv2.imread(img_path)
        if img is None:
            continue
        img_h, img_w = img.shape[:2]

        annots = []
        if os.path.exists(label_path):
            with open(label_path, "r") as f:
                for line in f.readlines():
                    parts = line.strip().split()
                    if len(parts) == 5:
                        cls, xc, yc, w, h = map(float, parts)
                        annots.append((int(cls), *yolo_to_bbox(xc, yc, w, h, img_w, img_h)))
                        label_set.add(int(cls))

        step = tile_size - overlap
        tile_id = 0

        for y in range(0, img_h, step):
            for x in range(0, img_w, step):
                tile = img[y:y+tile_size, x:x+tile_size]
                th, tw = tile.shape[:2]
                if th < tile_size or tw < tile_size:
                    continue

                tile_fname = f"{basename}_{tile_id}.jpg"
                tile_path = os.path.join(output_img_dir, tile_fname)
                cv2.imwrite(tile_path, tile)

                tile_annot = tile.copy()

                coco_dict["images"].append({
                    "id": image_id,
                    "width": tile_size,
                    "height": tile_size,
                    "file_name": tile_fname,
                })

                for cls, x1, y1, w, h in annots:
                    x2 = x1 + w
                    y2 = y1 + h

                    if x1 >= x + tile_size or x2 <= x or y1 >= y + tile_size or y2 <= y:
                        continue

                    box_x1 = max(0, x1 - x)
                    box_y1 = max(0, y1 - y)
                    box_x2 = min(tile_size, x2 - x)
                    box_y2 = min(tile_size, y2 - y)

                    box_w = box_x2 - box_x1
                    box_h = box_y2 - box_y1

                    if box_w < 1 or box_h < 1:
                        continue

                    coco_dict["annotations"].append({
                        "id": annotation_id,
                        "image_id": image_id,
                        "category_id": cls,
                        "bbox": [box_x1, box_y1, box_w, box_h],
                        "area": box_w * box_h,
                        "iscrowd": 0,
                    })
                    annotation_id += 1

                    if visualize:
                        cv2.rectangle(tile_annot, (int(box_x1), int(box_y1)), (int(box_x2), int(box_y2)), (0, 255, 0), 1)
                        cv2.putText(tile_annot, str(cls), (int(box_x1), int(box_y1) - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)

                if visualize:
                    vis_tile_path = os.path.join(output_visu_dir, tile_fname)
                    cv2.imwrite(vis_tile_path, tile_annot)

                image_id += 1
                tile_id += 1

    coco_dict["categories"] = [
        {"id": cls, "name": str(cls), "supercategory": "none"}
        for cls in sorted(label_set)
    ]

    with open(output_ann_path, "w") as f:
        json.dump(coco_dict, f, indent=2)

    print(f"✅ COCO json sauvegardé dans : {output_ann_path}")


train: 100%|██████████| 28/28 [00:08<00:00,  3.41it/s]


✅ COCO json sauvegardé dans : /content/split_custom_data/train/instances_train.json


val: 100%|██████████| 7/7 [00:01<00:00,  4.13it/s]

✅ COCO json sauvegardé dans : /content/split_custom_data/val/instances_val.json





# Import

In [6]:
import torch
import time
import numpy as np
from transformers import AutoImageProcessor, DeformableDetrForObjectDetection
from datasets import load_dataset
from PIL import Image
from tqdm import tqdm
import os
import psutil

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# Config

In [7]:
!unzip -q /content/output.zip -d /content/output

In [8]:
MODEL_DIR = "/content/output/output/final_model"
PROCESSOR_DIR = "/content/output/output/final_processor"
ANNOTATION_FILE = "/content/split_custom_data/val/instances_val.json"
IMAGE_DIR = "/content/split_custom_data/val/images/"
CONF_THRESHOLD = 0.8
CLASS_ID = 0  # classe "olive"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loading

In [9]:
model = DeformableDetrForObjectDetection.from_pretrained(MODEL_DIR).to(DEVICE)
processor = AutoImageProcessor.from_pretrained(PROCESSOR_DIR)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


# Metrics

In [10]:
coco_gt = COCO(ANNOTATION_FILE)
results = []
inference_times = []

print("⚙️ Évaluation en cours...")

for img_id in tqdm(coco_gt.imgs):
    img_info = coco_gt.loadImgs(img_id)[0]
    image_path = os.path.join(IMAGE_DIR, img_info['file_name'])
    image = Image.open(image_path).convert("RGB")

    inputs = processor(images=image, return_tensors="pt").to(DEVICE)

    start_time = time.time()
    with torch.no_grad():
        outputs = model(**inputs)
    inference_times.append(time.time() - start_time)

    target_sizes = torch.tensor([image.size[::-1]]).to(DEVICE)
    results_per_image = processor.post_process_object_detection(outputs, threshold=CONF_THRESHOLD, target_sizes=target_sizes)[0]


    boxes = results_per_image["boxes"].cpu().numpy()
    scores = results_per_image["scores"].cpu().numpy()
    labels = results_per_image["labels"].cpu().numpy()

    for box, score, label in zip(boxes, scores, labels):
        x1, y1, x2, y2 = box
        width = x2 - x1
        height = y2 - y1
        results.append({
            "image_id": img_id,
            "category_id": int(label),
            "bbox": [x1, y1, width, height],
            "score": float(score)
        })

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
⚙️ Évaluation en cours...


  0%|          | 0/524 [00:03<?, ?it/s]


KeyboardInterrupt: 

# Version "Fix"

In [13]:
coco_gt = COCO(ANNOTATION_FILE)
results = []
inference_times = []

print("⚙️ Évaluation en cours...")

for img_id in tqdm(coco_gt.imgs):
    img_info = coco_gt.loadImgs(img_id)[0]
    image_path = os.path.join(IMAGE_DIR, img_info['file_name'])
    image = Image.open(image_path).convert("RGB")

    inputs = processor(images=image, return_tensors="pt").to(DEVICE)

    start_time = time.time()
    with torch.no_grad():
        outputs = model(**inputs)
    inference_times.append(time.time() - start_time)

    target_sizes = torch.tensor([image.size[::-1]]).to(DEVICE)
    results_per_image = processor.post_process_object_detection(outputs, threshold=CONF_THRESHOLD, target_sizes=target_sizes)[0]


    boxes = results_per_image["boxes"].cpu().numpy()
    scores = results_per_image["scores"].cpu().numpy()
    labels = results_per_image["labels"].cpu().numpy()

    for box, score, label in zip(boxes, scores, labels):
        x1, y1, x2, y2 = box

        # === Translation : coin inférieur droit devient nouveau centre ===
        cx = x2
        cy = y2
        dx = cx - (x1 + x2) / 2
        dy = cy - (y1 + y2) / 2

        x1_new = x1 + dx
        y1_new = y1 + dy
        x2_new = x2 + dx
        y2_new = y2 + dy

        # Clamp dans les limites de l’image (image.size = (W, H))
        W, H = image.size
        x1_new = max(0, min(W, x1_new))
        y1_new = max(0, min(H, y1_new))
        x2_new = max(0, min(W, x2_new))
        y2_new = max(0, min(H, y2_new))

        # bbox format COCO : [x, y, width, height]
        x = x1_new
        y = y1_new
        w = x2_new - x1_new
        h = y2_new - y1_new

        results.append({
            "image_id": img_id,
            "category_id": int(label),
            "bbox": [x, y, w, h],
            "score": float(score)
        })


loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
⚙️ Évaluation en cours...


100%|██████████| 524/524 [2:13:17<00:00, 15.26s/it]


# Save

In [None]:
import json
with open("detections.json", "w") as f:
    json.dump(results, f)

TypeError: Object of type float32 is not JSON serializable

In [14]:
import json
import numpy as np

with open("detections.json", "w") as f:
    # Convert NumPy float32 values to native Python floats before dumping
    json.dump(results, f, default=lambda x: float(x) if isinstance(x, np.float32) else x)

In [15]:
# COCO EVAL
coco_dt = coco_gt.loadRes("detections.json")
coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()

Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.30s).
Accumulating evaluation results...
DONE (t=0.05s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.209
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.317
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.240
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.167
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.267
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.089
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.286
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.291
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=1

# Print

In [16]:
precision = coco_eval.eval['precision'][0, :, CLASS_ID, 0, 2]  # IoU=0.5, area=all, maxDets=100
recall = coco_eval.eval['recall'][:, CLASS_ID, 0, 2]           # area=all, maxDets=100

avg_precision = np.mean(precision[precision > -1])
avg_recall = np.mean(recall[recall > -1])
f1_score = 2 * avg_precision * avg_recall / (avg_precision + avg_recall + 1e-8)

print(f"\n🔎 Métriques classe {CLASS_ID} :")
print(f" - Précision moyenne : {avg_precision:.3f}")
print(f" - Rappel moyen     : {avg_recall:.3f}")
print(f" - F1-score         : {f1_score:.3f}")


avg_inference_time = np.mean(inference_times)
print(f"\n⏱️ Temps d’inférence moyen par image : {avg_inference_time:.3f} sec")


if torch.cuda.is_available():
    print(f"📊 Mémoire GPU utilisée : {torch.cuda.max_memory_allocated() / (1024**2):.2f} MB")
else:
    print(f"📊 Mémoire RAM utilisée : {psutil.Process().memory_info().rss / (1024**2):.2f} MB")


🔎 Métriques classe 0 :
 - Précision moyenne : 0.634
 - Rappel moyen     : 0.583
 - F1-score         : 0.607

⏱️ Temps d’inférence moyen par image : 15.220 sec
📊 Mémoire RAM utilisée : 2204.45 MB
