Version de Python: au moins 3.9 pour rfdetr

In [2]:
import os
import urllib.request
from urllib.parse import urlparse
from tqdm import tqdm
import csv
from downloader import download_all_images
import tempfile
import json
from PIL import Image

In [None]:
CSV_FOLDER = 'csv_folder'
CSV_FILENAME = {
    "train": "oidv6-train-annotations-bbox.csv",
    "valid": "validation-annotations-bbox.csv",
    "test": "test-annotations-bbox.csv",
    "class": "oidv7-class-descriptions-boxable.csv"
}
TARGET_CLASSES = ["wheelchair", "bicycle"]
DATA_TYPE = ['train', 'valid', 'test']

In [4]:
def download_file(url, download_dir):
    """
    Download a file from the given URL into the specified directory,
    displaying a progress bar during the download.

    The filename is extracted from the URL's path.
    Skips download if the file already exists.
    """
    # Ensure the target directory exists
    os.makedirs(download_dir, exist_ok=True)

    # Extract the filename from the URL
    filename = os.path.basename(urlparse(url).path)
    if not filename:
        raise ValueError(f"Could not extract filename from URL: {url}")
    
    file_path = os.path.join(download_dir, filename)

    # Skip downloading if file already exists
    if os.path.exists(file_path):
        print(f"[SKIP] {file_path} already exists.")
        return

    try:
        print(f"[DOWNLOAD] {file_path} from {url}")

        # Open the URL for reading the file content
        with urllib.request.urlopen(url) as response:
            # Attempt to get the total size for progress tracking
            total_size = int(response.getheader('Content-Length', 0))
            chunk_size = 1024  # Download in 1KB chunks

            # Open the target file for writing and show a progress bar
            with open(file_path, 'wb') as out_file, tqdm(
                total=total_size,
                unit='B',
                unit_scale=True,
                desc=filename,
                leave=False
            ) as pbar:
                # Read and write the file chunk by chunk
                while True:
                    chunk = response.read(chunk_size)
                    if not chunk:
                        break
                    out_file.write(chunk)
                    pbar.update(len(chunk))

        print(f"[DONE] {file_path}")

    except Exception as e:
        # Raise a clear error if download fails
        raise RuntimeError(f"Failed to download {filename} from {url}: {e}")

def download_from_manifest(manifest_path, download_dir="."):
    """
    Download multiple files listed in a manifest file.

    Each line of the manifest should contain a URL (one per line).
    Lines starting with '#' are ignored (treated as comments).
    Files are saved into the specified download directory.
    """
    # Check that the manifest file exists
    if not os.path.isfile(manifest_path):
        raise FileNotFoundError(f"Manifest file not found: {manifest_path}")

    # Read and clean all non-comment lines from the manifest
    with open(manifest_path, "r") as f:
        urls = [
            line.strip() for line in f
            if line.strip() and not line.strip().startswith("#")
        ]

    print(f"[INFO] Found {len(urls)} files to download.\n")

    # Download each file listed in the manifest
    for url in urls:
        download_file(url, download_dir)

In [5]:
download_from_manifest("csv_manifest.txt", download_dir=CSV_FOLDER)

[INFO] Found 4 files to download.

[SKIP] csv_folder/oidv6-train-annotations-bbox.csv already exists.
[SKIP] csv_folder/validation-annotations-bbox.csv already exists.
[SKIP] csv_folder/test-annotations-bbox.csv already exists.
[SKIP] csv_folder/oidv7-class-descriptions-boxable.csv already exists.


In [5]:
class_description_csv = os.path.join(CSV_FOLDER, CSV_FILENAME['class'])
class_description_csv

'csv_folder/oidv7-class-descriptions-boxable.csv'

In [6]:
def get_label_names_from_display_names(csv_path, display_names):
    """
    Given a list of display names and a CSV file with 'LabelName' and 'DisplayName' columns,
    return a list of matching LabelNames. Returns None for names not found.

    Matching is case-insensitive and ignores spaces/underscores.
    """
    if len(display_names) != len(set(display_names)):
        raise ValueError("List display_names must be unique.")
    
    def normalize(name):
        return name.strip().lower().replace(" ", "").replace("_", "")

    # Build lookup map from normalized display names to original inputs
    target_names = {normalize(name): name for name in display_names}
    results = {name: None for name in display_names}

    with open(csv_path, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for row in reader:
            norm_display = normalize(row["DisplayName"])
            if norm_display in target_names:
                original = target_names[norm_display]
                results[original] = row["LabelName"]

    # Preserve input order in output
    return [results[name] for name in display_names]

In [8]:
DOWNLOAD_SCRIPT_URL = "https://raw.githubusercontent.com/openimages/dataset/master/downloader.py"
download_file(DOWNLOAD_SCRIPT_URL, '.')

[SKIP] ./downloader.py already exists.


In [9]:
# Categories
labels = get_label_names_from_display_names(class_description_csv, TARGET_CLASSES) 
labels

['/m/0qmmr', '/m/0199g']

In [10]:
categories = [
    {'id':i, "name": TARGET_CLASSES[i], "supercategory": "none"}
    for i in range(len(labels))
]
categories

[{'id': 0, 'name': 'wheelchair', 'supercategory': 'none'},
 {'id': 1, 'name': 'bicycle', 'supercategory': 'none'}]

In [None]:
def extract_OIDv7_data(csv_path, labels):
    """
    Returns --- TO DOOOOOOOOOOOOOOOOOOOOO dire que c'est minimal, juste ce qu'il faut

    Parameters:
    - csv_path: str, path to the CSV file.
    - target_labels: list of str, LabelNames to filter for.

    Returns:
    - List of unique ImageIDs (str).
    """

    annotations = []
    image_IDs = {} # use of a dict because 'in' is faster on a dict than a list
    xyxyn = []
    
    # Count lines for progress bar total
    with open(csv_path, 'r') as f:
        total_lines = sum(1 for _ in f) - 1  # minus 1 for the header

    with open(csv_path, newline='') as csvfile:
        
        reader = csv.DictReader(csvfile)
        filename = os.path.basename(urlparse(csv_path).path)
        
        for row in tqdm(reader, total=total_lines, desc=f"Processing {filename}"):
            
            if row['LabelName'] in labels:                    
                imageID = row['ImageID']
                
                if imageID not in image_IDs:
                    image_IDs[imageID] = len(image_IDs)
                                    
                annotations.append(
                    {
                        "image_id": image_IDs[imageID],
                        "category_id": labels.index(row['LabelName'])
                    }
                )
                
                xyxyn.append([float(row['XMin']), float(row['YMin']), float(row['XMax']), float(row['YMax'])])

    return list(image_IDs), annotations, xyxyn

In [12]:
def get_image_dimensions(folder_path):
    image_info = {}

    for filename in os.listdir(folder_path):
        if filename.lower().endswith(".jpg"):
            filepath = os.path.join(folder_path, filename)
            with Image.open(filepath) as img:
                width, height = img.size
                key = os.path.splitext(filename)[0]  # Remove .jpg
                image_info[key] = {
                    "width": width,
                    "height": height
                }

    return image_info

In [13]:
def xyxyn_to_xywh(xyxyn, image_width, image_height):
    x_min, y_min, x_max, y_max = xyxyn

    x = x_min * image_width
    y = y_min * image_height
    width = (x_max - x_min) * image_width
    height = (y_max - y_min) * image_height

    return [x, y, width, height]

In [None]:
for data_type in DATA_TYPE:

    annotation_csv = os.path.join(CSV_FOLDER, CSV_FILENAME[data_type])
    
    image_IDs, annotations, xyxyn = extract_OIDv7_data(annotation_csv, labels)
    
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.txt') as temp_file:
        
        for item in tqdm(image_IDs, desc="Writing to file"):
            file_path = f"./dataset/{data_type}/{item}" + ".jpg"
            if not os.path.exists(file_path):
                temp_file.write(f"{file_path}\n")
        temp_file.flush()
        
        download_folder = f'./dataset/{data_type}'

        args = {
        'download_folder': download_folder,
        'image_list': temp_file.name,
        'num_processes': 5
        }
        download_all_images(args)
        
    dimensions_dict = get_image_dimensions(download_folder)
    
    images = [{
        "id":i,
        "file_name":image+'.jpg',
        "height":dimensions_dict[image]["height"],
        "width":dimensions_dict[image]["width"]}
    for i, image in enumerate(image_IDs)
    ]

    annotations = [{**annotation,
        "bbox":xyxyn_to_xywh(xyxyn[i],
                            images[annotations[i]['image_id']]['width'],
                            images[annotations[i]['image_id']]['height']),
        "id":i,
        "area": images[annotations[i]['image_id']]['width']*images[annotations[i]['image_id']]['height']
        } for i, annotation in enumerate(annotations)
    ]
    
    data = {
        "categories": categories,
        "images": images,
        "annotations": annotations 
    }

    with open(os.path.join(download_folder, "_annotations.coco.json"), "w") as f:
        json.dump(data, f, indent=4)    

Processing oidv6-train-annotations-bbox.csv: 100%|██████████| 14610229/14610229 [01:05<00:00, 222086.57it/s]
Writing to file: 100%|██████████| 18491/18491 [00:00<00:00, 295909.05it/s]


total in read_image_list_file = 0


Downloading images: 0it [00:00, ?it/s]
Processing validation-annotations-bbox.csv: 100%|██████████| 303980/303980 [00:00<00:00, 324867.70it/s]
Writing to file: 100%|██████████| 296/296 [00:00<00:00, 165051.05it/s]


total in read_image_list_file = 0


Downloading images: 0it [00:00, ?it/s]
Processing test-annotations-bbox.csv: 100%|██████████| 937327/937327 [00:02<00:00, 320277.39it/s]
Writing to file: 100%|██████████| 851/851 [00:00<00:00, 255373.31it/s]


total in read_image_list_file = 0


Downloading images: 0it [00:00, ?it/s]


In [15]:
from rfdetr import RFDETRBase

model = RFDETRBase()

Loading pretrain weights


In [16]:
import torch

def auto_adjust_batch_params(target_total_batch=16):
    if not torch.cuda.is_available():
        print("No GPU available. Using CPU.")
        return 1, target_total_batch  # batch_size=1, accumulate more steps

    device = torch.device("cuda")
    props = torch.cuda.get_device_properties(device)
    total_vram_gb = props.total_memory / (1024**3)

    # Example logic (adjust as needed):
    if total_vram_gb >= 40:  # A100, etc.
        return target_total_batch, 1
    elif total_vram_gb >= 16:  # e.g. RTX 3080, V100
        return 8, target_total_batch // 8
    elif total_vram_gb >= 8:  # e.g. T4
        return 4, target_total_batch // 4
    else:
        return 2, target_total_batch // 2

# Example usage
batch_size, grad_accum_steps = auto_adjust_batch_params()
print(f"Using batch_size={batch_size}, grad_accum_steps={grad_accum_steps}")

Using batch_size=2, grad_accum_steps=8


In [None]:
dataset = "./dataset"
model.train(dataset_dir=dataset,
            epochs=1,
            batch_size=1,
            grad_accum_steps=1500,
            lr=1e-4,
            output_dir="results")

num_classes mismatch: model has 90 classes, but your dataset has 2 classes
reinitializing your detection head with 2 classes.
fatal: not a git repository (or any of the parent directories): .git


Unable to initialize TensorBoard. Logging is turned off for this session.  Run 'pip install tensorboard' to enable logging.
Not using distributed mode
git:
  sha: N/A, status: clean, branch: N/A

Namespace(num_classes=2, grad_accum_steps=1500, amp=True, lr=0.0001, lr_encoder=0.00015, batch_size=1, weight_decay=0.0001, epochs=1, lr_drop=100, clip_max_norm=0.1, lr_vit_layer_decay=0.8, lr_component_decay=0.7, do_benchmark=False, dropout=0, drop_path=0.0, drop_mode='standard', drop_schedule='constant', cutoff_epoch=0, pretrained_encoder=None, pretrain_weights='rf-detr-base.pth', pretrain_exclude_keys=None, pretrain_keys_modify_to_load=None, pretrained_distiller=None, encoder='dinov2_windowed_small', vit_encoder_num_layers=12, window_block_indexes=None, position_embedding='sine', out_feature_indexes=[2, 5, 8, 11], freeze_encoder=False, layer_norm=True, rms_norm=False, backbone_lora=False, force_no_pretrain=False, dec_layers=3, dim_feedforward=2048, hidden_dim=256, sa_nheads=8, ca_nheads=16,