In [2]:
import os
from PIL import Image
from tqdm import tqdm
from src.pipeline import img_pipeline, img_pipeline_multi
from pathlib import Path

In [3]:
# TODO: also convert the Yahoo red-ball images

def _flatten_to_rgb(img):
    """Return an RGB version of ``img``, compositing transparency onto white.

    JPEG has no alpha channel, so images in mode 'RGBA', 'LA' or 'P' are
    pasted onto a white background using their alpha band as the mask.
    Any other non-RGB mode is converted with ``img.convert('RGB')``.
    Images already in 'RGB' are returned unchanged.
    """
    if img.mode in ('RGBA', 'LA', 'P'):
        # Palette images go through RGBA so the alpha band is explicit.
        if img.mode == 'P':
            img = img.convert('RGBA')
        background = Image.new('RGB', img.size, (255, 255, 255))
        # The last band of RGBA / LA is the alpha channel.
        background.paste(img, mask=img.split()[-1])
        return background
    if img.mode != 'RGB':
        return img.convert('RGB')
    return img


def convert(source_folder: Path, output_folder: Path):
    """Convert every supported image in ``source_folder`` to JPEG.

    Each regular file whose extension is in the supported set is opened,
    flattened to RGB (transparency composited on white) and saved to
    ``output_folder`` as ``<stem>.jpg`` with quality 95. Progress and a final
    summary are printed; nothing is returned.

    Args:
        source_folder: Directory scanned (non-recursively) for images.
        output_folder: Directory receiving the JPEG files; created, including
            parents, if it does not exist.

    Notes:
        * Files are processed in sorted name order so runs are reproducible.
        * When two sources share a stem (e.g. ``a.png`` and ``a.jpg``), later
          ones get a numeric suffix (``a_1.jpg``) instead of overwriting the
          earlier output.
        * Regular files that are ignored (``:Zone.Identifier`` sidecars or
          unsupported extensions) are counted and reported; sub-directories
          are ignored silently.
        * A failure on one file is reported and counted, and the batch
          continues.
    """
    source_dir = Path(source_folder)
    output_dir = Path(output_folder)

    # Create the output folder if it does not exist yet.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Extensions treated as images.
    # NOTE(review): Pillow does not appear to ship an SVG decoder, so '.svg'
    # files will most likely land in the error count; kept so they stay visible.
    image_extensions = {'.jpg', '.jpeg', '.png', '.webp', '.bmp', '.gif', '.tiff', '.svg'}

    converted_count = 0
    skipped_count = 0
    error_count = 0
    # Output names written during this run, used to detect stem collisions.
    used_output_names = set()

    for filename in sorted(os.listdir(source_dir)):
        file_path = source_dir / filename

        # Directories and other non-regular entries are not candidates at all.
        if not file_path.is_file():
            continue

        # Ignore Zone.Identifier sidecar files and anything that is not an image.
        if ':Zone.Identifier' in filename or Path(filename).suffix.lower() not in image_extensions:
            skipped_count += 1
            continue

        # Output name: same stem with a .jpg extension, made unique within this run.
        stem = Path(filename).stem
        output_filename = stem + '.jpg'
        collision_index = 1
        while output_filename in used_output_names:
            output_filename = f"{stem}_{collision_index}.jpg"
            collision_index += 1

        try:
            with Image.open(file_path) as img:
                _flatten_to_rgb(img).save(output_dir / output_filename, 'JPEG', quality=95)
            used_output_names.add(output_filename)
            converted_count += 1
            print(f"✓ Converti: {filename} -> {output_filename}")
        except Exception as e:
            # Best-effort batch: report the failure and keep going.
            error_count += 1
            print(f"✗ Erreur avec {filename}: {e}")

    print(f"\n{'='*60}")
    print("Conversion terminée!")
    print(f"Images converties: {converted_count}")
    print(f"Fichiers ignorés: {skipped_count}")
    print(f"Erreurs: {error_count}")
    print(f"Dossier de sortie: {output_folder}")
    print(f"{'='*60}")

In [4]:
# Earlier run on the trashcan images, kept for reference:
# raw_dir = Path("/home/tonino/projects/ball segmentation/google_lens_trashcan_dl")
# jpg_dir = Path("/home/tonino/projects/ball segmentation/google_lens_trashcan_dl_jpg")

# Red balls x people.
# Named raw_dir / jpg_dir rather than input / output so the builtin input()
# is not shadowed for the rest of the kernel session.
raw_dir = Path("datasets/raw/red balls human yahoo")
jpg_dir = Path("datasets/cleaned/red_balls_human_yahoo_jpg")
convert(raw_dir, jpg_dir)

✓ Converti: OIP.0tBLA2zL5r8Ykby-lNstYAHaGX.jpg -> OIP.0tBLA2zL5r8Ykby-lNstYAHaGX.jpg
✓ Converti: OIP.-aIotQzqTP9or_tFEVukdAHaHa.jpg -> OIP.-aIotQzqTP9or_tFEVukdAHaHa.jpg
✓ Converti: OIP.W5KRxio8CWB0X84gBzc-8QHaEK.jpg -> OIP.W5KRxio8CWB0X84gBzc-8QHaEK.jpg
✓ Converti: OIP.xdj0UVjPvKOsV0KcamL_swHaEK.jpg -> OIP.xdj0UVjPvKOsV0KcamL_swHaEK.jpg
✓ Converti: OIP.rCs6Tc431qqlitgfC0X7NwHaE7.jpg -> OIP.rCs6Tc431qqlitgfC0X7NwHaE7.jpg
✓ Converti: OIP.l2Zc8l9ix7AqIZ6LoCrNsQHaEK.jpg -> OIP.l2Zc8l9ix7AqIZ6LoCrNsQHaEK.jpg
✓ Converti: OIP.GLiX0s4UTeA_e8DVqqi5-QHaE8.jpg -> OIP.GLiX0s4UTeA_e8DVqqi5-QHaE8.jpg
✓ Converti: OIP.4wrt9ST-MYWdSNQ9ZKMZIAHaE7.jpg -> OIP.4wrt9ST-MYWdSNQ9ZKMZIAHaE7.jpg
✓ Converti: OIP.rlIvaxRUpQ57ba81HDdUcwHaHa.jpg -> OIP.rlIvaxRUpQ57ba81HDdUcwHaHa.jpg
✓ Converti: OIP.FprI0P1Vet6dd-5rK-Z73wHaHa.jpg -> OIP.FprI0P1Vet6dd-5rK-Z73wHaHa.jpg
✓ Converti: OIP.PqrG64tfQApSUKKZnLIdowHaEJ.jpg -> OIP.PqrG64tfQApSUKKZnLIdowHaEJ.jpg
✓ Converti: OIP.9kUBeXvTFuw34QUOHXgBWQHaFj.jpg -> OIP.9kUBeXvTFuw

In [2]:
def list_input_files(*folders):
    """Return the regular files directly inside each folder, in a stable order.

    Folders are visited in the order given and entries are sorted within each
    folder. Sub-directories and ``:Zone.Identifier`` sidecar files are left
    out so they are never handed to the (slow) image pipeline.

    Args:
        *folders: Directory paths (str or Path) to list, non-recursively.

    Returns:
        A list of ``Path`` objects.
    """
    return [
        path
        for folder in folders
        for path in sorted(Path(folder).glob("*"))
        if path.is_file() and ":Zone.Identifier" not in path.name
    ]


img_paths = list_input_files("tom_trashcans", "google_lens_trashcan_dl_jpg")

In [8]:
from src.detection import YOLODetector
from src.segmentation import SAMSegmenter
# Model settings gathered here so they are easy to spot and tune.
YOLO_WEIGHTS = "yolo11n.pt"
YOLO_CONF = 0.25  # presumably the detection confidence threshold; confirm in YOLODetector

# Detector and segmenter instances used by the pipeline cell.
yolo_detector = YOLODetector(YOLO_WEIGHTS, conf=YOLO_CONF)
segmenter = SAMSegmenter()

In [18]:
# One output folder per pipeline output kind (det / seg / txt).
DET_OUTPUT = Path("auto_trashcan_det")
SEG_OUTPUT = Path("auto_trashcan_seg")
TEXT_OUTPUT = Path("auto_trashcan_label")
for out_dir in (DET_OUTPUT, SEG_OUTPUT, TEXT_OUTPUT):
    out_dir.mkdir(exist_ok=True)

# Keyword arguments shared by every image; mode="bbox" goes together with the
# segmenter's segment_bbox entry point.
pipeline_kwargs = dict(
    detect_fn=yolo_detector.detect,
    segment_fn=segmenter.segment_bbox,
    det_output_dir=DET_OUTPUT,
    seg_output_dir=SEG_OUTPUT,
    txt_output_dir=TEXT_OUTPUT,
    mode="bbox",
)

# Run detection + segmentation on each collected image, with a progress bar.
for img_path in tqdm(img_paths):
    img_pipeline(img_path, **pipeline_kwargs)


100%|██████████| 58/58 [2:52:22<00:00, 178.32s/it]    


In [19]:
# TODO: export the original images to a folder, then filter the labels down to the well-segmented images