In [1]:
from pathlib import Path
import shutil, yaml

# Folder layout inside the dataset root: one folder per split, each holding
# an images folder and a labels folder.
SUBSETS: list = ["train", "valid", "test"]
IMAGES_SUBDIR: str = "images"      # image folder that goes with the labels
LABELS_SUBDIR: str = "labels"      # Roboflow stores the labels here by default

# Dataset path (the root that contains train/, valid/, test/).
DATASET_DIR: Path = Path("../../datasets/roboflow_2")

# Set NEW_LABEL_DIR to save the remapped labels somewhere else.
NEW_LABEL_DIR: "Path | None" = None  # e.g. Path("../../datasets/roboflow_2_remapped")

In [None]:
# Source class names. The position of a name in this list is the class index
# it is looked up by, so the order must not change.
ORIGINAL_CLASSES = [
    "Aerosols",                           # 0
    "Aluminum can",                       # 1
    "Aluminum caps",                      # 2
    "Cardboard",                          # 3
    "Cellulose",                          # 4
    "Ceramic",                            # 5
    "Combined plastic",                   # 6
    "Container for household chemicals",  # 7
    "Disposable tableware",               # 8
    "Electronics",                        # 9
    "Foil",                               # 10
    "Furniture",                          # 11
    "Glass bottle",                       # 12
    "Iron utensils",                      # 13
    "Liquid",                             # 14
    "Metal shavings",                     # 15
    "Milk bottle",                        # 16
    "Organic",                            # 17
    "Paper",                              # 18
    "Paper bag",                          # 19
    "Paper cups",                         # 20
    "Paper shavings",                     # 21
    "Papier mache",                       # 22
    "Plastic bag",                        # 23
    "Plastic bottle",                     # 24
    "Plastic can",                        # 25
    "Plastic canister",                   # 26
    "Plastic caps",                       # 27
    "Plastic cup",                        # 28
    "Plastic shaker",                     # 29
    "Plastic shavings",                   # 30
    "Plastic toys",                       # 31
    "Postal packaging",                   # 32
    "Printing industry",                  # 33
    "Scrap metal",                        # 34
    "Stretch film",                       # 35
    "Tetra pack",                         # 36
    "Textile",                            # 37
    "Tin",                                # 38
    "Unknown plastic",                    # 39
    "Wood",                               # 40
    "Zip plastic bag",                    # 41
]

# Target class names; again the list position is the class index.
NEW_CLASSES = [
    "glass",    # 0
    "metal",    # 1
    "organic",  # 2
    "paper",    # 3
    "plastic",  # 4
]

In [None]:
# Remapping from original class names to new class names.
# The entries are written as (target, sources) groups and expanded in the order
# listed, which yields the same key order as spelling out every pair by hand.
REMAPPING = {
    source: target
    for target, sources in (
        # ----- metal -----
        ("metal", (
            "Aerosols",
            "Aluminum can",
            "Aluminum caps",
            "Foil",
            "Iron utensils",
            "Metal shavings",
            "Scrap metal",
            "Tin",
        )),
        # ----- glass -----
        ("glass", (
            "Glass bottle",
            "Ceramic",               # change this if you want to treat it differently
        )),
        # ----- plastic -----
        ("plastic", (
            "Combined plastic",
            "Container for household chemicals",
            "Disposable tableware",
            "Plastic bag",
            "Plastic bottle",
            "Plastic can",
            "Plastic canister",
            "Plastic caps",
            "Plastic cup",
            "Plastic shaker",
            "Plastic shavings",
            "Plastic toys",
            "Stretch film",
            "Unknown plastic",
            "Zip plastic bag",
        )),
        # ----- paper -----
        ("paper", (
            "Cardboard",
            "Cellulose",
            "Paper",
            "Paper bag",
            "Paper cups",
            "Paper shavings",
            "Papier mache",
            "Postal packaging",
            "Printing industry",
            "Tetra pack",            # or "plastic" if you prefer
        )),
        # ----- organic -----
        ("organic", (
            "Organic",
            "Liquid",
            "Wood",
            "Textile",
            "Furniture",
            "Electronics",
        )),
        # Kept as its own trailing group so it stays the last key of the dict.
        ("plastic", (
            "Milk bottle",           # change this if you want to treat it differently
        )),
        # ----- medical / sharps -----
        # (no source class specified; add one here if you have any)
        # ("medical", ("Syringe",)),
    )
    for source in sources
}

In [7]:
# Check that REMAPPING is consistent with both class lists.
# Three things can be wrong, and each one is reported separately:
#   * an original class has no entry in REMAPPING;
#   * a REMAPPING key is not an original class (typically a typo);
#   * a REMAPPING value is not one of the new classes.
# The last two would otherwise only surface later as a bare
# "... is not in list" ValueError when names are turned into indices.
missing = set(ORIGINAL_CLASSES) - set(REMAPPING.keys())
unknown_sources = sorted(set(REMAPPING.keys()) - set(ORIGINAL_CLASSES))
invalid_targets = sorted(set(REMAPPING.values()) - set(NEW_CLASSES))

if missing:
    print("⚠️  Classi SENZA mappatura:", missing)
if unknown_sources:
    print("⚠️  Chiavi di REMAPPING non presenti in ORIGINAL_CLASSES:", unknown_sources)
if invalid_targets:
    print("⚠️  Destinazioni non presenti in NEW_CLASSES:", invalid_targets)
if not (missing or unknown_sources or invalid_targets):
    print("✅ Tutte le classi sono mappate!")


✅ Tutte le classi sono mappate!


In [8]:
# Turn the name-based REMAPPING into an index table: position of the source
# name in ORIGINAL_CLASSES → position of the target name in NEW_CLASSES.
orig2new_idx = dict(
    (ORIGINAL_CLASSES.index(old_name), NEW_CLASSES.index(new_name))
    for old_name, new_name in REMAPPING.items()
)
# Show the first five pairs as a quick sanity check.
print("Esempio di mapping (old→new):", [*orig2new_idx.items()][:5])

Esempio di mapping (old→new): [(0, 2), (1, 2), (2, 2), (10, 2), (13, 2)]


In [9]:

def remap_label_file(src_path: Path, dst_path: Path, mapping: "dict[int, int] | None" = None):
    """Rewrite the class index at the start of every line of a label file.

    Each non-blank line of ``src_path`` is split on whitespace; its first
    token is read as an integer class index and replaced through ``mapping``.
    The remaining tokens are kept as they are and re-joined with single
    spaces. Blank lines are dropped.

    The source is read completely (and validated) before anything is written,
    so ``dst_path`` may be the same file as ``src_path`` and a failing file is
    never left half-written.

    Args:
        src_path: Label file to read.
        dst_path: File to write; missing parent folders are created.
        mapping: Old class index → new class index. Defaults to the
            notebook-level ``orig2new_idx`` table.

    Raises:
        ValueError: If a line does not start with an integer, or starts with
            an index that has no entry in ``mapping``.
    """
    if mapping is None:
        mapping = orig2new_idx

    new_lines = []
    with src_path.open(encoding="utf-8") as f:
        for line_no, ln in enumerate(f, start=1):
            parts = ln.split()
            if not parts:
                continue
            try:
                old_idx = int(parts[0])
            except ValueError:
                # Same exception type as before, but it now says where the bad token is.
                raise ValueError(
                    f"Indice di classe non valido {parts[0]!r} in {src_path} (riga {line_no})"
                ) from None
            if old_idx not in mapping:
                raise ValueError(f"Classe {old_idx} non mappata in {src_path}")
            parts[0] = str(mapping[old_idx])
            new_lines.append(" ".join(parts) + "\n")

    dst_path.parent.mkdir(parents=True, exist_ok=True)
    with dst_path.open("w", encoding="utf-8") as f:
        f.writelines(new_lines)

# -------------------------------------------
# Cell 7 – loop over train/valid/test
# -------------------------------------------
# Extensions tried when looking for the image that belongs to a label file.
# ".jpg" comes first (and is the fallback), so JPEG-only datasets behave as before.
_IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".bmp", ".webp", ".JPG", ".JPEG", ".PNG")

# The remap rewrites class indices, so it is only valid on labels that still
# use the ORIGINAL_CLASSES numbering. Running it again on already remapped
# files (which is what a second run does in the default in-place mode) would
# read the new indices as old ones and silently corrupt the dataset.
# The class names recorded in the source data.yaml tell the two states apart;
# when that file or its "names" entry is missing, the state cannot be checked
# and the remap proceeds as it always did.
_src_yaml = DATASET_DIR / "data.yaml"
_src_names = None
if _src_yaml.exists():
    with open(_src_yaml) as f:
        _src_cfg = yaml.safe_load(f)
    if isinstance(_src_cfg, dict):
        _src_names = _src_cfg.get("names")
    if isinstance(_src_names, dict):
        # names given as {index: name}; flatten to a list ordered by index
        _src_names = [_src_names[k] for k in sorted(_src_names)]

if _src_names is None or list(_src_names) == ORIGINAL_CLASSES:
    _needs_remap = True
elif list(_src_names) == NEW_CLASSES:
    _needs_remap = False
else:
    raise ValueError(
        f"Le classi in {_src_yaml} non corrispondono né a ORIGINAL_CLASSES né a "
        "NEW_CLASSES: rimappatura annullata per non corrompere le etichette"
    )

if not _needs_remap:
    print(f"⏭️  Etichette già rimappate secondo {_src_yaml}: nessuna modifica.")
else:
    for subset in SUBSETS:
        label_dir = DATASET_DIR / subset / LABELS_SUBDIR
        img_dir   = DATASET_DIR / subset / IMAGES_SUBDIR

        # destination folders
        if NEW_LABEL_DIR:
            new_label_dir = NEW_LABEL_DIR / subset / LABELS_SUBDIR
            new_img_dir   = NEW_LABEL_DIR / subset / IMAGES_SUBDIR
        else:
            new_label_dir = label_dir
            new_img_dir   = img_dir

        for txt_path in label_dir.rglob("*.txt"):
            rel = txt_path.relative_to(label_dir)
            dst_txt = new_label_dir / rel
            remap_label_file(txt_path, dst_txt)

            # copy the matching image when a new dataset folder is being created
            if NEW_LABEL_DIR:
                src_img_dir = img_dir / rel.parent
                stem = txt_path.stem
                # First extension for which an image actually exists; fall back
                # to ".jpg" so a missing image fails in copy2 exactly as before.
                img_name = next(
                    (stem + ext for ext in _IMAGE_EXTS if (src_img_dir / (stem + ext)).exists()),
                    stem + ".jpg",
                )
                src_img = src_img_dir / img_name
                dst_img = new_img_dir / rel.parent / img_name
                dst_img.parent.mkdir(parents=True, exist_ok=True)
                if not dst_img.exists():
                    shutil.copy2(src_img, dst_img)

    print("🚀 Rimappatura terminata!")

# -------------------------------------------
# Cell 8 – update data.yaml
# -------------------------------------------
# The config is written into the root that holds the remapped labels:
# NEW_LABEL_DIR when it is set, otherwise the dataset itself.
output_root = NEW_LABEL_DIR or DATASET_DIR
data_yaml_path = output_root / "data.yaml"

# Start from the config already at the destination. When writing to a new
# folder that has none yet, start from the source dataset's config instead,
# so its other keys are carried over rather than dropped.
data_cfg = {}
for candidate in (data_yaml_path, DATASET_DIR / "data.yaml"):
    if candidate.exists():
        with open(candidate) as f:
            # safe_load returns None for an empty file; treat that as "no keys"
            data_cfg = yaml.safe_load(f) or {}
        break

if not isinstance(data_cfg, dict):
    raise TypeError(f"Contenuto inatteso nel data.yaml: atteso un mapping, trovato {type(data_cfg).__name__}")

# NOTE(review): these paths are relative to the notebook's working directory,
# not to the location of data.yaml — confirm the training tool resolves them
# the way you expect.
data_cfg.update({
    "train": str(output_root / "train" / IMAGES_SUBDIR),
    "val":   str(output_root / "valid" / IMAGES_SUBDIR),
    "test":  str(output_root / "test" / IMAGES_SUBDIR),
    "nc":    len(NEW_CLASSES),
    "names": NEW_CLASSES,
})

# The destination folder may not exist yet (e.g. no label file was found to copy).
data_yaml_path.parent.mkdir(parents=True, exist_ok=True)
with open(data_yaml_path, "w") as f:
    yaml.safe_dump(data_cfg, f, sort_keys=False)

print(f"✅ File data.yaml aggiornato a {data_yaml_path}")


🚀 Rimappatura terminata!
✅ File data.yaml aggiornato a ../../datasets/roboflow_2/data.yaml
