## convert_cityscapes_to_coco

In [3]:
#!/usr/bin/env python3
# convert_cityscapes_to_coco_fixed.py
#
# Reads dataset/{train,val}/img + dataset/{train,val}/mask (PNG color‐coded)
# and produces dataset/train_coco.json & dataset/val_coco.json in COCO format,
# with live progress, timing estimates, and proper JSON‐serializable RLE.

import os
import json
import time
import numpy as np
from PIL import Image
from pycocotools import mask as maskUtils
from tqdm import tqdm

# 1) Adjust these paths if needed
# DATA_ROOT is joined with each split name plus "img" / "mask" to locate inputs,
# and the resulting "<split>_coco.json" files are written directly under it.
DATA_ROOT = "dataset"
# Splits converted, in order, when the script is run.
SPLITS    = ["train", "val"]

# 2) Your merged class names & color→class map
# The position of a name in CLASS_NAMES is used as its COCO category id (0-9).
CLASS_NAMES = [
    "road_sidewalk_parking",
    "traffic",
    "person",
    "vehicle",
    "pole",
    "structure",
    "ground",
    "dynamic",
    "terrain",
    "sky",
]
# Maps an exact (R, G, B) mask color to an index into CLASS_NAMES.
# Several source colors collapse into the same merged class; any color that is
# not listed here produces no annotation.
PALETTE = {
    # 0: road_sidewalk_parking
    (128,64,128):0, (244,35,232):0, (250,170,160):0,
    # 1: traffic
    (250,170,30):1, (220,220,0):1,
    # 2: person
    (220,20,60):2, (255,0,0):2,
    # 3: vehicle
    (0,0,142):3, (0,0,70):3, (0,60,100):3, (0,0,90):3,
    (0,0,110):3,(0,80,100):3,(0,0,230):3,(119,11,32):3,
    # 4: pole
    (153,153,153):4,
    # 5: structure
    (70,70,70):5,(102,102,156):5,(190,153,153):5,
    (180,165,180):5,(150,100,100):5,
    # 6: ground, 7: dynamic, 8: terrain, 9: sky
    (81,0,81):6, (111,74,0):7, (152,251,152):8, (70,130,180):9,
}

def convert_split(split):
    """Convert one dataset split from color-coded masks to a COCO JSON file.

    Reads images from ``DATA_ROOT/<split>/img`` and the matching color masks
    from ``DATA_ROOT/<split>/mask``, then writes
    ``DATA_ROOT/<split>_coco.json``.  For every image, each PALETTE color that
    occurs in the mask becomes one annotation (RLE segmentation, pixel area and
    bounding box) tagged with the merged class id of that color.  Colors that
    map to the same class therefore still yield separate annotations.

    Args:
        split: Name of the split sub-directory, e.g. ``"train"`` or ``"val"``.

    Raises:
        FileNotFoundError: If the image directory or a mask file is missing.
    """
    img_dir  = os.path.join(DATA_ROOT, split, "img")
    mask_dir = os.path.join(DATA_ROOT, split, "mask")
    out_json = os.path.join(DATA_ROOT, f"{split}_coco.json")

    coco = {
        "images": [],
        "annotations": [],
        "categories": [
            {"id": cid, "name": name} for cid, name in enumerate(CLASS_NAMES)
        ],
    }

    img_files = sorted(
        f for f in os.listdir(img_dir)
        if f.lower().endswith((".png", ".jpg", ".jpeg"))
    )
    total = len(img_files)
    ann_id = 1
    start_time = time.time()

    # Image ids are 1-based; inside the loop body img_id is also the number of
    # images handled so far once the current one is finished.
    for img_id, fn in enumerate(
        tqdm(img_files, desc=f"[{split}]", unit="img"), start=1
    ):
        # 1) Image entry
        with Image.open(os.path.join(img_dir, fn)) as im:
            w, h = im.size
        coco["images"].append({
            "id": img_id, "file_name": fn, "width": w, "height": h
        })

        # 2) Load & process mask
        mask_path = os.path.join(mask_dir, fn)
        if not os.path.exists(mask_path):
            # Masks are PNG; fall back to "<stem>.png" for .jpg/.jpeg images.
            mask_path = os.path.join(mask_dir, os.path.splitext(fn)[0] + ".png")
        with Image.open(mask_path) as mask_im:
            m = np.array(mask_im.convert("RGB"))

        for rgb, cid in PALETTE.items():
            comp = (m == rgb).all(axis=-1).astype(np.uint8)
            area = int(comp.sum())
            if area == 0:
                continue
            rle = maskUtils.encode(np.asfortranarray(comp))
            bbox = maskUtils.toBbox(rle).tolist()
            # convert bytes → str for JSON
            rle["counts"] = rle["counts"].decode("ascii")

            coco["annotations"].append({
                "id": ann_id,
                "image_id": img_id,
                "category_id": cid,
                "segmentation": rle,
                "area": area,
                "bbox": bbox,
                "iscrowd": 0
            })
            ann_id += 1

        # ETA every 100 images and once more after the last one.  The count is
        # taken after the image is fully processed, so the report is exact.
        if img_id % 100 == 0 or img_id == total:
            elapsed = time.time() - start_time
            per_img = elapsed / img_id
            eta = per_img * (total - img_id)
            # tqdm.write keeps the message from tearing the progress bar.
            tqdm.write(
                f"  [{split}] {img_id}/{total} — elapsed {elapsed:.1f}s, ETA {eta:.1f}s"
            )

    # 3) Write JSON
    with open(out_json, "w") as f:
        json.dump(coco, f)
    total_time = time.time() - start_time
    print(f"\nFinished {split}: {total} images, {ann_id-1} annotations in {total_time:.1f}s")
    print(f"Saved: {out_json}\n")

if __name__ == "__main__":
    for split in SPLITS:
        convert_split(split)


[train]:   3%|▎         | 99/2975 [01:25<52:08,  1.09s/img] 

  [train] 100/2975 — elapsed 85.7s, ETA 2463.5s


[train]:   7%|▋         | 199/2975 [03:27<44:48,  1.03img/s]  

  [train] 200/2975 — elapsed 207.4s, ETA 2877.4s


[train]:  10%|█         | 299/2975 [04:59<34:55,  1.28img/s]

  [train] 300/2975 — elapsed 299.2s, ETA 2667.9s


[train]:  13%|█▎        | 399/2975 [06:06<27:42,  1.55img/s]

  [train] 400/2975 — elapsed 366.7s, ETA 2360.6s


[train]:  17%|█▋        | 499/2975 [07:17<26:38,  1.55img/s]

  [train] 500/2975 — elapsed 437.5s, ETA 2165.7s


[train]:  20%|██        | 599/2975 [08:28<28:09,  1.41img/s]

  [train] 600/2975 — elapsed 508.9s, ETA 2014.4s


[train]:  23%|██▎       | 699/2975 [09:53<23:16,  1.63img/s]

  [train] 700/2975 — elapsed 593.6s, ETA 1929.1s


[train]:  27%|██▋       | 799/2975 [10:53<21:42,  1.67img/s]

  [train] 800/2975 — elapsed 653.2s, ETA 1775.9s


[train]:  30%|███       | 899/2975 [11:57<20:29,  1.69img/s]

  [train] 900/2975 — elapsed 717.2s, ETA 1653.4s


[train]:  34%|███▎      | 999/2975 [13:10<24:26,  1.35img/s]

  [train] 1000/2975 — elapsed 790.6s, ETA 1561.5s


[train]:  37%|███▋      | 1099/2975 [14:26<23:32,  1.33img/s]

  [train] 1100/2975 — elapsed 866.2s, ETA 1476.5s


[train]:  40%|████      | 1199/2975 [15:38<30:31,  1.03s/img]

  [train] 1200/2975 — elapsed 938.4s, ETA 1388.0s


[train]:  44%|████▎     | 1299/2975 [16:56<23:35,  1.18img/s]

  [train] 1300/2975 — elapsed 1016.6s, ETA 1309.9s


[train]:  47%|████▋     | 1399/2975 [18:05<18:10,  1.45img/s]

  [train] 1400/2975 — elapsed 1085.1s, ETA 1220.7s


[train]:  50%|█████     | 1499/2975 [19:05<16:25,  1.50img/s]

  [train] 1500/2975 — elapsed 1145.7s, ETA 1126.6s


[train]:  54%|█████▎    | 1599/2975 [20:10<15:30,  1.48img/s]

  [train] 1600/2975 — elapsed 1210.1s, ETA 1039.9s


[train]:  57%|█████▋    | 1699/2975 [21:13<13:13,  1.61img/s]

  [train] 1700/2975 — elapsed 1273.2s, ETA 954.9s


[train]:  60%|██████    | 1799/2975 [22:17<10:55,  1.79img/s]

  [train] 1800/2975 — elapsed 1337.8s, ETA 873.3s


[train]:  64%|██████▍   | 1899/2975 [23:24<13:32,  1.32img/s]

  [train] 1900/2975 — elapsed 1404.7s, ETA 794.8s


[train]:  67%|██████▋   | 1999/2975 [24:28<08:42,  1.87img/s]

  [train] 2000/2975 — elapsed 1468.6s, ETA 715.9s


[train]:  71%|███████   | 2099/2975 [25:39<14:06,  1.04img/s]

  [train] 2100/2975 — elapsed 1539.8s, ETA 641.6s


[train]:  74%|███████▍  | 2199/2975 [27:13<13:08,  1.02s/img]

  [train] 2200/2975 — elapsed 1633.4s, ETA 575.4s


[train]:  77%|███████▋  | 2299/2975 [28:37<07:24,  1.52img/s]

  [train] 2300/2975 — elapsed 1717.2s, ETA 504.0s


[train]:  81%|████████  | 2399/2975 [30:01<10:00,  1.04s/img]

  [train] 2400/2975 — elapsed 1801.3s, ETA 431.6s


[train]:  84%|████████▍ | 2499/2975 [31:28<06:26,  1.23img/s]

  [train] 2500/2975 — elapsed 1888.9s, ETA 358.9s


[train]:  87%|████████▋ | 2599/2975 [32:36<03:36,  1.74img/s]

  [train] 2600/2975 — elapsed 1956.9s, ETA 282.2s


[train]:  91%|█████████ | 2699/2975 [34:04<04:17,  1.07img/s]

  [train] 2700/2975 — elapsed 2044.3s, ETA 208.2s


[train]:  94%|█████████▍| 2799/2975 [35:19<02:30,  1.17img/s]

  [train] 2800/2975 — elapsed 2119.3s, ETA 132.5s


[train]:  97%|█████████▋| 2899/2975 [36:56<01:08,  1.11img/s]

  [train] 2900/2975 — elapsed 2216.6s, ETA 57.3s


[train]: 100%|█████████▉| 2974/2975 [38:04<00:00,  1.35img/s]

  [train] 2975/2975 — elapsed 2284.7s, ETA 0.0s


[train]: 100%|██████████| 2975/2975 [38:05<00:00,  1.30img/s]



Finished train: 2975 images, 35084 annotations in 2298.5s
Saved: dataset\train_coco.json



[val]:  20%|█▉        | 99/500 [01:16<07:04,  1.06s/img]

  [val] 100/500 — elapsed 76.1s, ETA 304.3s


[val]:  40%|███▉      | 199/500 [02:40<04:49,  1.04img/s]

  [val] 200/500 — elapsed 160.2s, ETA 240.4s


[val]:  60%|█████▉    | 299/500 [04:37<03:57,  1.18s/img]

  [val] 300/500 — elapsed 277.6s, ETA 185.0s


[val]:  80%|███████▉  | 399/500 [06:04<01:56,  1.16s/img]

  [val] 400/500 — elapsed 364.1s, ETA 91.0s


[val]: 100%|█████████▉| 499/500 [07:20<00:00,  1.01img/s]

  [val] 500/500 — elapsed 440.9s, ETA 0.0s


[val]: 100%|██████████| 500/500 [07:22<00:00,  1.13img/s]



Finished val: 500 images, 6073 annotations in 445.0s
Saved: dataset\val_coco.json



In [4]:
#!/usr/bin/env python3
# sample_coco_subset.py

import json, random, os

# Source annotations, destination for the subset, and the subset size.
IN_JSON = "dataset/train_coco.json"
OUT_JSON = "dataset/train_coco_small.json"
N_SAMPLES = 500  # capped below at the number of available images

with open(IN_JSON) as src:
    coco = json.load(src)

# Draw the image records at random (unseeded, so every run differs).
all_imgs = coco["images"]
sample_size = min(N_SAMPLES, len(all_imgs))
picked = random.sample(all_imgs, sample_size)
picked_ids = set(record["id"] for record in picked)

# Keep only the annotations that belong to a sampled image.
anns = list(
    filter(lambda ann: ann["image_id"] in picked_ids, coco["annotations"])
)

# Categories are copied over unchanged.
small = dict(images=picked, annotations=anns, categories=coco["categories"])

with open(OUT_JSON, "w") as dst:
    json.dump(small, dst)
print(f"Sampled {len(picked)} images → {OUT_JSON}")


Sampled 500 images → dataset/train_coco_small.json


## yolo V1

In [5]:
#!/usr/bin/env python3
# train_yolov8_seg.py
#
# Full pipeline for YOLOv8-segmentation on your COCO‐format Cityscapes data.
# Uses medium model for speed+power, FP16, cosine LR, early stopping.

import os
from ultralytics import YOLO

def main():
    """Fine-tune a YOLOv8-medium segmentation model on the dataset in data.yaml.

    Loads the pretrained ``yolov8m-seg.pt`` weights, trains for up to 100
    epochs on CPU with AdamW and a cosine learning-rate schedule, and prints
    the run directory when training returns.
    """
    # Paths
    # NOTE(review): Ultralytics loads YOLO-format image/label folders, not COCO
    # JSON files — confirm that data.yaml points at such folders.
    DATA_YAML = 'data.yaml'       # dataset config (paths + class names)
    PROJECT   = 'runs/seg'        # where to save experiments
    NAME      = 'cityscapes_yolov8m_seg'

    # Instantiate a medium‐size segmentation model
    model = YOLO('yolov8m-seg.pt')  # alternatives: yolov8n-seg.pt (nano), yolov8s-seg.pt (small)

    # Train with advanced settings
    model.train(
        data=DATA_YAML,
        epochs=100,             # train up to 100 epochs
        batch=8,                # batch size (adjust to your CPU RAM)
        imgsz=1024,             # input resolution (Square: 1024×1024 for best Cityscapes detail)
        device='cpu',           # CPU training
        workers=4,              # parallel data loaders
        optimizer='AdamW',      # more stable than SGD on small datasets
        lr0=1e-3,               # initial learning rate
        lrf=0.01,               # final lr is lr0 * lrf
        cos_lr=True,            # cosine decay; without this the schedule is linear
        # NOTE(review): `augment` appears to be a predict-time option in
        # Ultralytics; training augmentation is driven by the mosaic/mixup/...
        # hyperparameters — confirm against the configuration docs.
        augment=True,
        val=True,               # run validation each epoch
        patience=10,            # early stopping after 10 epochs without validation improvement
        save_period=5,          # save a checkpoint every 5 epochs
        save=True,              # save the best model automatically
        project=PROJECT,
        name=NAME,
        exist_ok=True,          # overwrite existing runs/seg/NAME
        # `fp16` is not a train() argument and is rejected by the config check;
        # mixed precision is requested through `amp` instead.
        amp=True
    )

    print(f"\n✅ Training complete. Checkpoint & logs in {os.path.join(PROJECT,NAME)}")

if __name__ == '__main__':
    main()


Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m-seg.pt to 'yolov8m-seg.pt'...


  0%|          | 0.00/52.4M [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
#!/usr/bin/env python3
# train_yolov8_seg_fast.py

from ultralytics import YOLO

def main():
    """Run a short CPU training of the nano YOLOv8 segmentation model.

    All run settings are collected in one mapping and forwarded to
    ``model.train``; the run directory is printed once training returns.
    """
    PROJECT = "runs/seg"
    NAME = "cityscapes_yolov8n_seg_fast"

    train_settings = {
        "data": "data.yaml",       # dataset config in the project root
        "epochs": 20,
        "batch": 2,
        "imgsz": 512,
        "device": "cpu",
        "workers": 3,
        "patience": 5,
        "project": PROJECT,
        "name": NAME,
        "exist_ok": True,
        "optimizer": "AdamW",
        "lr0": 1e-3,
        "lrf": 0.05,
        "augment": True,
        "val": True,
        "save": True,
        "half": False,             # the accepted precision switch (there is no `fp16` argument)
    }

    detector = YOLO("yolov8n-seg.pt")
    detector.train(**train_settings)

    print(f"\n✅ Done! Checkpoint & logs in {PROJECT}/{NAME}")

if __name__ == "__main__":
    main()


New https://pypi.org/project/ultralytics/8.3.127 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.78  Python-3.8.7 torch-1.13.1+cpu CPU (Intel Pentium Silver N5030 1.10GHz)
[34m[1mengine\trainer: [0mtask=segment, mode=train, model=yolov8n-seg.pt, data=data.yaml, epochs=30, time=None, patience=5, batch=2, imgsz=512, save=True, save_period=-1, cache=False, device=cpu, workers=2, project=runs/seg, name=cityscapes_yolov8n_seg_fast, exist_ok=True, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=True, agnostic_nms=False, classes=None, retina_masks=False, embed

[34m[1mtrain: [0mScanning C:\Users\MBQ\Desktop\ML Projet Test\ML Projet V3\dataset\train\img... 0 images, 2975 backgrounds, 0 corrupt: 100%|██████████| 2975/2975 [00:04<00:00, 731.82it/s]






[34m[1mtrain: [0mNew cache created: C:\Users\MBQ\Desktop\ML Projet Test\ML Projet V3\dataset\train\img.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


  check_for_updates()
[34m[1mval: [0mScanning C:\Users\MBQ\Desktop\ML Projet Test\ML Projet V3\dataset\val\img... 0 images, 500 backgrounds, 0 corrupt: 100%|██████████| 500/500 [00:00<00:00, 732.49it/s]






[34m[1mval: [0mNew cache created: C:\Users\MBQ\Desktop\ML Projet Test\ML Projet V3\dataset\val\img.cache
Plotting labels to runs\seg\cityscapes_yolov8n_seg_fast\labels.jpg... 
zero-size array to reduction operation maximum which has no identity
[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.937) with parameter groups 66 weight(decay=0.0), 77 weight(decay=0.0005), 76 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 512 train, 512 val
Using 0 dataloader workers
Logging results to [1mruns\seg\cityscapes_yolov8n_seg_fast[0m
Starting training for 30 epochs...

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       1/30         0G          0          0      2.515          0          0        512:   2%|▏         | 31/1488 [02:04<1:37:29,  4.01s/it]


KeyboardInterrupt: 

In [11]:
#!/usr/bin/env python3
# train_yolov8_seg_fast.py  (v2)

from ultralytics import YOLO

def main():
    """Train the nano YOLOv8 segmentation model with CPU-oriented settings.

    Compared with the previous attempt this run lowers the image size to 384,
    enables caching and rectangular batches, freezes layers 0-3 and turns the
    `augment` flag off.
    """
    project, name = "runs/seg", "cityscapes_yolov8n_seg_faster"

    # Settings chosen for CPU speed with acceptable quality.
    YOLO("yolov8n-seg.pt").train(
        data="data.yaml",
        epochs=20,            # upper bound; early stopping may end sooner
        batch=2,              # small batch to stay within memory
        imgsz=384,            # reduced from 512 for speed
        device="cpu",
        workers=4,
        cache=True,           # cache the dataset
        freeze=[0, 1, 2, 3],  # freeze the first four layers
        patience=5,           # early-stopping patience
        rect=True,            # rectangular batches
        augment=False,
        project=project,
        name=name,
        exist_ok=True,
        optimizer="AdamW",
        lr0=1e-3,
        lrf=0.05,
        val=True,
        save=True,
        half=False,           # full precision on CPU
    )
    print(f"\n✅ Done! Checkpoint & logs in {project}/{name}")

if __name__ == "__main__":
    main()


New https://pypi.org/project/ultralytics/8.3.127 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.78  Python-3.8.7 torch-1.13.1+cpu CPU (Intel Pentium Silver N5030 1.10GHz)
[34m[1mengine\trainer: [0mtask=segment, mode=train, model=yolov8n-seg.pt, data=data.yaml, epochs=20, time=None, patience=5, batch=2, imgsz=384, save=True, save_period=-1, cache=True, device=cpu, workers=4, project=runs/seg, name=cityscapes_yolov8n_seg_faster, exist_ok=True, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=True, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=[0, 1, 2, 3], multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=Fal

[34m[1mtrain: [0mScanning C:\Users\MBQ\Desktop\ML Projet Test\ML Projet V3\dataset\train\img.cache... 0 images, 2975 backgrounds, 0 corrupt: 100%|██████████| 2975/2975 [00:00<?, ?it/s]




[34m[1mtrain: [0mCaching images (0.6GB RAM): 100%|██████████| 2975/2975 [00:09<00:00, 305.01it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning C:\Users\MBQ\Desktop\ML Projet Test\ML Projet V3\dataset\val\img.cache... 0 images, 500 backgrounds, 0 corrupt: 100%|██████████| 500/500 [00:00<?, ?it/s]








[34m[1mval: [0mCaching images (0.1GB RAM): 100%|██████████| 500/500 [00:01<00:00, 263.57it/s]


Plotting labels to runs\seg\cityscapes_yolov8n_seg_faster\labels.jpg... 
zero-size array to reduction operation maximum which has no identity
[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.937) with parameter groups 66 weight(decay=0.0), 77 weight(decay=0.0005), 76 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 384 train, 384 val
Using 0 dataloader workers
Logging results to [1mruns\seg\cityscapes_yolov8n_seg_faster[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       1/20         0G          0          0     0.0191          0          0        384: 100%|██████████| 1488/1488 [30:09<00:00,  1.22s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 125/125 [02:13<00:00,  1.07s/it]


RuntimeError: torch.cat(): expected a non-empty list of Tensors

## V3

In [24]:
#!/usr/bin/env python3
import os
import numpy as np
from PIL import Image

#!/usr/bin/env python3
"""
convert_masks_shifted.py

Reads your color‐coded Cityscapes masks in dataset/{train,val}/mask/*.png
and writes single‐channel index masks to dataset/{train,val}/masks/*.png,
shifting all class IDs up by +1 so that 0 becomes background and 1–10
are your semantic classes.

Usage:
    python convert_masks_shifted.py
"""

import os
import numpy as np
from PIL import Image

# 1) Define your merged palette from RGB → class ID (0–9)
#    Any color that is not listed stays 0 (background) in the output mask.
PALETTE = {
    (128,  64, 128): 0,  # road_sidewalk_parking
    (244,  35, 232): 0,
    (250, 170, 160): 0,
    (250, 170,  30): 1,  # traffic
    (220, 220,   0): 1,
    (220,  20,  60): 2,  # person
    (255,   0,   0): 2,
    (  0,   0, 142): 3,  # vehicle
    (  0,   0,  70): 3,
    (  0,  60, 100): 3,
    (  0,   0,  90): 3,
    (  0,   0, 110): 3,
    (  0,  80, 100): 3,
    (  0,   0, 230): 3,
    (119,  11,  32): 3,
    (153, 153, 153): 4,  # pole
    ( 70,  70,  70): 5,  # structure
    (102, 102, 156): 5,
    (190, 153, 153): 5,
    (180, 165, 180): 5,
    (150, 100, 100): 5,
    ( 81,   0,  81): 6,  # ground
    (111,  74,   0): 7,  # dynamic
    (152, 251, 152): 8,  # terrain
    ( 70, 130, 180): 9,  # sky
}

# 2) Process both splits
for split in ("train", "val"):
    mask_in  = os.path.join("dataset", split, "mask")
    mask_out = os.path.join("dataset", split, "masks")
    os.makedirs(mask_out, exist_ok=True)

    print(f"Converting masks for split '{split}'...")
    converted = 0  # files written in this run (the folder may hold older ones)
    for fname in sorted(os.listdir(mask_in)):
        if not fname.lower().endswith((".png", ".jpg", ".jpeg")):
            continue

        # Load the color mask and release the file handle right away
        with Image.open(os.path.join(mask_in, fname)) as im:
            m = np.array(im.convert("RGB"), dtype=np.uint8)

        # Prepare an empty index mask (uint8 is fine for 0–10)
        idx = np.zeros(m.shape[:2], dtype=np.uint8)

        # Map each RGB triplet to its class ID + 1
        for rgb, cid in PALETTE.items():
            idx[np.all(m == rgb, axis=-1)] = cid + 1  # shift up by 1: background=0, classes=1..10

        # Always write PNG: saving under a .jpg/.jpeg name would use lossy
        # compression and alter the class indices.
        out_name = os.path.splitext(fname)[0] + ".png"
        Image.fromarray(idx).save(os.path.join(mask_out, out_name))
        converted += 1

    print(f"  Saved {converted} masks to '{mask_out}'\n")

print("Conversion complete!")



Converting masks for split 'train'...
  Saved 2975 masks to 'dataset\train\masks'

Converting masks for split 'val'...
  Saved 500 masks to 'dataset\val\masks'

Conversion complete!


In [None]:
#!/usr/bin/env python3
# train_yolov8_seg_fast.py

from ultralytics import YOLO

def main():
    """Run a 5-epoch CPU training of the nano YOLOv8 segmentation model.

    The options are gathered in a mapping first and then expanded into the
    ``train`` call; a fixed completion message is printed afterwards.
    """
    options = dict(
        data="data.yaml",
        task="segment",        # stated explicitly
        epochs=5,
        batch=4,
        imgsz=384,
        device="cpu",
        workers=4,
        cache="ram",           # "disk" is the alternative
        freeze=[0, 1, 2, 3],
        rect=True,
        augment=False,
        patience=5,
        optimizer="AdamW",
        lr0=1e-3,
        lrf=0.05,
        project="runs/seg",
        name="cityscapes_yolov8n_seg_fast2",
        exist_ok=True,
    )

    seg_model = YOLO("yolov8n-seg.pt")  # segmentation checkpoint
    seg_model.train(**options)

    print("\n✅ Done! Checkpoint & logs in runs/seg/cityscapes_yolov8n_seg_fast2")

if __name__ == "__main__":
    main()


New https://pypi.org/project/ultralytics/8.3.128 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.78  Python-3.8.7 torch-1.13.1+cpu CPU (Intel Pentium Silver N5030 1.10GHz)
[34m[1mengine\trainer: [0mtask=segment, mode=train, model=yolov8n-seg.pt, data=data.yaml, epochs=5, time=None, patience=5, batch=4, imgsz=384, save=True, save_period=-1, cache=ram, device=cpu, workers=4, project=runs/seg, name=cityscapes_yolov8n_seg_fast2, exist_ok=True, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=True, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=[0, 1, 2, 3], multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False,

FileNotFoundError: [34m[1mtrain: [0mError loading data from C:\Users\MBQ\Desktop\ML Projet Test\ML Projet V3\dataset\train_coco.json
See https://docs.ultralytics.com/datasets for dataset formatting guidance.

In [25]:
from PIL import Image
import numpy as np
# Sanity check: list the distinct values stored in one converted index mask.
# The context manager closes the file handle once the pixels are copied out.
with Image.open("dataset/train/masks/train 5.png") as mask_img:
    m = np.array(mask_img)
print("unique values in mask:", np.unique(m))


unique values in mask: [ 0  1  2  3  4  5  6 10]


In [6]:
from ultralytics import YOLO

# Settings for the instance-segmentation run; every option not listed here is
# left at the library default.
train_args = {
    "data": "data.yaml",
    "task": "segment",
    "epochs": 20,
    "batch": 4,
    "imgsz": 384,
    "device": "cpu",
    "project": "runs/seg_instances",
    "name": "cityscapes_instances",
    "exist_ok": True,
}

model = YOLO("yolov8n-seg.pt")  # instance-seg
model.train(**train_args)


New https://pypi.org/project/ultralytics/8.3.128 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.78  Python-3.8.7 torch-1.13.1+cpu CPU (Intel Pentium Silver N5030 1.10GHz)
[34m[1mengine\trainer: [0mtask=segment, mode=train, model=yolov8n-seg.pt, data=data.yaml, epochs=20, time=None, patience=100, batch=4, imgsz=384, save=True, save_period=-1, cache=False, device=cpu, workers=8, project=runs/seg_instances, name=cityscapes_instances, exist_ok=True, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, 

[34m[1mtrain: [0mScanning C:\Users\MBQ\Desktop\ML Projet Test\ML Projet V3\dataset\train\img.cache... 2975 images, 0 backgrounds, 0 corrupt: 100%|██████████| 2975/2975 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning C:\Users\MBQ\Desktop\ML Projet Test\ML Projet V3\dataset\val\img.cache... 500 images, 0 backgrounds, 0 corrupt: 100%|██████████| 500/500 [00:00<?, ?it/s]

Plotting labels to runs\seg_instances\cityscapes_instances\labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000714, momentum=0.9) with parameter groups 66 weight(decay=0.0), 77 weight(decay=0.0005), 76 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 384 train, 384 val
Using 0 dataloader workers
Logging results to [1mruns\seg_instances\cityscapes_instances[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/744 [00:01<?, ?it/s]


TypeError: ERROR ❌ segment dataset incorrectly formatted or not a segment dataset.
This error can occur when incorrectly training a 'segment' model on a 'detect' dataset, i.e. 'yolo train model=yolo11n-seg.pt data=coco8.yaml'.
Verify your dataset is a correctly formatted 'segment' dataset using 'data=coco8-seg.yaml' as an example.
See https://docs.ultralytics.com/datasets/segment/ for help.

In [19]:
import os
import numpy as np
import cv2
from PIL import Image
import matplotlib.pyplot as plt

# 1) Paths
# Dataset root and the training image folder derived from it.
ROOT         = r"C:/Users/MBQ/Desktop/ML Projet Test/Ptojet_ML_Yolo/dataset"
TRAIN_IMGD   = os.path.join(ROOT, "train", "img")

# 2) Merged class palette (class index → RGB color used when drawing)
#    Keys are the class ids 0–9 expected in the label files; if the ids were
#    shifted by +1 (as in the index masks), the keys here must be shifted too.
#    One representative color is kept per merged class.

PALETTE = {
    0: (128,  64, 128),  # road_sidewalk_parking
    1: (250, 170,  30),  # traffic
    2: (220,  20,  60),  # person
    3: (  0,   0, 142),  # vehicle
    4: (153, 153, 153),  # pole
    5: ( 70,  70,  70),  # structure
    6: ( 81,   0,  81),  # ground
    7: (111,  74,   0),  # dynamic
    8: (152, 251, 152),  # terrain
    9: ( 70, 130, 180),  # sky
}

import os, cv2, numpy as np
from PIL import Image
import matplotlib.pyplot as plt

# Render the polygon labels of the first training image as a color mask and
# show it next to the original image and a blended overlay.
IMGDIR = r"C:/Users/MBQ/Desktop/ML Projet Test/Ptojet_ML_Yolo/dataset/train/img"
fns    = sorted([f for f in os.listdir(IMGDIR) if f.endswith(".png")])
if not fns:
    raise FileNotFoundError(f"No .png images found in {IMGDIR}")
img_fn = fns[0]
# Swap only the extension; str.replace could also hit ".png" inside the name.
txt_fn = os.path.splitext(img_fn)[0] + ".txt"

# load image (forced to 3-channel RGB so the palette colors and the blend match)
with Image.open(os.path.join(IMGDIR, img_fn)) as im:
    orig = np.array(im.convert("RGB"))
h, w = orig.shape[:2]
color_mask = np.zeros_like(orig)

# draw polygons
# NOTE(review): the first five fields of each line are skipped, i.e. the layout
# is assumed to be "class + 4 values + polygon" — confirm against the label
# writer; a plain YOLO-seg line starts its polygon right after the class id.
with open(os.path.join(IMGDIR, txt_fn)) as f:
    for line in f:
        parts = line.split()
        if len(parts) <= 5:
            # Blank line or no polygon coordinates.
            continue
        cls = int(parts[0])
        coords = np.array(list(map(float, parts[5:])))
        # Coordinates are normalized; scale to pixels.  cv2.fillPoly requires
        # 32-bit integer points, so the dtype is set explicitly.
        xs = (coords[0::2] * w).astype(np.int32)
        ys = (coords[1::2] * h).astype(np.int32)
        pts = np.stack((xs, ys), axis=1).reshape(-1, 1, 2)
        cv2.fillPoly(color_mask, [pts], PALETTE[cls])

overlay = cv2.addWeighted(orig, 0.6, color_mask, 0.4, 0)

# All three arrays are already RGB (loaded through PIL, filled with RGB palette
# colors), so they are shown as-is; a BGR→RGB conversion would swap red/blue.
plt.figure(figsize=(12,4))
for i,(im,t) in enumerate(zip((orig,color_mask,overlay),
                             ("Original","Mask","Overlay"))):
    plt.subplot(1,3,i+1)
    plt.imshow(im)
    plt.title(t); plt.axis("off")
plt.show()
Image.fromarray(color_mask).save("mask_color.png")
print("Saved color mask to mask_color.png")


<Figure size 640x480 with 0 Axes>

<Figure size 1200x400 with 3 Axes>

Saved color mask to mask_color.png
