In [12]:
# Mount Google Drive so the raw KITTI folders under MyDrive are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

# List every image_2 / label_2 directory (at most 6 levels deep) under the raw KITTI folder.
!find "/content/drive/MyDrive/training_kitti_raw" -maxdepth 6 -type d -name "image_2"
!find "/content/drive/MyDrive/training_kitti_raw" -maxdepth 6 -type d -name "label_2"


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/training_kitti_raw/object_image/testing/image_2
/content/drive/MyDrive/training_kitti_raw/object_image/training/image_2
/content/drive/MyDrive/training_kitti_raw/object_label/training/label_2


In [17]:
%%writefile /content/drive/MyDrive/training/prepare_kitti_yolo_fast.py
import argparse, random, shutil, os, time
from pathlib import Path
from PIL import Image

# KITTI object type -> YOLO class id. Types that are not keys here are skipped by main().
MAP = {
    "Car": 0,
    "Van": 0,
    "Truck": 0,            # if you do not want Truck treated as Car, delete this line
    "Pedestrian": 1,
    "Person_sitting": 1,
    "Cyclist": 2,
}
# File suffixes (compared lower-cased) that main() accepts as images.
IMG_EXTS = {".png", ".jpg", ".jpeg"}

def yolo_line(cls, left, top, right, bottom, w, h):
    """Format one pixel-space box as a YOLO label line.

    Args:
        cls: Class id written as the first field.
        left, top, right, bottom: Box corners in pixels.
        w, h: Image width and height in pixels.

    Returns:
        ``"<cls> <x_center> <y_center> <width> <height>\\n"`` with the four
        geometry values normalised by the image size and printed with six
        decimals.
    """
    # Clip the corners to the image *before* deriving centre and size.
    # Clamping centre and size independently (as was done before) leaves a box
    # that still reaches outside the image, e.g. left=-10, right=50, w=100
    # gave x=0.2, bw=0.6, i.e. a box spanning [-0.1, 0.5].
    left = max(0.0, min(float(w), left))
    right = max(0.0, min(float(w), right))
    top = max(0.0, min(float(h), top))
    bottom = max(0.0, min(float(h), bottom))

    x = ((left + right) / 2.0) / w
    y = ((top + bottom) / 2.0) / h
    bw = (right - left) / w
    bh = (bottom - top) / h
    return f"{cls} {x:.6f} {y:.6f} {bw:.6f} {bh:.6f}\n"

def link_or_copy(src: Path, dst: Path, mode: str):
    """Make ``dst`` available as a symlink to, or a copy of, ``src``.

    Args:
        src: Existing source file.
        dst: Destination path; parent directories are created as needed.
        mode: ``"symlink"`` tries a symlink first and falls back to a copy if
            linking fails; any other value copies directly.

    An already-existing ``dst`` is left untouched. A dangling symlink at
    ``dst`` is removed and recreated.
    """
    if dst.is_symlink() and not dst.exists():
        # exists() follows links, so a broken link looks "missing"; remove it
        # instead of failing on symlink() or copying through it.
        dst.unlink()
    elif dst.exists():
        return
    dst.parent.mkdir(parents=True, exist_ok=True)
    if mode == "symlink":
        try:
            # Link to the absolute path: a relative target would be resolved
            # against dst's directory and produce a dangling link.
            os.symlink(str(src.resolve()), str(dst))
            return
        except (OSError, NotImplementedError):
            # fallback to copy if the filesystem/platform refuses symlinks
            pass
    shutil.copy2(src, dst)

def main():
    """Build a YOLO-format train/val/test dataset from a KITTI image/label directory pair.

    Parses the command line, wipes and recreates ``--work_dir``, shuffles the
    images with ``--seed``, links or copies every image into its split,
    converts the matching KITTI label file to YOLO lines via ``yolo_line``
    (object types missing from ``MAP`` are skipped), writes ``data.yaml`` and,
    if ``--export_dir`` is given, copies the finished dataset there.

    Raises:
        FileNotFoundError: ``--img_dir`` or ``--lbl_dir`` does not exist.
        RuntimeError: no file with a suffix in ``IMG_EXTS`` was found.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--img_dir", required=True)    # .../training/image_2
    ap.add_argument("--lbl_dir", required=True)    # .../training/label_2
    ap.add_argument("--work_dir", default="/content/yolo_dataset_kitti")  # fast local
    ap.add_argument("--seed", type=int, default=123)
    ap.add_argument("--train", type=float, default=0.7)
    ap.add_argument("--val", type=float, default=0.15)
    ap.add_argument("--test", type=float, default=0.15)
    ap.add_argument("--mode", choices=["symlink","copy"], default="symlink")
    ap.add_argument("--limit", type=int, default=0, help="debug: use only first N images")
    ap.add_argument("--export_dir", default="", help="optional: copy final dataset to Drive (slow)")
    args = ap.parse_args()

    # Validate everything before the output directory is wiped.
    fractions = (args.train, args.val, args.test)
    if min(fractions) < 0 or sum(fractions) > 1.0 + 1e-6:
        ap.error("--train, --val and --test must be non-negative and sum to at most 1")

    img_dir = Path(args.img_dir)
    lbl_dir = Path(args.lbl_dir)
    # Explicit errors rather than assert: asserts are stripped under `python -O`.
    if not img_dir.exists():
        raise FileNotFoundError(f"missing img_dir: {img_dir}")
    if not lbl_dir.exists():
        raise FileNotFoundError(f"missing lbl_dir: {lbl_dir}")

    imgs = sorted([p for p in img_dir.iterdir() if p.suffix.lower() in IMG_EXTS])
    if not imgs:
        raise RuntimeError(f"No images found in {img_dir}")

    out = Path(args.work_dir)
    # reset output
    if out.exists():
        shutil.rmtree(out)
    for s in ["train","val","test"]:
        (out/"images"/s).mkdir(parents=True, exist_ok=True)
        (out/"labels"/s).mkdir(parents=True, exist_ok=True)

    if args.limit and args.limit > 0:
        imgs = imgs[:args.limit]

    # Private RNG so the shuffle depends only on --seed.
    rnd = random.Random(args.seed)
    rnd.shuffle(imgs)

    n = len(imgs)
    n_train = int(n*args.train)
    n_val = int(n*args.val)
    remaining = max(0, n - n_train - n_val)
    if sum(fractions) >= 1.0 - 1e-6:
        # Fractions cover the whole set: test takes the remainder so that
        # integer truncation never drops an image.
        n_test = remaining
    else:
        # Fractions cover only part of the set: honour --test instead of
        # silently handing every leftover image to the test split.
        n_test = min(int(n*args.test), remaining)
    splits = {
        "train": imgs[:n_train],
        "val": imgs[n_train:n_train+n_val],
        "test": imgs[n_train+n_val:n_train+n_val+n_test],
    }

    print("Total images:", n, "splits:", {k: len(v) for k,v in splits.items()})
    t0 = time.time()

    for split, arr in splits.items():
        for i, img_path in enumerate(arr, start=1):
            # link/copy image
            dst_img = out/"images"/split/img_path.name
            link_or_copy(img_path, dst_img, args.mode)

            # read image size (lightweight)
            with Image.open(img_path) as im:
                w, h = im.size

            # convert label
            kitti_label = lbl_dir/(img_path.stem + ".txt")
            out_lines = []
            if kitti_label.exists():
                for line in kitti_label.read_text().splitlines():
                    if not line.strip():
                        continue
                    parts = line.split()
                    t = parts[0]
                    if t == "DontCare" or t not in MAP:
                        continue
                    if len(parts) < 8:
                        # Fields 4..7 hold the box; a shorter line cannot be converted.
                        print(f"WARNING: skipping malformed label line in {kitti_label}: {line!r}")
                        continue
                    left, top, right, bottom = map(float, parts[4:8])
                    if right <= left or bottom <= top:
                        continue
                    out_lines.append(yolo_line(MAP[t], left, top, right, bottom, w, h))

            # A label file is written for every image, even when it ends up empty.
            (out/"labels"/split/(img_path.stem + ".txt")).write_text("".join(out_lines))

            if i % 200 == 0 or i == len(arr):
                dt = time.time() - t0
                print(f"[{split}] {i}/{len(arr)} done | elapsed {dt:.1f}s")

    # write data.yaml
    (out/"data.yaml").write_text(
        f"""path: {out}
train: images/train
val: images/val
test: images/test
names:
  0: Car
  1: Pedestrian
  2: Cyclist
"""
    )
    print("DONE:", out)
    print("data.yaml:", out/"data.yaml")

    # optional export to Drive (slow!)
    if args.export_dir.strip():
        exp = Path(args.export_dir)
        if exp.exists():
            shutil.rmtree(exp)
        shutil.copytree(out, exp, symlinks=False)  # export as real files
        print("EXPORTED to:", exp)

if __name__ == "__main__":
    main()


Writing /content/drive/MyDrive/training/prepare_kitti_yolo_fast.py


In [19]:
from pathlib import Path
from PIL import Image

# Sanity check of the raw KITTI folders: count images, open the first one, peek at its label.
img_dir = Path("/content/drive/MyDrive/training_kitti_raw/object_image/training/image_2")
lbl_dir = Path("/content/drive/MyDrive/training_kitti_raw/object_label/training/label_2")

imgs = sorted(img_dir.glob("*.png"))
print("num images:", len(imgs))
# Fail with a readable message instead of an IndexError on imgs[0].
assert imgs, f"no .png images found in {img_dir}"
print("first image:", imgs[0])

# Check that an image can be opened; the context manager closes the file handle.
with Image.open(imgs[0]) as im:
    print("size:", im.size)

# Check the label file that shares the image's stem.
lab = lbl_dir / (imgs[0].stem + ".txt")
print("label exists:", lab.exists())
print("label sample:\n", lab.read_text().splitlines()[:3] if lab.exists() else "NO LABEL")


num images: 7481
first image: /content/drive/MyDrive/training_kitti_raw/object_image/training/image_2/000000.png
size: (1224, 370)
label exists: True
label sample:
 ['Pedestrian 0.00 0 -0.20 712.40 143.00 810.73 307.92 1.89 0.48 1.20 1.84 1.47 8.41 0.01']


In [20]:
import random, shutil
from pathlib import Path
from PIL import Image

# KITTI object type -> YOLO class id; types not listed are skipped during conversion.
MAP = {"Car":0,"Van":0,"Truck":0,"Pedestrian":1,"Person_sitting":1,"Cyclist":2}

img_dir = Path("/content/drive/MyDrive/training_kitti_raw/object_image/training/image_2")
lbl_dir = Path("/content/drive/MyDrive/training_kitti_raw/object_label/training/label_2")

# Local scratch dataset; wiped on every run so stale files never mix in.
out = Path("/content/yolo_kitti_small")
if out.exists():
    shutil.rmtree(out)
(out/"images/train").mkdir(parents=True, exist_ok=True)
(out/"labels/train").mkdir(parents=True, exist_ok=True)

imgs = sorted(img_dir.glob("*.png"))
# Fixed seed so a re-run picks the same 20 images.
random.seed(123)
sample = random.sample(imgs, 20)

def yolo_line(cls, left, top, right, bottom, w, h):
    """Format one pixel-space box as a YOLO label line.

    Args:
        cls: Class id written as the first field.
        left, top, right, bottom: Box corners in pixels.
        w, h: Image width and height in pixels.

    Returns:
        ``"<cls> <x_center> <y_center> <width> <height>\\n"`` with the geometry
        normalised by the image size and printed with six decimals.
    """
    # Clip the corners to the image first so the normalised values always stay
    # inside [0, 1]; without this a box touching the border could be written
    # with out-of-range coordinates.
    left = max(0.0, min(float(w), left))
    right = max(0.0, min(float(w), right))
    top = max(0.0, min(float(h), top))
    bottom = max(0.0, min(float(h), bottom))

    x = ((left + right)/2)/w
    y = ((top + bottom)/2)/h
    bw = (right-left)/w
    bh = (bottom-top)/h
    return f"{cls} {x:.6f} {y:.6f} {bw:.6f} {bh:.6f}\n"

# Convert every sampled image: copy it locally, read its size, and rewrite its
# KITTI label file as YOLO lines.
for i, img_path in enumerate(sample, 1):
    # copy image (to local disk -> faster than writing to Drive)
    shutil.copy2(img_path, out/"images/train"/img_path.name)

    # get size
    with Image.open(img_path) as im:
        w,h = im.size

    # convert label
    kitti = lbl_dir/(img_path.stem+".txt")
    lines=[]
    if kitti.exists():
        for line in kitti.read_text().splitlines():
            if not line.strip():
                continue
            p=line.split()
            t=p[0]
            # Skip "DontCare" regions and any type without a class id in MAP.
            if t=="DontCare" or t not in MAP:
                continue
            # Fields 4..7 are read as the box corners in pixels.
            left,top,right,bottom = map(float, p[4:8])
            # Drop boxes with zero or negative width/height.
            if right<=left or bottom<=top:
                continue
            lines.append(yolo_line(MAP[t], left, top, right, bottom, w, h))

    # A label file is written for every image, even when no box survived.
    (out/"labels/train"/(img_path.stem+".txt")).write_text("".join(lines))

    print(f"{i}/20 done: {img_path.name}")

# Dataset config; `val` points at the train images, so this config has no held-out split.
(out/"data.yaml").write_text(f"""path: {out}
train: images/train
val: images/train
names:
  0: Car
  1: Pedestrian
  2: Cyclist
""")

print("DONE:", out)


1/20 done: 006983.png
2/20 done: 004771.png
3/20 done: 003390.png
4/20 done: 006889.png
5/20 done: 005572.png
6/20 done: 003196.png
7/20 done: 000002.png
8/20 done: 001533.png
9/20 done: 004983.png
10/20 done: 004092.png
11/20 done: 003871.png
12/20 done: 007311.png
13/20 done: 004815.png
14/20 done: 001826.png
15/20 done: 002811.png
16/20 done: 004526.png
17/20 done: 005936.png
18/20 done: 001064.png
19/20 done: 005978.png
20/20 done: 004411.png
DONE: /content/yolo_kitti_small


In [21]:
!ls -lah /content/yolo_kitti_small/images/train | head
!ls -lah /content/yolo_kitti_small/labels/train | head


total 16M
drwxr-xr-x 2 root root 4.0K Dec 18 01:43 .
drwxr-xr-x 3 root root 4.0K Dec 18 01:42 ..
-rw------- 1 root root 750K Dec 18 00:23 000002.png
-rw------- 1 root root 835K Dec 18 00:23 001064.png
-rw------- 1 root root 593K Dec 18 00:23 001533.png
-rw------- 1 root root 834K Dec 18 00:23 001826.png
-rw------- 1 root root 874K Dec 18 00:23 002811.png
-rw------- 1 root root 786K Dec 18 00:23 003196.png
-rw------- 1 root root 803K Dec 18 00:23 003390.png
total 88K
drwxr-xr-x 2 root root 4.0K Dec 18 01:43 .
drwxr-xr-x 3 root root 4.0K Dec 18 01:42 ..
-rw-r--r-- 1 root root   38 Dec 18 01:43 000002.txt
-rw-r--r-- 1 root root   76 Dec 18 01:43 001064.txt
-rw-r--r-- 1 root root  380 Dec 18 01:43 001533.txt
-rw-r--r-- 1 root root  114 Dec 18 01:43 001826.txt
-rw-r--r-- 1 root root   38 Dec 18 01:43 002811.txt
-rw-r--r-- 1 root root   76 Dec 18 01:43 003196.txt
-rw-r--r-- 1 root root  342 Dec 18 01:42 003390.txt


In [22]:
import random, shutil, yaml
from pathlib import Path
from PIL import Image

# Your official KITTI source folders (images and labels)
IMG_DIR = Path("/content/drive/MyDrive/training_kitti_raw/object_image/training/image_2")
LBL_DIR = Path("/content/drive/MyDrive/training_kitti_raw/object_label/training/label_2")

# Destination (small dataset); an existing copy is deleted first so each run starts clean
OUT = Path("/content/drive/MyDrive/training/yolo_kitti_100")
if OUT.exists():
    shutil.rmtree(OUT)

# Create images/<split> and labels/<split> for all three splits.
for s in ["train","val","test"]:
    (OUT/"images"/s).mkdir(parents=True, exist_ok=True)
    (OUT/"labels"/s).mkdir(parents=True, exist_ok=True)

# KITTI object type -> YOLO class id; types not listed are skipped during conversion.
MAP = {"Car":0, "Van":0, "Truck":0, "Pedestrian":1, "Person_sitting":1, "Cyclist":2}

def yolo_line(cls, left, top, right, bottom, w, h):
    """Format one pixel-space box as a YOLO label line.

    Args:
        cls: Class id written as the first field.
        left, top, right, bottom: Box corners in pixels.
        w, h: Image width and height in pixels.

    Returns:
        ``"<cls> <x_center> <y_center> <width> <height>\\n"`` with the geometry
        normalised by the image size and printed with six decimals.
    """
    # Clip the corners to the image before deriving centre and size. Clamping
    # centre and size separately (the previous approach) keeps each number in
    # [0, 1] but can still describe a box that reaches outside the image.
    left = max(0.0, min(float(w), left))
    right = max(0.0, min(float(w), right))
    top = max(0.0, min(float(h), top))
    bottom = max(0.0, min(float(h), bottom))

    x = ((left + right)/2)/w
    y = ((top + bottom)/2)/h
    bw = (right-left)/w
    bh = (bottom-top)/h
    return f"{cls} {x:.6f} {y:.6f} {bw:.6f} {bh:.6f}\n"

# Build the 100-image subset: sample, split, copy images and convert labels.
imgs = sorted(IMG_DIR.glob("*.png"))
print("Total KITTI training images:", len(imgs))

# Seed the global RNG so the same 100 images are drawn on every run.
random.seed(123)
sample = random.sample(imgs, 100)

# split 70/15/15
train = sample[:70]
val   = sample[70:85]
test  = sample[85:]

splits = {"train": train, "val": val, "test": test}

for split, arr in splits.items():
    for i, img_path in enumerate(arr, 1):
        shutil.copy2(img_path, OUT/"images"/split/img_path.name)

        # Image size is needed to normalise the pixel boxes.
        with Image.open(img_path) as im:
            w, h = im.size

        kitti = LBL_DIR / (img_path.stem + ".txt")
        out_lines = []
        if kitti.exists():
            for line in kitti.read_text().splitlines():
                if not line.strip():
                    continue
                p = line.split()
                t = p[0]
                # Skip "DontCare" regions and any type without a class id in MAP.
                if t == "DontCare" or t not in MAP:
                    continue
                # Fields 4..7 are read as the box corners in pixels.
                left, top, right, bottom = map(float, p[4:8])
                # Drop boxes with zero or negative width/height.
                if right <= left or bottom <= top:
                    continue
                out_lines.append(yolo_line(MAP[t], left, top, right, bottom, w, h))

        # A label file is written for every image, even when no box survived.
        (OUT/"labels"/split/(img_path.stem+".txt")).write_text("".join(out_lines))

    print(split, "done:", len(arr))

# data.yaml
data = {
    "path": str(OUT),
    "train": "images/train",
    "val": "images/val",
    "test": "images/test",
    "names": {0:"Car", 1:"Pedestrian", 2:"Cyclist"}
}
# sort_keys=False keeps the keys in the order written above.
(OUT/"data.yaml").write_text(yaml.safe_dump(data, sort_keys=False))
print("Wrote:", OUT/"data.yaml")
print("DONE:", OUT)


Total KITTI training images: 7481
train done: 70
val done: 15
test done: 15
Wrote: /content/drive/MyDrive/training/yolo_kitti_100/data.yaml
DONE: /content/drive/MyDrive/training/yolo_kitti_100


In [23]:
!ls -lah "/content/drive/MyDrive/training/yolo_kitti_100/images/train" | head
!ls -lah "/content/drive/MyDrive/training/yolo_kitti_100/labels/train" | head


total 56M
-rw------- 1 root root 970K Dec 18 00:23 000013.png
-rw------- 1 root root 870K Dec 18 00:23 000054.png
-rw------- 1 root root 879K Dec 18 00:23 000173.png
-rw------- 1 root root 726K Dec 18 00:23 000301.png
-rw------- 1 root root 809K Dec 18 00:23 000312.png
-rw------- 1 root root 833K Dec 18 00:23 000359.png
-rw------- 1 root root 904K Dec 18 00:23 000425.png
-rw------- 1 root root 722K Dec 18 00:23 000428.png
-rw------- 1 root root 650K Dec 18 00:23 000572.png
total 37K
-rw------- 1 root root  38 Dec 18 01:44 000013.txt
-rw------- 1 root root  38 Dec 18 01:44 000054.txt
-rw------- 1 root root 266 Dec 18 01:45 000173.txt
-rw------- 1 root root 266 Dec 18 01:45 000301.txt
-rw------- 1 root root 190 Dec 18 01:44 000312.txt
-rw------- 1 root root 114 Dec 18 01:45 000359.txt
-rw------- 1 root root 266 Dec 18 01:44 000425.txt
-rw------- 1 root root 152 Dec 18 01:44 000428.txt
-rw------- 1 root root 342 Dec 18 01:44 000572.txt


In [24]:
!pip -q install ultralytics
!yolo detect train model=yolov8n.pt data="/content/drive/MyDrive/training/yolo_kitti_100/data.yaml" \
  epochs=30 imgsz=640 project="/content/runs_100" name="baseline" exist_ok=True


Ultralytics 8.3.240 üöÄ Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (NVIDIA A100-SXM4-80GB, 81222MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/training/yolo_kitti_100/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=baseline, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_m

In [29]:
!python /content/drive/MyDrive/training/generate_img2img.py \
  --input_dir "/content/drive/MyDrive/training/yolo_kitti_100/images/train" \
  --output_dir "/content/drive/MyDrive/training/synth_kitti_100" \
  --only rain,snow,fog --limit 10 --strength 0.50 --steps 35 --cfg 9


2025-12-18 02:09:01.227138: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1766023741.250200   97253 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1766023741.257116   97253 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1766023741.274684   97253 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766023741.274724   97253 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766023741.274728   97253 computation_placer.cc:177] computation placer alr

In [32]:
%%writefile /content/drive/MyDrive/training/generate_img2img.py
import argparse
from pathlib import Path
import random

import torch
from PIL import Image
from diffusers import StableDiffusionXLImg2ImgPipeline

# Strong / stormy prompts (photorealistic, severe conditions)
PROMPTS = {
    "rain": {
        "prompt": (
            "photorealistic dashcam RAW photo, real-world severe weather, "
            "violent rainstorm, torrential rain, extreme downpour, intense rain streaks, "
            "large raindrops and smear on windshield, heavy water splashes, strong tire spray, "
            "standing water and puddles on road, wet asphalt mirror reflections, "
            "dark storm clouds, gloomy overcast sky, very low visibility, atmospheric haze, "
            "motion blur in rain only, cinematic realism, natural colors, "
            "same scene, preserve the exact vehicles and road layout, no object relocation"
        ),
        "negative": (
            "cartoon, anime, illustration, CGI, render, text, watermark, logo, "
            "extra cars, duplicated vehicles, missing vehicles, changed vehicle shape, "
            "warped road, bent lanes, deformed objects, melted surfaces, geometry change, "
            "sunny, clear sky, dry road, fantasy, surreal"
        ),
    },
    "snow": {
        "prompt": (
            "photorealistic dashcam RAW photo, real-world severe winter storm, "
            "heavy snowstorm, thick snowfall, dense snowflakes close to camera, "
            "blowing snow, strong wind gusts, swirling snow, icy slush on asphalt, "
            "snow buildup on road edges, frozen mist, cold gray atmosphere, "
            "very low visibility, reduced contrast, realistic winter lighting, "
            "cinematic realism, natural colors, "
            "same scene, preserve the exact vehicles and road layout, no object relocation"
        ),
        "negative": (
            "whiteout blank image, pure white frame, cartoon, anime, illustration, CGI, render, "
            "text, watermark, logo, summer, green grass, tropical, sunny, "
            "extra cars, duplicated vehicles, missing vehicles, changed vehicle shape, "
            "warped road, deformed objects, geometry change"
        ),
    },
    "fog": {
        "prompt": (
            "photorealistic dashcam RAW photo, real-world extreme dense fog, "
            "very thick gray haze, heavy mist, smoke-like fog in the air (NOT fire smoke), "
            "uniform volumetric fog layer, strong atmospheric perspective, "
            "major contrast reduction, desaturated colors, soft edges, "
            "distant objects almost disappear, very low visibility, realistic optics, "
            "cinematic realism, natural colors, "
            "same scene, preserve the exact vehicles and road layout, no object relocation"
        ),
        "negative": (
            "fire, flames, burning, black smoke, explosion, chimney smoke, "
            "cartoon, anime, illustration, CGI, render, text, watermark, logo, "
            "extra cars, duplicated vehicles, missing vehicles, changed vehicle shape, "
            "warped road, deformed objects, geometry change, night, pitch black"
        ),
    },
}

IMG_EXTS = {".png", ".jpg", ".jpeg", ".webp"}


def list_images(in_dir: Path):
    """Return the entries of ``in_dir`` whose lower-cased suffix is in ``IMG_EXTS``, sorted."""
    matches = []
    for entry in in_dir.iterdir():
        if entry.suffix.lower() in IMG_EXTS:
            matches.append(entry)
    matches.sort()
    return matches


def pad_to_multiple_of_8(img: Image.Image):
    """Grow ``img`` on the right/bottom until both sides are multiples of 8.

    Returns:
        ``(image, crop_box)`` where ``crop_box`` is ``(0, 0, w, h)`` for the
        original size, so cropping the result with it restores the original
        extent. The input image itself is returned when no padding is needed.
    """
    w, h = img.size
    crop_box = (0, 0, w, h)
    # -(-v // 8) * 8 rounds v up to the next multiple of 8.
    target = (-(-w // 8) * 8, -(-h // 8) * 8)
    if target == (w, h):
        return img, crop_box
    # New RGB canvas with the original pasted at the top-left corner.
    canvas = Image.new("RGB", target)
    canvas.paste(img, (0, 0))
    return canvas, crop_box


def main():
    """Generate weather-styled variants of every input image with SDXL img2img.

    For each selected key of ``PROMPTS`` (all of them, or the subset named by
    ``--only``) every image in ``--input_dir`` is padded to a multiple of 8,
    run through the pipeline with that key's prompt/negative prompt, cropped
    back to its original size and saved under
    ``<output_dir>/<key>/images/<split>/<original file name>``.

    Raises:
        RuntimeError: no images were found, or ``--only`` named no valid key.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--input_dir", required=True)
    ap.add_argument("--output_dir", required=True)

    # Realistic SDXL model (you can swap if you want)
    ap.add_argument("--model_id", default="SG161222/RealVisXL_V4.0")

    # Defaults for "stormy" look (stronger than before)
    ap.add_argument("--strength", type=float, default=0.45)
    ap.add_argument("--steps", type=int, default=28)
    ap.add_argument("--cfg", type=float, default=6.5)
    ap.add_argument("--seed", type=int, default=123)

    ap.add_argument("--limit", type=int, default=0)
    ap.add_argument("--shuffle", action="store_true")
    ap.add_argument("--split", default="train")
    ap.add_argument("--only", default="", help="comma-separated: rain,snow,fog")

    ap.add_argument("--cpu_offload", action="store_true")
    ap.add_argument("--force_fp32", action="store_true")
    args = ap.parse_args()

    in_dir = Path(args.input_dir)
    out_root = Path(args.output_dir)

    images = list_images(in_dir)
    if not images:
        raise RuntimeError(f"No images found in: {in_dir}")

    # Resolve --only before the (slow) model load so a typo fails fast.
    selected = PROMPTS
    if args.only.strip():
        allowed = {x.strip() for x in args.only.split(",") if x.strip()}
        unknown = sorted(allowed - set(PROMPTS))
        if unknown:
            # Previously dropped silently; still non-fatal, but now visible.
            print(f"Ignoring unknown --only keys: {unknown}. Valid keys: {list(PROMPTS.keys())}")
        selected = {k: v for k, v in PROMPTS.items() if k in allowed}
        if not selected:
            raise RuntimeError(f"--only had no valid keys. Use one of: {list(PROMPTS.keys())}")

    if args.shuffle:
        random.seed(args.seed)
        random.shuffle(images)

    if args.limit and args.limit > 0:
        images = images[:args.limit]

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Half precision only on GPU and only when not overridden.
    if args.force_fp32 or device != "cuda":
        dtype = torch.float32
        variant = None
    else:
        dtype = torch.float16
        variant = "fp16"

    pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
        args.model_id,
        torch_dtype=dtype,
        use_safetensors=True,
        variant=variant,
    ).to(device)

    if device == "cuda":
        pipe.enable_attention_slicing()
        pipe.enable_vae_slicing()
        pipe.enable_vae_tiling()

        # prevent black images (SDXL)
        if hasattr(pipe, "upcast_vae"):
            pipe.upcast_vae()
        else:
            pipe.vae.to(dtype=torch.float32)

        if args.cpu_offload:
            try:
                import accelerate  # noqa: F401
            except ImportError:
                print("cpu_offload requested but 'accelerate' not available. Install: pip install accelerate")
            else:
                # Outside the try: a genuine offload failure must surface
                # instead of being reported as a missing package.
                pipe.enable_model_cpu_offload()

    for subtype, pp in selected.items():
        out_dir = out_root / subtype / "images" / args.split
        out_dir.mkdir(parents=True, exist_ok=True)

        for i, img_path in enumerate(images):
            # convert() returns a new in-memory image; the context manager
            # closes the source file handle right away.
            with Image.open(img_path) as src_img:
                init_img = src_img.convert("RGB")
            padded, crop_box = pad_to_multiple_of_8(init_img)

            # Seed depends only on the image index, so every subtype uses the
            # same seed for the same image.
            gen = torch.Generator(device=device).manual_seed(args.seed + i)

            with torch.inference_mode():
                out = pipe(
                    prompt=pp["prompt"],
                    negative_prompt=pp["negative"],
                    image=padded,
                    strength=args.strength,
                    guidance_scale=args.cfg,
                    num_inference_steps=args.steps,
                    generator=gen,
                ).images[0]

            # Crop the padding away and keep the original file name.
            out.crop(crop_box).save(out_dir / img_path.name)

            # Report every 10 images and once more at the end of the list.
            if (i + 1) % 10 == 0 or (i + 1) == len(images):
                print(f"[{subtype}] {i+1}/{len(images)}")

    print(f"Finished! Images saved to: {out_root}")


if __name__ == "__main__":
    main()


Overwriting /content/drive/MyDrive/training/generate_img2img.py


In [34]:
!python /content/drive/MyDrive/training/generate_img2img.py \
  --input_dir "/content/drive/MyDrive/training/yolo_kitti_100/images/train" \
  --output_dir "/content/drive/MyDrive/training/synth_kitti_100" \
  --only rain,snow,fog --strength 0.45 --steps 35 --cfg 7.5

2025-12-18 02:26:42.202396: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1766024802.225360  101819 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1766024802.232018  101819 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1766024802.249249  101819 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766024802.249296  101819 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766024802.249299  101819 computation_placer.cc:177] computation placer alr

In [35]:
from pathlib import Path
import shutil

# Give every synthetic image the label file of the real image it was generated from
# (matched by file stem).
REAL = Path("/content/drive/MyDrive/training/yolo_kitti_100")
SYN  = Path("/content/drive/MyDrive/training/synth_kitti_100")

subtypes = ["rain","snow","fog"]
split = "train"

# Labels of the real images for the chosen split.
src_lbl = REAL/"labels"/split

for st in subtypes:
    img_dir = SYN/st/"images"/split
    out_lbl = SYN/st/"labels"/split
    out_lbl.mkdir(parents=True, exist_ok=True)

    count = 0
    for img in img_dir.glob("*"):
        # Only image files get a label.
        if img.suffix.lower() not in [".png",".jpg",".jpeg",".webp"]:
            continue
        src = src_lbl/(img.stem+".txt")
        dst = out_lbl/(img.stem+".txt")
        if src.exists():
            shutil.copy2(src, dst)
        else:
            # No real label found: write an empty label file instead.
            dst.write_text("")
        count += 1

    print(st, "labels copied:", count)


rain labels copied: 70
snow labels copied: 70
fog labels copied: 70


In [36]:
from pathlib import Path
import shutil, yaml

# Build the augmented dataset: all real splits plus the synthetic images in train only.
REAL = Path("/content/drive/MyDrive/training/yolo_kitti_100")
SYN  = Path("/content/drive/MyDrive/training/synth_kitti_100")
OUT  = Path("/content/drive/MyDrive/training/yolo_kitti_100_aug")

# Start from an empty output directory.
if OUT.exists():
    shutil.rmtree(OUT)

# copy real train/val/test
for split in ["train","val","test"]:
    (OUT/"images"/split).mkdir(parents=True, exist_ok=True)
    (OUT/"labels"/split).mkdir(parents=True, exist_ok=True)
    shutil.copytree(REAL/"images"/split, OUT/"images"/split, dirs_exist_ok=True)
    shutil.copytree(REAL/"labels"/split, OUT/"labels"/split, dirs_exist_ok=True)

# add synth to TRAIN only (keep val/test real)
subtypes = ["rain","snow","fog"]
for st in subtypes:
    img_dir = SYN/st/"images"/"train"
    lbl_dir = SYN/st/"labels"/"train"

    for img in img_dir.glob("*"):
        if img.suffix.lower() not in [".png",".jpg",".jpeg",".webp"]:
            continue
        # Suffix the stem with the subtype so the synthetic file cannot
        # overwrite the real image of the same name.
        new_name = f"{img.stem}_{st}{img.suffix.lower()}"
        shutil.copy2(img, OUT/"images"/"train"/new_name)

        # The label gets the same suffixed stem as its image.
        src_lbl = lbl_dir/(img.stem+".txt")
        dst_lbl = OUT/"labels"/"train"/f"{img.stem}_{st}.txt"
        if src_lbl.exists():
            shutil.copy2(src_lbl, dst_lbl)
        else:
            # Missing synthetic label: write an empty label file.
            dst_lbl.write_text("")

print("DONE:", OUT)

# write data.yaml
# Class names are taken from the real dataset's config so both datasets agree.
base_yaml = yaml.safe_load((REAL/"data.yaml").read_text())
data = {
    "path": str(OUT),
    "train": "images/train",
    "val": "images/val",
    "test": "images/test",
    "names": base_yaml["names"],
}
(OUT/"data.yaml").write_text(yaml.safe_dump(data, sort_keys=False))
print("Wrote:", OUT/"data.yaml")


DONE: /content/drive/MyDrive/training/yolo_kitti_100_aug
Wrote: /content/drive/MyDrive/training/yolo_kitti_100_aug/data.yaml


In [37]:
!yolo detect train model=yolov8n.pt \
  data="/content/drive/MyDrive/training/yolo_kitti_100_aug/data.yaml" \
  epochs=30 imgsz=640 project="/content/runs_compare_100" name="augmented" exist_ok=True


Ultralytics 8.3.240 üöÄ Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (NVIDIA A100-SXM4-80GB, 81222MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/training/yolo_kitti_100_aug/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=augmented, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, over

In [44]:
# Dataset configs and the shared output project for the two comparable runs.
# The shell lines below reference these Python variables as "$NAME".
REAL="/content/drive/MyDrive/training/yolo_kitti_100/data.yaml"
AUGD="/content/drive/MyDrive/training/yolo_kitti_100_aug/data.yaml"
PROJ="/content/drive/MyDrive/runs_kitti100_compare_v1"

# Run 1 ("baseline"): trained with the real-data config.
!yolo detect train model=yolov8n.pt data="$REAL" epochs=30 imgsz=640 \
  project="$PROJ" name="baseline" exist_ok=True

# Run 2 ("augmented"): identical settings, trained with the augmented-data config.
!yolo detect train model=yolov8n.pt data="$AUGD" epochs=30 imgsz=640 \
  project="$PROJ" name="augmented" exist_ok=True


Ultralytics 8.3.240 üöÄ Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (NVIDIA A100-SXM4-80GB, 81222MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/training/yolo_kitti_100/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=baseline, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_m

In [4]:
!pip -q install ultralytics



[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.1/1.1 MB[0m [31m43.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [20]:
from google.colab import drive
drive.mount("/content/drive")


Mounted at /content/drive


In [23]:
# Export the two trained checkpoints and the real-data config as environment variables
# so the shell commands below can reference them as "$NAME".
%env BASELINE_MODEL=/content/drive/MyDrive/runs_kitti100_compare_v1/baseline/weights/best.pt
%env AUG_MODEL=/content/drive/MyDrive/runs_kitti100_compare_v1/augmented/weights/best.pt
%env REAL=/content/drive/MyDrive/training/yolo_kitti_100/data.yaml

# Validate both checkpoints against the test split of the same real-data config.
!yolo detect val model="$BASELINE_MODEL" data="$REAL" split=test project="/content/runs_val" name=val_baseline
!yolo detect val model="$AUG_MODEL"      data="$REAL" split=test project="/content/runs_val" name=val_augmented


env: BASELINE_MODEL=/content/drive/MyDrive/runs_kitti100_compare_v1/baseline/weights/best.pt
env: AUG_MODEL=/content/drive/MyDrive/runs_kitti100_compare_v1/augmented/weights/best.pt
env: REAL=/content/drive/MyDrive/training/yolo_kitti_100/data.yaml
Ultralytics 8.3.240 üöÄ Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (NVIDIA A100-SXM4-80GB, 81222MiB)
Model summary (fused): 72 layers, 3,006,233 parameters, 0 gradients, 8.1 GFLOPs
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.3¬±0.1 ms, read: 324.5¬±33.0 MB/s, size: 852.5 KB)
[K[34m[1mval: [0mScanning /content/drive/MyDrive/training/yolo_kitti_100/labels/test.cache... 15 images, 0 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 15/15 222.3Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 1/1 4.2it/s 0.2s
                   all         15         73      0.013      0.749      0.346      0.207
                   Car         1

In [28]:
import pandas as pd

# Take the last logged row of each run's results.csv and tag it with the run name.
base = (
    pd.read_csv("/content/drive/MyDrive/runs_kitti100_compare_v1/baseline/results.csv")
    .iloc[-1:]
    .assign(model="baseline")
)
aug = (
    pd.read_csv("/content/drive/MyDrive/runs_kitti100_compare_v1/augmented/results.csv")
    .iloc[-1:]
    .assign(model="augmented")
)

# Stack the two rows into one frame with a fresh 0..1 index.
df = pd.concat((base, aug), ignore_index=True)

# Display without the index column, numbers at 4-digit precision.
df.style.format(precision=4).hide(axis="index")



epoch,time,train/box_loss,train/cls_loss,train/dfl_loss,metrics/precision(B),metrics/recall(B),metrics/mAP50(B),metrics/mAP50-95(B),val/box_loss,val/cls_loss,val/dfl_loss,lr/pg0,lr/pg1,lr/pg2,model
30,30.121,1.1473,1.204,0.9888,0.6632,0.4024,0.3469,0.1964,1.4607,1.1249,1.1832,0.0001,0.0001,0.0001,baseline
30,69.3426,1.1355,0.9244,1.019,0.9509,0.3078,0.3999,0.2026,1.4055,0.9477,1.183,0.0001,0.0001,0.0001,augmented
