# STAC (Self-Training with Augmented Consistency)

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input/disaster-response-object-detection-dataset'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/disaster-response-object-detection-dataset/data.yaml
/kaggle/input/disaster-response-object-detection-dataset/val/labels/559.txt
/kaggle/input/disaster-response-object-detection-dataset/val/labels/1093.txt
/kaggle/input/disaster-response-object-detection-dataset/val/labels/1356.txt
/kaggle/input/disaster-response-object-detection-dataset/val/labels/557.txt
/kaggle/input/disaster-response-object-detection-dataset/val/labels/361.txt
/kaggle/input/disaster-response-object-detection-dataset/val/labels/1366.txt
/kaggle/input/disaster-response-object-detection-dataset/val/labels/1437.txt
/kaggle/input/disaster-response-object-detection-dataset/val/labels/992.txt
/kaggle/input/disaster-response-object-detection-dataset/val/labels/1038.txt
/kaggle/input/disaster-response-object-detection-dataset/val/labels/40.txt
/kaggle/input/disaster-response-object-detection-dataset/val/labels/812.txt
/kaggle/input/disaster-response-object-detection-dataset/val/labels/1025.txt
/kaggle/input/di

In [2]:
!pip -q install --upgrade --no-warn-script-location ultralytics albumentations pycocotools torchmetrics tqdm

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m24.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.8/16.8 MB[0m [31m96.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m92.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m72.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m50.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
!git clone -q https://github.com/ultralytics/ultralytics.git


In [4]:
# ================================================================
# 0. Install dependencies
# ================================================================
#!pip install -q ultralytics

# ================================================================
# 1. Imports & Paths
# ================================================================
import shutil, warnings
from pathlib import Path
from ultralytics import YOLO
import yaml

# -------------------- Dataset Paths --------------------
BASE_TRAIN = Path("/kaggle/input/disaster-response-object-detection-dataset/train/images")
BASE_VAL   = Path("/kaggle/input/disaster-response-object-detection-dataset/val/images")
BASE_TEST  = Path("/kaggle/input/disaster-response-object-detection-dataset/test/images")

# -------------------- Working Directories --------------------
WORK       = Path("/kaggle/working/disaster_teacher_student")
SPLIT      = WORK/"0_yolo_split"
PSEUDO     = WORK/"1_pseudo"
STU_ROOT   = WORK/"2_student"

# create split dirs
for split in ["train","valid","test"]:
    (SPLIT/split/"images").mkdir(parents=True, exist_ok=True)
    (SPLIT/split/"labels").mkdir(parents=True, exist_ok=True)

# create pseudo and student dirs
(PSEUDO/"raw"/"labels").mkdir(parents=True, exist_ok=True)
for sub in ["train","valid"]:
    (STU_ROOT/sub/"images").mkdir(parents=True, exist_ok=True)
    (STU_ROOT/sub/"labels").mkdir(parents=True, exist_ok=True)

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


# Write Data YAMLs

In [5]:
data_yaml = {
    'path': str(SPLIT),
    'train': 'train/images',
    'val':   'valid/images',
    'test':  'test/images',
    'nc':    6,
    'names': ["person","fire","smoke","small_vehicle","large_vehicle","two_wheeler"]
}
(data_yaml_path := WORK/"data_disaster.yaml").write_text(yaml.dump(data_yaml))

stu_yaml = {
    'path': str(STU_ROOT),
    'train': 'train/images',
    'val':   'valid/images',
    'nc':    6,
    'names': ["person","fire","smoke","small_vehicle","large_vehicle","two_wheeler"]
}
(WORK/"data_disaster_student.yaml").write_text(yaml.dump(stu_yaml))

178

# **Copy Images for Split**

In [6]:
def safe_copy_images(src_dir, dest_img_dir, dest_lbl_dir):
    for img in src_dir.glob("*.jpg"):
        # Copy image
        shutil.copy(img, dest_img_dir/img.name)
        # Corresponding label
        lbl = src_dir.parent/"labels"/f"{img.stem}.txt"
        if lbl.exists():
            shutil.copy(lbl, dest_lbl_dir/f"{img.stem}.txt")
        else:
            # Create empty label if missing
            (dest_lbl_dir/f"{img.stem}.txt").write_text("")
            print(f"⚠️ Label missing for {img.name}, created empty label.")

# Copy train images
safe_copy_images(BASE_TRAIN, SPLIT/"train"/"images", SPLIT/"train"/"labels")
# Copy valid images
safe_copy_images(BASE_VAL, SPLIT/"valid"/"images", SPLIT/"valid"/"labels")
# Copy test images
safe_copy_images(BASE_TEST, SPLIT/"test"/"images", SPLIT/"test"/"labels")

⚠️ Label missing for 11501.jpg, created empty label.


# INITIAL teachers training

In [7]:
import os

print("TRAIN IMAGES:", len(os.listdir("/kaggle/input/disaster-response-object-detection-dataset/train/images")))
print("TRAIN LABELS:", len(os.listdir("/kaggle/input/disaster-response-object-detection-dataset/train/labels")))

print("VAL IMAGES:", len(os.listdir("/kaggle/input/disaster-response-object-detection-dataset/val/images")))
print("VAL LABELS:", len(os.listdir("/kaggle/input/disaster-response-object-detection-dataset/val/labels")))


TRAIN IMAGES: 10450
TRAIN LABELS: 10450
VAL IMAGES: 1556
VAL LABELS: 1556


In [8]:
import os
import shutil
from pathlib import Path
import random

# Path to your dataset
DATASET = Path("/kaggle/input/disaster-response-object-detection-dataset")

train_img_dir = DATASET/"train"/"images"
train_lbl_dir = DATASET/"train"/"labels"

# New dataset root
NEW_ROOT = Path("/kaggle/working/dataset")
(NEW_ROOT/"train"/"images").mkdir(parents=True, exist_ok=True)
(NEW_ROOT/"train"/"labels").mkdir(parents=True, exist_ok=True)
(NEW_ROOT/"valid"/"images").mkdir(parents=True, exist_ok=True)
(NEW_ROOT/"valid"/"labels").mkdir(parents=True, exist_ok=True)

# List all image paths
images = [p for p in train_img_dir.iterdir() if p.suffix.lower() in [".jpg", ".jpeg", ".png"]]

# Shuffle to get random split
random.seed(42)
random.shuffle(images)

# 90% train, 10% valid
split_idx = int(0.9 * len(images))
train_files = images[:split_idx]
val_files = images[split_idx:]

def copy_files(files, dst_img, dst_lbl):
    for img_path in files:
        lbl_path = train_lbl_dir / f"{img_path.stem}.txt"
        if lbl_path.exists():
            shutil.copy(img_path, dst_img / img_path.name)
            shutil.copy(lbl_path, dst_lbl / f"{img_path.stem}.txt")

copy_files(train_files, NEW_ROOT/"train"/"images", NEW_ROOT/"train"/"labels")
copy_files(val_files, NEW_ROOT/"valid"/"images", NEW_ROOT/"valid"/"labels")

print("New TRAIN size:", len(os.listdir(NEW_ROOT/'train/images')))
print("New VALID size:", len(os.listdir(NEW_ROOT/'valid/images')))
print("Dataset split created successfully!")


New TRAIN size: 9404
New VALID size: 1045
Dataset split created successfully!


In [9]:
teacher = YOLO("yolo11n.pt")
_ = teacher.train(
    data    = str(data_yaml_path),
    epochs  = 20,
    imgsz   = 640,
    batch   = 8,
    project = str(WORK),
    name    = "teacher",
    device  = 0,
    amp=False 
)
teacher_weights = WORK/"teacher"/"weights"/"best.pt"

[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt': 100% ━━━━━━━━━━━━ 5.4MB 70.8MB/s 0.1s
Ultralytics 8.3.234 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=False, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/kaggle/working/disaster_teacher_student/data_disaster.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=20, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n.pt, momen

  xa[xa < 0] = -1
  xa[xa < 0] = -1


                   all       1556       2401      0.799       0.79      0.811      0.636
                person        181        284      0.558      0.644       0.62      0.378
                  fire        675       1045      0.787      0.723      0.762      0.422
                 smoke        163        244      0.649       0.57      0.583      0.388
         small_vehicle        389        475       0.96      0.948      0.975      0.924
         large_vehicle        172        198      0.924      0.965      0.988      0.951
           two_wheeler        123        155      0.916       0.89      0.937       0.75
Speed: 0.2ms preprocess, 3.3ms inference, 0.0ms loss, 0.9ms postprocess per image
Results saved to [1m/kaggle/working/disaster_teacher_student/teacher[0m


# Populate Student Validation

In [10]:
for img in (SPLIT/"valid"/"images").iterdir():
    shutil.copy(img, STU_ROOT/"valid"/"images"/img.name)
    lbl = SPLIT/"valid"/"labels"/f"{img.stem}.txt"
    if lbl.exists():
        shutil.copy(lbl, STU_ROOT/"valid"/"labels"/f"{img.stem}.txt")
    else:
        (STU_ROOT/"valid"/"labels"/f"{img.stem}.txt").write_text("")
        print(f"⚠️ Label missing for {img.name}, created empty label.")


# STAC-style Self-Training Loop

In [11]:
IMGZ        = 640
TAU         = 0.7
STAC_ITERS  = 5  
STU_EPOCHS  = 20  
BATCH       = 8  

for it in range(STAC_ITERS):
    print(f"\n=== STAC iteration {it+1}/{STAC_ITERS} ===")

    # 6a) Generate pseudo-labels on train/images
    _ = YOLO(str(teacher_weights)).predict(
        source    = str(SPLIT/"train"/"images"),
        save_txt  = True,
        save_conf = True,
        conf      = TAU,
        project   = str(PSEUDO),
        name      = f"it{it}",
        device    = 0
    )
    RAW_LB = PSEUDO/f"it{it}"/"labels"

    # 6b) Merge GT + pseudo into 2_student/train
    for f in (STU_ROOT/"train"/"labels").glob("*.txt"):
        f.unlink()  # clear old labels
    for im in (SPLIT/"train"/"images").glob("*.jpg"):
        shutil.copy(im, STU_ROOT/"train"/"images"/im.name)
    for gt in (SPLIT/"train"/"labels").glob("*.txt"):
        dst = STU_ROOT/"train"/"labels"/gt.name
        shutil.copy(gt, dst)
        pseudo_f = RAW_LB/gt.name
        if pseudo_f.exists():
            with open(dst, "a") as d, open(pseudo_f) as p:
                d.write(p.read())

    # 6c) Train student from last teacher_weights
    student = YOLO(str(teacher_weights))
    _ = student.train(
        data    = str(WORK/"data_disaster_student.yaml"),
        epochs  = STU_EPOCHS,
        imgsz   = IMGZ,
        batch   = BATCH,
        project = str(WORK),
        name    = f"student_it{it}",
        device  = 0
    )

    # promote student → teacher
    teacher_weights = WORK/f"student_it{it}"/"weights"/"best.pt"


=== STAC iteration 1/5 ===

inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

image 1/10450 /kaggle/working/disaster_teacher_student/0_yolo_split/train/images/1.jpg: 640x640 1 fire, 8.4ms
image 2/10450 /kaggle/working/disaster_teacher_student/0_yolo_split/train/images/10.jpg: 640x640 1 fire, 8.4ms
image 3/10450 /kaggle/working/disaster_teacher_student/0_yolo_split/train/images/10000.jpg: 480x640 1 two_wheeler, 45.5ms
image 4/10450 /kaggle/working/disaster_teacher_student/0_yolo_split/train/images/10002.jpg: 640x6

  xa[xa < 0] = -1
  xa[xa < 0] = -1


                   all       1556       2401      0.803      0.754      0.793      0.594
                person        181        284      0.529      0.602      0.551      0.299
                  fire        675       1045      0.789       0.67      0.754      0.391
                 smoke        163        244      0.626      0.553      0.565      0.356
         small_vehicle        389        475      0.969      0.909      0.965      0.895
         large_vehicle        172        198      0.949      0.945      0.983      0.918
           two_wheeler        123        155      0.955      0.845      0.938      0.704
Speed: 0.2ms preprocess, 1.9ms inference, 0.0ms loss, 1.0ms postprocess per image
Results saved to [1m/kaggle/working/disaster_teacher_student/student_it0[0m

=== STAC iteration 2/5 ===

inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultra

# Final Evaluation

In [None]:
## warnings.filterwarnings("ignore", category=RuntimeWarning)
final = YOLO(str(teacher_weights))
metrics = final.val(
    data    = str(data_yaml_path),
    imgsz   = 640,
    batch   = 16,
    device  = 0,
    plots   = False
)
print("\n🏁 STAC-style final student metrics:", metrics)