In [2]:
# Dataset sanity-check + summary (run this cell as-is)
from pathlib import Path
from collections import Counter
import yaml

DATASET_ROOT = Path(r"D:\My Projects\Brain Dataset\CT-Brain-Segmentation-1")

# 1) Read data.yaml
# The dataset config is expected directly under the dataset root.
data_yaml = DATASET_ROOT / "data.yaml"
with data_yaml.open("r", encoding="utf-8") as yaml_stream:
    cfg = yaml.safe_load(yaml_stream)

# Echo the fields of interest. Each entry pairs the exact line prefix with the
# config key it reports; a key that is absent from the file prints as None.
print("== data.yaml ==")
for prefix, key in (
    ("nc: ", "nc"),
    ("names: ", "names"),
    ("train: ", "train"),
    ("val:   ", "val"),
    ("test:  ", "test"),
):
    print(f"{prefix}{cfg.get(key)}")

# 2) Count files per split
# Only regular files with an image suffix count as images. A bare glob("*")
# would also count sub-directories and stray files (Thumbs.db, .DS_Store, ...),
# which can inflate the image total and hide a real image/label mismatch.
# NOTE(review): the suffix list is meant to mirror the image formats the
# Ultralytics loader accepts - confirm against the installed version.
IMAGE_SUFFIXES = {
    ".bmp", ".dng", ".jpeg", ".jpg", ".mpo", ".png",
    ".tif", ".tiff", ".webp", ".pfm", ".heic",
}

print("\n== split counts ==")
for split in ["train", "valid", "test"]:
    img_dir = DATASET_ROOT / split / "images"
    lbl_dir = DATASET_ROOT / split / "labels"
    # Suffix comparison is case-insensitive so "scan.PNG" is still an image.
    img_count = sum(
        1
        for entry in img_dir.glob("*")
        if entry.is_file() and entry.suffix.lower() in IMAGE_SUFFIXES
    )
    # Count lazily; there is no need to materialise the list of label paths.
    lbl_count = sum(1 for _ in lbl_dir.glob("*.txt"))
    print(f"{split}: images={img_count}, labels={lbl_count}")

# 3) Count annotation rows per class id
# Every non-blank line of a label file is one annotation row; its first
# whitespace-separated token is used (as a string) as the class id.
class_counts = Counter()
label_files = [
    label_path
    for split in ("train", "valid", "test")
    for label_path in (DATASET_ROOT / split / "labels").glob("*.txt")
]

for lf in label_files:
    with open(lf, "r", encoding="utf-8") as f:
        # str.split() ignores surrounding whitespace, so no separate strip is
        # needed to extract the first token; blank lines are skipped.
        class_counts.update(row.split()[0] for row in f if row.strip())


def _class_sort_key(cls_id):
    """Sort integer class ids numerically and malformed (non-integer) ids last.

    A plain ``key=int`` raises ValueError on the first malformed token, which
    aborts the summary without showing which id was bad. Listing such ids at
    the end keeps the report intact and makes the bad rows visible.
    """
    try:
        return (0, int(cls_id), "")
    except ValueError:
        return (1, 0, cls_id)


print("\n== annotation rows by class id ==")
for k in sorted(class_counts, key=_class_sort_key):
    print(f"class_{k}: {class_counts[k]}")
print(f"total annotation rows: {sum(class_counts.values())}")


== data.yaml ==
nc: 2
names: ['0', '1']
train: ../train/images
val:   ../valid/images
test:  ../test/images

== split counts ==
train: images=9395, labels=9395
valid: images=738, labels=738
test: images=725, labels=725

== annotation rows by class id ==
class_0: 4547
class_1: 6389
total annotation rows: 10936


In [3]:
import os

# NOTE(review): presumably a workaround for the "OMP: Error #15" abort raised
# when more than one OpenMP runtime gets loaded into the process - confirm it
# is still needed, since the flag suppresses a safety check rather than fixing
# the duplicate runtime. It is set here, ahead of the cell that imports
# ultralytics.
os.environ.update({"KMP_DUPLICATE_LIB_OK": "TRUE"})


In [4]:
# 2) Train YOLOv8 segmentation
from ultralytics import YOLO

# Dataset config path and pretrained checkpoint for the baseline run.
data_yaml = r"D:\My Projects\Brain Dataset\CT-Brain-Segmentation-1\data.yaml"
model = YOLO("yolov8n-seg.pt")  # small/fast starter model

# All training settings live in one mapping so they are easy to scan and tweak.
# NOTE(review): the captured log for this cell reports device=cpu and ends in a
# KeyboardInterrupt, so the 50-epoch run shown here did not finish.
train_kwargs = dict(
    data=data_yaml,
    epochs=50,
    imgsz=640,
    batch=8,
    project="runs_brain_ct",
    name="yolov8n_seg_baseline",
)

# Kept as the final expression so the cell still displays what train() returns.
model.train(**train_kwargs)


Ultralytics 8.4.14  Python-3.13.11 torch-2.10.0+cpu CPU (Intel Core i7-8850H 2.60GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, angle=1.0, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=D:\My Projects\Brain Dataset\CT-Brain-Segmentation-1\data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, end2end=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n-seg.pt, momentum=0.937, mosaic=1.0, multi_scale=0.0, name=yolov8n_seg_baseline4, nbs=64, nms=False, opset=None, optimize=

KeyboardInterrupt: 

In [None]:
# Validate with the `model` and `data_yaml` objects created in the training cell.
val_kwargs = {"data": data_yaml}
model.val(**val_kwargs)
