In [18]:
!pip install ultralytics datasets pillow


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [19]:
import os
from datasets import load_dataset
from ultralytics import YOLO
import torch


In [20]:
# Filesystem layout: all generated data lives in one folder under the
# directory the notebook is started from.
BASE_DIR = os.getcwd()
OUTPUT_DIR = os.path.join(BASE_DIR, "brain_tumor_yolo")

# Create the output folder up front; an already existing folder is fine.
os.makedirs(name=OUTPUT_DIR, exist_ok=True)

# Convert a pixel-space box into the normalized YOLO representation
def normalize_bbox(bbox, image_size):
    """
    Turn a pixel bounding box into YOLO's normalized centre/size form.

    Args:
        bbox: Four values unpacked as (x_min, y_min, box_width, box_height).
        image_size: Two values unpacked as (image_width, image_height).

    Returns:
        Tuple (x_center, y_center, width, height), each divided by the
        corresponding image dimension.
    """
    img_w, img_h = image_size
    left, top, w, h = bbox

    # Box centre = top-left corner plus half the extent, scaled by image size.
    cx = (left + w / 2) / img_w
    cy = (top + h / 2) / img_h

    return cx, cy, w / img_w, h / img_h

# Save YOLO `data.yaml`
def save_data_yaml(output_dir, num_classes, class_names=None):
    """
    Save a YOLO-compatible data.yaml file and return its path.

    The file points at ``<output_dir>/train/images``,
    ``<output_dir>/validation/images`` and ``<output_dir>/test/images``.

    Args:
        output_dir: Dataset root directory; created if it does not exist.
        num_classes: Number of object classes, written as ``nc``.
        class_names: Optional sequence of class names, ordered by class id.
            Defaults to ``["negative", "positive"]``.

    Returns:
        Path of the written ``data.yaml`` file.

    Raises:
        ValueError: If the number of class names differs from ``num_classes``.
    """
    import json  # local import so this cell does not depend on the import cell

    names = ["negative", "positive"] if class_names is None else list(class_names)
    if len(names) != num_classes:
        # Fail here instead of producing a config whose nc and names disagree.
        raise ValueError(
            f"num_classes={num_classes} does not match the {len(names)} class names {names}"
        )

    data_yaml = {
        "train": os.path.join(output_dir, "train", "images"),
        "val": os.path.join(output_dir, "validation", "images"),
        "test": os.path.join(output_dir, "test", "images"),  # Optional, include if test split exists
        "nc": num_classes,
        "names": names,
    }

    os.makedirs(output_dir, exist_ok=True)
    yaml_path = os.path.join(output_dir, "data.yaml")
    with open(yaml_path, 'w', encoding="utf-8") as yaml_file:
        for key, value in data_yaml.items():
            # JSON strings and lists are valid YAML; quoting keeps paths that
            # contain ': ' or ' #' from being misparsed as YAML syntax.
            if isinstance(value, (str, list)):
                rendered = json.dumps(value, ensure_ascii=False)
            else:
                rendered = value
            yaml_file.write(f"{key}: {rendered}\n")
    print(f"Saved YOLO data.yaml to {yaml_path}")
    return yaml_path



In [21]:
# Load the dataset and export each split to disk
def load_and_clean_dataset():
    """
    Load the brain-tumor detection dataset and export its splits.

    For each of the train, validation and test splits, creates the
    ``images`` and ``labels`` folders under OUTPUT_DIR and then hands the
    split to ``process_split``.
    """
    dataset = load_dataset("mmenendezg/brain-tumor-object-detection")

    for split in ('train', 'validation', 'test'):
        # Same creation order as the label/image layout: images first, then labels.
        for subfolder in ('images', 'labels'):
            os.makedirs(os.path.join(OUTPUT_DIR, split, subfolder), exist_ok=True)
        process_split(dataset[split], split)

def process_split(dataset_split, split_name):
    """
    Write one dataset split to disk as JPEG images plus YOLO label files.

    Each example is stored as ``<index>.jpg`` under
    ``OUTPUT_DIR/<split_name>/images`` with a matching ``<index>.txt`` under
    ``OUTPUT_DIR/<split_name>/labels``. Every label line has the form
    ``<label> <x_center> <y_center> <width> <height>``, with the box values
    produced by ``normalize_bbox``.

    Args:
        dataset_split: Iterable of examples. Each example provides an
            ``'image'`` (used via ``.mode``, ``.size``, ``.convert`` and
            ``.save`` - presumably a PIL image) and an ``'objects'`` mapping
            with parallel ``'bbox'`` and ``'label'`` sequences.
        split_name: Sub-folder name for this split, e.g. ``'train'``.
    """
    images_dir = os.path.join(OUTPUT_DIR, split_name, 'images')
    labels_dir = os.path.join(OUTPUT_DIR, split_name, 'labels')

    # Create the targets here as well so the function works when called directly.
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    for idx, example in enumerate(dataset_split):
        image = example['image']
        objects = example['objects']

        # JPEG cannot encode alpha/palette/high-bit-depth modes; saving those
        # would raise, so convert anything outside the writable modes to RGB.
        if image.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'):
            image = image.convert('RGB')

        image_path = os.path.join(images_dir, f"{idx:06d}.jpg")
        image.save(image_path)

        # An example without objects still gets an (empty) label file.
        label_path = os.path.join(labels_dir, f"{idx:06d}.txt")
        with open(label_path, 'w') as label_file:
            for bbox, label in zip(objects['bbox'], objects['label']):
                x_center, y_center, width, height = normalize_bbox(bbox, image.size)
                label_file.write(f"{label} {x_center} {y_center} {width} {height}\n")


In [22]:
# Train YOLOv8
def train_yolo(data_yaml, model_type='yolov8n', epochs=3, img_size=640, device=None):
    """
    Fine-tune a pretrained YOLO model on the dataset described by ``data_yaml``.

    Args:
        data_yaml: Path to the dataset ``data.yaml`` file.
        model_type: Model name such as ``'yolov8n'``; ``.pt`` is appended
            unless the name already ends with it.
        epochs: Number of training epochs.
        img_size: Training image size, passed to Ultralytics as ``imgsz``.
        device: Device passed through to ``model.train``. When ``None``
            (default), ``"cuda"`` is used if available, otherwise ``"cpu"``.
            Pass a value explicitly to train on another backend.

    Returns:
        The ``YOLO`` model object after training.
    """
    # Accept both 'yolov8n' and 'yolov8n.pt' without producing 'yolov8n.pt.pt'.
    weights = model_type if model_type.endswith(".pt") else f"{model_type}.pt"

    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    model = YOLO(weights)  # Load a pretrained YOLOv8 model
    model.train(
        data=data_yaml,
        epochs=epochs,
        imgsz=img_size,
        device=device
    )
    return model


In [23]:
# Validate the model on the held-out test split and print the metrics
def evaluate_yolo(model, data_yaml):
    """
    Run validation on the ``test`` split and print the returned results.

    Args:
        model: Object exposing ``val(data=..., split=...)``.
        data_yaml: Path to the dataset ``data.yaml`` file.
    """
    metrics = model.val(data=data_yaml, split="test")
    # Header and metrics on separate lines, each terminated by a newline.
    print("Evaluation Results on Test Split:", metrics, sep="\n")


In [24]:
# Step 1: Export every dataset split to disk as images and YOLO label files
load_and_clean_dataset()

# Step 2: Write the data.yaml that describes the exported dataset
yaml_path = save_data_yaml(output_dir=OUTPUT_DIR, num_classes=2)

print(yaml_path)

Saved YOLO data.yaml to /Users/nathanelkhoury/Desktop/School/Fall2024/COMP3106/finalproj/brain_tumor_yolo/data.yaml
/Users/nathanelkhoury/Desktop/School/Fall2024/COMP3106/finalproj/brain_tumor_yolo/data.yaml


In [25]:
# Step 3: Fine-tune the nano YOLOv8 model for a few epochs
model = train_yolo(yaml_path, model_type='yolov8n', epochs=3, img_size=640)

# Step 4: Report metrics on the held-out test split
evaluate_yolo(model=model, data_yaml=yaml_path)


New https://pypi.org/project/ultralytics/8.3.38 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.37 🚀 Python-3.12.6 torch-2.5.1 CPU (Apple M2 Pro)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/Users/nathanelkhoury/Desktop/School/Fall2024/COMP3106/finalproj/brain_tumor_yolo/data.yaml, epochs=3, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=cpu, workers=8, project=None, name=train4, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=N

[34m[1mtrain: [0mScanning /Users/nathanelkhoury/Desktop/School/Fall2024/COMP3106/finalproj/brain_tumor_yolo/train/labels.cache... 614 images, 0 backgrounds, 0 corrupt: 100%|██████████| 614/614 [00:00<?, ?it/s]
[34m[1mval: [0mScanning /Users/nathanelkhoury/Desktop/School/Fall2024/COMP3106/finalproj/brain_tumor_yolo/validation/labels.cache... 264 images, 0 backgrounds, 0 corrupt: 100%|██████████| 264/264 [00:00<?, ?it/s]


Plotting labels to /Users/nathanelkhoury/Desktop/School/Fall2024/COMP3106/finalproj/runs/detect/train4/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001667, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1m/Users/nathanelkhoury/Desktop/School/Fall2024/COMP3106/finalproj/runs/detect/train4[0m
Starting training for 3 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/39 [00:01<?, ?it/s]


KeyboardInterrupt: 