# Data Preparation


## JSON Conversion
### COCO Format to YOLO Machine Learning Format

In [None]:
from ultralytics.data.converter import convert_coco

In [None]:
# Convert the COCO-format annotation JSON found in the 'data' directory into
# YOLO label files, keeping polygon segments and skipping keypoints.
# NOTE(review): the recorded output reports results saved under 'yolo_annos2'
# rather than 'yolo_annos' — presumably the converter appends a numeric suffix
# when the target directory already exists; confirm before relying on the path.
convert_coco(
    labels_dir="data",
    save_dir="yolo_annos",
    use_segments=True,
    use_keypoints=False,
)

Annotations /Users/mitchellpalmer/Projects/solafune-canopy-capstone-clean/data/train.json: 100%|██████████| 150/150 [00:00<00:00, 281.76it/s]

COCO data converted successfully.
Results saved to /Users/mitchellpalmer/Projects/solafune-canopy-capstone-clean/yolo_annos2





## Data Split | Train / Evaluation

- The dataset is split automatically with Python, using reusable code driven by configurable directory paths.

- Training Data split 70:30 between **Training** and **Evaluation**

In [None]:
import os, random, shutil
from pathlib import Path

# Paths
IMG_DIR = Path("model-data/images copy/train")  # source images
LBL_DIR = Path("model-data/labels copy/train")  # label .txt files, matched to images by file stem

OUT_DIR = Path("data")
splits = {"train": 0.7, "val": 0.3, "test": 0.0}  # 70/30/00 split
SPLIT_SEED = 0  # fixed seed: re-running the cell reproduces the same split


def split_counts(total, ratios):
    """Return how many items each split receives.

    Each split first gets ``floor(total * ratio)`` items. Items left over by
    that truncation are handed out one at a time to the splits with the
    largest fractional share (ties go to the split listed first). A split
    whose ratio is 0 never receives an item, so rounding residue cannot leak
    into an intentionally empty split.

    Args:
        total: Number of items to distribute (>= 0).
        ratios: Mapping of split name -> fraction; fractions must be
            non-negative and sum to 1.

    Returns:
        Dict mapping each split name to its item count; counts sum to ``total``.

    Raises:
        ValueError: If ``total`` is negative, a ratio is negative, or the
            ratios do not sum to 1.
    """
    if total < 0:
        raise ValueError("total must be non-negative")
    if any(weight < 0 for weight in ratios.values()):
        raise ValueError("split ratios must be non-negative")
    if abs(sum(ratios.values()) - 1.0) > 1e-6:
        raise ValueError("split ratios must sum to 1")

    exact = {name: total * weight for name, weight in ratios.items()}
    counts = {name: int(share) for name, share in exact.items()}
    leftover = total - sum(counts.values())

    # Largest-remainder allocation, restricted to splits that were asked for.
    eligible = sorted(
        (name for name, weight in ratios.items() if weight > 0),
        key=lambda name: exact[name] - counts[name],
        reverse=True,
    )
    for name in eligible[:leftover]:
        counts[name] += 1
    return counts


# Collect all images. Sorting removes the filesystem-dependent glob order, so
# the seeded shuffle below yields the same permutation on every machine/run.
images = sorted(
    path
    for pattern in ("*.jpg", "*.png", "*.tif")
    for path in IMG_DIR.glob(pattern)  # glob is a pathlib.Path method
)
# A private Random instance keeps the split independent of the global RNG state.
random.Random(SPLIT_SEED).shuffle(images)

if not images:
    # An empty result usually means IMG_DIR is wrong; say so instead of
    # silently producing empty output folders.
    print(f"No .jpg/.png/.tif images found in {IMG_DIR}")

# Split indices
n = len(images)
counts = split_counts(n, splits)
train_end = counts["train"]
val_end = train_end + counts["val"]

datasets = {
    "train": images[:train_end],
    "val": images[train_end:val_end],
    "test": images[val_end:],
}

# Copy files into YOLO structure: OUT_DIR/{images,labels}/{train,val,test}
for split, files in datasets.items():
    image_out = OUT_DIR / "images" / split
    label_out = OUT_DIR / "labels" / split
    image_out.mkdir(parents=True, exist_ok=True)
    label_out.mkdir(parents=True, exist_ok=True)

    for img in files:
        label = LBL_DIR / (img.stem + ".txt")
        shutil.copy(img, image_out / img.name)
        # Images without a label file are still copied, just without a label.
        if label.exists():
            shutil.copy(label, label_out / label.name)


## YOLO Model

In [None]:
import ultralytics
from ultralytics import YOLO
import numpy as np
import time

# Start from the pretrained YOLO11-small segmentation checkpoint.
model = YOLO("yolo11s-seg.pt")

# Fine-tune on the dataset described by the YAML config. Arguments are grouped
# by purpose; the NMS-related values were chosen after numerous NMS issues
# during validation on this setup.
results = model.train(
    # --- dataset and run identity ---
    data="training_configuration_tree_canopy-seg.yaml",  # dataset definition
    name="training_fastNMS",                              # run name
    seed=0,                # fixed so runs are comparable across parameter/model changes
    # --- hardware ---
    device="mps",          # Apple Metal GPU
    workers=2,             # kept small on macOS/MPS (try 4 if the CPU can handle it)
    batch=1,               # batch size
    # --- schedule and input ---
    epochs=100,            # number of training epochs
    imgsz=416,             # image size (library default is 640)
    # --- validation / NMS behaviour ---
    val=True,
    plots=False,           # do not draw plots on every validation pass
    conf=0.5,              # drop low-confidence predictions early
    iou=0.5,               # merge more aggressively in NMS
    max_det=100,           # cap on detections kept per image
    agnostic_nms=True,     # merge boxes across classes
    # NOTE(review): a 0.5 confidence floor during validation will likely lower
    # the reported mAP relative to the library default — confirm this
    # speed/metric trade-off is intended. (save_json=False would additionally
    # skip the COCO JSON export during training.)
)

In [None]:
# Initial run on local MacOS with MPS GPU

# import torch
# torch.mps.empty_cache()

In [None]:
# ---- Example Formatting For YOLO Model Phases ----

# from ultralytics import YOLO

# # Create a new YOLO model from scratch
# model = YOLO("yolo11n.yaml")

# # Load a pretrained YOLO model (recommended for training)
# model = YOLO("yolo11n.pt")

# # Train the model using the 'coco8.yaml' dataset for 3 epochs
# results = model.train(data="coco8.yaml", epochs=3)

# # Evaluate the model's performance on the validation set
# results = model.val()

# # Perform object detection on an image using the model
# results = model("https://ultralytics.com/images/bus.jpg")

# # Export the model to ONNX format
# success = model.export(format="onnx")