In [None]:
# Main training code for my video segmentation project. It will eventually build on "Human Motion Recognition
# Using Isomap and Dynamic Time Warping" by Blackburn and Ribeiro, using the silhouettes/masks for dimensionality reduction in biomechanics.

# will likely be updated further for iterative training, and possibly 3d capability

# If you want to build your own project of this type, you will need to build a dataset.
# My recommendation for building an image or video dataset is cvat.ai, where you can manually draw polygons and then download the masks.
# Then use the separate script in this same GitHub repo to convert your masks to .txt coordinate files, and train from those plus the original PNG frames.

# this youtube creator has an excellent tutorial on this and all things computer vision:
# https://www.youtube.com/watch?v=aVKGjzAUHz0&list=PLb49csYFtO2HAdNGChGzohFJGnJnXBOqd

# Ryan Gunther
# Brock University MSc in Statistics
# written in 2025
# my bluesky account: https://bsky.app/profile/ryangunther1.bsky.social

# seeking jobs in data science and professional baseball

In [None]:
# Mount Google Drive into the Colab filesystem; the dataset and the YOLO run
# folders used by the later cells are read from / written to paths under it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Sanity-check the dataset folder before training: data.yaml must already exist
# inside it (this cell does not create it), and the per-split file counts printed
# below should come out at roughly a 70-20-10 training / val / testing split.
import os, glob

DATASET_DIR = "/content/drive/MyDrive/BaseballCV/Segmentation_training_2"
DATA = os.path.join(DATASET_DIR, "data.yaml")

assert os.path.exists(DATA), f"data.yaml not found at {DATA}"
print("Using dataset:", DATASET_DIR)


def _count_files(kind, split, pattern):
    """Return how many files in DATASET_DIR/<kind>/<split> match the glob pattern."""
    return len(glob.glob(os.path.join(DATASET_DIR, kind, split, pattern)))


# one summary line per split: PNG images next to their .txt label files
for split_name in ("train", "val", "test"):
    n_images = _count_files("images", split_name, "*.png")
    n_labels = _count_files("labels", split_name, "*.txt")
    print(f"{split_name:5s} images={n_images:3d}  labels={n_labels:3d}")

In [None]:
# train! (use Colab and free GPU access, or 150 epochs will never end)
!pip -q install ultralytics

from ultralytics import YOLO
import torch, os
# directory for my dataset:
DATASET_DIR = "/content/drive/MyDrive/BaseballCV/Segmentation_training_2"

# referencing the above data file
fixed_yaml = f"""path: {DATASET_DIR}
train: images/train
val: images/val
test: images/test
names:
  0: hitter
"""

with open("/content/data_fixed.yaml", "w") as f:
    f.write(fixed_yaml)

from ultralytics import YOLO
model = YOLO("yolov8n-seg.pt")
model.train(
    data="/content/data_fixed.yaml",
    imgsz=960, # bigger seems to be better generally, but also would take more time to train
    epochs=150,
    batch=16,
    patience=25, # this is basically saying "if there's no improvement for 25 straight epochs, kill the training early"
    pretrained=True,
    device=0,
    project="/content/drive/MyDrive/yolo_runs",
    name="hitter_seg"
)


In [None]:
# testing on unseen video footage
import os

from ultralytics import YOLO

# NOTE(review): this loads run folder "hitter_seg2", while the training cell names
# its run "hitter_seg" - presumably the auto-incremented second run; point this at
# whichever run folder holds the weights you want to test.
WEIGHTS = "/content/drive/MyDrive/yolo_runs/hitter_seg2/weights/best.pt"
VIDEO = "/content/bogaerts_testing_recode.mp4"

# The re-encoded clip is produced by the ffmpeg cell further down the notebook,
# so on a fresh runtime it does not exist yet; say so instead of failing obscurely.
assert os.path.exists(WEIGHTS), f"weights not found at {WEIGHTS}"
assert os.path.exists(VIDEO), f"video not found at {VIDEO} - run the ffmpeg re-encode cell first"

model = YOLO(WEIGHTS)

# stream=True makes predict() return a generator that yields one result per frame,
# so results are not all held in memory (and not dumped as cell output) for a
# whole video. The generator must be consumed for inference and saving to run.
frame_results = model.predict(
    source=VIDEO,
    imgsz=960,
    conf=0.5,       # baseline confidence required to identify a player
    iou=0.6,
    augment=True,   # NOTE(review): test-time augmentation may not be supported for -seg models - confirm
    save=True, save_txt=True, device=0, max_det=5,
    stream=True,
)
frames_processed = sum(1 for _ in frame_results)
print(f"processed {frames_processed} frames from {VIDEO}")

In [None]:
# Quick readability check for the re-encoded clip - I had some issues with
# incorrect .mp4 types. Prints False (and no shape) when OpenCV cannot decode
# the first frame.
import cv2

capture = cv2.VideoCapture("/content/bogaerts_testing_recode.mp4")
success, first_frame = capture.read()
if success:
    print("Frame read:", success, "shape:", first_frame.shape)
else:
    print("Frame read:", success, "", None)
capture.release()

In [None]:
# Optional: only needed when the read check above prints False.
# Re-encodes the clip stored on Drive to H.264 video (libx264) with AAC audio and
# writes it to /content/bogaerts_testing_recode.mp4; -y overwrites an existing file.
!ffmpeg -y -i "/content/drive/MyDrive/BaseballCV/bogaerts_testing_clip.mp4" \
  -c:v libx264 -preset fast -crf 20 -c:a aac \
  "/content/bogaerts_testing_recode.mp4"

In [None]:
# Optional: I used this to grab a 10 fps sample of frames from a new video and add them to my training set.
# That is why my first testing run was on a Bogaerts video with 20 annotated / 100 unseen frames;
# I eventually tested on completely unseen footage.
# NOTE(review): the input /content/bogaerts_recode.mp4 is not created by any cell in this notebook -
# presumably it was uploaded or re-encoded separately; confirm the path before running.

# create the output folder if needed, then write the sampled frames as 000001.png, 000002.png, ...
!mkdir -p /content/new_swing_frames
!ffmpeg -y -i /content/bogaerts_recode.mp4 -vf "fps=10" /content/new_swing_frames/%06d.png

In [None]:
# zip the 20 sampled pngs (10 fps * 2 secs) and download them
import shutil

from google.colab import files

# Build the archive from scratch on every run. `zip -r` updates an existing
# archive in place, so frames left over from an earlier sampling run would
# stay in the download. root_dir="/" with this base_dir keeps the same entry
# paths inside the zip as before (content/new_swing_frames/...).
frames_archive = shutil.make_archive(
    "/content/new_swing_frames",  # ".zip" is appended by make_archive
    "zip",
    root_dir="/",
    base_dir="content/new_swing_frames",
)

files.download(frames_archive)

In [None]:
#### not really using this at the moment but it can be used to update training instead of starting again

# tack on updated training including the new 20 Xander bogaerts frames to see if that can predict on the remaining xander frames

!pip -q install ultralytics
from ultralytics import YOLO

BEST = "/content/drive/MyDrive/yolo_runs/hitter_seg2/weights/best.pt"
DATA = "/content/drive/MyDrive/BaseballCV/Segmentation_training_clean/data.yaml"

model = YOLO(BEST)

model.train(
    data=DATA,
    imgsz=960,
    epochs=40,
    batch=16,
    lr0=5e-4,
    patience=12,
    device=0,
    project="/content/drive/MyDrive/yolo_runs",
    name="hitter_seg_adapt"
)
