In [1]:
import sys
import os
import cv2
from pathlib import Path
from PIL import Image, ImageSequence
import numpy as np
from ultralytics import YOLO

In [2]:
# Find pyproject.toml in parent directories and add its directory to sys.path
current_path = Path.cwd()
for parent in [current_path] + list(current_path.parents):
    pyproject_file = parent / "pyproject.toml"
    if pyproject_file.exists():
        sys.path.insert(0, str(parent))
        break

In [3]:
# Create output directory and save result images
output_dir = current_path.parent / "output"
output_dir.mkdir(exist_ok=True)

In [4]:
# Load a pre-trained YOLO model (e.g., YOLOv8s for small, yolov8n for nano)
model = YOLO("yolo12l.pt") 

In [9]:
gif_path: Path = Path("../images/syamu_cake.gif")

In [10]:
# Open GIF and iterate frames
im = Image.open(gif_path)


In [11]:
frames_out = []
durations = []

In [12]:
for frame in ImageSequence.Iterator(im):
    durations.append(frame.info.get("duration", 100))  # ms per frame (default 100)
    rgb = frame.convert("RGB")
    arr = np.array(rgb)

    # Run YOLO prediction on the frame (array in RGB)
    results = model(arr)
    # Get annotated image (numpy array)
    annotated = results[0].plot()

    # Convert annotated array back to PIL Image and collect
    frames_out.append(Image.fromarray(annotated))

# Save annotated frames back to a GIF
if frames_out:
    out_path = output_dir / f"{gif_path.stem}_predicted.gif"
    frames_out[0].save(
        out_path,
        save_all=True,
        append_images=frames_out[1:],
        duration=durations,
        loop=0,
        optimize=False,
    )
    print("Saved predicted GIF to:", out_path)
else:
    print("No frames extracted from:", gif_path)


0: 384x640 1 person, 1 bowl, 39.9ms
Speed: 11.1ms preprocess, 39.9ms inference, 2.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 bowl, 38.8ms
Speed: 1.1ms preprocess, 38.8ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 bowl, 38.4ms
Speed: 1.2ms preprocess, 38.4ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 bowl, 34.9ms
Speed: 0.9ms preprocess, 34.9ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 bowl, 33.3ms
Speed: 0.9ms preprocess, 33.3ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 bowl, 32.0ms
Speed: 0.7ms preprocess, 32.0ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 bowl, 31.7ms
Speed: 1.1ms preprocess, 31.7ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 bowl, 32.8ms
Speed: 0.7ms preprocess,