In [1]:
import torch

In [2]:
def detect_events_on(frames, model=None):
    """
    Detects events/objects on given frames using YOLOv5.

    Args:
        frames (list): List of paths to frame images.
        model (callable, optional): An already-loaded detection model. It is
            called once per frame path and must return an object exposing
            ``.pandas().xyxy[0].to_dict(orient='records')``. When omitted, the
            pretrained ``yolov5s`` model is loaded via ``torch.hub``. Pass a
            model explicitly to avoid reloading it on every call.
    Returns:
        detections (dict): Mapping from frame path to detected objects
            (a list of per-detection record dicts; empty list if nothing was
            detected in that frame).
    """

    # Materialize once so emptiness can be checked and any iterable is accepted.
    frames = list(frames)
    detections = {}

    # Nothing to analyse: skip the (slow, possibly network-bound) model load.
    if not frames:
        return detections

    if model is None:
        model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

    for frame_path in frames:
        # The model is given the image path directly and handles loading itself.
        results = model(frame_path)
        # xyxy[0] holds the detections for the single image passed in.
        detected = results.pandas().xyxy[0]
        detections[frame_path] = detected.to_dict(orient='records')

    return detections

In [3]:
import cv2
import os

def extract_frames(video_path, output_folder, fps = 1):
    """
    Extracts frames from a video at the specified frames-per-second rate.

    Args:
        video_path (str): Path to the input video file.
        output_folder (str): Directory where extracted frames will be saved.
            It is created if it does not exist.
        fps (int): Frames per second rate for extraction. Default is 1.
            If it exceeds the video's own frame rate, every frame is saved.

    Returns:
        List of frame file paths extracted (only frames that were actually
        written). Empty if no frame could be read from the video.

    Raises:
        ValueError: If ``fps`` is not positive.
    """

    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps!r}")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    saved_frames = []

    try:
        original_fps = cap.get(cv2.CAP_PROP_FPS)
        if original_fps > 0:
            # Clamp to 1: a requested rate above the source rate would otherwise
            # truncate to 0 and make the modulo below divide by zero.
            frame_interval = max(1, int(original_fps / fps))
        else:
            # Source frame rate unavailable (0 or NaN): fall back to every frame.
            frame_interval = 1

        count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if count % frame_interval == 0:
                frame_path = os.path.join(output_folder, f"frame_{count}.jpg")
                # Only report frames that were really written to disk.
                if cv2.imwrite(frame_path, frame):
                    saved_frames.append(frame_path)
            count += 1
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()

    return saved_frames

In [4]:
# Pipeline run for one clip: sample one frame per second, then run detection on the samples.
video_file = 'IMG_4876.MOV'
frames_folder = './extracted_frames'
frames = extract_frames(video_file, frames_folder, fps=1)
detections = detect_events_on(frames)
# extract_frames returns an empty list when no frame could be read, so guard the index.
if frames:
    print(f"Detected objects in first frame: {detections[frames[0]]}")
else:
    print(f"No frames were extracted from {video_file}")

Using cache found in C:\Users\10839330/.cache\torch\hub\ultralytics_yolov5_master


Creating new Ultralytics Settings v0.0.6 file  
View Ultralytics Settings with 'yolo settings' or at 'C:\Users\10839330\AppData\Roaming\Ultralytics\settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


  import pkg_resources as pkg
YOLOv5  2025-8-5 Python-3.13.1 torch-2.7.1+cpu CPU

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...
100%|██████████| 14.1M/14.1M [00:03<00:00, 4.18MB/s]

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Detected objects in first frame: [{'xmin': 1299.9686279296875, 'ymin': 339.4145202636719, 'xmax': 1490.21826171875, 'ymax': 898.4437866210938, 'confidence': 0.8889763355255127, 'class': 0, 'name': 'person'}, {'xmin': 884.81494140625, 'ymin': 338.6013488769531, 'xmax': 1036.012451171875, 'ymax': 883.3131103515625, 'confidence': 0.8867971897125244, 'class': 0, 'name': 'person'}, {'xmin': 1598.5797119140625, 'ymin': 380.02862548828125, 'xmax': 1857.048828125, 'ymax': 945.7855224609375, 'confidence': 0.885729193687439, 'class': 0, 'name': 'person'}, {'xmin': 1159.235595703125, 'ymin': 369.9943542480469, 'xmax': 1323.4622802734375, 'ymax': 880.0205078125, 'confidence': 0.8689597845077515, 'class': 0, 'name': 'person'}, {'xmin': 1020.5399780273438, 'ymin': 354.8042907714844, 'xmax': 1193.2943115234375, 'ymax': 873.4462890625, 'confidence': 0.8673372864723206, 'class': 0, 'name': 'person'}, {'xmin': 69.0732421875, 'ymin': 260.69989013671875, 'xmax': 376.37127685546875, 'ymax': 1068.8575439453

In [5]:
# Same pipeline on a second clip: sample one frame per second, then run detection.
video_file = 'IMG_4939.MOV'
frames_folder = './extracted_frames'
frames = extract_frames(video_file, frames_folder, fps=1)
detections = detect_events_on(frames)
# extract_frames returns an empty list when no frame could be read, so guard the index.
if frames:
    print(f"Detected objects in first frame: {detections[frames[0]]}")
else:
    print(f"No frames were extracted from {video_file}")

Using cache found in C:\Users\10839330/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2025-8-5 Python-3.13.1 torch-2.7.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Detected objects in first frame: []


In [7]:
# Same pipeline on a third clip; this cell inspects a later sampled frame, not the first one.
video_file = 'IMG_4049.MOV'
frames_folder = './extracted_frames'
frames = extract_frames(video_file, frames_folder, fps=1)
detections = detect_events_on(frames)
# Position within the list of sampled frames (not the frame number in the video).
frame_index = 7
# Short clips may yield fewer sampled frames than the index asked for.
if frame_index < len(frames):
    print(f"Detected objects in frame {frame_index}: {detections[frames[frame_index]]}")
else:
    print(f"Only {len(frames)} frame(s) were extracted from {video_file}; no frame at index {frame_index}")

Using cache found in C:\Users\10839330/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2025-8-5 Python-3.13.1 torch-2.7.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Detected objects in first frame: [{'xmin': 55.911346435546875, 'ymin': 232.64492797851562, 'xmax': 1071.026611328125, 'ymax': 1906.935791015625, 'confidence': 0.8457403182983398, 'class': 0, 'name': 'person'}]
