In [34]:

import json

import cv2
import torch

In [10]:
def get_frames_from_video(video_path, fps=1) -> list:
    """
    Get frames from video with specified fps.

    :param video_path: path to video.
    :param fps: frames per second.
    :return: list of frames.
    """

    frames = []

    video = cv2.VideoCapture(video_path)
    video_fps = int(video.get(cv2.CAP_PROP_FPS))
    video_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    fps_interval = video_fps // fps

    for frame_i in range(0, video_frames, fps_interval):
        video.set(cv2.CAP_PROP_POS_FRAMES, frame_i)
        ret, frame = video.read()

        if ret:
            frames.append(frame)

    video.release()

    return frames

In [11]:
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# MODEL = torch.hub.load("ultralytics/yolov5", "yolov5x")
MODEL = torch.hub.load("ultralytics/yolov5", "custom", "./model_weights.pt")
MODEL.to(DEVICE)

Using cache found in C:\Users\Bas_K/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-12-6 Python-3.11.5 torch-2.1.1 CUDA:0 (NVIDIA GeForce GTX 1060, 6144MiB)

Fusing layers... 
Model summary: 322 layers, 86482948 parameters, 0 gradients, 204.7 GFLOPs
Adding AutoShape... 


AutoShape(
  (model): DetectMultiBackend(
    (model): DetectionModel(
      (model): Sequential(
        (0): Conv(
          (conv): Conv2d(3, 80, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2))
          (act): SiLU(inplace=True)
        )
        (1): Conv(
          (conv): Conv2d(80, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (act): SiLU(inplace=True)
        )
        (2): C3(
          (cv1): Conv(
            (conv): Conv2d(160, 80, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv2): Conv(
            (conv): Conv2d(160, 80, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv3): Conv(
            (conv): Conv2d(160, 160, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (m): Sequential(
            (0): Bottleneck(
              (cv1): Conv(
                (conv): Conv2d(80, 80, kernel_size=(1, 1), stride=(1, 1

In [12]:
STREAM_DOWNLOAD_LOCATION = "../stream_downloader/streams"

In [45]:
videos = [
    STREAM_DOWNLOAD_LOCATION + "/HsLvnFQW_yM/20231202_115928.mp4",
    # STREAM_DOWNLOAD_LOCATION + "/Ihr_nwydXi0/20231202_115928.mp4",
    # STREAM_DOWNLOAD_LOCATION + "/yPSYdCWRWFA/20231202_115927.mp4",
]

In [14]:
frames = get_frames_from_video(videos[0], 1)

In [15]:
type(frames[0])

numpy.ndarray

In [47]:
for video in videos:
    frames = get_frames_from_video(video, 1)

In [48]:
MODEL(frames)

YOLOv5 <class 'models.common.Detections'> instance
image 1/15: 1080x1920 2 Sea lions, 3 Whales
image 2/15: 1080x1920 1 Sea lion
image 3/15: 1080x1920 1 Sea lion, 1 Whale
image 4/15: 1080x1920 1 Sea lion, 2 Whales
image 5/15: 1080x1920 3 Whales
image 6/15: 1080x1920 1 Sea lion, 1 Whale
image 7/15: 1080x1920 1 Polar bear, 1 Sea lion, 1 Whale
image 8/15: 1080x1920 1 Polar bear, 2 Whales
image 9/15: 1080x1920 1 Sea lion, 1 Whale
image 10/15: 1080x1920 1 Bear, 1 Sea lion, 1 Whale
image 11/15: 1080x1920 2 Sea lions, 2 Whales
image 12/15: 1080x1920 2 Sea lions, 1 Whale
image 13/15: 1080x1920 1 Monkey, 1 Penguin, 1 Whale
image 14/15: 1080x1920 1 Monkey
image 15/15: 1080x1920 1 Penguin
Speed: 2.5ms pre-process, 40.6ms inference, 5.8ms NMS per image at shape (15, 3, 384, 640)

In [43]:
for video in videos:
    frames = get_frames_from_video(video, 1)

    animals = []
    animal_count = {}
    animal_surfaces = {}

    for frame in frames:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        results = MODEL(frame)

        # results.show()
        results.save()

        findings = results.pandas().xyxy[0]
        findings = findings[findings["confidence"] >= 0.2]

        for i in range(len(findings)):

            if findings["name"][i] not in animals:
                animals.append(findings["name"][i])

                animal_count[findings["name"][i]] = 0
                animal_surfaces[findings["name"][i]] = 0

            animal_surfaces[findings["name"][i]] += (findings["xmax"][i] - findings["xmin"][i]) * (
                    findings["ymax"][i] - findings["ymin"][i]
            )
            animal_count[findings["name"][i]] += 1

    # Make counts and surfaces averages.
    animal_count = {key: count / len(frames) for key, count in animal_count.items()}
    animal_surfaces = {key: count / len(frames) for key, count in animal_surfaces.items()}

    print(f"{video}:")
    print(f" Animals: {animals}")
    print(f" Count: {animal_count}")
    print(f" Surfaces: {animal_surfaces}\n")
    
    json_output = {
        "video": video,
        "animals": animals,
        "count": animal_count,
        "surfaces": animal_surfaces
    }

    with open(f"./runs/statistics_{video.split('/')[3]}.json", "w") as outfile:
        json.dump(json_output, outfile)

Saved 1 image to [1mruns\detect\exp[0m
Saved 1 image to [1mruns\detect\exp2[0m
Saved 1 image to [1mruns\detect\exp3[0m
Saved 1 image to [1mruns\detect\exp4[0m
Saved 1 image to [1mruns\detect\exp5[0m
Saved 1 image to [1mruns\detect\exp6[0m
Saved 1 image to [1mruns\detect\exp7[0m
Saved 1 image to [1mruns\detect\exp8[0m
Saved 1 image to [1mruns\detect\exp9[0m
Saved 1 image to [1mruns\detect\exp10[0m
Saved 1 image to [1mruns\detect\exp11[0m
Saved 1 image to [1mruns\detect\exp12[0m
Saved 1 image to [1mruns\detect\exp13[0m
Saved 1 image to [1mruns\detect\exp14[0m
Saved 1 image to [1mruns\detect\exp15[0m


../stream_downloader/streams/HsLvnFQW_yM/20231202_115928.mp4:
 Animals: ['Polar bear', 'Sea lion', 'Whale', 'Bear', 'Monkey', 'Hippopotamus']
 Count: {'Polar bear': 1.8666666666666667, 'Sea lion': 0.26666666666666666, 'Whale': 0.26666666666666666, 'Bear': 0.26666666666666666, 'Monkey': 0.13333333333333333, 'Hippopotamus': 0.06666666666666667}
 Surfaces: {'Polar bear': 46235.19481799246, 'Sea lion': 5961.117777845555, 'Whale': 12082.980895255507, 'Bear': 5978.521441647255, 'Monkey': 4126.422238499252, 'Hippopotamus': 1824.7468914647898}
