In [1]:
import os
import glob
import cv2
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO
import torch
from ultralytics import settings
from ultralytics.utils.plotting import Annotator
import pickle

In [2]:
# Pick the compute device. is_available must be *called*: the bare function
# object is always truthy, which would select 'cuda' even on CPU-only machines
# and make the later model.to(device) fail there.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
# Build the detector from the checkpoint file 'yolov8m.pt' via the ultralytics YOLO wrapper.
model = YOLO('yolov8m.pt')

In [4]:
# Move the model to the device selected in the earlier cell. Being the last
# expression of the cell, the returned object is echoed as the (long) layer
# listing in the cell output.
model.to(device)

YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(48, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(48, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_

In [5]:
# Collect every entry of the footage folder. glob gives no ordering guarantee
# and a later cell only processes video_files[:5], so sort the paths
# (case-insensitively) to make that selection reproducible across machines.
video_files = sorted(glob.glob('video_footages/*'), key=str.lower)
video_files

['video_footages\\4K Road traffic video for object detection and tracking - free download now!.mp4',
 'video_footages\\Central Park People Watching.mp4',
 'video_footages\\Creatures of the Night - trail cam videos.mp4',
 'video_footages\\Dutch bike rush hour this morning was smooth as ever.mp4',
 'video_footages\\Free City Street Footage - Royalty Free Stock Footage - People Walking Stock Footage No Copyright.mp4',
 'video_footages\\morning bike traffic during commute to Dutch university.mp4',
 'video_footages\\People Walking Free Stock Footage, Royalty-Free No Copyright Content.mp4',
 'video_footages\\Raw Video_ Pittsburg Neighborhood Drive-By Shootings.mp4',
 'video_footages\\Road traffic video for object recognition.mp4',
 'video_footages\\Shooting captured by surveillance camera in Parma.mp4',
 'video_footages\\Shopping, People, Commerce, Mall, Many, Crowd, Walking   Free Stock video footage   YouTube.mp4',
 'video_footages\\snow dispute.mp4',
 'video_footages\\Surveillance camera 

In [6]:
# Class-id -> class-name lookup exposed by the model; later cells index it with
# integer class ids (labels[int(c)]).
labels = model.names
# Number of classes the model knows about (displayed as the cell output).
len(labels)

80

In [7]:
# One random colour triple per class, used when drawing boxes.
# - The upper bound of torch.randint is exclusive, so 256 is needed for channel
#   values to cover the full 0..255 range.
# - A dedicated seeded generator keeps the palette identical between runs
#   without touching the global torch RNG state.
palette_rng = torch.Generator().manual_seed(0)
cc = torch.randint(0, 256, size=(len(labels), 3), generator=palette_rng).to(device)

In [11]:
# Replace the random colour of class id 0 with the fixed triple (0, 0, 255).
# NOTE(review): this cell must run after the cell that creates `cc`; its
# recorded execution count is out of sequence with the neighbouring cells.
cc[0] = torch.tensor((0, 0, 255), dtype=torch.int64)

In [8]:
# Class names whose detections get drawn; boxes of any other class are skipped
# by the membership test in the video loop.
detect_classes = {
    # people and vehicles
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
    "truck", "boat",
    # animals
    "bird", "cat", "dog", "horse", "sheep", "cow", "bear",
}

In [9]:
# Load the label texts that the video loop draws for each class id
# (turkish_labels[int(c)]) from a local pickle file.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open pickle files that you created yourself or otherwise trust.
file_path = 'turkish_labels_2.pickle'
turkish_labels = None  # placeholder until the file has been read
with open(file_path, mode='rb') as pickle_file:
    turkish_labels = pickle.load(pickle_file)

In [14]:
# Play the first five videos and draw labelled boxes on moving objects.
#
# Per frame: downscale to half size, build a foreground mask with MOG2
# background subtraction, dilate it, keep only the masked pixels, run the
# detector on that masked image, then draw the boxes of the wanted classes on
# the unmasked frame. Two windows are shown: the masked detector input
# ('multiply') and the annotated frame ('YOLO V8 Detection').
# Pressing 'q' stops the video that is currently playing.
kernel = np.ones((5, 5), np.uint8)  # structuring element used to dilate the foreground mask

for video in video_files[:5]:
    cap = cv2.VideoCapture(video)
    # One subtractor per video: a model shared across clips would start every
    # new clip with the previous scene's background still in its history.
    back_sub = cv2.createBackgroundSubtractorMOG2(history=500, varThreshold=100, detectShadows=True)
    try:
        ret, frame = cap.read()
        if not ret:
            # The file could not be opened or decoded, so there is no frame to
            # take the resolution from; move on instead of crashing on None.
            print(f'Skipping unreadable video: {video}')
            continue

        h, w = frame.shape[:2]
        n_h, n_w = h // 2, w // 2

        # `frame` already holds the first frame here, so it is processed as
        # well rather than being read only to obtain the resolution.
        while ret:
            frame = cv2.resize(frame, (n_w, n_h))
            fg_mask = back_sub.apply(frame)
            fg_mask = cv2.dilate(fg_mask, kernel, iterations=1)
            new_frame = cv2.bitwise_and(frame, frame, mask=fg_mask)

            results = model.predict(new_frame, verbose=False)
            # Bound once per frame, before iterating the results, so `img`
            # below is always built from the current frame.
            # NOTE(review): pil=True is presumably needed to render non-ASCII
            # label text - confirm.
            annotator = Annotator(frame, pil=True)
            for r in results:
                for box in r.boxes:
                    cls_id = int(box.cls)
                    if labels[cls_id] not in detect_classes:
                        continue
                    color = tuple(int(channel) for channel in cc[cls_id])
                    annotator.box_label(box.xyxy[0], turkish_labels[cls_id], color=color)

            img = annotator.result()
            cv2.imshow('multiply', new_frame)
            cv2.imshow('YOLO V8 Detection', img)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            ret, frame = cap.read()
    finally:
        # Runs on normal completion, on 'q', on the skip above, and on errors
        # or a kernel interrupt, so the capture handle and the windows are
        # always released.
        cap.release()
        cv2.destroyAllWindows()