In [1]:
import os
import glob
import cv2
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO
import torch
from ultralytics import settings
from ultralytics.utils.plotting import Annotator
import pickle

In [2]:
# Select the GPU when CUDA is usable, otherwise fall back to the CPU.
# `is_available` has to be *called*: the bare function object is always truthy,
# which would select 'cuda' even on a machine without a usable GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
# Collect every file in the checkpoint directory.
# glob returns matches in a filesystem-dependent order, so sort them to keep
# positional access (e.g. model_files[0]) stable across machines and runs.
model_files = sorted(glob.glob("pt models/yolov8s_epoch50/*"))
model_files

['pt models/yolov8s_epoch50\\best.pt',
 'pt models/yolov8s_epoch50\\best.torchscript',
 'pt models/yolov8s_epoch50\\last.pt']

In [4]:
# Load the best checkpoint by file name instead of by list position: the order
# of glob results is not guaranteed, so index 0 could just as well be
# 'last.pt' or the torchscript export.
best_weights = [p for p in model_files if os.path.basename(p) == "best.pt"]
if not best_weights:
    raise FileNotFoundError(f"No 'best.pt' checkpoint found among: {model_files}")
model = YOLO(best_weights[0])

In [5]:
# Move the loaded model to the device selected in the `device` variable.
# As the last expression of the cell, the returned object's repr is displayed.
model.to(device)

YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(96, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_s

In [6]:
# Every entry directly inside the footage folder is treated as an input video.
VIDEO_GLOB = 'video_footages/*'
video_files = glob.glob(VIDEO_GLOB)
video_files

['video_footages\\Raw Video_ Pittsburg Neighborhood Drive-By Shootings.mp4',
 'video_footages\\Shooting captured by surveillance camera in Parma.mp4',
 'video_footages\\Shopping, People, Commerce, Mall, Many, Crowd, Walking   Free Stock video footage   YouTube.mp4',
 'video_footages\\snow dispute.mp4',
 'video_footages\\Surveillance camera captures tanks entering Ukraine via Belarus border.mp4',
 'video_footages\\trail cam video creatures of the night.mp4',
 'video_footages\\Unbelievably busy bicycle crossing in Amsterdam.mp4',
 'video_footages\\vecteezy_people-crossing-the-road-on-zebra-tallin_28257759.mp4',
 'video_footages\\차흐르듯 (1).mp4',
 'video_footages\\차흐르듯.mp4']

In [7]:
# Class-id -> class-name lookup exposed by the loaded model; later indexed
# with the integer class id of each detected box.
labels = model.names
# Number of classes the model knows (shown as the cell output).
len(labels)

80

In [8]:
# One random colour (three integer channels) per class, used when drawing boxes.
# - torch.randint's `high` bound is exclusive, so 256 is required for the
#   channel value 255 to be reachable.
# - A seeded generator keeps the colours identical across kernel restarts.
# - The table stays on the CPU: its entries are only read back as Python ints
#   for drawing, so a GPU copy would just add a device-to-host transfer per lookup.
_color_rng = torch.Generator().manual_seed(0)
cc = torch.randint(0, 256, size=(len(labels), 3), generator=_color_rng)

In [9]:
# Replace the random colour of class index 0 with a fixed one.
# NOTE(review): index 0 is presumably "person", and (0, 0, 255) presumably
# renders as red because OpenCV frames are BGR — confirm against model.names.
cc[0] = torch.tensor([0, 0, 255], dtype=torch.long)

In [10]:
# Names of the classes that should be annotated; a set gives O(1) membership tests.
detect_classes = {
    # people and vehicles
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat",
    # animals
    "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe",
}

In [11]:
# Load the label table that is later indexed by class id to caption each box.
# NOTE: pickle.load can execute arbitrary code from the file — only load
# pickles that come from a trusted source.
file_path = 'turkish_labels_2.pickle'
turkish_labels = None  # remains None if opening/unpickling below raises
with open(file_path, mode='rb') as label_file:
    turkish_labels = pickle.load(label_file)

In [12]:
# Play every video through the detector and show the annotated frames live.
#
# For each file: decode frame by frame, resize, run the model, draw a labelled
# box for every detection whose class name is in `detect_classes`, and display
# the result in an OpenCV window. Pressing 'q' stops the current video; the
# outer loop then continues with the next file.

# Fixed size (pixels) every frame is resized to before inference and display.
FRAME_WIDTH, FRAME_HEIGHT = 600, 360
WINDOW_NAME = 'YOLO V8 Detection'

for video in video_files:
    cap = cv2.VideoCapture(video)
    if not cap.isOpened():
        # The glob above matches any file, so skip entries OpenCV cannot open
        # instead of crashing on a `None` frame.
        print(f"Skipping unreadable video: {video}")
        cap.release()
        continue

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # end of stream (or a frame could not be decoded)

            frame = cv2.resize(frame, (FRAME_WIDTH, FRAME_HEIGHT))
            results = model.predict(frame, verbose=False)

            # Create the annotator before iterating the results so it is always
            # bound for this frame, even if `results` is empty.
            # NOTE(review): pil=True is presumably needed so non-ASCII label
            # text renders correctly — confirm.
            annotator = Annotator(frame, pil=True)
            for r in results:
                for box in r.boxes:
                    cls_id = int(box.cls)
                    if labels[cls_id] not in detect_classes:
                        continue
                    # Convert the colour row to plain Python ints once per box.
                    color = tuple(cc[cls_id].tolist())
                    annotator.box_label(box.xyxy[0], turkish_labels[cls_id], color=color)

            cv2.imshow(WINDOW_NAME, annotator.result())

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if inference or
        # drawing raised part-way through the video.
        cap.release()
        cv2.destroyAllWindows()