In [13]:
import torch
import numpy as np
import cv2
from time import time

In [14]:
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
classes = model.names
model.to('cpu')

Using cache found in C:\Users\maksy/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-2-16 Python-3.10.10 torch-1.13.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


AutoShape(
  (model): DetectMultiBackend(
    (model): DetectionModel(
      (model): Sequential(
        (0): Conv(
          (conv): Conv2d(3, 32, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2))
          (act): SiLU(inplace=True)
        )
        (1): Conv(
          (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (act): SiLU(inplace=True)
        )
        (2): C3(
          (cv1): Conv(
            (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv2): Conv(
            (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv3): Conv(
            (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (m): Sequential(
            (0): Bottleneck(
              (cv1): Conv(
                (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
  

In [15]:
faceCascade = cv2.CascadeClassifier('./haarcascade_frontalface_alt2.xml')

In [16]:
cap = cv2.VideoCapture('../input_4k.mp4')
assert cap.isOpened()
x_shape = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
y_shape = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter('output_4k.avi', cv2.VideoWriter_fourcc(*"MJPG"), 20, (x_shape, y_shape))

In [17]:
_ , prev_frame = cap.read()
prev_frame_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

#  Shi-Tomashi Corner Detection
mask = np.zeros_like(prev_frame_gray)
# mask[400:600, 1000:1300] = 255  #  1920 * 1080
mask[864:1200, 1100:2000] = 255  #  3840 * 2160
mask = mask.astype(np.uint8)
# mask = None  #  test
n_points = 20
kp_prev = cv2.goodFeaturesToTrack(prev_frame_gray, mask=mask, maxCorners=n_points, qualityLevel=0.5, minDistance=50, blockSize=10)

mask = np.zeros_like(prev_frame)
color = np.random.randint(0, 255, (n_points, 3))

In [18]:
count = 1  #  1120 for 1080p
while True:
    start_time = time()
    ret, frame = cap.read()
    try:
        assert ret
    except AssertionError:
        print('***** END *****')
        break
    
    #  Object detection
    results = model([frame])
    labels, cord = results.xyxyn[0][:, -1].numpy(), results.xyxyn[0][:, :-1].numpy()
    x_shape, y_shape = frame.shape[1], frame.shape[0]
    for i in range(len(labels)):
        row = cord[i]
        if row[4] >= 0.2:
            x1, y1, x2, y2 = int(row[0]*x_shape), int(row[1]*y_shape), int(row[2]*x_shape), int(row[3]*y_shape)
            bgr = (0, 255, 0)
            cv2.rectangle(frame, (x1, y1), (x2, y2), bgr, 2)
            cv2.putText(frame, classes[int(labels[i])], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.9, bgr, 2)
    
    #  Face detection
    frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = faceCascade.detectMultiScale(frame_gray, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60), flags=cv2.CASCADE_SCALE_IMAGE)
    for (x,y,w,h) in faces:
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255,0,0), 2)
    
    #  Lucas-Kanade optical flows calculation
    try:
        kp, st, err = cv2.calcOpticalFlowPyrLK(prev_frame_gray, frame_gray, kp_prev, None, winSize=(15, 15), maxLevel=3, criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
        if kp is not None:
            good_new = kp[st == 1]
            good_old = kp_prev[st == 1]
            for i, (new, old) in enumerate(zip(good_new, good_old)):
                x1, y1 = new.ravel()
                x2, y2 = old.ravel()
                mask = cv2.line(mask, (int(x1),int(y1)), (int(x2),int(y2)), color[i].tolist(), 2)
                frame = cv2.circle(frame, (int(x1),int(y1)), 5, color[i].tolist(), -1)
            cv2.add(frame, mask)
            prev_frame_gray = frame_gray.copy()
            kp_prev = good_new.reshape(-1, 1, 2)
    except Exception as e:
        print(e)
    
    out.write(frame)
    print(f"frame {count} proccessing time : {time() - start_time}")
    count += 1

frame 1 proccessing time : 1.8054258823394775
frame 2 proccessing time : 1.7604734897613525
frame 3 proccessing time : 1.7516038417816162
frame 4 proccessing time : 1.7395682334899902
frame 5 proccessing time : 1.7550551891326904
frame 6 proccessing time : 1.7915711402893066
frame 7 proccessing time : 1.7713987827301025
frame 8 proccessing time : 1.773465633392334
frame 9 proccessing time : 1.7771320343017578
frame 10 proccessing time : 1.7693986892700195
frame 11 proccessing time : 1.8242881298065186
frame 12 proccessing time : 1.7737691402435303
frame 13 proccessing time : 1.7673981189727783
frame 14 proccessing time : 1.760645866394043
frame 15 proccessing time : 1.7463421821594238
frame 16 proccessing time : 1.8093030452728271
frame 17 proccessing time : 1.7808561325073242
frame 18 proccessing time : 1.774399757385254
frame 19 proccessing time : 1.786545991897583
frame 20 proccessing time : 1.7731800079345703
frame 21 proccessing time : 1.7754006385803223
frame 22 proccessing time 