In [4]:
import torch
import cv2
from torchvision import transforms
import numpy as np
from src.utils.datasets import letterbox
from src.utils.general import non_max_suppression_kpt
from src.utils.plots import output_to_keypoint
import torchinfo

# Notebook configuration: the video to process and the device the model runs on.
INPUT = "../data/20230307_143020.mp4"  # video
# INPUT = "../data/video.mp4"
# Fall back to the CPU so the checkpoint can still be loaded without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  #select device
try:
    # The returned object is intentionally discarded: `model` is loaded from the
    # local checkpoint below.
    # NOTE(review): presumably this call is only here so the yolov7 repository
    # code is downloaded/importable when the checkpoint is unpickled -- confirm.
    torch.hub.load("WongKinYiu/yolov7", "yolov7")
except Exception as exc:
    # Best effort only (e.g. no network), but report the reason instead of
    # silently swallowing it; KeyboardInterrupt is no longer caught.
    print(f"torch.hub.load failed, continuing with the local checkpoint: {exc!r}")
# NOTE(review): torch.load unpickles the file and can execute arbitrary code;
# only load checkpoints from a trusted source.
model = torch.load('../ml_models/yolov7-w6-pose.pt', map_location=device)['model']
_ = model.float().eval()
torchinfo.summary(model, input_size=(1, 3, 960, 960))
# if torch.cuda.is_available():
#     model.half().to(device)

Using cache found in C:\Users\jurek/.cache\torch\hub\WongKinYiu_yolov7_main

                 from  n    params  module                                  arguments                     
  0                -1  1       928  models.common.Conv                      [3, 32, 3, 1]                 
  1                -1  1     18560  models.common.Conv                      [32, 64, 3, 2]                
  2                -1  1     36992  models.common.Conv                      [64, 64, 3, 1]                
  3                -1  1     73984  models.common.Conv                      [64, 128, 3, 2]               
  4                -1  1      8320  models.common.Conv                      [128, 64, 1, 1]               
  5                -2  1      8320  models.common.Conv                      [128, 64, 1, 1]               
  6                -1  1     36992  models.common.Conv                      [64, 64, 3, 1]                
  7                -1  1     36992  models.common.Conv             

Layer (type:depth-idx)                             Output Shape              Param #
Model                                              [1, 57375, 57]            --
├─Sequential: 1-1                                  --                        --
│    └─ReOrg: 2-1                                  [1, 12, 480, 480]         --
│    └─Conv: 2-2                                   [1, 64, 480, 480]         --
│    │    └─Conv2d: 3-1                            [1, 64, 480, 480]         6,912
│    │    └─BatchNorm2d: 3-2                       [1, 64, 480, 480]         128
│    │    └─SiLU: 3-3                              [1, 64, 480, 480]         --
│    └─Conv: 2-3                                   [1, 128, 240, 240]        --
│    │    └─Conv2d: 3-4                            [1, 128, 240, 240]        73,728
│    │    └─BatchNorm2d: 3-5                       [1, 128, 240, 240]        256
│    │    └─SiLU: 3-6                              [1, 128, 240, 240]        --
│    └─Conv: 2-4          

In [5]:
def plot_skele(im, kpts, steps, orig_shape=None):
    """Draw a labelled upper-body skeleton onto ``im`` in place.

    Args:
        im: Image passed straight to ``cv2.circle`` / ``cv2.putText`` /
            ``cv2.line``; it is modified in place.
        kpts: Flat sequence holding ``steps`` values per keypoint. Only the
            first two values of each keypoint are read, as x and y.
            NOTE(review): with ``steps == 3`` the third value is presumably a
            confidence score; it is not used here -- confirm.
        steps: Number of values stored per keypoint in ``kpts``.
        orig_shape: Unused; kept so existing callers stay valid.

    Returns:
        None. Keypoints or bones whose indices are not present in ``kpts``
        are skipped instead of raising ``IndexError``.
    """
    color = (255, 255, 255)
    radius = 3
    # 0-based keypoint ids that get a marker and a text label.
    labelled_ids = (1, 5, 6, 7, 8, 9, 10)
    # Ids without an entry here are labelled with their number.
    names = {1: "head", 9: "L hand", 10: "R hand"}
    # Bones as 1-based keypoint id pairs (converted to 0-based when indexing).
    skel = ((6, 7), (7, 9), (9, 11), (6, 8), (8, 10))
    num_kpts = len(kpts) // steps

    def point(kid):
        """Return the integer (x, y) position of 0-based keypoint ``kid``."""
        return int(kpts[steps * kid]), int(kpts[steps * kid + 1])

    for kid in labelled_ids:
        if kid >= num_kpts:
            continue
        center = point(kid)
        cv2.circle(im, center, radius, color, -1)
        cv2.putText(im, names.get(kid, str(kid)), center, cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255), 1,
                    cv2.LINE_AA)

    for start, end in skel:
        # Both ids are 1-based, so the largest valid id equals num_kpts.
        if max(start, end) > num_kpts:
            continue
        cv2.line(im, point(start - 1), point(end - 1), color, thickness=2)

In [7]:
# Run the pose model on every frame of INPUT and show the annotated frames in
# an OpenCV window until the video ends or 'q' is pressed.
cap = cv2.VideoCapture(INPUT)
if not cap.isOpened():
    cap.release()
    raise RuntimeError(f"Could not open video source: {INPUT}")

to_tensor = transforms.ToTensor()  # loop-invariant, build it once
try:
    with torch.no_grad():
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of the stream (or a failed read): `frame` is not usable.
                break
            # frame = letterbox(frame, 960, stride=64, auto=True)[0]
            frame = cv2.resize(frame, (960, 960), interpolation=cv2.INTER_LINEAR)

            # Add the batch dimension and move to the device the model was loaded on.
            img = to_tensor(frame).unsqueeze(0).to(device)
            # img = img.half().to(device)
            output, _ = model(img)
            output = non_max_suppression_kpt(output, 0.25, 0.65, nc=model.yaml['nc'], nkpt=model.yaml['nkpt'],
                                             kpt_label=True)
            output = output_to_keypoint(output)

            # Draw directly on the resized frame: it is the same picture the
            # model saw, so there is no need to rebuild it from the tensor
            # (the former cv2.cvtColor call was given cv2.IMREAD_COLOR, which
            # is an imread flag rather than a colour-conversion code).
            # cv2.putText(frame, f"{output.shape[0]}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            for idx in range(output.shape[0]):
                # Keypoint values are taken from column 7 onwards of each
                # detection row, 3 values per keypoint.
                plot_skele(frame, output[idx, 7:].T, 3)

            cv2.imshow('image', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the capture and close the window, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()