In [1]:
import torch
from torchvision import transforms

from utils.datasets import letterbox
from utils.general import non_max_suppression_kpt
from utils.plots import output_to_keypoint, plot_skeleton_kpts

import matplotlib.pyplot as plt
import cv2
import numpy as np

fc = 1
#na = np.empty((1, 51))
na = []
currentKeypoints = []
c=0

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def load_model():
    model = torch.load('yolov7-w6-pose.pt', map_location=device)['model']
    # Put in inference mode
    model.float().eval()

    if torch.cuda.is_available():
        # half() turns predictions into float16 tensors
        # which significantly lowers inference time
        model.half().to(device)
    return model

model = load_model()

In [3]:
def run_inference(image):
    # Resize and pad image
    image = letterbox(image, 960, stride=64, auto=True)[0] # shape: (567, 960, 3)
    # Apply transforms
    image = transforms.ToTensor()(image) # torch.Size([3, 567, 960])
    if torch.cuda.is_available():
        image = image.half().to(device)
    # Turn image into batch
    image = image.unsqueeze(0) # torch.Size([1, 3, 567, 960])
    with torch.no_grad():
        output, _ = model(image)
    return output, image

In [17]:
def draw_keypoints(output, image):
    global fc, c
    output = non_max_suppression_kpt(output, 
                                     0.02, # Confidence Threshold
                                     0.1, # IoU Threshold
                                     nc=model.yaml['nc'], # Number of Classes
                                     nkpt=model.yaml['nkpt'], # Number of Keypoints
                                     kpt_label=True)
    #0.2, 0.4
    with torch.no_grad():
        output = output_to_keypoint(output)
        #print(f'Frame Number: {fc}; Data Size: {output.shape}')
    try:
        t = output[0] #retrieves only first skeleton data
        t = t[-51:] #retrieves last 51 elements
        #t = t[::3] cuts every third element (confidence level)
        t = [x for i, x in enumerate(t) if (i+1)%3 != 0]
        na.append(t)
        currentKeypoints = []
    except:
        #currentKeypoints = 
        c += 1
        #na.append([null, null,null, null,null, null,null, null,null, null,null, null,null, null,null, null,null, null,null, null,null, null,null, null,null, null,null, null,null, null,null, null,null, null])
    nimg = image[0].permute(1, 2, 0) * 255
    nimg = nimg.cpu().numpy().astype(np.uint8)
    nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR)

    
    
    for idx in range(output.shape[0]):
        plot_skeleton_kpts(nimg, output[idx, 7:].T, 3)
    
    
        
    return nimg

In [49]:
currentFrame = 0
fCount = 1

def swimPose_estimate(filename):
    global fc
    cap = cv2.VideoCapture(filename)
    
    cap.set(cv2.CAP_PROP_POS_FRAMES, currentFrame)
    
    # VideoWriter for saving the video
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    out = cv2.VideoWriter('Free_Skel.mp4', fourcc, 30.0, (int(cap.get(3)), int(cap.get(4))))
    while cap.isOpened():
        (ret, frame) = cap.read()
        if ret == True:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            output, frame = run_inference(frame)
            frame = draw_keypoints(output, frame)
            
            #frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
            #frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
            output, frame = run_inference(frame)
            frame = draw_keypoints(output, frame)
            
            
            
            
            
            
            fc += 1 #
            frame = cv2.resize(frame, (int(cap.get(3)), int(cap.get(4))))
            frame = cv2.resize(frame,(720,1280),fx=0,fy=0, interpolation = cv2.INTER_CUBIC)
            out.write(frame)
            cv2.imshow('Pose estimation', frame)
        else:
            break

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    out.release()
    cv2.destroyAllWindows()

In [46]:
def pose_estimation_video(filename):
    global fc
    cap = cv2.VideoCapture(filename)
    # VideoWriter for saving the video
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    out = cv2.VideoWriter('Free_Skel.mp4', fourcc, 30.0, (int(cap.get(3)), int(cap.get(4))))
    while cap.isOpened():
        (ret, frame) = cap.read()
        if ret == True:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            #frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
            #frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
            output, frame = run_inference(frame)
            print(frame.shape)
            frame = draw_keypoints(output, frame)
            fc += 1 #
            frame = cv2.resize(frame, (int(cap.get(3)), int(cap.get(4))))
            frame = cv2.resize(frame,(1280,720),fx=0,fy=0, interpolation = cv2.INTER_CUBIC)
            out.write(frame)
            cv2.imshow('Pose estimation', frame)
        else:
            break

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    out.release()
    cv2.destroyAllWindows()

In [45]:
def pose_estimation_video2(filename):
    global fc
    cap = cv2.VideoCapture(filename)
    # VideoWriter for saving the video
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    out = cv2.VideoWriter('Free_Skel.mp4', fourcc, 30.0, (int(cap.get(3)), int(cap.get(4))))
    while cap.isOpened():
        (ret, frame) = cap.read()
        if ret == True:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            #frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
            frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
            output, frame = run_inference(frame)
            frame = draw_keypoints(output, frame)
            fc += 1 #
            frame = cv2.resize(frame, (int(cap.get(3)), int(cap.get(4))))
            frame = cv2.resize(frame,(720,720),fx=0,fy=0, interpolation = cv2.INTER_CUBIC)
            out.write(frame)
            cv2.imshow('Pose estimation', frame)
        else:
            break

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    out.release()
    cv2.destroyAllWindows()

In [47]:
#video = "C:/Users/jonso/OneDrive/Desktop/IMG_5498.MOV"
video = "C:/Users/jonso/OneDrive/Desktop/Free Training Data.mp4"
na = []
c=0
pose_estimation_video(video)
skelData = np.array(na)
skelData = skelData.reshape(skelData.shape[0], 17, 2)
print(skelData.shape)
print(f'{c} total empty frames')
#print(na)
#print(skelData)
#np.save('Fly_Skel_Training.npy', skelData)

torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
torch.Size([1, 3, 576, 960])
(21, 17, 2)
6 total empty frames


In [43]:
#video = "C:/Users/jonso/OneDrive/Desktop/IMG_5498.MOV"
video = "C:/Users/jonso/OneDrive/Desktop/Free Training Data.mp4"
na = []
c=0
pose_estimation_video2(video)
skelData = np.array(na)
skelData = skelData.reshape(skelData.shape[0], 17, 2)
print(skelData.shape)
print(f'{c} total empty frames')
#print(na)
print(skelData)
#np.save('Fly_Skel_Training.npy', skelData)

(14542, 17, 2)
208 total empty frames
[[[      278.5       516.5]
  [     282.25       508.5]
  [     272.25      509.25]
  ...
  [     285.25         899]
  [        282       958.5]
  [     283.25       958.5]]

 [[        280         508]
  [     285.25         500]
  [        273       500.5]
  ...
  [     281.25         871]
  [     272.75         945]
  [      270.5       949.5]]

 [[      279.5         523]
  [      284.5         515]
  [     272.25       516.5]
  ...
  [      290.5         878]
  [     279.75       913.5]
  [        272       916.5]]

 ...

 [[     361.75       384.5]
  [      365.5      378.75]
  [      359.5      379.75]
  ...
  [        393         543]
  [     350.75         585]
  [     405.25       599.5]]

 [[      375.5         387]
  [     377.25         382]
  [     371.25      382.75]
  ...
  [     389.25       544.5]
  [        349         588]
  [     403.75       598.5]]

 [[     380.75      382.75]
  [      380.5       378.5]
  [     375.25      

In [33]:
#video = "C:/Users/jonso/OneDrive/Desktop/IMG_5498.MOV"
video = "C:/Users/jonso/OneDrive/Desktop/Free Training Data.mp4"
na = []
c=0
swimPose_estimate(video)
skelData = np.array(na)
skelData = skelData.reshape(skelData.shape[0], 17, 2)
print(skelData.shape)
print(f'{c} total empty frames')
#print(na)
print(skelData)
#np.save('Fly_Skel_Training.npy', skelData)

(291, 17, 2)
0 total empty frames
[[[        268      446.75]
  [      267.5      437.75]
  [        262         441]
  ...
  [        277       777.5]
  [      330.5       889.5]
  [     268.25         899]]

 [[      284.5       457.5]
  [        283      449.75]
  [      274.5       453.5]
  ...
  [        277       774.5]
  [      336.5       885.5]
  [     259.75         881]]

 [[     284.25       457.5]
  [     282.75         450]
  [      274.5      453.75]
  ...
  [        277       774.5]
  [      336.5       885.5]
  [     259.75         881]]

 ...

 [[     283.75       469.5]
  [     281.25       462.5]
  [      281.5       464.5]
  ...
  [      279.5       760.5]
  [        343         871]
  [      265.5         873]]

 [[      261.5      439.25]
  [     259.25         436]
  [     258.75         439]
  ...
  [     277.25         762]
  [      339.5       862.5]
  [     265.75         864]]

 [[        276       461.5]
  [     274.25       454.5]
  [     273.75       456

In [129]:
def preprocess(videoPath, savePath):
    global fc, c, na, skelData
    c=0
    video = videoPath
    na = []
    pose_estimation_video(video)
    skelData = np.array(na)
    skelData = skelData.reshape(skelData.shape[0], 17, 2)
    print(skelData.shape)
    print(f'{c} total empty frames')
    #print(na)
    print(skelData[0])
    np.save(savePath, skelData)

In [130]:
preprocess('C:/Users/jonso/OneDrive/Desktop/Free Training Data.mp4', 'Free_Skel_Training.npy')
preprocess("C:/Users/jonso/OneDrive/Desktop/Fly Training Data.mp4", 'Fly_Skel_Training.npy')
preprocess("C:/Users/jonso/OneDrive/Desktop/Back Training Data.mp4", 'Back_Skel_Training.npy')
preprocess("C:/Users/jonso/OneDrive/Desktop/Breast Training Data.mp4", 'Breast_Skel_Training.npy')
preprocess("C:/Users/jonso/OneDrive/Desktop/Underwater Training Data.mp4", 'Underwater_Skel_Training.npy')
preprocess("C:/Users/jonso/OneDrive/Desktop/Dive Training Data.mp4", 'Dive_Skel_Training.npy')

(13973, 17, 2)
2226 total empty frames
[[     58.906      180.62]
 [     64.375       172.5]
 [     48.688      171.38]
 [     70.562         178]
 [     26.406      175.75]
 [       78.5      218.12]
 [     11.016      214.25]
 [      96.25      258.25]
 [        7.5         257]
 [     66.625       263.5]
 [     17.562      260.75]
 [     65.188      276.25]
 [     22.391         275]
 [      98.25       255.5]
 [     29.688      257.75]
 [     49.844       262.5]
 [     30.797         263]]
(19048, 17, 2)
1976 total empty frames
[[        524      339.75]
 [      517.5      334.25]
 [        521      343.25]
 [        525      322.25]
 [        528         337]
 [        567         308]
 [      563.5       325.5]
 [     505.75      287.25]
 [     496.25       298.5]
 [        447       270.5]
 [      445.5       275.5]
 [        632       309.5]
 [      627.5      319.75]
 [      563.5       285.5]
 [        557       295.5]
 [        513       271.5]
 [        501         294]]
(1

In [None]:
"""
Frame Number: 84; Data Size: (1, 58)
        
        batch_id, class_id, x, y, w, h, conf
        
[[          0           0      724.97      320.72      201.62      94.438     0.55844         
  0 699       321.5     0.24487         
  1 700      319.75     0.17566         
  2 697         322    0.088501       
  3 708.5      316.75     0.28247       
  4 700.5      320.75     0.13757         
  5 729         301     0.81641         
  6 700        314.5      0.8335       
  7 753.5      290.75     0.87598       
  8 677.5      307.25     0.87158         
  9 756        316.25     0.85107       
  10 650.5      322        0.85352       
  11 756.5      305        0.88818       
  12 744.5      313.5      0.89648         
  13 794        314.75     0.7168       
  14 786.5      325.75     0.74707
  15 792.5      307.5      0.67725         
  16 785        307.5      0.70459]]
"""