In [1]:
import torch
from torchvision import transforms

from utils.datasets import letterbox
from utils.general import non_max_suppression_kpt
from utils.plots import output_to_keypoint, plot_skeleton_kpts

import matplotlib.pyplot as plt
import cv2
import numpy as np
import statistics

import math

# Shared mutable state. The functions further down rebind or mutate these
# names through `global` statements.

# Counters
fc = 1   # frames handled in the current pass (the drivers reset it to 0 per batch)
c = 0    # frames in the current pass for which no detection could be recorded

# Per-pass collections filled by draw_keypoints
na = []  # one list of x/y keypoint values per recorded frame
nK = []  # value at index 10 of each recorded frame
lK = []  # value at index 22 of each recorded frame

# Per-frame labels collected by swimPose_train
la = []

# Not referenced by any function in the visible code
actualKp = []
currentKeypoints = []

In [2]:
# Use the first CUDA GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

def load_model(weights_path='yolov7-w6-pose.pt'):
    """Load a pose-estimation checkpoint and prepare it for inference.

    Args:
        weights_path: Path of the checkpoint file. Defaults to
            'yolov7-w6-pose.pt' in the working directory, which is the file
            this function always loaded before the parameter existed.

    Returns:
        The object stored under the checkpoint's 'model' key, cast to float32
        and switched to eval mode. When CUDA is available it is additionally
        cast to float16 and moved to ``device``.
    """
    # NOTE(security): torch.load unpickles the file, which can execute
    # arbitrary code -- only load checkpoints from a trusted source.
    # NOTE(review): recent PyTorch releases default to weights_only=True, which
    # rejects fully pickled models -- confirm against the installed version.
    checkpoint = torch.load(weights_path, map_location=device)
    model = checkpoint['model']
    # Put in inference mode
    model.float().eval()

    if torch.cuda.is_available():
        # half() turns predictions into float16 tensors
        # which significantly lowers inference time
        model.half().to(device)
    return model

# Load the network once at module level; run_inference and draw_keypoints
# read this global.
model = load_model()

In [3]:
def run_inference(image):
    """Letterbox one frame, turn it into a one-image batch and run the model.

    Args:
        image: Frame to process. It is handed to ``letterbox`` and then to
            ``transforms.ToTensor`` (presumably an H x W x 3 array -- confirm
            against the caller).

    Returns:
        ``(output, batch)``: the first element of the model's return value and
        the batched tensor that was fed to the model.
    """
    # Resize and pad; only the first item returned by letterbox is used.
    padded = letterbox(image, 960, stride=64, auto=True)[0]
    tensor = transforms.ToTensor()(padded)
    # load_model casts the network to float16 on CUDA, so the input follows suit.
    if torch.cuda.is_available():
        tensor = tensor.half().to(device)
    # Add the leading batch dimension.
    batch = tensor.unsqueeze(0)
    with torch.no_grad():
        output, _ = model(batch)
    return output, batch

In [4]:
def draw_keypoints(output, image):
    """Post-process a prediction, record the first skeleton, and draw all skeletons.

    Side effects on module globals:
        * ``t``  -- rebound to the x/y values of the first detection (every
          third value, the confidence, is dropped).
        * ``na`` -- ``t`` is appended.
        * ``nK`` / ``lK`` -- ``t[10]`` / ``t[22]`` are appended.
        * ``c``  -- incremented when the frame yields no recordable detection.

    Args:
        output: Raw model prediction, as returned by ``run_inference``.
        image: Batched image tensor, as returned by ``run_inference``; element
            0 is the frame that gets drawn on.

    Returns:
        The frame as a ``uint8`` array converted with ``cv2.COLOR_RGB2BGR``,
        with one skeleton drawn per detection.
    """
    global c, t, nK, lK, na
    output = non_max_suppression_kpt(output,
                                     0.04, # Confidence Threshold
                                     0.25, # IoU Threshold
                                     nc=model.yaml['nc'], # Number of Classes
                                     nkpt=model.yaml['nkpt'], # Number of Keypoints
                                     kpt_label=True)
    with torch.no_grad():
        output = output_to_keypoint(output)

    try:
        # Only the first detected skeleton is recorded; its last 51 values are
        # the keypoint data.
        keypoints = output[0][-51:]
        # Drop every third value (the confidence), leaving x/y pairs.
        t = [value for position, value in enumerate(keypoints) if (position + 1) % 3 != 0]
        # The original comments call index 10 the "nose" x and index 22 the
        # "right hip" x.
        # NOTE(review): in a flat x/y list index 10 is the x of the sixth pair
        # and index 22 the x of the twelfth -- confirm these are the intended joints.
        nose_x, hip_x = t[10], t[22]
    except IndexError:
        # No detection in this frame (or a row too short to index). Only
        # IndexError is caught so that KeyboardInterrupt and genuine errors are
        # no longer silently counted as "empty frames".
        c += 1
    else:
        # Record all three together so the lists never get out of step.
        nK.append(nose_x)
        lK.append(hip_x)
        na.append(t)

    # Scale the tensor image to 0-255, move it to the host and convert for OpenCV.
    nimg = image[0].permute(1, 2, 0) * 255
    nimg = nimg.cpu().numpy().astype(np.uint8)
    nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR)

    # Columns from 7 onwards of each row are passed on as the keypoint data.
    for idx in range(output.shape[0]):
        plot_skeleton_kpts(nimg, output[idx, 7:].T, 3)

    return nimg

In [5]:
# Default output file for the extracted keypoint array.
path = 'test.npy'
# Default input clip.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
video = "C:/Users/jonso/OneDrive/Desktop/Testing Data 2.mp4"
# Alternative clip and the (disabled) run:
#video = "C:/Users/jonso/OneDrive/Desktop/Free Training Data.mp4"
#swimPose_estimate(video, path)

In [6]:
# Keypoint rows accumulated over all accepted batches; the driver functions
# rebind it to a fresh list at the start of every run.
fa = []
# Not read anywhere in the visible code.
nc = 0
def swimPose_estimate(filename, savepath):
    """Extract 2-D keypoints from a video in 32-frame batches and save them as ``.npy``.

    Each batch is first processed unrotated through ``poseH``. If more than 5
    of its frames yield no detection, the batch is re-run rotated by 90
    degrees: counterclockwise when the median of ``nK`` exceeds the median of
    ``lK``, clockwise otherwise. When the unrotated pass produced no
    detections at all there is nothing to compare, so both rotations are tried
    in turn (previously ``statistics.median`` raised ``StatisticsError`` on the
    empty lists and aborted the whole run). A batch whose rotated pass still
    has more than 5 empty frames is discarded.

    Frames after the last complete 32-frame batch are not processed.

    Args:
        filename: Path of the input video, passed to ``cv2.VideoCapture``.
        savepath: Destination passed to ``np.save``; it receives an array of
            shape ``(frames_kept, 17, 2)``.

    Side effects:
        Rebinds the module globals ``fc``, ``c``, ``nK``, ``lK``, ``na`` and
        ``fa``, prints progress, and shows/closes OpenCV windows via ``poseH``.
    """
    global fc, c, nK, lK, na, fa

    batch_len = 32   # frames per batch; poseH stops after 32 frames
    max_empty = 5    # a pass with more empty frames than this is rejected
    rotation_names = {"cc": "Counterclockwise", "c": "Clockwise"}

    # The capture is only needed for the frame count, so release it right away
    # (poseH opens its own capture for every pass).
    cap = cv2.VideoCapture(filename)
    try:
        totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) // batch_len
    finally:
        cap.release()
    print(f'TF: {totalFrames}')
    fa = []

    for i in range(totalFrames):
        start = i * batch_len

        # First pass: unrotated.
        na = []
        fc = 0
        nK = []
        lK = []
        c = 0
        poseH(filename, "none", start)
        print(f'Original data: {c} empty frames')
        cv2.destroyAllWindows()

        if c > max_empty:
            if nK and lK:
                # Pick the rotation from the recorded x positions.
                if statistics.median(nK) > statistics.median(lK):
                    attempts = ("cc",)
                else:
                    attempts = ("c",)
            else:
                # Nothing was detected, so the direction cannot be inferred.
                attempts = ("cc", "c")

            for rotation in attempts:
                na = []
                fc = 0
                c = 0
                poseH(filename, rotation, start)
                print("transformation completed")
                print(f'{rotation_names[rotation]} rotation; {c} empty frames')
                if c <= max_empty:
                    fa.extend(na)
                    break
            else:
                print("too many missing frames, batch discarded")
        else:
            print("no change necessary")
            fa.extend(na)

        print(f'batch {i + 1} complete')

    print("=======================================================")
    print("-----------Skeleton Data Extraction Complete-----------")
    print("=======================================================")

    # Each stored row holds 34 values: 17 (x, y) pairs.
    x = np.array(fa)
    x = np.reshape(x, (x.shape[0], 17, 2))
    print(f'Array shape: {x.shape}')
    np.save(savepath, x)
    print(f'Data saved to: {savepath}')

    print("=======================================================")
    cv2.destroyAllWindows()

In [7]:
def swimPose_train(filename, savepath, labelpath):
    """Extract keypoints in 32-frame batches and label each kept batch by key press.

    Batches are processed as in ``swimPose_estimate``: an unrotated pass, then
    a 90-degree rotated pass if more than 5 frames had no detection
    (counterclockwise when the median of ``nK`` exceeds the median of ``lK``,
    clockwise otherwise, both in turn when nothing was detected at all).

    After every batch that contributed frames, the function waits for one of
    the keys 0-5 in the OpenCV window:
    0 Freestyle, 1 Butterfly, 2 Backstroke, 3 Breastroke, 4 Underwater, 5 Dive.
    Exactly one label is stored per stored frame, so labels and keypoints stay
    aligned. Previously 32 labels were appended per batch regardless of how
    many frames were kept, and discarded batches with 6-10 empty frames were
    labelled as well; the surplus was trimmed from the end of the label array,
    which shifts labels whenever a video contains more than one class.

    Args:
        filename: Path of the input video, passed to ``cv2.VideoCapture``.
        savepath: ``np.save`` destination for the keypoints,
            shape ``(frames_kept, 17, 2)``.
        labelpath: ``np.save`` destination for the labels, one ``[label]``
            entry per kept frame.

    Side effects:
        Rebinds the module globals ``fc``, ``c``, ``nK``, ``lK``, ``na``,
        ``fa`` and ``la``, prints progress, and blocks on ``cv2.waitKey`` while
        waiting for a label.
    """
    global fc, c, nK, lK, na, fa, la

    batch_len = 32   # frames per batch; poseH stops after 32 frames
    max_empty = 5    # a pass with more empty frames than this is rejected
    rotation_names = {"cc": "Counterclockwise", "c": "Clockwise"}
    stroke_labels = {
        ord('0'): (0, "Freestyle"),
        ord('1'): (1, "Butterfly"),
        ord('2'): (2, "Backstroke"),
        ord('3'): (3, "Breastroke"),
        ord('4'): (4, "Underwater"),
        ord('5'): (5, "Dive"),
    }

    # The capture is only needed for the frame count, so release it right away
    # (poseH opens its own capture for every pass).
    cap = cv2.VideoCapture(filename)
    try:
        totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) // batch_len
    finally:
        cap.release()
    print(f'TF: {totalFrames}')
    fa = []
    la = []

    for i in range(totalFrames):
        start = i * batch_len

        # First pass: unrotated.
        na = []
        fc = 0
        nK = []
        lK = []
        c = 0
        poseH(filename, "none", start)
        print(f'Original data: {c} empty frames')

        kept = []   # frames of this batch that end up in fa
        if c > max_empty:
            cv2.destroyAllWindows()
            if nK and lK:
                # Pick the rotation from the recorded x positions.
                if statistics.median(nK) > statistics.median(lK):
                    attempts = ("cc",)
                else:
                    attempts = ("c",)
            else:
                # Nothing was detected, so the direction cannot be inferred.
                attempts = ("cc", "c")

            for rotation in attempts:
                na = []
                fc = 0
                c = 0
                poseH(filename, rotation, start)
                print("transformation completed")
                print(f'{rotation_names[rotation]} rotation; {c} empty frames')
                if c <= max_empty:
                    kept = na
                    break
            else:
                print("too many missing frames, batch discarded")
        else:
            print("no change necessary")
            kept = na

        fa.extend(kept)
        print(f'batch {i + 1} complete')

        if kept:
            # Block until a valid label key is pressed in the preview window.
            while True:
                key = cv2.waitKey(0) & 0xFF
                if key in stroke_labels:
                    label, stroke = stroke_labels[key]
                    print(f'Batch labeled as {stroke}')
                    # One label per stored frame keeps la aligned with fa.
                    la.extend([label] for _ in kept)
                    break
            cv2.destroyAllWindows()

    print("=======================================================")
    print("-----------Skeleton Data Extraction Complete-----------")
    print("=======================================================")

    # Each stored row holds 34 values: 17 (x, y) pairs.
    x = np.array(fa)
    x = np.reshape(x, (x.shape[0], 17, 2))
    a = np.array(la)
    print(f'Array shape: {x.shape}')
    print(f'Label shape: {a.shape}')
    np.save(savepath, x)
    np.save(labelpath, a)
    print(f'Skeleton Data saved to: {savepath}')
    print(f'Label Data saved to: {labelpath}')

    print("=======================================================")
    

In [8]:
def poseH(filename, rotation, currentFrame):
    """Run pose estimation on up to 32 frames starting at ``currentFrame``.

    Every frame read is optionally rotated, passed through ``run_inference``
    and ``draw_keypoints`` (which update the shared keypoint lists and the
    empty-frame counter), shown in the 'Pose estimation' window and written to
    'Free_Skel.mp4'. Pressing ``q`` in the window stops the pass early.

    Args:
        filename: Path of the input video.
        rotation: "cc" rotates each frame 90 degrees counterclockwise, "c"
            clockwise; any other value (the callers pass "none") leaves the
            frame as read.
        currentFrame: Index of the first frame to read.

    Side effects:
        Increments the module global ``fc`` once per processed frame; the loop
        runs while ``fc < 32``, so callers reset ``fc`` before each pass.
        'Free_Skel.mp4' is re-created on every call, so it only ever contains
        the most recent pass.
    """
    global fc

    # (width, height) of the displayed/recorded frames: portrait when rotated.
    out_size = (720, 1280) if rotation in ("cc", "c") else (1280, 720)

    cap = cv2.VideoCapture(filename)
    # VideoWriter for saving the video. It is opened with the size of the
    # frames actually written below; opening it with the source size made
    # write() receive frames of a different size whenever the two differed.
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    out = cv2.VideoWriter('Free_Skel.mp4', fourcc, 30.0, out_size)

    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, currentFrame)

        while fc < 32 and cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            if rotation == "cc":
                frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
            elif rotation == "c":
                frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)

            output, frame = run_inference(frame)
            frame = draw_keypoints(output, frame)
            fc += 1

            # Single resize straight to the output size (the former intermediate
            # resize to the source dimensions only cost time and quality).
            frame = cv2.resize(frame, out_size, interpolation=cv2.INTER_CUBIC)
            out.write(frame)
            cv2.imshow('Pose estimation', frame)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
    finally:
        # Release both handles even if inference raises part-way through.
        cap.release()
        out.release()


In [9]:
%%time
# Output files for this clip: keypoints and per-frame labels.
labelPath = 'label2_test.npy'
path = 'skel2_test.npy'
# Input clip (machine-specific absolute path).
video = "C:/Users/jonso/OneDrive/Desktop/Testing Data 2.mp4"
# Both runs are currently disabled:
#swimPose_train(video, path, labelPath)
#swimPose_estimate(video, path)
# Author's note: counterclockwise

CPU times: total: 0 ns
Wall time: 0 ns


In [10]:
%%time
# Freestyle training clip: keypoints are written to `path`.
path = 'Free_Skel_Training.npy'
video = "C:/Users/jonso/OneDrive/Desktop/Free Training Data.mp4"
swimPose_estimate(video, path)
# Author's note: counterclockwise

TF: 285


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Original data: 17 empty frames
transformation completed
Counterclockwise rotation; 2 empty frames
batch 1 complete
Original data: 16 empty frames
transformation completed
Counterclockwise rotation; 0 empty frames
batch 2 complete
Original data: 12 empty frames
transformation completed
Counterclockwise rotation; 0 empty frames
batch 3 complete
Original data: 18 empty frames
transformation completed
Counterclockwise rotation; 0 empty frames
batch 4 complete
Original data: 6 empty frames
transformation completed
Counterclockwise rotation; 0 empty frames
batch 5 complete
Original data: 18 empty frames
transformation completed
Counterclockwise rotation; 0 empty frames
batch 6 complete
Original data: 9 empty frames
transformation completed
Counterclockwise rotation; 0 empty frames
batch 7 complete
Original data: 13 empty frames
transformation completed
Counterclockwise rotation; 0 empty frames
batch 8 complete
Original data: 8 empty frames
transformation completed
Counterclockwise rotation; 

In [11]:
%%time
# Butterfly training clip: keypoints are written to `path`.
path = 'Fly_Skel_Training.npy'
video = "C:/Users/jonso/OneDrive/Desktop/Fly Training Data.mp4"
swimPose_estimate(video, path)
# Author's note: counterclockwise
# NOTE(review): the note is repeated verbatim in every run cell -- confirm it applies to this clip.

TF: 236
Original data: 0 empty frames
no change necessary
batch 1 complete
Original data: 0 empty frames
no change necessary
batch 2 complete
Original data: 2 empty frames
no change necessary
batch 3 complete
Original data: 0 empty frames
no change necessary
batch 4 complete
Original data: 0 empty frames
no change necessary
batch 5 complete
Original data: 0 empty frames
no change necessary
batch 6 complete
Original data: 0 empty frames
no change necessary
batch 7 complete
Original data: 0 empty frames
no change necessary
batch 8 complete
Original data: 0 empty frames
no change necessary
batch 9 complete
Original data: 0 empty frames
no change necessary
batch 10 complete
Original data: 0 empty frames
no change necessary
batch 11 complete
Original data: 0 empty frames
no change necessary
batch 12 complete
Original data: 0 empty frames
no change necessary
batch 13 complete
Original data: 1 empty frames
no change necessary
batch 14 complete
Original data: 0 empty frames
no change necessary

In [12]:
%%time
# Backstroke training clip: keypoints are written to `path`.
path = 'Back_Skel_Training.npy'
video = "C:/Users/jonso/OneDrive/Desktop/Back Training Data.mp4"
swimPose_estimate(video, path)
# Author's note: counterclockwise
# NOTE(review): the note is repeated verbatim in every run cell -- confirm it applies to this clip.

TF: 231
Original data: 0 empty frames
no change necessary
batch 1 complete
Original data: 0 empty frames
no change necessary
batch 2 complete
Original data: 0 empty frames
no change necessary
batch 3 complete
Original data: 0 empty frames
no change necessary
batch 4 complete
Original data: 0 empty frames
no change necessary
batch 5 complete
Original data: 2 empty frames
no change necessary
batch 6 complete
Original data: 5 empty frames
no change necessary
batch 7 complete
Original data: 8 empty frames
transformation completed
Clockwise rotation; 1 empty frames
batch 8 complete
Original data: 8 empty frames
transformation completed
Counterclockwise rotation; 0 empty frames
batch 9 complete
Original data: 17 empty frames
transformation completed
Counterclockwise rotation; 0 empty frames
batch 10 complete
Original data: 0 empty frames
no change necessary
batch 11 complete
Original data: 0 empty frames
no change necessary
batch 12 complete
Original data: 0 empty frames
no change necessary


In [13]:
%%time
# Breaststroke training clip: keypoints are written to `path`.
path = 'Breast_Skel_Training.npy'
video = "C:/Users/jonso/OneDrive/Desktop/Breast Training Data.mp4"
swimPose_estimate(video, path)
# Author's note: counterclockwise
# NOTE(review): the note is repeated verbatim in every run cell -- confirm it applies to this clip.

TF: 223
Original data: 3 empty frames
no change necessary
batch 1 complete
Original data: 3 empty frames
no change necessary
batch 2 complete
Original data: 0 empty frames
no change necessary
batch 3 complete
Original data: 0 empty frames
no change necessary
batch 4 complete
Original data: 0 empty frames
no change necessary
batch 5 complete
Original data: 3 empty frames
no change necessary
batch 6 complete
Original data: 2 empty frames
no change necessary
batch 7 complete
Original data: 0 empty frames
no change necessary
batch 8 complete
Original data: 2 empty frames
no change necessary
batch 9 complete
Original data: 2 empty frames
no change necessary
batch 10 complete
Original data: 0 empty frames
no change necessary
batch 11 complete
Original data: 7 empty frames
transformation completed
Clockwise rotation; 3 empty frames
batch 12 complete
Original data: 6 empty frames
transformation completed
Counterclockwise rotation; 9 empty frames
too many missing frames, batch discarded
batch 1

In [14]:
%%time
# Underwater training clip: keypoints are written to `path`.
path = 'Underwater_Skel_Training.npy'
video = "C:/Users/jonso/OneDrive/Desktop/Underwater Training Data.mp4"
swimPose_estimate(video, path)
# Author's note: counterclockwise
# NOTE(review): the note is repeated verbatim in every run cell -- confirm it applies to this clip.

TF: 55
Original data: 11 empty frames
transformation completed
Clockwise rotation; 0 empty frames
batch 1 complete
Original data: 1 empty frames
no change necessary
batch 2 complete
Original data: 0 empty frames
no change necessary
batch 3 complete
Original data: 5 empty frames
no change necessary
batch 4 complete
Original data: 0 empty frames
no change necessary
batch 5 complete
Original data: 0 empty frames
no change necessary
batch 6 complete
Original data: 1 empty frames
no change necessary
batch 7 complete
Original data: 19 empty frames
transformation completed
Counterclockwise rotation; 7 empty frames
too many missing frames, batch discarded
batch 8 complete
Original data: 0 empty frames
no change necessary
batch 9 complete
Original data: 0 empty frames
no change necessary
batch 10 complete
Original data: 0 empty frames
no change necessary
batch 11 complete
Original data: 1 empty frames
no change necessary
batch 12 complete
Original data: 7 empty frames
transformation completed
C

In [15]:
%%time
# Dive training clip; the extraction run is currently disabled.
path = 'Dive_Skel_Training.npy'
video = "C:/Users/jonso/OneDrive/Desktop/Dive Training Data.mp4"
#swimPose_estimate(video, path)
# Author's note: counterclockwise

CPU times: total: 0 ns
Wall time: 0 ns
