In [45]:
import cv2
import time
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
%matplotlib inline

## Specify the model to be used
COCO and MPI are body pose estimation models. COCO outputs 18 keypoints and MPI outputs 15 keypoints.

HAND is a hand keypoint estimation model. It outputs 22 keypoints. (The configuration cell below only sets up COCO and MPI.)

In [6]:
# Which pose model to use. Supported values: "COCO" (18 keypoints) and
# "MPI" (15 keypoints).
MODE = "MPI"

# For the selected model this cell defines:
#   protoFile   - path to the Caffe network definition (.prototxt)
#   weightsFile - path to the trained weights (.caffemodel)
#   nPoints     - number of keypoint channels read from the network output
#   POSE_PAIRS  - pairs of keypoint indices joined when drawing the skeleton
if MODE == "COCO":
    protoFile = "pose/coco/pose_deploy_linevec.prototxt"
    weightsFile = "pose/coco/pose_iter_440000.caffemodel"
    nPoints = 18
    POSE_PAIRS = [
        [1, 0], [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7],
        [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13],
        [0, 14], [0, 15], [14, 16], [15, 17],
    ]

elif MODE == "MPI":
    protoFile = "pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt"
    weightsFile = "pose/mpi/pose_iter_160000.caffemodel"
    nPoints = 15
    POSE_PAIRS = [
        [0, 1], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7],
        [1, 14], [14, 8], [8, 9], [9, 10], [14, 11], [11, 12], [12, 13],
    ]

else:
    # Fail here rather than with a NameError in a later cell when the
    # settings above turn out to be undefined.
    raise ValueError(
        "Unsupported MODE {!r}; expected 'COCO' or 'MPI'".format(MODE)
    )
    

In [40]:
# Detect keypoints in a single frame and draw them (plus the skeleton) on it
def show_keypoints(frame, threshold=0.1):
    """Detect pose keypoints in ``frame`` and draw them, with the skeleton, on it.

    The function reads these notebook-level names, which must be defined
    before it is called: ``net``, ``inWidth``, ``inHeight``, ``nPoints`` and
    ``POSE_PAIRS``.
    NOTE(review): ``net``, ``inWidth`` and ``inHeight`` are not defined in the
    cells visible here - presumably a cv2.dnn network and its input size;
    confirm they are created in an earlier cell.

    Parameters
    ----------
    frame : array
        Image passed to ``cv2.dnn.blobFromImage``. It is drawn on IN PLACE
        (keypoint circles and skeleton lines), so pass a copy if the
        unannotated image is still needed.
    threshold : float, optional
        A keypoint is kept only when the maximum of its confidence map is
        strictly greater than this value. Defaults to 0.1.

    Returns
    -------
    (frame, points)
        ``frame`` is the same, now annotated, input array. ``points`` is a
        list of length ``nPoints`` whose entries are ``(x, y)`` integer
        coordinates in the frame, or ``None`` for keypoints at or below the
        threshold.
    """
    frameHeight, frameWidth = frame.shape[:2]

    # The blob is built before anything is drawn, so the network sees the
    # unannotated image.
    inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
                                    (0, 0, 0), swapRB=False, crop=False)
    net.setInput(inpBlob)
    output = net.forward()

    # Spatial size of the confidence maps, used to rescale to frame coordinates.
    H = output.shape[2]
    W = output.shape[3]

    points = []
    for i in range(nPoints):
        # Confidence map of the i-th keypoint.
        probMap = output[0, i, :, :]

        # Global maximum of the map and its location.
        _, prob, _, point = cv2.minMaxLoc(probMap)

        if prob > threshold:
            # Scale the map location to the original frame size.
            x = int((frameWidth * point[0]) / W)
            y = int((frameHeight * point[1]) / H)
            cv2.circle(frame, (x, y), 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
            points.append((x, y))
        else:
            # Keep list positions aligned with keypoint indices.
            points.append(None)

    # Draw the skeleton: join every pair whose two keypoints were both found.
    for partA, partB in POSE_PAIRS:
        if points[partA] is not None and points[partB] is not None:
            cv2.line(frame, points[partA], points[partB], (0, 255, 255), 3)

    return frame, points
#     plt.figure(figsize=[10,10])
#     plt.imshow(cv2.cvtColor(frameCopy, cv2.COLOR_BGR2RGB))
#     plt.figure(figsize=[10,10])
#     plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
#     plt.show()

In [51]:
def process_video(input_path, output_path):
    """Run ``show_keypoints`` on every frame of a video and collect the keypoints.

    Parameters
    ----------
    input_path : str
        Path of the video opened with ``cv2.VideoCapture``.
    output_path : str or None
        When truthy, the annotated frames are written to this path using the
        'mp4v' codec. When falsy, no video is written.

    Returns
    -------
    list
        One entry per frame read, each being the ``points`` list returned by
        ``show_keypoints``. Empty when the input could not be opened or has
        no readable frames.
    """
    video_reader = cv2.VideoCapture(input_path)
    video_writer = None
    res_points = []

    try:
        if not video_reader.isOpened():
            # Nothing to read; return before creating an output file.
            return res_points

        # Pass the frame rate through as a float so fractional rates
        # (e.g. 29.97) are not truncated.
        fps = video_reader.get(cv2.CAP_PROP_FPS)
        width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

        if output_path:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            # VideoWriter takes the frame size as (width, height).
            video_writer = cv2.VideoWriter(
                output_path, fourcc, fps,
                (width, height))

        # No window is opened by this function, so there is no per-frame
        # key polling; the loop simply runs until the reader is exhausted.
        with tqdm(total=frame_count, position=0, leave=True) as pbar:
            while True:
                ret, frame = video_reader.read()
                if not ret:
                    break

                frame, points = show_keypoints(frame)
                res_points.append(points)

                if video_writer is not None:
                    video_writer.write(frame)

                pbar.update(1)
    finally:
        # Always release the capture (and the writer, if any), including when
        # no output is requested or an exception interrupts processing.
        video_reader.release()
        if video_writer is not None:
            video_writer.release()

    return res_points

In [50]:
input_dir = 'test'
out_dir = 'out'

# Maximum number of videos to process in this run. 1 processes a single video
# (quick check); set to None to process every file in input_dir.
MAX_VIDEOS = 1

os.makedirs(out_dir, exist_ok=True)

# Sort so the processing order (and which videos fall under MAX_VIDEOS) is
# the same on every run; os.listdir returns entries in arbitrary order.
video_names = sorted(os.listdir(input_dir))
if MAX_VIDEOS is not None:
    video_names = video_names[:MAX_VIDEOS]

for video_name in video_names:
    out_video_path = os.path.join(out_dir, video_name)
    print(out_video_path)
    res_points = process_video(os.path.join(input_dir, video_name), out_video_path)

    # splitext strips only the final extension, so names containing extra
    # dots keep their full stem and do not collide.
    stem = os.path.splitext(video_name)[0]

    # NOTE: despite the .txt extension this file holds a binary pickle of the
    # per-frame keypoint lists; read it back with pickle.load in "rb" mode.
    with open(os.path.join(out_dir, stem + '.txt'), "wb") as fp:
        pickle.dump(res_points, fp)

out/signer14_sample144_color.mp4
512


  0%|          | 0/66 [00:00<?, ?it/s]