# 1. Import Libraries

In [3]:
import tensorflow as tf
import tensorflow_hub as hub
import cv2
from matplotlib import pyplot as plt
import numpy as np

# 2. GPU Check & Prevent Out-of-Memory Errors

In [6]:
# List the GPUs visible to TensorFlow and turn on memory growth for each one.
gpu_check = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpu_check:
    tf.config.experimental.set_memory_growth(gpu, enable=True)
# Print the device list; an empty list means no GPU was found.
print(gpu_check)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# 3. Load Model

In [7]:
# Load the MoveNet multipose "lightning" model (version 1) from its TensorFlow Hub URL.
model = hub.load('https://tfhub.dev/google/movenet/multipose/lightning/1')

In [8]:
# Pick the model's 'serving_default' signature; this is the callable the
# detection loops invoke on each prepared frame.
exr_movenet = model.signatures['serving_default']

# 4. Make Detections

In [34]:
# Render the pose overlay for every detected person
def loop_through_people(frame, keypoints_with_scores, edges, confidence_threshold):
    """Draw connections and keypoints for each person onto ``frame``.

    Args:
        frame: Image handed unchanged to ``draw_connections`` and
            ``draw_keypoints``.
        keypoints_with_scores: Iterable yielding one keypoint set per person.
        edges: Keypoint-pair mapping forwarded to ``draw_connections``.
        confidence_threshold: Score cut-off forwarded to both helpers.
    """
    for person_keypoints in keypoints_with_scores:
        # Connections are drawn first, then the keypoint markers.
        draw_connections(frame, person_keypoints, edges, confidence_threshold)
        draw_keypoints(frame, person_keypoints, confidence_threshold)

In [35]:
# Online Video Capture
# NOTE: EDGES, draw_keypoints and draw_connections are defined in cells further
# down this notebook; those cells must be executed before this one.
cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, grabbed = cap.read()
        if not ret:
            # The read failed, so there is no image to process; stop instead
            # of calling methods on a missing frame.
            break
        # `frame` only ever holds a successfully read image, so the inspection
        # cells that follow can still use the last good frame.
        frame = grabbed

        # OpenCV delivers BGR pixels, while the MoveNet model card specifies
        # RGB input. cvtColor returns a new array, so `frame` stays untouched
        # for drawing and display.
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Resize image: recommended minimum size is 256; the best shape depends
        # on the frame's aspect ratio.
        # NOTE(review): the keypoints returned below are normalised to this
        # padded 256x256 input, but the drawing helpers scale them by the
        # frame's own height/width, so the overlay is offset whenever the frame
        # is not square -- confirm and pick an input shape matching the camera.
        img = tf.image.resize_with_pad(tf.expand_dims(img, axis=0), 256, 256)

        # Convert image to integer
        convert_image = tf.cast(img, dtype=tf.int32)

        # Detection: keep the first 51 values of each detection (17 keypoints
        # x 3 values) and reshape to 6 people x 17 keypoints x (y, x, score).
        captured_res = exr_movenet(convert_image)
        keypoints_with_scores = captured_res['output_0'].numpy()[:,:,:51].reshape((6,17,3))

        # Render keypoints; 0.1 is the confidence threshold
        loop_through_people(frame, keypoints_with_scores, EDGES, 0.1)

        cv2.imshow('Movenet Multipose', frame)

        # Press 'q' to quit
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a step above raised.
    cap.release()
    cv2.destroyAllWindows()

In [10]:
# Inspect the pixel array of the last frame left over from the capture loop
frame

array([[[217, 201, 191],
        [217, 201, 191],
        [217, 201, 191],
        ...,
        [116, 112, 110],
        [116, 112, 110],
        [116, 112, 110]],

       [[217, 201, 191],
        [217, 201, 191],
        [217, 201, 191],
        ...,
        [117, 112, 110],
        [118, 112, 110],
        [118, 112, 110]],

       [[219, 201, 191],
        [218, 201, 191],
        [217, 201, 191],
        ...,
        [116, 112, 110],
        [116, 112, 110],
        [116, 112, 110]],

       ...,

       [[  9,   9,   9],
        [  9,   9,   9],
        [  9,   9,   9],
        ...,
        [ 57,  44,  35],
        [ 77,  65,  57],
        [ 98,  86,  78]],

       [[  9,   9,   9],
        [  9,   9,   9],
        [  9,   9,   9],
        ...,
        [ 94,  81,  72],
        [ 56,  44,  36],
        [ 34,  22,  14]],

       [[  9,   9,   9],
        [  9,   9,   9],
        [  9,   9,   9],
        ...,
        [127, 113, 104],
        [135, 121, 113],
        [117, 103,  95]]

In [None]:
# Show the last frame with matplotlib, converting it from BGR to RGB first
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

In [36]:
# Detection via Video
# NOTE: EDGES, draw_keypoints and draw_connections are defined in cells further
# down this notebook; those cells must be executed before this one.
# The longest side of the model input should be at least 256 pixels and both
# dimensions need to be a multiple of 32.
cap = cv2.VideoCapture('#filename.mp4')
try:
    while cap.isOpened():
        ret, grabbed = cap.read()
        if not ret:
            # No frame was returned (this is what happens once the file has
            # been fully played); stop instead of calling methods on a missing
            # frame.
            break
        # `frame` only ever holds a successfully read image, so the inspection
        # cells that follow can still use the last good frame.
        frame = grabbed

        # OpenCV delivers BGR pixels, while the MoveNet model card specifies
        # RGB input. cvtColor returns a new array, so `frame` stays untouched
        # for drawing and display.
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Resize image: 384x640 approximates the video's aspect ratio with
        # both sides a multiple of 32.
        # NOTE(review): the keypoints returned below are normalised to this
        # padded input, but the drawing helpers scale them by the frame's own
        # height/width, so any padding shifts the overlay slightly -- confirm.
        img = tf.image.resize_with_pad(tf.expand_dims(img, axis=0), 384, 640)

        # Convert image to integer
        convert_image = tf.cast(img, dtype=tf.int32)

        # Detection: keep the first 51 values of each detection (17 keypoints
        # x 3 values) and reshape to 6 people x 17 keypoints x (y, x, score).
        captured_res = exr_movenet(convert_image)
        keypoints_with_scores = captured_res['output_0'].numpy()[:,:,:51].reshape((6,17,3))

        # Render keypoints; 0.1 is the confidence threshold
        loop_through_people(frame, keypoints_with_scores, EDGES, 0.1)

        cv2.imshow('Movenet Multipose', frame)

        # Press 'q' to quit
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always release the video handle and close the window, even on error.
    cap.release()
    cv2.destroyAllWindows()

In [27]:
# Check the size of the last frame; the shape is (height, width, channels)
frame.shape

(1080, 1920, 3)

In [28]:
# Height-to-width ratio of the captured frame (1080 x 1920); the model input
# size should keep a similar aspect ratio
1080 / 1920

0.5625

In [30]:
# Width that goes with a height of 360 at the 0.5625 ratio
360 / 0.5625

640.0

In [31]:
# Both dimensions need to be a multiple of 32; check whether 360 is one
360 / 32

11.25

In [32]:
# Round up to 12 blocks of 32 to get the nearest larger valid height
32 * 12

384

In [19]:
# Inspect the keypoint array produced for the last processed frame.
# Values are stored per keypoint in the format y_0, x_0, s_0, y_1, x_1, s_1, ...
# Order of the 17 keypoints: [nose, left_eye, right_eye, left_ear, right_ear,
# left_shoulder, right_shoulder, left_elbow, right_elbow, left_wrist, right_wrist,
# left_hip, right_hip, left_knee, right_knee, left_ankle, right_ankle]

keypoints_with_scores

array([[[0.25835884, 0.5408215 , 0.05128162],
        [0.24413782, 0.53405464, 0.05231812],
        [0.24122077, 0.53682107, 0.02042377],
        [0.22610329, 0.5441764 , 0.0272614 ],
        [0.21237728, 0.56046075, 0.00351454],
        [0.28716594, 0.57216674, 0.586503  ],
        [0.25457135, 0.64240664, 0.2207593 ],
        [0.4229686 , 0.58244807, 0.42630887],
        [0.3455741 , 0.7109286 , 0.3591295 ],
        [0.48520324, 0.5320727 , 0.30815354],
        [0.43212816, 0.7534434 , 0.02635468],
        [0.4918021 , 0.6258785 , 0.92411363],
        [0.48787254, 0.670015  , 0.87747365],
        [0.6930932 , 0.5859362 , 0.48881927],
        [0.6957552 , 0.6709063 , 0.75257313],
        [0.8826117 , 0.5557196 , 0.1138115 ],
        [0.8701302 , 0.691549  , 0.53081506]],

       [[0.44438303, 0.16775623, 0.41339478],
        [0.44094476, 0.17436382, 0.4793458 ],
        [0.43957376, 0.16225688, 0.38548675],
        [0.44962955, 0.1819278 , 0.4439863 ],
        [0.44872457, 0.15537277,

# 5. Draw Keypoints (run this cell before the detection loops in section 4)

In [13]:
# Mark every sufficiently confident keypoint of one person on the frame
def draw_keypoints(frame, keypoints, confidence_threshold):
    """Draw a filled green circle on ``frame`` for each confident keypoint.

    Args:
        frame: Image array whose shape unpacks as (height, width, channels);
            it is drawn on in place.
        keypoints: Array of (y, x, score) rows with y and x as fractions of
            the frame size; singleton dimensions are squeezed away.
        confidence_threshold: A keypoint is drawn only when its score is
            strictly greater than this value.
    """
    height, width, _ = frame.shape
    # Scale y by the height and x by the width; the score column is
    # multiplied by 1 and so left as it is.
    scaled = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    for row, col, score in scaled:
        if not score > confidence_threshold:
            continue
        # OpenCV points are (x, y); radius 6, colour (0,255,0), -1 = filled.
        cv2.circle(frame, (int(col), int(row)), 6, (0,255,0), -1)

# 6. Draw Edge Connections (run these cells before the detection loops in section 4)

In [14]:
# Skeleton definition: each key is a pair of keypoint indices to be joined by
# a line, e.g. (0, 1) links keypoint 0 (nose) to keypoint 1 (left eye).
# Each value is a one-letter tag ('m', 'c' or 'y') attached to that edge --
# presumably a matplotlib-style colour code, TODO confirm. draw_connections
# unpacks the tag but does not use it when drawing.
EDGES = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}

In [15]:
# Draw the skeleton lines between connected keypoints of one person
def draw_connections(frame, keypoints, edges, confidence_threshold):
    """Draw a line on ``frame`` for every edge whose two ends are confident.

    Args:
        frame: Image array whose shape unpacks as (height, width, channels);
            it is drawn on in place.
        keypoints: Array of (y, x, score) rows with y and x as fractions of
            the frame size; singleton dimensions are squeezed away.
        edges: Mapping from (index_a, index_b) keypoint pairs to a colour
            tag. The tag is not used; every line is drawn with (0,0,255).
        confidence_threshold: Both endpoint scores must be strictly greater
            than this value for the line to be drawn.
    """
    height, width, _ = frame.shape
    # Scale y by the height and x by the width; scores are multiplied by 1.
    scaled = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    for (start_idx, end_idx), _unused_color in edges.items():
        start_y, start_x, start_score = scaled[start_idx]
        end_y, end_x, end_score = scaled[end_idx]

        both_confident = (
            start_score > confidence_threshold
            and end_score > confidence_threshold
        )
        if both_confident:
            # OpenCV points are (x, y); line thickness is 4 pixels.
            start_point = (int(start_x), int(start_y))
            end_point = (int(end_x), int(end_y))
            cv2.line(frame, start_point, end_point, (0,0,255), 4)