In [6]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
from matplotlib import pyplot as plt
import cv2

In [7]:
# Download (or load from the local TF Hub cache) the single-pose MoveNet
# "Lightning" model, version 4.
model = hub.load(
    "https://tfhub.dev/google/movenet/singlepose/lightning/4"
)
# The default serving signature is the callable used to run inference.
movenet = model.signatures["serving_default"]

In [66]:
def connect_keypoints(frame, keypoints, edges, confidence):
    """Draw skeleton segments between pairs of detected keypoints, in place.

    Args:
        frame: Image array of shape (height, width, channels) that is drawn on.
        keypoints: Array whose last two axes are (num_keypoints, 3), each row
            being (y, x, score) with y and x normalized to the frame size.
            Leading singleton axes are squeezed away.
        edges: Mapping of (start_index, end_index) -> colour. The colour may
            be a single-letter code ('m', 'c', 'y', 'r', 'g', 'b') or a
            3-item BGR sequence; anything unrecognized falls back to red.
        confidence: Both endpoints must score strictly above this threshold
            for the segment to be drawn.
    """
    # Letter codes translated to OpenCV's BGR channel order.
    bgr_by_code = {
        'm': (255, 0, 255),
        'c': (255, 255, 0),
        'y': (0, 255, 255),
        'r': (0, 0, 255),
        'g': (0, 255, 0),
        'b': (255, 0, 0),
    }
    default_bgr = (0, 0, 255)

    height, width, _ = frame.shape
    # Scale normalized (y, x) to pixels; the score column is multiplied by 1.
    shaped = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    for (p1, p2), color in edges.items():
        y1, x1, c1 = shaped[p1]
        y2, x2, c2 = shaped[p2]
        if c1 > confidence and c2 > confidence:
            if color is None or isinstance(color, str):
                bgr = bgr_by_code.get(color, default_bgr)
            else:
                bgr = tuple(int(channel) for channel in color)
            cv2.line(frame, (int(x1), int(y1)), (int(x2), int(y2)), bgr, 2)

In [62]:
def render_keypoints(frame, keypoints, confidence):
    """Mark every sufficiently confident keypoint on ``frame`` with a filled red dot.

    Args:
        frame: Image array of shape (height, width, channels); drawn on in place.
        keypoints: Array whose rows are (y, x, score), with y and x normalized
            to the frame size. Extra singleton axes are removed with
            ``np.squeeze`` so the rows can be iterated directly.
        confidence: Score threshold; only keypoints scoring strictly above it
            are drawn.
    """
    height, width, _ = frame.shape
    # Convert normalized coordinates to pixels; the score column is left as is.
    pixel_points = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    for point in pixel_points:
        row_px, col_px, score = point
        if not (score > confidence):
            continue
        # OpenCV wants the centre as (x, y); radius 4, thickness -1 fills the circle.
        cv2.circle(frame, (int(col_px), int(row_px)), 4, (0, 0, 255), -1)

In [64]:
# Skeleton definition: each key is a (start, end) pair of indices into the
# 17-row keypoint output, and each value is a single-letter colour tag.
# Body-part grouping below follows the MoveNet keypoint order (nose, eyes,
# ears, shoulders, elbows, wrists, hips, knees, ankles).
# 'm' appears to mark left-side edges, 'c' right-side, 'y' the torso cross-links.
edges = {
    # head
    (0, 1): 'm', (0, 2): 'c',
    (1, 3): 'm', (2, 4): 'c',
    # nose to shoulders
    (0, 5): 'm', (0, 6): 'c',
    # arms
    (5, 7): 'm', (7, 9): 'm',
    (6, 8): 'c', (8, 10): 'c',
    # torso
    (5, 6): 'y',
    (5, 11): 'm', (6, 12): 'c',
    (11, 12): 'y',
    # legs
    (11, 13): 'm', (13, 15): 'm',
    (12, 14): 'c', (14, 16): 'c',
}

In [67]:
# Live demo: grab webcam frames, run MoveNet on each one and show the frame
# with the skeleton drawn on top. Press 'q' in the window to stop.
cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        # A failed grab returns ret=False with no frame; stop instead of
        # crashing on a None frame.
        if not ret:
            break

        # OpenCV delivers BGR, while MoveNet expects RGB input.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Letterbox to the 1x192x192x3 int32 tensor the model takes.
        img = tf.image.resize_with_pad(np.expand_dims(rgb, axis=0), 192, 192)
        input_image = tf.cast(img, dtype=tf.int32)

        # Make predictions.
        outputs = movenet(input_image)
        keypoints = np.array(outputs['output_0'])

        # The model's (y, x) are normalized to the padded square, not to the
        # frame. Undo the centred padding so they are normalized to the frame
        # itself; otherwise points drift along the shorter axis.
        frame_h, frame_w = frame.shape[:2]
        side = max(frame_h, frame_w)
        frame_keypoints = keypoints.copy()
        frame_keypoints[..., 0] = (keypoints[..., 0] * side - (side - frame_h) / 2) / frame_h
        frame_keypoints[..., 1] = (keypoints[..., 1] * side - (side - frame_w) / 2) / frame_w

        # Render and connect keypoints on the original (BGR) frame.
        connect_keypoints(frame, frame_keypoints, edges, 0.4)
        render_keypoints(frame, frame_keypoints, 0.4)

        cv2.imshow('MoveNet Lightning', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, including on errors or a
    # kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

# Testing: scratch cells for inspecting the keypoint output

In [23]:
# Standalone cleanup: release the capture handle and close any OpenCV windows.
# Presumably kept for manual use when the webcam cell is interrupted part-way.
cap.release()
cv2.destroyAllWindows()

In [49]:
# Inspect the keypoint array left in the kernel by the webcam cell.
# NOTE(review): this rebinds `keypoints` from itself, so it only works after
# that cell has run at least once.
keypoints = np.array(keypoints)
keypoints

array([[[[0.31869435, 0.6149604 , 0.578938  ],
         [0.28373867, 0.6319184 , 0.78487885],
         [0.2896815 , 0.5712187 , 0.58937466],
         [0.29884943, 0.6353476 , 0.42747188],
         [0.31400707, 0.48757836, 0.59329516],
         [0.48371086, 0.70575416, 0.7718184 ],
         [0.47461396, 0.418931  , 0.5442802 ],
         [0.652882  , 0.8587491 , 0.5514015 ],
         [0.41454837, 0.10369816, 0.5412239 ],
         [0.7121167 , 0.99589   , 0.17565593],
         [0.24730925, 0.30501956, 0.5433493 ],
         [0.87306684, 0.7398648 , 0.41355956],
         [0.89467096, 0.5097911 , 0.3946021 ],
         [0.85224026, 1.0009174 , 0.07488456],
         [0.8737728 , 0.4558462 , 0.04254714],
         [0.88131344, 0.6757777 , 0.02033579],
         [0.25546405, 0.293768  , 0.12477973]]]], dtype=float32)

In [51]:
# Pick two rows out of the (1, 1, 17, 3) keypoint array: index 2 and index 7.
right_eye = keypoints[0, 0, 2]
left_elbow = keypoints[0, 0, 7]

In [55]:
# Show the three values stored for keypoint index 2.
right_eye

array([0.2896815 , 0.5712187 , 0.58937466], dtype=float32)

In [58]:
# Keep only the first two values (the coordinates), dropping the score.
right_eye[:2]

array([0.2896815, 0.5712187], dtype=float32)

In [59]:
# Scale the normalized pair to pixel coordinates; 480 x 640 is the original
# (height, width) of my webcam's frames.
(right_eye[:2] * np.array([480, 640])).astype(int)

array([139, 365])