In [None]:
pip list

# Import the required packages

In [2]:
import tensorflow as tf
import tensorflow_hub as hub
import cv2
from matplotlib import pyplot as plt
import numpy as np

# Load the MoveNet model

In [3]:
# Load the MoveNet MultiPose Lightning model from TensorFlow Hub.
MOVENET_URL = 'https://tfhub.dev/google/movenet/multipose/lightning/1'
model = hub.load(MOVENET_URL)

# The 'serving_default' signature is the callable used for inference below.
movenet = model.signatures['serving_default']

# Standard webcam/video capture code - Detections

In [4]:
# Standard webcam capture loop: show the live feed until 'q' is pressed.
cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, grabbed = cap.read()
        if not ret:
            # No image was delivered (camera busy, unplugged, stream ended).
            # cv2.imshow would raise on a None frame, so stop cleanly instead.
            break

        # Only overwrite `frame` after a successful read so the last good
        # frame stays available to the cells that follow.
        frame = grabbed
        cv2.imshow('Movenet Multipose', frame)

        # waitKey also services the display window's events; 'q' quits.
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the loop raised
    # or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Display the last frame grabbed from the webcam, without any colour conversion.
plt.imshow(frame)

In [None]:
## The image above is rendered with its channels in BGR order;
## convert the frame to RGB before handing it to matplotlib.
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
plt.imshow(frame_rgb)

In [None]:
## Input requirements for the MoveNet model
# 1) The height and width must both be multiples of 32.
# 2) The height-to-width ratio should be close enough to the original image's aspect ratio to cover it.
# 3) Make the larger side 256 (adjust this based on the speed/accuracy requirements).
#    For example, a 720p image (i.e. 720x1280 (HxW)) should be resized and padded to a 160x256 image.

# Resize the image and make Detection

In [None]:
# Webcam loop that runs MoveNet on every frame and prints the raw model output.
cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered; stop instead of crashing on a None frame.
            break

        # Resize image.
        # OpenCV delivers frames in BGR order, while the model expects RGB,
        # so convert first (this also yields a new array, leaving `frame` intact).
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Add a batch dimension, then resize with padding to 256x256. Both sides
        # must be multiples of 32; larger sizes slow inference down.
        img = tf.image.resize_with_pad(tf.expand_dims(rgb, axis=0), 256, 256)
        input_img = tf.cast(img, dtype=tf.int32)

        # Detection section
        results = movenet(input_img)
        print(results)

        cv2.imshow('Movenet Multipose', frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even if the loop raised
    # or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()

# Convert the results to a NumPy array and reshape the keypoints

In [None]:
# Refer to the README document for more info.
results
# results['output_0'] has shape (1, 6, 56):
#   1  - one set of detections; the 6 per-person arrays are wrapped inside this single outer array.
#   6  - the maximum number of people the model can detect.
#   56 - values per person: 17 keypoints * 3 values (y, x, score) = 51,
#        plus 5 values describing the bounding box.

In [None]:
# Keep only the first 51 values per person (the keypoint data).
results['output_0'].numpy()[..., :51]
# .numpy() converts the tensor to an array; [..., :51] slices the last axis of the (1, 6, 56) output.
# The values represent Y, X and score (detection confidence).

In [None]:
## Reshape into 6 people x 17 keypoints x 3 values per keypoint.
results['output_0'].numpy()[..., :51].reshape(6, 17, 3)
# The 17 keypoint joints are ordered as: [nose, left eye, right eye, left ear, right ear,
# left shoulder, right shoulder, left elbow, right elbow, left wrist, right wrist,
# left hip, right hip, left knee, right knee, left ankle, right ankle]

## Reference:
# https://tfhub.dev/google/movenet/multipose/lightning/1

In [None]:
# Extend the detection loop: reduce the raw model output to per-person keypoints.
cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered; stop instead of crashing on a None frame.
            break

        # Resize image.
        # OpenCV delivers frames in BGR order, while the model expects RGB,
        # so convert first (this also yields a new array, leaving `frame` intact).
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Add a batch dimension, then resize with padding to 256x256. Both sides
        # must be multiples of 32; larger sizes slow inference down.
        img = tf.image.resize_with_pad(tf.expand_dims(rgb, axis=0), 256, 256)
        input_img = tf.cast(img, dtype=tf.int32)

        # Detection section
        results = movenet(input_img)
        # Keep the first 51 values per person and reshape them to
        # (6 people, 17 keypoints, 3 values per keypoint).
        keypoints_with_scores = results['output_0'].numpy()[:, :, :51].reshape((6, 17, 3))
        print(keypoints_with_scores)

        cv2.imshow('Movenet Multipose', frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even if the loop raised
    # or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
## Grab the normalised coordinates for the 1st person.
keypoints_with_scores[0]
## In the recorded run the model detected the first 7 keypoints with good confidence scores,
## while the scores for the remaining keypoints were very poor,
## because the webcam could only see the upper half of the body.

# Draw connections and edges

In [14]:
#Draw Keypoints
def draw_keypoints(frame, keypoints, confidence_threshold):
    """Mark every sufficiently confident keypoint on ``frame`` with a filled green dot.

    Args:
        frame: Image array whose ``shape`` is (height, width, channels); it is
            drawn on in place.
        keypoints: Array of ``(y, x, score)`` rows. ``y`` and ``x`` are scaled
            by the frame height and width respectively, so they are expected to
            be fractions of the frame size.
        confidence_threshold: A keypoint is drawn only when its score is
            strictly greater than this value.

    Returns:
        None.
    """
    height, width, _ = frame.shape
    # Scale y by the frame height and x by the width; the score is left untouched.
    pixel_keypoints = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    for row_px, col_px, score in pixel_keypoints:
        if not score > confidence_threshold:
            continue
        # OpenCV expects points as (x, y); thickness -1 fills the circle.
        center = (int(col_px), int(row_px))
        cv2.circle(frame, center, 6, (0, 255, 0), -1)

In [15]:
## Draw edges = connections between body parts.
# Each key is a pair of keypoint indices to connect; the indices follow the
# model's joint order (0 nose, 1 left eye, 2 right eye, 3 left ear, 4 right ear,
# 5 left shoulder, 6 right shoulder, 7 left elbow, 8 right elbow, 9 left wrist,
# 10 right wrist, 11 left hip, 12 right hip, 13 left knee, 14 right knee,
# 15 left ankle, 16 right ankle).
# Each value is a one-letter colour code, presumably matplotlib-style
# (m = magenta, c = cyan, y = yellow) - TODO confirm. draw_connections does not
# use these values; it draws every edge in one fixed colour.
EDGES = {
    (0, 1): 'm',    # nose - left eye
    (0, 2): 'c',    # nose - right eye
    (1, 3): 'm',    # left eye - left ear
    (2, 4): 'c',    # right eye - right ear
    (0, 5): 'm',    # nose - left shoulder
    (0, 6): 'c',    # nose - right shoulder
    (5, 7): 'm',    # left shoulder - left elbow
    (7, 9): 'm',    # left elbow - left wrist
    (6, 8): 'c',    # right shoulder - right elbow
    (8, 10): 'c',   # right elbow - right wrist
    (5, 6): 'y',    # left shoulder - right shoulder
    (5, 11): 'm',   # left shoulder - left hip
    (6, 12): 'c',   # right shoulder - right hip
    (11, 12): 'y',  # left hip - right hip
    (11, 13): 'm',  # left hip - left knee
    (13, 15): 'm',  # left knee - left ankle
    (12, 14): 'c',  # right hip - right knee
    (14, 16): 'c'   # right knee - right ankle
}

In [16]:
def draw_connections(frame, keypoints, edges, confidence_threshold):
    """Draw a line on ``frame`` for every edge whose two endpoints are confident.

    Args:
        frame: Image array whose ``shape`` is (height, width, channels); it is
            drawn on in place.
        keypoints: Array of ``(y, x, score)`` rows. ``y`` and ``x`` are scaled
            by the frame height and width respectively.
        edges: Mapping of ``(index_a, index_b)`` keypoint pairs to a colour
            value. The colour value is not used; every line is drawn with the
            fixed colour ``(0, 0, 255)``.
        confidence_threshold: An edge is drawn only when both endpoint scores
            are strictly greater than this value.

    Returns:
        None.
    """
    height, width, _ = frame.shape
    # Scale y by the frame height and x by the width; the score is left untouched.
    scaled = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    for (start_idx, end_idx), _color in edges.items():
        start_y, start_x, start_score = scaled[start_idx]
        end_y, end_x, end_score = scaled[end_idx]

        both_confident = (start_score > confidence_threshold) and (end_score > confidence_threshold)
        if not both_confident:
            continue

        # OpenCV expects points as (x, y).
        start_point = (int(start_x), int(start_y))
        end_point = (int(end_x), int(end_y))
        cv2.line(frame, start_point, end_point, (0, 0, 255), 4)

In [17]:
# Function to loop through each person detected and render
def loop_through_people(frame, keypoints_with_scores, edges, confidence_threshold):
    """Render the skeleton of every detected person onto ``frame``.

    Args:
        frame: Image the drawing helpers draw on.
        keypoints_with_scores: Iterable with one keypoint collection per person.
        edges: Keypoint connections, forwarded to ``draw_connections``.
        confidence_threshold: Minimum score, forwarded to both drawing helpers.

    Returns:
        None.
    """
    for person_keypoints in keypoints_with_scores:
        # Lines go first so the keypoint dots end up on top of them.
        draw_connections(frame, person_keypoints, edges, confidence_threshold)
        draw_keypoints(frame, person_keypoints, confidence_threshold)

In [21]:
# Run MoveNet on a video file and render the detected skeletons on every frame.
INPUT_HEIGHT, INPUT_WIDTH = 480, 640  # model input size; both are multiples of 32


def _undo_padding(keypoints, frame_shape, input_size):
    """Map keypoints normalised to the padded model input back onto the frame.

    The model input is built with an aspect-preserving resize plus centred
    padding, so the model's (y, x) values are fractions of the padded input,
    not of the original frame. This removes the padding offset and rescales,
    returning a copy whose (y, x) are fractions of the frame. Scores are left
    unchanged. When the frame and the input share an aspect ratio the result
    equals the input.

    Args:
        keypoints: Float array whose last axis is (y, x, score).
        frame_shape: Shape of the original frame; only (height, width) are used.
        input_size: (height, width) the frame was resized and padded to.
    """
    frame_h, frame_w = frame_shape[:2]
    input_h, input_w = input_size
    # The resize uses a single scale factor so the frame fits inside the input.
    scale = min(input_h / frame_h, input_w / frame_w)
    content_h, content_w = frame_h * scale, frame_w * scale
    pad_top, pad_left = (input_h - content_h) / 2, (input_w - content_w) / 2

    adjusted = keypoints.copy()
    adjusted[..., 0] = (keypoints[..., 0] * input_h - pad_top) / content_h
    adjusted[..., 1] = (keypoints[..., 1] * input_w - pad_left) / content_w
    return adjusted


cap = cv2.VideoCapture('srk6.mp4')
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of the video (or a failed read): frame is None here, so stop
            # before any processing touches it.
            break

        # Resize image.
        # OpenCV delivers frames in BGR order, while the model expects RGB,
        # so convert first (this also yields a new array, leaving `frame` intact).
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Add a batch dimension, then resize with padding to the model input
        # size. Larger sizes slow inference down.
        img = tf.image.resize_with_pad(tf.expand_dims(rgb, axis=0), INPUT_HEIGHT, INPUT_WIDTH)
        input_img = tf.cast(img, dtype=tf.int32)

        # Detection section
        results = movenet(input_img)
        # Keep the first 51 values per person and reshape them to
        # (6 people, 17 keypoints, 3 values per keypoint).
        keypoints_with_scores = results['output_0'].numpy()[:, :, :51].reshape((6, 17, 3))
        # Express the coordinates relative to the original frame so the
        # overlay lines up even when the video's aspect ratio differs from
        # the model input's.
        keypoints_with_scores = _undo_padding(
            keypoints_with_scores, frame.shape, (INPUT_HEIGHT, INPUT_WIDTH)
        )

        # Render keypoints
        loop_through_people(frame, keypoints_with_scores, EDGES, 0.3)

        cv2.imshow('Movenet Multipose', frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release the video and close the window even if the loop raised
    # or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()