In [1]:
#This Link is Important: https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/3 
#Link to Tutorial I followed: https://www.youtube.com/watch?v=SSW9LzOJSus&ab_channel=NicholasRenotte 

In [2]:
!pip install tensorflow==2.10.0 opencv-python matplotlib



In [3]:
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
import cv2

In [4]:
#Path of the TFLite model file, relative to the notebook's working directory.
#To try MoveNet Thunder instead, point this at 'lite-model_movenet_singlepose_thunder_3.tflite'
#(the detection loop's resize size must then match that model's input size).
MODEL_PATH = 'lite-model_movenet_singlepose_lightning_3.tflite'

interpreter = tf.lite.Interpreter(model_path=MODEL_PATH)
interpreter.allocate_tensors() #tensors must be allocated before set_tensor()/invoke() can be used

In [5]:
#Inspection only - nothing below depends on this cell.
#Shows the model's input tensor spec: 'shape' and 'dtype' are what a webcam frame has to be
#converted to, and 'index' is the handle that gets passed to set_tensor().
interpreter.get_input_details()

[{'name': 'serving_default_input:0',
  'index': 0,
  'shape': array([  1, 192, 192,   3]),
  'shape_signature': array([  1, 192, 192,   3]),
  'dtype': numpy.float32,
  'quantization': (0.0, 0),
  'quantization_parameters': {'scales': array([], dtype=float32),
   'zero_points': array([], dtype=int32),
   'quantized_dimension': 0},
  'sparsity_parameters': {}}]

In [6]:
#Inspection only: True when some dimension of the model's input shape equals 192
#(192 is the Lightning input size; the Thunder variant uses 256).
print(192 in interpreter.get_input_details()[0]['shape']) #For testing only

True


In [7]:
#Inspection only - nothing below depends on this cell.
#Shows the model's output tensor spec; 'index' is the handle that gets passed to get_tensor().
interpreter.get_output_details()

[{'name': 'StatefulPartitionedCall:0',
  'index': 312,
  'shape': array([ 1,  1, 17,  3]),
  'shape_signature': array([ 1,  1, 17,  3]),
  'dtype': numpy.float32,
  'quantization': (0.0, 0),
  'quantization_parameters': {'scales': array([], dtype=float32),
   'zero_points': array([], dtype=int32),
   'quantized_dimension': 0},
  'sparsity_parameters': {}}]

In [8]:
#Drawing Keypoints

def draw_keypoints(frame, keypoints, confidence):
    """
    Marks every sufficiently confident keypoint on the frame with a filled green dot.

    frame: image array of shape (height, width, channels); it is drawn on in place.
    keypoints: array of (y, x, score) rows with y and x normalized to the frame,
        e.g. the (1, 1, 17, 3) model output; singleton axes are squeezed away.
    confidence: a keypoint is drawn only when its score is strictly greater than this.
    """
    height, width, _ = frame.shape
    #Scale y by the height and x by the width; the factor 1 leaves the score column untouched
    scale = [height, width, 1]
    pixel_keypoints = np.squeeze(np.multiply(keypoints, scale))

    for row, col, score in pixel_keypoints:
        if not score > confidence:
            continue
        #OpenCV wants (x, y) integer pixels; radius 4, green in BGR, thickness -1 = filled
        cv2.circle(frame, (int(col), int(row)), 4, (0, 255, 0), -1)
    

In [9]:
#Skeleton definition used when drawing connections: each key is a pair of keypoint indices to join with a line.
#Indices follow the MoveNet keypoint order (0 nose, 1-2 eyes, 3-4 ears, 5-6 shoulders, 7-8 elbows,
#9-10 wrists, 11-12 hips, 13-14 knees, 15-16 ankles; the left side comes first in each pair).
#The value is a one-letter color tag ('m' on left-side links, 'c' on right-side links, 'y' on the
#cross-body links); draw_connections does not read it, so it has no effect on the drawing.
EDGES = {
    (0, 1): 'm',    # nose - left eye
    (0, 2): 'c',    # nose - right eye
    (1, 3): 'm',    # left eye - left ear
    (2, 4): 'c',    # right eye - right ear
    (0, 5): 'm',    # nose - left shoulder
    (0, 6): 'c',    # nose - right shoulder
    (5, 7): 'm',    # left shoulder - left elbow
    (7, 9): 'm',    # left elbow - left wrist
    (6, 8): 'c',    # right shoulder - right elbow
    (8, 10): 'c',   # right elbow - right wrist
    (5, 6): 'y',    # left shoulder - right shoulder
    (5, 11): 'm',   # left shoulder - left hip
    (6, 12): 'c',   # right shoulder - right hip
    (11, 12): 'y',  # left hip - right hip
    (11, 13): 'm',  # left hip - left knee
    (13, 15): 'm',  # left knee - left ankle
    (12, 14): 'c',  # right hip - right knee
    (14, 16): 'c'   # right knee - right ankle
}

In [10]:
#Drawing Edges

def draw_connections(frame, keypoints, edges, confidence_threshold):
    """
    Draws the skeleton: one line per edge whose two endpoints are both confident.

    frame: image array of shape (height, width, channels); it is drawn on in place.
    keypoints: array of (y, x, score) rows with y and x normalized to the frame,
        e.g. the (1, 1, 17, 3) model output; singleton axes are squeezed away.
    edges: iterable of (start_index, end_index) keypoint index pairs. A dict keyed by
        such pairs (like EDGES) works as well - only its keys are used.
    confidence_threshold: a line is drawn only when both endpoint scores are strictly
        greater than this.
    """
    height, width, _ = frame.shape
    #Scale y by the height and x by the width; the factor 1 leaves the score column untouched
    shaped = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    #Iterating a dict yields its keys, so dicts and plain lists/sets of pairs are handled alike
    for p1, p2 in edges:
        y1, x1, c1 = shaped[p1]
        y2, x2, c2 = shaped[p2]

        if c1 > confidence_threshold and c2 > confidence_threshold:
            #OpenCV wants (x, y) integer pixels; (255, 0, 0) is blue in BGR, line thickness 2
            cv2.line(frame, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2)

In [11]:
#Attempting to calculate angles

def calculate_angle(a, b, c):
    """
    Returns the unsigned angle at b, in degrees (0 to 180), formed by the segments b->a and b->c.

    Each point is an indexable sequence whose element 0 is the y-coordinate and element 1
    is the x-coordinate; any further elements (such as a confidence score) are ignored.
    """
    #Direction of each arm of the angle as seen from the vertex b
    heading_to_c = np.arctan2(c[0] - b[0], c[1] - b[1])
    heading_to_a = np.arctan2(a[0] - b[0], a[1] - b[1])

    radians = heading_to_c - heading_to_a
    degrees = np.abs(radians * 180.0 / np.pi)

    #The difference of two headings can reach 360; report the smaller of the two possible angles
    return 360 - degrees if degrees > 180.0 else degrees
    
    

In [18]:
#Making Detections

#Indices into MoveNet's 17-keypoint output for the arm being tracked
LEFT_SHOULDER, LEFT_ELBOW, LEFT_WRIST = 5, 7, 9
CONFIDENCE_THRESHOLD = 0.5 #minimum keypoint score for drawing and for counting reps
ARM_DOWN_ANGLE = 160 #elbow angle (degrees) above which the arm counts as extended
ARM_UP_ANGLE = 30 #elbow angle (degrees) below which a curl is complete
#Window titles keyed by the model's input size (Lightning is 192x192, Thunder is 256x256)
WINDOW_TITLES = {192: 'MoveNet Lightning', 256: 'MoveNet Thunder'}


def _letterbox_to_frame(keypoints, frame_shape, input_height, input_width):
    """
    Converts keypoints predicted on the padded model input into frame-normalized coordinates.

    resize_with_pad shrinks the frame uniformly and centres it on an
    input_height x input_width canvas, so the model's normalized (y, x) values are relative
    to that padded canvas, not to the camera frame. This undoes the padding and rescales so
    that 0..1 spans the frame again, which is what the drawing helpers expect.

    keypoints: array whose last axis is (y, x, score); it is not modified.
    frame_shape: shape of the camera frame, (height, width, ...).
    Returns a float32 copy with corrected y and x; scores are unchanged.
    """
    frame_height, frame_width = frame_shape[:2]
    #Factor by which the frame was shrunk to fit inside the model input
    scale = max(frame_height / input_height, frame_width / input_width)
    #Thickness (in model-input pixels) of the padding on the top and on the left
    pad_y = (input_height - frame_height / scale) / 2.0
    pad_x = (input_width - frame_width / scale) / 2.0

    corrected = np.array(keypoints, dtype=np.float32) #np.array copies, so the raw output stays intact
    corrected[..., 0] = (corrected[..., 0] * input_height - pad_y) * scale / frame_height
    corrected[..., 1] = (corrected[..., 1] * input_width - pad_x) * scale / frame_width
    return corrected


#Tensor metadata does not change between frames, so look it up once
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
#Input shape is (batch, height, width, channels); taking the size from the model means
#Lightning and Thunder both work without editing this cell
input_height, input_width = (int(dim) for dim in input_details[0]['shape'][1:3])
window_title = WINDOW_TITLES.get(input_height, 'MoveNet')

cap = cv2.VideoCapture(0)
counter = 0
stage = None
try:
    while cap.isOpened():
        ret, grabbed = cap.read()
        if not ret:
            #No frame delivered (camera unplugged / busy); stop instead of crashing on None
            print("Could not read a frame from the camera, stopping.")
            break
        frame = grabbed #only replace the last good frame once the read has succeeded

        #The model expects a batch of one float32 image of its own input size.
        #expand_dims adds the batch axis, resize_with_pad keeps the aspect ratio and pads the rest.
        img = tf.image.resize_with_pad(np.expand_dims(frame, axis=0), input_height, input_width)
        input_image = tf.cast(img, dtype=tf.float32)

        #Make Predictions
        interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
        interpreter.invoke()
        keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])

        try:
            left_shoulder = keypoints_with_scores[0][0][LEFT_SHOULDER]
            left_elbow = keypoints_with_scores[0][0][LEFT_ELBOW]
            left_wrist = keypoints_with_scores[0][0][LEFT_WRIST]

            #Only trust the elbow angle when all three joints were actually detected;
            #otherwise noise from an out-of-view arm could register phantom reps
            arm_visible = min(left_shoulder[2], left_elbow[2], left_wrist[2]) > CONFIDENCE_THRESHOLD
            if arm_visible:
                #The angle is taken on the raw model coordinates: the model input is square, so
                #y and x share one scale there and the angle is not distorted by the frame's aspect ratio
                angle = calculate_angle(left_shoulder, left_elbow, left_wrist)

                #Curl Counter Logic: a rep is a full extension ("down") followed by a full curl ("up")
                if angle > ARM_DOWN_ANGLE:
                    stage = "down"
                if angle < ARM_UP_ANGLE and stage == "down":
                    stage = "up"
                    counter += 1
                    print("counter: ", counter)
        except (IndexError, ValueError) as err:
            #Keep the video running, but say why this frame's rep logic was skipped
            print("Skipping rep logic for this frame: ", err)

        #Rendering (on coordinates mapped back from the padded model input to the camera frame)
        keypoints_for_frame = _letterbox_to_frame(keypoints_with_scores, frame.shape, input_height, input_width)
        draw_connections(frame, keypoints_for_frame, EDGES, CONFIDENCE_THRESHOLD)
        draw_keypoints(frame, keypoints_for_frame, CONFIDENCE_THRESHOLD)

        cv2.imshow(window_title, frame)
        #Exiting the Camera (Press q)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    #Always free the camera and close the window, even after an error or a kernel interrupt
    cap.release()
    cv2.destroyAllWindows()

counter:  1
counter:  2
counter:  3
counter:  4
counter:  5
counter:  6
counter:  7
counter:  8
counter:  9
counter:  10
counter:  11


In [13]:
#Sanity check on the last prediction left behind by the webcam loop:
#two leading singleton axes, then 17 keypoints of (y, x, score) each.
keypoints_with_scores.shape #For testing (this is as expected from the documentation)

(1, 1, 17, 3)

In [14]:
# Outputs
# A float32 tensor of shape [1, 1, 17, 3].
        # The first two channels of the last dimension represents the yx coordinates (normalized to image frame, i.e. range in [0.0, 1.0]) of the 17 keypoints 
        #(in the order of: [nose, left eye, right eye, left ear, right ear, left shoulder, right shoulder, left elbow, right elbow, left wrist, right wrist, left hip, right hip, left knee, right knee, left ankle, right ankle]).
        # The third channel of the last dimension represents the prediction confidence scores of each keypoint, also in the range [0.0, 1.0].

In [15]:
#Pull single keypoints out of the last prediction, using the body-part order listed in the cell above
#(index 2 = right eye, index 7 = left elbow). Each one is a length-3 array: [y, x, confidence].
right_eye = keypoints_with_scores[0][0][2]
left_elbow = keypoints_with_scores[0][0][7]
left_elbow #bare last expression, so the notebook displays one of them

array([0.64665157, 0.6305033 , 0.7997859 ], dtype=float32)

In [16]:
#Scale the normalized (y, x) pair to integer pixel units; 480 and 640 are the frame's height and
#width as reported by frame.shape (hard-coded here, so they must be changed for another camera).
#NOTE(review): the model is fed a resize_with_pad'ed (letterboxed) image, so for a non-square frame
#this plain multiply probably does not land exactly on the matching frame pixel - confirm before relying on it.
np.array(left_elbow[:2]*[480,640]).astype(int) #standardized coordinates to frame coordinates

array([310, 403])

In [17]:
#Inspection only: (height, width, channels) of the last frame captured by the webcam loop.
frame.shape

(480, 640, 3)