In [1]:
# Import all dependencies
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
import csv
import pandas as pd
import os
import sys
import tempfile
import tqdm
from matplotlib.collections import LineCollection
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [3]:
# try camera capture

cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        # Read feed
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera busy, unplugged or stream ended).
            # cv2.imshow would fail on the resulting None frame, so stop here.
            break

        # display to screen
        cv2.imshow('video capture', frame)

        # Break when the user presses 'q'
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the device and close the window, even after an exception or
    # a kernel interrupt, so that later cells can reopen the camera.
    cap.release()
    cv2.destroyAllWindows()

In [2]:
# Some modules to display an animation using imageio.
import imageio
from IPython.display import HTML, display

# Helper functions
# Joint names listed in keypoint-index order: the position of a name in this
# tuple is the index assigned to it in KEYPOINT_DICT.
_KEYPOINT_NAMES = (
    'nose',
    'left_eye', 'right_eye',
    'left_ear', 'right_ear',
    'left_shoulder', 'right_shoulder',
    'left_elbow', 'right_elbow',
    'left_wrist', 'right_wrist',
    'left_hip', 'right_hip',
    'left_knee', 'right_knee',
    'left_ankle', 'right_ankle',
)

# Dictionary that maps from joint names to keypoint indices.
KEYPOINT_DICT = {name: index for index, name in enumerate(_KEYPOINT_NAMES)}

# Matplotlib color names used for the skeleton. Judging by the joint indices in
# KEYPOINT_DICT, magenta marks bones touching left-side joints, cyan marks
# right-side ones and yellow marks the two bones that cross the body.
_LEFT_COLOR = 'm'
_RIGHT_COLOR = 'c'
_CROSS_COLOR = 'y'

# Maps bones, given as (start_keypoint_index, end_keypoint_index) pairs, to a
# matplotlib color name. Entry order is the order in which edges are emitted.
KEYPOINT_EDGE_INDS_TO_COLOR = dict([
    ((0, 1), _LEFT_COLOR),
    ((0, 2), _RIGHT_COLOR),
    ((1, 3), _LEFT_COLOR),
    ((2, 4), _RIGHT_COLOR),
    ((0, 5), _LEFT_COLOR),
    ((0, 6), _RIGHT_COLOR),
    ((5, 7), _LEFT_COLOR),
    ((7, 9), _LEFT_COLOR),
    ((6, 8), _RIGHT_COLOR),
    ((8, 10), _RIGHT_COLOR),
    ((5, 6), _CROSS_COLOR),
    ((5, 11), _LEFT_COLOR),
    ((6, 12), _RIGHT_COLOR),
    ((11, 12), _CROSS_COLOR),
    ((11, 13), _LEFT_COLOR),
    ((13, 15), _LEFT_COLOR),
    ((12, 14), _RIGHT_COLOR),
    ((14, 16), _RIGHT_COLOR),
])

def _keypoints_and_edges_for_display(keypoints_with_scores,
                                     height,
                                     width,
                                     keypoint_threshold=0.11):
  """Returns high confidence keypoints and edges for visualization.

  Args:
    keypoints_with_scores: A numpy array with shape [1, N, 17, 3] holding, for
      each of the N detected instances, the (y, x, score) triple of every
      keypoint in normalized coordinates. The model used in this notebook
      returns N == 1.
    height: height of the image in pixels.
    width: width of the image in pixels.
    keypoint_threshold: minimum confidence score for a keypoint to be
      visualized.

  Returns:
    A (keypoints_xy, edges_xy, edge_colors) containing:
      * the pixel coordinates of all kept keypoints, shape [K, 2];
      * the pixel coordinates of all kept skeleton edges, shape [E, 2, 2];
      * the list of E colors in which the edges should be plotted.
  """
  keypoints_all = []
  keypoint_edges_all = []
  edge_colors = []
  # Instances live on axis 1 (they are indexed as [0, idx, ...] below), so the
  # instance count must be read from axis 1 as well. Reading it from axis 0
  # only worked because both axes have length 1 for the single-pose model.
  _, num_instances, _, _ = keypoints_with_scores.shape
  for idx in range(num_instances):
    kpts_x = keypoints_with_scores[0, idx, :, 1]
    kpts_y = keypoints_with_scores[0, idx, :, 0]
    kpts_scores = keypoints_with_scores[0, idx, :, 2]
    # Scale normalized coordinates to pixels; result is [17, 2] as (x, y).
    kpts_absolute_xy = np.stack(
        [width * np.array(kpts_x), height * np.array(kpts_y)], axis=-1)
    kpts_above_thresh_absolute = kpts_absolute_xy[
        kpts_scores > keypoint_threshold, :]
    keypoints_all.append(kpts_above_thresh_absolute)

    # An edge is drawn only when both of its endpoints are confident enough.
    for edge_pair, color in KEYPOINT_EDGE_INDS_TO_COLOR.items():
      if (kpts_scores[edge_pair[0]] > keypoint_threshold and
          kpts_scores[edge_pair[1]] > keypoint_threshold):
        x_start = kpts_absolute_xy[edge_pair[0], 0]
        y_start = kpts_absolute_xy[edge_pair[0], 1]
        x_end = kpts_absolute_xy[edge_pair[1], 0]
        y_end = kpts_absolute_xy[edge_pair[1], 1]
        line_seg = np.array([[x_start, y_start], [x_end, y_end]])
        keypoint_edges_all.append(line_seg)
        edge_colors.append(color)
  if keypoints_all:
    keypoints_xy = np.concatenate(keypoints_all, axis=0)
  else:
    # Same rank as the concatenated result above ([K, 2] with K == 0).
    keypoints_xy = np.zeros((0, 2))

  if keypoint_edges_all:
    edges_xy = np.stack(keypoint_edges_all, axis=0)
  else:
    edges_xy = np.zeros((0, 2, 2))
  return keypoints_xy, edges_xy, edge_colors


def draw_prediction_on_image(
    image, keypoints_with_scores, crop_region=None, close_figure=False,
    output_image_height=None):
  """Draws the keypoint predictions on image.

  Args:
    image: A numpy array with shape [height, width, channel] representing the
      pixel values of the input image.
    keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing
      the keypoint coordinates and scores returned from the MoveNet model.
    crop_region: Optional dictionary with keys 'x_min', 'y_min', 'x_max' and
      'y_max' giving a bounding box in normalized coordinates. If provided,
      the box is also drawn on the image.
    close_figure: Unused; kept for signature compatibility. The temporary
      matplotlib figure is always closed before returning.
    output_image_height: An integer indicating the height of the output image.
      Note that the image aspect ratio will be the same as the input image.

  Returns:
    A writable uint8 numpy array with shape [out_height, out_width, 3]
    representing the image overlaid with keypoint predictions.
  """
  height, width, _ = image.shape
  aspect_ratio = float(width) / height
  fig, ax = plt.subplots(figsize=(12 * aspect_ratio, 12))
  # To remove the huge white borders
  fig.tight_layout(pad=0)
  ax.margins(0)
  # Turn off tick labels
  ax.set_yticklabels([])
  ax.set_xticklabels([])
  plt.axis('off')

  ax.imshow(image)
  line_segments = LineCollection([], linewidths=(4), linestyle='solid')
  ax.add_collection(line_segments)
  scat = ax.scatter([], [], s=60, color='#FF1493', zorder=3)

  (keypoint_locs, keypoint_edges,
   edge_colors) = _keypoints_and_edges_for_display(
       keypoints_with_scores, height, width)

  # Only push data into the artists when there is something to draw.
  if keypoint_edges.shape[0]:
    line_segments.set_segments(keypoint_edges)
    line_segments.set_color(edge_colors)
  if keypoint_locs.shape[0]:
    scat.set_offsets(keypoint_locs)

  if crop_region is not None:
    # `patches` is not imported anywhere in the notebook, so import it here;
    # without this the crop_region branch raised NameError.
    from matplotlib import patches

    xmin = max(crop_region['x_min'] * width, 0.0)
    ymin = max(crop_region['y_min'] * height, 0.0)
    rec_width = min(crop_region['x_max'], 0.99) * width - xmin
    rec_height = min(crop_region['y_max'], 0.99) * height - ymin
    rect = patches.Rectangle(
        (xmin,ymin),rec_width,rec_height,
        linewidth=1,edgecolor='b',facecolor='none')
    ax.add_patch(rect)

  # Rasterize the figure and grab its pixels. `tostring_rgb` is deprecated in
  # recent Matplotlib releases and np.frombuffer on its bytes yields a
  # read-only array; `buffer_rgba` is the supported accessor. Dropping the
  # alpha channel through ascontiguousarray gives a fresh, writable,
  # contiguous [H, W, 3] array that callers can draw on with cv2.
  fig.canvas.draw()
  rgba = np.asarray(fig.canvas.buffer_rgba())
  image_from_plot = np.ascontiguousarray(rgba[..., :3])
  plt.close(fig)
  if output_image_height is not None:
    output_image_width = int(output_image_height / height * width)
    image_from_plot = cv2.resize(
        image_from_plot, dsize=(output_image_width, output_image_height),
         interpolation=cv2.INTER_CUBIC)
  return image_from_plot

def to_gif(images, fps):
  """Converts image sequence (4D numpy array) to gif and embeds it inline.

  Args:
    images: Sequence of frames (4D numpy array) to write as an animation.
    fps: Frames per second, forwarded to imageio.mimsave.

  Returns:
    An IPython HTML object containing an <img> tag with the GIF inlined as a
    base64 data URI.

  Side effects:
    Writes (and overwrites) './animation.gif' in the working directory.
  """
  import base64  # stdlib; only needed for the inline embedding below

  gif_path = './animation.gif'
  imageio.mimsave(gif_path, images, fps=fps)
  # The original returned `embed.embed_file(...)`, but `embed` is never
  # imported in this notebook, so the call raised NameError. Build the inline
  # image tag directly with names the notebook already has in scope.
  with open(gif_path, 'rb') as gif_file:
    encoded = base64.b64encode(gif_file.read()).decode()
  return HTML('<img src="data:image/gif;base64,{}"/>'.format(encoded))

def progress(value, max=100):
  """Builds an HTML <progress> bar for display in the notebook.

  Args:
    value: Current progress value; used both as the bar position and as the
      fallback text inside the element.
    max: Value that corresponds to a full bar. The name shadows the builtin
      but is kept because it is part of the function's signature.

  Returns:
    An IPython HTML object wrapping the <progress> element.
  """
  # The attribute list previously had a stray comma after max='...', which
  # made the markup contain a bogus "," attribute.
  return HTML("""
      <progress
          value='{value}'
          max='{max}'
          style='width: 100%'
      >
          {value}
      </progress>
  """.format(value=value, max=max))

In [3]:
# Load model
# Picks a MoveNet variant from `model_name`, sets `input_size` (the square
# side length the chosen model expects) and defines `movenet(input_image)`,
# the inference function used by every later cell.
model_name = "movenet_lightning"

# Names containing "tflite" select the TFLite interpreter path; everything
# else is loaded as a SavedModel from TF Hub. With the value above, the TF Hub
# branch is taken.
# NOTE(review): none of the variant names tested below contain "tflite"
# themselves, so this branch is only reached when model_name carries it
# explicitly (presumably as a ".tflite" suffix) - confirm intended naming.
if "tflite" in model_name:
  # The `!wget` lines are IPython shell escapes: they only work inside a
  # notebook/IPython session and require wget plus network access.
  if "movenet_lightning_f16" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/float16/4?lite-format=tflite
    input_size = 192
  elif "movenet_thunder_f16" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/float16/4?lite-format=tflite
    input_size = 256
  elif "movenet_lightning_int8" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/int8/4?lite-format=tflite
    input_size = 192
  elif "movenet_thunder_int8" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/int8/4?lite-format=tflite
    input_size = 256
  else:
    raise ValueError("Unsupported model name: %s" % model_name)

  # Initialize the TFLite interpreter
  interpreter = tf.lite.Interpreter(model_path="model.tflite")
  interpreter.allocate_tensors()

  def movenet(input_image):
    """Runs detection on an input image.

    Args:
      input_image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    # TF Lite format expects tensor type of uint8.
    input_image = tf.cast(input_image, dtype=tf.uint8)
    # Tensor metadata is looked up on every call; index 0 is the only
    # input/output tensor used here.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    interpreter.set_tensor(input_details[0]['index'], input_image.numpy())
    # Invoke inference.
    interpreter.invoke()
    # Get the model prediction.
    keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])
    return keypoints_with_scores

else:
  # SavedModel path: hub.load fetches the module from tfhub.dev.
  # "lightning" is paired with a 192 px input, "thunder" with 256 px.
  if "movenet_lightning" in model_name:
    module = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
    input_size = 192
  elif "movenet_thunder" in model_name:
    module = hub.load("https://tfhub.dev/google/movenet/singlepose/thunder/4")
    input_size = 256
  else:
    raise ValueError("Unsupported model name: %s" % model_name)

  def movenet(input_image):
    """Runs detection on an input image.

    Args:
      input_image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    # The serving signature is looked up from the loaded module on each call.
    model = module.signatures['serving_default']

    # SavedModel format expects tensor type of int32.
    input_image = tf.cast(input_image, dtype=tf.int32)
    # Run model inference.
    outputs = model(input_image)
    # Output is a [1, 1, 17, 3] tensor.
    keypoints_with_scores = outputs['output_0'].numpy()
    return keypoints_with_scores

In [25]:
# Load the input image.
im = cv2.imread('tt.jpg')
if im is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising; fail here with a clear message rather than with an
    # obscure error inside TensorFlow a few lines later.
    raise FileNotFoundError("Could not read image file 'tt.jpg'")

# Add a batch axis, then resize and pad to the square input the model expects.
input_image = tf.expand_dims(im, axis=0)
input_image = tf.image.resize_with_pad(input_image, input_size, input_size)

# Run model inference.
keypoints_with_scores = movenet(input_image)

# Visualize the predictions on a 1280x1280 padded copy of the image.
display_image = tf.expand_dims(im, axis=0)
display_image = tf.cast(tf.image.resize_with_pad(
    display_image, 1280, 1280), dtype=tf.int32)
output_overlay = draw_prediction_on_image(
    np.squeeze(display_image.numpy(), axis=0), keypoints_with_scores)
print(keypoints_with_scores.flatten())

# Show the overlay until any key is pressed, then close the window so it does
# not linger (unresponsive) after the cell finishes.
cv2.imshow('img', output_overlay)
cv2.waitKey(0)
cv2.destroyAllWindows()

[0.42549047 0.43278465 0.5444765  0.38204643 0.47446996 0.7259425
 0.37924474 0.38446122 0.6296815  0.38354918 0.515577   0.56189644
 0.38273415 0.31135556 0.70981675 0.48329577 0.5898636  0.51802987
 0.51127815 0.22126776 0.6386699  0.68134236 0.69870603 0.54877305
 0.746228   0.20930386 0.4969758  0.6067182  0.75045764 0.5188302
 0.7583356  0.42308867 0.20144114 0.7807901  0.559992   0.40153325
 0.7953875  0.34320718 0.36094886 0.72064316 0.70056343 0.10607925
 0.73343766 0.28714418 0.11325663 0.7711828  0.4893267  0.16492221
 0.744725   0.49592584 0.12548757]


-1

In [12]:
# Inspect the keypoint rows of batch entry 0, instance 0 of the model output.
# NOTE(review): the saved output shown for this cell (17) does not match this
# expression - it looks like the result of a len(...) call from an earlier
# edit; re-run the cell to refresh it.
keypoints_with_scores[0][0]

17

In [6]:
import time

cap = cv2.VideoCapture(0)
# Timestamp of the previously processed frame. It has to live outside the
# loop: it used to be reset to 0 on every iteration, which made the displayed
# value 1 / (seconds since the epoch), i.e. always 0 FPS.
prevTime = time.time()
try:
    while cap.isOpened():
        # Read feed
        ret, frame = cap.read()
        if not ret:
            # No frame available; passing None on to TensorFlow would fail.
            break

        # Add a batch axis and resize/pad to the model's input resolution.
        input_image = tf.expand_dims(frame, axis=0)
        input_image = tf.image.resize_with_pad(input_image, input_size, input_size)

        # Run model inference.
        keypoints_with_scores = movenet(input_image)

        # Visualize the predictions with image.
        display_image = tf.expand_dims(frame, axis=0)
        display_image = tf.cast(tf.image.resize_with_pad(
            display_image, 1280, 1280), dtype=tf.int32)
        output_overlay = draw_prediction_on_image(
            np.squeeze(display_image.numpy(), axis=0), keypoints_with_scores)

        # Frames per second = 1 / time spent on this iteration. Guard against
        # a zero interval on platforms with a coarse clock.
        currTime = time.time()
        elapsed = currTime - prevTime
        fps = 1 / elapsed if elapsed > 0 else 0.0
        prevTime = currTime

        cv2.putText(output_overlay, f'FPS: {int(fps)}', (20, 70), cv2.FONT_HERSHEY_PLAIN, 3, (0, 196, 255), 2)
        cv2.imshow('img',output_overlay)
        # Break when the user presses 'q'
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even on error / interrupt.
    cap.release()
    cv2.destroyAllWindows()

In [20]:
# Collapse the model output into an independent 1-D feature vector and report
# the resulting Python type.
c = np.asarray(keypoints_with_scores).ravel().copy()
print(type(c))

<class 'numpy.ndarray'>


In [15]:
# Number of values in the flattened keypoint vector.
len(c)

51

In [21]:
# Show the flattened keypoint vector.
c

array([0.6559156 , 0.72443557, 0.4138219 , 0.6166811 , 0.7778954 ,
       0.639906  , 0.588055  , 0.6641348 , 0.5117934 , 0.63595104,
       0.80376625, 0.5820173 , 0.59039855, 0.5363361 , 0.46274075,
       0.75294846, 0.8168471 , 0.52782726, 0.7618448 , 0.36229166,
       0.48607445, 0.8864328 , 0.91721976, 0.23484388, 0.8864305 ,
       0.26026797, 0.13810208, 0.8405983 , 0.8048035 , 0.05706173,
       0.71531314, 0.5416248 , 0.10091946, 0.84768534, 0.49462116,
       0.12050214, 0.84226525, 0.21473715, 0.07177103, 0.83141154,
       0.7795328 , 0.05651763, 0.84563744, 0.28721422, 0.04540712,
       0.8052138 , 0.6221494 , 0.07318342, 0.7024039 , 0.5272614 ,
       0.07673904], dtype=float32)

In [17]:
# Show the raw model output from the most recent inference for comparison
# with the flattened vector.
keypoints_with_scores

array([[[[0.6559156 , 0.72443557, 0.4138219 ],
         [0.6166811 , 0.7778954 , 0.639906  ],
         [0.588055  , 0.6641348 , 0.5117934 ],
         [0.63595104, 0.80376625, 0.5820173 ],
         [0.59039855, 0.5363361 , 0.46274075],
         [0.75294846, 0.8168471 , 0.52782726],
         [0.7618448 , 0.36229166, 0.48607445],
         [0.8864328 , 0.91721976, 0.23484388],
         [0.8864305 , 0.26026797, 0.13810208],
         [0.8405983 , 0.8048035 , 0.05706173],
         [0.71531314, 0.5416248 , 0.10091946],
         [0.84768534, 0.49462116, 0.12050214],
         [0.84226525, 0.21473715, 0.07177103],
         [0.83141154, 0.7795328 , 0.05651763],
         [0.84563744, 0.28721422, 0.04540712],
         [0.8052138 , 0.6221494 , 0.07318342],
         [0.7024039 , 0.5272614 , 0.07673904]]]], dtype=float32)

In [5]:
# Create folders for dataset
DATA_PATH = os.path.join('dataset')

# Actions that we try to detect
actions = np.array(['jumping-jack', 'squat'])

# 30 videos worth of data
no_sequences = 30

# Videos are going to be 30 frames in length
sequence_length = 30

# One directory per recording: <DATA_PATH>/<action>/<sequence index>/
for action in actions:
    for sequence in range(no_sequences):
        # exist_ok=True keeps the cell re-runnable when the folders already
        # exist, while real failures (e.g. permission errors) are no longer
        # swallowed as they were by the previous bare `except: pass`.
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

In [4]:
# Root folder that holds the recorded keypoint sequences.
DATA_PATH = 'dataset'

# Labels of the actions to detect.
actions = np.array(['jumping-jack', 'squat'])

# Number of recordings (videos) per action, and number of frames per recording.
no_sequences, sequence_length = 30, 30

In [7]:

cap = cv2.VideoCapture(0)
action = "jumping-jack"
# Set when the user presses 'q' or the camera stops delivering frames, so
# that BOTH loops end. A plain `break` only left the inner frame loop and
# recording simply resumed with the next sequence.
stop_requested = False
try:
    if action == "jumping-jack":
        for sequence in range(no_sequences):
            # Make sure the target folder exists before np.save writes to it.
            sequence_dir = os.path.join(DATA_PATH, action, str(sequence))
            os.makedirs(sequence_dir, exist_ok=True)

            for frame_num in range(sequence_length):
                # Read feed
                ret, frame = cap.read()
                if not ret:
                    print('Camera read failed; stopping collection.')
                    stop_requested = True
                    break

                # Add a batch axis and resize/pad to the model input size.
                input_image = tf.expand_dims(frame, axis=0)
                input_image = tf.image.resize_with_pad(input_image, input_size, input_size)
                # Run model inference.
                keypoints_with_scores = movenet(input_image)

                # Visualize the predictions with image.
                display_image = tf.expand_dims(frame, axis=0)
                display_image = tf.cast(tf.image.resize_with_pad(
                    display_image, 1280, 1280), dtype=tf.int32)
                output_overlay = draw_prediction_on_image(
                    np.squeeze(display_image.numpy(), axis=0), keypoints_with_scores)

                if frame_num == 0:
                    # First frame of a sequence: announce it and pause 3 s so
                    # the subject can get into position.
                    cv2.putText(output_overlay, 'STARTING COLLECTION', (120,200),
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(output_overlay, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', output_overlay)
                    cv2.waitKey(3000)
                else:
                    cv2.putText(output_overlay, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', output_overlay)

                # Persist this frame's flattened keypoints as
                # <DATA_PATH>/<action>/<sequence>/<frame_num>.npy
                keypoints = np.array(keypoints_with_scores.flatten())
                npy_path = os.path.join(sequence_dir, str(frame_num))
                np.save(npy_path, keypoints)

                # Break when the user presses 'q'
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    stop_requested = True
                    break

            if stop_requested:
                break
finally:
    # Release the camera and close the window even on error / interrupt.
    cap.release()
    cv2.destroyAllWindows()

In [5]:
# For squat
action = "squat"
cap = cv2.VideoCapture(0)
# Set when the user presses 'q' or the camera stops delivering frames, so
# that BOTH loops end. A plain `break` only left the inner frame loop and
# recording simply resumed with the next sequence.
stop_requested = False
try:
    if action == "squat":
        for sequence in range(0,10):
            # Make sure the target folder exists before np.save writes to it.
            sequence_dir = os.path.join(DATA_PATH, action, str(sequence))
            os.makedirs(sequence_dir, exist_ok=True)

            for frame_num in range(20):
                # Read feed
                ret, frame = cap.read()
                if not ret:
                    print('Camera read failed; stopping collection.')
                    stop_requested = True
                    break

                # Add a batch axis and resize/pad to the model input size.
                input_image = tf.expand_dims(frame, axis=0)
                input_image = tf.image.resize_with_pad(input_image, input_size, input_size)
                # Run model inference.
                keypoints_with_scores = movenet(input_image)

                # Visualize the predictions with image.
                display_image = tf.expand_dims(frame, axis=0)
                display_image = tf.cast(tf.image.resize_with_pad(
                    display_image, 1280, 1280), dtype=tf.int32)
                output_overlay = draw_prediction_on_image(
                    np.squeeze(display_image.numpy(), axis=0), keypoints_with_scores)

                # Status line shown on every frame.
                cv2.putText(output_overlay, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

                if frame_num == 0:
                    # First frame of a sequence: show a 5 s pause notice, then
                    # a 1 s 'Start!' cue. The pause notice goes on a copy so
                    # the cue is not drawn on top of it, and the cue is now
                    # actually displayed - it used to be drawn onto the image
                    # without a following imshow.
                    pause_view = output_overlay.copy()
                    cv2.putText(pause_view, 'Pause for 5 secs', (120,200),
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.imshow('OpenCV Feed', pause_view)
                    cv2.waitKey(5000)
                    cv2.putText(output_overlay, 'Start!', (120,200),
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.imshow('OpenCV Feed', output_overlay)
                    cv2.waitKey(1000)
                else:
                    # Show to screen
                    cv2.imshow('OpenCV Feed', output_overlay)

                # Persist this frame's flattened keypoints as
                # <DATA_PATH>/<action>/<sequence>/<frame_num>.npy
                keypoints = np.array(keypoints_with_scores.flatten())
                npy_path = os.path.join(sequence_dir, str(frame_num))
                np.save(npy_path, keypoints)

                # Break when the user presses 'q'
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    stop_requested = True
                    break

            if stop_requested:
                break
finally:
    # Release the camera and close the window even on error / interrupt.
    cap.release()
    cv2.destroyAllWindows()

In [16]:
action = "x"
cap = cv2.VideoCapture(0)
# Set when the user presses 'q' or the camera stops delivering frames, so
# that BOTH loops end. A plain `break` only left the inner frame loop and
# recording simply resumed with the next sequence.
stop_requested = False
try:
    if action == "x":
        for sequence in range(10,30):
            # The folder-creation cell only prepares 'jumping-jack' and
            # 'squat', so create this action's folder here; otherwise np.save
            # fails on the missing directory.
            sequence_dir = os.path.join(DATA_PATH, action, str(sequence))
            os.makedirs(sequence_dir, exist_ok=True)

            for frame_num in range(sequence_length):
                # Read feed
                ret, frame = cap.read()
                if not ret:
                    print('Camera read failed; stopping collection.')
                    stop_requested = True
                    break

                # Add a batch axis and resize/pad to the model input size.
                input_image = tf.expand_dims(frame, axis=0)
                input_image = tf.image.resize_with_pad(input_image, input_size, input_size)
                # Run model inference.
                keypoints_with_scores = movenet(input_image)

                # Visualize the predictions with image.
                display_image = tf.expand_dims(frame, axis=0)
                display_image = tf.cast(tf.image.resize_with_pad(
                    display_image, 1280, 1280), dtype=tf.int32)
                output_overlay = draw_prediction_on_image(
                    np.squeeze(display_image.numpy(), axis=0), keypoints_with_scores)

                if frame_num == 0:
                    # First frame of a sequence: announce it and pause 3 s so
                    # the subject can get into position.
                    cv2.putText(output_overlay, 'STARTING COLLECTION', (120,200),
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(output_overlay, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', output_overlay)
                    cv2.waitKey(3000)
                else:
                    cv2.putText(output_overlay, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', output_overlay)

                # Persist this frame's flattened keypoints as
                # <DATA_PATH>/<action>/<sequence>/<frame_num>.npy
                keypoints = np.array(keypoints_with_scores.flatten())
                npy_path = os.path.join(sequence_dir, str(frame_num))
                np.save(npy_path, keypoints)

                # Break when the user presses 'q'
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    stop_requested = True
                    break

            if stop_requested:
                break
finally:
    # Release the camera and close the window even on error / interrupt.
    cap.release()
    cv2.destroyAllWindows()

KeyboardInterrupt: 

In [7]:
# Sanity check: the one-argument form of range starts at 0, so this prints
# 0 through 9.
for i in range(10):
    print(i)

0
1
2
3
4
5
6
7
8
9


In [4]:
# Rebuild and load model
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

# TensorBoard logging target and callback.
log_dir = 'Logs'
tb_callback = TensorBoard(log_dir=log_dir)

# Class labels; the softmax layer below has one output per entry.
actions = np.array(['downward-dog','jumping-jack', 'leg-up', 'squat'])

# Three stacked LSTM layers followed by a dense classification head. The
# input is a sequence of 30 frames with 51 values each.
model1 = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30,51)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),
])

model1.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [5]:
# Load weights from 'djls.h5' (resolved relative to the working directory)
# into the architecture built in the previous cell.
# NOTE(review): presumably these weights were trained on the four actions
# listed there - confirm the file matches that layer layout.
model1.load_weights('djls.h5')

In [11]:
colors = [(245,117,16), (117,245,16), (16,117,245), (255,0,0)]
def prob_viz(res, actions, input_frame, colors):
    """Draws one horizontal probability bar with a label per class.

    Args:
        res: Sequence of per-class probabilities.
        actions: Sequence of class labels, indexed like `res`.
        input_frame: Image to annotate; it is copied, not modified.
        colors: Non-empty sequence of color tuples for the bars. When there
            are more classes than colors the palette is reused cyclically.

    Returns:
        A copy of `input_frame` with the bars and labels drawn on it.
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        # Each class gets its own 40-pixel-high row starting at y = 60.
        top = 60 + num * 40
        # Wrap around the palette so that having more classes than colors no
        # longer raises IndexError.
        bar_color = colors[num % len(colors)]
        # Bar length in pixels is the probability scaled to 0..100.
        cv2.rectangle(output_frame, (0, top), (int(prob*100), top + 30), bar_color, -1)
        # str() turns numpy string scalars into plain Python strings.
        cv2.putText(output_frame, str(actions[num]), (0, top + 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (128, 0, 128), 2, cv2.LINE_AA)

    return output_frame

In [13]:
cap = cv2.VideoCapture(0)
sequence = []   # sliding window of the most recent keypoint vectors
sentence = []   # recently recognized actions shown in the top banner
threshold = 0.95
try:
    while cap.isOpened():
        # Read feed
        ret, frame = cap.read()
        if not ret:
            # No frame available; passing None on to TensorFlow would fail.
            break

        # Add a batch axis and resize/pad to the model's input resolution.
        input_image = tf.expand_dims(frame, axis=0)
        input_image = tf.image.resize_with_pad(input_image, input_size, input_size)

        # Run model inference.
        keypoints_with_scores = movenet(input_image)

        # Visualize the predictions with image.
        display_image = tf.expand_dims(frame, axis=0)
        display_image = tf.cast(tf.image.resize_with_pad(
            display_image, 1280, 1280), dtype=tf.int32)
        output_overlay = draw_prediction_on_image(
            np.squeeze(display_image.numpy(), axis=0), keypoints_with_scores)

        # Prediction
        keypoints = np.array(keypoints_with_scores.flatten())
        # Keep the window ordered oldest -> newest. The collection cells save
        # frames as 0, 1, 2, ... per sequence; inserting new frames at the
        # front fed the classifier time-reversed windows.
        # NOTE(review): assumes training loaded frames in frame_num order -
        # confirm against the training notebook.
        sequence.append(keypoints)
        sequence = sequence[-30:]

        if len(sequence) == 30:
            res = model1.predict(np.expand_dims(sequence, axis=0))[0]
            predicted_action = actions[np.argmax(res)]
            print(predicted_action)

            # Viz: record confident predictions, skipping consecutive repeats.
            if res[np.argmax(res)] > threshold:
                if not sentence or predicted_action != sentence[-1]:
                    sentence.append(predicted_action)

            # Only the five most recent actions fit in the banner.
            if len(sentence) > 5:
                sentence = sentence[-5:]
            output_overlay = prob_viz(res, actions, output_overlay, colors)

        cv2.rectangle(output_overlay, (0,0), (1280, 40), (245, 117, 16), -1)
        cv2.putText(output_overlay, ' '.join(sentence), (3,30),
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)

        cv2.imshow('img',output_overlay)
        # Break when the user presses 'q'
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even on error / interrupt.
    cap.release()
    cv2.destroyAllWindows()

jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
squat
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack
jumping-jack


In [10]:
# Manual cleanup: release the capture object held in `cap` and close every
# OpenCV window.
cap.release()
cv2.destroyAllWindows()

In [7]:
# Scratch cell: draw a filled banner across the top-left 640x40 pixels of the
# last overlay image (modified in place) and show it.
# NOTE(review): there is no cv2.waitKey call after imshow, so the window will
# probably not render or respond - confirm whether this cell is still needed.
cv2.rectangle(output_overlay, (0,0), (640, 40), (245, 117, 16), -1)
cv2.imshow('img',output_overlay)