In [1]:
import itertools
import numpy as np
import os
import pandas as pd
import tempfile
import tqdm
import sys

from matplotlib import pyplot as plt

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

import tensorflow as tf
from tensorflow import keras

# Load MoveNet Thunder model
from script.data import BodyPart
from script.ml import Movenet

In [2]:
# Pose estimation with MoveNet Thunder. MoveNet's cropping algorithm is applied by running inference several times on
# the same image: once on the full frame, then on the crop region derived from the previous detection.
move_net = Movenet('./model/movenet_thunder')


def detect(input_tensor, inference_count=3, per=None):
  """Runs MoveNet on one image and returns the last detection result.

  Args:
    input_tensor: Image tensor; it is converted with `.numpy()` before being
      handed to `move_net.detect`.
    inference_count: Total number of inference passes. The first pass uses the
      full image (`reset_crop_region=True`); every further pass reuses the
      crop region from the previous detection (`reset_crop_region=False`).
    per: Kept only so existing calls keep working; it is overwritten by the
      first detection and never read.

  Returns:
    Whatever `move_net.detect` returned on the final pass (callers in this
    notebook read its `.keypoints`).
  """
  # Keep the full-image result: previously it was thrown away, so
  # `inference_count=1` returned None instead of a detection.
  per = move_net.detect(input_tensor.numpy(), reset_crop_region=True)
  # Refine using the crop region identified by the previous pass.
  for _ in range(inference_count - 1):
    per = move_net.detect(input_tensor.numpy(), reset_crop_region=False)

  return per

In [3]:
def get_center_point(landmarks, left_bodypart, right_bodypart):
  """Returns the midpoint between two landmarks.

  The `.value` of each body part is used as the index along axis 1 of
  `landmarks`; the two gathered slices are averaged.
  """
  body_part_indices = (left_bodypart.value, right_bodypart.value)
  left_point, right_point = [tf.gather(landmarks, index, axis=1) for index in body_part_indices]
  return left_point * 0.5 + right_point * 0.5

def get_pose_size(landmarks, torso_size_multiplier=2.5):
  """Returns the scale used to normalize a pose.

  The result is the larger of (a) the shoulders-to-hips distance multiplied by
  `torso_size_multiplier` and (b) the largest norm of the landmark offsets from
  the hips center.

  NOTE(review): the torso norm is taken without an axis (over the whole
  tensor) and the offsets use only index 0 along axis 0, with the norm reduced
  over axis 0 of that slice - confirm this is what is wanted for batches
  larger than one.
  """
  # Midpoints of the hips and of the shoulders.
  hips_mid = get_center_point(landmarks, BodyPart.LEFT_HIP, BodyPart.RIGHT_HIP)
  shoulders_mid = get_center_point(landmarks, BodyPart.LEFT_SHOULDER, BodyPart.RIGHT_SHOULDER)
  # Torso size is the minimum body size.
  torso_extent = tf.linalg.norm(shoulders_mid - hips_mid)

  # The pose center is the hips midpoint, expanded and broadcast so it can be
  # subtracted from the (?, 17, 2) landmark tensor.
  center = get_center_point(landmarks, BodyPart.LEFT_HIP, BodyPart.RIGHT_HIP)
  center = tf.expand_dims(center, axis=1)
  leading_dim = tf.size(landmarks) // (17*2)
  center = tf.broadcast_to(center, [leading_dim, 17, 2])

  # Offsets of the landmarks from the pose center.
  offsets = tf.gather(landmarks - center, 0, axis=0, name="dist_to_pose_center")

  torso_bound = torso_extent * torso_size_multiplier
  farthest = tf.reduce_max(tf.linalg.norm(offsets, axis=0))
  return tf.maximum(torso_bound, farthest)

def normalize_pose_landmarks(landmarks):
  """Translates the landmarks so the hips midpoint is the origin, then divides by the pose size."""
  hips_mid = get_center_point(landmarks, BodyPart.LEFT_HIP, BodyPart.RIGHT_HIP)
  # Expand and broadcast the center so it lines up with the (?, 17, 2) landmark tensor.
  origin = tf.broadcast_to(
      tf.expand_dims(hips_mid, axis=1),
      [tf.size(landmarks) // (17*2), 17, 2])
  centered = landmarks - origin
  # The pose size is computed from the already-centered landmarks.
  return centered / get_pose_size(centered)

def landmarks_to_embedding(landmarks_and_scores):
  """Turns the flat landmark input into a normalized, flattened pose embedding."""
  # View the flat input as 17 rows of 3 values.
  keypoints = keras.layers.Reshape((17, 3))(landmarks_and_scores)
  flatten = keras.layers.Flatten()
  # Only the first two values of every row are normalized; the third is dropped.
  normalized = normalize_pose_landmarks(keypoints[:, :, :2])
  # Collapse the normalized coordinates back into one vector.
  return flatten(normalized)

In [4]:
from sklearn.model_selection import train_test_split

class MoveNetClassifier(object):
    """Small Keras classifier on top of the pose embedding.

    The network takes 51 input values, converts them with
    `landmarks_to_embedding`, and ends in a softmax with one unit per entry of
    `class_names`.
    """

    def __init__(self, class_names):
        """Stores the class names; the model is created later by `build_model`."""
        self.model = None
        self.class_names = class_names

    def _require_model(self):
        """Returns the model, raising a clear error if `build_model` was never called."""
        if self.model is None:
            raise RuntimeError('No model available; call build_model() first.')
        return self.model

    def build_model(self, model_path=None):
        """Loads a saved model from `model_path`, or builds and compiles a new one.

        Args:
            model_path: Path passed to `keras.models.load_model`. When falsy a
                fresh model is constructed instead.
        """
        if model_path:
            self.model = keras.models.load_model(model_path)
            return

        # `shape` has to be a tuple: `(51)` is just the integer 51.
        inputs = keras.Input(shape=(51,))
        embedding = landmarks_to_embedding(inputs)

        layer = keras.layers.Dense(128, activation=tf.nn.relu6)(embedding)
        layer = keras.layers.Dropout(0.5)(layer)
        layer = keras.layers.Dense(64, activation=tf.nn.relu6)(layer)
        layer = keras.layers.Dropout(0.5)(layer)
        outputs = keras.layers.Dense(len(self.class_names), activation="softmax")(layer)

        model = keras.Model(inputs, outputs)
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

        self.model = model

    def train(self, X_data, y_data, test_size=0.15, monitor="val_accuracy", patience=20, batch_size=16, epochs=200,
              cp_path="best.weight.hdf5", save_path=None, mode=None, random_state=None):
        """Fits the model with checkpointing and early stopping.

        Args:
            X_data, y_data: Features and labels; split into train/validation
                parts with `train_test_split`.
            test_size: Fraction (or count) held out for validation.
            monitor: Metric watched by both callbacks.
            patience: Early-stopping patience in epochs.
            batch_size, epochs: Passed to `model.fit`.
            cp_path: Where the best checkpoint is written.
            save_path: If truthy, the model is saved there after training.
            mode: Direction of improvement for `monitor` ('min', 'max' or
                'auto'), applied to both callbacks. When None the previous
                behavior is kept: 'max' for the checkpoint and Keras' default
                for early stopping. Pass 'min' when monitoring a loss.
            random_state: Seed for the train/validation split; None keeps the
                split random.

        Returns:
            The history object returned by `model.fit`.

        Raises:
            RuntimeError: If `build_model` has not been called.
        """
        model = self._require_model()
        # Split training data (X, y) into (X_train, y_train) and (X_val, y_val)
        X_train, X_val, y_train, y_val = train_test_split(
            X_data, y_data, test_size=test_size, random_state=random_state)
        # Checkpoint callback keeps the best model according to `monitor`.
        checkpoint = keras.callbacks.ModelCheckpoint(
            cp_path, monitor=monitor, verbose=1, save_best_only=True, mode=mode or 'max')
        early_stopping = keras.callbacks.EarlyStopping(
            monitor=monitor, patience=patience, mode=mode or 'auto')
        # Start training
        his = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
                        callbacks=[checkpoint, early_stopping], validation_data=(X_val, y_val))
        if save_path:
            model.save(save_path)
        return his

    def evaluate(self, X, y):
        """Returns `model.evaluate(X, y)`."""
        return self._require_model().evaluate(X, y)

    def predict(self, X):
        """Returns `model.predict(X)`."""
        return self._require_model().predict(X)

    def save_tflite(self, save_path):
        """Converts the model to TFLite and writes `pose_classifier.tflite` into `save_path`.

        The folder is created when missing. The size of the converted model is
        printed in KB.
        """
        converter = tf.lite.TFLiteConverter.from_keras_model(self._require_model())
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        model = converter.convert()

        # An empty `save_path` means the current directory; makedirs('') would fail.
        if save_path:
            os.makedirs(save_path, exist_ok=True)
        with open(os.path.join(save_path, 'pose_classifier.tflite'), 'wb') as f:
            f.write(model)
        print('Model size: %dKB' % (len(model) / 1024))

In [5]:
def load_pose_landmarks(csv_path):
  """Loads a landmark CSV and splits it into file names, features and labels.

  Args:
    csv_path: CSV with a 'file_name' column, a 'class_no' column, numeric
      landmark columns and, optionally, a 'class_name' column.

  Returns:
    A tuple `(filename, X, y)`: the unique values of 'file_name', the
    remaining columns cast to float64, and 'class_no' converted with
    `keras.utils.to_categorical`.
  """
  # Load the CSV file
  dataframe = pd.read_csv(csv_path)
  # Unique image file names (not used as a feature)
  filename = dataframe.pop('file_name').unique()
  # Extract the labels
  y = keras.utils.to_categorical(dataframe.pop('class_no'))
  # The preprocessors in this notebook also write a textual 'class_name'
  # column; it is not a feature and would make the float cast below fail.
  dataframe = dataframe.drop(columns=['class_name'], errors='ignore')
  # Everything that is left is a numeric input feature.
  X = dataframe.astype('float64')
  return filename, X, y

In [6]:
import csv

class MoveNetMark(object):
  """Helper class to preprocess pose sample images for classification.

  Every non-hidden sub-folder of `images_in_folder` is treated as one pose
  class. `process()` runs `detect` on the images of each class and writes the
  accepted landmarks of all classes to the single CSV file `csvs_out_path`.
  """
  def __init__(self, images_in_folder, csvs_out_path):
    self._images_in_folder = images_in_folder
    self._csvs_out_path = csvs_out_path

  def process(self, per_pose_class_limit=None, detection_threshold=0.1):
    """Detects landmarks for every image and writes the combined CSV.

    Args:
      per_pose_class_limit: If not None, only the first N images (sorted by
        name) of each class are processed.
      detection_threshold: An image is kept only when the lowest keypoint
        score is at least this value.

    Raises:
      RuntimeError: If no class folder exists, or a class ends up with no
        valid image.
    """
    # Pose classes are the non-hidden sub-folders; stray files are ignored
    # because every class entry is listed as a directory below.
    class_names = sorted(
        n for n in os.listdir(self._images_in_folder)
        if not n.startswith('.') and os.path.isdir(os.path.join(self._images_in_folder, n)))
    if not class_names:
      raise RuntimeError('No pose class folders found in "{}".'.format(self._images_in_folder))

    messages = []
    # The per-class CSVs are only intermediate files: TemporaryDirectory
    # removes them once they are merged (mkdtemp left them behind).
    with tempfile.TemporaryDirectory() as temp_folder:
      # Loop through the classes and preprocess its images
      for pose_class_name in class_names:
        print('Preprocessing', pose_class_name, file=sys.stderr)
        # Paths for the pose class.
        images_in_folder = os.path.join(self._images_in_folder, pose_class_name)
        # newline='' lets the csv module control line endings (avoids blank rows on Windows).
        with open(os.path.join(temp_folder, pose_class_name + '.csv'), 'w', newline='') as csv_out_file:
          csv_out_writer = csv.writer(csv_out_file, delimiter=',', quoting=csv.QUOTE_MINIMAL)
          # Get list of images
          image_names = sorted(n for n in os.listdir(images_in_folder) if not n.startswith('.'))
          if per_pose_class_limit is not None:
            image_names = image_names[:per_pose_class_limit]
          # Detect pose landmarks from each image
          valid_image_count = 0
          for image_name in tqdm.tqdm(image_names):
            image_path = os.path.join(images_in_folder, image_name)
            try:
              image = tf.io.decode_jpeg(tf.io.read_file(image_path))
              _, _, channel = image.shape
            except Exception:
              # Best-effort skip of unreadable files; KeyboardInterrupt still propagates.
              messages.append('Skipped ' + image_path + '. Invalid image.')
              continue
            # Skip images that isn't RGB because Movenet requires RGB images
            if channel != 3:
              messages.append('Skipped ' + image_path + '. Image isn\'t in RGB format.')
              continue
            person = detect(image)
            # Save landmarks only if all landmarks were detected confidently
            if min(keypoint.score for keypoint in person.keypoints) < detection_threshold:
              messages.append('Skipped ' + image_path + '. No pose was confidentlly detected.')
              continue
            # Collect (x, y, score) of every keypoint as returned by detect()
            pose_landmarks = np.array([[keypoint.coordinate.x, keypoint.coordinate.y, keypoint.score]
                                       for keypoint in person.keypoints], dtype=np.float32)
            # Write the landmark coordinates to its per-class CSV file.
            # `str` replaces the `np.str` alias, which newer NumPy no longer provides.
            csv_out_writer.writerow([image_name] + pose_landmarks.flatten().astype(str).tolist())
            valid_image_count += 1
          if not valid_image_count:
            raise RuntimeError('No valid images found for the "{}" class.'.format(pose_class_name))
      # Print the error message collected during preprocessing.
      print('\n'.join(messages))
      # Read every per-class CSV back and add the labels
      per_class_frames = []
      for class_index, class_name in enumerate(class_names):
        per_class_df = pd.read_csv(os.path.join(temp_folder, class_name + '.csv'), header=None)
        per_class_df['class_no'] = class_index
        per_class_df['class_name'] = class_name
        per_class_frames.append(per_class_df)

    # Combine all per-class frames in one go
    total_df = pd.concat(per_class_frames, axis=0)
    # Name the first column and the three columns of every body part
    header_name = ['file_name'] + [bodypart.name + suffix
                                   for bodypart in BodyPart
                                   for suffix in ('_x', '_y', '_score')]
    total_df = total_df.rename(columns=dict(zip(total_df.columns, header_name)))
    total_df.to_csv(self._csvs_out_path, index=False)

In [9]:
class MoveNetPreprocessor(object):
  """Helper class to preprocess pose sample images for classification.

  Every non-hidden sub-folder of `images_in_folder` is treated as one pose
  class. `process()` runs `detect` on the images of each class, stores the
  accepted landmarks in one CSV per class inside a temp folder, and finally
  writes the merged table to `csvs_out_path`.
  """

  def __init__(self, images_in_folder, csvs_out_path):
    self._images_in_folder = images_in_folder
    self._csvs_out_path = csvs_out_path
    self._messages = []

    # Create a temp dir to store the pose CSVs per class
    self._csvs_out_folder_per_class = tempfile.mkdtemp()

    # Pose classes are the non-hidden sub-folders; stray files are ignored
    # because process() lists every class entry as a directory.
    self._pose_class_names = sorted(
        n for n in os.listdir(self._images_in_folder)
        if not n.startswith('.') and os.path.isdir(os.path.join(self._images_in_folder, n)))

  def process(self, per_pose_class_limit=None, detection_threshold=0.1):
    """Detects landmarks for every image and writes the combined CSV.

    Args:
      per_pose_class_limit: If not None, only the first N images (sorted by
        name) of each class are processed.
      detection_threshold: An image is kept only when the lowest keypoint
        score is at least this value.

    Raises:
      RuntimeError: If no class folder exists, or a class ends up with no
        valid image.
    """
    # Loop through the classes and preprocess its images
    for pose_class_name in self._pose_class_names:
      print('Preprocessing', pose_class_name, file=sys.stderr)

      # Paths for the pose class.
      images_in_folder = os.path.join(self._images_in_folder, pose_class_name)
      csv_out_path = os.path.join(self._csvs_out_folder_per_class, pose_class_name + '.csv')

      # Detect landmarks in each image and write it to a CSV file.
      # newline='' lets the csv module control line endings (avoids blank rows on Windows).
      with open(csv_out_path, 'w', newline='') as csv_out_file:
        csv_out_writer = csv.writer(csv_out_file, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        # Get list of images
        image_names = sorted(n for n in os.listdir(images_in_folder) if not n.startswith('.'))
        if per_pose_class_limit is not None:
          image_names = image_names[:per_pose_class_limit]

        valid_image_count = 0

        # Detect pose landmarks from each image
        for image_name in tqdm.tqdm(image_names):
          image_path = os.path.join(images_in_folder, image_name)

          # Read and decode once (the file used to be loaded twice per image).
          try:
            image = tf.io.decode_jpeg(tf.io.read_file(image_path))
            _, _, channel = image.shape
          except Exception:
            # Best-effort skip of unreadable files; KeyboardInterrupt still propagates.
            self._messages.append('Skipped ' + image_path + '. Invalid image.')
            continue

          # Skip images that isn't RGB because Movenet requires RGB images
          if channel != 3:
            self._messages.append('Skipped ' + image_path + '. Image isn\'t in RGB format.')
            continue
          person = detect(image)

          # Save landmarks only if all landmarks were detected confidently
          min_landmark_score = min(keypoint.score for keypoint in person.keypoints)
          if min_landmark_score < detection_threshold:
            self._messages.append('Skipped ' + image_path + '. No pose was confidentlly detected.')
            continue

          valid_image_count += 1

          # Collect (x, y, score) of every keypoint as returned by detect()
          pose_landmarks = np.array([[keypoint.coordinate.x, keypoint.coordinate.y, keypoint.score]
                                     for keypoint in person.keypoints], dtype=np.float32)

          # Write the landmark coordinates to its per-class CSV file.
          # `str` replaces the `np.str` alias, which newer NumPy no longer provides.
          coordinates = pose_landmarks.flatten().astype(str).tolist()
          csv_out_writer.writerow([image_name] + coordinates)

        if not valid_image_count:
          raise RuntimeError('No valid images found for the "{}" class.'.format(pose_class_name))

    # Print the error message collected during preprocessing.
    print('\n'.join(self._messages))

    # Combine all per-class CSVs into a single output file
    all_landmarks_df = self._all_landmarks_as_dataframe()
    all_landmarks_df.to_csv(self._csvs_out_path, index=False)

  def class_names(self):
    """List of classes found in the training dataset."""
    return self._pose_class_names

  def _all_landmarks_as_dataframe(self):
    """Merge all per-class CSVs into a single dataframe."""
    per_class_frames = []
    for class_index, class_name in enumerate(self._pose_class_names):
      csv_out_path = os.path.join(self._csvs_out_folder_per_class, class_name + '.csv')
      per_class_df = pd.read_csv(csv_out_path, header=None)

      # Add the labels
      per_class_df['class_no'] = class_index
      per_class_df['class_name'] = class_name

      # Prefix the file name (first column) with the class folder
      first_column = per_class_df.columns[0]
      per_class_df[first_column] = os.path.join(class_name, '') + per_class_df[first_column].astype(str)

      per_class_frames.append(per_class_df)

    if not per_class_frames:
      raise RuntimeError('No pose class folders found in "{}".'.format(self._images_in_folder))

    # Concatenate once instead of growing the dataframe class by class
    total_df = pd.concat(per_class_frames, axis=0)

    # Name the first column and the three columns of every body part
    header_name = ['file_name'] + [bodypart.name + suffix
                                   for bodypart in BodyPart
                                   for suffix in ('_x', '_y', '_score')]
    header_map = dict(zip(total_df.columns, header_name))

    return total_df.rename(columns=header_map)

In [10]:
# Read the pose labels, one per line. Iterating a file keeps the trailing newline on every line, so strip it (and skip
# blank lines); otherwise the newline ends up in every label used for plots and reports.
with open('./pose_labels.txt', 'r') as labels_file:
  class_names = [line.strip() for line in labels_file if line.strip()]

images_ROOT = "./data/yoga_poses/"
images_in_train_folder = os.path.join(images_ROOT, 'train')
images_in_test_folder = os.path.join(images_ROOT, 'test')

# Keep both landmark CSVs inside one freshly created temp folder. Appending the file name to the mkdtemp() result
# without a separator placed the CSVs next to the temp folder instead of inside it.
csvs_out_folder = tempfile.mkdtemp()
csvs_out_train_path = os.path.join(csvs_out_folder, "train_data.csv")
csvs_out_test_path = os.path.join(csvs_out_folder, "test_data.csv")

# MoveNetMark takes the same constructor arguments and also offers process(), so it can be swapped in here.
movenetmark = MoveNetPreprocessor(images_in_folder=images_in_train_folder, csvs_out_path=csvs_out_train_path)
movenetmark.process()
movenetmark = MoveNetPreprocessor(images_in_folder=images_in_test_folder, csvs_out_path=csvs_out_test_path)
movenetmark.process()

Preprocessing chair
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
100%|██████████| 200/200 [00:12<00:00, 16.29it/s]
Preprocessing cobra
100%|██████████| 200/200 [00:12<00:00, 16.31it/s]
Preprocessing dog
100%|██████████| 200/200 [00:12<00:00, 16.33it/s]
Preprocessing tree
100%|██████████| 200/200 [00:12<00:00, 16.45it/s]
Preprocessing warrior
100%|██████████| 200/200 [00:12<00:00, 16.41it/s]
Preprocessing chair


Skipped ./data/yoga_poses/train\chair\girl3_chair091.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/train\chair\girl3_chair092.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/train\chair\girl3_chair093.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/train\chair\girl3_chair094.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/train\chair\girl3_chair096.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/train\chair\girl3_chair097.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/train\chair\girl3_chair099.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/train\chair\girl3_chair100.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/train\chair\girl3_chair104.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/train\chair\girl3_chair106.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/train\chair\girl3_chair110.jpg. No pose was 

100%|██████████| 84/84 [00:05<00:00, 16.36it/s]
Preprocessing cobra
100%|██████████| 116/116 [00:07<00:00, 16.34it/s]
Preprocessing dog
100%|██████████| 90/90 [00:05<00:00, 16.36it/s]
Preprocessing tree
100%|██████████| 96/96 [00:05<00:00, 16.35it/s]
Preprocessing warrior
100%|██████████| 109/109 [00:06<00:00, 16.39it/s]

Skipped ./data/yoga_poses/test\cobra\guy3_cobra048.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/test\cobra\guy3_cobra050.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/test\cobra\guy3_cobra051.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/test\cobra\guy3_cobra052.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/test\cobra\guy3_cobra053.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/test\cobra\guy3_cobra054.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/test\cobra\guy3_cobra055.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/test\cobra\guy3_cobra056.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/test\cobra\guy3_cobra057.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/test\cobra\guy3_cobra058.jpg. No pose was confidentlly detected.
Skipped ./data/yoga_poses/test\cobra\guy3_cobra059.jpg. No pose was confidentlly detected.




In [None]:
# Features and labels of the TRAIN and TEST landmark CSVs (the file names are not needed here)
_, X, y = load_pose_landmarks(csvs_out_train_path)
_, X_test, y_test = load_pose_landmarks(csvs_out_test_path)

# Build a fresh classifier for the known pose classes
classifier = MoveNetClassifier(class_names)
classifier.build_model()

# Train; the best checkpoint and the final model are written below ./model/
train_options = dict(cp_path="./model/weights.best.hdf5", save_path="./model/classifier/")
history = classifier.train(X, y, **train_options)

# Score the trained model on the TEST dataset
loss, accuracy = classifier.evaluate(X_test, y_test)

In [None]:
# Plot training and validation accuracy per epoch; a widening gap between the two hints at overfitting.
for metric_name in ('accuracy', 'val_accuracy'):
  plt.plot(history.history[metric_name])
plt.title('Model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['TRAIN', 'VAL'], loc='lower right')
plt.show()

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
  """Draws `cm` as a colored grid and prints the value of every cell on top of it.

  With `normalize=True` every row is divided by its sum first and the values
  are shown with two decimals; otherwise they are shown as integers.
  """
  if not normalize:
    print('Confusion matrix, without normalization')
  else:
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    print("Normalized confusion matrix")

  plt.imshow(cm, interpolation='nearest', cmap=cmap)
  plt.title(title)
  plt.colorbar()

  # One tick per class on both axes
  tick_positions = np.arange(len(classes))
  plt.xticks(tick_positions, classes, rotation=55)
  plt.yticks(tick_positions, classes)

  cell_format = '.2f' if normalize else 'd'
  # Cells above half of the maximum get white text, the others black
  half_max = cm.max() / 2.
  for row in range(cm.shape[0]):
    for col in range(cm.shape[1]):
      value = cm[row, col]
      text_color = "white" if value > half_max else "black"
      plt.text(col, row, format(value, cell_format), horizontalalignment="center", color=text_color)

  plt.ylabel('True label')
  plt.xlabel('Predicted label')
  plt.tight_layout()

# Run the trained model on the TEST features
y_pred = classifier.predict(X_test)

# Index of the largest entry in every row = class index
pred_indices = np.argmax(y_pred, axis=1)
true_indices = np.argmax(y_test, axis=1)

# Map the class indices to class names
y_pred_label = [class_names[i] for i in pred_indices]
y_true_label = [class_names[i] for i in true_indices]

# Confusion matrix of true vs. predicted class indices
cm = confusion_matrix(true_indices, pred_indices)
plot_confusion_matrix(cm, class_names, title ='Confusion Matrix of Pose Classification Model')

# Per-class report on the label names
print('\nClassification Report:\n', classification_report(y_true_label, y_pred_label))

In [None]:
# Convert the trained classifier to TFLite; the file pose_classifier.tflite is written into ./model/
classifier.save_tflite(save_path="./model/")