In [None]:
# Mount Google Drive into the Colab filesystem; the dataset directory used by
# later cells lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# class check
import os
from PIL import Image

classes_directory = '/content/drive/MyDrive/train'

# Map each class sub-directory name to the list of .jpg/.png paths inside it.
class_images = {}
for class_name in os.listdir(classes_directory):
    class_folder = os.path.join(classes_directory, class_name)
    if not os.path.isdir(class_folder):
        continue
    class_images[class_name] = [
        os.path.join(class_folder, filename)
        for filename in os.listdir(class_folder)
        if filename.endswith(('.jpg', '.png'))
    ]

# Report the size of every class and preview its first image.
for class_name, images in class_images.items():
    print(f"Class: {class_name}")
    print(f"Number of images: {len(images)}")
    if not images:
        # A class folder without any matching image has nothing to preview;
        # indexing images[0] here would raise IndexError.
        continue
    print(images[0])
    # The context manager closes the underlying file once the preview is done.
    with Image.open(images[0]) as sample_image:
        sample_image.show()

Class: chair
Number of images: 400
/content/drive/MyDrive/train/chair/girl1_chair085_flipped.jpg
Class: warrior
Number of images: 400
/content/drive/MyDrive/train/warrior/girl1_warrior053.jpg
Class: shoudler_stand
Number of images: 48
/content/drive/MyDrive/train/shoudler_stand/byron-yoga-salamba-sarvangasana-940px_flipped.jpg
Class: traingle
Number of images: 45
/content/drive/MyDrive/train/traingle/f0330204-800px-wm.jpg
Class: no_pose
Number of images: 26
/content/drive/MyDrive/train/no_pose/11.png
Class: tree
Number of images: 418
/content/drive/MyDrive/train/tree/girl1_tree089.jpg
Class: dog
Number of images: 400
/content/drive/MyDrive/train/dog/girl1_dog036_flipped.jpg
Class: cobra
Number of images: 400
/content/drive/MyDrive/train/cobra/girl1_cobra091.jpg


In [None]:
import tensorflow as tf
import numpy as np

In [None]:
import enum
from typing import List, NamedTuple
import numpy as np


class BodyPart(enum.Enum):
  """The 17 body keypoints, numbered 0..16.

  ``person_from_keypoints_with_scores`` maps row ``i`` of its input array to
  ``BodyPart(i)``, so the values double as row indices into that array.
  """
  NOSE = 0
  LEFT_EYE = 1
  RIGHT_EYE = 2
  LEFT_EAR = 3
  RIGHT_EAR = 4
  LEFT_SHOULDER = 5
  RIGHT_SHOULDER = 6
  LEFT_ELBOW = 7
  RIGHT_ELBOW = 8
  LEFT_WRIST = 9
  RIGHT_WRIST = 10
  LEFT_HIP = 11
  RIGHT_HIP = 12
  LEFT_KNEE = 13
  RIGHT_KNEE = 14
  LEFT_ANKLE = 15
  RIGHT_ANKLE = 16


class Point(NamedTuple):
  """A 2-D coordinate (x, y)."""
  # Annotated as float, but person_from_keypoints_with_scores fills both
  # fields with int() pixel values.
  x: float
  y: float


class Rectangle(NamedTuple):
  """An axis-aligned rectangle described by two corner points."""
  # person_from_keypoints_with_scores stores the (min x, min y) corner here ...
  start_point: Point
  # ... and the (max x, max y) corner here.
  end_point: Point


class KeyPoint(NamedTuple):
  """One detected body keypoint: which part, where, and with what score."""
  body_part: BodyPart
  coordinate: Point
  # Score as read from the third column of the keypoint array.
  score: float


class Person(NamedTuple):
  """A detected person: keypoints, their bounding box, and an overall score."""
  keypoints: List[KeyPoint]
  bounding_box: Rectangle
  score: float
  # Defaults to None; person_from_keypoints_with_scores never sets it.
  id: int = None


def person_from_keypoints_with_scores(
    keypoints_with_scores: np.ndarray,
    image_height: float,
    image_width: float,
    keypoint_score_threshold: float = 0.1) -> Person:
  """Build a ``Person`` from an array of per-keypoint (y, x, score) rows.

  Args:
    keypoints_with_scores: 2-D array; column 0 is y, column 1 is x, column 2
      is the score. Row ``i`` is interpreted as ``BodyPart(i)``. Coordinates
      are multiplied by the image size, so they are presumably normalized to
      [0, 1] -- confirm against the caller.
    image_height: Height used to scale the y coordinates to pixels.
    image_width: Width used to scale the x coordinates to pixels.
    keypoint_score_threshold: Only keypoints scoring strictly above this
      value contribute to the person score.

  Returns:
    A ``Person`` whose keypoints are in pixel coordinates (truncated to int),
    whose bounding box spans the min/max of all keypoints, and whose score is
    the mean of the scores above the threshold, or 0.0 when none qualifies.

  Raises:
    ValueError: If the array has no rows (``np.amin`` of an empty array) or
      more rows than ``BodyPart`` has members.
  """
  kpts_x = keypoints_with_scores[:, 1]
  kpts_y = keypoints_with_scores[:, 0]
  scores = keypoints_with_scores[:, 2]

  keypoints = [
      KeyPoint(
          BodyPart(i),
          Point(int(kpts_x[i] * image_width), int(kpts_y[i] * image_height)),
          scores[i])
      for i in range(scores.shape[0])
  ]

  # The bounding box is the extent of all keypoints, regardless of score.
  start_point = Point(
      int(np.amin(kpts_x) * image_width), int(np.amin(kpts_y) * image_height))
  end_point = Point(
      int(np.amax(kpts_x) * image_width), int(np.amax(kpts_y) * image_height))
  bounding_box = Rectangle(start_point, end_point)

  # Averaging an empty selection would yield NaN (with a RuntimeWarning), so
  # report a score of 0.0 when no keypoint clears the threshold.
  confident_scores = scores[scores > keypoint_score_threshold]
  if confident_scores.size:
    person_score = np.average(confident_scores)
  else:
    person_score = 0.0

  return Person(keypoints, bounding_box, person_score)


class Category(NamedTuple):
  """A (label, score) pair.

  NOTE(review): not referenced in the visible cells; presumably a
  classification result -- confirm against its users.
  """
  label: str
  score: float


In [None]:
import numpy as np
from PIL import Image, ImageDraw, ImageColor

class Pose:
  """Single-pose keypoint extractor built on a PoseNet TFLite model.

  ``calc`` runs the model on one image and decodes, for every entry of
  ``KEYPOINTS``, the heatmap peak plus its offset into pixel coordinates.
  ``draw_pose`` renders such a result onto an image.
  """

  # Keypoint names, in the channel order of the model's heatmap output.
  KEYPOINTS = (
  'nose',
  'left eye',
  'right eye',
  'left ear',
  'right ear',
  'left shoulder',
  'right shoulder',
  'left elbow',
  'right elbow',
  'left wrist',
  'right wrist',
  'left hip',
  'right hip',
  'left knee',
  'right knee',
  'left ankle',
  'right ankle')

  # Pairs of keypoints joined by a line when drawing the skeleton.
  EDGES = (
      ('nose', 'left eye'),
      ('nose', 'right eye'),
      ('nose', 'left ear'),
      ('nose', 'right ear'),
      ('left ear', 'left eye'),
      ('right ear', 'right eye'),
      ('left eye', 'right eye'),
      ('left shoulder', 'right shoulder'),
      ('left shoulder', 'left elbow'),
      ('left shoulder', 'left hip'),
      ('right shoulder', 'right elbow'),
      ('right shoulder', 'right hip'),
      ('left elbow', 'left wrist'),
      ('right elbow', 'right wrist'),
      ('left hip', 'right hip'),
      ('left hip', 'left knee'),
      ('right hip', 'right knee'),
      ('left knee', 'left ankle'),
      ('right knee', 'right ankle'),
  )

  def __init__(self, model_path):
    """Load the TFLite model at ``model_path`` and allocate its tensors."""
    self.tflite_interpreter = tf.lite.Interpreter(model_path=model_path)
    self.tflite_interpreter.allocate_tensors()

    self.input_details = self.tflite_interpreter.get_input_details()
    self.output_details = self.tflite_interpreter.get_output_details()

  @staticmethod
  def _sigmoid(z):
    """Logistic function; turns a raw heatmap value into a (0, 1) score."""
    return 1/(1 + np.exp(-z))

  def calc(self, img):
    """Run the model on ``img`` and return the decoded keypoints.

    Args:
      img: A PIL-style image (needs ``width``, ``height`` and array
        conversion). It must already have the spatial size the model
        expects; callers in this notebook resize to 257x257 first. Images
        whose ``mode`` is not ``'RGB'`` are converted so the input tensor
        always has three channels.

    Returns:
      A dict mapping each name in ``KEYPOINTS`` to
      ``{'x': int, 'y': int, 'score': float}`` (pixel coordinates in ``img``),
      plus a ``'total_score'`` entry holding the mean keypoint score.
    """
    # RGBA / palette / grayscale inputs would otherwise yield a tensor with
    # the wrong number of channels and make set_tensor fail.
    if getattr(img, 'mode', 'RGB') != 'RGB':
      img = img.convert('RGB')

    # Scale 0..255 pixel values to roughly [-1, 1) and add the batch axis.
    input_tensor = np.expand_dims(
        np.asarray(img).astype('float32') / 128.0 - 1.0, axis=0)
    self.tflite_interpreter.set_tensor(
        self.input_details[0]['index'], input_tensor)
    self.tflite_interpreter.invoke()

    # Output 0 holds the heatmaps and output 1 the offsets; the remaining
    # outputs are not needed for single-pose decoding.
    heatmaps = np.asarray(
        self.tflite_interpreter.get_tensor(self.output_details[0]['index']))
    offsets = np.asarray(
        self.tflite_interpreter.get_tensor(self.output_details[1]['index']))

    height, width, numKeypoints = heatmaps.shape[1:4]

    # Per-keypoint heatmap peak. argmax over the row-major flattened grid
    # returns the first maximum, matching a row-by-row scan with a strict
    # '>' comparison.
    flat_peaks = heatmaps[0].reshape(height * width, numKeypoints).argmax(axis=0)
    peak_rows, peak_cols = np.unravel_index(flat_peaks, (height, width))
    keypointPositions = [
        (int(row), int(col)) for row, col in zip(peak_rows, peak_cols)]

    output_dic = {}
    total_score = 0.0
    for idx, (bodypart, (positionY, positionX)) in enumerate(zip(Pose.KEYPOINTS, keypointPositions)):
      # Offsets channels [0, numKeypoints) are y offsets and
      # [numKeypoints, 2 * numKeypoints) are x offsets.
      output_dic[bodypart] = {}
      output_dic[bodypart]['x'] = int(positionX / (width - 1) * img.width + offsets[0][positionY][positionX][idx + numKeypoints])
      output_dic[bodypart]['y'] = int(positionY / (height - 1) * img.height + offsets[0][positionY][positionX][idx])
      output_dic[bodypart]['score'] = self._sigmoid(heatmaps[0][positionY][positionX][idx])
      total_score += output_dic[bodypart]['score']

    output_dic['total_score'] = total_score / len(Pose.KEYPOINTS)

    return output_dic

  def draw_pose(self, pose, img, threshold=0.5, marker_color='green', color='yellow', marker_size=5, thickness=2):
    """Draw the skeleton described by ``pose`` onto ``img`` in place.

    Args:
      pose: A result dict as returned by ``calc``.
      img: PIL image to draw on; it is modified and also returned.
      threshold: Keypoints scoring below this are not drawn, and an edge is
        skipped when either of its endpoints scores below it.
      marker_color: Fill colour of the keypoint markers.
      color: Colour of the edge lines.
      marker_size: Diameter of a keypoint marker in pixels.
      thickness: Width of the edge lines in pixels.

    Returns:
      The same ``img`` object, after drawing.
    """
    draw = ImageDraw.Draw(img)

    for p1, p2 in Pose.EDGES:
      if (pose[p1]['score'] < threshold) or (pose[p2]['score'] < threshold):
        continue
      draw.line((pose[p1]['x'], pose[p1]['y'], pose[p2]['x'], pose[p2]['y']), fill=color, width=thickness)

    # Iterate over the known keypoint names rather than the dict items, so
    # the non-keypoint 'total_score' entry is skipped wherever it sits.
    half_marker = marker_size / 2
    for label in Pose.KEYPOINTS:
      keypoint = pose[label]
      if keypoint['score'] < threshold:
        continue
      draw.ellipse(
          (int(keypoint['x'] - half_marker), int(keypoint['y'] - half_marker),
           int(keypoint['x'] + half_marker), int(keypoint['y'] + half_marker)),
          fill=marker_color)

    return img

In [None]:
# https://www.tensorflow.org/lite/models/pose_estimation/overview
!wget "https://storage.googleapis.com/download.tensorflow.org/models/tflite/posenet_mobilenet_v1_100_257x257_multi_kpt_stripped.tflite"

--2024-01-07 10:13:48--  https://storage.googleapis.com/download.tensorflow.org/models/tflite/posenet_mobilenet_v1_100_257x257_multi_kpt_stripped.tflite
Resolving storage.googleapis.com (storage.googleapis.com)... 142.250.136.207, 142.250.148.207, 209.85.200.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.250.136.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13269068 (13M) [application/octet-stream]
Saving to: ‘posenet_mobilenet_v1_100_257x257_multi_kpt_stripped.tflite’


2024-01-07 10:13:48 (134 MB/s) - ‘posenet_mobilenet_v1_100_257x257_multi_kpt_stripped.tflite’ saved [13269068/13269068]



In [None]:
import cv2
import numpy as np
from PIL import Image

In [None]:
# Shared PoseNet wrapper used by process_image; the model file is the one
# fetched by the wget cell.
pose = Pose("posenet_mobilenet_v1_100_257x257_multi_kpt_stripped.tflite")
# NOTE(review): total_iter is not referenced in the visible cells -- confirm
# it is still needed.
total_iter = 3

In [None]:
import os
from PIL import Image
import csv
def process_image(image):
    """Return the keypoint dict the module-level ``pose`` computes for ``image``."""
    return pose.calc(image)
classes_directory = '/content/drive/MyDrive/train'
POSENET_INPUT_SIZE = (257, 257)
IMAGE_EXTENSIONS = ('.jpg', '.png')
KEYPOINT_ATTRS = ('x', 'y', 'score')

# Number only real class directories, in sorted order: os.listdir() order is
# arbitrary, and enumerating it directly makes label ids vary between runs
# and skip a number for every stray non-directory entry.
# NOTE: ids therefore differ from those of CSVs written by earlier runs.
class_names = sorted(
    name for name in os.listdir(classes_directory)
    if os.path.isdir(os.path.join(classes_directory, name)))
class_labels = {class_name: class_index
                for class_index, class_name in enumerate(class_names)}

# One (image_path, keypoint dict, class_name) tuple per training image.
output_dicts = []
for class_name in class_names:
    class_folder = os.path.join(classes_directory, class_name)
    for filename in sorted(os.listdir(class_folder)):
        if not filename.endswith(IMAGE_EXTENSIONS):
            continue
        image_path = os.path.join(class_folder, filename)
        # Convert to RGB so RGBA/palette PNGs and grayscale JPEGs give the
        # 3-channel tensor the model expects; the context manager closes the
        # file as soon as the resized copy exists.
        with Image.open(image_path) as img:
            resized_img = img.convert('RGB').resize(POSENET_INPUT_SIZE)
        output_dict = process_image(resized_img)
        output_dicts.append((image_path, output_dict, class_name))

# Same names and order as the keypoints produced by Pose.calc.
key_order = list(Pose.KEYPOINTS)
feature_columns = [f"{key}_{attr}" for key in key_order for attr in KEYPOINT_ATTRS]

with open('training_data1.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    # Every row ends with two label fields (numeric id, then name), so the
    # header needs two label columns as well; with a single 'class' header
    # the rows were one field longer than the header.
    writer.writerow(['image_path'] + feature_columns + ['class', 'class_name'])
    for image_path, output_dict, class_name in output_dicts:
        features = [
            output_dict[key][attr] if key in output_dict else '0'
            for key in key_order for attr in KEYPOINT_ATTRS
        ]
        writer.writerow([image_path] + features + [class_labels[class_name], class_name])