1. Import Libraries & Dependencies

In [2]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp


2. Keypoints using MediaPipe Holistic

In [3]:
mp_holistic = mp.solutions.holistic
mp_drawing_utils = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

In [4]:
def pose_detection(image, model):
    """Run ``model`` on a BGR frame and return the frame together with the results.

    The frame is converted from BGR to RGB before it is passed to
    ``model.process`` and converted back to BGR afterwards.

    Args:
        image: Frame in BGR channel order (it is converted with ``COLOR_BGR2RGB``).
        model: Object exposing ``process(rgb_image)``.

    Returns:
        A ``(bgr_image, results)`` tuple, where ``results`` is whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Flag the buffer as read-only while the model runs (lets it be passed by reference)
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [5]:
def draw_landmarks(image, landmarks):
    """Overlay the body pose and both hands from ``landmarks`` onto ``image``.

    Nothing is returned; the drawing calls receive ``image`` itself.

    Args:
        image: Frame handed to ``mp_drawing_utils.draw_landmarks``.
        landmarks: Result object providing ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    # Face detection is disabled to improve efficiency, so the face contour
    # (landmarks.face_landmarks with mp_holistic.FACEMESH_TESSELATION) is not drawn.

    # Body pose
    mp_drawing_utils.draw_landmarks(
        image=image,
        landmark_list=landmarks.pose_landmarks,
        connections=mp_holistic.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style(),
    )

    # Left hand first, then right hand -- both use the same connections and styles
    both_hands = (landmarks.left_hand_landmarks, landmarks.right_hand_landmarks)
    for hand_landmarks in both_hands:
        mp_drawing_utils.draw_landmarks(
            image=image,
            landmark_list=hand_landmarks,
            connections=mp_holistic.HAND_CONNECTIONS,
            landmark_drawing_spec=mp_drawing_styles.get_default_hand_landmarks_style(),
            connection_drawing_spec=mp_drawing_styles.get_default_hand_connections_style(),
        )

In [6]:
# Webcam
cam = cv2.VideoCapture(0)

# Stop after this many consecutive failed reads instead of spinning forever
# on a camera that is open but no longer delivers frames.
MAX_FAILED_READS = 30
failedReads = 0

try:
    # Webcam Loop
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as model:
        while cam.isOpened():

            # Read Frame
            ret, frame = cam.read()
            if not ret:
                print("No frames detected")
                failedReads += 1
                if failedReads >= MAX_FAILED_READS:
                    break
                continue
            failedReads = 0

            # Pose Detection (landmarks are drawn on the original BGR frame)
            image, landmarks = pose_detection(frame, model)
            draw_landmarks(frame, landmarks)

            # Screen Output
            cv2.imshow('Sign Language Recognition Prototype', frame)

            # Release condition: ESC (key code 27).
            # Only the low byte is compared so the check does not depend on
            # extra bits some OpenCV builds/platforms add to the key code.
            keyPressed = cv2.waitKey(10)
            if keyPressed & 0xFF == 27:
                break
finally:
    # Close Application: always release the camera and close the window,
    # including when the loop exits through an exception or a kernel interrupt.
    cam.release()
    cv2.destroyAllWindows()

In [7]:
# Manual cleanup: release the webcam handle and close any OpenCV windows,
# e.g. after the capture loop was interrupted before reaching its own cleanup.
cam.release()
cv2.destroyAllWindows()

In [8]:
# Debug: list the attributes of the left-hand result from the last processed frame.
# NOTE(review): the recorded output shows no `landmark` attribute, which suggests
# the value was None (left hand not detected) in that frame -- confirm.
dir(landmarks.left_hand_landmarks)

['__bool__',
 '__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__']

3. Extract Landmark Values

In [9]:
# Inspect the raw landmark values of one body part from the last processed frame.
# Uncomment one of the alternatives below to inspect the pose or the left hand instead.
# print(landmarks.pose_landmarks.landmark)
# print(landmarks.left_hand_landmarks)
print(landmarks.right_hand_landmarks)

landmark {
  x: 0.316459983587265
  y: 0.37347567081451416
  z: 2.9983161198288144e-07
}
landmark {
  x: 0.33888858556747437
  y: 0.40816083550453186
  z: -0.01863882690668106
}
landmark {
  x: 0.37121155858039856
  y: 0.4353296756744385
  z: -0.022908641025424004
}
landmark {
  x: 0.40239593386650085
  y: 0.4460475742816925
  z: -0.025542449206113815
}
landmark {
  x: 0.4264013469219208
  y: 0.44075465202331543
  z: -0.027009854093194008
}
landmark {
  x: 0.41381341218948364
  y: 0.3921612799167633
  z: -0.003548434004187584
}
landmark {
  x: 0.4480164349079132
  y: 0.4028260111808777
  z: -0.009437289088964462
}
landmark {
  x: 0.46893349289894104
  y: 0.4061458706855774
  z: -0.01621823199093342
}
landmark {
  x: 0.48725253343582153
  y: 0.4064740240573883
  z: -0.020891625434160233
}
landmark {
  x: 0.4142712652683258
  y: 0.36386197805404663
  z: 0.0004412683192640543
}
landmark {
  x: 0.45010948181152344
  y: 0.35958895087242126
  z: -0.0025609279982745647
}
landmark {
  x: 0.472

In [10]:

def extractAllLandmarks(landmarks):
    """Flatten pose and hand landmarks into a single 1-D feature vector.

    Layout of the returned array, in order:
      * pose:       x, y, z, visibility per landmark (132 zeros if missing)
      * left hand:  x, y, z per landmark             (63 zeros if missing)
      * right hand: x, y, z per landmark             (63 zeros if missing)

    Args:
        landmarks: Object with ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks`` attributes. Each is either falsy (e.g. ``None``)
            or exposes an iterable ``landmark`` attribute.

    Returns:
        ``np.ndarray`` holding the three parts concatenated in the order above.
    """

    def extractLandmarks(landmarksList, includeVisibility=False):
        # Pick which attributes to read from every landmark, then flatten row by row
        fields = ("x", "y", "z", "visibility") if includeVisibility else ("x", "y", "z")
        rows = [[getattr(point, field) for field in fields] for point in landmarksList.landmark]
        return np.array(rows).flatten()

    pose = landmarks.pose_landmarks
    left_hand = landmarks.left_hand_landmarks
    right_hand = landmarks.right_hand_landmarks

    # Missing parts are replaced by zero vectors of fixed length
    if pose:
        pose_vec = extractLandmarks(pose, True)
    else:
        pose_vec = np.zeros(132)

    if left_hand:
        left_vec = extractLandmarks(left_hand)
    else:
        left_vec = np.zeros(63)

    if right_hand:
        right_vec = extractLandmarks(right_hand)
    else:
        right_vec = np.zeros(63)

    return np.concatenate([pose_vec, left_vec, right_vec])

# Flatten the landmarks of the last processed frame and check the feature-vector length
xLandmarks = extractAllLandmarks(landmarks)
print(xLandmarks.shape)

(258,)


4. Extract Extra Features

In [18]:
# Placeholder for a distance feature (not implemented yet)
def distance(pointsArr):
    """Stub: currently does nothing and returns ``None``.

    ``pointsArr`` is accepted but not used. The commented-out draft below refers
    to names ``a`` and ``b`` that are not parameters of this function.
    TODO: decide on the intended inputs and implement.
    """
    # return np.linalg.norm(np.array(a) - np.array(b))
    pass

# Synthetic stand-in data: (258 // 3) = 86 random 3-D points, uniform in [-3, 3).
# A seeded generator keeps the values identical across Restart & Run All.
testDataRng = np.random.default_rng(42)
testData = testDataRng.random(size=(258 // 3, 3)) * 6 - 3

# Compare the value range of the real feature vector with the synthetic data
print(np.min(xLandmarks), np.max(xLandmarks))
print(np.min(testData), np.max(testData))
print(testData.shape)

def centroid(arr):
    """Return the mean of the first three columns of ``arr`` as an ``(x, y, z)`` tuple.

    Args:
        arr: 2-D array with one point per row and at least three columns;
            any further columns are ignored.

    Returns:
        Tuple of three values: each column's sum divided by the number of rows.
    """
    n_points = arr.shape[0]
    return tuple(np.sum(arr[:, axis]) / n_points for axis in range(3))


-0.6686346530914307 2.849335193634033
-2.984873787396042 2.9848196908894895
(86, 3)
npTime:  1.4300816999999597
pyTime:  5.217582299999947
pyTime2:  2.417076199999997
