1. Import Libraries & Dependencies

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [2]:
from cvzone import FPS

In [3]:
mp.solutions

<module 'mediapipe.python.solutions' from 'c:\\Users\\USER\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mediapipe\\python\\solutions\\__init__.py'>

2. Keypoints using MediaPipe Holistic

In [4]:
mp_holistic = mp.solutions.holistic
mp_pose = mp.solutions.pose
mp_hands = mp.solutions.hands
mp_drawing_utils = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

In [5]:
def pose_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False       # Read-only to reduce memory size (Pass by reference)
    results = model.process(image)
    image.flags.writeable = True
    image= cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

In [6]:
def draw_landmarks(image, landmarks):

    # # Face Detection is disabled to improve efficiency
    # # Draw face contour
    # mp_drawing_utils.draw_landmarks(
    #     image=image, 
    #     landmark_list=landmarks.face_landmarks, 
    #     connections=mp_holistic.FACEMESH_TESSELATION, 
    #     landmark_drawing_spec=mp_drawing_utils.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
    #     connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
    # )

    # Draw body pose
    mp_drawing_utils.draw_landmarks(
        image=image, 
        landmark_list=landmarks.pose_landmarks, 
        connections=mp_holistic.POSE_CONNECTIONS, 
        landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
    )

    # Draw left hand
    mp_drawing_utils.draw_landmarks(
        image=image, 
        landmark_list=landmarks.left_hand_landmarks, 
        connections=mp_holistic.HAND_CONNECTIONS, 
        landmark_drawing_spec=mp_drawing_styles.get_default_hand_landmarks_style(), 
        connection_drawing_spec=mp_drawing_styles.get_default_hand_connections_style()
    )
    # Draw right  hand
    mp_drawing_utils.draw_landmarks(
        image=image, 
        landmark_list=landmarks.right_hand_landmarks, 
        connections=mp_holistic.HAND_CONNECTIONS, 
        landmark_drawing_spec=mp_drawing_styles.get_default_hand_landmarks_style(), 
        connection_drawing_spec=mp_drawing_styles.get_default_hand_connections_style()
    )

In [7]:
from time import time

In [23]:
# Webcam
cam = cv2.VideoCapture(0, cv2.CAP_DSHOW)
cam.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
timeStats = []
fpsReader = FPS()
# Webcam Loop
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as model:
    
    initialTime = time()
    while cam.isOpened():

        startTime = time()
        # Read Frame
        ret, frame = cam.read()
        if not ret:
            print("No frames detected")
            continue
        
        # Pose Detection
        image, landmarks = pose_detection(frame, model)
        draw_landmarks(frame, landmarks)

        fps, frame = fpsReader.update(frame,pos=(50,80),color=(0,255,0),scale=5,thickness=5)


        # Screen Output
        cv2.imshow('Sign Language Recognition Prototype', frame)

        timeStats.append(time() - startTime)

        # Release condition
        keyPressed = cv2.waitKey(10)
        if keyPressed == 27:
            # Close Application
            cam.release()
            cv2.destroyAllWindows()
            break
        
        if time() - initialTime > 10:
            cam.release()
            cv2.destroyAllWindows()
            break

In [24]:
np.array(timeStats[1:]).mean()

0.06561559677124024

In [20]:
np.array(timeStats[1:]).mean()

0.013344293290918524

In [18]:
np.array(timeStats[1:]).mean()

0.07079772651195526

In [8]:
cam.release()
cv2.destroyAllWindows()

In [9]:
dir(landmarks.left_hand_landmarks)

['ByteSize',
 'Clear',
 'ClearExtension',
 'ClearField',
 'CopyFrom',
 'DESCRIPTOR',
 'DiscardUnknownFields',
 'Extensions',
 'FindInitializationErrors',
 'FromString',
 'HasExtension',
 'HasField',
 'IsInitialized',
 'ListFields',
 'MergeFrom',
 'MergeFromString',
 'ParseFromString',
 'RegisterExtension',
 'SerializePartialToString',
 'SerializeToString',
 'SetInParent',
 'UnknownFields',
 'WhichOneof',
 '_CheckCalledFromGeneratedFile',
 '_SetListener',
 '__class__',
 '__deepcopy__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__unicode__',
 '_extensions_by_name',
 '_extensions_by_number',
 'landmark']

3. Feature Extraction (Extract Landmark Values + Calculate Extra Features)

In [43]:

def featureExtraction(landmarks):
    pose_lm = []
    left_hand_lm = []
    right_hand_lm = []
    
    # Calculate centroid from a list of xyz coordinates
    def centroid(arr):
        length = arr.shape[0]
        sum_x = np.sum(arr[:, 0])
        sum_y = np.sum(arr[:, 1])
        sum_z = np.sum(arr[:, 2])
        return sum_x/length, sum_y/length, sum_z/length

    # Perform all landmark extraction operations
    def extractLandmarks(landmarksList, defaultZeros, includeVisibility=False):
        if not landmarksList:
            return np.zeros(defaultZeros), (0, 0, 0)
        
        output = []
        if includeVisibility:
            output = [[each.x, each.y, each.z, each.visibility] for each in landmarksList.landmark]
        else:
            output = [[each.x, each.y, each.z] for each in landmarksList.landmark]
        output = np.array(output)
        return output.flatten(), centroid(output)

    pose_lm, pose_centroid      = extractLandmarks(landmarks.pose_landmarks, defaultZeros=132, includeVisibility=True)
    left_hand_lm, lh_centroid   = extractLandmarks(landmarks.left_hand_landmarks, defaultZeros=63)
    right_hand_lm, rh_centroid  = extractLandmarks(landmarks.right_hand_landmarks, defaultZeros=63)

    return np.concatenate([pose_lm, left_hand_lm, right_hand_lm]), [pose_centroid, lh_centroid, rh_centroid]

xLandmarks, centroids = featureExtraction(landmarks)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[(0.5015471338322668, 1.335449796734434, -0.3009474404384629), (0.5195341791425433, 0.32102417378198533, -0.013014709007466405), (0, 0, 0)]


4. Extract Extra Features

In [12]:
# Calculate the center 
def distance(pointsArr):
    # return np.linalg.norm(np.array(a) - np.array(b))
    pass

testData = np.random.random(size=(258 // 3, 1 * 3)) * 6 - 3

print(min(xLandmarks), max(xLandmarks))
print(np.min(testData), np.max(testData))
print(testData.shape)




-0.6764288544654846 2.961961507797241
-2.991369457452593 2.960440034291155
(86, 3)


In [26]:
print(xLandmarks[:9])
print(landmarks.pose_landmarks.landmark[:3])
import timeit
test = xLandmarks.reshape(258 // 3, 3)
print(test[:9])
print(test.shape)

[ 0.45312276  0.78516418 -0.46312538  0.99993253  0.48683429  0.73243636
 -0.42282629  0.99985081  0.50434029]
[x: 0.4531227648258209
y: 0.7851641774177551
z: -0.4631253778934479
visibility: 0.9999325275421143
, x: 0.4868342876434326
y: 0.7324363589286804
z: -0.42282629013061523
visibility: 0.9998508095741272
, x: 0.5043402910232544
y: 0.7309010624885559
z: -0.42261797189712524
visibility: 0.9998418092727661
]
[[ 0.45312276  0.78516418 -0.46312538]
 [ 0.99993253  0.48683429  0.73243636]
 [-0.42282629  0.99985081  0.50434029]
 [ 0.73090106 -0.42261797  0.99984181]
 [ 0.52128971  0.73023671 -0.4223803 ]
 [ 0.99985176  0.45039302  0.73809391]
 [-0.38673082  0.99984783  0.44226074]
 [ 0.73923188 -0.38610789  0.99982017]
 [ 0.43313536  0.74061579 -0.38619661]]
(86, 3)
