In [1]:
import cv2
from time import time
import numpy as np
from itertools import chain
import traceback
from cvzone import FPS

In [2]:
from cvzone.HandTrackingModule import HandDetector
from cvzone.FaceDetectionModule import FaceDetector
from cvzone.PoseModule import PoseDetector

In [3]:
from collections import deque
from concurrent.futures import ThreadPoolExecutor

In [4]:
# Flatten a 2d np array into 1d array
def flatten2dList(arr, dataType=int):
    return np.fromiter(chain.from_iterable(arr), dataType)

In [5]:
# Get the largest absolute value in an np array
def getAbsLargestVal(arr):
    return np.max(np.abs(arr))

In [6]:
# Offset and normalize the landmark list
# Returns a 1d numpy array
def preprocess_landmarks(landmark_list):    
    landmark_list = np.array(landmark_list, dtype=float)
    origin = landmark_list[0]
    
    # Offset every point with respect to the first point
    # Convert to 1D-array
    new_landmark_list = (landmark_list - origin).ravel()
    
    # Get highest absolute value
    largest_value = getAbsLargestVal(new_landmark_list)
    
    # Normalization
    if largest_value != 0:
        return new_landmark_list / largest_value
    return new_landmark_list

In [7]:
# Offset and normalize a BBOX list (BBOX = Bounding Box, used in face and hand detection)
# Returns a 1d numpy array
def preprocess_bbox(bbox, frameSize):
    bbox = np.array(bbox, dtype=float)
    # Convert 3rd and 4th element into coordinates instead of width/height
    bbox[2] = bbox[0] + bbox[2]
    bbox[3] = bbox[1] + bbox[3]

    # Normalize against frame size
    bbox[0] /= frameSize[0]
    bbox[1] /= frameSize[1]
    bbox[2] /= frameSize[0]
    bbox[3] /= frameSize[1]

    return bbox

In [8]:
# Normalize a center vertex (a list of 2 elements)
# Returns a 1d numpy array
def preprocess_center(center, frameSize):
    center = np.array(center, dtype=float)
    center[0] /= frameSize[0]
    center[1] /= frameSize[1]
    return center

In [9]:
# Preprocess (Offset and normalize) the body's landmark list, bbox and center
def preprocess_body_part(bodyPart, frameSize):
    bodyPart['lmList'] = preprocess_landmarks(bodyPart['lmList'])
    bodyPart['bbox'] = preprocess_bbox(bodyPart['bbox'], frameSize)
    bodyPart['center'] = preprocess_center(bodyPart['center'], frameSize)
    return bodyPart

In [10]:
# Function to generate empty/placeholder data for a hand 
# Used when a hand is not detected in frame
def generate_empty_hand(type):
    return {
        'lmList': np.zeros(21 * 3, dtype=int), 
        'bbox': np.zeros(4, dtype=float), 
        'center': np.zeros(2, dtype=float), 
        'type': type
    }

In [11]:
# Select the best matching face, aka the one with the best score (clarity)
# and closest to the center of the screen
# Since the Neural network will be design to only accept one face
def select_best_matching_face(faces, frameSize):
    if not faces or len(faces) == 0:
        return False
    elif len(faces) == 1:
        return faces[0]
    
    def difference(a, b):
        return ((a[0] - b[0])**2) + ((a[1] - b[1])**2)
    
    frameCenter = (frameSize[0] / 2, frameSize[1] / 2)

    best_score = faces[0]
    best_center = faces[0]
    center_diff = difference(faces[0]['center'], frameCenter)

    for each in faces:
        if difference(each['center'], frameCenter) < center_diff:
            best_center = each
        if each['score'][0] > best_score['score'][0]:
            best_score = each
    
    if best_center['score'][0] > 0.5:
        return best_center
    return best_score

In [12]:
# Flatten everything
def flattenDetectionResult(obj):
    return np.concatenate([obj['lmList'], obj['bbox'], obj['center']])

In [13]:


# Detects hands, face & pose, 
# convert them into normalized landmark/keypoint coordinates in a 1D-array, 
# and also returns the frame with the landmark connections drawn onto it

# Serial/Unparallelised version (Old version)
def featureExtractionV3(handDetector, faceDetector, poseDetector, frame, draw=True):
    def detectHands(handDetector, frame, frameSize, draw):
        results = None
        # Hand Detection
        if (draw):
            results, frame = handDetector.findHands(frame, draw=draw)
        else:
            results = handDetector.findHands(frame, draw=draw)

        if not results:
            results = [generate_empty_hand('Left'), generate_empty_hand('Right')]
        elif len(results) == 1:
            if (results[0]['type'] == 'Left'):
                results[0] = preprocess_body_part(results[0], frameSize)
                results.append(generate_empty_hand('Right'))
            else:
                results[0] = preprocess_body_part(results[0], frameSize)
                results.insert(0, generate_empty_hand('Left'))                         
        else:
            results[0] = preprocess_body_part(results[0], frameSize)
            results[1] = preprocess_body_part(results[1], frameSize)
        return results

    # Pose Detection
    # **We only use the first 23 out of the total 33 landmark points 
    #   as those represent the lower half body and are irrelevant to sign language interpretation
    def detectPose(poseDetector, frame, draw):
        frame = poseDetector.findPose(frame, draw=draw)
        results, _ = poseDetector.findPosition(frame, bboxWithHands=False)
        if results:
            results = preprocess_landmarks(results[:23])
        else:
            results = np.zeros(23, dtype=int)
        return results
    
    # Face Detection
    def detectFace(faceDetector, frame, frameSize, draw):
        frame, results = faceDetector.findFaces(frame, draw=draw)
        if results:
            results = select_best_matching_face(results, frameSize)
            results['bbox'] = preprocess_bbox(results['bbox'], frameSize)
            results['center'] = preprocess_center(results['center'], frameSize)
        else:
            results = {
                'bbox': np.zeros(4, dtype=float), 
                'center': np.zeros(2, dtype=float)
            }
        return results

    frameSize = (frame.shape[1], frame.shape[0])
    with ThreadPoolExecutor() as executor:
        t1 = executor.submit(detectHands, handDetector, frame, frameSize, draw)
        t2 = executor.submit(detectPose, poseDetector, frame, draw)
        t3 = executor.submit(detectFace, faceDetector, frame, frameSize, draw)
        
        # Convert results into 1D-array
        detectionResults = flatten2dList([
            flattenDetectionResult(t1.result()[0]), 
            flattenDetectionResult(t1.result()[1]), 
            t2.result(), 
            t3.result()['bbox'],
            t3.result()['center'],
            t3.result()['center'] - t1.result()[0]['center'],
            t3.result()['center'] - t1.result()[1]['center']
        ], dataType=float)

        return detectionResults, frame

In [39]:
# Detectors
handDetector = HandDetector(detectionCon=0.5, maxHands=2)
faceDetector = FaceDetector(minDetectionCon=0.5)
poseDetector = PoseDetector(detectionCon=0.5)

In [41]:
from constants import FRAMES_PER_TRAINING

In [40]:
cam = cv2.VideoCapture(0, cv2.CAP_DSHOW)
cam.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

fpsReader = FPS()


try:
    keypointsHistory = deque()
    predictionHistory = deque()
    timestampHistory = deque()

    detectionThreshold = 1.0

    lastPredictionTime = time()
    predictionCooldown = 1
    
    while True:

        # Read from camera
        success, frame = cam.read()

        # Pose Detection
        detectionResults, frame = featureExtractionV3(
            handDetector, faceDetector, poseDetector, frame)
        
        timestampHistory.append(int(time() * 1000))
        
        # Semantic Prediction
        keypointsHistory.append(detectionResults)
        if len(keypointsHistory) > FRAMES_PER_TRAINING:
            keypointsHistory.popleft()
            timestampHistory.popleft()
            # Run prediction
        
        cv2.putText(frame, ', '.join(predictionHistory), (15, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
        
        fps, frame = fpsReader.update(frame,pos=(50,80),color=(0,255,0),scale=5,thickness=5)

        cv2.imshow("Sign Language Recognition Prototype", frame)     


        keyPressed = cv2.waitKey(10)
        if (keyPressed == 27):
            raise Exception("Finished")
        


except Exception as e:
    print(e)
    traceback.print_exc()
finally:
    cam.release()
    cv2.destroyAllWindows()


Finished


Traceback (most recent call last):
  File "C:\Users\USER\AppData\Local\Temp\ipykernel_24372\1185570273.py", line 29, in <module>
    raise Exception("Finished")
Exception: Finished


In [17]:
frame.shape

(720, 1280, 3)

In [30]:
"""
Hand Tracking Module
By: Computer Vision Zone
Website: https://www.computervision.zone/
"""

import cv2
import mediapipe as mp
import math
from concurrent.futures import ThreadPoolExecutor

class MyHandDetector:
    """
    Finds Hands using the mediapipe library. Exports the landmarks
    in pixel format. Adds extra functionalities like finding how
    many fingers are up or the distance between two fingers. Also
    provides bounding box info of the hand found.
    """

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, minTrackCon=0.5):
        """
        :param mode: In static mode, detection is done on each image: slower
        :param maxHands: Maximum number of hands to detect
        :param detectionCon: Minimum Detection Confidence Threshold
        :param minTrackCon: Minimum Tracking Confidence Threshold
        """
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.minTrackCon = minTrackCon

        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(static_image_mode=self.mode, max_num_hands=self.maxHands,
                                        min_detection_confidence=self.detectionCon,
                                        min_tracking_confidence=self.minTrackCon)
        self.mpDraw = mp.solutions.drawing_utils
        self.tipIds = [4, 8, 12, 16, 20]
        self.fingers = []
        self.lmList = []

    def findHands(self, img, draw=True, flipType=True):
        """
        Finds hands in a BGR image.
        :param img: Image to find the hands in.
        :param draw: Flag to draw the output on the image.
        :return: Image with or without drawings
        """

        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        allHands = []
        h, w, c = img.shape

        if self.results.multi_hand_landmarks:
            for handType, handLms in zip(self.results.multi_handedness, self.results.multi_hand_landmarks):
                myHand = {}
                ## lmList
                mylmList = []
                xList = []
                yList = []
                for id, lm in enumerate(handLms.landmark):
                    px, py, pz = int(lm.x * w), int(lm.y * h), int(lm.z * w)
                    mylmList.append([px, py, pz])
                    xList.append(px)
                    yList.append(py)

                ## bbox
                xmin, xmax = min(xList), max(xList)
                ymin, ymax = min(yList), max(yList)
                boxW, boxH = xmax - xmin, ymax - ymin
                bbox = xmin, ymin, boxW, boxH
                cx, cy = bbox[0] + (bbox[2] // 2), \
                         bbox[1] + (bbox[3] // 2)

                myHand["lmList"] = mylmList
                myHand["bbox"] = bbox
                myHand["center"] = (cx, cy)

                if flipType:
                    if handType.classification[0].label == "Right":
                        myHand["type"] = "Left"
                    else:
                        myHand["type"] = "Right"
                else:
                    myHand["type"] = handType.classification[0].label
                allHands.append(myHand)

                ## draw
                if draw:

                    global x, y
                    x = handLms
                    y = self.mpHands.HAND_CONNECTIONS
                    self.mpDraw.draw_landmarks(img, handLms,
                                               self.mpHands.HAND_CONNECTIONS)
                    cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
                                  (bbox[0] + bbox[2] + 20, bbox[1] + bbox[3] + 20),
                                  (255, 0, 255), 2)
                    cv2.putText(img, myHand["type"], (bbox[0] - 30, bbox[1] - 30), cv2.FONT_HERSHEY_PLAIN,
                                2, (255, 0, 255), 2)
        
        if draw:
            return allHands, img
        else:
            return allHands
        

    def fingersUp(self, myHand):
        """
        Finds how many fingers are open and returns in a list.
        Considers left and right hands separately
        :return: List of which fingers are up
        """
        myHandType = myHand["type"]
        myLmList = myHand["lmList"]
        if self.results.multi_hand_landmarks:
            fingers = []
            # Thumb
            if myHandType == "Right":
                if myLmList[self.tipIds[0]][0] > myLmList[self.tipIds[0] - 1][0]:
                    fingers.append(1)
                else:
                    fingers.append(0)
            else:
                if myLmList[self.tipIds[0]][0] < myLmList[self.tipIds[0] - 1][0]:
                    fingers.append(1)
                else:
                    fingers.append(0)

            # 4 Fingers
            for id in range(1, 5):
                if myLmList[self.tipIds[id]][1] < myLmList[self.tipIds[id] - 2][1]:
                    fingers.append(1)
                else:
                    fingers.append(0)
        return fingers

    def findDistance(self, p1, p2, img=None):
        """
        Find the distance between two landmarks based on their
        index numbers.
        :param p1: Point1
        :param p2: Point2
        :param img: Image to draw on.
        :param draw: Flag to draw the output on the image.
        :return: Distance between the points
                 Image with output drawn
                 Line information
        """

        x1, y1 = p1
        x2, y2 = p2
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
        length = math.hypot(x2 - x1, y2 - y1)
        info = (x1, y1, x2, y2, cx, cy)
        if img is not None:
            cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
            cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
            cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
            cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)
            return length, info, img
        else:
            return length, info


In [31]:

hd = MyHandDetector(detectionCon=0.5, maxHands=2)
global x, y

In [32]:
cam = cv2.VideoCapture(0, cv2.CAP_DSHOW)
cam.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

fpsReader = FPS()

timeStats = []

try:
    initialTime = time()
    while True:
        startTime = time()

        # Read from camera
        success, frame = cam.read()
        nFrame = np.copy(frame)
        
        # Pose Detection
        hands, frame = hd.findHands(frame, draw=True)
        
        
        fps, frame = fpsReader.update(frame,pos=(50,80),color=(0,255,0),scale=5,thickness=5)

        cv2.imshow("Sign Language Recognition Prototype", frame)     

        timeStats.append(time() - startTime)

        keyPressed = cv2.waitKey(10)
        if (keyPressed == 27):
            raise Exception("Finished")
        


except Exception as e:
    print(e)
    traceback.print_exc()
finally:
    cam.release()
    cv2.destroyAllWindows()


Finished


Traceback (most recent call last):
  File "C:\Users\USER\AppData\Local\Temp\ipykernel_13996\1663665762.py", line 30, in <module>
    raise Exception("Finished")
Exception: Finished


In [34]:
type(x)

mediapipe.framework.formats.landmark_pb2.NormalizedLandmarkList

# Testing CSV

In [5]:
import csv

with open('../reference/list.csv', 'r') as file:
    reader = csv.reader(file)
    
    # Iterate through each row in the CSV file
    for row in reader:
        # Each row is a list of values
        print(f"{row[0]},{row[1]}")


0,hello
1,good/thank
2,help
3,I/me
4,please
5,sorry
6,welcome
7,welcome
8,ok
9,what
10,what
11,what
12,thank you very much
13,deaf
14,do not
15,feel
16,eat/food
17,eat a lot
18,tired
19,because
20,sick
21,drink
22,drink
23,apple
24,banana
25,drive
26,again
27,also
28,ask
29,yes
30,no
31,man
32,man
33,woman
34,woman
35,he/she
36,bad
37,have/has/had (formal)
38,have/has/had (informal)
39,when
40,where
41,which
42,who
43,why
44,how
45,you
46,boy
47,girl
48,friend
49,finish/complete
50,finish/complete
51,forget
52,forget
53,give
54,give you
55,give me
56,go
57,get
58,understand/comprehend
59,use
60,will
61,with
62,wait
63,work
64,they
65,their
66,school
67,write
68,send text/message
69,email
70,email
71,home
72,but
73,should
74,not
75,my
76,name
77,like
78,say
79,cold
80,hot
81,family
82,mother
83,father
84,many
85,few
86,now
87,later
88,today
89,tomorrow
90,yesterday
91,same/also
92,remember
93,your
94,more
95,meet
96,see
97,slow
98,fast/quick
99,some
100,store/shop
101,take
102,take/brin

# 16/11/2023

In [2]:
from constants import KEYPOINTS_PATH
import os
import numpy as np

foundEmpty = 0

for each in os.listdir(KEYPOINTS_PATH):
    thisDir = os.path.join(KEYPOINTS_PATH, each)
    
    for f in os.listdir(thisDir):
        thisF = os.path.join(thisDir, f)
        if os.path.splitext(thisF)[1].lower() == '.npy':

            load_data = np.load(thisF)
            for i, x in enumerate(load_data):
                if (np.sum(x) >= -0.00001 and np.sum(x) <= 0.00001):
                    foundEmpty += 1
                    print(each, i)
                    print(x)
            

foundEmpty


0

# 22/11/223

In [18]:
import numpy as np

ogFrame = np.load('sampleRawFrame.npy')
newFrame = np.load('../test_img_2.npy')

In [19]:
import cv2

In [21]:
while True:

    cv2.imshow("Title", cv2.flip(newFrame, 1))
    
    keyPressed = cv2.waitKey(10)
    # Stop Program when pressed 'Esc'
    if (keyPressed == 27):
        cv2.destroyAllWindows() 
        break
    

In [11]:
ogFrame.shape

(720, 1280, 3)

In [10]:
newFrame.shape

(360, 640, 3)

In [16]:
from concurrent.futures import ThreadPoolExecutor

def func():
    for i in range(10):
        ...
    return

with ThreadPoolExecutor() as executor:
    t1 = executor.submit(func)
    print(t1)
    print(type(t1))
    print(executor._threads)
    for t in executor._threads:
        print(type(t), t)

<Future at 0x2300f7d7b80 state=finished returned NoneType>
<class 'concurrent.futures._base.Future'>
{<Thread(ThreadPoolExecutor-2_0, started 22684)>}
<class 'threading.Thread'> <Thread(ThreadPoolExecutor-2_0, started 22684)>


In [17]:
from threading import Thread

t = Thread(target=func)
t.start()

print(type(t), t)


<class 'threading.Thread'> <Thread(Thread-5, stopped 19944)>
