In [1]:
import mediapipe as mp
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.models import load_model
import cv2

In [2]:
# Short aliases for MediaPipe's drawing helpers and its hands solution module.
mp_drawings, mp_hands = mp.solutions.drawing_utils, mp.solutions.hands

In [2]:
def HandTracker(max_num_hands=1, min_detection_confidence=0.8, min_tracking_confidence=0.5):
    """
    Build and return a MediaPipe ``Hands`` tracker object.

    Parameters
    ----------
    max_num_hands : int, optional
        Forwarded to ``mp.solutions.hands.Hands``. Defaults to 1.
    min_detection_confidence : float, optional
        Forwarded to ``mp.solutions.hands.Hands``. Defaults to 0.8.
    min_tracking_confidence : float, optional
        Forwarded to ``mp.solutions.hands.Hands``. Defaults to 0.5.

    Returns
    -------
    The object created by ``mp.solutions.hands.Hands(...)``.

    Calling ``HandTracker()`` with no arguments gives the same configuration
    as before the settings became parameters.
    """
    mp_hands = mp.solutions.hands
    return mp_hands.Hands(
        max_num_hands=max_num_hands,
        min_detection_confidence=min_detection_confidence,
        min_tracking_confidence=min_tracking_confidence,
    )

In [3]:
# Create the single tracker instance that the webcam loops in this notebook
# call through `hand_tracker.process(...)`.
hand_tracker = HandTracker()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [4]:
def drawLandMarks(results, image):
    """
    Draw every detected hand's landmarks and connections onto ``image``.

    Parameters
    ----------
    results
        Object exposing ``multi_hand_landmarks`` (in this notebook, the value
        returned by ``hand_tracker.process``).
    image
        Frame handed to ``mp.solutions.drawing_utils.draw_landmarks``; the
        drawing call is expected to modify it in place.

    Returns
    -------
    The same ``image`` object that was passed in.
    """
    # No hand detected: return the frame untouched.
    if not results.multi_hand_landmarks:
        return image

    mp_drawings = mp.solutions.drawing_utils
    mp_hands = mp.solutions.hands

    # Build both styles once instead of once per detected hand.
    joint_spec = mp_drawings.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4)
    line_spec = mp_drawings.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2)

    for hand in results.multi_hand_landmarks:
        mp_drawings.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS,
                                   joint_spec, line_spec)
    return image

In [23]:
# Live preview: show the mirrored webcam feed with hand landmarks drawn on it
# until 'q' is pressed or the camera stops delivering frames.
stream = cv2.VideoCapture(0)

try:
    while stream.isOpened():
        ret, frame = stream.read()
        # A failed read leaves `frame` as None, which would crash cv2.flip.
        if not ret:
            break

        image = cv2.flip(frame, 1)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Mark the frame read-only while the tracker processes it.
        image.flags.writeable = False
        results = hand_tracker.process(image)
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        image = drawLandMarks(results, image)
        cv2.imshow("Data Collection", image)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame raised.
    stream.release()
    cv2.destroyAllWindows()

In [5]:
def extract_keypoints(results):
    '''
    Flatten the first detected hand's landmarks into a 1-D array.

    Returns the x, y, z values of every landmark of
    ``results.multi_hand_landmarks[0]`` laid out as [x0, y0, z0, x1, ...].
    When no hand is present, a zero vector of length 21*3 is returned.
    '''
    detected_hands = results.multi_hand_landmarks
    if not detected_hands:
        return np.zeros(21*3)

    coordinates = [[point.x, point.y, point.z] for point in detected_hands[0].landmark]
    return np.array(coordinates).flatten()

In [59]:
# Sanity check of extract_keypoints. `results` is not defined in this cell:
# it must still be in the kernel from a previously executed webcam cell.
result_test = extract_keypoints(results)

In [8]:
import os

In [9]:
# Root folder (under the current working directory) holding the exported
# keypoint arrays
DATA_PATH = os.path.join(os.getcwd(), 'Hand Gestures')

# Gesture classes to detect
actions = np.array(['Up', 'Down', 'Right', 'Left', 'Stop'])

# Number of recorded sequences ("videos") per gesture
no_sequences = 30

# Number of frames in every sequence
sequence_length = 30

In [10]:
# Display the configured gesture labels.
actions

array(['Up', 'Down', 'Right', 'Left', 'Stop'], dtype='<U5')

In [50]:
# Load one saved frame (gesture 'Stop', sequence 12, frame 23) for inspection.
# The path is built from DATA_PATH rather than a machine-specific absolute
# path, so the cell works wherever the dataset folder lives.
dt = np.load(os.path.join(DATA_PATH, 'Stop', '12', '23.npy'))

In [51]:
# Number of values stored for a single saved frame.
print(len(dt))

63


In [16]:
# WARNING: destructive. Deletes every previously collected sample under
# DATA_PATH so collection can start from a clean folder.
# `!rm -rf DATA_PATH` passed the literal word "DATA_PATH" to the shell instead
# of the Python variable's value, so it never touched the dataset folder; the
# real path also contains a space. shutil.rmtree avoids both problems.
import shutil

shutil.rmtree(DATA_PATH, ignore_errors=True)

In [10]:
# Create one folder per (gesture, sequence) pair: DATA_PATH/<action>/<sequence>.
for action in actions:
    for sequence in range(no_sequences):
        # exist_ok=True tolerates folders left by an earlier run while still
        # surfacing real failures (permissions, invalid path) that a bare
        # `except: pass` would have hidden.
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

Load Trained Model

In [11]:
# Load the previously trained Keras model from 'model.h5'; the detection loop
# at the end of the notebook calls `lstm.predict` on it.
lstm = load_model('model.h5')

2022-02-25 17:11:52.285126: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-02-25 17:11:52.286871: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Data Collection

In [46]:
# Record `no_sequences` sequences of `sequence_length` frames for every gesture
# and save each frame's keypoints to DATA_PATH/<action>/<sequence>/<frame>.npy.
# Press 'q' to abort the whole collection run.
stream = cv2.VideoCapture(0)
stop_requested = False

try:
    for action in actions:
        for sequence in range(no_sequences):
            for frame_num in range(sequence_length):
                ret, frame = stream.read()
                # Fail loudly rather than pass None to cv2.flip or silently
                # record a gap in the dataset.
                if not ret:
                    raise RuntimeError(
                        'Could not read a frame from the camera while collecting '
                        '{} sequence {} frame {}'.format(action, sequence, frame_num))

                image = cv2.flip(frame, 1)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                # Mark the frame read-only while the tracker processes it.
                image.flags.writeable = False
                results = hand_tracker.process(image)
                image.flags.writeable = True
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                image = drawLandMarks(results, image)

                # The first frame of each sequence gets an extra banner and a
                # pause so the user can get into position.
                is_first_frame = frame_num == 0
                if is_first_frame:
                    cv2.putText(image, 'STARTING COLLECTION', (120,200),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                # Show to screen
                cv2.imshow('Data Collection', image)
                if is_first_frame:
                    cv2.waitKey(5000)

                # Export keypoints (np.save appends the .npy extension)
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                np.save(npy_path, keypoints)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    stop_requested = True
                    break
            # Propagate the quit request out of all three loops.
            if stop_requested:
                break
        if stop_requested:
            break
finally:
    # Release the camera only after every action has been recorded (or on
    # abort or error). Releasing inside the action loop closed the camera
    # after the first gesture.
    stream.release()
    cv2.destroyAllWindows()

In [47]:
# Manual cleanup: free the camera held by `stream` and close any OpenCV
# windows, e.g. after a webcam cell was interrupted.
stream.release()
cv2.destroyAllWindows()

Preprocessing

In [1]:
!pip install sklearn

Collecting sklearn
  Using cached sklearn-0.0-py2.py3-none-any.whl
Collecting scikit-learn
  Using cached scikit_learn-1.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (26.4 MB)
Collecting joblib>=0.11
  Using cached joblib-1.1.0-py2.py3-none-any.whl (306 kB)
Collecting threadpoolctl>=2.0.0
  Using cached threadpoolctl-3.1.0-py3-none-any.whl (14 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn, sklearn
Successfully installed joblib-1.1.0 scikit-learn-1.0.2 sklearn-0.0 threadpoolctl-3.1.0


In [2]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [4]:
import numpy as np
import os

# Gesture labels for the preprocessing section; the array position of each
# label is what the label map below uses as its class index.
actions = np.array(['Up', 'Down', 'Right', 'Left', 'Stop'])

In [7]:
# Root folder (under the current working directory) holding the exported
# keypoint arrays
DATA_PATH = os.path.join(os.getcwd(), 'Hand Gestures')

# Gesture classes to detect
actions = np.array(['Up', 'Down', 'Right', 'Left', 'Stop'])

# Number of recorded sequences ("videos") per gesture
no_sequences = 30

# Number of frames in every sequence
sequence_length = 30

In [5]:
# Map every gesture name to its integer class index (its position in `actions`).
label_map = {gesture: index for index, gesture in enumerate(actions)}

In [6]:
# Display the gesture-name -> class-index mapping.
label_map

{'Up': 0, 'Down': 1, 'Right': 2, 'Left': 3, 'Stop': 4}

In [10]:
# Rebuild the dataset from disk: one entry in `sequences` per recorded
# sequence (a list of its per-frame arrays) and the matching class index in
# `labels`.
sequences, labels = [], []
for action in actions:
    class_index = label_map[action]
    for sequence in range(no_sequences):
        sequence_dir = os.path.join(DATA_PATH, action, str(sequence))
        frames = [
            np.load(os.path.join(sequence_dir, f"{frame_num}.npy"))
            for frame_num in range(sequence_length)
        ]
        sequences.append(frames)
        labels.append(class_index)

In [11]:
# Total number of loaded sequences (one per action/sequence folder).
print(len(sequences))

150


In [12]:
# Number of labels; one is appended for every loaded sequence.
print(len(labels))

150


In [13]:
# One-hot encode the integer class labels and cast the result to int.
lb = to_categorical(labels).astype(int)

In [15]:
# Turn the nested list of per-frame arrays into a single ndarray.
sequences = np.array(sequences)
# Array copy of the one-hot labels, used as the targets for the split below.
labls = np.array(lb)

In [16]:
# Shape of the feature array built from the loaded sequences.
sequences.shape

(150, 30, 63)

In [18]:
# Shape of the one-hot label array.
labls.shape

(150, 5)

In [19]:
# Hold out 5% of the sequences for testing. A fixed random_state makes the
# split, and the .npy files saved from it below, reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    sequences, labls, test_size=0.05, random_state=42
)

In [20]:
# Shape of the training features after the split.
x_train.shape

(142, 30, 63)

In [21]:
# Persist the training features (np.save appends the .npy extension).
np.save("x_train", x_train)

In [22]:
# Persist the test features (np.save appends the .npy extension).
np.save("x_test", x_test)

In [23]:
# Persist the training labels (np.save appends the .npy extension).
np.save("y_train", y_train)

In [24]:
# Persist the test labels (np.save appends the .npy extension).
np.save("y_test", y_test)

Detections

In [12]:
# 1. New detection variables
sequence = []
sentence = []
threshold = 0.8

In [None]:
# Real-time detection: feed the last `sequence_length` keypoint vectors to the
# model on every frame and print the predicted gesture. Press 'q' to stop.
st = cv2.VideoCapture(0)

try:
    while st.isOpened():
        ret, frame = st.read()
        # A failed read leaves `frame` as None, which would crash cv2.flip.
        if not ret:
            break

        image = cv2.flip(frame, 1)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Mark the frame read-only while the tracker processes it.
        image.flags.writeable = False
        results = hand_tracker.process(image)
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        image = drawLandMarks(results, image)

        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        # Keep a sliding window the same length as the recorded sequences.
        sequence = sequence[-sequence_length:]

        if len(sequence) == sequence_length:
            # verbose=0 keeps Keras from printing a progress bar every frame.
            res = lstm.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
            best = np.argmax(res)
            # Only report predictions the model is confident about.
            if res[best] > threshold:
                print(actions[best])

        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame raised.
    st.release()
    cv2.destroyAllWindows()

2022-02-25 17:12:13.998656: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Right
Right
Right
Right
Right
Right
Right
Right
Right
Right
Right
Right
Up
Up
Up
Up
Up
Up
Up
Up
Up
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Stop
Up
Up
Up
Up
Right
Right
Right
Right
Right
Right
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Right
Right
Right
Right
Right
Right
Right
Right
Right
Right
Right
Right
Right
Right
Right
Right
Right
Right
Down
Down
Down
Down
Down
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Up
Down
Down
Down
Down
Down
Down
Down
Down
Right
Right
Right
Right
Right
Right
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Down
Right
Right
Down
Down
