In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [3]:
def mediapipe_detection(image, model):
    """Run a MediaPipe model on one OpenCV frame.

    Parameters
    ----------
    image : BGR frame as delivered by OpenCV; it is converted to RGB for the model.
    model : object exposing ``process(rgb_image)``.

    Returns
    -------
    tuple
        ``(bgr_image, results)``: the frame converted back to BGR and whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    rgb_frame.flags.writeable = False   # read-only while the model processes it
    detection = model.process(rgb_frame)
    rgb_frame.flags.writeable = True    # writable again before converting back
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), detection


In [4]:
def draw_styled_landmarks(img, results):
    """Draw the pose, left-hand and right-hand landmarks onto ``img``.

    Each landmark set gets its own pair of drawing specs (one for the points,
    one for the connections). Nothing is returned; ``img`` is drawn on directly.
    """
    spec = mp_drawing.DrawingSpec
    # (landmarks, connections, point style, connection style), drawn in this order.
    layers = (
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80,22,10), thickness=2, circle_radius=4),
         spec(color=(80,44,121), thickness=2, circle_radius=2)),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121,22,76), thickness=2, circle_radius=4),
         spec(color=(121,44,250), thickness=2, circle_radius=2)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245,117,66), thickness=2, circle_radius=4),
         spec(color=(245,66,230), thickness=2, circle_radius=2)),
    )
    for landmarks, connections, point_style, line_style in layers:
        mp_drawing.draw_landmarks(img, landmarks, connections, point_style, line_style)

In [5]:
def extract_keypoints(results):
    """Flatten pose and hand landmarks into a single 1-D feature vector.

    Layout of the returned array: pose landmarks as ``[x, y, z, visibility]``
    each, then left-hand and right-hand landmarks as ``[x, y, z]`` each.
    A landmark set that is missing (falsy) is replaced by zeros: ``33*4`` for
    the pose and ``21*3`` per hand.
    """
    def _flat(landmark_set, fields, fallback_size):
        # Zero placeholder keeps the vector layout fixed when nothing was detected.
        if not landmark_set:
            return np.zeros(fallback_size)
        rows = [[getattr(point, name) for name in fields] for point in landmark_set.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    parts = [
        _flat(results.pose_landmarks, xyz + ('visibility',), 33*4),
        _flat(results.left_hand_landmarks, xyz, 21*3),
        _flat(results.right_hand_landmarks, xyz, 21*3),
    ]
    return np.concatenate(parts)


In [15]:
# Root folder of the recorded keypoints: <DATA_PATH>/<action>/<sequence>/<frame>.npy
DATA_PATH = os.path.join('PHRASES_DATASET')
# Phrases that get recorded and classified.
actions = np.array(['hello', 'bye', 'yes', 'no', 'please'])
# Sequences recorded per action, and frames per sequence.
no_sequence, sequence_length = 40, 20
# Frame size requested from the webcam in the live-recognition cell.
WIDTH, HEIGHT = 640, 480

In [16]:
# Create one folder per (action, sequence) pair.
# exist_ok=True makes the cell safe to re-run, while real failures (permissions,
# invalid path, ...) still raise instead of being silently swallowed.
for action in actions:
    for sequence in range(no_sequence):
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

In [13]:
# Record keypoint sequences from the webcam and store every frame's keypoints as
# <DATA_PATH>/<action>/<sequence>/<frame_num>.npy.
# NOTE(review): recording starts at sequence 29 rather than 0, presumably to resume
# an earlier session - set START_SEQUENCE to 0 to record the full dataset.
START_SEQUENCE = 29

cap = cv2.VideoCapture(0)
stop_requested = False   # set when 'q' is pressed or the camera stops delivering frames

try:
    # Access mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        for action in actions:
            for sequence in range(START_SEQUENCE, no_sequence):
                cv2.waitKey(1000)   # wait (up to 1 s) between sequences
                for frame_num in range(sequence_length):
                    ret, frame = cap.read()
                    if not ret:
                        # No frame available: stop instead of passing None to cvtColor.
                        stop_requested = True
                        break

                    # Make detections
                    image, results = mediapipe_detection(frame, holistic)

                    # Landmarks
                    draw_styled_landmarks(image, results)

                    if frame_num == 0:
                        cv2.waitKey(400)   # extra wait before the first frame of a sequence

                    cv2.putText(image, 'frames for {} Video Number {}'.format(action, sequence), (15,12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

                    # keypoint extraction
                    keypoints = extract_keypoints(results)
                    npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                    np.save(npy_path, keypoints)

                    # Show the frame
                    cv2.imshow('OpenCV Feed', image)

                    # Stop the whole recording session on q (not just the current sequence)
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        stop_requested = True
                        break
                if stop_requested:
                    break
            if stop_requested:
                break
finally:
    # Always free the camera and close the preview window, even after an error.
    cap.release()
    cv2.destroyAllWindows()

In [12]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [13]:
# Map every action name to its position in `actions`; used as the integer class label.
label_map = dict(zip(actions, range(len(actions))))

In [14]:
# Display the mapping to check the action -> index assignment.
label_map

{'hello': 0, 'bye': 1, 'yes': 2, 'no': 3, 'please': 4}

In [17]:
import random

def get_random_list(sequence_length=90, sequence=60):
    """Return ``sequence`` distinct random integers drawn from ``range(sequence_length)``.

    Parameters
    ----------
    sequence_length : int
        Exclusive upper bound of the values to pick from (the pool is
        ``0 .. sequence_length - 1``).
    sequence : int
        How many distinct values to return. Zero or a negative number yields
        an empty list.

    Returns
    -------
    list of int
        The picked values, in random order.

    Raises
    ------
    ValueError
        If more values are requested than the pool contains. The previous
        rejection-sampling loop never terminated in that situation.
    """
    if sequence <= 0:
        return []
    # random.sample draws without replacement, so uniqueness is guaranteed
    # without the manual "already picked?" scan.
    return random.sample(range(sequence_length), sequence)


In [17]:
# Build the dataset: for every action, load a random selection of 20 of the 40
# recorded sequences. Each sequence becomes one window (a list of per-frame
# keypoint arrays) paired with the action's integer label.
sequences, labels = [], []

for action in actions:
    for sequence in get_random_list(40, 20):
        window = [
            np.load(os.path.join(DATA_PATH, action, str(sequence), str(frame_num) + '.npy'))
            for frame_num in range(sequence_length)
        ]
        sequences.append(window)
        labels.append(label_map[action])

In [18]:
# Sanity check: (number of sequences, frames per sequence, values per frame).
# NOTE(review): with the current settings (5 actions x 20 sampled sequences, 20 frames,
# 33*4 + 21*3 + 21*3 = 258 keypoint values) this should be (100, 20, 258); the recorded
# output below appears to come from an earlier configuration - re-run to refresh it.
np.array(sequences).shape

(150, 30, 1662)

In [19]:
# One integer label per loaded sequence, so the length must match the first axis above.
# NOTE(review): the recorded output below looks stale for the current settings - re-run.
np.array(labels).shape

(150,)

In [20]:
# Stack the windows into one feature array (sequences x frames x keypoint values).
X = np.array(sequences)
# One-hot encode the integer labels and store them as ints.
y = to_categorical(labels).astype(int)

In [21]:
# Hold out 5% of the sequences for evaluation. The fixed random_state keeps the
# split identical between runs, so the held-out set does not change on every re-run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)

In [22]:
# Size of the held-out set: (test samples, number of classes).
# NOTE(review): the recorded output below presumably predates the current dataset size - re-run.
y_test.shape

(8, 5)

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [18]:
# Stacked-LSTM classifier. Input: a window of 20 frames x 258 keypoint values.
# Output: softmax over the actions (one unit per entry in `actions`).
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(20,258)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),   # last LSTM emits only its final state
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),
])




In [27]:
# One-hot targets with a softmax output: categorical cross-entropy as the loss,
# categorical accuracy as the reported metric.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [29]:
# Train for up to 300 epochs on the training split only (no validation data is passed).
# NOTE(review): the recorded run below was interrupted by hand (KeyboardInterrupt)
# well before epoch 300.
model.fit(X_train, y_train, epochs=300)

Epoch 1/300


2023-05-14 22:03:42.806241: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 7

KeyboardInterrupt: 

In [11]:
# Persist the trained model. An empty string is not a usable save path, so write to
# a named HDF5 file in the working directory; the file name matches the weights file
# loaded in the next cell. Adjust the location as needed.
model.save('phrases_052023.h5')



In [20]:
# Load the trained weights. The location can be overridden with the ASL_WEIGHTS_PATH
# environment variable so the notebook also runs on machines other than the author's;
# the original absolute path remains the default.
WEIGHTS_PATH = os.environ.get(
    'ASL_WEIGHTS_PATH',
    '/Users/ablayaman/Desktop/diploma_asl_project/ASL_PHRASES/phrases_052023.h5',
)
model.load_weights(WEIGHTS_PATH)

In [21]:
import random
import pyperclip
def generate_colors(n_colors):
    """Return ``n_colors`` random colours, each a ``[r, g, b]`` list of ints in 0..255."""
    return [
        [random.randint(0, 255) for _channel in range(3)]
        for _ in range(n_colors)
    ]

In [22]:
# One random colour per action, used for that action's probability bar in prob_viz.
colors = generate_colors(len(actions))
def prob_viz(res, actions, input_frame, colors):
    """Draw one labelled probability bar per action on a copy of ``input_frame``.

    Parameters
    ----------
    res : sequence of float
        One probability per action, in the same order as ``actions``
        (assumed to lie in 0..1 - the bar length is ``prob * 100`` pixels).
    actions : sequence
        Action names used as bar labels.
    input_frame : image array
        Frame to annotate; it is copied, the original is left untouched.
    colors : sequence
        One fill colour per action.

    Returns
    -------
    The annotated copy of ``input_frame``.
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        percent = prob * 100
        top = 60 + num * 40   # bars are stacked every 40 px, starting below the header strip
        cv2.rectangle(output_frame, (0, top), (int(percent), top + 30), colors[num], -1)
        # Fixed-point formatting: slicing str(percent) to four characters produced
        # labels such as '1.2e%' whenever a tiny probability printed in scientific notation.
        cv2.putText(output_frame, f'{actions[num]} {percent:.1f}%', (0, top + 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

    return output_frame

In [23]:
# Live recognition: read webcam frames, keep a rolling window of keypoints, classify
# the window and build a sentence from stable, confident predictions.
# Keys: Enter confirms the current word, 'c' copies the sentence to the clipboard,
# 's' saves the confirmed words, 'q' quits.
sequence = []        # rolling window of the most recent keypoint vectors
confirmed = []       # words locked in with Enter (drawn in green)
predictions = []     # most recent predicted class indices, for the stability check
sentence = []        # confirmed words followed by the current candidate word
threshold = 0.7      # minimum probability for a prediction to be shown as candidate
counter = 0
res = None           # latest probability vector; None until the window is full
STABLE_FRAMES = 10   # number of recent predictions that must agree
ENTER_KEY = 13
cap = cv2.VideoCapture(0)
cap.set(3, WIDTH)
cap.set(4, HEIGHT)
sentence_len = 0
try:
    # mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # Camera delivered no frame: stop instead of passing None to cvtColor.
                break

            # detections
            image, results = mediapipe_detection(frame, holistic)
            # landmarks
            draw_styled_landmarks(image, results)

            # Make predictions once the window holds a full sequence
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-sequence_length:]
            if len(sequence) == sequence_length:
                # verbose=0 keeps predict() from printing a progress bar on every frame
                res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
                best = int(np.argmax(res))
                predictions.append(best)
                predictions = predictions[-STABLE_FRAMES:]   # bounded history

                # Stable only if every recent prediction equals the current one.
                # (np.unique(...)[0] merely compared against the smallest recent class index.)
                stable = all(previous == best for previous in predictions)
                if stable and res[best] > threshold:
                    if len(sentence) == 0:
                        sentence.append(actions[best])
                    else:
                        sentence[-1] = actions[best]   # replace the current candidate
                if len(sentence) > 10:
                    sentence = sentence[-10:]

                image = prob_viz(res, actions, image, colors)
                cv2.rectangle(image, (0,0), (1280, 50), (245, 117, 16), -1)
                cv2.putText(image, ' '.join(sentence), (3,30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                # Confirmed words are drawn in green over the white sentence text.
                cv2.putText(image, ' '.join(confirmed), (3,30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, ( 0, 255, 0), 2, cv2.LINE_AA)

            # Show to screen
            cv2.imshow('ASL Detection', image)

            # Read the keyboard exactly once per frame; separate waitKey calls each
            # consume their own event, so most key presses were never seen by the
            # branch that was meant to handle them.
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            elif key == ENTER_KEY:
                if res is not None:
                    # Lock in the current word and open a new candidate slot.
                    sentence.append(actions[np.argmax(res)])
                    confirmed = sentence[:-1]
                    counter += 1
            elif key == ord('c'):
                pyperclip.copy(' '.join(sentence))
            elif key == ord('s'):
                if len(sentence) != 0:
                    # NOTE(review): save_signed_text is not defined in the visible part of
                    # this notebook - confirm it exists before relying on the 's' key.
                    save_signed_text(' '.join(sentence[:-1]))
finally:
    # Always free the camera and close the window, even after an error or interrupt.
    cap.release()
    cv2.destroyAllWindows()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
2023-05-17 17:12:37.986966: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz






KeyboardInterrupt: 