# 1. Import and Install Dependencies

In [None]:
!pip install tensorflow==2.4.1 opencv-python==4.1.2.30 mediapipe

In [1]:
import cv2
import numpy as np
import os
import mediapipe as mp

# 2. Keypoints using MP Holistic

In [2]:
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

In [3]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

In [4]:
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
                              mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                              mp_drawing.DrawingSpec(color=(80, 256, 121), thickness=1, circle_radius=1)
                              )

    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2, circle_radius=2)
                              )

    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2)
                              )

    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2)
                             )

# 3. Extract Keypoint Values

In [5]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33 * 4)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([pose, lh, rh])

# 4. Setup Folders for Collection

In [6]:
DATA_PATH = os.path.join('Dataset') 

signs = np.array(['ا', 'ب', 'ت'])

no_videos = 40

video_length = 30

In [None]:
for sign in signs:
    end = no_videos
    video = 1
    while video < end + 1:
        try:
            os.makedirs(os.path.join(DATA_PATH, sign, str(video)))
        except:
            end += 1
        video += 1

# 5. Collect Keypoint Values for Training and Testing

In [None]:
pause = False
cap = cv2.VideoCapture(0)

with mp_holistic.Holistic(min_detection_confidence=0.75, min_tracking_confidence=0.75) as holistic:
    
    for sign in signs:

        print(sign)
        total_no_videos = len(os.listdir(os.path.join(DATA_PATH, sign)))
        for video in range(1, total_no_videos + 1):

            for frame_num in range(1, video_length + 1):
                
                npy_path = os.path.join(DATA_PATH, sign, str(video), str(frame_num))
                if os.path.exists(npy_path + '.npy'):
                    break
                
                if cv2.waitKey(10) & 0xFF == ord('p'):
                    pause = True
                    
                if pause:
                    while True:
                        if cv2.waitKey(10) & 0xFF == ord('p'):
                            pause = False
                            break

                ret, frame = cap.read()

                image, results = mediapipe_detection(frame, holistic)

                draw_landmarks(image, results)
                
                if frame_num == 1: 
                    cv2.putText(image, 'STARTING COLLECTION', (120,200), 
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(sign, video), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

                    cv2.imshow('OpenCV Feed', image)
                    cv2.waitKey(1500)
                else: 
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(sign, video), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

                    cv2.imshow('OpenCV Feed', image)
                
                keypoints = extract_keypoints(results)
                np.save(npy_path, keypoints)

    cap.release()
    cv2.destroyAllWindows()

In [None]:
cap.release()
cv2.destroyAllWindows()

# 6. Preprocess Data and Create Labels and Features

In [7]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

KeyboardInterrupt: 

In [8]:
label_map = {label:num for num, label in enumerate(signs)}

In [9]:
videos, labels = [], []
for sign in signs:
    total_no_videos = len(os.listdir(os.path.join(DATA_PATH, sign)))
    
    for video in range(1, total_no_videos + 1):
        window = []
        
        for frame_num in range(1, video_length + 1):
            res = np.load(os.path.join(DATA_PATH, sign, str(video), "{}.npy".format(frame_num)))
            res = np.array(res[-126:])
            window.append(res)
        
        videos.append(window)
        labels.append(label_map[sign])

In [10]:
x = np.array(videos)
x.shape

(240, 30, 126)

In [11]:
y = to_categorical(labels).astype(int)

In [12]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

(216, 30, 126)
(216, 6)


# 7. Build and Train LSTM Neural Network

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [14]:
log_dir = os.path.join('Log')
tb_callback = TensorBoard(log_dir=log_dir)

In [9]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 126)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(signs.shape[0], activation='softmax'))

In [16]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [17]:
history = model.fit(x_train, y_train, epochs=2000, callbacks=[tb_callback])

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000
Epoch 31/2000
Epoch 32/2000
Epoch 33/2000
Epoch 34/2000
Epoch 35/2000
Epoch 36/2000
Epoch 37/2000
Epoch 38/2000
Epoch 39/2000
Epoch 40/2000
Epoch 41/2000
Epoch 42/2000
Epoch 43/2000
Epoch 44/2000
Epoch 45/2000
Epoch 46/2000
Epoch 47/2000
Epoch 48/2000
Epoch 49/2000
Epoch 50/2000
Epoch 51/2000
Epoch 52/2000
Epoch 53/2000
Epoch 54/2000
Epoch 55/2000
Epoch 56/2000
Epoch 57/2000
Epoch 58/2000
Epoch 59/2000
Epoch 60/2000
Epoch 61/2000
Epoch 62/2000
Epoch 63/2000
Epoch 64/2000
Epoch 65/2000
Epoch 66/2000
Epoch 67/2000
Epoch 68/2000
Epoch 69/2000
Epoch 70/2000
Epoch 71/2000
Epoch 72/2000
E

Epoch 146/2000
Epoch 147/2000
Epoch 148/2000
Epoch 149/2000
Epoch 150/2000
Epoch 151/2000
Epoch 152/2000
Epoch 153/2000
Epoch 154/2000
Epoch 155/2000
Epoch 156/2000
Epoch 157/2000
Epoch 158/2000
Epoch 159/2000
Epoch 160/2000
Epoch 161/2000
Epoch 162/2000
Epoch 163/2000
Epoch 164/2000
Epoch 165/2000
Epoch 166/2000
Epoch 167/2000
Epoch 168/2000
Epoch 169/2000
Epoch 170/2000
Epoch 171/2000
Epoch 172/2000
Epoch 173/2000
Epoch 174/2000
Epoch 175/2000
Epoch 176/2000
Epoch 177/2000
Epoch 178/2000
Epoch 179/2000
Epoch 180/2000
Epoch 181/2000
Epoch 182/2000
Epoch 183/2000
Epoch 184/2000
Epoch 185/2000
Epoch 186/2000
Epoch 187/2000
Epoch 188/2000
Epoch 189/2000
Epoch 190/2000
Epoch 191/2000
Epoch 192/2000
Epoch 193/2000
Epoch 194/2000
Epoch 195/2000
Epoch 196/2000
Epoch 197/2000
Epoch 198/2000
Epoch 199/2000
Epoch 200/2000
Epoch 201/2000
Epoch 202/2000
Epoch 203/2000
Epoch 204/2000
Epoch 205/2000
Epoch 206/2000
Epoch 207/2000
Epoch 208/2000
Epoch 209/2000
Epoch 210/2000
Epoch 211/2000
Epoch 212/

Epoch 290/2000
Epoch 291/2000
Epoch 292/2000
Epoch 293/2000
Epoch 294/2000
Epoch 295/2000
Epoch 296/2000
Epoch 297/2000
Epoch 298/2000
Epoch 299/2000
Epoch 300/2000
Epoch 301/2000
Epoch 302/2000
Epoch 303/2000
Epoch 304/2000
Epoch 305/2000
Epoch 306/2000
Epoch 307/2000
Epoch 308/2000
Epoch 309/2000
Epoch 310/2000
Epoch 311/2000
Epoch 312/2000
Epoch 313/2000
Epoch 314/2000
Epoch 315/2000
Epoch 316/2000
Epoch 317/2000
Epoch 318/2000
Epoch 319/2000
Epoch 320/2000
Epoch 321/2000
Epoch 322/2000
Epoch 323/2000
Epoch 324/2000
Epoch 325/2000
Epoch 326/2000
Epoch 327/2000
Epoch 328/2000
Epoch 329/2000
Epoch 330/2000
Epoch 331/2000
Epoch 332/2000
Epoch 333/2000
Epoch 334/2000
Epoch 335/2000
Epoch 336/2000
Epoch 337/2000
Epoch 338/2000
Epoch 339/2000
Epoch 340/2000
Epoch 341/2000
Epoch 342/2000
Epoch 343/2000
Epoch 344/2000
Epoch 345/2000
Epoch 346/2000
Epoch 347/2000
Epoch 348/2000
Epoch 349/2000
Epoch 350/2000
Epoch 351/2000
Epoch 352/2000
Epoch 353/2000
Epoch 354/2000
Epoch 355/2000
Epoch 356/

KeyboardInterrupt: 

In [18]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 30, 64)            48896     
                                                                 
 lstm_1 (LSTM)               (None, 30, 128)           98816     
                                                                 
 lstm_2 (LSTM)               (None, 64)                49408     
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 6)                 198       
                                                                 
Total params: 203,558
Trainable params: 203,558
Non-trai

# 8. Save Weights

In [19]:
model.save('model.h5')

In [20]:
del model

In [11]:
model.load_weights('model.h5')

# 10. Evaluation using Confusion Matrix and Accuracy

In [12]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [13]:
y_predict = model.predict(x_test)

NameError: name 'x_test' is not defined

In [26]:
y_true = np.argmax(y_test, axis=1).tolist()
y_predict = np.argmax(y_predict, axis=1).tolist()

In [27]:
multilabel_confusion_matrix(y_true, y_predict)

array([[[19,  0],
        [ 0,  5]],

       [[18,  0],
        [ 0,  6]],

       [[22,  1],
        [ 0,  1]],

       [[19,  0],
        [ 1,  4]],

       [[19,  0],
        [ 0,  5]],

       [[22,  0],
        [ 0,  2]]], dtype=int64)

In [28]:
accuracy_score(y_true, y_predict)

0.9583333333333334

# 11. Test in Real Time

In [14]:
def prob_viz(res, signs, input_frame):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), (16,117,245), -1)
        cv2.putText(output_frame, signs[num], (10, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255,255,255), 2, cv2.LINE_AA)
        cv2.putText(output_frame, str(round(prob * 100, 2)) + '%', (5, 60+num*40), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [None]:
sequence = []
sentence = []
predictions = []
threshold = 0.5

cap = cv2.VideoCapture(0)

with mp_holistic.Holistic(min_detection_confidence=0.7, min_tracking_confidence=0.7) as holistic:
    while cap.isOpened():

        ret, frame = cap.read()

        image, results = mediapipe_detection(frame, holistic)
        
        draw_landmarks(image, results)
        
        keypoints = extract_keypoints(results)
        keypoints = keypoints[-126:]
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
            predictions.append(np.argmax(res))
            
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if signs[np.argmax(res)] != sentence[-1]:
                            print(signs[np.argmax(res)])
                            sentence.append(signs[np.argmax(res)])
                    else:
                        print(signs[np.argmax(res)])
                        sentence.append(signs[np.argmax(res)])

            if len(sentence) > 10: 
                sentence = sentence[-10:]
            
            image = prob_viz(res, signs, image)
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        cv2.imshow('OpenCV Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

ل
س
12
1
ل
ب
12
س
12
س
ل
12
س
12
1


In [None]:
cap.release()
cv2.destroyAllWindows()