In [285]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp 

In [286]:
mp_holist = mp.solutions.holistic 
mp_draw = mp.solutions.drawing_utils

In [287]:
def mediapipe_detection(img, model):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img.flags.writeable = False                 
    result = model.process(img)                 # Make prediction
    img.flags.writeable = True                   
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 
    return img, result

In [288]:
def draw_landmarks(img, result):
    mp_draw.draw_landmarks(img, result.face_landmarks, mp_holist.FACEMESH_CONTOURS) # Draw face connections
    mp_draw.draw_landmarks(img, result.pose_landmarks, mp_holist.POSE_CONNECTIONS) # Draw pose connections
    mp_draw.draw_landmarks(img, result.left_hand_landmarks, mp_holist.HAND_CONNECTIONS) # Draw left hand connections
    mp_draw.draw_landmarks(img, result.right_hand_landmarks, mp_holist.HAND_CONNECTIONS) # Draw right hand connections

In [289]:
def draw_styled_landmarks(img, result):
    mp_draw.draw_landmarks(img, result.face_landmarks, mp_holist.FACEMESH_CONTOURS, 
                             mp_draw.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), # color the joint 
                             mp_draw.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1) #color the connection
                             ) 
    # mp_draw.draw_landmarks(img, result.face_landmarks, mp_holist.FACEMESH_CONTOURS, 
    #                          mp_draw.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), # color the joint 
    #                          mp_draw.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1) #color the connection
    #                          ) 
    
    mp_draw.draw_landmarks(img, result.pose_landmarks, mp_holist.POSE_CONNECTIONS,
                             mp_draw.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_draw.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    mp_draw.draw_landmarks(img, result.left_hand_landmarks, mp_holist.HAND_CONNECTIONS, 
                             mp_draw.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_draw.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    mp_draw.draw_landmarks(img, result.right_hand_landmarks, mp_holist.HAND_CONNECTIONS, 
                             mp_draw.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_draw.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

In [290]:
mp_holist.POSE_CONNECTIONS

frozenset({(0, 1),
           (0, 4),
           (1, 2),
           (2, 3),
           (3, 7),
           (4, 5),
           (5, 6),
           (6, 8),
           (9, 10),
           (11, 12),
           (11, 13),
           (11, 23),
           (12, 14),
           (12, 24),
           (13, 15),
           (14, 16),
           (15, 17),
           (15, 19),
           (15, 21),
           (16, 18),
           (16, 20),
           (16, 22),
           (17, 19),
           (18, 20),
           (23, 24),
           (23, 25),
           (24, 26),
           (25, 27),
           (26, 28),
           (27, 29),
           (27, 31),
           (28, 30),
           (28, 32),
           (29, 31),
           (30, 32)})

In [291]:
cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holist.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        ret, frame = cap.read()

        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        draw_styled_landmarks(image, results)

        cv2.imshow('OpenCV Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()



<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

In [292]:
cap.release()
cv2.destroyAllWindows()

In [293]:
results.pose_landmarks.landmark[0].visibility

0.9989380240440369

In [294]:
len(results.pose_landmarks.landmark)

33

In [295]:
def extract_keypoints(results):
    pose=np.array([[res.x,res.y,res.z,res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    left_hnd=np.array([[res.x,res.y,res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    right_hnd=np.array([[res.x,res.y,res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    face=np.array([[res.x,res.y,res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    return np.concatenate([pose,left_hnd,right_hnd,face])
# concatenating for the model to detect the sign language

In [296]:
extract_keypoints(results).shape

(1662,)

In [297]:
import os 
video_dir = "C:/Users/araya/Desktop/keypoints/video_extract"
video_list = []
video_list = os.listdir(video_dir)

len(video_list)

7

In [298]:
video_list

['กฎกระทรวง.mp4',
 'กฎหมายรัฐธรรมนูญ.mp4',
 'กรมอนามัย.mp4',
 'กระโดด.mp4',
 'เขิน.mp4',
 'แนะนำ.mp4',
 'โกหก.mp4']

In [299]:
# Path for exported data, numpy arrays
Model_Data=os.path.join('Data for different actions')

actions = np.array(video_list)

no_of_seqs = 30

# 30 frames in length
seq_length = 30

In [300]:
actions

array(['กฎกระทรวง.mp4', 'กฎหมายรัฐธรรมนูญ.mp4', 'กรมอนามัย.mp4',
       'กระโดด.mp4', 'เขิน.mp4', 'แนะนำ.mp4', 'โกหก.mp4'], dtype='<U20')

In [301]:
# just creating the folders and sub folders

for action in actions: 
    try: 
        os.makedirs(os.path.join(Model_Data, action))
    except:
        pass

Collecting keypoint values for Training nd Testing

In [302]:
# Define the directory where your videos are stored
directory = "C:/Users/araya/Desktop/keypoints/video_extract"

In [303]:
directory

'C:/Users/araya/Desktop/keypoints/video_extract'

In [304]:
for filename in actions:
    print(directory + '/' + filename)

C:/Users/araya/Desktop/keypoints/video_extract/กฎกระทรวง.mp4
C:/Users/araya/Desktop/keypoints/video_extract/กฎหมายรัฐธรรมนูญ.mp4
C:/Users/araya/Desktop/keypoints/video_extract/กรมอนามัย.mp4
C:/Users/araya/Desktop/keypoints/video_extract/กระโดด.mp4
C:/Users/araya/Desktop/keypoints/video_extract/เขิน.mp4
C:/Users/araya/Desktop/keypoints/video_extract/แนะนำ.mp4
C:/Users/araya/Desktop/keypoints/video_extract/โกหก.mp4


In [305]:
# Set mediapipe model 
for action in actions:
    video_path = os.path.join("C:/Users/araya/Desktop/keypoints/video_extract", action)
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print(f"Error opening video file: {video_path}")
        continue

    with mp_holist.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        for seq in range(no_of_seqs):
            for frame_num in range(seq_length):

                ret, frame = cap.read()
                if not ret:
                    print(f"End of video {video_path}")
                    break
                
                img, results = mediapipe_detection(frame, holistic)
                draw_styled_landmarks(img, results)

                if frame_num == 0: 
                    cv2.putText(img, 'DATA COLLECTION STARTED', (120,200), 
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(img, f'Collecting frames for - {action} Sequence Number - {seq}', (15,12), 
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    cv2.imshow('OpenCV Window', img)
                    cv2.waitKey(2000)  # 2 seconds delay for setup
                else: 
                    cv2.putText(img, f'Collecting frames for - {action} Sequence Number - {seq}', (15,12), 
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    cv2.imshow('OpenCV Window', img)

                keypoints = extract_keypoints(results)
                npy_path = os.path.join(Model_Data, action, f"frame_{frame_num}.npy")
                os.makedirs(os.path.dirname(npy_path), exist_ok=True)
                np.save(npy_path, keypoints)

                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break

            if not ret:
                break

    cap.release()
    cv2.destroyAllWindows()

End of video C:/Users/araya/Desktop/keypoints/video_extract\กฎกระทรวง.mp4
End of video C:/Users/araya/Desktop/keypoints/video_extract\กฎหมายรัฐธรรมนูญ.mp4
End of video C:/Users/araya/Desktop/keypoints/video_extract\กรมอนามัย.mp4
End of video C:/Users/araya/Desktop/keypoints/video_extract\กระโดด.mp4
End of video C:/Users/araya/Desktop/keypoints/video_extract\เขิน.mp4
End of video C:/Users/araya/Desktop/keypoints/video_extract\แนะนำ.mp4
End of video C:/Users/araya/Desktop/keypoints/video_extract\โกหก.mp4


In [241]:
# # Loop through all files in the directory
# for filename in os.listdir(directory):
#     # Check if the file is a video by checking its extension
#     if filename.endswith(('.mp4', '.avi', '.mkv', '.mov')):
#         print(f"Processing {filename}...")
#         video_path = os.path.join(directory, filename)

#         class_name = os.path.splitext(os.path.basename(video_path))[0]
        
#         cap = cv2.VideoCapture(video_path)
#         with mp_holist.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
#             for action in actions:
#                 for seq in range(no_of_seqs):
#                     for frame_num in range(seq_length):
#                         ret, frame = cap.read()
#                         if not ret:
#                             print("Error: Failed to read frame.")
#                             break  # Exit the loop if frame read fails
                        
#                         img, results = mediapipe_detection(frame, holistic)
#                         draw_styled_landmarks(img, results)

#                         # logic is for the formatting portion
#                         if frame_num == 0: 
#                             cv2.putText(img, 'DATA COLLECTION STARTED', (120,200), 
#                                     cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
#                             cv2.putText(img, 'Collecting frames for - {} and Sequence Number - {}'.format(action, seq), (15,12), 
#                                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
#                             # Show to screen
#                             cv2.imshow('OpenCV Window', img)
#                             # providing the break for adjusting the posture
#                             cv2.waitKey(2000) # 2 sec
#                         else: 
#                             cv2.putText(img, 'Collecting frames for - {} and Sequence Number - {}'.format(action, seq), (15,12), 
#                                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
#                             # Show to screen
#                             cv2.imshow('OpenCV Window', img)

#                         keypoints = extract_keypoints(results)
#                         npy_path = os.path.join(Model_Data, action, str(seq), str(frame_num))
#                         np.save(npy_path, keypoints)

#                         if cv2.waitKey(10) & 0xFF == ord('q'):
#                             break

#         cap.release()
#         cv2.destroyAllWindows()


In [306]:
cap.release()
cv2.destroyAllWindows()

In [307]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [308]:
labelMap = {label:num for num, label in enumerate(actions)}

In [309]:
labelMap

{'กฎกระทรวง.mp4': 0,
 'กฎหมายรัฐธรรมนูญ.mp4': 1,
 'กรมอนามัย.mp4': 2,
 'กระโดด.mp4': 3,
 'เขิน.mp4': 4,
 'แนะนำ.mp4': 5,
 'โกหก.mp4': 6}

In [310]:
seqs, labels = [], []

for action in actions:
    for seq in range(no_of_seqs):
        window = []
        for frame_num in range(seq_length):
            res = np.load(os.path.join(Model_Data, action, f"frame_{frame_num}.npy")) 
            window.append(res)
        seqs.append(window)

        labels.append(labelMap[action])

In [311]:
np.array(seqs).shape

(210, 30, 1662)

In [312]:
X_data = np.array(seqs)

In [313]:
X_data.shape

(210, 30, 1662)

In [314]:
# changing the labels from 0,1,2 to categorical data for easier accessebility
Y_label = to_categorical(labels).astype(int)
Y_label

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1]])

In [315]:
# splitting
X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_label, test_size=0.05)
X_test.shape

(11, 30, 1662)

### Building LSTM

In [316]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [317]:
# adding the logs folder
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [318]:
# neural network

model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))

# adding 64 units for dense layer
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

  super().__init__(**kwargs)


In [319]:
# eg
eg_res = [.7, 0.2, 0.1]
actions[np.argmax(eg_res)]

'กฎกระทรวง.mp4'

In [320]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [321]:
model.fit(X_train, Y_train, epochs=300, callbacks=[tb_callback])
# tensorboard --logdir=.

Epoch 1/300
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - categorical_accuracy: 0.1205 - loss: 1.9942
Epoch 2/300
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - categorical_accuracy: 0.1464 - loss: 1.9509
Epoch 3/300
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - categorical_accuracy: 0.2020 - loss: 1.9293
Epoch 4/300
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - categorical_accuracy: 0.2276 - loss: 1.9096
Epoch 5/300
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - categorical_accuracy: 0.2921 - loss: 1.8199
Epoch 6/300
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - categorical_accuracy: 0.3540 - loss: 1.8986
Epoch 7/300
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - categorical_accuracy: 0.2906 - loss: 1.7712
Epoch 8/300
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - cate

<keras.src.callbacks.history.History at 0x1e28b61c380>

In [322]:
model.summary()

In [323]:
res=model.predict(X_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


In [341]:
# again the actions with the max value provided by softmax is returned
actions[np.argmax(res[0])]

'กระโดด.mp4'

In [342]:
actions[np.argmax(Y_test[0])]

'กระโดด.mp4'

In [326]:
X_test

array([[[ 0.47638127,  0.19754326, -1.32645559, ...,  0.54877836,
          0.16237868,  0.01577924],
        [ 0.47429261,  0.19735849, -1.32296157, ...,  0.54754186,
          0.16199362,  0.01551868],
        [ 0.47315758,  0.19736215, -1.31736887, ...,  0.54612029,
          0.16153097,  0.01508246],
        ...,
        [ 0.48569536,  0.19889912, -1.3363111 , ...,  0.55318135,
          0.16275555,  0.01583626],
        [ 0.48244408,  0.19824874, -1.32999432, ...,  0.55133295,
          0.16267788,  0.01602019],
        [ 0.47920948,  0.19772166, -1.32880759, ...,  0.54987949,
          0.16222292,  0.01524676]],

       [[ 0.5043326 ,  0.23381612, -1.24909925, ...,  0.56405097,
          0.20149724,  0.01034496],
        [ 0.50158727,  0.23035756, -1.24011362, ...,  0.5624519 ,
          0.19958371,  0.00978365],
        [ 0.50042224,  0.22890861, -1.23436153, ...,  0.56124794,
          0.19810659,  0.00918289],
        ...,
        [ 0.51122499,  0.24414708, -1.2697736 , ...,  

In [327]:
Y_test

array([[0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 1]])

### Evaluate

In [328]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [329]:
Y_hat = model.predict(X_train)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


In [330]:
Y_true = np.argmax(Y_train, axis=1).tolist()
# one hot encoding
Y_hat = np.argmax(Y_hat, axis=1).tolist()

In [331]:
Y_hat

[1,
 3,
 1,
 1,
 6,
 4,
 5,
 1,
 3,
 2,
 1,
 2,
 0,
 2,
 3,
 3,
 4,
 0,
 0,
 4,
 2,
 5,
 6,
 2,
 6,
 1,
 2,
 1,
 0,
 4,
 3,
 5,
 5,
 4,
 4,
 5,
 0,
 5,
 5,
 2,
 6,
 5,
 2,
 3,
 3,
 2,
 4,
 0,
 1,
 2,
 1,
 0,
 0,
 0,
 0,
 2,
 6,
 1,
 5,
 1,
 4,
 3,
 5,
 4,
 3,
 6,
 0,
 6,
 2,
 5,
 1,
 5,
 4,
 3,
 2,
 0,
 6,
 4,
 4,
 4,
 6,
 4,
 2,
 6,
 0,
 2,
 0,
 6,
 0,
 3,
 0,
 6,
 6,
 3,
 5,
 6,
 2,
 5,
 3,
 6,
 3,
 1,
 5,
 5,
 4,
 1,
 2,
 2,
 2,
 4,
 0,
 3,
 6,
 4,
 5,
 1,
 6,
 1,
 1,
 6,
 2,
 1,
 0,
 3,
 6,
 3,
 1,
 0,
 1,
 1,
 4,
 6,
 1,
 6,
 0,
 5,
 4,
 5,
 2,
 5,
 1,
 1,
 5,
 4,
 6,
 1,
 3,
 4,
 0,
 3,
 2,
 4,
 0,
 0,
 1,
 1,
 5,
 6,
 5,
 1,
 0,
 1,
 1,
 5,
 3,
 5,
 3,
 0,
 3,
 4,
 6,
 6,
 0,
 0,
 4,
 0,
 5,
 4,
 4,
 2,
 2,
 3,
 3,
 0,
 3,
 5,
 2,
 4,
 0,
 2,
 2,
 5,
 2,
 2,
 2,
 4,
 6,
 6,
 3]

In [332]:
# confution matrix
multilabel_confusion_matrix(Y_true, Y_hat)

array([[[169,   0],
        [  0,  30]],

       [[169,   0],
        [  0,  30]],

       [[169,   0],
        [  0,  30]],

       [[173,   0],
        [  0,  26]],

       [[171,   0],
        [  0,  28]],

       [[171,   0],
        [  0,  28]],

       [[172,   0],
        [  0,  27]]], dtype=int64)

In [333]:
accuracy_score(Y_true, Y_hat)

1.0