In [176]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp 

In [177]:
mp_holist = mp.solutions.holistic 
mp_draw = mp.solutions.drawing_utils

In [178]:
def mediapipe_detection(img, model):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img.flags.writeable = False                 
    result = model.process(img)                 # Make prediction
    img.flags.writeable = True                   
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 
    return img, result

In [179]:
def draw_landmarks(img, result):
    mp_draw.draw_landmarks(img, result.face_landmarks, mp_holist.FACEMESH_CONTOURS) # Draw face connections
    mp_draw.draw_landmarks(img, result.pose_landmarks, mp_holist.POSE_CONNECTIONS) # Draw pose connections
    mp_draw.draw_landmarks(img, result.left_hand_landmarks, mp_holist.HAND_CONNECTIONS) # Draw left hand connections
    mp_draw.draw_landmarks(img, result.right_hand_landmarks, mp_holist.HAND_CONNECTIONS) # Draw right hand connections

In [180]:
def draw_styled_landmarks(img, result):
    mp_draw.draw_landmarks(img, result.face_landmarks, mp_holist.FACEMESH_CONTOURS, 
                             mp_draw.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), # color the joint 
                             mp_draw.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1) #color the connection
                             ) 
    # mp_draw.draw_landmarks(img, result.face_landmarks, mp_holist.FACEMESH_CONTOURS, 
    #                          mp_draw.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), # color the joint 
    #                          mp_draw.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1) #color the connection
    #                          ) 
    
    mp_draw.draw_landmarks(img, result.pose_landmarks, mp_holist.POSE_CONNECTIONS,
                             mp_draw.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_draw.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    mp_draw.draw_landmarks(img, result.left_hand_landmarks, mp_holist.HAND_CONNECTIONS, 
                             mp_draw.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_draw.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    mp_draw.draw_landmarks(img, result.right_hand_landmarks, mp_holist.HAND_CONNECTIONS, 
                             mp_draw.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_draw.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

In [181]:
mp_holist.POSE_CONNECTIONS

frozenset({(0, 1),
           (0, 4),
           (1, 2),
           (2, 3),
           (3, 7),
           (4, 5),
           (5, 6),
           (6, 8),
           (9, 10),
           (11, 12),
           (11, 13),
           (11, 23),
           (12, 14),
           (12, 24),
           (13, 15),
           (14, 16),
           (15, 17),
           (15, 19),
           (15, 21),
           (16, 18),
           (16, 20),
           (16, 22),
           (17, 19),
           (18, 20),
           (23, 24),
           (23, 25),
           (24, 26),
           (25, 27),
           (26, 28),
           (27, 29),
           (27, 31),
           (28, 30),
           (28, 32),
           (29, 31),
           (30, 32)})

In [182]:
cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holist.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        ret, frame = cap.read()

        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        draw_styled_landmarks(image, results)

        cv2.imshow('OpenCV Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()



<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>


In [183]:
cap.release()
cv2.destroyAllWindows()

In [184]:
results.pose_landmarks.landmark[0].visibility

0.997908890247345

In [185]:
len(results.pose_landmarks.landmark)

33

In [186]:
def extract_keypoints(results):
    pose=np.array([[res.x,res.y,res.z,res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    left_hnd=np.array([[res.x,res.y,res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    right_hnd=np.array([[res.x,res.y,res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    face=np.array([[res.x,res.y,res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    return np.concatenate([pose,left_hnd,right_hnd,face])
# concatenating for the model to detect the sign language

In [187]:
extract_keypoints(results).shape

(1662,)

In [188]:
import os 
video_dir = "C:/Users/araya/Desktop/keypoints/video_extract"
video_list = []
video_list = os.listdir(video_dir)

len(video_list)

40

In [189]:
video_list

['กฎกระทรวง.mp4',
 'กฎหมายรัฐธรรมนูญ.mp4',
 'กรมอนามัย.mp4',
 'กรรม.mp4',
 'กรรมสิทธิ์.mp4',
 'กระโดด.mp4',
 'กล้วยบวชชี.mp4',
 'กล้วยเชื่อม.mp4',
 'กังวล.mp4',
 'กีฬา.mp4',
 'น้อง.mp4',
 'เขิน.mp4',
 'เขื่อนดิน.mp4',
 'เขื่อนสิริกิติ์.mp4',
 'เข้าใจผิด.mp4',
 'เคย.mp4',
 'เครียด.mp4',
 'เครื่องปั่นดิน.mp4',
 'เครื่องหมายการค้า.mp4',
 'เจอ.mp4',
 'เจ้าหนี้.mp4',
 'เช่าซื้อ.mp4',
 'เช่าทรัพย์.mp4',
 'เซอร์เบีย.mp4',
 'เซเนกัล.mp4',
 'เซ็ง.mp4',
 'เดิน.mp4',
 'เดิมพัน.mp4',
 'เพลีย.mp4',
 'เมื่อย.mp4',
 'เม็กซิโก.mp4',
 'เฮโรอีน.mp4',
 'แกมเบีย.mp4',
 'แซมเบีย.mp4',
 'โกหก.mp4',
 'โจทก์.mp4',
 'โชจู.mp4',
 'ใกล้.mp4',
 'ไดโนเสาร์.mp4',
 'ไอซ์.mp4']

In [190]:
# Path for exported data, numpy arrays
Model_Data=os.path.join('Data for different actions')

actions = np.array(video_list)

no_of_seqs = 1

# 30 frames in length
seq_length = 160

In [191]:
actions

array(['กฎกระทรวง.mp4', 'กฎหมายรัฐธรรมนูญ.mp4', 'กรมอนามัย.mp4',
       'กรรม.mp4', 'กรรมสิทธิ์.mp4', 'กระโดด.mp4', 'กล้วยบวชชี.mp4',
       'กล้วยเชื่อม.mp4', 'กังวล.mp4', 'กีฬา.mp4', 'น้อง.mp4', 'เขิน.mp4',
       'เขื่อนดิน.mp4', 'เขื่อนสิริกิติ์.mp4', 'เข้าใจผิด.mp4', 'เคย.mp4',
       'เครียด.mp4', 'เครื่องปั่นดิน.mp4', 'เครื่องหมายการค้า.mp4',
       'เจอ.mp4', 'เจ้าหนี้.mp4', 'เช่าซื้อ.mp4', 'เช่าทรัพย์.mp4',
       'เซอร์เบีย.mp4', 'เซเนกัล.mp4', 'เซ็ง.mp4', 'เดิน.mp4',
       'เดิมพัน.mp4', 'เพลีย.mp4', 'เมื่อย.mp4', 'เม็กซิโก.mp4',
       'เฮโรอีน.mp4', 'แกมเบีย.mp4', 'แซมเบีย.mp4', 'โกหก.mp4',
       'โจทก์.mp4', 'โชจู.mp4', 'ใกล้.mp4', 'ไดโนเสาร์.mp4', 'ไอซ์.mp4'],
      dtype='<U21')

In [117]:
# just creating the folders and sub folders

for action in actions: 
    try: 
        os.makedirs(os.path.join(Model_Data, action))
    except:
        pass

In [118]:
# import os
# augment_dir = "C:/Users/araya/Desktop/augments"

# augment_list = []
# augment_list = os.listdir(augment_dir)
# augment_list

In [119]:
# actions = list(actions)

In [120]:
# for x in augment_list:
#     # print(x)
#     actions.append(x)
# actions

In [121]:
# actions = np.array(actions)
# actions

Collecting keypoint values for Training nd Testing

In [192]:
# Define the directory where your videos are stored
directory = "C:/Users/araya/Desktop/keypoints/video_extract"

In [193]:
directory

'C:/Users/araya/Desktop/keypoints/video_extract'

In [194]:
for filename in actions:
    print(directory + '/' + filename)

C:/Users/araya/Desktop/keypoints/video_extract/กฎกระทรวง.mp4
C:/Users/araya/Desktop/keypoints/video_extract/กฎหมายรัฐธรรมนูญ.mp4
C:/Users/araya/Desktop/keypoints/video_extract/กรมอนามัย.mp4
C:/Users/araya/Desktop/keypoints/video_extract/กรรม.mp4
C:/Users/araya/Desktop/keypoints/video_extract/กรรมสิทธิ์.mp4
C:/Users/araya/Desktop/keypoints/video_extract/กระโดด.mp4
C:/Users/araya/Desktop/keypoints/video_extract/กล้วยบวชชี.mp4
C:/Users/araya/Desktop/keypoints/video_extract/กล้วยเชื่อม.mp4
C:/Users/araya/Desktop/keypoints/video_extract/กังวล.mp4
C:/Users/araya/Desktop/keypoints/video_extract/กีฬา.mp4
C:/Users/araya/Desktop/keypoints/video_extract/น้อง.mp4
C:/Users/araya/Desktop/keypoints/video_extract/เขิน.mp4
C:/Users/araya/Desktop/keypoints/video_extract/เขื่อนดิน.mp4
C:/Users/araya/Desktop/keypoints/video_extract/เขื่อนสิริกิติ์.mp4
C:/Users/araya/Desktop/keypoints/video_extract/เข้าใจผิด.mp4
C:/Users/araya/Desktop/keypoints/video_extract/เคย.mp4
C:/Users/araya/Desktop/keypoints/video_e

In [125]:
# # Set mediapipe model 
# for action in actions:
#     video_path = os.path.join("C:/Users/araya/Desktop/keypoints/video_extract", action)
#     cap = cv2.VideoCapture(video_path)
#     cap.set(cv2.CAP_PROP_FPS, 60)
#     length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
#     print("LENGTH:" + str(length))
#     # keypoints = []

#     if not cap.isOpened():
#         print(f"Error opening video file: {video_path}")
#         continue

#     with mp_holist.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
#         for seq in range(no_of_seqs):
#             for frame_num in range(seq_length):

#                 ret, frame = cap.read()
#                 if not ret:
#                     print(f"End of video {video_path}")
#                     break
                
#                 img, results = mediapipe_detection(frame, holistic)
#                 draw_styled_landmarks(img, results)

#                 # print(frame_num)

#                 if frame_num == 0: 
#                     cv2.putText(img, 'DATA COLLECTION STARTED', (120,200), 
#                                 cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
#                     cv2.putText(img, f'Collecting frames for - {action} Sequence Number - {seq}', (15,12), 
#                                 cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
#                     cv2.imshow('OpenCV Window', img)
#                     cv2.waitKey(2000)  # 2 seconds delay for setup
#                 else: 
#                     cv2.putText(img, f'Collecting frames for - {action} Sequence Number - {seq}', (15,12), 
#                                 cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
#                     cv2.imshow('OpenCV Window', img)

#                 keypoints = extract_keypoints(results)
#                 # keypoints.append(results)
#                 npy_path = os.path.join(Model_Data, action, f"frame_{frame_num}.npy")
#                 os.makedirs(os.path.dirname(npy_path), exist_ok=True)
#                 np.save(npy_path, keypoints)

#                 if cv2.waitKey(1) & 0xFF == ord('q'):
#                     break

#             if not ret:
#                 break

#     cap.release()
#     cv2.destroyAllWindows()

In [126]:
# import numpy as np 

# X = [[1,2,3]]
# X.append([6,8,10])
# X.append([20,9,4])
# X

In [None]:
# Set mediapipe model 
for action in actions:
    video_path = os.path.join("C:/Users/araya/Desktop/keypoints/video_extract", action)
    cap = cv2.VideoCapture(video_path)
    cap.set(cv2.CAP_PROP_FPS, 60)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print("LENGTH:" + str(length))
    keypoints = []

    if not cap.isOpened():
        print(f"Error opening video file: {video_path}")
        continue

    with mp_holist.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        for seq in range(no_of_seqs):
            for frame_num in range(seq_length):

                ret, frame = cap.read()
                if not ret:
                    print(f"End of video {video_path}")
                    break
                
                img, results = mediapipe_detection(frame, holistic)
                draw_styled_landmarks(img, results)

                # print(frame_num)

                if frame_num == 0: 
                    cv2.putText(img, 'DATA COLLECTION STARTED', (120,200), 
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(img, f'Collecting frames for - {action} Sequence Number - {seq}', (15,12), 
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    cv2.imshow('OpenCV Window', img)
                    cv2.waitKey(2000)  # 2 seconds delay for setup
                else: 
                    cv2.putText(img, f'Collecting frames for - {action} Sequence Number - {seq}', (15,12), 
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    cv2.imshow('OpenCV Window', img)

                x = extract_keypoints(results)
                keypoints.append(x)
                npy_path = os.path.join(Model_Data, action, f"{action.split(".")[0]}.npy")
                os.makedirs(os.path.dirname(npy_path), exist_ok=True)
                np.save(npy_path, keypoints)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            if not ret:
                break

    cap.release()
    cv2.destroyAllWindows()

In [195]:
cap.release()
cv2.destroyAllWindows()

In [196]:
file_paths = []
for action in actions:
    video_path = os.path.join('Data for different actions/', action)
    # print(video_path)
    # print(action)
    file_paths.append(video_path + '/' + action.split(".")[0] + ".npy")
print(file_paths)

['Data for different actions/กฎกระทรวง.mp4/กฎกระทรวง.npy', 'Data for different actions/กฎหมายรัฐธรรมนูญ.mp4/กฎหมายรัฐธรรมนูญ.npy', 'Data for different actions/กรมอนามัย.mp4/กรมอนามัย.npy', 'Data for different actions/กรรม.mp4/กรรม.npy', 'Data for different actions/กรรมสิทธิ์.mp4/กรรมสิทธิ์.npy', 'Data for different actions/กระโดด.mp4/กระโดด.npy', 'Data for different actions/กล้วยบวชชี.mp4/กล้วยบวชชี.npy', 'Data for different actions/กล้วยเชื่อม.mp4/กล้วยเชื่อม.npy', 'Data for different actions/กังวล.mp4/กังวล.npy', 'Data for different actions/กีฬา.mp4/กีฬา.npy', 'Data for different actions/น้อง.mp4/น้อง.npy', 'Data for different actions/เขิน.mp4/เขิน.npy', 'Data for different actions/เขื่อนดิน.mp4/เขื่อนดิน.npy', 'Data for different actions/เขื่อนสิริกิติ์.mp4/เขื่อนสิริกิติ์.npy', 'Data for different actions/เข้าใจผิด.mp4/เข้าใจผิด.npy', 'Data for different actions/เคย.mp4/เคย.npy', 'Data for different actions/เครียด.mp4/เครียด.npy', 'Data for different actions/เครื่องปั่นดิน.mp4/เครื

In [197]:
def load_keypoint_sequences(file_paths):
    keypoint_sequences = []
    for file_path in file_paths:
        keypoints = np.load(file_path)
        keypoint_sequences.append(torch.tensor(keypoints, dtype=torch.float32))
    return keypoint_sequences

In [198]:
# Load the sequences
import torch
sequences = load_keypoint_sequences(file_paths)
sequences

[tensor([[ 0.5013,  0.2452, -1.2167,  ...,  0.5663,  0.2188,  0.0098],
         [ 0.4997,  0.2482, -1.4690,  ...,  0.5652,  0.2181,  0.0106],
         [ 0.4984,  0.2500, -1.4853,  ...,  0.5654,  0.2185,  0.0112],
         ...,
         [ 0.4861,  0.2513, -1.3416,  ...,  0.5572,  0.2177,  0.0091],
         [ 0.4873,  0.2514, -1.3574,  ...,  0.5575,  0.2172,  0.0097],
         [ 0.4883,  0.2516, -1.3579,  ...,  0.5577,  0.2170,  0.0101]]),
 tensor([[ 0.4922,  0.2382, -1.2850,  ...,  0.5578,  0.2124,  0.0094],
         [ 0.4920,  0.2405, -1.4288,  ...,  0.5571,  0.2116,  0.0099],
         [ 0.4920,  0.2409, -1.4093,  ...,  0.5567,  0.2122,  0.0098],
         ...,
         [ 0.4814,  0.2260, -1.3318,  ...,  0.5503,  0.1923,  0.0123],
         [ 0.4815,  0.2257, -1.3351,  ...,  0.5503,  0.1921,  0.0122],
         [ 0.4815,  0.2255, -1.3497,  ...,  0.5501,  0.1919,  0.0124]]),
 tensor([[ 0.5049,  0.2371, -1.2115,  ...,  0.5643,  0.2082,  0.0088],
         [ 0.5045,  0.2381, -1.1896,  ...,  0

In [199]:
# Pad the sequences to the same length
from torch.nn.utils.rnn import pad_sequence
padded_sequences = pad_sequence(sequences, batch_first=True)
pad_sequence
print(padded_sequences.shape) # (batch_size, max_sequence_length, num_keypoints)

torch.Size([40, 160, 1662])


In [200]:
labels = [action.split(".")[0] for action in actions]
labels

['กฎกระทรวง',
 'กฎหมายรัฐธรรมนูญ',
 'กรมอนามัย',
 'กรรม',
 'กรรมสิทธิ์',
 'กระโดด',
 'กล้วยบวชชี',
 'กล้วยเชื่อม',
 'กังวล',
 'กีฬา',
 'น้อง',
 'เขิน',
 'เขื่อนดิน',
 'เขื่อนสิริกิติ์',
 'เข้าใจผิด',
 'เคย',
 'เครียด',
 'เครื่องปั่นดิน',
 'เครื่องหมายการค้า',
 'เจอ',
 'เจ้าหนี้',
 'เช่าซื้อ',
 'เช่าทรัพย์',
 'เซอร์เบีย',
 'เซเนกัล',
 'เซ็ง',
 'เดิน',
 'เดิมพัน',
 'เพลีย',
 'เมื่อย',
 'เม็กซิโก',
 'เฮโรอีน',
 'แกมเบีย',
 'แซมเบีย',
 'โกหก',
 'โจทก์',
 'โชจู',
 'ใกล้',
 'ไดโนเสาร์',
 'ไอซ์']

In [201]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

labels = le.fit_transform(labels)
labels

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39], dtype=int64)

In [202]:
import torch
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence

# Create a custom dataset
class KeypointDataset(Dataset):
    def __init__(self, file_paths, labels):
        self.file_paths = file_paths
        self.labels = labels
    
    def __len__(self):
        return len(self.file_paths)
    
    def __getitem__(self, idx):
        keypoints = np.load(self.file_paths[idx])
        label = self.labels[idx]
        return torch.tensor(keypoints, dtype=torch.float32), label

In [203]:
# Create the dataset
dataset = KeypointDataset(file_paths, labels)

In [204]:
print(dataset.file_paths)
print(dataset.labels)

['Data for different actions/กฎกระทรวง.mp4/กฎกระทรวง.npy', 'Data for different actions/กฎหมายรัฐธรรมนูญ.mp4/กฎหมายรัฐธรรมนูญ.npy', 'Data for different actions/กรมอนามัย.mp4/กรมอนามัย.npy', 'Data for different actions/กรรม.mp4/กรรม.npy', 'Data for different actions/กรรมสิทธิ์.mp4/กรรมสิทธิ์.npy', 'Data for different actions/กระโดด.mp4/กระโดด.npy', 'Data for different actions/กล้วยบวชชี.mp4/กล้วยบวชชี.npy', 'Data for different actions/กล้วยเชื่อม.mp4/กล้วยเชื่อม.npy', 'Data for different actions/กังวล.mp4/กังวล.npy', 'Data for different actions/กีฬา.mp4/กีฬา.npy', 'Data for different actions/น้อง.mp4/น้อง.npy', 'Data for different actions/เขิน.mp4/เขิน.npy', 'Data for different actions/เขื่อนดิน.mp4/เขื่อนดิน.npy', 'Data for different actions/เขื่อนสิริกิติ์.mp4/เขื่อนสิริกิติ์.npy', 'Data for different actions/เข้าใจผิด.mp4/เข้าใจผิด.npy', 'Data for different actions/เคย.mp4/เคย.npy', 'Data for different actions/เครียด.mp4/เครียด.npy', 'Data for different actions/เครื่องปั่นดิน.mp4/เครื

In [205]:
# Collate function for padding
def collate_fn(batch):
    sequences, labels = zip(*batch)
    padded_sequences = pad_sequence(sequences, batch_first=True)
    return padded_sequences, torch.tensor(labels)

In [206]:
# Create the DataLoader
batch_size = 4
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
data_loader

<torch.utils.data.dataloader.DataLoader at 0x2dac81add30>

In [207]:
import torch
import torch.nn as nn
import torch.optim as optim

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        # Forward propagate the LSTM
        out, _ = self.lstm(x, (h0, c0))
        
        # Use the last time step's output for classification
        out = out[:, -1, :]
        out = self.fc(out)
        return out

In [208]:
# Set device (use GPU if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [209]:
# Initialize the model, loss function, and optimizer
model = LSTMModel(input_size=1662, hidden_size=128, num_layers=2, num_classes=40).to(device)

In [210]:
criterion = nn.CrossEntropyLoss()  # For multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [211]:
# Training loop
# References : https://saturncloud.io/blog/calculating-the-accuracy-of-pytorch-models-every-epoch/#:~:text=In%20order%20to%20calculate%20the,tensor%20along%20a%20specified%20dimension
num_epochs = 1200
for epoch in range(num_epochs):
    total_correct = 0
    total_samples = 0
    model.train()
    for i, (sequences, labels) in enumerate(data_loader):
        # Move data to the device
        sequences = sequences.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(sequences)
        _, predicted = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    accuracy = 100 * total_correct /total_samples
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f} , Accuracy : {accuracy:.2f}%')

Epoch [1/1200], Loss: 3.7810 , Accuracy : 0.00%
Epoch [2/1200], Loss: 3.6636 , Accuracy : 2.50%
Epoch [3/1200], Loss: 3.5815 , Accuracy : 2.50%
Epoch [4/1200], Loss: 3.4915 , Accuracy : 2.50%
Epoch [5/1200], Loss: 3.7647 , Accuracy : 5.00%
Epoch [6/1200], Loss: 3.5749 , Accuracy : 2.50%
Epoch [7/1200], Loss: 3.5216 , Accuracy : 0.00%
Epoch [8/1200], Loss: 3.2230 , Accuracy : 0.00%
Epoch [9/1200], Loss: 3.4597 , Accuracy : 2.50%
Epoch [10/1200], Loss: 3.5898 , Accuracy : 2.50%
Epoch [11/1200], Loss: 3.5472 , Accuracy : 7.50%
Epoch [12/1200], Loss: 3.6885 , Accuracy : 2.50%
Epoch [13/1200], Loss: 2.8373 , Accuracy : 5.00%
Epoch [14/1200], Loss: 3.4267 , Accuracy : 5.00%
Epoch [15/1200], Loss: 3.9432 , Accuracy : 2.50%
Epoch [16/1200], Loss: 3.3971 , Accuracy : 5.00%
Epoch [17/1200], Loss: 3.4749 , Accuracy : 7.50%
Epoch [18/1200], Loss: 3.7209 , Accuracy : 7.50%
Epoch [19/1200], Loss: 3.8852 , Accuracy : 7.50%
Epoch [20/1200], Loss: 3.4751 , Accuracy : 10.00%
Epoch [21/1200], Loss: 3.359

In [212]:
pad_sequence(sequences, batch_first=True)

tensor([[[ 0.4868,  0.2821, -1.4668,  ...,  0.5711,  0.2335,  0.0116],
         [ 0.4861,  0.2786, -1.5812,  ...,  0.5702,  0.2334,  0.0129],
         [ 0.4856,  0.2769, -1.6059,  ...,  0.5699,  0.2333,  0.0131],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.5103,  0.2428, -1.1660,  ...,  0.5757,  0.2179,  0.0081],
         [ 0.5106,  0.2437, -1.2850,  ...,  0.5751,  0.2183,  0.0099],
         [ 0.5112,  0.2443, -1.2880,  ...,  0.5748,  0.2189,  0.0108],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.5134,  0.2614, -1.4426,  ...,  0.5818,  0.2272,  0.0153],
         [ 0.5130,  0.2604, -1.4262,  ...,  0

In [213]:
# Put the model in evaluation mode
model.eval()

# No need to track gradients during inference
with torch.no_grad():
    # Get the model's output (logits)
    outputs = model(padded_sequences)

# outputs = torch.softmax(outputs, dim=1)
# outputs = torch.max(outputs,1)

outputs


tensor([[ 8.4886, -2.2391, -0.2556,  ..., -2.0864,  3.2579, -3.2054],
        [-4.1864,  9.4720, -3.5149,  ...,  3.1613, -1.4555, -2.7038],
        [-0.8747, -2.0196, 11.1850,  ..., -6.3004,  0.1408,  0.7032],
        ...,
        [-6.0408,  0.3973, -9.3390,  ...,  8.6894, -4.4498,  0.4777],
        [-0.6473, -2.4444, -4.8697,  ..., -3.9147,  8.5207, -2.8640],
        [-4.8162, -3.5016, -3.8957,  ..., -0.6957, -2.9981,  6.3166]])

In [214]:
padded_sequences

tensor([[[ 0.5013,  0.2452, -1.2167,  ...,  0.5663,  0.2188,  0.0098],
         [ 0.4997,  0.2482, -1.4690,  ...,  0.5652,  0.2181,  0.0106],
         [ 0.4984,  0.2500, -1.4853,  ...,  0.5654,  0.2185,  0.0112],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.4922,  0.2382, -1.2850,  ...,  0.5578,  0.2124,  0.0094],
         [ 0.4920,  0.2405, -1.4288,  ...,  0.5571,  0.2116,  0.0099],
         [ 0.4920,  0.2409, -1.4093,  ...,  0.5567,  0.2122,  0.0098],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.5049,  0.2371, -1.2115,  ...,  0.5643,  0.2082,  0.0088],
         [ 0.5045,  0.2381, -1.1896,  ...,  0

In [215]:
len(padded_sequences)

40

In [320]:
file_paths = ["Data for different actions/กล้วยบวชชี_2.mp4/กล้วยบวชชี_2.npy"]

In [321]:
# Load the sequences
import torch
sequences = load_keypoint_sequences(file_paths)
# Change list to numpy array 
sequences = np.array(sequences)
# Change numpy array to tensor
sequences = torch.FloatTensor(sequences)
sequences = pad_sequence(sequences, batch_first=True)
sequences

tensor([[[ 0.5319,  0.2821, -1.6797,  ...,  0.6169,  0.2514,  0.0109],
         [ 0.5346,  0.2813, -1.7538,  ...,  0.6164,  0.2506,  0.0119],
         [ 0.5365,  0.2812, -1.7556,  ...,  0.6163,  0.2507,  0.0109],
         ...,
         [ 0.5340,  0.2852, -1.2054,  ...,  0.6196,  0.2422,  0.0160],
         [ 0.5345,  0.2806, -1.4284,  ...,  0.6202,  0.2413,  0.0160],
         [ 0.5356,  0.2758, -1.3837,  ...,  0.6207,  0.2407,  0.0163]]])

In [322]:
outputs = model(sequences)
outputs

tensor([[ 5.6489, -4.6696, -0.0753, -2.9835, -1.6236, -3.7760,  8.7149, -2.9589,
         -2.2924,  2.5370, -1.8308, -2.8557, -0.8328,  2.9264, -2.1582, -5.2478,
          0.3017, -1.6514, -4.3147, -2.6456, -0.1971,  0.1996, 10.4806, -2.8355,
          3.0094, -1.5649, -3.9936,  0.6804,  0.8259, -4.1843,  4.9341,  3.6058,
          0.0363, -3.5094, -2.1526,  2.2892,  0.4896, -3.9278,  3.6323, -1.7342]],
       grad_fn=<AddmmBackward0>)

In [323]:
labels = [action.split(".")[0] for action in actions]
labels

['กฎกระทรวง',
 'กฎหมายรัฐธรรมนูญ',
 'กรมอนามัย',
 'กรรม',
 'กรรมสิทธิ์',
 'กระโดด',
 'กล้วยบวชชี',
 'กล้วยเชื่อม',
 'กังวล',
 'กีฬา',
 'น้อง',
 'เขิน',
 'เขื่อนดิน',
 'เขื่อนสิริกิติ์',
 'เข้าใจผิด',
 'เคย',
 'เครียด',
 'เครื่องปั่นดิน',
 'เครื่องหมายการค้า',
 'เจอ',
 'เจ้าหนี้',
 'เช่าซื้อ',
 'เช่าทรัพย์',
 'เซอร์เบีย',
 'เซเนกัล',
 'เซ็ง',
 'เดิน',
 'เดิมพัน',
 'เพลีย',
 'เมื่อย',
 'เม็กซิโก',
 'เฮโรอีน',
 'แกมเบีย',
 'แซมเบีย',
 'โกหก',
 'โจทก์',
 'โชจู',
 'ใกล้',
 'ไดโนเสาร์',
 'ไอซ์']

### -------------------------------------------------------------------------------------------------------------------------------------------- ###

In [None]:
from torch.nn.utils.rnn import pad_sequence
import torch
# Load the sequences
import numpy as np 
import os
Model_Data=os.path.join('Data for different actions')
action = "กฎหมายรัฐธรรมนูญ.mp4"
a = np.load(os.path.join(Model_Data, action, "กฎหมายรัฐธรรมนูญ.npy"))
a = torch.from_numpy(a)

# Pad the sequences to the same length
padded_sequences = pad_sequence(a, batch_first=True)
len(padded_sequences)

In [803]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

In [804]:
labelMap = {label:num for num, label in enumerate(actions)}

In [None]:
labelMap

In [806]:
seqs, labels = [], []

for action in actions:
    for seq in range(no_of_seqs):
        window = []
        for frame_num in range(seq_length):
            res = np.load(os.path.join(Model_Data, action, f"frame_{frame_num}.npy")) 
            window.append(res)
        seqs.append(window)

        labels.append(labelMap[action])

In [None]:
np.array(seqs).shape

In [809]:
X_data = np.array(seqs)

In [None]:
X_data.shape

In [None]:
labels

In [None]:
# changing the labels from 0,1,2 to categorical data for easier accessebility
Y_label = to_categorical(labels).astype(int)
Y_label

In [None]:
Y_label.shape

In [None]:
# splitting
X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_label, test_size=0.3)
X_test.shape

### Building LSTM

In [815]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [816]:
# adding the logs folder
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [None]:
# neural network

model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))

# adding 64 units for dense layer
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [None]:
# eg
eg_res = [.7, 0.2, 0.1]
actions[np.argmax(eg_res)]

In [819]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [None]:
model.fit(X_train, Y_train, epochs=300, callbacks=[tb_callback])
# tensorboard --logdir=.

In [None]:
model.summary()

In [None]:
res=model.predict(X_test)

In [None]:
# again the actions with the max value provided by softmax is returned
actions[np.argmax(res[4])]

In [None]:
actions[np.argmax(Y_test[4])]

In [None]:
res[0]

In [None]:
Y_test[0]

### Evaluate

In [836]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [None]:
Y_hat = model.predict(X_train)

In [838]:
Y_true = np.argmax(Y_train, axis=1).tolist()
# one hot encoding
Y_hat = np.argmax(Y_hat, axis=1).tolist()

In [None]:
Y_hat

In [None]:
# confution matrix
multilabel_confusion_matrix(Y_true, Y_hat)

In [None]:
accuracy_score(Y_true, Y_hat)