In [385]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp 

In [386]:
mp_holist = mp.solutions.holistic 
mp_draw = mp.solutions.drawing_utils

In [387]:
def mediapipe_detection(img, model):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img.flags.writeable = False                 
    result = model.process(img)                 # Make prediction
    img.flags.writeable = True                   
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 
    return img, result

In [388]:
def draw_landmarks(img, result):
    mp_draw.draw_landmarks(img, result.face_landmarks, mp_holist.FACEMESH_CONTOURS) # Draw face connections
    mp_draw.draw_landmarks(img, result.pose_landmarks, mp_holist.POSE_CONNECTIONS) # Draw pose connections
    mp_draw.draw_landmarks(img, result.left_hand_landmarks, mp_holist.HAND_CONNECTIONS) # Draw left hand connections
    mp_draw.draw_landmarks(img, result.right_hand_landmarks, mp_holist.HAND_CONNECTIONS) # Draw right hand connections

In [389]:
def draw_styled_landmarks(img, result):
    mp_draw.draw_landmarks(img, result.face_landmarks, mp_holist.FACEMESH_CONTOURS, 
                             mp_draw.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), # color the joint 
                             mp_draw.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1) #color the connection
                             ) 
    # mp_draw.draw_landmarks(img, result.face_landmarks, mp_holist.FACEMESH_CONTOURS, 
    #                          mp_draw.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), # color the joint 
    #                          mp_draw.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1) #color the connection
    #                          ) 
    
    mp_draw.draw_landmarks(img, result.pose_landmarks, mp_holist.POSE_CONNECTIONS,
                             mp_draw.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_draw.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    mp_draw.draw_landmarks(img, result.left_hand_landmarks, mp_holist.HAND_CONNECTIONS, 
                             mp_draw.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_draw.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    mp_draw.draw_landmarks(img, result.right_hand_landmarks, mp_holist.HAND_CONNECTIONS, 
                             mp_draw.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_draw.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

In [None]:
mp_holist.POSE_CONNECTIONS

In [None]:
cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holist.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        ret, frame = cap.read()

        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        draw_styled_landmarks(image, results)

        cv2.imshow('OpenCV Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

In [392]:
cap.release()
cv2.destroyAllWindows()

In [None]:
results.pose_landmarks.landmark[0].visibility

In [None]:
len(results.pose_landmarks.landmark)

In [395]:
def extract_keypoints(results):
    pose=np.array([[res.x,res.y,res.z,res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    left_hnd=np.array([[res.x,res.y,res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    right_hnd=np.array([[res.x,res.y,res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    face=np.array([[res.x,res.y,res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    return np.concatenate([pose,left_hnd,right_hnd,face])
# concatenating for the model to detect the sign language

In [None]:
extract_keypoints(results).shape

In [None]:
import os 
video_dir = "C:/Users/araya/Desktop/keypoints/video_extract"
video_list = []
video_list = os.listdir(video_dir)

len(video_list)

In [None]:
video_list

In [399]:
# Path for exported data, numpy arrays
Model_Data=os.path.join('Data for different actions')

actions = np.array(video_list)

no_of_seqs = 1

# 30 frames in length
seq_length = 160

In [None]:
actions

In [401]:
# just creating the folders and sub folders

for action in actions: 
    try: 
        os.makedirs(os.path.join(Model_Data, action))
    except:
        pass

Collecting keypoint values for Training nd Testing

In [402]:
# Define the directory where your videos are stored
directory = "C:/Users/araya/Desktop/keypoints/video_extract"

In [None]:
directory

In [None]:
txt = "hello, my name is Peter, I am 26 years old"

x = txt.split(", ")

print(x)

In [None]:
for filename in actions:
    print(directory + '/' + filename)

In [406]:
# # Set mediapipe model 
# for action in actions:
#     video_path = os.path.join("C:/Users/araya/Desktop/keypoints/video_extract", action)
#     cap = cv2.VideoCapture(video_path)
#     cap.set(cv2.CAP_PROP_FPS, 60)
#     length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
#     print("LENGTH:" + str(length))
#     # keypoints = []

#     if not cap.isOpened():
#         print(f"Error opening video file: {video_path}")
#         continue

#     with mp_holist.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
#         for seq in range(no_of_seqs):
#             for frame_num in range(seq_length):

#                 ret, frame = cap.read()
#                 if not ret:
#                     print(f"End of video {video_path}")
#                     break
                
#                 img, results = mediapipe_detection(frame, holistic)
#                 draw_styled_landmarks(img, results)

#                 # print(frame_num)

#                 if frame_num == 0: 
#                     cv2.putText(img, 'DATA COLLECTION STARTED', (120,200), 
#                                 cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
#                     cv2.putText(img, f'Collecting frames for - {action} Sequence Number - {seq}', (15,12), 
#                                 cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
#                     cv2.imshow('OpenCV Window', img)
#                     cv2.waitKey(2000)  # 2 seconds delay for setup
#                 else: 
#                     cv2.putText(img, f'Collecting frames for - {action} Sequence Number - {seq}', (15,12), 
#                                 cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
#                     cv2.imshow('OpenCV Window', img)

#                 keypoints = extract_keypoints(results)
#                 # keypoints.append(results)
#                 npy_path = os.path.join(Model_Data, action, f"frame_{frame_num}.npy")
#                 os.makedirs(os.path.dirname(npy_path), exist_ok=True)
#                 np.save(npy_path, keypoints)

#                 if cv2.waitKey(1) & 0xFF == ord('q'):
#                     break

#             if not ret:
#                 break

#     cap.release()
#     cv2.destroyAllWindows()

In [407]:
# import numpy as np 

# X = [[1,2,3]]
# X.append([6,8,10])
# X.append([20,9,4])
# X

In [None]:
# Set mediapipe model 
for action in actions:
    video_path = os.path.join("C:/Users/araya/Desktop/keypoints/video_extract", action)
    cap = cv2.VideoCapture(video_path)
    cap.set(cv2.CAP_PROP_FPS, 60)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print("LENGTH:" + str(length))
    keypoints = []

    if not cap.isOpened():
        print(f"Error opening video file: {video_path}")
        continue

    with mp_holist.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        for seq in range(no_of_seqs):
            for frame_num in range(seq_length):

                ret, frame = cap.read()
                if not ret:
                    print(f"End of video {video_path}")
                    break
                
                img, results = mediapipe_detection(frame, holistic)
                draw_styled_landmarks(img, results)

                # print(frame_num)

                if frame_num == 0: 
                    cv2.putText(img, 'DATA COLLECTION STARTED', (120,200), 
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(img, f'Collecting frames for - {action} Sequence Number - {seq}', (15,12), 
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    cv2.imshow('OpenCV Window', img)
                    cv2.waitKey(2000)  # 2 seconds delay for setup
                else: 
                    cv2.putText(img, f'Collecting frames for - {action} Sequence Number - {seq}', (15,12), 
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    cv2.imshow('OpenCV Window', img)

                x = extract_keypoints(results)
                keypoints.append(x)
                npy_path = os.path.join(Model_Data, action, f"{action.split(".")[0]}.npy")
                os.makedirs(os.path.dirname(npy_path), exist_ok=True)
                np.save(npy_path, keypoints)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            if not ret:
                break

    cap.release()
    cv2.destroyAllWindows()

In [339]:
# # Set mediapipe model 
# for action in actions:
#     video_path = os.path.join(video_dir, action)
#     cap = cv2.VideoCapture(video_path)

#     if not cap.isOpened():
#         print(f"Error opening video file: {video_path}")
#         continue

#     with mp_holist.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
#         frames = []

#         while True:
#             ret, frame = cap.read()
#             if not ret:
#                 break
            
#             img, results = mediapipe_detection(frame, holistic)
#             keypoints = extract_keypoints(results)
#             frames.append(keypoints)

#         # Padding or trimming to fixed sequence length
#         if len(frames) < seq_length:
#             padding = [np.zeros_like(frames[0]) for _ in range(seq_length - len(frames))]
#             frames.extend(padding)
#         else:
#             frames = frames[:seq_length]

#         # Save sequences
#         for i in range(len(frames)):
#             npy_path = os.path.join(Model_Data, action, f"frame_{i}.npy")
#             np.save(npy_path, frames[i])

#     cap.release()

# cv2.destroyAllWindows()

In [340]:
# # Loop through all files in the directory
# for filename in os.listdir(directory):
#     # Check if the file is a video by checking its extension
#     if filename.endswith(('.mp4', '.avi', '.mkv', '.mov')):
#         print(f"Processing {filename}...")
#         video_path = os.path.join(directory, filename)

#         class_name = os.path.splitext(os.path.basename(video_path))[0]
        
#         cap = cv2.VideoCapture(video_path)
#         with mp_holist.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
#             for action in actions:
#                 for seq in range(no_of_seqs):
#                     for frame_num in range(seq_length):
#                         ret, frame = cap.read()
#                         if not ret:
#                             print("Error: Failed to read frame.")
#                             break  # Exit the loop if frame read fails
                        
#                         img, results = mediapipe_detection(frame, holistic)
#                         draw_styled_landmarks(img, results)

#                         # logic is for the formatting portion
#                         if frame_num == 0: 
#                             cv2.putText(img, 'DATA COLLECTION STARTED', (120,200), 
#                                     cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
#                             cv2.putText(img, 'Collecting frames for - {} and Sequence Number - {}'.format(action, seq), (15,12), 
#                                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
#                             # Show to screen
#                             cv2.imshow('OpenCV Window', img)
#                             # providing the break for adjusting the posture
#                             cv2.waitKey(2000) # 2 sec
#                         else: 
#                             cv2.putText(img, 'Collecting frames for - {} and Sequence Number - {}'.format(action, seq), (15,12), 
#                                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
#                             # Show to screen
#                             cv2.imshow('OpenCV Window', img)

#                         keypoints = extract_keypoints(results)
#                         npy_path = os.path.join(Model_Data, action, str(seq), str(frame_num))
#                         np.save(npy_path, keypoints)

#                         if cv2.waitKey(10) & 0xFF == ord('q'):
#                             break

#         cap.release()
#         cv2.destroyAllWindows()


In [485]:
cap.release()
cv2.destroyAllWindows()

In [486]:
file_paths = []
for action in actions:
    video_path = os.path.join('Data for different actions/', action)
    # print(video_path)
    # print(action)
    file_paths.append(video_path + '/' + action.split(".")[0] + ".npy")
print(file_paths)

['Data for different actions/กฎกระทรวง.mp4/กฎกระทรวง.npy', 'Data for different actions/กฎหมายรัฐธรรมนูญ.mp4/กฎหมายรัฐธรรมนูญ.npy', 'Data for different actions/กรมอนามัย.mp4/กรมอนามัย.npy', 'Data for different actions/กรรม.mp4/กรรม.npy', 'Data for different actions/กรรมสิทธิ์.mp4/กรรมสิทธิ์.npy', 'Data for different actions/กระโดด.mp4/กระโดด.npy', 'Data for different actions/กล้วยบวชชี.mp4/กล้วยบวชชี.npy', 'Data for different actions/กล้วยเชื่อม.mp4/กล้วยเชื่อม.npy']


In [487]:
def load_keypoint_sequences(file_paths):
    keypoint_sequences = []
    for file_path in file_paths:
        keypoints = np.load(file_path)
        keypoint_sequences.append(torch.tensor(keypoints, dtype=torch.float32))
    return keypoint_sequences

In [488]:
# Load the sequences
sequences = load_keypoint_sequences(file_paths)
sequences

[tensor([[ 0.5013,  0.2452, -1.2167,  ...,  0.5663,  0.2188,  0.0098],
         [ 0.4997,  0.2482, -1.4690,  ...,  0.5652,  0.2181,  0.0106],
         [ 0.4984,  0.2500, -1.4853,  ...,  0.5654,  0.2185,  0.0112],
         ...,
         [ 0.4861,  0.2513, -1.3416,  ...,  0.5572,  0.2177,  0.0091],
         [ 0.4873,  0.2514, -1.3574,  ...,  0.5575,  0.2172,  0.0097],
         [ 0.4883,  0.2516, -1.3579,  ...,  0.5577,  0.2170,  0.0101]]),
 tensor([[ 0.4922,  0.2382, -1.2850,  ...,  0.5578,  0.2124,  0.0094],
         [ 0.4920,  0.2405, -1.4288,  ...,  0.5571,  0.2116,  0.0099],
         [ 0.4920,  0.2409, -1.4093,  ...,  0.5567,  0.2122,  0.0098],
         ...,
         [ 0.4814,  0.2260, -1.3318,  ...,  0.5503,  0.1923,  0.0123],
         [ 0.4815,  0.2257, -1.3351,  ...,  0.5503,  0.1921,  0.0122],
         [ 0.4815,  0.2255, -1.3497,  ...,  0.5501,  0.1919,  0.0124]]),
 tensor([[ 0.5049,  0.2371, -1.2115,  ...,  0.5643,  0.2082,  0.0088],
         [ 0.5045,  0.2381, -1.1896,  ...,  0

In [489]:
# Pad the sequences to the same length
padded_sequences = pad_sequence(sequences, batch_first=True)
pad_sequence
print(padded_sequences.shape) # (batch_size, max_sequence_length, num_keypoints)

torch.Size([8, 160, 1662])


In [490]:
labels = [action.split(".")[0] for action in actions]
labels

['กฎกระทรวง',
 'กฎหมายรัฐธรรมนูญ',
 'กรมอนามัย',
 'กรรม',
 'กรรมสิทธิ์',
 'กระโดด',
 'กล้วยบวชชี',
 'กล้วยเชื่อม']

In [491]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

labels = le.fit_transform(labels)
labels

array([0, 1, 2, 3, 4, 5, 6, 7], dtype=int64)

In [492]:
import torch
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence

# Create a custom dataset
class KeypointDataset(Dataset):
    def __init__(self, file_paths, labels):
        self.file_paths = file_paths
        self.labels = labels
    
    def __len__(self):
        return len(self.file_paths)
    
    def __getitem__(self, idx):
        keypoints = np.load(self.file_paths[idx])
        label = self.labels[idx]
        return torch.tensor(keypoints, dtype=torch.float32), label

In [493]:
# Create the dataset
dataset = KeypointDataset(file_paths, labels)

In [513]:
print(dataset.file_paths)
print(dataset.labels)

['Data for different actions/กฎกระทรวง.mp4/กฎกระทรวง.npy', 'Data for different actions/กฎหมายรัฐธรรมนูญ.mp4/กฎหมายรัฐธรรมนูญ.npy', 'Data for different actions/กรมอนามัย.mp4/กรมอนามัย.npy', 'Data for different actions/กรรม.mp4/กรรม.npy', 'Data for different actions/กรรมสิทธิ์.mp4/กรรมสิทธิ์.npy', 'Data for different actions/กระโดด.mp4/กระโดด.npy', 'Data for different actions/กล้วยบวชชี.mp4/กล้วยบวชชี.npy', 'Data for different actions/กล้วยเชื่อม.mp4/กล้วยเชื่อม.npy']
[0 1 2 3 4 5 6 7]


In [494]:
# Collate function for padding
def collate_fn(batch):
    sequences, labels = zip(*batch)
    padded_sequences = pad_sequence(sequences, batch_first=True)
    return padded_sequences, torch.tensor(labels)

In [495]:
# Create the DataLoader
batch_size = 4
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
data_loader

<torch.utils.data.dataloader.DataLoader at 0x1a81f1dbcb0>

In [496]:
import torch
import torch.nn as nn
import torch.optim as optim

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        # Forward propagate the LSTM
        out, _ = self.lstm(x, (h0, c0))
        
        # Use the last time step's output for classification
        out = out[:, -1, :]
        out = self.fc(out)
        return out

In [497]:
# Set device (use GPU if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [498]:
# Initialize the model, loss function, and optimizer
model = LSTMModel(input_size=1662, hidden_size=128, num_layers=2, num_classes=8).to(device)

In [499]:
criterion = nn.CrossEntropyLoss()  # For multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [500]:
# Training loop
num_epochs = 300
for epoch in range(num_epochs):
    model.train()
    for i, (sequences, labels) in enumerate(data_loader):
        # Move data to the device
        sequences = sequences.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(sequences)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/300], Loss: 2.1161
Epoch [2/300], Loss: 1.9582
Epoch [3/300], Loss: 2.1770
Epoch [4/300], Loss: 1.6733
Epoch [5/300], Loss: 1.6795
Epoch [6/300], Loss: 2.0610
Epoch [7/300], Loss: 1.7889
Epoch [8/300], Loss: 1.5011
Epoch [9/300], Loss: 2.2988
Epoch [10/300], Loss: 2.2428
Epoch [11/300], Loss: 1.6378
Epoch [12/300], Loss: 1.3712
Epoch [13/300], Loss: 1.6537
Epoch [14/300], Loss: 1.4455
Epoch [15/300], Loss: 1.6070
Epoch [16/300], Loss: 1.8169
Epoch [17/300], Loss: 1.3233
Epoch [18/300], Loss: 1.5108
Epoch [19/300], Loss: 0.7051
Epoch [20/300], Loss: 1.4103
Epoch [21/300], Loss: 1.3084
Epoch [22/300], Loss: 0.9015
Epoch [23/300], Loss: 1.3995
Epoch [24/300], Loss: 1.0576
Epoch [25/300], Loss: 1.0629
Epoch [26/300], Loss: 1.1937
Epoch [27/300], Loss: 0.9754
Epoch [28/300], Loss: 1.0815
Epoch [29/300], Loss: 0.8562
Epoch [30/300], Loss: 0.4761
Epoch [31/300], Loss: 0.6833
Epoch [32/300], Loss: 0.7307
Epoch [33/300], Loss: 0.7525
Epoch [34/300], Loss: 0.3035
Epoch [35/300], Loss: 3

In [501]:
pad_sequence(sequences, batch_first=True)

tensor([[[ 0.5134,  0.2614, -1.4426,  ...,  0.5818,  0.2272,  0.0153],
         [ 0.5130,  0.2604, -1.4262,  ...,  0.5810,  0.2273,  0.0147],
         [ 0.5126,  0.2599, -1.4278,  ...,  0.5810,  0.2276,  0.0150],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.5023,  0.2809, -1.6242,  ...,  0.5847,  0.2321,  0.0112],
         [ 0.5023,  0.2806, -1.6631,  ...,  0.5840,  0.2322,  0.0130],
         [ 0.5022,  0.2805, -1.6912,  ...,  0.5837,  0.2322,  0.0122],
         ...,
         [ 0.5037,  0.2791, -1.5789,  ...,  0.5834,  0.2224,  0.0129],
         [ 0.5044,  0.2774, -1.5686,  ...,  0.5831,  0.2220,  0.0130],
         [ 0.5052,  0.2722, -1.5527,  ...,  0.5827,  0.2216,  0.0132]],

        [[ 0.4868,  0.2821, -1.4668,  ...,  0.5711,  0.2335,  0.0116],
         [ 0.4861,  0.2786, -1.5812,  ...,  0

In [502]:
# Put the model in evaluation mode
model.eval()

# No need to track gradients during inference
with torch.no_grad():
    # Get the model's output (logits)
    outputs = model(padded_sequences)

# outputs = torch.softmax(outputs, dim=1)
# outputs = torch.max(outputs,1)

outputs


tensor([[ 5.2644,  3.4531, -1.2713, -2.3117, -3.7239, -3.0003, -2.7968,  2.8237],
        [-0.1658,  5.7221, -2.5147,  0.0424, -4.1849, -1.1155, -3.5870,  3.3477],
        [ 0.8228,  1.0697,  4.4864, -3.4782, -1.4098, -3.5199, -1.6029,  1.1104],
        [-2.3772, -0.7847, -2.9117,  4.4399, -1.3170,  3.7237, -1.7338,  1.3067],
        [-0.6764, -1.5457, -0.2841, -0.9745,  6.4210, -0.7854,  1.4269, -1.8412],
        [-2.6242, -1.3845, -3.0832,  4.3615, -0.6597,  5.0905, -1.1641,  0.5335],
        [ 0.7772, -1.0372, -0.8932, -2.2138,  0.1314, -2.0715,  5.5156, -2.6485],
        [-1.6479,  2.1180, -2.6393,  1.4775, -3.6063,  0.2648, -3.4498,  4.5402]])

In [None]:
padded_sequences

### -------------------------------------------------------------------------------------------------------------------------------------------- ###

In [None]:
from torch.nn.utils.rnn import pad_sequence
import torch
# Load the sequences
import numpy as np 
import os
Model_Data=os.path.join('Data for different actions')
action = "กฎหมายรัฐธรรมนูญ.mp4"
a = np.load(os.path.join(Model_Data, action, "กฎหมายรัฐธรรมนูญ.npy"))
a = torch.from_numpy(a)

# Pad the sequences to the same length
padded_sequences = pad_sequence(a, batch_first=True)
len(padded_sequences)

In [803]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

In [804]:
labelMap = {label:num for num, label in enumerate(actions)}

In [None]:
labelMap

In [806]:
seqs, labels = [], []

for action in actions:
    for seq in range(no_of_seqs):
        window = []
        for frame_num in range(seq_length):
            res = np.load(os.path.join(Model_Data, action, f"frame_{frame_num}.npy")) 
            window.append(res)
        seqs.append(window)

        labels.append(labelMap[action])

In [None]:
np.array(seqs).shape

In [809]:
X_data = np.array(seqs)

In [None]:
X_data.shape

In [None]:
labels

In [None]:
# changing the labels from 0,1,2 to categorical data for easier accessebility
Y_label = to_categorical(labels).astype(int)
Y_label

In [None]:
Y_label.shape

In [None]:
# splitting
X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_label, test_size=0.3)
X_test.shape

### Building LSTM

In [815]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [816]:
# adding the logs folder
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [None]:
# neural network

model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))

# adding 64 units for dense layer
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [None]:
# eg
eg_res = [.7, 0.2, 0.1]
actions[np.argmax(eg_res)]

In [819]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [None]:
model.fit(X_train, Y_train, epochs=300, callbacks=[tb_callback])
# tensorboard --logdir=.

In [None]:
model.summary()

In [None]:
res=model.predict(X_test)

In [None]:
# again the actions with the max value provided by softmax is returned
actions[np.argmax(res[4])]

In [None]:
actions[np.argmax(Y_test[4])]

In [None]:
res[0]

In [None]:
Y_test[0]

### Evaluate

In [836]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [None]:
Y_hat = model.predict(X_train)

In [838]:
Y_true = np.argmax(Y_train, axis=1).tolist()
# one hot encoding
Y_hat = np.argmax(Y_hat, axis=1).tolist()

In [None]:
Y_hat

In [None]:
# confution matrix
multilabel_confusion_matrix(Y_true, Y_hat)

In [None]:
accuracy_score(Y_true, Y_hat)