In [7]:
import os
# The imports (src.*) and relative paths (./configs/config.yaml) used below are
# resolved against the working directory. The project root is machine-specific,
# so allow overriding it through the PROJECT_ROOT environment variable; the
# original location remains the default.
os.chdir(os.environ.get("PROJECT_ROOT", "c:/Personal/AI/Projects"))

In [11]:
from src.data_processing.data_augmentation import Affine_Transformation
from src.utils.utils import load_config


In [12]:
import numpy as np
import pandas as pd
import glob
import os
import torch 
import random

from torch.utils.data import DataLoader,Dataset

In [4]:
# Matches the 543 that SignData.__getitem__ hard-codes in reshape(-1, 543, 3);
# the constant itself is not referenced in the visible cells.
ROWS_PER_FRAME = 543
# Not referenced in the visible cells — presumably a maximum sequence length;
# TODO confirm it is still needed.
MAX_LEN = 384

In [32]:
# Face landmark indexes grouped by facial region.
# The values are used as-is to index the landmark axis (no start offset is added
# in this table). NOTE(review): the numbering looks like MediaPipe Face Mesh —
# confirm against the data producer.
face_landmarks = {
    "silhouette": [
        10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
        397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
        172, 58, 132, 93, 234, 127, 162, 21, 54, 103, 67, 109,
    ],

    # Lips
    "lipsUpperOuter": [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291],
    "lipsLowerOuter": [146, 91, 181, 84, 17, 314, 405, 321, 375],
    "lipsUpperInner": [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308],
    "lipsLowerInner": [95, 88, 178, 87, 14, 317, 402, 318, 324],

    # Right eye
    "rightEyeUpper0": [246, 161, 160, 159, 158, 157, 173],
    "rightEyeLower0": [33, 7, 163, 144, 145, 153, 154, 155, 133],
    "rightEyeUpper1": [247, 30, 29, 27, 28, 56, 190],
    "rightEyeLower1": [130, 25, 110, 24, 23, 22, 26, 112, 243],
    "rightEyeUpper2": [113, 225, 224, 223, 222, 221, 189],
    "rightEyeLower2": [226, 31, 228, 229, 230, 231, 232, 233, 244],
    "rightEyeLower3": [143, 111, 117, 118, 119, 120, 121, 128, 245],

    "rightEyebrowUpper": [156, 70, 63, 105, 66, 107, 55, 193],
    "rightEyebrowLower": [35, 124, 46, 53, 52, 65],

    "rightEyeIris": [473, 474, 475, 476, 477],

    # Left eye
    "leftEyeUpper0": [466, 388, 387, 386, 385, 384, 398],
    "leftEyeLower0": [263, 249, 390, 373, 374, 380, 381, 382, 362],
    "leftEyeUpper1": [467, 260, 259, 257, 258, 286, 414],
    "leftEyeLower1": [359, 255, 339, 254, 253, 252, 256, 341, 463],
    "leftEyeUpper2": [342, 445, 444, 443, 442, 441, 413],
    "leftEyeLower2": [446, 261, 448, 449, 450, 451, 452, 453, 464],
    "leftEyeLower3": [372, 340, 346, 347, 348, 349, 350, 357, 465],

    "leftEyebrowUpper": [383, 300, 293, 334, 296, 336, 285, 417],
    "leftEyebrowLower": [265, 353, 276, 283, 282, 295],

    "leftEyeIris": [468, 469, 470, 471, 472],

    # Single reference points
    "midwayBetweenEyes": [168],

    "noseTip": [1],
    "noseBottom": [2],
    "noseRightCorner": [98],
    "noseLeftCorner": [327],

    "rightCheek": [205],
    "leftCheek": [425],
}


# Hand keypoints grouped by finger; values are offsets from the start of a
# hand's block of landmarks.
hand_landmarks = {
    "thumb": [1, 2, 3, 4],
    "indexFinger": [5, 6, 7, 8],
    "middleFinger": [9, 10, 11, 12],
    "ringFinger": [13, 14, 15, 16],
    "pinky": [17, 18, 19, 20],
    "palmBase": [0],
}

# Pose keypoints grouped by body part.
pose_landmarks = {
    "leftArm": [12, 14, 16],
    "rightArm": [11, 13, 15],
    "body": [11, 23, 24, 12],
}

# Start offsets of each landmark group. Only the two hand offsets are used in
# the visible cells; the pose and face offsets are declared but never applied.
pose_start_index = 0
face_start_index = 33
left_start_index = 501
right_start_index = 522


def _hand_index_array(start_index):
    """Return the sorted, de-duplicated absolute indexes of all hand keypoints."""
    absolute = {
        offset + start_index
        for finger_offsets in hand_landmarks.values()
        for offset in finger_offsets
    }
    return np.array(sorted(absolute))


l_hand_indexes = _hand_index_array(left_start_index)
r_hand_indexes = _hand_index_array(right_start_index)

In [6]:
# Sanity check: one absolute index per hand keypoint, so (21,) is expected.
r_hand_indexes.shape

(21,)

In [7]:
# Augmentation strengths. augmentation() halves each value and uses it as the
# standard deviation of a Gaussian draw (centred on 1 for "scale", on 0 for the
# other entries).
aug_param = {
    "angle": 5.0,         # rotation
    # NOTE(review): 0.9 / 2 = 0.45 is the std of a Gaussian centred on 1, which is
    # far wider than a 0.9-1.1 scale range; confirm the intended value.
    "scale": 0.9,
    "shift_x": 0.05,      # horizontal shift
    "shift_y": 0.05,      # vertical shift
    "angle_skew_x": 0.5,  # skew along x
    "angle_skew_y": 0.5,  # skew along y
}

In [13]:
# Project configuration; the path is relative to the working directory.
config = load_config("./configs/config.yaml")

In [14]:
# Show the augmentation settings read from the config file; the displayed values
# equal the aug_param dict defined in this notebook.
config["aug_param"]

{'angle': 5.0,
 'scale': 0.9,
 'shift_x': 0.05,
 'shift_y': 0.05,
 'angle_skew_x': 0.5,
 'angle_skew_y': 0.5}

In [16]:
from src.data_processing.dataset import SignData

In [40]:
# Label names in config order; each name's position becomes its integer class id.
actions = config["labels"]["actions"]
label_map = {name: position for position, name in enumerate(actions)}

# Directory under which the .npy landmark files are searched for.
path = config["paths"]["data_dir"]

class SignData(Dataset):
    """Dataset of per-video landmark arrays stored as ``.npy`` files.

    Each item is a ``(landmark, label)`` pair. ``landmark`` is a float32 tensor of
    shape ``(2 * P, T)`` holding the normalised x/y coordinates of the left-hand,
    right-hand and lips points (``P`` points in total) for ``T`` frames. ``label``
    is the class index as a float32 scalar tensor.

    The class relies on the notebook-level names ``config``,
    ``take_all_landmarks_processing`` and ``augmentation`` being defined before an
    item is fetched.

    Args:
        path: sequence of ``.npy`` file paths, one per sample.
        label_map: mapping from label name to integer class index.
        frame_drop: per-frame drop probability (``0.0`` disables dropping).
        mode: ``"train"`` or ``"valid"``; fetching an item in any other mode
            raises ``ValueError``.
        transform: augmentation-parameter dict forwarded to ``augmentation``;
            a falsy value (``None`` / empty dict) disables augmentation.
    """

    def __init__(self, path, label_map, frame_drop=0.0, mode="train", transform=None):
        super().__init__()
        self.path = path
        self.label_map = label_map
        self.mode = mode
        self.transform = transform
        # Body parts concatenated, in this order, along the point axis.
        self.dict = ["left_hand", "right_hand", "lips"]
        self.frame_drop = frame_drop

    def __len__(self):
        """Return the number of sample files."""
        return len(self.path)

    def apply_frame_drop(self, data, frame_drop):
        """Randomly drop frames from a clip.

        Args:
            data: numpy array whose first axis indexes frames.
            frame_drop: probability of dropping each individual frame.

        Returns:
            The clip without the dropped frames. The input is returned unchanged
            when ``frame_drop`` is not positive or when fewer than two frames
            would survive.
        """
        if frame_drop > 0.0:
            keep_mask = np.random.random(len(data)) >= frame_drop
            kept = data[keep_mask]

            # Never hand back a clip with fewer than two frames.
            if len(kept) >= 2:
                data = kept
        return data

    def process_data(self, landmark_dict, aug_param=None, frame_drop=0.0):
        """Turn per-part landmark arrays into one normalised model input.

        Args:
            landmark_dict: mapping with ``"left_hand"``, ``"right_hand"`` and
                ``"lips"`` arrays indexed as ``[frame, point, coordinate]``.
                The mapping itself is not modified.
            aug_param: augmentation parameters; falsy disables augmentation.
            frame_drop: per-frame drop probability, see ``apply_frame_drop``.

        Returns:
            float32 tensor of shape ``(2 * P, T)`` with NaNs replaced by ``0.0``.
        """
        # Work on a shallow copy so the caller's mapping is left untouched.
        parts = dict(landmark_dict)
        if aug_param:
            # Each part gets its own independently drawn random transform.
            for key in ("left_hand", "right_hand", "lips"):
                parts[key] = augmentation(parts[key], aug_param)

        # Concatenate landmark data from all parts along the point axis.
        landmark = np.concatenate([parts[key] for key in self.dict], axis=1)
        landmark = self.apply_frame_drop(landmark, frame_drop)

        # Select only x, y coordinates.
        landmark = landmark[:, :, :2]

        # Always emit float32: the augmented path already produces float32, while
        # the non-augmented path would otherwise inherit the dtype of the file.
        landmark = torch.tensor(landmark, dtype=torch.float32)

        # Normalise with the mean / std of all non-NaN values (x and y pooled).
        landmark = landmark - landmark[~torch.isnan(landmark)].mean(0, keepdim=True)
        landmark = landmark / landmark[~torch.isnan(landmark)].std(0, keepdim=True)

        # Missing values (and any NaN produced by a degenerate std) become 0.
        landmark[torch.isnan(landmark)] = 0.0  # T x P x 2
        landmark = torch.reshape(landmark, (landmark.shape[0], -1))  # T x 2P

        # Put the feature axis first.
        landmark = torch.permute(landmark, (1, 0))  # 2P x T

        return landmark

    def __getitem__(self, idx):
        """Load, preprocess and label the sample at ``idx``.

        Raises:
            ValueError: if ``self.mode`` is neither ``"train"`` nor ``"valid"``
                (previously this silently returned ``None``).
        """
        video = self.path[idx]

        if self.mode not in ("train", "valid"):
            raise ValueError(
                f"Unsupported mode {self.mode!r}; expected 'train' or 'valid'"
            )

        # The label is the third "/"-separated component of the file path
        # (backslashes are normalised first).
        # NOTE(review): this depends on how deep the data directory is — confirm.
        label_string = video.replace("\\", "/").split("/")[2]
        label = self.label_map[label_string]

        landmarks = np.load(video)
        # 543 landmark rows per frame, 3 coordinates per landmark.
        landmarks = landmarks.reshape(-1, 543, 3)
        landmark_dict = take_all_landmarks_processing(landmarks, config["landmarks"]["face_landmarks"])

        # self.transform is forwarded as the augmentation parameters in both
        # "train" and "valid" mode.
        landmark = self.process_data(landmark_dict, self.transform, frame_drop=self.frame_drop)

        label = torch.tensor(label, dtype=torch.float32)
        return landmark, label

# Without recursive=True each "**" behaves like "*", so this matches .npy files
# located exactly two directory levels below `path`.
new_path = glob.glob(path + "/**/**/**.npy")

# Training dataset using the augmentation settings from the config file.
data = SignData(
    path=new_path,
    label_map=label_map,
    mode="train",
    transform=config["aug_param"],
)

In [41]:
# One clip per batch, loaded in the main process (num_workers=0).
data_loader = DataLoader(
    data,
    batch_size=1,
    shuffle=True,
    num_workers=0,
)


In [43]:
# Fetch a single batch: `a` is the landmark tensor and `b` the label tensor
# returned by SignData.__getitem__, each with a leading batch axis.
iter_data = iter(data_loader)
a,b = next(iter_data)

In [44]:
# Expected layout: (batch, 2 * points, frames), since process_data returns 2P x T.
a.shape

torch.Size([1, 164, 1])

In [28]:


def take_all_landmarks_processing(full_landmarks, face_landmarks):
    """Re-centre a clip on landmark 168 and split out the hands and lips.

    Args:
        full_landmarks: array indexed as ``[frame, landmark, coordinate]``.
        face_landmarks: mapping providing the ``lipsUpperOuter``,
            ``lipsLowerOuter``, ``lipsUpperInner`` and ``lipsLowerInner`` index
            lists.

    Returns:
        dict with ``left_hand``, ``right_hand`` and ``lips`` arrays taken from
        the re-centred clip.

    Relies on the notebook-level ``l_hand_indexes`` and ``r_hand_indexes``.
    """
    # Landmark 168 is listed as "midwayBetweenEyes" in the notebook's face table.
    # Its NaN-ignoring average over all frames becomes the new origin.
    # NOTE(review): face indexes (168 and the lips lists) are applied without any
    # start offset, whereas the hand indexes include theirs — confirm that face
    # landmarks really start at row 0 of the stored layout.
    eye_midpoint = np.nanmean(full_landmarks[:, 168], axis=0, keepdims=True)
    centred = full_landmarks - eye_midpoint

    lips_indexes = (
        face_landmarks["lipsUpperOuter"]
        + face_landmarks["lipsLowerOuter"]
        + face_landmarks["lipsUpperInner"]
        + face_landmarks["lipsLowerInner"]
    )

    return {
        "left_hand": centred[:, l_hand_indexes],
        "right_hand": centred[:, r_hand_indexes],
        "lips": centred[:, lips_indexes],
    }

def augmentation(landmarks, aug_params):
    """Apply one randomly drawn affine transform to the x/y coordinates.

    Args:
        landmarks: array indexed as ``[frame, point, coordinate]`` whose last
            axis starts with x, y, z.
        aug_params: mapping with ``angle``, ``scale``, ``shift_x``, ``shift_y``,
            ``angle_skew_x`` and ``angle_skew_y``. Each value is halved and used
            as the standard deviation of a Gaussian draw (centred on 1 for
            ``scale``, on 0 for the others).

    Returns:
        float32 array: transformed x/y with the original z re-attached, plus the
        per-frame coordinates of point 0 added to every point.

    The previously unused ``shift_x`` / ``shift_y`` draws were removed, so fewer
    values are consumed from the ``random`` stream per call than before.
    """
    angle_rotation = random.gauss(0, aug_params["angle"] / 2)
    scale = random.gauss(1, aug_params["scale"] / 2)
    translation_x = random.gauss(0, aug_params["shift_x"] / 2)
    translation_y = random.gauss(0, aug_params["shift_y"] / 2)
    angle_skew_x = random.gauss(0, aug_params["angle_skew_x"] / 2)
    angle_skew_y = random.gauss(0, aug_params["angle_skew_y"] / 2)

    # Register the individual operations on the project's affine helper.
    # NOTE(review): angle_rotation may be negative, in which case the first
    # bound passed to random_rotation is larger than the second — confirm the
    # helper accepts that.
    affine = Affine_Transformation()
    affine.random_rotation(-angle_rotation, angle_rotation)
    affine.scaling(scale)
    affine.translation(-translation_x, translation_y)
    affine.skew_x_degree(angle_skew_x)
    affine.skew_y_degree(angle_skew_y)

    # Only x/y are transformed; z is carried through unchanged.
    depth = landmarks[:, :, 2][:, :, None]
    planar = affine.transform(landmarks[:, :, :2])
    aug_landmarks = np.concatenate((planar, depth), axis=2)

    # NOTE(review): point 0 of each frame is added back although it is never
    # subtracted in this function — confirm whether Affine_Transformation
    # centres the data internally, otherwise this shifts every point.
    aug_landmarks = aug_landmarks + landmarks[:, 0][:, None]
    return aug_landmarks.astype(np.float32)

In [63]:
# take_all_landmarks_processing requires the face-landmark table as its second
# argument; pass the same table SignData.__getitem__ uses.
# NOTE(review): `window` is not defined in the visible cells — presumably a
# batch of raw clips from an earlier session; confirm before re-running.
landmark = take_all_landmarks_processing(window[0], config["landmarks"]["face_landmarks"])

In [64]:
# Augment each body part in place, one after the other; every call draws its
# own random transform. Re-running this cell augments the data again.
for part_name in ("left_hand", "right_hand", "lips"):
    landmark[part_name] = augmentation(landmark[part_name], aug_param)

In [65]:
# Display the augmented per-part arrays (full dump of every array).
landmark

{'left_hand': array([[[-0.8495768 , -0.70004815, -0.0214212 ],
         [-0.8495768 , -0.70004815, -0.0214212 ],
         [-0.8495768 , -0.70004815, -0.0214212 ],
         ...,
         [-0.8495768 , -0.70004815, -0.0214212 ],
         [-0.8495768 , -0.70004815, -0.0214212 ],
         [-0.8495768 , -0.70004815, -0.0214212 ]],
 
        [[-0.8495768 , -0.70004815, -0.0214212 ],
         [-0.8495768 , -0.70004815, -0.0214212 ],
         [-0.8495768 , -0.70004815, -0.0214212 ],
         ...,
         [-0.8495768 , -0.70004815, -0.0214212 ],
         [-0.8495768 , -0.70004815, -0.0214212 ],
         [-0.8495768 , -0.70004815, -0.0214212 ]],
 
        [[ 0.3644674 , -0.18295538, -0.02142106],
         [ 0.33638862, -0.18021838, -0.02473261],
         [ 0.3109028 , -0.19533974, -0.02558183],
         ...,
         [ 0.31946716, -0.28076556, -0.0398979 ],
         [ 0.31065717, -0.28950483, -0.03993724],
         [ 0.302781  , -0.29478338, -0.04041681]],
 
        ...,
 
        [[ 0.4185163 