In [111]:
import os
import json
import cv2
import mediapipe as mp
import numpy as np
from PIL import Image
from IPython.display import display

from tqdm.auto import tqdm

In [112]:
letters = [l.split('.')[0].lower() for l in os.listdir("/home/ant/projects/psl/dataset/Videos/alphabet")]
vocabulary = ['<start>', '<eos>'] + letters
vocabulary = {l: i+1 for i, l in enumerate(vocabulary)}
vocabulary

{'<start>': 1,
 '<eos>': 2,
 'j': 3,
 'r': 4,
 'z': 5,
 't': 6,
 's': 7,
 'n': 8,
 'g': 9,
 'b': 10,
 'l': 11,
 'y': 12,
 'ch': 13,
 'u': 14,
 'ó': 15,
 'd': 16,
 'f': 17,
 'ż': 18,
 'k': 19,
 'e': 20,
 'cz': 21,
 'sz': 22,
 'o': 23,
 'ź': 24,
 'm': 25,
 'ń': 26,
 'ć': 27,
 'c': 28,
 'i': 29,
 'ł': 30,
 'ą': 31,
 'w': 32,
 'h': 33,
 'ś': 34,
 'rz': 35,
 'a': 36,
 'p': 37}

In [113]:
def landmarks_timeseries(video_path):
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands()

    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_rate = 0.5
    frames_to_skip = int(fps * frame_rate)
    landmarks_data = []
    current_frame = 0

    while cap.isOpened():
        # Set the position to the current frame
        cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
        ret, frame = cap.read()
        if not ret:
            break

        # Convert the BGR image to RGB
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Process the frame and get landmarks
        results = hands.process(rgb_frame)
        if results.multi_hand_landmarks:
            hand_landmarks = results.multi_hand_landmarks[0].landmark
            landmarks_data.append([[landmark.x, landmark.y, landmark.z] for landmark in hand_landmarks])

        current_frame += frames_to_skip

    cap.release()
    landmarks_data = np.array(landmarks_data).reshape(len(landmarks_data), -1)
    return landmarks_data



video_path = "/home/ant/projects/psl/dataset/Videos/alphabet/C.mp4"
lands = landmarks_timeseries(video_path)

I0000 00:00:1705528967.587493    3400 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1705528967.608517    4398 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 535.146.02), renderer: NVIDIA GeForce GTX 970/PCIe/SSE2


In [114]:
lands.shape

(6, 63)

In [115]:
videos_path = "/home/ant/projects/psl/dataset/Videos/alphabet"
labels = []
landmarks = []
for i, filename in enumerate(tqdm(os.listdir(videos_path))):
    if filename.endswith('.mp4'):
        video_path = os.path.join(videos_path, filename)
        # print(video_path)
        label = filename.split('.')[0].lower()
        label = ['<start>', label, '<eos>']
        label = [vocabulary[l] for l in label]
        labels.append(label)
        land = landmarks_timeseries(video_path)
        landmarks.append(land)
    if i > 2:
        break

  0%|          | 0/35 [00:00<?, ?it/s]I0000 00:00:1705528968.474072    3400 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1705528968.480680    4415 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 535.146.02), renderer: NVIDIA GeForce GTX 970/PCIe/SSE2


  3%|▎         | 1/35 [00:00<00:22,  1.51it/s]I0000 00:00:1705528969.135125    3400 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1705528969.141603    4432 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 535.146.02), renderer: NVIDIA GeForce GTX 970/PCIe/SSE2
  6%|▌         | 2/35 [00:01<00:23,  1.42it/s]I0000 00:00:1705528969.873171    3400 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1705528969.879696    4449 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 535.146.02), renderer: NVIDIA GeForce GTX 970/PCIe/SSE2
  9%|▊         | 3/35 [00:02<00:26,  1.22it/s]I0000 00:00:1705528970.822064    3400 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1705528970.828664    4466 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 535.146.02), renderer: NVIDIA GeForce GTX 970/PCIe/SSE2
  9%|▊         | 3/35 [00:03<00:33,  1.05s/it]


In [116]:
videos_path = "/home/ant/projects/psl/dataset/Videos/words"
labels_words = []
landmarks_words = []
for i, filename in enumerate(tqdm(os.listdir(videos_path))):
    if filename.endswith('.mp4'):
        video_path = os.path.join(videos_path, filename)
        # print(video_path)
        label = ['<start>'] + list(filename.split('.')[0].lower()) + ['<eos>']
        label = [vocabulary[l] for l in label]
        labels_words.append(label)
        land = landmarks_timeseries(video_path)
        landmarks_words.append(land)
    if i > 2:
        break

  0%|          | 0/375 [00:00<?, ?it/s]

I0000 00:00:1705528971.617498    3400 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1705528971.624636    4483 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 535.146.02), renderer: NVIDIA GeForce GTX 970/PCIe/SSE2
  0%|          | 1/375 [00:01<06:15,  1.00s/it]I0000 00:00:1705528972.621395    3400 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1705528972.628031    4500 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 535.146.02), renderer: NVIDIA GeForce GTX 970/PCIe/SSE2
  1%|          | 2/375 [00:03<10:04,  1.62s/it]I0000 00:00:1705528974.673917    3400 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1705528974.680550    4517 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 535.146.02), renderer: NVIDIA GeForce GTX 970/PCIe/SSE2
  1%|          | 3/375 [00:06<14:06,  2.28s/it]I0000 00:00:1705528977.727850    3400 gl_context_egl.cc:85] Successfully initia

In [117]:
labels_words

[[1, 36, 11, 36, 2],
 [1, 5, 12, 7, 19, 2],
 [1, 4, 23, 5, 16, 5, 29, 36, 30, 2],
 [1, 28, 20, 11, 2]]

In [118]:
for i, d in enumerate(landmarks_words):
    if i > 10:
        break
    print(d.shape)

(8, 63)
(12, 63)
(22, 63)
(9, 63)


In [132]:
def preprocess_data(labels_folder):
    labeled_with_landmarks_count = 0
    labeled_without_landmarks_count = 0
    data_rows = []
    labels = []
    fix = {
        'Ć': 'ć',
        'P': 'E',
        'SZ': 'B',
        'Ę': 'ę',
        'CH': 'H',
        'I': 'J',
        'Ł': 'ł',
        'Ń': 'ń',
        'Ó': 'O',
        'RZ': 'R',
        'Ś': 'ś',
        'Ź': 'ź',
        'Ż': 'ż',
    }

    for filename in tqdm(os.listdir(labels_folder)):
        if filename.endswith('.json'):
            with open(os.path.join(labels_folder, filename), 'r', encoding='utf-8') as json_file:
                data = json.load(json_file)
                if 'hand_landmarks' in data:
                    labeled_with_landmarks_count += 1

                    landmarks_data = data['hand_landmarks']
                    row = []
                    
                    for landmark_key in landmarks_data:
                        landmark = landmarks_data[landmark_key]
                        row.extend([landmark['x'], landmark['y'], landmark['z']])
                    
                    
                    l = data['label']
                    if l in fix:
                        l = fix[l] 
                    label = ['<start>', l.lower(), '<eos>']
                    try:
                        label = [vocabulary[l] for l in label]
                    except: # skip for ę, fix later
                        continue
                    data_rows.append(row)
                    labels.append(label)

                else:
                    labeled_without_landmarks_count += 1

    return data_rows, labels

In [133]:
labels_folder = '../dataset/labels'
data_static, labels_static = preprocess_data(labels_folder)
data_static = np.array(data_static)

  0%|          | 0/3626 [00:00<?, ?it/s]

100%|██████████| 3626/3626 [00:00<00:00, 17702.22it/s]


In [134]:
data_static_new = []
for d in data_static:
    d = np.repeat(d.reshape(1, -1), repeats=np.random.randint(2, 7), axis=0)
    data_static_new.append(d)
    

In [135]:
data = landmarks + landmarks_words + data_static_new 
all_labels = labels + labels_words + labels_static

In [136]:
import tensorflow as tf

In [144]:
class MaskedLoss(tf.keras.losses.Loss):
  def __init__(self):
    self.name = 'masked_loss'
    self.loss = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')

  def __call__(self, y_true, y_pred, *args, **kwargs):

    # Calculate the loss for each item in the batch.
    loss = self.loss(y_true, y_pred)

    # Mask off the losses on padding.
    mask = tf.cast(y_true != 0, tf.float32)
    loss *= mask

    # Return the total.
    return tf.reduce_sum(loss)

In [147]:
def masked_loss(y_true, y_pred):
    loss = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')
    loss = loss(y_true, y_pred)

    # Mask off the losses on padding.
    mask = tf.cast(y_true != 0, tf.float32)
    loss *= mask

    # Return the total.
    return tf.reduce_sum(loss)

In [138]:
padded_inputs = tf.keras.utils.pad_sequences(data, dtype="float32", padding="post")
padded_outputs = tf.keras.utils.pad_sequences(all_labels, dtype="int32", padding="post")

In [153]:
from tensorflow import keras


input_dim = 63
vocab_size = len(vocabulary)
model = keras.Sequential(
    [
        keras.Input(shape=(padded_inputs.shape[1], input_dim), dtype="float32"),
        keras.layers.Masking(),
        keras.layers.LSTM(32, return_sequences=False),
        keras.layers.RepeatVector(padded_outputs.shape[1]),
        keras.layers.LSTM(64, return_sequences=True),
        keras.layers.TimeDistributed(keras.layers.Dense(vocab_size)),
    ]
)
model.summary()

# tf.keras.losses.sparse_categorical_crossentropy
model.compile(loss=masked_loss,
              optimizer=tf.keras.optimizers.Adam(1e-3),
              metrics=['accuracy'])

Model: "sequential_18"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 masking_19 (Masking)        (None, 22, 63)            0         
                                                                 
 lstm_28 (LSTM)              (None, 32)                12288     
                                                                 
 repeat_vector_10 (RepeatVec  (None, 10, 32)           0         
 tor)                                                            
                                                                 
 lstm_29 (LSTM)              (None, 10, 64)            24832     
                                                                 
 time_distributed_9 (TimeDis  (None, 10, 37)           2405      
 tributed)                                                       
                                                                 
Total params: 39,525
Trainable params: 39,525
Non-tra

In [140]:
padded_inputs.shape, padded_outputs.shape

((3181, 22, 63), (3181, 10))

In [154]:
model.fit(padded_inputs, padded_outputs)

2024-01-17 22:10:14.916870: W tensorflow/core/common_runtime/type_inference.cc:339] Type inference failed. This indicates an invalid graph that escaped type checking. Error message: INVALID_ARGUMENT: expected compatible input types, but input 1:
type_id: TFT_OPTIONAL
args {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_TENSOR
    args {
      type_id: TFT_INT32
    }
  }
}
 is neither a subtype nor a supertype of the combined inputs preceding it:
type_id: TFT_OPTIONAL
args {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_TENSOR
    args {
      type_id: TFT_FLOAT
    }
  }
}

	while inferring type of node 'cond_40/output/_23'
2024-01-17 22:10:15.076903: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x2bf7b20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-01-17 22:10:15.076926: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): NVIDIA GeForce GTX 970, Compute Capability 5.2
2024-01-17 22:10:15

InternalError: Graph execution error:

Detected at node 'StatefulPartitionedCall_6' defined at (most recent call last):
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/traitlets/config/application.py", line 1077, in launch_instance
      app.start()
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 701, in start
      self.io_loop.start()
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 195, in start
      self.asyncio_loop.run_forever()
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
      self._run_once()
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
      handle._run()
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 534, in dispatch_queue
      await self.process_one()
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 523, in process_one
      await dispatch(*args)
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 429, in dispatch_shell
      await result
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 767, in execute_request
      reply_content = await reply_content
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 429, in do_execute
      res = shell.run_cell(
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3051, in run_cell
      result = self._run_cell(
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3106, in _run_cell
      result = runner(coro)
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3311, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3493, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_3400/283080546.py", line 1, in <module>
      model.fit(padded_inputs, padded_outputs)
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/keras/engine/training.py", line 1650, in fit
      tmp_logs = self.train_function(iterator)
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/keras/engine/training.py", line 1249, in train_function
      return step_function(self, iterator)
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/keras/engine/training.py", line 1233, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/keras/engine/training.py", line 1222, in run_step
      outputs = model.train_step(data)
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/keras/engine/training.py", line 1027, in train_step
      self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 527, in minimize
      self.apply_gradients(grads_and_vars)
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1140, in apply_gradients
      return super().apply_gradients(grads_and_vars, name=name)
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 634, in apply_gradients
      iteration = self._internal_apply_gradients(grads_and_vars)
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1166, in _internal_apply_gradients
      return tf.__internal__.distribute.interim.maybe_merge_call(
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1216, in _distributed_apply_gradients_fn
      distribution.extended.update(
    File "/home/ant/miniconda3/envs/psl/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1211, in apply_grad_to_update_var
      return self._update_step_xla(grad, var, id(self._var_key(var)))
Node: 'StatefulPartitionedCall_6'
libdevice not found at ./libdevice.10.bc
	 [[{{node StatefulPartitionedCall_6}}]] [Op:__inference_train_function_55905]

In [51]:
model(padded_inputs[:2])[0][4]

<tf.Tensor: shape=(64,), dtype=float32, numpy=
array([-0.00617623,  0.07038333, -0.09975268,  0.04994547, -0.08556576,
       -0.01439674,  0.00178766,  0.12056605,  0.0055276 , -0.00410856,
        0.04292813,  0.01083074, -0.00749248, -0.02138753, -0.07583385,
       -0.00663034, -0.13346957,  0.08411147,  0.00640909,  0.02036927,
        0.06142245, -0.01554792,  0.04529171, -0.0598767 ,  0.02836076,
        0.08498958,  0.03581278, -0.08666255,  0.0232823 , -0.06233657,
        0.03195114,  0.04139042, -0.06258518,  0.19337499,  0.0564622 ,
       -0.0120612 , -0.06972692, -0.1224641 , -0.11664062, -0.0519244 ,
        0.0722313 ,  0.09256959, -0.06957745, -0.02159714, -0.10800963,
        0.09039993,  0.05151774,  0.06304382,  0.14077818, -0.07688762,
        0.02922654, -0.06657206, -0.03469027, -0.02914091, -0.01539665,
       -0.01945005, -0.03742137,  0.00620591, -0.02648269,  0.11653467,
        0.01066596, -0.2610688 , -0.04025134,  0.09088983], dtype=float32)>