In [1]:
import tensorflow_datasets as tfds
# import sign_language_datasets.datasets
from sign_language_datasets.utils.torch_dataset import TFDSTorchDataset
from sign_language_datasets.datasets.config import SignDatasetConfig


DATA_DIR = "/mnt/data3/tfds_slt"
config = SignDatasetConfig(name="rwth_phoenix2014_t_poses", version="3.0.0", include_video=False, include_pose="holistic")
rwth_phoenix2014_t = tfds.load(name='rwth_phoenix2014_t', builder_kwargs=dict(config=config), data_dir=DATA_DIR)

train_dataset = TFDSTorchDataset(rwth_phoenix2014_t["train"])
validation_dataset = TFDSTorchDataset(rwth_phoenix2014_t["validation"])
test_dataset = TFDSTorchDataset(rwth_phoenix2014_t["test"])

  from .autonotebook import tqdm as notebook_tqdm
2024-04-09 08:10:38.507800: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-09 08:10:38.507829: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-09 08:10:38.509013: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-09 08:10:38.530251: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
202

In [2]:
import os

import cv2
import numpy as np
from numpy.typing import NDArray


def load_video(path: str) -> tuple[NDArray[np.uint8], int, int]:
	cap = cv2.VideoCapture(path)
	frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
	frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
	size = (int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)))
	video = np.empty((frame_count, *size, 3), np.dtype('uint8'))
	for i in range(frame_count):
		_, video[i] = cap.read()
	cap.release()
	return video, frame_rate, frame_count

def store_video(video: NDArray[np.uint8], frame_rate: int, name: str, dir: str = '.temp'):
	if not os.path.exists(dir):
		os.makedirs(dir)
	fourcc = cv2.VideoWriter_fourcc(*'mp4v') # type: ignore
	print(type(video))
	out = cv2.VideoWriter(f'{dir}/{name}', fourcc, frame_rate, (video.shape[2], video.shape[1]))
	for frame in video:
		out.write(cv2.resize(frame, (video.shape[2], video.shape[1])))
	out.release()
	cv2.destroyAllWindows()

In [3]:
def draw_keypoints(
		frame,
        frame_keypoints,
        size: int = 5,
        threshold: float = 0.5,
        color: int | None = None):
	'''Draw keypoints in the frame'''
	fr = frame.copy()
	for x, y, z in frame_keypoints:
		if x is None or y is None:
			continue
		else:
			fr[int(y)-size:int(y)+size:,int(x)-size:int(x)+size] = color if color is not None else 255-fr[int(y)-size:int(y)+size:,int(x)-size:int(x)+size]
	return fr

In [4]:
import itertools
from pose_format.utils.holistic import load_holistic
import numpy as np

poses = []
for datum in itertools.islice(train_dataset, 0, 5):
	print((datum.keys()))
	keypoints = datum['pose']['data']
	print(f"Pose shape: {keypoints.shape}")
	print(f"Text: {datum['text'].decode('utf-8')}")
	video = np.zeros((datum['pose']['data'].shape[0], 210, 260, 3), np.dtype('uint8'))
	for i_frame in range(datum['pose']['data'].shape[0]):
		video[i_frame] = draw_keypoints(video[i_frame], keypoints[i_frame].squeeze(), size=2)
	store_video(video, 25, f"{datum['text'].decode('utf-8')}.mp4")
	print()

dict_keys(['gloss', 'id', 'pose', 'signer', 'text'])
Pose shape: torch.Size([47, 1, 543, 3])
Text: guten abend liebe zuschauer
<class 'numpy.ndarray'>

dict_keys(['gloss', 'id', 'pose', 'signer', 'text'])
Pose shape: torch.Size([56, 1, 543, 3])
Text: im bergland fällt zunehmend schnee
<class 'numpy.ndarray'>

dict_keys(['gloss', 'id', 'pose', 'signer', 'text'])
Pose shape: torch.Size([70, 1, 543, 3])
Text: und der wind weht auch noch kräftig aus west bis nordwest
<class 'numpy.ndarray'>

dict_keys(['gloss', 'id', 'pose', 'signer', 'text'])
Pose shape: torch.Size([99, 1, 543, 3])
Text: die aussichten von montag bis mittwoch ändert sich das wetter kaum
<class 'numpy.ndarray'>

dict_keys(['gloss', 'id', 'pose', 'signer', 'text'])
Pose shape: torch.Size([123, 1, 543, 3])
Text: über dem bergland können sich einzelne quellwolken zeigen in küstennähe gibt es auch mal dichtere wolken
<class 'numpy.ndarray'>

