In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import json
import os
from tqdm import tqdm

In [3]:
# Directory holding the input files read by this cell.
DATA_DIR = "/ASL/input/asl-signs/"

# Per-sample metadata table; later cells read its "path" and "sign" columns.
files_to_label = pd.read_csv(os.path.join(DATA_DIR, "train.csv"))

# Mapping from sign name to prediction index (used to build `y` from the "sign" column).
# The encoding is stated explicitly so the read does not depend on the platform's
# default text encoding.
with open(os.path.join(DATA_DIR, "sign_to_prediction_index_map.json"), "r", encoding="utf-8") as f:
    label_to_index = json.load(f)

In [3]:
# Row indices within a single frame (a frame has ROWS_PER_FRAME rows).
# NOTE(review): the names suggest rows 468-488 are the left hand and 522-542 the
# right hand — confirm against the dataset documentation.
left_hand_landmarks = [*range(468, 489)]
right_hand_landmarks = [*range(522, 543)]

# Rows kept as model input: 13 individually listed indices below 468, followed by
# every row from 468 through 542 (88 rows in total).
LANDMARK_IDX = [0, 9, 11, 13, 14, 17, 117, 118, 119, 199, 346, 347, 348, *range(468, 543)]
MAX_LENGTH = 64  # frames kept per sample when the fixed-size array is built
ROWS_PER_FRAME = 543  # rows that make up one frame in each parquet file

NUM_SAMPLES = 1000  # how many rows of train.csv this notebook processes

In [4]:
def load_relevant_data_subset(pq_path, rows_per_frame=None):
    """Load the x/y/z columns of one parquet file as a per-frame array.

    Parameters
    ----------
    pq_path : str or path-like
        Parquet file to read; only the 'x', 'y' and 'z' columns are loaded.
    rows_per_frame : int, optional
        Number of consecutive rows that form one frame. When omitted, the
        notebook-level ``ROWS_PER_FRAME`` constant is used.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(n_frames, rows_per_frame, 3)``.

    Raises
    ------
    ValueError
        If the number of rows in the file is not a whole multiple of
        ``rows_per_frame``.
    """
    if rows_per_frame is None:
        # Looked up at call time so the constant may be defined in a later-run cell.
        rows_per_frame = ROWS_PER_FRAME
    data_columns = ['x', 'y', 'z']
    data = pd.read_parquet(pq_path, columns=data_columns)
    # Integer arithmetic only: a partial trailing frame is reported explicitly
    # instead of being truncated and surfacing later as an opaque reshape error.
    n_frames, leftover = divmod(len(data), rows_per_frame)
    if leftover:
        raise ValueError(
            f"{pq_path}: {len(data)} rows is not a multiple of "
            f"rows_per_frame={rows_per_frame}"
        )
    data = data.values.reshape(n_frames, rows_per_frame, len(data_columns))
    return data.astype(np.float32)

In [5]:
# Read the recordings referenced by the first NUM_SAMPLES rows of the metadata
# table; each list entry is the array returned by load_relevant_data_subset.
data = [
    load_relevant_data_subset(f"/ASL/input/asl-signs/{path}")
    for path in tqdm(files_to_label["path"].iloc[:NUM_SAMPLES])
]

100%|██████████| 1000/1000 [00:03<00:00, 254.83it/s]


In [23]:
# Pack the variable-length samples into one fixed-size array of shape
#   (n_samples, MAX_LENGTH, len(LANDMARK_IDX), 3).
# Samples longer than MAX_LENGTH frames are truncated; shorter ones are followed by
# all-zero frames. Only the LANDMARK_IDX rows are copied, so the buffer is sized for
# the selected landmarks rather than for all ROWS_PER_FRAME rows of every frame.
X = np.zeros((len(data), MAX_LENGTH, len(LANDMARK_IDX), 3))
for i, d in enumerate(data):
    X[i, :min(MAX_LENGTH, len(d))] = d[:MAX_LENGTH, LANDMARK_IDX]

In [24]:
# Replace missing values so downstream code sees only finite numbers
# (NaN becomes 0.0, which is np.nan_to_num's default).
X = np.nan_to_num(X, nan=0.0)

In [25]:
# Look up the prediction index for the sign of each of the first NUM_SAMPLES rows,
# i.e. the same rows whose recordings were loaded into `data`.
y = np.array(
    [label_to_index[sign] for sign in files_to_label["sign"].iloc[:NUM_SAMPLES]]
)

In [26]:
from sklearn.model_selection import train_test_split

In [27]:
# Keep the first two axes and flatten everything after them into a single
# per-frame vector.
X = X.reshape(*X.shape[:2], -1)

In [35]:
import tsfel
# tsfel feature configuration as returned by get_features_by_domain() with no arguments.
cfg = tsfel.get_features_by_domain()

# Extract features for the first sample only. X[0].T has shape (264, 64).
# NOTE(review): the displayed result has one row per input row and every visible
# column is prefixed "0_", so each row appears to be treated as its own
# single-channel series — this may depend on the installed tsfel version; confirm.
# NOTE(review): no sampling frequency is passed; the message echoed under this cell
# is presumably tsfel warning about that — confirm, and pass `fs` if the frame rate
# is known.
X_extracted = tsfel.time_series_features_extractor(cfg, X[0].T)

*** Feature extraction started ***


  X_extracted = tsfel.time_series_features_extractor(cfg, X[0].T)



*** Feature extraction finished ***


In [34]:
# Shape of the array handed to tsfel for the first sample:
# (264, 64) = (88 selected landmarks * 3 coordinates, MAX_LENGTH frames).
X[0].T.shape

(264, 64)

In [36]:
# Show the feature table extracted for the first sample.
X_extracted

Unnamed: 0,0_Absolute energy,0_Area under the curve,0_Autocorrelation,0_Centroid,0_ECDF Percentile Count_0,0_ECDF Percentile Count_1,0_ECDF Percentile_0,0_ECDF Percentile_1,0_ECDF_0,0_ECDF_1,...,0_Wavelet variance_0,0_Wavelet variance_1,0_Wavelet variance_2,0_Wavelet variance_3,0_Wavelet variance_4,0_Wavelet variance_5,0_Wavelet variance_6,0_Wavelet variance_7,0_Wavelet variance_8,0_Zero crossing rate
0,6.409156,0.118889,6.409156,0.113109,12.0,51.0,0.000000,0.535696,0.015625,0.03125,...,0.004807,0.016904,0.036843,0.066415,0.112094,0.186042,0.290382,0.413799,0.541209,1.0
1,3.286235,0.085036,3.286235,0.109524,12.0,51.0,0.000000,0.377211,0.015625,0.03125,...,0.002512,0.008997,0.019587,0.034361,0.056223,0.092086,0.143918,0.206251,0.271257,1.0
2,0.032450,0.008422,0.032450,0.123563,12.0,51.0,-0.040335,0.000000,0.015625,0.03125,...,0.000026,0.000089,0.000184,0.000351,0.000629,0.001052,0.001610,0.002242,0.002878,1.0
3,6.581890,0.120461,6.581890,0.113715,12.0,51.0,0.000000,0.543507,0.015625,0.03125,...,0.004910,0.017257,0.037703,0.068265,0.115730,0.192309,0.299912,0.426854,0.557698,1.0
4,1.688654,0.060962,1.688654,0.109653,12.0,51.0,0.000000,0.270500,0.015625,0.03125,...,0.001287,0.004615,0.010076,0.017682,0.028923,0.047363,0.074012,0.106054,0.139465,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259,2.053810,0.044810,2.053810,0.083104,12.0,51.0,0.000000,0.000000,0.015625,0.03125,...,0.010056,0.017984,0.039195,0.073582,0.096883,0.106369,0.109908,0.113337,0.118449,3.0
260,0.108043,0.009694,0.108043,0.087758,12.0,51.0,0.000000,0.000000,0.015625,0.03125,...,0.000742,0.001456,0.002350,0.003654,0.004452,0.004842,0.005138,0.005476,0.005863,3.0
261,1.416629,0.034549,1.416629,0.035700,12.0,51.0,0.000000,0.000000,0.015625,0.03125,...,0.001264,0.010412,0.035729,0.065847,0.090669,0.108619,0.120668,0.128231,0.132551,3.0
262,1.940860,0.043562,1.940860,0.081671,12.0,51.0,0.000000,0.000000,0.015625,0.03125,...,0.009277,0.016417,0.036912,0.069962,0.092579,0.101699,0.104797,0.107651,0.112128,3.0


In [12]:
# Hold out 20% of the samples for testing, using a fixed random_state.
# NOTE(review): this cell's execution count is lower than that of the cells that
# build X and y — re-run the notebook top to bottom to confirm it still works.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [18]:
from data_tsfel import SignDataset

In [19]:
# Build the SignDataset from data_tsfel over the metadata table and label map,
# rooted at the same input directory used by the earlier cells.
dataset = SignDataset(
    files_to_label,
    label_to_index,
    root_dir="/ASL/input/asl-signs/",
)

In [20]:
# Shape of the first element of item 0; the recorded output is (43824,).
# NOTE(review): 43824 = 264 * 166, which would be 166 features for each of the
# 264 flattened landmark coordinates — confirm against data_tsfel.
dataset[0][0].shape

  x_extracted = tsfel.time_series_features_extractor(self.cfg, X.T).values()


(43824,)

In [17]:
# Same check on item 300; the recorded output is again (43824,), matching item 0.
dataset[300][0].shape

  x_extracted = tsfel.time_series_features_extractor(self.cfg, X.T).values()


(43824,)