In [None]:
!pip install keras-utils

In [1]:
import os
import numpy as np

def load_data(base_path):
    """Load keypoint arrays and their class labels for every dataset split.

    The expected layout is ``base_path/<split>/<label>/<file>.npz`` where
    ``<split>`` is one of "train", "val" or "test" and each ``.npz`` archive
    stores an array under the key ``"keypoints"``.

    Args:
        base_path: Directory that contains the split sub-directories.

    Returns:
        A ``(data, labels)`` tuple of dicts keyed by split name.
        ``data[split]`` is the list of loaded ``"keypoints"`` arrays and
        ``labels[split]`` is the parallel list of label directory names.
        A split whose directory does not exist keeps empty lists and a
        warning is printed.

    Notes:
        A file that cannot be loaded (unreadable archive, missing
        ``"keypoints"`` key, ...) is reported with a printed message and
        skipped; it does not abort the scan.
    """
    splits = ("train", "val", "test")
    data = {split: [] for split in splits}
    labels = {split: [] for split in splits}

    for split in splits:
        split_path = os.path.join(base_path, split)

        if not os.path.exists(split_path):
            print(f"Warning: Split path {split_path} not found.")
            continue

        # os.listdir returns entries in arbitrary order; sort so that the
        # sample order is the same on every run and every machine.
        for label in sorted(os.listdir(split_path)):
            label_path = os.path.join(split_path, label)

            if not os.path.isdir(label_path):
                continue

            for file in sorted(os.listdir(label_path)):
                if not file.endswith(".npz"):
                    continue

                file_path = os.path.join(label_path, file)

                try:
                    # The context manager closes the archive's file handle
                    # as soon as the array has been read, instead of leaving
                    # one open handle per sample.
                    with np.load(file_path) as archive:
                        keypoints = archive["keypoints"]
                except Exception as e:
                    print(f"Error loading file {file_path}: {e}")
                    continue

                data[split].append(keypoints)
                labels[split].append(label)

    return data, labels

# Read every split from the keypoint directory
base_path = "video_key_points"
data, labels = load_data(base_path)

# Unpack the per-split sample lists and their label lists
X_train, X_val, X_test = (data[split_name] for split_name in ("train", "val", "test"))
y_train, y_val, y_test = (labels[split_name] for split_name in ("train", "val", "test"))

# Report how many samples each split contains
print(f"Training samples: {len(X_train)}")
print(f"Validation samples: {len(X_val)}")
print(f"Test samples: {len(X_test)}")


Training samples: 8313
Validation samples: 2253
Test samples: 1414


In [2]:
# Report labels that occur in a held-out split but never in the training split.
train_label_set = set(y_train)

missing_labels_in_train = set(y_val).difference(train_label_set)
print(f"Labels in y_val not in y_train: {missing_labels_in_train}")

missing_labels_in_train = set(y_test).difference(train_label_set)
print(f"Labels in y_test not in y_train: {missing_labels_in_train}")


Labels in y_val not in y_train: {'meaning', 'post'}
Labels in y_test not in y_train: {'meaning', 'post'}


In [6]:
# Training labels get indices in alphabetical order; any label that appears
# only in validation/test is then given the next free index, in order of
# first appearance (validation first, then test).
label_map = {}
for name in sorted(set(y_train)):
    label_map[name] = len(label_map)

for split_labels in (y_val, y_test):
    for name in split_labels:
        # setdefault leaves already-mapped labels untouched
        label_map.setdefault(name, len(label_map))


In [7]:
# Translate each split's string labels into their integer indices.
y_train_encoded = list(map(label_map.__getitem__, y_train))
y_val_encoded = list(map(label_map.__getitem__, y_val))
y_test_encoded = list(map(label_map.__getitem__, y_test))


In [19]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer labels; the width is the size of label_map.
# Note: this rebinds y_train / y_val / y_test from the string label lists to
# the encoded arrays, so the earlier label cells cannot be re-run after this.
num_classes = len(label_map)
y_train, y_val, y_test = (
    to_categorical(encoded, num_classes=num_classes)
    for encoded in (y_train_encoded, y_val_encoded, y_test_encoded)
)


In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Bring every sequence to a common length
MAX_SEQ_LENGTH = 50  # placeholder length; tune to the data
# Taken from the first training sample; assumes every sample shares this
# second-axis size — TODO confirm
INPUT_DIM = X_train[0].shape[1]

# NOTE(review): no `truncating` argument is passed, so the library default
# decides which end of an over-long sequence is cut — confirm that is intended.
pad_options = dict(maxlen=MAX_SEQ_LENGTH, padding='post', dtype='float32')
X_train = pad_sequences(X_train, **pad_options)
X_val = pad_sequences(X_val, **pad_options)
X_test = pad_sequences(X_test, **pad_options)


In [None]:
# Force each split into (samples, MAX_SEQ_LENGTH, INPUT_DIM)
X_train, X_val, X_test = (
    split_array.reshape(-1, MAX_SEQ_LENGTH, INPUT_DIM)
    for split_array in (X_train, X_val, X_test)
)


In [None]:
# from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# callbacks = [
#     ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss'),
#     EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# ]

# history = model.fit(
#     X_train, y_train,
#     validation_data=(X_val, y_val),
#     epochs=50,
#     batch_size=32,
#     callbacks=callbacks
# )


In [None]:
# # Pad sequences to ensure equal length
# from tensorflow.keras.preprocessing.sequence import pad_sequences

# MAX_SEQ_LENGTH = 30  # Define max sequence length
# INPUT_DIM = data[0].shape[1] if len(data) > 0 else 0  # Dimensionality of keypoints

# data_padded = pad_sequences(data, maxlen=MAX_SEQ_LENGTH, dtype="float32", padding="post", truncating="post")

# # One-hot encode labels
# from tensorflow.keras.utils import to_categorical

# labels_categorical = to_categorical(labels, num_classes=len(label_map))

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Masking

# Build the network layer by layer
model = Sequential()
# Masks timesteps equal to 0.0; presumably these are the padded steps —
# confirm against the padding value used when the inputs were padded.
model.add(Masking(mask_value=0.0, input_shape=(MAX_SEQ_LENGTH, INPUT_DIM)))
# First recurrent layer emits the full sequence for the next LSTM
model.add(LSTM(128, return_sequences=True, dropout=0.2, recurrent_dropout=0.2))
# Second recurrent layer emits only its final output
model.add(LSTM(128, return_sequences=False, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
# One softmax unit per entry in label_map
model.add(Dense(len(label_map), activation='softmax'))

# Configure the optimizer, loss and reported metric
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training split, validating on the validation split each epoch.
# NOTE(review): batch_size=1 means one optimizer step per sample; the
# evaluation cells use batch_size=32 — confirm this value is intentional.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=1,
)

In [None]:
# Score the trained model on the held-out test split
test_metrics = model.evaluate(X_test, y_test, batch_size=32)
test_loss, test_accuracy = test_metrics
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")


In [None]:
# Score the model on the training split for comparison with the test metrics.
# Train-specific names and message are used so the test-set results computed
# in the previous cell are neither overwritten nor mislabelled.
train_loss, train_accuracy = model.evaluate(X_train, y_train, batch_size=32)
print(f"Train Loss: {train_loss}, Train Accuracy: {train_accuracy}")