In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional,GlobalAveragePooling1D, Activation
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import tensorflow as tf
import cv2
import numpy as np
import os
import glob
import json
from tensorflow import keras
from sklearn.model_selection import train_test_split

    You are using a Python version 3.9 past its end of life. Google will update
    google-auth with critical bug fixes on a best-effort basis, but not
    with any other fixes or features. Please upgrade your Python version,
    and then update google-auth.
    
    You are using a Python version 3.9 past its end of life. Google will update
    google-auth with critical bug fixes on a best-effort basis, but not
    with any other fixes or features. Please upgrade your Python version,
    and then update google-auth.
    


In [2]:
# 1. Configuration
DATA_PATH        = 'Data'                # root directory containing the action folders
LABEL_MAP_PATH   = 'Logs/label_map.json'
BATCH_SIZE       = 32
AUTOTUNE         = tf.data.AUTOTUNE
VAL_SPLIT        = 0.1
TEST_SPLIT       = 0.1
SEED             = 42

# Seed the Python, NumPy and TensorFlow global RNGs so that weight
# initialisation and other framework-level randomness start from a fixed state.
tf.keras.utils.set_random_seed(SEED)

# 2. Load label_map from JSON
with open(LABEL_MAP_PATH, 'r', encoding='utf-8') as f:
    label_map = json.load(f)

# 3. Collect every .npz file found (recursively) under DATA_PATH
file_pattern = os.path.join(DATA_PATH, '**', '*.npz')
# glob returns paths in arbitrary, filesystem-dependent order. Sorting gives
# any seeded operation applied to this list the same input order on every run.
all_files = sorted(glob.glob(file_pattern, recursive=True))
print(f"Found {len(all_files)} samples.")

# --- Stratified split: each file's parent folder name is used as its class label ---
def _parent_folder(path):
    """Return the name of the directory that directly contains `path`."""
    return os.path.basename(os.path.dirname(path))


holdout_fraction = VAL_SPLIT + TEST_SPLIT

# Stage 1: separate the combined validation+test hold-out from the training files.
stratify_labels = list(map(_parent_folder, all_files))
train_files, temp_files = train_test_split(
    all_files,
    test_size=holdout_fraction,
    stratify=stratify_labels,
    random_state=42,
    shuffle=True,
)

# Stage 2: divide the hold-out into validation and test, stratified again
# using labels recomputed for the hold-out files only.
temp_labels = list(map(_parent_folder, temp_files))
val_files, test_files = train_test_split(
    temp_files,
    test_size=TEST_SPLIT / holdout_fraction,
    stratify=temp_labels,
    random_state=42,
    shuffle=True,
)

# Report the size of each partition.
print(f"  Train samples: {len(train_files)}")
print(f"    Val samples: {len(val_files)}")
print(f"   Test samples: {len(test_files)}")

# 4. Hàm parse mỗi file .npz
def _load_npz(path):
    npz_path = path.decode('utf-8')
    data = np.load(npz_path)
    seq   = data['sequence'].astype(np.float32)
    lbl   = np.int32(data['label'])
    return seq, lbl

def parse_fn(path):
    """Turn a file-path tensor into a ``(sequence, label)`` tensor pair.

    The file is read by ``_load_npz`` through ``tf.numpy_function``; the
    static shapes of both results are then set explicitly.

    Parameters
    ----------
    path : tf.Tensor
        Path of one ``.npz`` sample file.

    Returns
    -------
    tuple of tf.Tensor
        ``(sequence, label)`` with dtypes ``float32`` and ``int32``.
    """
    sequence, label = tf.numpy_function(
        _load_npz, [path], [tf.float32, tf.int32]
    )
    # Declared static shapes: sequence is 60 x 201 (per the original author's
    # note: 60 frames, 201 landmarks); label is a scalar.
    sequence.set_shape((60, 201))
    label.set_shape(())
    return sequence, label

def make_dataset(file_list, shuffle=False, repeat=True, drop_remainder=None):
    """Build a batched, prefetched ``tf.data.Dataset`` from ``.npz`` file paths.

    Pipeline order: file paths -> optional shuffle -> optional repeat ->
    parallel ``parse_fn`` map -> batch of ``BATCH_SIZE`` -> prefetch.

    Parameters
    ----------
    file_list : sequence of str
        Paths of the sample files.
    shuffle : bool, default False
        Shuffle the paths with a buffer covering the whole list, reshuffling
        on every iteration.
    repeat : bool, default True
        Repeat the dataset indefinitely.
    drop_remainder : bool or None, default None
        Whether to drop a final batch smaller than ``BATCH_SIZE``. ``None``
        means "same as ``repeat``": a repeating stream is batched after
        ``repeat()`` and so never ends on a partial batch, whereas a finite
        (e.g. evaluation) dataset keeps its last partial batch so that no
        sample is silently skipped.

    Returns
    -------
    tf.data.Dataset
        Dataset yielding ``(sequence_batch, label_batch)``.
    """
    if drop_remainder is None:
        drop_remainder = repeat

    ds = tf.data.Dataset.from_tensor_slices(file_list)
    if shuffle:
        # Shuffling the (cheap) path strings before loading gives a full
        # shuffle without buffering decoded arrays.
        ds = ds.shuffle(len(file_list), reshuffle_each_iteration=True)
    if repeat:
        ds = ds.repeat()
    ds = ds.map(parse_fn, num_parallel_calls=AUTOTUNE)
    ds = ds.batch(BATCH_SIZE, drop_remainder=drop_remainder)
    ds = ds.prefetch(AUTOTUNE)
    return ds

# 6. Build the datasets
train_ds = make_dataset(train_files, shuffle=True, repeat=True)
val_ds   = make_dataset(val_files, shuffle=False, repeat=True)
test_ds  = make_dataset(test_files, shuffle=False, repeat=False)

# 7. Compute step counts
# train_ds and val_ds are built with repeat=True, so the number of batches
# per pass is derived from the file counts. Both are clamped to at least 1 so
# a split smaller than BATCH_SIZE does not yield a zero-step epoch.
steps_per_epoch = max(1, len(train_files) // BATCH_SIZE)
validation_steps = max(1, len(val_files) // BATCH_SIZE)

Found 15300 samples.
  Train samples: 12240
    Val samples: 1530
   Test samples: 1530


In [3]:
# Model input: one sequence of shape (60, 201) per sample
inputs = tf.keras.Input(shape=(60, 201))

# Three stacked bidirectional LSTM blocks, each followed by batch
# normalisation. The first two return the full sequence; the last one
# returns only its final output.
x = inputs
for keep_sequence in (True, True, False):
    x = Bidirectional(LSTM(256, return_sequences=keep_sequence, dropout=0.3))(x)
    x = BatchNormalization()(x)

# Dense head: Dense(relu) -> Dropout -> BatchNormalization for each width
for width in (512, 256):
    x = Dense(width, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = BatchNormalization()(x)

# Output layer
# NOTE(review): 181 is hard-coded; presumably the number of classes in
# label_map — confirm they agree.
outputs = Dense(181, activation='softmax')(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Compile the model (integer class labels -> sparse categorical cross-entropy)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)

In [4]:
# 1. Make sure the checkpoint directory exists
checkpoint_dir = 'Models/checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)
checkpoint_path = os.path.join(checkpoint_dir, 'final_model.keras')

# 2. Set up the callbacks

# Save the full model (architecture + weights), keeping only the epoch with
# the lowest val_loss seen so far.
best_model_saver = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Stop training once val_loss has failed to improve for 10 consecutive
# epochs, and roll the model back to its best weights.
early_stopper = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

callbacks = [best_model_saver, early_stopper]

In [5]:
# Train the model. The returned History object is kept so the per-epoch
# metrics remain available afterwards (e.g. history.history['val_loss'])
# instead of only being echoed as an object repr.
# steps_per_epoch / validation_steps are passed explicitly alongside
# train_ds / val_ds.
history = model.fit(
    train_ds,
    epochs=150,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_ds,
    validation_steps=validation_steps,
    callbacks=callbacks,
)

Epoch 1/150
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.0213 - loss: 5.1495
Epoch 1: val_loss improved from inf to 3.73248, saving model to Models/checkpoints\final_model.keras
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m828s[0m 2s/step - accuracy: 0.0213 - loss: 5.1484 - val_accuracy: 0.1004 - val_loss: 3.7325
Epoch 2/150
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.0863 - loss: 3.9910
Epoch 2: val_loss improved from 3.73248 to 2.78792, saving model to Models/checkpoints\final_model.keras
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m868s[0m 2s/step - accuracy: 0.0863 - loss: 3.9905 - val_accuracy: 0.2540 - val_loss: 2.7879
Epoch 3/150
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.2150 - loss: 3.0781
Epoch 3: val_loss improved from 2.78792 to 1.90867, saving model to Models/checkpoints\final_model.keras
[1m382/382[0m [32m━━━━

[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m747s[0m 2s/step - accuracy: 0.9391 - loss: 0.1927 - val_accuracy: 0.9880 - val_loss: 0.0412
Epoch 24/150
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9474 - loss: 0.1660
Epoch 24: val_loss improved from 0.02761 to 0.02733, saving model to Models/checkpoints\final_model.keras
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m750s[0m 2s/step - accuracy: 0.9474 - loss: 0.1660 - val_accuracy: 0.9914 - val_loss: 0.0273
Epoch 25/150
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9463 - loss: 0.1633
Epoch 25: val_loss did not improve from 0.02733
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m756s[0m 2s/step - accuracy: 0.9463 - loss: 0.1634 - val_accuracy: 0.9887 - val_loss: 0.0319
Epoch 26/150
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9529 - loss: 0.1544
Epoch 26: val_loss did not 

<keras.src.callbacks.history.History at 0x15b92a7db80>