In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional,GlobalAveragePooling1D, Activation
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import tensorflow as tf
import cv2
import numpy as np
import os
import glob
import json
from tensorflow import keras
from sklearn.model_selection import train_test_split

    You are using a Python version 3.9 past its end of life. Google will update
    google-auth with critical bug fixes on a best-effort basis, but not
    with any other fixes or features. Please upgrade your Python version,
    and then update google-auth.
    
    You are using a Python version 3.9 past its end of life. Google will update
    google-auth with critical bug fixes on a best-effort basis, but not
    with any other fixes or features. Please upgrade your Python version,
    and then update google-auth.
    


In [2]:
# 1. Các biến cấu hình
DATA_PATH        = 'Data'                # thư mục gốc chứa các folder action
LABEL_MAP_PATH   = 'Logs/label_map.json'
BATCH_SIZE       = 32
AUTOTUNE         = tf.data.AUTOTUNE
VAL_SPLIT        = 0.1
TEST_SPLIT       = 0.1
# 2. Load label_map từ JSON
with open(LABEL_MAP_PATH, 'r', encoding='utf-8') as f:
    label_map = json.load(f)            # ví dụ: {"địa chỉ": 0, "miến điện": 1, ...}

# 3. Tạo danh sách tất cả các file .npz
#    Data structure: Data/<action_name>/*.npz
file_pattern = os.path.join(DATA_PATH, '**', '*.npz')
all_files = glob.glob(file_pattern, recursive=True)
print(f"Found {len(all_files)} samples.")

train_files, temp_files = train_test_split(
    all_files,
    test_size=VAL_SPLIT + TEST_SPLIT,  # ví dụ: 0.2 + 0.1 = 0.3
    shuffle=True,
    random_state=42,
    stratify=[os.path.basename(p).split('.')[0] for p in all_files]
)

val_files, test_files = train_test_split(
    temp_files,
    test_size=TEST_SPLIT / (VAL_SPLIT + TEST_SPLIT),  # ví dụ: 0.1 / 0.3 = 1/3
    shuffle=True,
    random_state=42,
    stratify=[os.path.basename(p).split('.')[0] for p in temp_files]
)

print(f"  Train samples: {len(train_files)}")
print(f"    Val samples: {len(val_files)}")
print(f"   Test samples: {len(test_files)}")

# 4. Hàm parse mỗi file .npz
def _load_npz(path):
    # path: scalar tf.string tensor
    npz_path = path.decode('utf-8')
    data = np.load(npz_path)
    seq   = data['sequence'].astype(np.float32)   # (60,126)
    lbl   = np.int32(data['label'])
    return seq, lbl

def parse_fn(path):
    seq, lbl = tf.numpy_function(
        func=_load_npz,
        inp=[path],
        Tout=[tf.float32, tf.int32]
    )
    # set shape để TF biết kích thước cố định
    seq.set_shape([60, 201])
    lbl.set_shape([])
    return seq, lbl
def make_dataset(file_list, shuffle=False, repeat=False):
    ds = tf.data.Dataset.from_tensor_slices(file_list)
    if shuffle:
        ds = ds.shuffle(len(file_list), reshuffle_each_iteration=True)
    if repeat:
        ds = ds.repeat()
    ds = ds.map(parse_fn, num_parallel_calls=AUTOTUNE)
    ds = ds.batch(BATCH_SIZE, drop_remainder=True)
    ds = ds.prefetch(AUTOTUNE)
    return ds

# 6. Tạo train_ds & val_ds
train_ds = make_dataset(train_files, shuffle=True, repeat=True)
val_ds   = make_dataset(val_files, shuffle=False, repeat=False)
test_ds  = make_dataset(test_files, shuffle=False, repeat=False)

# 7. Compute steps
steps_per_epoch = len(train_files) // BATCH_SIZE
validation_steps = len(val_files) // BATCH_SIZE

Found 10200 samples.
  Train samples: 8160
    Val samples: 1020
   Test samples: 1020


In [3]:
inputs = tf.keras.Input(shape=(60, 201))

# Khối LSTM thứ nhất
x = Bidirectional(LSTM(256, return_sequences=True, dropout=0.3))(inputs)
x = BatchNormalization()(x)

# Khối LSTM thứ hai
x = Bidirectional(LSTM(256, return_sequences=True, dropout=0.3))(x)
x = BatchNormalization()(x)

# Khối LSTM thứ ba
x = Bidirectional(LSTM(256, dropout=0.3))(x)
x = BatchNormalization()(x)

# Các lớp Dense
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
x = BatchNormalization()(x)

x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = BatchNormalization()(x)

# Lớp đầu ra
outputs = Dense(2764, activation='softmax')(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
# Biên dịch mô hình
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [4]:
# 1. Tạo thư mục lưu checkpoint (nếu chưa có)
checkpoint_dir = 'Models/checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)
checkpoint_path = os.path.join(checkpoint_dir, 'final_model.keras')

# 2. Khởi tạo callbacks
callbacks = [
    # Lưu mô hình với val_loss thấp nhất
    ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=False,  # lưu cả kiến trúc + weights
        verbose=1
    ),
    # Dừng training nếu 5 epoch liên tiếp không cải thiện val_loss
    EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
        verbose=1
    )
]

In [5]:
model.fit(
    train_ds,
    epochs=100,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_ds,
    validation_steps=validation_steps,
    callbacks = callbacks
)

Epoch 1/100
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.0300 - loss: 6.5561
Epoch 1: val_loss improved from inf to 3.38908, saving model to Models/checkpoints\final_model.keras
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m730s[0m 3s/step - accuracy: 0.0301 - loss: 6.5505 - val_accuracy: 0.1109 - val_loss: 3.3891
Epoch 2/100
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m545s[0m 2s/step - accuracy: 0.1127 - loss: 3.4135
Epoch 3/100


  self.gen.throw(typ, value, traceback)
  self._save_model(epoch=epoch, batch=None, logs=logs)
  current = self.get_monitor_value(logs)


[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.2154 - loss: 2.8403
Epoch 3: val_loss improved from 3.38908 to 2.05502, saving model to Models/checkpoints\final_model.keras
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m491s[0m 2s/step - accuracy: 0.2156 - loss: 2.8396 - val_accuracy: 0.3982 - val_loss: 2.0550
Epoch 4/100
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m466s[0m 2s/step - accuracy: 0.3656 - loss: 2.1736
Epoch 5/100
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.4853 - loss: 1.6520
Epoch 5: val_loss improved from 2.05502 to 1.01919, saving model to Models/checkpoints\final_model.keras
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m487s[0m 2s/step - accuracy: 0.4854 - loss: 1.6517 - val_accuracy: 0.6855 - val_loss: 1.0192
Epoch 6/100
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m487s[0m 2s/step - accuracy: 0.5854 - loss: 1.2742
Epoch 7/100
[

[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m490s[0m 2s/step - accuracy: 0.9676 - loss: 0.0996
Epoch 37/100
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9775 - loss: 0.0708
Epoch 37: val_loss improved from 0.01368 to 0.01281, saving model to Models/checkpoints\final_model.keras
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m501s[0m 2s/step - accuracy: 0.9775 - loss: 0.0708 - val_accuracy: 0.9960 - val_loss: 0.0128
Epoch 38/100
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m487s[0m 2s/step - accuracy: 0.9793 - loss: 0.0812
Epoch 39/100
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9784 - loss: 0.0718
Epoch 39: val_loss did not improve from 0.01281
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m497s[0m 2s/step - accuracy: 0.9783 - loss: 0.0718 - val_accuracy: 0.9950 - val_loss: 0.0158
Epoch 40/100
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

Epoch 72/100
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m487s[0m 2s/step - accuracy: 0.9764 - loss: 0.0952
Epoch 73/100
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9825 - loss: 0.0545
Epoch 73: val_loss improved from 0.00848 to 0.00530, saving model to Models/checkpoints\final_model.keras
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m509s[0m 2s/step - accuracy: 0.9825 - loss: 0.0545 - val_accuracy: 0.9970 - val_loss: 0.0053
Epoch 74/100
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m484s[0m 2s/step - accuracy: 0.9819 - loss: 0.0667
Epoch 75/100
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9738 - loss: 0.1004
Epoch 75: val_loss did not improve from 0.00530
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m501s[0m 2s/step - accuracy: 0.9738 - loss: 0.1003 - val_accuracy: 0.9960 - val_loss: 0.0132
Epoch 76/100
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1878fffbc10>