In [1]:
import cv2
import mediapipe as mp
import numpy as np
import os
from tqdm import tqdm

# Initialise MediaPipe Holistic (short alias for the holistic solution module)
mp_holistic = mp.solutions.holistic

# Extract landmarks from a single frame
def extract_landmarks(image, holistic_model):
    """Run ``holistic_model`` on one frame and return pose + face coordinates.

    The frame is converted with ``cv2.COLOR_BGR2RGB`` before it is handed to
    ``holistic_model.process``.  Only ``x``, ``y`` and ``z`` are kept for each
    landmark.  When the pose (or face) result is missing, 33 (or 468) all-zero
    rows are used in its place.

    Args:
        image: Frame passed to ``cv2.cvtColor`` (BGR order is assumed by the
            conversion code used here).
        holistic_model: Object exposing ``process(rgb_image)`` whose result has
            ``pose_landmarks`` and ``face_landmarks`` attributes.

    Returns:
        The pose rows followed by the face rows, concatenated along axis 0.
    """
    def xyz_rows(landmark_set, placeholder_count):
        # A missing detection is represented by a block of zero rows.
        if not landmark_set:
            return [[0, 0, 0]] * placeholder_count
        return [[point.x, point.y, point.z] for point in landmark_set.landmark]

    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    detection = holistic_model.process(rgb_frame)

    pose_rows = xyz_rows(detection.pose_landmarks, 33)
    face_rows = xyz_rows(detection.face_landmarks, 468)

    # Pose first, then face, stacked row-wise.
    return np.concatenate((pose_rows, face_rows), axis=0)

# Extract landmarks from every frame of a video
def extract_landmarks_from_video(video_path, holistic_model):
    """Read ``video_path`` frame by frame and collect the landmarks of each frame.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        holistic_model: Model forwarded unchanged to ``extract_landmarks``.

    Returns:
        ``np.array`` built from the list of per-frame ``extract_landmarks``
        results, in reading order.  If no frame could be read (including the
        case where the capture never opens) the array is empty (``size == 0``).

    Raises:
        Whatever ``extract_landmarks`` raises; the capture is released first.
    """
    cap = cv2.VideoCapture(video_path)
    landmarks_data = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure: stop collecting.
                break
            landmarks_data.append(extract_landmarks(frame, holistic_model))
    finally:
        # Release the capture even when a frame fails to process, so the
        # underlying file handle / decoder is not leaked.
        cap.release()
    return np.array(landmarks_data)

# 1. Gaussian noise
def add_gaussian_noise(landmarks, std_dev=0.01):
    """Return ``landmarks`` plus zero-mean Gaussian noise of the same shape.

    Args:
        landmarks: Array of coordinates; only its ``shape`` is inspected.
        std_dev: Standard deviation of the noise.

    Returns:
        A new array; every element is perturbed independently using the global
        ``np.random`` state.
    """
    return landmarks + np.random.normal(loc=0, scale=std_dev, size=landmarks.shape)

# 2. Translation
def apply_translation(landmarks, max_translation=0.02):
    """Shift all landmarks by one random offset per coordinate axis.

    A single offset vector is drawn uniformly from
    ``[-max_translation, max_translation]`` with one entry per element of the
    last axis, and is broadcast over every leading axis.  This makes the
    function work for a single frame ``(landmarks, coords)`` as well as for a
    whole sequence ``(frames, landmarks, coords)``, where every frame receives
    the same shift.

    Args:
        landmarks: Array whose last axis holds the coordinates.
        max_translation: Maximum absolute shift per coordinate.

    Returns:
        A new array with the same shape as ``landmarks``.
    """
    # Size the offset by the coordinate axis (last axis).  Using shape[1] only
    # works for 2-D input and fails to broadcast for 3-D sequences.
    translation = np.random.uniform(-max_translation, max_translation, size=landmarks.shape[-1])
    return landmarks + translation

# 3. Scaling
def apply_scaling(landmarks, scale_range=(0.9, 1.1)):
    """Multiply every coordinate by one random factor.

    Args:
        landmarks: Array of coordinates.
        scale_range: ``(low, high)`` bounds; the factor is drawn uniformly
            between its first two entries using the global ``np.random`` state.

    Returns:
        ``landmarks`` multiplied by the drawn factor (a new array).
    """
    low = scale_range[0]
    high = scale_range[1]
    factor = np.random.uniform(low=low, high=high)
    return landmarks * factor

# 4. Horizontal flip (mirror the x coordinate)
def apply_horizontal_flip(landmarks):
    """Return a mirrored copy of ``landmarks`` with ``x`` replaced by ``1 - x``.

    The x coordinate is taken to be element 0 of the last axis, so the function
    works for a single frame ``(landmarks, coords)`` and for a sequence
    ``(frames, landmarks, coords)``.  The mapping ``x -> 1 - x`` assumes x is
    normalised to ``[0, 1]``.

    The input array is left untouched; callers that keep the original sequence
    next to the flipped one get two independent arrays.

    NOTE(review): all-zero placeholder rows end up with ``x == 1`` after the
    flip, and left/right landmark identities are not swapped — confirm this is
    acceptable for the downstream model.

    Args:
        landmarks: Array whose last axis holds the coordinates, x first.

    Returns:
        A new array of the same shape and dtype with the x coordinate mirrored.
    """
    flipped = landmarks.copy()
    # Index the coordinate axis explicitly; ``[:, 0]`` would select landmark 0
    # (all of its coordinates) when a whole sequence is passed in.
    flipped[..., 0] = 1 - flipped[..., 0]
    return flipped

# 5. Uniform random noise
def apply_random_noise(landmarks, max_noise=0.02):
    """Return ``landmarks`` plus independent uniform noise of the same shape.

    Args:
        landmarks: Array of coordinates; only its ``shape`` is inspected.
        max_noise: Half-width of the noise interval; each element is perturbed
            by a value drawn uniformly from ``[-max_noise, max_noise)``.  The
            default reproduces the previously hard-coded ``0.02``.

    Returns:
        A new array; the input is not modified.
    """
    noise = np.random.uniform(-max_noise, max_noise, landmarks.shape)
    return landmarks + noise

# Apply each augmentation in the list below to the same input
def apply_all_augmentations(landmarks):
    """Produce one augmented variant of ``landmarks`` per augmentation.

    Each augmentation is applied independently to the input (they are not
    chained).  ``apply_translation`` is not part of the list, so four variants
    are returned.

    Args:
        landmarks: Array of landmark coordinates.

    Returns:
        List with the results of ``add_gaussian_noise``, ``apply_scaling``,
        ``apply_horizontal_flip`` and ``apply_random_noise``, in that order.
    """
    augmentations = [
        add_gaussian_noise,
        apply_scaling,
        apply_horizontal_flip,
        apply_random_noise,
    ]
    # Every augmentation gets its own copy: one that writes into its argument
    # must not alter the caller's array or the input seen by later entries.
    return [aug(np.copy(landmarks)) for aug in augmentations]

# Process a whole directory of action folders and save the data
def process_directory(data_dir, output_file):
    """Extract landmark sequences for every video under ``data_dir`` and save them.

    ``data_dir`` is scanned for sub-directories; each one is treated as an
    action and receives an integer label following the sorted order of the
    directory names.  Every ``.mp4`` / ``.avi`` file inside an action directory
    (extension matched case-insensitively) is converted with
    ``extract_landmarks_from_video``.  The resulting sequence and every variant
    returned by ``apply_all_augmentations`` are stored with the action's label.
    Videos that yield an empty array are skipped.

    The archive written with ``np.savez_compressed`` contains:
        sequences   -- 1-D object array, one landmark array per entry
        labels      -- label index per entry
        label_names -- action directory names, indexed by label

    Args:
        data_dir: Directory containing one sub-directory per action.
        output_file: Destination passed to ``np.savez_compressed``.
    """
    # Label indices are assigned over directories only, so stray files in
    # data_dir cannot leave gaps in the label ids.
    action_names = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )
    label_map = {action: idx for idx, action in enumerate(action_names)}

    all_sequences = []
    all_labels = []

    # NOTE(review): one Holistic instance with static_image_mode=False is
    # reused for all videos — confirm that state carried across video
    # boundaries is acceptable.
    holistic_model = mp_holistic.Holistic(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)
    try:
        for action in action_names:
            action_dir = os.path.join(data_dir, action)

            print(f"Processing action: {action}")
            label = label_map[action]

            # Sorted so the order of the saved sequences does not depend on
            # the file system's listing order.
            for video_file in tqdm(sorted(os.listdir(action_dir))):
                if not video_file.lower().endswith((".mp4", ".avi")):
                    continue
                video_path = os.path.join(action_dir, video_file)

                # Extract landmarks
                landmarks = extract_landmarks_from_video(video_path, holistic_model)
                if landmarks.size == 0:
                    continue

                # Store the original landmarks
                all_sequences.append(landmarks)
                all_labels.append(label)

                # Store the augmented landmarks.  Augmentation works on a copy
                # so the stored original can never be modified through it.
                for aug_lm in apply_all_augmentations(landmarks.copy()):
                    all_sequences.append(aug_lm)
                    all_labels.append(label)
    finally:
        # Close the model even if a video fails part-way through.
        holistic_model.close()

    # Build the object array element by element: np.array(..., dtype=object)
    # would merge equal-length sequences into one multi-dimensional object
    # array instead of keeping one array per sequence.
    sequences = np.empty(len(all_sequences), dtype=object)
    for position, sequence in enumerate(all_sequences):
        sequences[position] = sequence
    labels = np.array(all_labels)

    # Save to the .npz file; label_names lets consumers map ids back to actions.
    np.savez_compressed(
        output_file,
        sequences=sequences,
        labels=labels,
        label_names=np.array(action_names),
    )

# Input directory of action folders and the output archive for this run
input_dir = "videos"
output_file = "lstm_train_data_v2.npz"

# Run the processing
process_directory(input_dir, output_file)


Processing action: aicho


100%|██████████| 3/3 [00:06<00:00,  2.19s/it]


Processing action: bome


100%|██████████| 3/3 [00:07<00:00,  2.59s/it]


Processing action: bunoc


100%|██████████| 3/3 [00:10<00:00,  3.41s/it]


Processing action: cogiao


100%|██████████| 3/3 [00:08<00:00,  2.71s/it]


Processing action: conde


100%|██████████| 3/3 [00:10<00:00,  3.58s/it]


Processing action: day


100%|██████████| 3/3 [00:08<00:00,  2.86s/it]


Processing action: hoctap


100%|██████████| 3/3 [00:08<00:00,  2.78s/it]


Processing action: hoctro


100%|██████████| 3/3 [00:10<00:00,  3.34s/it]


Processing action: luoi


100%|██████████| 3/3 [00:10<00:00,  3.39s/it]


Processing action: monTinhocvaCongnghe


100%|██████████| 3/3 [00:08<00:00,  2.97s/it]


Processing action: ngaycuaMe


100%|██████████| 3/3 [00:08<00:00,  2.83s/it]


Processing action: qua


100%|██████████| 3/3 [00:06<00:00,  2.20s/it]


Processing action: tugiac


100%|██████████| 3/3 [00:06<00:00,  2.21s/it]
