In [8]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
import shutil

In [9]:
# Path and sampling configuration.

# Root directory of the training split.
train_dir = "train_data"

# Root directory of the validation split.
val_dir = "val_data"

# Number of synthetic "none" samples to generate (adjust as needed).
none_class_samples = 100


In [10]:
# Discover the gesture categories: every sub-directory of train_dir is one
# gesture class.
#
# * os.listdir returns entries in arbitrary order, so the names are sorted to
#   keep the category -> label id mapping stable across runs and machines.
# * A folder named "none" is skipped: "none" is the synthetic background class
#   with the reserved label 0. A later cell creates train_dir/none, so without
#   this filter a re-run would enumerate it as a gesture and leave a gap in
#   the label ids.
categories = sorted(
    entry
    for entry in os.listdir(train_dir)
    if entry != "none" and os.path.isdir(os.path.join(train_dir, entry))
)

# Label mapping: gestures are numbered 1..N in sorted order, "none" is 0.
gesture_labels = {category: idx + 1 for idx, category in enumerate(categories)}
gesture_labels["none"] = 0

# Accumulators for the loaded samples and their integer labels.
# They are filled by the loading cells that follow.
all_samples_train = []
all_labels_train = []

all_samples_val = []
all_labels_val = []

In [11]:
# Load every .npy sample of every gesture category, for both splits.
#
# The training and validation directories are processed identically, so a
# single loop walks over (directory, sample list, label list) triples instead
# of repeating the same code twice.
for split_dir, split_samples, split_labels in (
    (train_dir, all_samples_train, all_labels_train),  # 1. training split
    (val_dir, all_samples_val, all_labels_val),        # 2. validation split
):
    for category in categories:
        category_dir = os.path.join(split_dir, category)

        # os.listdir order is arbitrary; sorting fixes the sample order so the
        # seeded train_test_split further down yields the same split each run.
        npy_files = sorted(
            name for name in os.listdir(category_dir) if name.endswith('.npy')
        )

        for file_name in npy_files:
            # Each file is assumed to hold one (30, 21, 3) array — the shape
            # is not validated here.
            sample = np.load(os.path.join(category_dir, file_name))
            split_samples.append(sample)
            split_labels.append(gesture_labels[category])

In [12]:
# 3. Add synthetic "none" samples to both the training and validation lists.
# Each synthetic sample is an all-zero array of shape (30, 21, 3), labelled
# with the id reserved for "none".
for target_samples, target_labels in (
    (all_samples_train, all_labels_train),
    (all_samples_val, all_labels_val),
):
    target_samples.extend(
        np.zeros((30, 21, 3)) for _ in range(none_class_samples)
    )
    target_labels.extend(
        gesture_labels["none"] for _ in range(none_class_samples)
    )

In [13]:
# Convert the accumulated Python lists to NumPy arrays.
all_samples_train = np.array(all_samples_train)  # (num_samples, 30, 21, 3) when every sample is (30, 21, 3)
all_labels_train = np.array(all_labels_train)    # (num_samples,)

all_samples_val = np.array(all_samples_val)
all_labels_val = np.array(all_labels_val)

# NOTE(review): only the *_train arrays are split below; all_samples_val and
# all_labels_val are converted but not used by the split — confirm that
# ignoring the pre-existing validation data is intended.

# Split into 80% training / 20% validation with a fixed seed.
# The split is stratified on the labels so every class (including the small
# synthetic "none" class) keeps the same proportion in both parts. Stratifying
# raises ValueError when a class is too small to be divided; in that case fall
# back to the plain random split.
split_kwargs = dict(test_size=0.2, random_state=42)
try:
    X_train, X_val, y_train, y_val = train_test_split(
        all_samples_train,
        all_labels_train,
        stratify=all_labels_train,
        **split_kwargs,
    )
except ValueError:
    print("Stratified split not possible; falling back to a plain random split.")
    X_train, X_val, y_train, y_val = train_test_split(
        all_samples_train,
        all_labels_train,
        **split_kwargs,
    )

print(f"训练集样本数：{X_train.shape[0]}")
print(f"验证集样本数：{X_val.shape[0]}")

训练集样本数：13773
验证集样本数：3444


In [14]:
# Make sure the output root folders exist.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Create one sub-folder per class (the gestures plus "none") in both roots.
for category in categories + ['none']:
    os.makedirs(os.path.join(train_dir, category), exist_ok=True)
    os.makedirs(os.path.join(val_dir, category), exist_ok=True)

# Inverse of gesture_labels (label id -> category name), built once instead of
# scanning the whole mapping for every sample.
label_to_category = {label: name for name, label in gesture_labels.items()}


def _save_split(samples, labels, out_dir):
    """Write each sample to ``out_dir/<category>/<category>_<index>.npy``.

    File names use a per-category running index and skip any name that already
    exists on disk. The previous random suffix (0..99999) collided with high
    probability for thousands of files, and np.save silently overwrote the
    earlier file, losing samples.

    Args:
        samples: iterable of arrays to store, aligned with ``labels``.
        labels: iterable of integer label ids (keys of ``label_to_category``).
        out_dir: root directory that already contains one folder per category.
    """
    next_index = {}
    for sample, label in zip(samples, labels):
        category = label_to_category[int(label)]
        index = next_index.get(category, 0)
        file_path = os.path.join(out_dir, category, f"{category}_{index}.npy")
        # Never clobber a file that is already there (from this run or before).
        while os.path.exists(file_path):
            index += 1
            file_path = os.path.join(out_dir, category, f"{category}_{index}.npy")
        next_index[category] = index + 1
        np.save(file_path, sample)


# NOTE(review): train_dir / val_dir are the same directories the samples were
# loaded from, so the original files stay next to these copies, and X_val
# (drawn from the training data) is written into val_dir — confirm that this
# is intended, or point the output at fresh directories.
_save_split(X_train, y_train, train_dir)  # training part of the split
_save_split(X_val, y_val, val_dir)        # validation part of the split

print("数据已保存到训练集和验证集文件夹中。")


数据已保存到训练集和验证集文件夹中。
