In [3]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
import cv2  # 使用 OpenCV 读取图片

In [5]:
# Input locations: the text file the labels are read from and the directory
# the numbered source images are loaded from.
label_file = 'dataset/sexlable.txt'
image_folder = 'origindata/ImageData'

In [7]:
# Read the label file: one integer label per line, where the k-th label is
# paired with the k-th image. Whitespace-only lines (typically a trailing
# blank line at the end of the file) are skipped, because int('') raises
# ValueError.
with open(label_file, 'r') as f:
    labels = [int(line.strip()) for line in f if line.strip()]

# Build one image path per label. File names use a 1-based, zero-padded
# six-digit index: 000001.jpg, 000002.jpg, ...
num_images = len(labels)
image_paths = [os.path.join(image_folder, f"{i:06d}.jpg") for i in range(1, num_images + 1)]

In [11]:
# Load every image, recording the label of each image that was actually read.
# cv2.imread returns None (it does not raise) when a file is missing or cannot
# be decoded. Such files are skipped, and their labels must be skipped with
# them; otherwise the image and label sequences end up with different lengths
# and no longer correspond to each other.
loaded_images = []
loaded_labels = []
skipped_paths = []
for image_path, label in zip(image_paths, labels):
    img = cv2.imread(image_path)
    if img is None:
        skipped_paths.append(image_path)
        continue
    loaded_images.append(img)
    loaded_labels.append(label)

# Make skipped files visible instead of dropping them silently.
if skipped_paths:
    print(f"Skipped {len(skipped_paths)} unreadable image(s), first: {skipped_paths[0]}")

# Convert to NumPy arrays.
# NOTE(review): assumes all images share the same height, width and channel
# count so that they stack into one regular array -- TODO confirm.
images = np.array(loaded_images)
# `labels` still holds every label from the label file (one per entry in
# image_paths), which keeps this cell safe to re-run; `image_labels` holds only
# the labels of the images that were loaded and is aligned with `images`.
labels = np.array(labels)
image_labels = np.array(loaded_labels)

# Split into 80% training data and 20% test data with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    images, image_labels, test_size=0.2, random_state=42
)

In [25]:
# Output locations, grouped per split: the folder that receives the split's
# label file, followed by the sub-folder that receives its images.
train_label_folder = 'dataset/traindata'
train_image_folder = 'dataset/traindata/train_image'
test_label_folder = 'dataset/testdata'
test_image_folder = 'dataset/testdata/test_image'

In [27]:
# Create all output directories up front. exist_ok=True means an already
# existing directory is not an error, so this cell can be re-run.
for output_dir in (
    train_image_folder,
    test_image_folder,
    train_label_folder,
    test_label_folder,
):
    os.makedirs(output_dir, exist_ok=True)

In [33]:
# Write the training split to disk: image k is saved as train_<k>.jpg (1-based,
# six digits, zero-padded) and its label is written as line k of
# train_labels.txt.
train_label_path = os.path.join(train_label_folder, 'train_labels.txt')
with open(train_label_path, 'w') as train_label_file:
    for index, (img, label) in enumerate(zip(X_train, y_train), start=1):
        image_filename = os.path.join(train_image_folder, f"train_{index:06d}.jpg")

        # cv2.imwrite signals failure through a False return value rather than
        # an exception. Stop here instead of writing a label for an image that
        # is not on disk, which would leave the two out of step.
        if not cv2.imwrite(image_filename, img):
            raise OSError(f"Failed to write image: {image_filename}")

        # One label per line, in the same order as the saved images.
        train_label_file.write(f"{label}\n")

In [29]:
# Write the test split to disk: image k is saved as test_<k>.jpg (1-based,
# six digits, zero-padded) and its label is written as line k of
# test_labels.txt.
test_label_path = os.path.join(test_label_folder, 'test_labels.txt')
with open(test_label_path, 'w') as test_label_file:
    for index, (img, label) in enumerate(zip(X_test, y_test), start=1):
        image_filename = os.path.join(test_image_folder, f"test_{index:06d}.jpg")

        # cv2.imwrite signals failure through a False return value rather than
        # an exception. Stop here instead of writing a label for an image that
        # is not on disk, which would leave the two out of step.
        if not cv2.imwrite(image_filename, img):
            raise OSError(f"Failed to write image: {image_filename}")

        # One label per line, in the same order as the saved images.
        test_label_file.write(f"{label}\n")