# 处理数据，生成一个大的npy文件
导入 os、numpy、PIL 库，以及 scikit-learn 的 train_test_split（本节代码并未使用 tqdm）。

In [1]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split

In [2]:
# Source directories: one holds the images, the other the matching masks.
image_dir = 'data/Img'
mask_dir = 'data/Lab'

# Sorted directory listings; the i-th image is paired with the i-th mask.
# NOTE(review): pairing relies purely on sort order, so it assumes image and
# mask file names sort identically - confirm against the dataset.
images = sorted(os.listdir(image_dir))
masks = sorted(os.listdir(mask_dir))

# Validate with an explicit raise instead of `assert`: assertions are removed
# when Python runs with -O, which would let a mismatch through silently.
if len(images) != len(masks):
    raise ValueError("图片和掩码数量不一致")  # "image and mask counts differ"

# Full paths, in the same order as the sorted listings.
image_paths = [os.path.join(image_dir, img) for img in images]
mask_paths = [os.path.join(mask_dir, msk) for msk in masks]

# Split both path lists with the same shuffle so image/mask pairs stay
# aligned: 20% goes to the test set, and the fixed seed makes it reproducible.
train_imgs, test_imgs, train_masks, test_masks = train_test_split(
    image_paths, mask_paths, test_size=0.2, random_state=42
)

def load_images(paths):
    """Load image files and combine them into a single NumPy array.

    Args:
        paths: Iterable of image file paths that PIL can open.

    Returns:
        The result of ``np.array`` applied to the list of per-image arrays,
        in the same order as ``paths``. An empty ``paths`` gives an empty
        array.

    NOTE(review): combining into one regular array presumably requires every
    image to share the same size and mode - confirm against the dataset.
    """
    data = []
    for path in paths:
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied out, rather than leaving it to the garbage
        # collector.
        with Image.open(path) as img:
            data.append(np.array(img))
    return np.array(data)

# Training split: decode every image and mask file, then write each resulting
# array to its own .npy file.
# NOTE: train_masks is rebound here from the list of mask paths to the loaded
# array (the same happens to test_masks below).
train_images, train_masks = load_images(train_imgs), load_images(train_masks)
np.save(file='data/train_images.npy', arr=train_images)
np.save(file='data/train_masks.npy', arr=train_masks)

# Test split: same procedure for the held-out files.
test_images, test_masks = load_images(test_imgs), load_images(test_masks)
np.save(file='data/test_images.npy', arr=test_images)
np.save(file='data/test_masks.npy', arr=test_masks)