Process files

In [7]:
import os
import numpy as np
from PIL import Image

# Breed names exactly as they appear as file-name prefixes in the dataset.
# Cat breeds are capitalised, dog breeds are all lower-case; lookups are
# case-sensitive, so keep the spelling as-is.
cat_breeds = {
    'Abyssinian',
    'Bengal',
    'Birman',
    'Bombay',
    'British_Shorthair',
    'Egyptian_Mau',
    'Maine_Coon',
    'Persian',
    'Ragdoll',
    'Russian_Blue',
    'Siamese',
    'Sphynx',
}

dog_breeds = {
    'american_bulldog',
    'american_pit_bull_terrier',
    'basset_hound',
    'beagle',
    'boxer',
    'chihuahua',
    'english_cocker_spaniel',
    'english_setter',
    'german_shorthaired',
    'great_pyrenees',
    'havanese',
    'japanese_chin',
    'keeshond',
    'leonberger',
    'miniature_pinscher',
    'newfoundland',
    'pomeranian',
    'pug',
    'saint_bernard',
    'samoyed',
    'scottish_terrier',
    'shiba_inu',
    'staffordshire_bull_terrier',
    'wheaten_terrier',
    'yorkshire_terrier',
}

def _load_image_array(source_path, is_mask):
    """Decode one image file into an (H, W, C) NumPy array.

    Masks are converted to 8-bit grayscale (mode 'L') and given a trailing
    channel axis, so C == 1. Any other image is converted to RGB, so C == 3.
    """
    # The context manager releases the underlying file handle even when
    # decoding fails part-way through.
    with Image.open(source_path) as image:
        if is_mask:
            # NOTE(review): for palette or colour masks, convert('L') yields
            # luminance values rather than class indices - confirm this is
            # what the downstream code expects.
            gray = image.convert('L')
            # (H, W) -> (H, W, 1)
            return np.expand_dims(np.array(gray), axis=-1)

        # convert('RGB') also discards an alpha channel if one is present.
        rgb = image if image.mode == 'RGB' else image.convert('RGB')
        return np.array(rgb)


def process_file(source_dir, destination_dir, file_format, is_mask=False):
    """Convert every matching image in ``source_dir`` into a ``.npy`` array.

    Each file is expected to be named ``<breed>_<suffix><file_format>``. The
    breed prefix is looked up in the module-level ``cat_breeds`` and
    ``dog_breeds`` sets, and the array is written to
    ``destination_dir/<cat|dog>/<breed>/<original stem>.npy``. Files with an
    unknown breed, and files that fail to decode, are reported and skipped.

    Args:
        source_dir: Directory containing the input images.
        destination_dir: Root directory for the output tree. One
            sub-directory per known breed is created under ``cat/`` and
            ``dog/``.
        file_format: File extension to process, e.g. ``".jpg"`` or ``".png"``
            (matched case-insensitively).
        is_mask: When True the files are treated as masks and saved as
            single-channel ``(H, W, 1)`` arrays; otherwise they are saved as
            RGB ``(H, W, 3)`` arrays.

    Raises:
        FileNotFoundError: If ``source_dir`` is not an existing directory or
            contains no entries. Nothing is created under
            ``destination_dir`` in that case.
    """
    # Validate the input first so that a wrong path does not leave an empty
    # output tree behind. Raising (instead of calling the interactive
    # ``exit`` helper) stops the cell without asking the kernel to shut down.
    if not os.path.isdir(source_dir):
        raise FileNotFoundError(
            f"Source directory '{source_dir}' does not exist or is not a directory."
        )

    all_files = os.listdir(source_dir)
    if not all_files:
        raise FileNotFoundError(
            f"No files found in '{source_dir}'. Check your dataset."
        )

    # Both image and mask runs use the same <species>/<breed> layout.
    for breed in cat_breeds:
        os.makedirs(os.path.join(destination_dir, "cat", breed), exist_ok=True)
    for breed in dog_breeds:
        os.makedirs(os.path.join(destination_dir, "dog", breed), exist_ok=True)

    extension = file_format.lower()
    expected_channels = 1 if is_mask else 3

    copied_files = 0
    for file in all_files:
        if not file.lower().endswith(extension):
            continue

        # File names look like breedName_XXXX.ext; everything before the last
        # underscore is the breed.
        breed_name = file.rsplit("_", 1)[0]
        if breed_name in cat_breeds:
            species = "cat"
        elif breed_name in dog_breeds:
            species = "dog"
        else:
            # Decided before decoding so unknown files cost no image I/O.
            print(f"⚠️ Warning: Unknown breed '{breed_name}' in {file}, skipping...")
            continue

        try:
            image_array = _load_image_array(os.path.join(source_dir, file), is_mask)

            # Sanity check on the (H, W, C) layout before writing anything.
            if image_array.ndim != 3 or image_array.shape[2] != expected_channels:
                print(f"⚠️ Warning: {file} does not have expected shape (H, W, {expected_channels}). Skipping...")
                continue

            # Strip the extension by length rather than str.replace(): the
            # match above is case-insensitive (".JPG" must work too) and the
            # extension text may also occur elsewhere in the name.
            stem = file[:len(file) - len(file_format)]
            npy_path = os.path.join(destination_dir, species, breed_name, stem + ".npy")

            np.save(npy_path, image_array)
            copied_files += 1

        except Exception as e:
            # Best effort: one unreadable file must not abort the whole run.
            print(f"❌ Error processing {file}: {e}")

    if copied_files > 0:
        print(f"🎉 Successfully converted {copied_files} files to .npy format!")
    else:
        print("⚠️ No files were processed. Check for errors above.")




Run the function

In [8]:
# Input folders: raw colour images and their label images, per split
train_color = "Dataset/Dataset/TrainVal/color/"
train_label = "Dataset/Dataset/TrainVal/label/"
test_color = "Dataset/Dataset/Test/color/"
test_label = "Dataset/Dataset/Test/label/"

# Output folders: where the converted images and masks are written, per split
train_image = "new_dataset/train/image"
train_mask = "new_dataset/train/mask"
test_image = "new_dataset/test/image"
test_mask = "new_dataset/test/mask"


In [9]:
# Training split: convert the colour .jpg images
process_file(
    source_dir=train_color,
    destination_dir=train_image,
    file_format=".jpg",
    is_mask=False,
)

🎉 Successfully converted 3680 files to .npy format!


In [10]:
# Training split: convert the .png label images as masks
process_file(
    source_dir=train_label,
    destination_dir=train_mask,
    file_format=".png",
    is_mask=True,
)

🎉 Successfully converted 3680 files to .npy format!


In [11]:
# Test split: convert the colour .jpg images
process_file(
    source_dir=test_color,
    destination_dir=test_image,
    file_format=".jpg",
    is_mask=False,
)

🎉 Successfully converted 3710 files to .npy format!


In [12]:
# Test split: convert the .png label images as masks
process_file(
    source_dir=test_label,
    destination_dir=test_mask,
    file_format=".png",
    is_mask=True,
)

🎉 Successfully converted 3710 files to .npy format!
