In [12]:
import idx2numpy
from PIL import Image
import os

In [13]:
# -----------------------------
# Locations of the raw .ubyte files
# -----------------------------
base_path = "./fashionmnist/"  # directory that holds the four .ubyte files

# Training split: image file first, then its label file.
train_images_path, train_labels_path = (
    os.path.join(base_path, file_name)
    for file_name in ("train-images-idx3-ubyte", "train-labels-idx1-ubyte")
)

# Test split: image file first, then its label file.
test_images_path, test_labels_path = (
    os.path.join(base_path, file_name)
    for file_name in ("t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte")
)

In [14]:
# -----------------------------
# Read the four .ubyte files with idx2numpy
# -----------------------------
print("Loading .ubyte files...")

# The files are converted in the same order as the names being unpacked:
# train images, train labels, test images, test labels.
X_train, y_train, X_test, y_test = (
    idx2numpy.convert_from_file(ubyte_path)
    for ubyte_path in (
        train_images_path,
        train_labels_path,
        test_images_path,
        test_labels_path,
    )
)

Loading .ubyte files...


In [15]:
# Show the dimensions of both image arrays, train first and then test.
for _shape_caption, _image_array in (("Train shape:", X_train), ("Test shape:", X_test)):
    print(_shape_caption, _image_array.shape)

Train shape: (60000, 28, 28)
Test shape: (10000, 28, 28)


In [16]:
# -----------------------------
# Class index -> folder-friendly class name (Fashion MNIST)
# -----------------------------
# The position in the list is the class index, so enumerate() yields the
# integer keys 0..9 in order.
labels = dict(enumerate([
    "T-shirt_top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle_boot",
]))


In [17]:
# -----------------------------
# Output folder layout
# -----------------------------
# Example: ./converted_test_data/test/Ankle_boot/  -> all test images of that class
parent_output_dir = "./converted_test_data"  # root folder that holds both splits

# One sub-folder per split directly under the root.
train_output_dir, test_output_dir = (
    os.path.join(parent_output_dir, split_folder)
    for split_folder in ("train", "test")
)

In [18]:
# Ensure a <split>/<class name> directory exists for every split and class.
# exist_ok=True lets this cell be re-run without raising on existing folders.
for split_dir in (train_output_dir, test_output_dir):
    for label_name in labels.values():
        class_dir = os.path.join(split_dir, label_name)
        os.makedirs(class_dir, exist_ok=True)

In [19]:
# -----------------------------
# Function to Save Images
# -----------------------------
def save_images(X, y, output_dir, split_name, limit=None):
    """Save images as PNG files, grouped into one sub-folder per class.

    Image ``X[i]`` is written to
    ``<output_dir>/<label_name>/<split_name>_<i>_label_<label_name>.png``,
    where ``label_name`` is looked up in the module-level ``labels`` dict
    with ``y[i]`` as the key.

    Args:
        X: Indexable, sized collection of image arrays; each element is
            passed to ``Image.fromarray``.
        y: Indexable, sized collection of class indices; ``y[i]`` is the
            label of ``X[i]``.
        output_dir: Root folder for this split. Class sub-folders are
            created on demand.
        split_name: Prefix used in file names and in the progress messages.
        limit: Maximum number of images to save. ``None`` saves everything;
            a negative value is treated as 0.

    Raises:
        ValueError: If ``y`` holds fewer labels than images to be saved.
        KeyError: If a label value is not a key of ``labels``.
    """
    # Clamp at 0 so a negative limit cannot produce a negative count.
    total = len(X) if limit is None else max(0, min(limit, len(X)))

    # Fail before touching the disk instead of raising IndexError after
    # part of the data set has already been written.
    if len(y) < total:
        raise ValueError(
            f"Need {total} labels to save {total} images, but only {len(y)} were given."
        )

    print(f"Saving {total} images to '{split_name}' folder...")

    # Remember which class folders were already ensured, so makedirs is
    # called once per class rather than once per image.
    ensured_dirs = set()
    for i in range(total):
        label_name = labels[y[i]]
        class_dir = os.path.join(output_dir, label_name)
        if class_dir not in ensured_dirs:
            os.makedirs(class_dir, exist_ok=True)
            ensured_dirs.add(class_dir)

        img = Image.fromarray(X[i])
        img.save(os.path.join(class_dir, f"{split_name}_{i}_label_{label_name}.png"))

    print(f"✅ {split_name.capitalize()} images saved successfully at: {output_dir}")

In [20]:
# -----------------------------
# Export both splits to disk
# -----------------------------
# Training split first; limit=None writes every image.
save_images(
    X=X_train,
    y=y_train,
    output_dir=train_output_dir,
    split_name="train",
    limit=None,
)
# Then the test split, also without a cap.
save_images(
    X=X_test,
    y=y_test,
    output_dir=test_output_dir,
    split_name="test",
    limit=None,
)

Saving 60000 images to 'train' folder...
✅ Train images saved successfully at: ./converted_test_data\train
Saving 10000 images to 'test' folder...
✅ Test images saved successfully at: ./converted_test_data\test
