---
# First way to get the data: using urllib

In [None]:
"""
Download the dataset from the Fashion-MNIST repo
- filenames: list of 4 filenames to download
- folder: path to the folder to save the files
- base_url: base URL for the Data files
"""

import os
from urllib import request

filenames = ["train-images-idx3-ubyte.gz", "train-labels-idx1-ubyte.gz", "t10k-images-idx3-ubyte.gz", "t10k-labels-idx1-ubyte.gz"]
folder = "../data_fashion_mnist/"
base_url = "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/"

# urlretrieve does not create missing directories, so make sure the
# destination folder exists before the first download.
os.makedirs(folder, exist_ok=True)

for name in filenames:
    target = os.path.join(folder, name)

    # Re-running the cell should not fetch files that are already on disk.
    if os.path.exists(target):
        print("Skipping " + name + " (already downloaded).")
        continue

    print("Downloading " + name + "...")

    # Download to a temporary name first and rename on success, so an
    # interrupted transfer never leaves a truncated file under the final name.
    partial = target + ".part"
    request.urlretrieve(base_url + name, partial)
    os.replace(partial, target)

print("Download complete.")

Downloadingtrain-images-idx3-ubyte.gz...
Downloadingtrain-labels-idx1-ubyte.gz...
Downloadingt10k-images-idx3-ubyte.gz...
Downloadingt10k-labels-idx1-ubyte.gz...
Download complete.


In [6]:
import gzip
import numpy as np

def _read_ubyte_gz(path, skip):
    """Decompress `path` and view its bytes past the first `skip` as a uint8 array."""
    with gzip.open(path, 'rb') as archive:
        raw = archive.read()
    return np.frombuffer(raw, np.uint8, offset=skip)


# Image archives: the first 16 bytes are skipped, the remainder is viewed
# as a stack of 28x28 uint8 frames.
X_train = _read_ubyte_gz('../data_fashion_mnist/train-images-idx3-ubyte.gz', 16).reshape(-1, 28, 28)
X_test = _read_ubyte_gz('../data_fashion_mnist/t10k-images-idx3-ubyte.gz', 16).reshape(-1, 28, 28)

# Label archives: the first 8 bytes are skipped, the remainder is kept
# as a flat uint8 array.
y_train = _read_ubyte_gz('../data_fashion_mnist/train-labels-idx1-ubyte.gz', 8)
y_test = _read_ubyte_gz('../data_fashion_mnist/t10k-labels-idx1-ubyte.gz', 8)

---
# Second way to get the data: using torchvision

In [20]:
"""
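Load the Fashion-MNIST training split with torchvision and iterate over it in batches
- transform: conversion applied to every image when it is read
- trainset: the training dataset, downloaded into ../data_fashion_mnist/ if missing
- trainloader: DataLoader yielding shuffled batches of 1024 samples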
"""

from torchvision.datasets import FashionMNIST
from torchvision import transforms
from torch.utils.data import DataLoader

# ToTensor turns each image into a tensor with values scaled to [0, 1].
transform = transforms.Compose([transforms.ToTensor()])

# Training split only; the files are fetched into the data folder when missing.
trainset = FashionMNIST(
    root="../data_fashion_mnist/",
    train=True,
    download=True,
    transform=transform,
)

# Shuffled batches of 1024 samples, loaded by 4 worker processes.
trainloader = DataLoader(trainset, batch_size=1024, shuffle=True, num_workers=4)

# Walk through the loader once and report the shape of every batch.
for i, (inputs, labels) in enumerate(trainloader):
    print(f'Batch {i} -- {inputs.shape} -- {labels.shape}')


Batch 0 -- torch.Size([1024, 1, 28, 28]) -- torch.Size([1024])
Batch 1 -- torch.Size([1024, 1, 28, 28]) -- torch.Size([1024])
Batch 2 -- torch.Size([1024, 1, 28, 28]) -- torch.Size([1024])
Batch 3 -- torch.Size([1024, 1, 28, 28]) -- torch.Size([1024])
Batch 4 -- torch.Size([1024, 1, 28, 28]) -- torch.Size([1024])
Batch 5 -- torch.Size([1024, 1, 28, 28]) -- torch.Size([1024])
Batch 6 -- torch.Size([1024, 1, 28, 28]) -- torch.Size([1024])
Batch 7 -- torch.Size([1024, 1, 28, 28]) -- torch.Size([1024])
Batch 8 -- torch.Size([1024, 1, 28, 28]) -- torch.Size([1024])
Batch 9 -- torch.Size([1024, 1, 28, 28]) -- torch.Size([1024])
Batch 10 -- torch.Size([1024, 1, 28, 28]) -- torch.Size([1024])
Batch 11 -- torch.Size([1024, 1, 28, 28]) -- torch.Size([1024])
Batch 12 -- torch.Size([1024, 1, 28, 28]) -- torch.Size([1024])
Batch 13 -- torch.Size([1024, 1, 28, 28]) -- torch.Size([1024])
Batch 14 -- torch.Size([1024, 1, 28, 28]) -- torch.Size([1024])
Batch 15 -- torch.Size([1024, 1, 28, 28]) -- torch