# Traditional Datasets

## MNIST
Output.shape = [1, 28, 28]

Targetnum = 10

In [None]:
import torch
from torchvision import datasets, transforms

# MNIST settings: 10 classes, samples come out as [1, 28, 28] tensors.
Dataset_path = '/home/mrc/Datasets/MNIST/'  # dataset root (download=True below)
Batch_size = 128
Workers = 4       # passed to DataLoader as num_workers
Targetnum = 10    # number of target classes


# Training pipeline: zero-pad 2 px per side, then take a random 28x28 crop,
# convert to a tensor and normalise.
# Normalize is given explicit one-element tuples: the previous `(0)` / `(1)`
# were plain ints (parentheses without a trailing comma do not make a tuple).
# With mean 0 and std 1 the normalisation leaves the values unchanged.
transform_train = transforms.Compose([
    transforms.Pad(2, padding_mode='constant'),
    transforms.RandomCrop(28),
    transforms.ToTensor(),
    transforms.Normalize((0,), (1,))
])

# Evaluation pipeline: no augmentation, same normalisation as training.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0,), (1,))
])

# Train / test splits share the same root directory.
Train_data = datasets.MNIST(root=Dataset_path, train=True, download=True, transform=transform_train)
Test_data = datasets.MNIST(root=Dataset_path, train=False, download=True, transform=transform_test)

# Training loader: shuffled, incomplete final batch dropped.
train_data_loader = torch.utils.data.DataLoader(
    dataset=Train_data,
    batch_size=Batch_size,
    shuffle=True,
    num_workers=Workers,
    pin_memory=True,
    drop_last=True
)

# Test loader: dataset order preserved, every sample kept.
test_data_loader = torch.utils.data.DataLoader(
    dataset=Test_data,
    batch_size=Batch_size,
    shuffle=False,
    num_workers=Workers,
    pin_memory=True,
    drop_last=False
)

## Fashion-MNIST
Output.shape = [1, 28, 28]

Targetnum = 10

In [None]:
import torch
from torchvision import datasets, transforms

# Fashion-MNIST settings: 10 classes, samples come out as [1, 28, 28] tensors.
Dataset_path = '/home/mrc/Datasets/FashionMNIST/'  # dataset root (download=True below)
Batch_size = 128
Workers = 4       # passed to DataLoader as num_workers
Targetnum = 10    # number of target classes


# Training pipeline: zero-pad 2 px per side, then take a random 28x28 crop,
# convert to a tensor and normalise.
# Normalize is given explicit one-element tuples: the previous `(0)` / `(1)`
# were plain ints (parentheses without a trailing comma do not make a tuple).
# With mean 0 and std 1 the normalisation leaves the values unchanged.
transform_train = transforms.Compose([
    transforms.Pad(2, padding_mode='constant'),
    transforms.RandomCrop(28),
    transforms.ToTensor(),
    transforms.Normalize((0,), (1,))
])

# Evaluation pipeline: no augmentation, same normalisation as training.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0,), (1,))
])

# Train / test splits share the same root directory.
Train_data = datasets.FashionMNIST(root=Dataset_path, train=True, download=True, transform=transform_train)
Test_data = datasets.FashionMNIST(root=Dataset_path, train=False, download=True, transform=transform_test)

# Training loader: shuffled, incomplete final batch dropped.
train_data_loader = torch.utils.data.DataLoader(
    dataset=Train_data,
    batch_size=Batch_size,
    shuffle=True,
    num_workers=Workers,
    pin_memory=True,
    drop_last=True
)

# Test loader: dataset order preserved, every sample kept.
test_data_loader = torch.utils.data.DataLoader(
    dataset=Test_data,
    batch_size=Batch_size,
    shuffle=False,
    num_workers=Workers,
    pin_memory=True,
    drop_last=False
)

## CIFAR-10
Output.shape = [3, 32, 32]

Targetnum = 10

In [None]:
import torch
from torchvision import datasets, transforms

# CIFAR-10 settings: 10 classes, samples come out as [3, 32, 32] tensors.
Targetnum = 10
Workers = 4
Batch_size = 128
Dataset_path = '/home/mrc/Datasets/CIFAR10/'


# Training pipeline: reflect-pad 4 px per side, random horizontal flip,
# random 32x32 crop, then tensor conversion and per-channel normalisation.
transform_train = transforms.Compose([
    transforms.Pad(padding=4, padding_mode='reflect'),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(size=32),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Evaluation pipeline: no augmentation, same normalisation constants.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Both splits live under the same root and are requested with download=True.
Train_data = datasets.CIFAR10(Dataset_path, train=True, transform=transform_train, download=True)
Test_data = datasets.CIFAR10(Dataset_path, train=False, transform=transform_test, download=True)

# Training batches are shuffled and the incomplete last batch is discarded.
train_data_loader = torch.utils.data.DataLoader(
    Train_data,
    batch_size=Batch_size,
    num_workers=Workers,
    shuffle=True,
    drop_last=True,
    pin_memory=True,
)

# Evaluation keeps dataset order and every sample.
test_data_loader = torch.utils.data.DataLoader(
    Test_data,
    batch_size=Batch_size,
    num_workers=Workers,
    shuffle=False,
    drop_last=False,
    pin_memory=True,
)

## CIFAR-100
Output.shape = [3, 32, 32]

Targetnum = 100

In [None]:
import torch
from torchvision import datasets, transforms

# CIFAR-100 settings: 100 classes, samples come out as [3, 32, 32] tensors.
Targetnum = 100
Workers = 4
Batch_size = 128
Dataset_path = '/home/mrc/Datasets/CIFAR100/'


# Training pipeline: reflect-pad 4 px per side, random horizontal flip,
# random 32x32 crop, then tensor conversion and per-channel normalisation.
transform_train = transforms.Compose([
    transforms.Pad(padding=4, padding_mode='reflect'),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(size=32),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Evaluation pipeline: no augmentation, same normalisation constants.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Both splits live under the same root and are requested with download=True.
Train_data = datasets.CIFAR100(Dataset_path, train=True, transform=transform_train, download=True)
Test_data = datasets.CIFAR100(Dataset_path, train=False, transform=transform_test, download=True)

# Training batches are shuffled and the incomplete last batch is discarded.
train_data_loader = torch.utils.data.DataLoader(
    Train_data,
    batch_size=Batch_size,
    num_workers=Workers,
    shuffle=True,
    drop_last=True,
    pin_memory=True,
)

# Evaluation keeps dataset order and every sample.
test_data_loader = torch.utils.data.DataLoader(
    Test_data,
    batch_size=Batch_size,
    num_workers=Workers,
    shuffle=False,
    drop_last=False,
    pin_memory=True,
)

## Tiny_ImageNet_64
Output.shape = [3, 64, 64]

Targetnum = 200

In [None]:
import torch
from torchvision import datasets, transforms

# Tiny ImageNet (64x64) settings: 200 classes.
Targetnum = 200   # number of target classes
Workers = 12      # passed to the loaders as num_workers
Batch_size = 128
Dataset_path = '/home/mrc/Datasets/Tiny_ImageNet_64/'  # holds the train/ and val/ folders


from prefetch_generator import BackgroundGenerator

class DataLoaderX(torch.utils.data.DataLoader):
    """DataLoader whose iterator is wrapped in a ``BackgroundGenerator``.

    NOTE(review): ``BackgroundGenerator`` comes from ``prefetch_generator``;
    presumably it prefetches batches on a background thread -- confirm
    against that package's documentation.
    """

    def __iter__(self):
        # Build the regular DataLoader iterator first, then hand it over.
        base_iterator = super().__iter__()
        return BackgroundGenerator(base_iterator)

# Training pipeline: reflect-pad 8 px per side, random 64x64 crop, random
# horizontal flip, then tensor conversion and per-channel normalisation.
transform_train = transforms.Compose([
    transforms.Pad(8, padding_mode='reflect'),
    transforms.RandomCrop(64),
    # Lighting(0.1),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

# Evaluation pipeline: no augmentation, same normalisation constants.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

# Class-per-folder layout: <Dataset_path>/train and <Dataset_path>/val.
Train_data = datasets.ImageFolder(root=Dataset_path+'train', transform=transform_train)
Test_data = datasets.ImageFolder(root=Dataset_path+'val', transform=transform_test)

# Build the loaders with the DataLoaderX subclass this cell defines, matching
# the ImageNet cell; previously the class was defined here but never used.
# Training loader: shuffled, incomplete final batch dropped.
train_data_loader = DataLoaderX(
    dataset=Train_data,
    batch_size=Batch_size,
    shuffle=True,
    num_workers=Workers,
    pin_memory=True,
    drop_last=True
)

# Test loader: dataset order preserved, every sample kept.
test_data_loader = DataLoaderX(
    dataset=Test_data,
    batch_size=Batch_size,
    shuffle=False,
    num_workers=Workers,
    pin_memory=True,
    drop_last=False
)

## Tiny_ImageNet_224
Output.shape = [3, 224, 224]

Targetnum = 200

In [None]:
import torch
from torchvision import datasets, transforms

# Tiny ImageNet (224x224) settings: 200 classes.
Targetnum = 200   # number of target classes
Workers = 12      # passed to the loaders as num_workers
Batch_size = 128
Dataset_path = '/home/mrc/Datasets/Tiny_ImageNet_224/'  # holds the train/ and val/ folders


from prefetch_generator import BackgroundGenerator

class DataLoaderX(torch.utils.data.DataLoader):
    """DataLoader whose iterator is wrapped in a ``BackgroundGenerator``.

    NOTE(review): ``BackgroundGenerator`` comes from ``prefetch_generator``;
    presumably it prefetches batches on a background thread -- confirm
    against that package's documentation.
    """

    def __iter__(self):
        # Build the regular DataLoader iterator first, then hand it over.
        base_iterator = super().__iter__()
        return BackgroundGenerator(base_iterator)

# Training pipeline: reflect-pad 28 px per side, random 224x224 crop, random
# horizontal flip, then tensor conversion and per-channel normalisation.
transform_train = transforms.Compose([
    transforms.Pad(28, padding_mode='reflect'),
    transforms.RandomCrop(224),
    # Lighting(0.1),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

# Evaluation pipeline: no augmentation or resizing, same normalisation constants.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

# Class-per-folder layout: <Dataset_path>/train and <Dataset_path>/val.
Train_data = datasets.ImageFolder(root=Dataset_path+'train', transform=transform_train)
Test_data = datasets.ImageFolder(root=Dataset_path+'val', transform=transform_test)

# Build the loaders with the DataLoaderX subclass this cell defines, matching
# the ImageNet cell; previously the class was defined here but never used.
# Training loader: shuffled, incomplete final batch dropped.
train_data_loader = DataLoaderX(
    dataset=Train_data,
    batch_size=Batch_size,
    shuffle=True,
    num_workers=Workers,
    pin_memory=True,
    drop_last=True
)

# Test loader: dataset order preserved, every sample kept.
test_data_loader = DataLoaderX(
    dataset=Test_data,
    batch_size=Batch_size,
    shuffle=False,
    num_workers=Workers,
    pin_memory=True,
    drop_last=False
)

## ImageNet
Output.shape = [3, 224, 224]

Targetnum = 1000

In [None]:
import torch
from torchvision import datasets, transforms

# ImageNet settings: 1000 classes.
Targetnum = 1000  # number of target classes
Workers = 12      # passed to the loaders as num_workers
Batch_size = 512
Dataset_path = '/home/mrc/Datasets/ImageNet/'  # holds the train/ and val/ folders


from prefetch_generator import BackgroundGenerator

class DataLoaderX(torch.utils.data.DataLoader):
    """DataLoader whose iterator is wrapped in a ``BackgroundGenerator``.

    NOTE(review): ``BackgroundGenerator`` comes from ``prefetch_generator``;
    presumably it prefetches batches on a background thread -- confirm
    against that package's documentation.
    """

    def __iter__(self):
        # Build the regular DataLoader iterator first, then hand it over.
        base_iterator = super().__iter__()
        return BackgroundGenerator(base_iterator)

# Training pipeline: random resized crop to 224 (area scale 0.08-1.0), random
# horizontal flip, then tensor conversion and per-channel normalisation.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(size=224, scale=(0.08, 1.0)),
    # Lighting(0.1),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Evaluation pipeline: resize to 256, centre-crop 224, same normalisation.
transform_test = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Class-per-folder layout: <Dataset_path>/train and <Dataset_path>/val.
Train_data = datasets.ImageFolder(f'{Dataset_path}train', transform=transform_train)
Test_data = datasets.ImageFolder(f'{Dataset_path}val', transform=transform_test)

# Training batches are shuffled and the incomplete last batch is discarded.
train_data_loader = DataLoaderX(
    Train_data,
    batch_size=Batch_size,
    num_workers=Workers,
    shuffle=True,
    drop_last=True,
    pin_memory=True,
)

# Evaluation keeps dataset order and every sample.
test_data_loader = DataLoaderX(
    Test_data,
    batch_size=Batch_size,
    num_workers=Workers,
    shuffle=False,
    drop_last=False,
    pin_memory=True,
)

## Show Image

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Show one randomly chosen sample of `data` together with basic statistics.
data = Test_data
random_integer = np.random.randint(0, len(data))

# Optional filter (disabled): redraw until the sample has label `imgtype`.
# imgtype = 4
# while data[random_integer][1] != imgtype:
#     random_integer = np.random.randint(0, len(data))

# Fetch the sample once and reuse both parts, instead of indexing the
# dataset a second time just to read the label.
sample, target = data[random_integer]
img = sample.permute(1, 2, 0).numpy()  # channels-first tensor -> H x W x C array

# Single-channel samples (e.g. MNIST) give an (H, W, 1) array, which some
# matplotlib versions reject; pass a plain (H, W) array to imshow instead.
# `img` keeps its channel axis so the per-channel loop below still works.
# NOTE(review): the values are normalised, so RGB data outside [0, 1] is
# presumably clipped by imshow -- confirm if exact colours matter.
display_img = img[:, :, 0] if img.shape[2] == 1 else img
# plt.imsave('test.png', img)
plt.imshow(display_img)
plt.show()  # was plt.plot(), which draws nothing when called without data
print(f'len(data) = {len(data)}')
print(f'Display number = {random_integer}')
print(f'img.shape = {img.shape}, img.type = {target}')
print(f'img.max() = {img.max()}, img.min() = {img.min()}\n')

# Per-channel mean and variance of the displayed sample.
for i in range(img.shape[2]):
    print(f'img[{i}].mean() = {img[:,:,i].mean()}, img[{i}].var() = {img[:,:,i].var()}')

## Speed Test

In [None]:
import time
from tqdm import tqdm

# Time one full pass over a loader without doing any work on the batches.
data_loader = train_data_loader

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
for img, label in tqdm(data_loader):
    pass
# Stop the clock before printing so only the iteration itself is measured.
elapsed = time.perf_counter() - start_time
print(f'img.shape = {img.shape}')  # shape of the last batch yielded
print(f'Time used: {elapsed:.5f} s')

In [None]:
# Notebook cell expression: displays the shape of whatever `img` currently holds.
img.shape