# MNIST

In [1]:
import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from core.dataloaders.mnist import MnistDataset

# Select the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# Only touch the CUDA runtime when it is actually in use:
# torch.cuda.synchronize() raises on a machine without a usable CUDA device,
# which would defeat the CPU fallback chosen above.
if device.type == "cuda":
    torch.cuda.empty_cache()   # release cached blocks left over from earlier cells
    torch.cuda.synchronize()   # wait for pending GPU work before timing anything

# Location of the MNIST training split, relative to this notebook.
train_data_dir = r"../../datasets/mnist/train"


# Pre-processing applied to every MNIST sample before batching.
transformation = transforms.Compose([
    # 1 channel -> 3 channels, required for this model
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# Training dataset read from `train_data_dir`, with the pipeline above applied.
train_dataset = MnistDataset(
    data_dir=train_data_dir,
    train=True,
    transform=transformation,
)

import time
from tqdm import tqdm

# Loader settings for the MNIST throughput benchmark.
batch_size = 32 * 4
workers = 6
# prefetch_factor is counted per worker process, so each of the `workers`
# processes keeps up to `prefetch` batches queued ahead of the consumer.
prefetch = workers * 8

train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
    prefetch_factor=prefetch,
    persistent_workers=True,  # keep worker processes alive between epochs
    pin_memory=False,
)

# Time five full passes over the MNIST loader, including the copy of each
# batch to `device`. perf_counter() is used because it is monotonic and
# meant for measuring elapsed intervals.
start = time.perf_counter()
for epoch in range(5):
    print(epoch)
    start_epoch = time.perf_counter()
    # Wrap the loader itself (rather than enumerate(loader)) so tqdm can read
    # its length and show a real progress bar with an ETA; the batch index
    # was never used.
    for images, labels in tqdm(train_dataloader):
        images = images.to(device)
        labels = labels.to(device)
    print("epoch", time.perf_counter() - start_epoch)

# Total wall-clock time for all epochs.
print(time.perf_counter() - start)

cuda
Folder '../../datasets/mnist/train' already exists.
0


469it [00:11, 42.09it/s]


epoch 11.250001668930054
1


469it [00:07, 58.75it/s]


epoch 7.998485326766968
2


469it [00:07, 61.17it/s]


epoch 7.68399977684021
3


469it [00:07, 60.62it/s]


epoch 7.75200080871582
4


469it [00:07, 61.61it/s]

epoch 7.625999212265015
42.310486793518066





# playing cards

In [2]:
import torch
import time
from tqdm import tqdm
from torch.utils.data import DataLoader
from core.dataloaders.playing_cards import PlayingCardDataset
import torchvision.transforms.v2 as transforms

# Select the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# Only touch the CUDA runtime when it is actually in use:
# torch.cuda.synchronize() raises on a machine without a usable CUDA device,
# which would defeat the CPU fallback chosen above.
if device.type == "cuda":
    torch.cuda.empty_cache()   # release cached blocks left over from earlier cells
    torch.cuda.synchronize()   # wait for pending GPU work before timing anything

# Location of the playing-cards training split, relative to this notebook.
train_data_dir = r"../../datasets/playing_cards/train"


# Target spatial size (height, width) shared by both pipelines.
size = (224, 224)

# Plain pre-processing: no augmentation, intended for validation and test.
transformation = transforms.Compose([
    transforms.ToImage(),
    transforms.ToDtype(torch.float32, scale=True),
    transforms.Resize(size),
])

# Augmentation for training.
# Each RandomChoice applies exactly one of its entries per sample; the
# Identity entry is the "leave the image unchanged" option, so every other
# entry is configured to always fire when it is picked.
augmentation = transforms.Compose([
    transforms.ToImage(),
    transforms.ToDtype(torch.float32, scale=True),
    transforms.Resize(size),
    # flip / rotation transforms
    transforms.RandomChoice([
        transforms.RandomVerticalFlip(p=1),
        transforms.RandomHorizontalFlip(p=1),
        transforms.RandomRotation(degrees=45),
        torch.nn.Identity(),
    ]),
    # affine transforms
    transforms.RandomChoice([
        transforms.RandomAffine(degrees=0, translate=(0., 0.2)),
        torch.nn.Identity(),
    ]),
    # color transforms
    transforms.RandomChoice([
        # p=1 for consistency with the other entries: the default p=0.5 would
        # silently turn half of the "sharpness" picks into a second identity.
        transforms.RandomAdjustSharpness(sharpness_factor=2, p=1),
        transforms.RandomEqualize(p=1),
        transforms.RandomPosterize(bits=2, p=1),
        transforms.RandomInvert(p=1),
        torch.nn.Identity(),
    ]),
])
# Training dataset for the playing-cards benchmark.
# NOTE(review): the loader is timed with the plain `transformation`;
# `augmentation` is defined above but not used here — confirm this is intended.
train_dataset = PlayingCardDataset(data_dir=train_data_dir, transform=transformation)

# Loader settings (reported again after the timing loop).
batch_size = 32 * 4
workers = 8
prefetch = 8  # batches queued ahead per worker process

train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
    prefetch_factor=prefetch,
    persistent_workers=True,  # keep worker processes alive between epochs
    pin_memory=False,
)

# Time five full passes over the playing-cards loader (loading only; the
# batches are discarded). perf_counter() is used because it is monotonic and
# meant for measuring elapsed intervals.
start = time.perf_counter()
for epoch in range(5):
    print(epoch)
    start_epoch = time.perf_counter()
    # Wrap the loader itself (rather than enumerate(loader)) so tqdm can read
    # its length and show a real progress bar with an ETA; the batch index
    # was never used.
    for images, labels in tqdm(train_dataloader):
        pass
    print("epoch", time.perf_counter() - start_epoch)
# Report the loader configuration next to the total wall-clock time.
print("workers", workers)
print("prefetch", prefetch)
print(time.perf_counter() - start)

cuda
0


60it [00:02, 29.76it/s]


epoch 25.773422956466675
1


60it [00:01, 32.27it/s]


epoch 1.8652148246765137
2


60it [00:01, 32.26it/s]


epoch 1.8728489875793457
3


60it [00:01, 32.07it/s]


epoch 1.8766999244689941
4


60it [00:01, 32.21it/s]

epoch 1.8691062927246094
workers 8
prefetch 8
33.25829219818115





# Playing cards (HDF5): inconsistent results — HDF5 is slower than or on par with ImageFolder

In [3]:
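# NOTE: this cell is disabled (every line is commented out).
# The TypeError recorded below most likely comes from `transforms.ToImage()` being
# commented out in both pipelines: the raw array read from the HDF5 file is then not
# recognised as an image by the v2 transforms, so the size lookup inside a random
# geometric transform fails. Re-enabling `ToImage()` should be the first thing to try.
# import torch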
# 
# from torch.utils.data import DataLoader
# from core.dataloaders.playing_cards_hdf5 import PlayingCardDataset
# import torchvision.transforms.v2 as transforms
# 
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(device)
# torch.cuda.empty_cache()
# torch.cuda.synchronize()
# 
# train_data_dir = r"../../datasets/playing_cards/train.hdf5"
# 
# 
# size = (224, 224)
# # setting up transformation
# # no augmentation for validation and test
# transformation = transforms.Compose([
#         # transforms.ToImage(),
#         transforms.ToDtype(torch.float32, scale=True),
#         transforms.Resize(size)
# ])
# 
# # augmentation for training
# augmentation = transforms.Compose([
#         # transforms.ToImage(),
#         transforms.ToDtype(torch.float32, scale=True),
#         transforms.Resize(size),
#         # we can use random choice to apply random transformations, identify when using raw image
#         # flip/rotations transforms
#         transforms.RandomChoice([
#             transforms.RandomVerticalFlip(p=1),
#             transforms.RandomHorizontalFlip(p=1),
#             transforms.RandomRotation(degrees=45),
#             torch.nn.Identity(),            
#         ]),
#         # affine transforms
#         transforms.RandomChoice([
#             transforms.RandomAffine(degrees = 0, translate = (0., 0.2)),
#             torch.nn.Identity(),            
#         ]),
#         # color transforms
#         transforms.RandomChoice([
#             transforms.RandomAdjustSharpness(sharpness_factor=2),
#             transforms.RandomEqualize(p=1),
#             transforms.RandomPosterize(bits=2, p=1),
#             transforms.RandomInvert(p=1),
#             torch.nn.Identity(),            
#         ]),
# 
# ])
# # creating dataset
# train_dataset = PlayingCardDataset(
#     in_file=train_data_dir,
#     transform=augmentation
# )
# import time
# from tqdm import tqdm
# 
# workers = 6
# prefetch = workers * 8
# batch_size = 32*4
# train_dataloader = DataLoader(train_dataset,
#                               batch_size=batch_size,
#                               shuffle=True,
#                               num_workers=workers,
#                               prefetch_factor=prefetch,
#                               persistent_workers=True,
#                               pin_memory=False)
# 
# 
# start = time.time()
# for epoch in range(5):
#     print(epoch)
#     start_epoch = time.time()
#     for i, (images, labels) in tqdm(enumerate(train_dataloader)):
#         images = images.to(device)
#         labels = labels.to(device)
#     print("epoch", time.time() - start_epoch)
# 
# print(time.time() - start)

cuda
0


0it [00:05, ?it/s]


TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "C:\Users\agarc\anaconda3\envs\ViT_py311\Lib\site-packages\torch\utils\data\_utils\worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
           ^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\agarc\anaconda3\envs\ViT_py311\Lib\site-packages\torch\utils\data\_utils\fetch.py", line 51, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\agarc\anaconda3\envs\ViT_py311\Lib\site-packages\torch\utils\data\_utils\fetch.py", line 51, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
            ~~~~~~~~~~~~^^^^^
  File "D:\Dropbox\Talan\Codes\VisionTransformerFromScratch\core\dataloaders\playing_cards_hdf5.py", line 30, in __getitem__
    images = self.transform(db["images"][index, :, :, :])
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\agarc\anaconda3\envs\ViT_py311\Lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\agarc\anaconda3\envs\ViT_py311\Lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\agarc\anaconda3\envs\ViT_py311\Lib\site-packages\torchvision\transforms\v2\_container.py", line 53, in forward
    outputs = transform(*inputs)
              ^^^^^^^^^^^^^^^^^^
  File "C:\Users\agarc\anaconda3\envs\ViT_py311\Lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\agarc\anaconda3\envs\ViT_py311\Lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\agarc\anaconda3\envs\ViT_py311\Lib\site-packages\torchvision\transforms\v2\_container.py", line 155, in forward
    return transform(*inputs)
           ^^^^^^^^^^^^^^^^^^
  File "C:\Users\agarc\anaconda3\envs\ViT_py311\Lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\agarc\anaconda3\envs\ViT_py311\Lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\agarc\anaconda3\envs\ViT_py311\Lib\site-packages\torchvision\transforms\v2\_transform.py", line 46, in forward
    params = self._get_params(
             ^^^^^^^^^^^^^^^^^
  File "C:\Users\agarc\anaconda3\envs\ViT_py311\Lib\site-packages\torchvision\transforms\v2\_geometry.py", line 737, in _get_params
    height, width = query_size(flat_inputs)
                    ^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\agarc\anaconda3\envs\ViT_py311\Lib\site-packages\torchvision\transforms\v2\_utils.py", line 194, in query_size
    raise TypeError("No image, video, mask or bounding box was found in the sample")
TypeError: No image, video, mask or bounding box was found in the sample
