In [10]:
import sys

import os
import pathlib
from datasets import deep_fashion_ctsrbm

import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

from torch.utils.data import DataLoader, Subset
from torchvision.transforms import Resize, Lambda, Compose

import torch
import torchvision

import utils.mem
import utils.list

from tqdm.notebook import tqdm

In [2]:
ctsrbm_image_transform = torchvision.models.EfficientNet_B3_Weights.DEFAULT.transforms()
ctsrbm_image_transform.antialias = True

ctsrbm_dataset_dir = os.path.join(pathlib.Path.home(), "data", "DeepFashion", "Consumer-to-shop Clothes Retrieval Benchmark")
ctsrbm_dataset = deep_fashion_ctsrbm.ConsToShopClothRetrBmkImageLoader(ctsrbm_dataset_dir, img_transform=ctsrbm_image_transform)

In [3]:
print(ctsrbm_dataset._num_imgs)

239557


In [4]:
ctsrbm_train_dataset = Subset(ctsrbm_dataset, ctsrbm_dataset.get_subset_indices(split="train"))
ctsrbm_test_dataset = Subset(ctsrbm_dataset, ctsrbm_dataset.get_subset_indices(split="test"))
ctsrbm_val_dataset = Subset(ctsrbm_dataset, ctsrbm_dataset.get_subset_indices(split="val"))

In [5]:
ctsrbm_dataset._is_pickable()

-- PICKLE STATUS START --
self._dataset_dirname
   True
self._img_transform
   True
self._split_num_mask
   True
self._domain_num_mask
   True
self._cloth_type_list
   True
self._cloth_subtype_llist
   True
self._cloth_type_inv_dict
   True
self._cloth_subtype_inv_dict_list
   True
self._num_imgs
   True
self._img_filename_codes_arr
   True
self._img_bbox_codes_arr
   True
-- PICKLE STATUS END --


In [6]:
print("Memory overhead:  {:s}".format(utils.mem.sprint_fancy_num_bytes(ctsrbm_dataset._num_bytes())))
print("Data points:     {:8d}".format(len(ctsrbm_dataset)))
print("  Train:         {:8d}".format(len(ctsrbm_train_dataset)))
print("    Val:         {:8d}".format(len(ctsrbm_test_dataset)))
print("   Test:         {:8d}".format(len(ctsrbm_val_dataset)))

Memory overhead:    5.718 MiB
Data points:       239557
  Train:           120927
    Val:            58746
   Test:            59884


In [7]:
for el in ctsrbm_dataset[0]:
    if type(el) == torch.Tensor: print(el.shape, el.dtype)
    else: print(type(el))

torch.Size([3, 300, 300]) torch.float32
<class 'int'>


In [16]:
ctsrbm_train_loader = DataLoader(ctsrbm_train_dataset, batch_size=32, num_workers=6)
ctsrbm_test_loader = DataLoader(ctsrbm_test_dataset, batch_size=32, num_workers=6)
ctsrbm_val_loader = DataLoader(ctsrbm_val_dataset, batch_size=32, num_workers=6)

In [17]:
for batch in ctsrbm_train_loader:

    print("type(batch)", type(batch))
    for idx, el in enumerate(batch):
        print("type(batch[{:d}])".format(idx), type(batch[idx]))
        print("batch[{:d}].size()".format(idx), batch[idx].size())

    break

type(batch) <class 'list'>
type(batch[0]) <class 'torch.Tensor'>
batch[0].size() torch.Size([32, 3, 300, 300])
type(batch[1]) <class 'torch.Tensor'>
batch[1].size() torch.Size([32])


In [18]:
for batch in tqdm(ctsrbm_train_loader):
    pass

  0%|          | 0/3779 [00:00<?, ?it/s]

KeyboardInterrupt: 