In [16]:
import os
import pathlib
from datasets import deep_fashion

import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

from torch.utils.data import DataLoader, Subset
from torchvision.transforms import Resize, Lambda, Compose

import torch
import torchvision

import utils.mem
import utils.list

from tqdm.notebook import tqdm

In [17]:
# Preprocessing pipeline shipped with the default EfficientNet-B3 weights;
# its `antialias` attribute is then set to True.
ctsrbm_image_transform = torchvision.models.EfficientNet_B3_Weights.DEFAULT.transforms()
ctsrbm_image_transform.antialias = True

# Dataset root, located under the current user's home directory.
ctsrbm_dataset_dir = os.path.join(
    pathlib.Path.home(),
    "data",
    "DeepFashion",
    "Consumer-to-shop Clothes Retrieval Benchmark",
)
ctsrbm_dataset = deep_fashion.ConsToShopClothRetrBM(
    ctsrbm_dataset_dir,
    img_transform=ctsrbm_image_transform,
    neg_aux_filename_id="dummy",
)

# The train indices go through cutdown_list with 0.5 before building the subset
# (presumably a keep-ratio -- confirm against utils.list); test and val use
# their split indices unchanged.
ctsrbm_train_dataset = Subset(
    ctsrbm_dataset,
    utils.list.cutdown_list(ctsrbm_dataset.get_split_mask_idxs("train"), 0.5),
)
ctsrbm_test_dataset, ctsrbm_val_dataset = (
    Subset(ctsrbm_dataset, ctsrbm_dataset.get_split_mask_idxs(split_name))
    for split_name in ("test", "val")
)

# One loader per split, all with the same batch size and worker count.
ctsrbm_train_loader, ctsrbm_test_loader, ctsrbm_val_loader = (
    DataLoader(split_dataset, batch_size=32, num_workers=6)
    for split_dataset in (ctsrbm_train_dataset, ctsrbm_test_dataset, ctsrbm_val_dataset)
)

In [18]:
# Print a per-attribute report of whether each piece of the dataset's state can be pickled.
# NOTE(review): likely relevant to the DataLoaders built with num_workers=6 -- worker
# processes may need to pickle the dataset depending on the multiprocessing start
# method; confirm on the target platform.
ctsrbm_dataset._is_pickable()

-- PICKLE STATUS START --
self._dataset_dir
   True
self._img_transform
   True
self._train_mask_idxs
   True
self._test_mask_idxs
   True
self._val_mask_idxs
   True
self._attribute_name_list
   True
self._supattribute_idx_list
   True
self._supattribute_name_list
   True
self._list_eval_partition_aux_idxs_cursor_list
   True
self._list_eval_partition_neg_aux_idxs_cursor_list
   True
self._image_filename_bbox_cursor_list
   True
self._item_attribute_cursor_list
   True
self._list_eval_partition_aux_idxs_file
   TypeError: cannot pickle '_io.TextIOWrapper' object
self._list_eval_partition_neg_aux_idxs_file
   TypeError: cannot pickle '_io.TextIOWrapper' object
self._image_bbox_file
   TypeError: cannot pickle '_io.TextIOWrapper' object
self._item_attribute_file
   TypeError: cannot pickle '_io.TextIOWrapper' object
self._list_eval_partition_aux_idxs_file_lock
   RuntimeError: Lock objects should only be shared between processes through inheritance
self._list_eval_partition_neg_aux_idxs

In [19]:
# Summary of the dataset: reported memory overhead, total number of data points,
# and the number of indices in each split. One line per entry.
print(
    "Memory overhead:  {:s}".format(utils.mem.sprint_fancy_num_bytes(ctsrbm_dataset._num_bytes())),
    "Data points:     {:8d}".format(len(ctsrbm_dataset)),
    "  Train:         {:8d}".format(len(ctsrbm_dataset.get_split_mask_idxs("train"))),
    "    Val:         {:8d}".format(len(ctsrbm_dataset.get_split_mask_idxs("val"))),
    "   Test:         {:8d}".format(len(ctsrbm_dataset.get_split_mask_idxs("test"))),
    sep="\n",
)

Memory overhead:   30.039 MiB
Data points:       239557
  Train:            98832
    Val:            48935
   Test:            47773


In [20]:
# Inspect the first sample: print shape and dtype for tensor elements,
# and the plain type for anything else.
for el in ctsrbm_dataset[0]:
    # isinstance rather than an exact type comparison, so that subclasses of
    # torch.Tensor are also reported by shape/dtype instead of falling through.
    if isinstance(el, torch.Tensor):
        print(el.shape, el.dtype)
    else:
        print(type(el))

torch.Size([3, 300, 300]) torch.float32
torch.Size([3, 300, 300]) torch.float32
torch.Size([3, 300, 300]) torch.float32
torch.Size([303]) torch.float32


In [21]:
# Peek at the first training batch only: report the container type, then the
# type and size of every element it holds.
for batch in ctsrbm_train_loader:
    print("type(batch)", type(batch))

    for idx, el in enumerate(batch):
        print("type(batch[{:d}])".format(idx), type(el))
        print("batch[{:d}].size()".format(idx), el.size())

    # A single batch is enough for inspection.
    break

type(batch) <class 'list'>
type(batch[0]) <class 'torch.Tensor'>
batch[0].size() torch.Size([32, 3, 300, 300])
type(batch[1]) <class 'torch.Tensor'>
batch[1].size() torch.Size([32, 3, 300, 300])
type(batch[2]) <class 'torch.Tensor'>
batch[2].size() torch.Size([32, 3, 300, 300])
type(batch[3]) <class 'torch.Tensor'>
batch[3].size() torch.Size([32, 303])


In [22]:
# Iterate over the whole training loader once under a tqdm progress bar;
# every batch is fetched and then discarded.
# NOTE(review): presumably a throughput / smoke test of the data pipeline -- confirm.
for batch in tqdm(ctsrbm_train_loader):
    pass

  0%|          | 0/1545 [00:00<?, ?it/s]