## Custom data preparation

### Package imports

In [1]:
# third-party packages: Fashion-MNIST dataset class and the ToTensor transform
from torchvision import datasets
from torchvision.transforms import ToTensor

# project-local helpers
# NOTE(review): star imports hide where names come from; the helpers called
# later in this notebook (pick, album, dif_frame, vector, image_export,
# gt_export) presumably live in these two modules -- confirm, and consider
# importing them explicitly.
from util.data_generate import *
from util.data_io import *

# suppress all warnings raised from this point on
# NOTE(review): this is a blanket filter, so it also hides deprecation warnings
# that may matter -- consider restricting it to a specific category or module.
import warnings
warnings.filterwarnings('ignore')

### Original data from Fashion-MNIST

In [2]:
# Fashion-MNIST training split, rooted at "data", converted with ToTensor
training_data = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())

# Fashion-MNIST testing split, same root and transform
testing_data = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())

# class names; the position of each name in this list matches the digits
# used in the target_* subset names defined in the next cell
classes = [
    "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",
]

### Custom information setup

In [3]:
# Class subsets used to build the datasets. The digits in each variable name
# are the positions of the selected names in the `classes` list.

# five classes that sit close together on UMAP
target_02346 = ["T-shirt/top", "Pullover", "Dress", "Coat", "Shirt"]

# three classes close to each other on UMAP but far from the five above
target_579 = ["Sandal", "Sneaker", "Ankle boot"]

# two classes that are separate on UMAP
target_18 = ["Trouser", "Bag"]

# number of samples for each class, per split
train_number, test_number = 40, 10

In [4]:
def _build_split(dataset, targets, number):
    """Run the pick -> album -> dif_frame -> vector pipeline for one class subset.

    Args:
        dataset: source dataset handed to ``pick`` (``training_data`` or
            ``testing_data`` in this notebook).
        targets: list of class names to select.
        number: number of samples for each class, handed to ``pick``.

    Returns:
        Tuple ``(album_data, number_list, stride_list, images, gt)``:
        ``album_data``, ``number_list`` and ``stride_list`` are the three
        values returned by ``album``; ``images`` is the ``dif_frame`` output
        and ``gt`` is the ``vector`` output.

    Raises:
        AssertionError: if ``images`` and ``gt`` differ in their first dimension.
    """
    picked = pick(dataset, targets, number)
    # NOTE(review): stride and position are requested as random and no seed is
    # set anywhere in this notebook, so results presumably change between
    # runs -- confirm which RNG util.data_generate uses and seed it if
    # reproducible datasets are required.
    album_data, number_list, stride_list = album(picked, is_rand_stride=True, is_rand_pos=True)
    images = dif_frame(album_data, number_list)
    gt = vector(number_list, stride_list)
    # every image needs exactly one ground-truth row
    assert images.shape[0] == gt.shape[0], "image / ground-truth count mismatch"
    return album_data, number_list, stride_list, images, gt


# generate training data of target_02346
(train_02346, train_number_list_02346, train_stride_list_02346,
 training_image_02346, training_gt_02346) = _build_split(training_data, target_02346, train_number)

# generate testing data of target_02346
(test_02346, test_number_list_02346, test_stride_list_02346,
 testing_image_02346, testing_gt_02346) = _build_split(testing_data, target_02346, test_number)

# generate testing data of target_579
(test_579, test_number_list_579, test_stride_list_579,
 testing_image_579, testing_gt_579) = _build_split(testing_data, target_579, test_number)

# generate testing data of target_18
(test_18, test_number_list_18, test_stride_list_18,
 testing_image_18, testing_gt_18) = _build_split(testing_data, target_18, test_number)

# report (image shape, ground-truth shape) for each generated split
print(training_image_02346.shape, training_gt_02346.shape)
print(testing_image_02346.shape, testing_gt_02346.shape)
print(testing_image_579.shape, testing_gt_579.shape)
print(testing_image_18.shape, testing_gt_18.shape)

torch.Size([4224, 84, 112]) torch.Size([4224, 1])
torch.Size([1026, 84, 112]) torch.Size([1026, 1])
torch.Size([621, 84, 112]) torch.Size([621, 1])
torch.Size([454, 84, 112]) torch.Size([454, 1])


In [5]:
# file names: a CSV name (ground truth) and an image name for every split
CSV_NAME_TRAIN_02346, IMAGE_NAME_TRAIN_02346 = 'train_02346.csv', 'train_02346'
CSV_NAME_TEST_02346, IMAGE_NAME_TEST_02346 = 'test_02346.csv', 'test_02346'
CSV_NAME_TEST_579, IMAGE_NAME_TEST_579 = 'test_579.csv', 'test_579'
CSV_NAME_TEST_18, IMAGE_NAME_TEST_18 = 'test_18.csv', 'test_18'

# export every split: images first, then the matching ground truth
for _split_images, _split_image_name, _split_gt, _split_csv_name in (
    (training_image_02346, IMAGE_NAME_TRAIN_02346, training_gt_02346, CSV_NAME_TRAIN_02346),
    (testing_image_02346, IMAGE_NAME_TEST_02346, testing_gt_02346, CSV_NAME_TEST_02346),
    (testing_image_579, IMAGE_NAME_TEST_579, testing_gt_579, CSV_NAME_TEST_579),
    (testing_image_18, IMAGE_NAME_TEST_18, testing_gt_18, CSV_NAME_TEST_18),
):
    image_export(_split_images, _split_image_name)
    gt_export(_split_gt, _split_csv_name)

In [6]:
# export the raw album data (the `album` outputs kept in train_02346 / test_02346)
for _raw_album, _raw_name in (
    (train_02346, 'train_02346_raw'),
    (test_02346, 'test_02346_raw'),
):
    image_export(_raw_album, _raw_name)