## Organizing the Fashion MNIST Image Set
***

#### Importing Auxiliary Packages
***

In [1]:
from torchvision.datasets import FashionMNIST
from PIL import Image
import idx2numpy
import os

#### Downloading the Dataset
***

In [3]:
# Fetch Fashion-MNIST with '.' as the dataset root; the bare expression also
# makes the cell display the dataset summary.
FashionMNIST('.', download = True)

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: .
    Split: Train

#### Organizing Images in Directories
***

In [4]:
# Raw IDX files of the train and test splits (images and labels).
train_images_files = 'FashionMNIST/raw/train-images-idx3-ubyte'
train_labels_files = 'FashionMNIST/raw/train-labels-idx1-ubyte'
test_images_files = 'FashionMNIST/raw/t10k-images-idx3-ubyte'
test_labels_files = 'FashionMNIST/raw/t10k-labels-idx1-ubyte'

# Decode each IDX file into an array.
train_images_array = idx2numpy.convert_from_file(train_images_files)
train_labels_array = idx2numpy.convert_from_file(train_labels_files)
test_images_array = idx2numpy.convert_from_file(test_images_files)
test_labels_array = idx2numpy.convert_from_file(test_labels_files)

print('== Data Dimension ==\n')
print(f'Train Images Shape: {train_images_array.shape}')
print(f'Train Labels Shape: {train_labels_array.shape}')
print(f'Test Images Shape: {test_images_array.shape}')
# This line reports the labels array, so it is captioned as labels.
print(f'Test Labels Shape: {test_labels_array.shape}')

== Data Dimension ==

Train Images Shape: (60000, 28, 28)
Train Labels Shape: (60000,)
Test Images Shape: (10000, 28, 28)
Test Images Shape: (10000,)


In [5]:
def analyze_arrays(labels_array, extract, num_classes=10):
    '''
    prints how many labels belong to each class and the overall total.

    Input:
        labels_array (array) --> iterable with one hashable label per image.
        extract (string) --> name of the data split, used as a prefix in the printed report.
        num_classes (int) --> number of classes reported, ids 0..num_classes-1 (default 10).

    Labels outside the reported range are ignored and are not part of the total.
    '''

    # Tally every class in a single pass instead of rescanning the labels once per class.
    per_class = dict.fromkeys(range(num_classes), 0)
    for label in labels_array:
        if label in per_class:
            per_class[label] += 1

    for class_id, amount in per_class.items():
        print(f'{extract} images class {class_id}: {amount} images')

    # No local is named `sum`, so the builtin remains available here.
    total = sum(per_class.values())
    print(f'\n== {extract} total images: {total} images ==')

In [6]:
# Report the number of training labels per class.
analyze_arrays(train_labels_array, 'Train')

Train images class 0: 6000 images
Train images class 1: 6000 images
Train images class 2: 6000 images
Train images class 3: 6000 images
Train images class 4: 6000 images
Train images class 5: 6000 images
Train images class 6: 6000 images
Train images class 7: 6000 images
Train images class 8: 6000 images
Train images class 9: 6000 images

== Train total images: 60000 images ==


In [7]:
# Report the number of test labels per class.
analyze_arrays(test_labels_array, 'Test')

Test images class 0: 1000 images
Test images class 1: 1000 images
Test images class 2: 1000 images
Test images class 3: 1000 images
Test images class 4: 1000 images
Test images class 5: 1000 images
Test images class 6: 1000 images
Test images class 7: 1000 images
Test images class 8: 1000 images
Test images class 9: 1000 images

== Test total images: 10000 images ==


In [8]:
def array_to_png(image_array, image_dir):
    '''
    writes a single image array to disk as an image file.

    Input:
        image_array (array) --> pixel data of the image.
        image_dir (str) --> destination path including the file name (e.g. name.png).
    '''

    # Wrap the array in a PIL image and write it out in one chained call.
    Image.fromarray(image_array).save(image_dir)

In [2]:
def make_directories(main_directory, sub_directories):
    '''
    makes sure the main folder and each of its child folders exist.

    Input:
        main_directory (string) --> path of the main folder.
        sub_directories (list of strings) --> names of the folders created inside it.

    Folders that already exist are left untouched.
    '''

    os.makedirs(main_directory, exist_ok = True)

    # Child paths are produced lazily, one per requested name, in the given order.
    child_paths = (os.path.join(main_directory, name) for name in sub_directories)
    for child in child_paths:
        os.makedirs(child, exist_ok = True)

In [10]:
def array_to_png_all_images(directory, sub_dir, labels_array, images_array):
    '''
    saves every image as "<index>.png" inside the folder of its class.

    Input:
        directory (string) --> main directory, with or without a trailing separator.
        sub_dir (list of strings) --> folder names, where position i is the folder of class i.
        labels_array (array) --> one class id per image.
        images_array (array) --> images, aligned by index with labels_array.

    Images whose label has no matching position in sub_dir are not saved.
    The class folders must already exist.
    '''

    # Iterate over the classes that sub_dir actually names instead of a fixed count of 10.
    for class_id, class_name in enumerate(sub_dir):
        # os.path.join keeps the path valid whether or not `directory` ends with a separator.
        class_dir = os.path.join(directory, class_name)
        for index, label in enumerate(labels_array):
            if label == class_id:
                array_to_png(images_array[index],
                             os.path.join(class_dir, f'{index}.png'))

In [11]:
# Folder names ordered by class id: position i holds the name of class i.
sub_directories = [
    't-shirt', 'trouser', 'pullover', 'dress', 'coat',
    'sandal', 'shirt', 'sneaker', 'bag', 'ankle-boot',
]

# Create the folder tree of both splits before any image is written.
make_directories(main_directory = 'FashionMNIST/original-data/train',
                 sub_directories = sub_directories)
make_directories(main_directory = 'FashionMNIST/original-data/test',
                 sub_directories = sub_directories)

# Export the train split, then the test split, as png files.
array_to_png_all_images(
    directory = 'FashionMNIST/original-data/train/',
    sub_dir = sub_directories,
    labels_array = train_labels_array,
    images_array = train_images_array,
)
array_to_png_all_images(
    directory = 'FashionMNIST/original-data/test/',
    sub_dir = sub_directories,
    labels_array = test_labels_array,
    images_array = test_images_array,
)

In [2]:
def analyze_directory(directory, extract):
    '''
    prints the number of entries inside each subdirectory and the overall total.

    Input:
        directory (string) --> main directory to be parsed, with or without a trailing separator.
        extract (string) --> name of the data split, used as a prefix in the printed report.

    Every entry of a subdirectory is counted, whatever its type. Entries of the
    main directory that are not folders themselves are skipped.
    '''

    total = 0
    # Sorted so the report order does not depend on the file system.
    for label in sorted(os.listdir(directory)):
        # os.path.join works whether or not `directory` ends with a separator.
        label_dir = os.path.join(directory, label)
        if not os.path.isdir(label_dir):
            # A stray file next to the class folders must not abort the report.
            continue

        count = len(os.listdir(label_dir))
        print(f'{extract} images in "{label}" directory: {count} images')
        total += count

    print(f'\n== {extract} total images: {total} images ==')

In [3]:
# Count the files written to each class folder of the train split.
analyze_directory('FashionMNIST/original-data/train/', 'Train')

Train images in "ankle-boot" directory: 6000 images
Train images in "bag" directory: 6000 images
Train images in "coat" directory: 6000 images
Train images in "dress" directory: 6000 images
Train images in "pullover" directory: 6000 images
Train images in "sandal" directory: 6000 images
Train images in "shirt" directory: 6000 images
Train images in "sneaker" directory: 6000 images
Train images in "t-shirt" directory: 6000 images
Train images in "trouser" directory: 6000 images

== Train total images: 60000 images ==


In [4]:
# Count the files written to each class folder of the test split.
analyze_directory('FashionMNIST/original-data/test/', 'Test')

Test images in "ankle-boot" directory: 1000 images
Test images in "bag" directory: 1000 images
Test images in "coat" directory: 1000 images
Test images in "dress" directory: 1000 images
Test images in "pullover" directory: 1000 images
Test images in "sandal" directory: 1000 images
Test images in "shirt" directory: 1000 images
Test images in "sneaker" directory: 1000 images
Test images in "t-shirt" directory: 1000 images
Test images in "trouser" directory: 1000 images

== Test total images: 10000 images ==


#### Creating the Directories to Store the Artificial Images
***

In [3]:
# One folder per class, listed in class-id order.
sub_directories = [
    't-shirt', 'trouser', 'pullover', 'dress', 'coat',
    'sandal', 'shirt', 'sneaker', 'bag', 'ankle-boot',
]

# Empty train/test folder trees that will receive the artificial images.
make_directories(main_directory = 'FashionMNIST/artificial-augmentation/train',
                 sub_directories = sub_directories)
make_directories(main_directory = 'FashionMNIST/artificial-augmentation/test',
                 sub_directories = sub_directories)