In [42]:
import os
import time

import imageio
import matplotlib.pyplot as plt
import numpy as np
import PIL
import PIL.Image

In [43]:
# Geometry every source image is checked against: img_size x img_size pixels,
# num_channels colour channels.
img_size, num_channels = 256, 3
# Bits per pixel (presumably 8 bits for each of the 3 channels - TODO confirm).
pixel_depth = 24

# Input locations: folder holding the picture sub-folders and the label array.
src_dir = "Pictures"
src_labels = "label.npy"
# Output folder that receives the packed data/label .npy batches.
dst_dir = "Dataset"
# Each saved batch has shape (n_images, img_size, img_size, num_channels).

In [44]:
def main(src_dir, dst_dir, src_labels, force=False):
    """
    Pack every part found in ``src_dir`` into ``.npy`` batches under ``dst_dir``.

    Creates ``dst_dir`` when it is missing, delegates the actual packing to
    ``make_package`` and prints the elapsed wall-clock time. No shuffling of
    the data is performed here.

    Parameters
    ----------
    src_dir : str
        Directory whose entries are the parts to pack.
    dst_dir : str
        Directory that receives the generated files (created if needed).
    src_labels : str
        Path of the ``.npy`` file holding the labels for all images.
    force : bool, optional
        Forwarded to ``make_package``; when true, existing batches are
        regenerated instead of being skipped.
    """
    start_time = time.time()
    # makedirs(exist_ok=True) also creates missing parent directories and
    # avoids the check-then-create race of exists() + mkdir().
    os.makedirs(dst_dir, exist_ok=True)
    # Forward ``force`` - previously it was accepted but silently dropped.
    make_package(src_dir, dst_dir, src_labels, force=force)

    print('Done, it workied for: {0} seconds'.format(time.time() - start_time))

In [45]:
def make_subdataset(src_folder, src_labels, n, new_file_name):
    """
    Pack all images of one folder into a single array and save their labels.

    Every file in ``src_folder`` is opened as an image; its file name without
    extension must be an integer id, which is used (1-based) to pick the
    matching row of the label array stored in ``src_labels``.

    Parameters
    ----------
    src_folder : str
        Folder containing the image files of this part.
    src_labels : str
        Path of the ``.npy`` file with the labels of all images.
    n : int
        Part number; the labels are saved as ``label<n>.npy``.
    new_file_name : str
        Path of the ``.npy`` file that receives the image array. The label
        file is written into the same directory.

    Raises
    ------
    ValueError
        If a file name is not an integer id, or an image does not have shape
        ``(img_size, img_size, num_channels)``.
    """
    image_files = os.listdir(src_folder)
    # Preallocate room for every image and its label row (6 values per image).
    dataset = np.empty(
        (len(image_files), img_size, img_size, num_channels), dtype=np.uint8)
    labels = np.empty((len(image_files), 6))
    src_labels_open = np.load(src_labels)

    index = 0
    for image in image_files:
        image_file = os.path.join(src_folder, image)
        # The file stem is the unique, 1-based image id.
        num_extracted = int(os.path.splitext(image)[0])
        # The context manager closes the file handle once the pixels are read.
        with PIL.Image.open(image_file) as loaded_image:
            image_data = np.array(loaded_image)
        if image_data.shape != (img_size, img_size, num_channels):
            raise ValueError('Wrong image shape {}'.format(image_file))

        dataset[index] = image_data
        labels[index] = src_labels_open[num_extracted - 1]
        index += 1

    # Keep only the filled rows, for images and labels alike.
    dataset = dataset[:index]
    labels = labels[:index]
    # Store the labels next to the data file instead of relying on the
    # module-level ``dst_dir``.
    new_label_name = os.path.join(
        os.path.dirname(new_file_name), "label" + str(n) + ".npy")
    np.save(new_label_name, labels)
    np.save(new_file_name, dataset)
    


In [46]:
def make_package(src_dir, dst_dir, src_labels, force= False):
    """
    Pack each sub-folder of ``src_dir`` into ``data<k>.npy`` inside ``dst_dir``.

    Sub-folders are processed in sorted order so that the k-th folder always
    maps to the same ``data<k>.npy`` / ``label<k>.npy`` pair between runs.
    Entries of ``src_dir`` that are not directories are ignored.

    Parameters
    ----------
    src_dir : str
        Directory whose sub-folders hold the images of each part.
    dst_dir : str
        Directory that receives the generated ``.npy`` files.
    src_labels : str
        Path of the ``.npy`` label file, passed on to ``make_subdataset``.
    force : bool, optional
        When false, a part whose data file already exists in ``dst_dir`` is
        skipped; when true, it is regenerated.
    """
    folders = sorted(
        entry for entry in os.listdir(src_dir)
        if os.path.isdir(os.path.join(src_dir, entry))
    )
    print("Found {}parts to pickle".format(len(folders)))
    for nth, folder in enumerate(folders, start=1):
        file_name = "data" + str(nth) + '.npy'
        dst_file_name = os.path.join(dst_dir, file_name)
        # Check the real output path, not the bare name in the working dir.
        if os.path.exists(dst_file_name) and not force:
            # You may override by setting force=True.
            print('%s already present - Skipping pickling.' % file_name)
        else:
            print('Compressing %s.' % file_name)
            src_folder = os.path.join(src_dir, folder)
            make_subdataset(src_folder, src_labels, nth, dst_file_name)
            

In [47]:
# Entry point: run the packing pipeline with the paths from the config cell.
if __name__ == "__main__":
    # force=True asks main to regenerate batches rather than skip existing ones.
    main(src_dir, dst_dir, src_labels, force=True)

Found 1parts to pickle
Compressing data1.npy.
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
Done, it workied for: 0.07100415229797363 seconds


In [48]:
# Sanity check: reload one saved batch and preview its first image.
nth = 1
# Read from the output folder via ``dst_dir`` instead of rebinding the global
# ``src_dir`` - overwriting it would make a re-run of the packing cell read
# from the wrong directory.
dataset_name = "data" + str(nth) + ".npy"
label_name = os.path.join(dst_dir, "label" + str(nth) + ".npy")
data_file = os.path.join(dst_dir, dataset_name)
dataset = np.load(data_file)
# First image of the batch; no preallocation needed, indexing returns the array.
img_data = dataset[0]
print(dataset.shape)
print(img_data.shape)
img = PIL.Image.fromarray(img_data, mode='RGB')
img.show()

(20, 256, 256, 3)
(256, 256, 3)
