In [10]:
import PIL
import os
import imageio
import numpy as np
import time
import matplotlib.pyplot as plt

In [11]:
# --- Input / output locations ------------------------------------------------
src_dir = "Pictures"      # root folder scanned for per-batch image sub-folders
dst_dir = "Dataset"       # folder that receives the packed .npy files
src_labels = "label.npy"  # label table loaded with np.load

# --- Image geometry ------------------------------------------------------------
img_size = 256     # every image must be img_size x img_size pixels
num_channels = 3   # channels per pixel
pixel_depth = 24   # presumably bits per pixel; not referenced in the visible cells

# Each packed batch has shape (images_in_folder, img_size, img_size, num_channels).

In [12]:
def main(src_dir, dst_dir, src_labels, force=False):
    """
    Pack the image folders found under ``src_dir`` into ``.npy`` batches.

    Ensures ``dst_dir`` exists, delegates the actual work to
    ``make_package`` and prints the elapsed wall-clock time.

    Note: nothing in this function shuffles the data.

    Parameters
    ----------
    src_dir : str
        Directory whose entries are handed to ``make_package``.
    dst_dir : str
        Output directory; created (including missing parents) if absent.
    src_labels : str
        Path of the label file, passed through to ``make_package``.
    force : bool, optional
        Passed through to ``make_package``.
    """
    start_time = time.time()
    # exist_ok=True replaces the racy "check, then mkdir" pair and also
    # creates missing parent directories.
    os.makedirs(dst_dir, exist_ok=True)
    # ``force`` used to be accepted but never forwarded, so the caller's
    # choice was silently ignored.
    make_package(src_dir, dst_dir, src_labels, force=force)

    print('Done, it workied for: {0} seconds'.format(time.time() - start_time))

In [16]:
def make_subdataset(src_folder, src_labels, n, new_file_name):
    """
    Pack one folder of images into a single array and save it with its labels.

    Every file in ``src_folder`` is opened as an image. The file name without
    its extension is parsed as an integer, 1-based image id, which selects the
    row ``id - 1`` of the label table loaded from ``src_labels``.

    Two files are written:
      * ``new_file_name`` - uint8 array of shape
        ``(n_images, img_size, img_size, num_channels)``;
      * ``label<n>.npy`` in the same directory as ``new_file_name`` -
        array of shape ``(n_images, 6)`` whose rows line up with the images.

    Parameters
    ----------
    src_folder : str
        Folder containing the images of this batch.
    src_labels : str
        Path of the ``.npy`` label table.
    n : int
        Batch number, used to name the label file.
    new_file_name : str
        Destination path of the packed image array.

    Raises
    ------
    ValueError
        If a file name (without extension) is not an integer.
    IndexError
        If an image id does not address a row of the label table.
    Exception
        If an image does not have shape ``(img_size, img_size, num_channels)``.
    """
    # ``import PIL`` alone does not load the ``PIL.Image`` submodule, so
    # import it explicitly instead of relying on another package doing so.
    from PIL import Image

    # Sorted so that the row order of a batch is reproducible; os.listdir
    # returns entries in arbitrary order.
    image_files = sorted(os.listdir(src_folder))
    expected_shape = (img_size, img_size, num_channels)

    dataset = np.empty((len(image_files),) + expected_shape, dtype=np.uint8)
    # assumes every label row holds 6 values - TODO confirm against label file
    labels = np.empty((len(image_files), 6))
    all_labels = np.load(src_labels)

    for index, image in enumerate(image_files):
        image_file = os.path.join(src_folder, image)
        # The file name stem is the unique, 1-based image id.
        image_id = int(os.path.splitext(image)[0])
        if not 1 <= image_id <= len(all_labels):
            # Without this check an id <= 0 would silently wrap around to a
            # label at the end of the table.
            raise IndexError(
                'Image id {} of {} is outside the label table'.format(
                    image_id, image_file))

        # Context manager closes the file handle once the pixels are copied.
        with Image.open(image_file) as loaded_image:
            image_data = np.array(loaded_image)
        if image_data.shape != expected_shape:
            raise Exception('Wrong image shape {}'.format(image_file))

        dataset[index] = image_data
        labels[index] = all_labels[image_id - 1]

    # Keep the labels next to the data file rather than in whatever the
    # global ``dst_dir`` happens to be.
    label_dir = os.path.dirname(new_file_name)
    new_label_name = os.path.join(label_dir, "label" + str(n) + ".npy")
    np.save(new_label_name, labels)
    np.save(new_file_name, dataset)
    


In [17]:
def make_package(src_dir, dst_dir, src_labels, force=False):
    """
    Pack every sub-folder of ``src_dir`` into a numbered ``.npy`` batch.

    Sub-folders are processed in sorted name order; the k-th one (1-based)
    is written to ``<dst_dir>/data<k>.npy`` by ``make_subdataset``.

    Parameters
    ----------
    src_dir : str
        Directory containing one sub-folder per batch.
    dst_dir : str
        Directory receiving the batch files.
    src_labels : str
        Path of the label file, passed through to ``make_subdataset``.
    force : bool, optional
        When False (default), a batch whose destination file already exists
        is skipped. When True, it is rebuilt.
    """
    # List once, keep directories only (a stray file would otherwise crash
    # the run) and sort so the folder -> batch number mapping is stable.
    folders = sorted(
        entry for entry in os.listdir(src_dir)
        if os.path.isdir(os.path.join(src_dir, entry))
    )
    print("Found {}parts to pickle".format(len(folders)))

    for nth, folder in enumerate(folders, start=1):
        file_name = "data" + str(nth) + '.npy'
        dst_file_name = os.path.join(dst_dir, file_name)
        # Check the real destination path; the bare file name is relative to
        # the working directory and would never match an existing batch.
        if os.path.exists(dst_file_name) and not force:
            # You may override by setting force=True.
            print('%s already present - Skipping pickling.' % file_name)
        else:
            print('Compressing %s.' % file_name)
            src_folder = os.path.join(src_dir, folder)
            make_subdataset(src_folder, src_labels, nth, dst_file_name)
            

In [18]:
# Entry point: run the packaging pipeline on the configured paths,
# requesting force=True.
if __name__ == "__main__":
    main(src_dir=src_dir, dst_dir=dst_dir, src_labels=src_labels, force=True)

Found 500parts to pickle
Compressing data1.npy.
Compressing data2.npy.
Compressing data3.npy.
Compressing data4.npy.
Compressing data5.npy.
Compressing data6.npy.
Compressing data7.npy.
Compressing data8.npy.
Compressing data9.npy.
Compressing data10.npy.
Compressing data11.npy.
Compressing data12.npy.
Compressing data13.npy.
Compressing data14.npy.
Compressing data15.npy.
Compressing data16.npy.
Compressing data17.npy.
Compressing data18.npy.
Compressing data19.npy.
Compressing data20.npy.
Compressing data21.npy.
Compressing data22.npy.
Compressing data23.npy.
Compressing data24.npy.
Compressing data25.npy.
Compressing data26.npy.
Compressing data27.npy.
Compressing data28.npy.
Compressing data29.npy.
Compressing data30.npy.
Compressing data31.npy.
Compressing data32.npy.
Compressing data33.npy.
Compressing data34.npy.
Compressing data35.npy.
Compressing data36.npy.
Compressing data37.npy.
Compressing data38.npy.
Compressing data39.npy.
Compressing data40.npy.
Compressing data41.npy.


Compressing data333.npy.
Compressing data334.npy.
Compressing data335.npy.
Compressing data336.npy.
Compressing data337.npy.
Compressing data338.npy.
Compressing data339.npy.
Compressing data340.npy.
Compressing data341.npy.
Compressing data342.npy.
Compressing data343.npy.
Compressing data344.npy.
Compressing data345.npy.
Compressing data346.npy.
Compressing data347.npy.
Compressing data348.npy.
Compressing data349.npy.
Compressing data350.npy.
Compressing data351.npy.
Compressing data352.npy.
Compressing data353.npy.
Compressing data354.npy.
Compressing data355.npy.
Compressing data356.npy.
Compressing data357.npy.
Compressing data358.npy.
Compressing data359.npy.
Compressing data360.npy.
Compressing data361.npy.
Compressing data362.npy.
Compressing data363.npy.
Compressing data364.npy.
Compressing data365.npy.
Compressing data366.npy.
Compressing data367.npy.
Compressing data368.npy.
Compressing data369.npy.
Compressing data370.npy.
Compressing data371.npy.
Compressing data372.npy.


In [19]:
# Sanity check: reload one packed batch and view its first image.
nth = 1
# Use a dedicated name for the output folder. Rebinding ``src_dir`` here
# would clobber the configuration value, and a later re-run of the packaging
# cell would then read from the output folder.
packed_dir = "./Dataset"
dataset_name = "data" + str(nth) + ".npy"
data_file = os.path.join(packed_dir, dataset_name)
# Path of the matching label file; it is not loaded in this cell.
label_name = os.path.join(packed_dir, "label" + str(nth) + ".npy")

dataset = np.load(data_file)
# Indexing already yields the first image; no preallocation is needed.
img_data = dataset[0]
print(dataset.shape)
print(img_data.shape)
img = PIL.Image.fromarray(img_data, mode='RGB')
img.show()

(20, 256, 256, 3)
(256, 256, 3)
