In [1]:
from data import *

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# data augmentation 

In deep learning tasks, a lot of data is needed to train a DNN model. When the dataset is not big enough, data augmentation should be applied.

keras.preprocessing.image.ImageDataGenerator is a data generator that feeds the DNN with batches of the form (data, label); it can also perform data augmentation at the same time.

keras.preprocessing.image.ImageDataGenerator makes data augmentation very convenient: it implements image rotation, shift, rescale and so on. See the [keras documentation](https://keras.io/preprocessing/image/) for details.

For image segmentation tasks, the image and mask must be transformed **together!!**

## define your data generator

If you want to visualize your data augmentation result, set `save_to_dir` to the folder where the augmented images should be written.

In [4]:
import os

# Root data folder; the train/test split below is created underneath it.
data_path = 'data'
# Folder whose 'image' and 'label' sub-folders are read as the source of the split.
src_path = os.path.join(data_path,'orig')
print(src_path)

data\orig


In [5]:
def create_path(src_path,path_extention):
    """Return ``src_path/path_extention``, creating the directory if needed.

    Args:
        src_path: Parent directory.
        path_extention: Sub-path to append; it may contain several levels.

    Returns:
        The joined path as a string.

    Raises:
        FileExistsError: If the joined path already exists but is not a directory.
    """
    new_path = os.path.join(src_path, path_extention)
    # makedirs with exist_ok=True avoids the exists()/mkdir() race and also
    # creates missing intermediate folders, which os.mkdir would reject.
    os.makedirs(new_path, exist_ok=True)
    return new_path

#train_path = create_path(data_path,'train')
#print(train_path)

#test_path = create_path(data_path,'test')
#print(test_path)

In [6]:
def create_X_y_paths(src_path,X_name,y_name):
    """Create the input and target sub-folders of ``src_path``.

    Both folders are produced by ``create_path``, first the one named
    ``X_name`` and then the one named ``y_name``.

    Returns:
        A 2-tuple ``(X_path, y_path)``.
    """
    return tuple(create_path(src_path, sub_name) for sub_name in (X_name, y_name))

#test_paths = create_X_y_paths(test_path,'image','labels')
#print(test_paths)


#train_paths = create_X_y_paths(train_path,'image','labels')
#print(train_paths)


In [7]:
import shutil

def copy_images(src_image_list,src_path,dest_path):
    """Copy every file named in ``src_image_list`` from ``src_path`` to ``dest_path``.

    Args:
        src_image_list: File names relative to ``src_path``.
        src_path: Directory the files are read from.
        dest_path: Destination handed to ``shutil.copy`` for each file.
    """
    sources = (os.path.join(src_path, file_name) for file_name in src_image_list)
    for source in sources:
        shutil.copy(source, dest_path)
        

In [8]:
import random
def image_train_test_split(src_path,dest_path,X_name,y_name,test_size=0.3,seed=None):
    """Copy a random train/test split of paired image/mask files.

    The files listed in ``src_path/X_name`` are shuffled and split; each
    selected file name is copied from both ``src_path/X_name`` and
    ``src_path/y_name`` into the matching sub-folder of ``dest_path/train``
    or ``dest_path/test``. Existing content of the destination folders is
    left in place.

    Args:
        src_path: Folder containing the ``X_name`` and ``y_name`` sub-folders.
        dest_path: Folder in which ``train`` and ``test`` are created.
        X_name: Name of the input-image sub-folder.
        y_name: Name of the mask/label sub-folder.
        test_size: Fraction (0..1) of the files assigned to the test set.
        seed: Optional seed for a reproducible split. When ``None`` the
            module-level ``random`` generator is used, as before.

    Returns:
        Tuple ``(train_path, test_path)``.

    Raises:
        ValueError: If ``test_size`` is outside ``[0, 1]``.
        FileNotFoundError: If a file in ``X_name`` has no same-named file in ``y_name``.
    """
    if not 0 <= test_size <= 1:
        raise ValueError("test_size must be between 0 and 1, got %r" % (test_size,))

    X_path = os.path.join(src_path, X_name)
    y_path = os.path.join(src_path, y_name)

    # Regular files only (sub-folders would make shutil.copy fail), sorted so the
    # shuffle starts from a deterministic order regardless of the file system.
    img_list = sorted(
        f for f in os.listdir(X_path)
        if os.path.isfile(os.path.join(X_path, f))
    )

    # Fail before anything is copied rather than half-way through the split.
    missing = [f for f in img_list if not os.path.isfile(os.path.join(y_path, f))]
    if missing:
        raise FileNotFoundError(
            "no matching file in %s for: %s" % (y_path, ", ".join(missing))
        )

    train_path = create_path(dest_path, 'train')
    test_path = create_path(dest_path, 'test')
    X_train_path, y_train_path = create_X_y_paths(train_path, X_name, y_name)
    X_test_path, y_test_path = create_X_y_paths(test_path, X_name, y_name)

    if seed is None:
        random.shuffle(img_list)
    else:
        # A private generator keeps the global random state untouched.
        random.Random(seed).shuffle(img_list)
    split_ind = int(test_size * len(img_list))

    # The first split_ind shuffled names form the test set, the rest the train set.
    test_data = img_list[:split_ind]
    train_data = img_list[split_ind:]

    copy_images(train_data, X_path, X_train_path)
    copy_images(train_data, y_path, y_train_path)
    copy_images(test_data, X_path, X_test_path)
    copy_images(test_data, y_path, y_test_path)

    return train_path, test_path
    
    

    
# Seed the global RNG before splitting: image_train_test_split shuffles the file
# list with the `random` module, and the destination folders are never emptied,
# so an unseeded re-run could copy the same image into both train and test.
# NOTE(review): the result also depends on the order of the directory listing.
random.seed(42)
train_path,test_path=image_train_test_split(src_path,data_path,'image','label')

In [9]:
#if you don't want to do data augmentation, set data_gen_args as an empty dict.
#data_gen_args = dict()

# Folder passed to trainGenerator as save_to_dir so the augmented samples can be inspected.
train_aug_path = create_path(train_path,'aug')

# Augmentation settings handed to trainGenerator (presumably forwarded to
# Keras' ImageDataGenerator — confirm in data.py).
data_gen_args = {
    # NOTE(review): Keras documents rotation_range in degrees, so 0.2 is an
    # almost imperceptible rotation — confirm this is intended.
    'rotation_range': 0.2,
    'width_shift_range': 0.05,
    'height_shift_range': 0.05,
    'shear_range': 0.05,
    'zoom_range': 0.05,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
myGenerator = trainGenerator(
    20,
    train_path,
    'image',
    'label',
    data_gen_args,
    save_to_dir=train_aug_path,
    image_color_mode='rgb',
)

## visualize your data augmentation result

In [10]:
# Draw exactly num_batch batches from the generator. With the batch size of 20
# used above this should leave 60 transformed images and their masks in
# train_aug_path (the save_to_dir given to trainGenerator).
num_batch = 3
# zip() asks range() first and stops once it is exhausted, so no extra batch is
# pulled from the generator. enumerate() + break fetched num_batch + 1 batches.
for i,batch in zip(range(num_batch),myGenerator):
    pass

Found 133 images belonging to 1 classes.
Found 133 images belonging to 1 classes.


## create .npy data

If your computer has enough memory, you can create npy files containing all your images and masks, and feed your DNN with them.

In [9]:
# Read the augmented samples from train_aug_path, the folder passed to the
# generator as save_to_dir, instead of the hard-coded "data/membrane/train/aug/"
# that none of the visible cells creates.
# os.path.join(..., '') keeps the trailing separator the original argument had.
aug_dir = os.path.join(train_aug_path, '')
image_arr,mask_arr = geneTrainNpy(aug_dir,aug_dir)
#np.save("data/image_arr.npy",image_arr)
#np.save("data/mask_arr.npy",mask_arr)