In [2]:
pip install augmentor

Note: you may need to restart the kernel to use updated packages.




In [3]:
import Augmentor
import numpy as np
import os
import glob
import random
import collections
from PIL import Image
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
import matplotlib.pyplot as plt


In [4]:
# Glob pattern matching every entry directly under the training-data root;
# each sub-directory found there is treated as one class.
root_directory = "D:/datasets/Train/*"

# glob.glob returns its matches in arbitrary (filesystem-dependent) order, so
# sort the directories to keep the class order stable from run to run.
folders = sorted(
    os.path.abspath(f) for f in glob.glob(root_directory) if os.path.isdir(f)
)

print("Folders (classes) found: %s " % [os.path.split(x)[1] for x in folders])

Folders (classes) found: ['jute', 'maize', 'rice', 'sugarcane', 'wheat'] 


In [5]:
# Create one Augmentor pipeline per class directory, keyed by the directory's
# base name. The prints bracket the constructor call so that Augmentor's own
# log lines appear between the folder header and the separator.
pipelines = {}
for folder in folders:
    print("Folder %s:" % (folder))
    class_pipeline = Augmentor.Pipeline(folder)
    pipelines[os.path.basename(folder)] = class_pipeline
    print("\n----------------------------\n")

# Report how many source images each pipeline picked up.
for p in pipelines.values():
    class_images = p.augmentor_images
    print("Class %s has %s samples." % (class_images[0].class_label, len(class_images)))

# Bare expression: display the mapping as the cell's output.
pipelines

Folder D:\datasets\Train\jute:
Initialised with 319 image(s) found.
Output directory set to D:\datasets\Train\jute\output.
----------------------------

Folder D:\datasets\Train\maize:
Initialised with 319 image(s) found.
Output directory set to D:\datasets\Train\maize\output.
----------------------------

Folder D:\datasets\Train\rice:
Initialised with 317 image(s) found.
Output directory set to D:\datasets\Train\rice\output.
----------------------------

Folder D:\datasets\Train\sugarcane:
Initialised with 318 image(s) found.
Output directory set to D:\datasets\Train\sugarcane\output.
----------------------------

Folder D:\datasets\Train\wheat:
Initialised with 321 image(s) found.
Output directory set to D:\datasets\Train\wheat\output.
----------------------------

Class jute has 319 samples.
Class maize has 319 samples.
Class rice has 317 samples.
Class sugarcane has 318 samples.
Class wheat has 321 samples.


{'jute': <Augmentor.Pipeline.Pipeline at 0x24892149ba0>,
 'maize': <Augmentor.Pipeline.Pipeline at 0x24892176920>,
 'rice': <Augmentor.Pipeline.Pipeline at 0x248b8bba5f0>,
 'sugarcane': <Augmentor.Pipeline.Pipeline at 0x248b8b74df0>,
 'wheat': <Augmentor.Pipeline.Pipeline at 0x248b8bea1a0>}

In [6]:
# Apply the same augmentation recipe to every class pipeline and then draw a
# fixed number of samples from each. The calls are made in this exact order
# for every pipeline.
# NOTE(review): these calls presumably append operations to the pipeline, so
# re-running this cell on a live kernel would stack them a second time -
# confirm, and re-create the pipelines before re-running if so.
for pipeline in pipelines.values():
    # Rotation limits are presumably in degrees - TODO confirm against Augmentor docs.
    pipeline.rotate(probability=0.75, max_left_rotation=10, max_right_rotation=10)
    pipeline.flip_left_right(probability=0.8)
    pipeline.skew(probability=0.4)
    pipeline.random_distortion(probability=0.5, grid_width=3, grid_height=7,magnitude=2)
    pipeline.crop_centre(probability=0.1,percentage_area=0.8)
    # Draw 33 samples from this pipeline (the progress log shows 33/33 per class).
    pipeline.sample(33)

# Fixed class-name -> integer-index mapping; the index is the position of the
# class name in the tuple below.
integer_labels = {
    class_name: class_index
    for class_index, class_name in enumerate(
        ('jute', 'maize', 'rice', 'sugarcane', 'wheat')
    )
}

# Immutable record bundling what is kept per class: its name, its integer
# index, its one-hot vector, its pipeline, and the generator drawn from it.
PipelineContainer = collections.namedtuple(
    'PipelineContainer',
    ['label', 'label_integer', 'label_categorical', 'pipeline', 'generator'],
)


# One container per class, pairing its labels with a single-image Keras
# generator obtained from that class's pipeline.
pipeline_containers = []

# Size the one-hot vector from the label map rather than from the number of
# discovered folders: the hot index comes from integer_labels, so a missing
# class folder would otherwise push the highest index outside the vector (or
# produce label vectors narrower than the full class set).
num_classes = len(integer_labels)

for label, pipeline in pipelines.items():
    # One-hot encoding of this class: all zeros except a 1 at its integer index.
    label_categorical = np.zeros(num_classes, dtype=int)
    label_categorical[integer_labels[label]] = 1
    pipeline_containers.append(
        PipelineContainer(
            label,
            integer_labels[label],
            label_categorical,
            pipeline,
            pipeline.keras_generator(batch_size=1)
        )
    )



def multi_generator(pipeline_containers, batch_size, image_shape=(224, 224, 3)):
    """Yield an endless stream of ``(X, y)`` batches drawn from several pipelines.

    For every sample in a batch, one container is picked uniformly at random,
    a single image is pulled from its ``generator``, and the container's
    ``label_categorical`` is used as the target.

    Args:
        pipeline_containers: Non-empty sequence of objects exposing a
            ``generator`` attribute (an iterator yielding ``(image, label)``
            pairs) and a ``label_categorical`` attribute.
        batch_size: Number of samples per yielded batch.
        image_shape: Shape each image is reshaped to before batching. The
            default is channels-last ``(224, 224, 3)``; pass ``(3, 224, 224)``
            for channels-first (see Keras' docs).

    Yields:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` stacks ``batch_size`` images of
        shape ``image_shape`` and ``y`` stacks the matching label vectors.

    Raises:
        ValueError: From ``reshape`` if an image does not contain exactly as
            many elements as ``image_shape`` requires.
        IndexError: From ``random.choice`` if ``pipeline_containers`` is empty.

    NOTE(review): the sampling log in this notebook lists some source images as
    mode L, 256x256. If the generators emit those unchanged, the reshape will
    raise - confirm the inputs are normalised to ``image_shape``.
    """
    # Loop forever: consumers such as Keras' fit() pull an unbounded number of
    # batches, so the generator must not be exhausted after the first one.
    while True:
        X = []
        y = []
        for _ in range(batch_size):
            pipeline_container = random.choice(pipeline_containers)
            # The label produced by the per-class generator is ignored; the
            # container's own one-hot vector is used instead.
            image, _label = next(pipeline_container.generator)
            image = image.reshape(image_shape)
            X.append(image)
            y.append(pipeline_container.label_categorical)  # Or label_integer if required by network
        # Convert only after the whole batch is collected; converting inside
        # the loop turns X into an ndarray and breaks the next append().
        X = np.asarray(X)
        y = np.asarray(y)
        yield X, y
            
# Instantiate the batch generator with one sample per batch. Generator
# functions are lazy, so nothing is drawn until next() is called on the result;
# as the last expression of the cell, the generator object itself is displayed.
multi_generator(pipeline_containers, batch_size=1)

Processing <PIL.Image.Image image mode=RGB size=224x224 at 0x248B8C521D0>: 100%|█| 33/33 [00:00<00:00, 58.88 Samples/s]
Processing <PIL.Image.Image image mode=L size=256x256 at 0x248B8C63700>: 100%|██| 33/33 [00:00<00:00, 124.49 Samples/s]
Processing <PIL.Image.Image image mode=L size=256x256 at 0x248B8C63670>: 100%|███| 33/33 [00:00<00:00, 88.82 Samples/s]
Processing <PIL.Image.Image image mode=RGB size=224x224 at 0x248B8C53FD0>: 100%|█| 33/33 [00:00<00:00, 116.01 Samples/s
Processing <PIL.Image.Image image mode=L size=256x256 at 0x248B8CB54B0>: 100%|██| 33/33 [00:00<00:00, 117.77 Samples/s]


<generator object multi_generator at 0x00000248B8C05B60>

In [7]:
# Sanity check: show the class names known to the pipelines and to the label
# map, one per line, so they can be compared by eye.
print(pipelines.keys(), integer_labels.keys(), sep="\n")

dict_keys(['jute', 'maize', 'rice', 'sugarcane', 'wheat'])
dict_keys(['jute', 'maize', 'rice', 'sugarcane', 'wheat'])
