In [1]:
pip install augmentor

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.1.1 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import Augmentor
import numpy as np
import os
import glob
import random
import collections
from PIL import Image
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
import matplotlib.pyplot as plt


In [3]:
# Glob pattern matching every entry directly under the training-set root;
# each sub-directory found there is treated as one class.
root_directory = "F:/Main project/datasets/Train/*"

# Keep only directories, stored as absolute paths. glob.glob() returns its
# matches in arbitrary order, so sort them to make the class order
# reproducible from run to run and across filesystems.
folders = sorted(
    os.path.abspath(entry)
    for entry in glob.glob(root_directory)
    if os.path.isdir(entry)
)

print("Folders (classes) found: %s " % [os.path.split(x)[1] for x in folders])

Folders (classes) found: ['jute', 'maize', 'rice', 'sugarcane', 'wheat'] 


In [4]:
# Build one Augmentor pipeline per class folder, keyed by the folder's base name.
pipelines = {}
for folder in folders:
    print("Folder %s:" % (folder))
    pipelines[os.path.split(folder)[1]] = (Augmentor.Pipeline(folder))
    print("\n----------------------------\n")

# Report how many images each pipeline holds. The class name is taken from the
# dictionary key rather than from augmentor_images[0], so a class folder that
# contains no images is reported as 0 samples instead of raising IndexError.
for class_name, p in pipelines.items():
    print("Class %s has %s samples." % (class_name, len(p.augmentor_images)))

# Last expression of the cell: display the name -> pipeline mapping.
pipelines

Folder F:\Main project\datasets\Train\jute:
Initialised with 319 image(s) found.
Output directory set to F:\Main project\datasets\Train\jute\output.
----------------------------

Folder F:\Main project\datasets\Train\maize:
Initialised with 319 image(s) found.
Output directory set to F:\Main project\datasets\Train\maize\output.
----------------------------

Folder F:\Main project\datasets\Train\rice:
Initialised with 317 image(s) found.
Output directory set to F:\Main project\datasets\Train\rice\output.
----------------------------

Folder F:\Main project\datasets\Train\sugarcane:
Initialised with 318 image(s) found.
Output directory set to F:\Main project\datasets\Train\sugarcane\output.
----------------------------

Folder F:\Main project\datasets\Train\wheat:
Initialised with 321 image(s) found.
Output directory set to F:\Main project\datasets\Train\wheat\output.
----------------------------

Class jute has 319 samples.
Class maize has 319 samples.
Class rice has 317 samples.
Class 

{'jute': <Augmentor.Pipeline.Pipeline at 0x192e7966ec0>,
 'maize': <Augmentor.Pipeline.Pipeline at 0x1928eb544f0>,
 'rice': <Augmentor.Pipeline.Pipeline at 0x1928f26fe20>,
 'sugarcane': <Augmentor.Pipeline.Pipeline at 0x1928f32d210>,
 'wheat': <Augmentor.Pipeline.Pipeline at 0x1928f2f9750>}

In [5]:
# Attach the same augmentation recipe to every class pipeline, then request
# 33 samples from each one.
# NOTE(review): re-running this cell presumably appends the operations to each
# pipeline a second time — confirm against Augmentor's behaviour.
for pipeline in pipelines.values():
    pipeline.rotate(
        probability=0.75,
        max_left_rotation=10,
        max_right_rotation=10,
    )
    pipeline.flip_left_right(probability=0.8)
    pipeline.skew(probability=0.4)
    pipeline.random_distortion(
        probability=0.5,
        grid_width=3,
        grid_height=7,
        magnitude=2,
    )
    pipeline.crop_centre(probability=0.1, percentage_area=0.8)
    pipeline.sample(33)

# Class name -> integer id. The id doubles as the index that is set to 1 in the
# class's one-hot vector when the pipeline containers are built.
integer_labels = {
    name: index
    for index, name in enumerate(("jute", "maize", "rice", "sugarcane", "wheat"))
}

# Immutable record bundling, for one class: its name, its integer id, its
# one-hot vector, its Augmentor pipeline, and a generator made from that pipeline.
PipelineContainer = collections.namedtuple(
    "PipelineContainer",
    ["label", "label_integer", "label_categorical", "pipeline", "generator"],
)


# One container per class, pairing the class's labels with its pipeline and a
# Keras generator (batch size 1) created from that pipeline.
pipeline_containers = []

for label, pipeline in pipelines.items():
    # One-hot vector: all zeros except a 1 at this class's integer id.
    label_categorical = np.zeros(len(pipelines), dtype=int)
    label_categorical[integer_labels[label]] = 1

    container = PipelineContainer(
        label=label,
        label_integer=integer_labels[label],
        label_categorical=label_categorical,
        pipeline=pipeline,
        generator=pipeline.keras_generator(batch_size=1),
    )
    pipeline_containers.append(container)



def multi_generator(pipeline_containers, batch_size, image_shape=(224, 224, 3)):
    """Endlessly yield ``(X, y)`` batches drawn at random from several pipelines.

    For every sample in a batch, one container is picked with
    ``random.choice`` and a single item is pulled from its ``generator``.

    Args:
        pipeline_containers: Non-empty sequence of objects exposing a
            ``generator`` attribute (an iterator producing ``(image, label)``
            pairs) and a ``label_categorical`` attribute.
        batch_size: Number of samples stacked into each yielded batch.
        image_shape: Shape every image is reshaped to before stacking.
            Defaults to ``(224, 224, 3)``; use e.g. ``(3, 224, 224)`` for
            channels_first, see Keras' docs.

    Yields:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` stacks ``batch_size`` reshaped
        images, ``y`` stacks the matching ``label_categorical`` values (swap in
        ``label_integer`` if the network requires integer labels).

    Raises:
        ValueError: From ``reshape`` when an image's element count does not
            match ``image_shape``.
        IndexError: From ``random.choice`` when ``pipeline_containers`` is empty.
    """
    # Loop forever so the generator can be consumed batch after batch instead
    # of being exhausted after the first one.
    while True:
        X = []
        y = []
        for _ in range(batch_size):
            pipeline_container = random.choice(pipeline_containers)
            # The label produced by the per-class generator is ignored; the
            # container's own label_categorical is used instead.
            image, _label = next(pipeline_container.generator)
            X.append(image.reshape(image_shape))
            y.append(pipeline_container.label_categorical)
        # Convert only after the whole batch is collected: converting inside
        # the loop would replace the lists with arrays, which have no append().
        yield np.asarray(X), np.asarray(y)
            
# Create a generator configured for batches of one image. Calling the function
# does not run its body, so the cell output is just the generator object.
multi_generator(pipeline_containers, batch_size=1)

Processing <PIL.Image.Image image mode=L size=256x256 at 0x1928F3BFCA0>: 100%|██| 33/33 [00:00<00:00, 146.95 Samples/s]
Processing <PIL.Image.Image image mode=L size=256x256 at 0x1928F3BC6D0>: 100%|██| 33/33 [00:00<00:00, 172.86 Samples/s]
Processing <PIL.Image.Image image mode=L size=256x256 at 0x1928F3BD8D0>: 100%|██| 33/33 [00:00<00:00, 188.13 Samples/s]
Processing <PIL.Image.Image image mode=RGB size=224x224 at 0x1928F3BF640>: 100%|█| 33/33 [00:00<00:00, 190.88 Samples/s
Processing <PIL.Image.Image image mode=RGB size=224x224 at 0x1928F3BE4A0>: 100%|█| 33/33 [00:00<00:00, 207.62 Samples/s


<generator object multi_generator at 0x000001928F381850>

In [6]:
# Print both key sets, one per line, so they can be compared by eye: every
# pipeline class is looked up in integer_labels when the containers are built.
print(pipelines.keys(), integer_labels.keys(), sep="\n")

dict_keys(['jute', 'maize', 'rice', 'sugarcane', 'wheat'])
dict_keys(['jute', 'maize', 'rice', 'sugarcane', 'wheat'])
