In [10]:
import os
import pathlib
import shutil
import time
from shutil import copyfile

from Augmentor import Pipeline
from tqdm import tqdm

# Name of the dataset split (a sub-folder of the images root) whose class
# folders are counted, augmented and written back to.
DATASET_SOURCE: str = 'train'

# Number of augmented samples drawn per class. The same value also serves as
# the size threshold: classes holding more images than this are not augmented.
NEW_IMAGES: int = 500

In [11]:
# Root folder that holds one sub-folder per dataset split (e.g. 'train', 'test').
IMAGES_FOLDER = 'Images'

# Folder of the split being analysed; every sub-folder in it is one class.
source_dir = os.path.join(IMAGES_FOLDER, DATASET_SOURCE)

# Seed the counters from the class folders of the 'test' split so that classes
# absent from the source split still appear with a count of 0.
# Stray files (e.g. '.DS_Store') are skipped: only directories are classes.
test_dir = os.path.join(IMAGES_FOLDER, 'test')
nr_images = {
    class_type: 0
    for class_type in os.listdir(test_dir)
    if os.path.isdir(os.path.join(test_dir, class_type))
}

# Count the files of every class folder in the source split. A missing source
# split leaves all counters at 0, as before.
if os.path.isdir(source_dir):
    for class_type in os.listdir(source_dir):
        class_dir = os.path.join(source_dir, class_type)
        if not os.path.isdir(class_dir):
            continue
        # .get() avoids a KeyError for classes that have no folder in 'test'.
        nr_images[class_type] = nr_images.get(class_type, 0) + len(os.listdir(class_dir))

print(nr_images)

{'CL': 26, 'CUW': 99, 'CUZ': 117, 'DEE': 308, 'DEF': 170, 'DEG': 5082, 'DER': 350, 'DES': 82, 'FL': 84, 'FM': 16, 'healthy_images': 7930, 'REM': 33, 'RF': 107, 'RM': 21, 'RPL': 3, 'SCP': 1074}


In [12]:
# Keep every class whose image count does not exceed NEW_IMAGES; classes with
# more images than that are left out of the augmentation.
class_to_augment = {
    defect_class
    for defect_class, image_count in nr_images.items()
    if image_count <= NEW_IMAGES
}
class_to_augment

{'CL',
 'CUW',
 'CUZ',
 'DEE',
 'DEF',
 'DER',
 'DES',
 'FL',
 'FM',
 'REM',
 'RF',
 'RM',
 'RPL'}

In [13]:
# Map each class selected for augmentation to the paths of its source images.
augmentable_files = {dfct_class: [] for dfct_class in class_to_augment}

# Only the DATASET_SOURCE split is read; if that folder is not present under
# IMAGES_FOLDER every list simply stays empty.
if DATASET_SOURCE in os.listdir(IMAGES_FOLDER):
    for class_type in class_to_augment:
        class_dir = os.path.join(IMAGES_FOLDER, DATASET_SOURCE, class_type)
        augmentable_files[class_type].extend(
            [os.path.join(class_dir, image) for image in os.listdir(class_dir)]
        )

In [14]:
# Remove any staging folder left over from a previous run. shutil.rmtree works
# on every platform (the former shell escape needed `rm`), and
# ignore_errors=True keeps the `rm -rf` behaviour of not failing when the
# folder does not exist.
shutil.rmtree('TMP', ignore_errors=True)

In [15]:
# Copy the source images of every class into its own staging folder TMP/<class>.
for key, source_paths in augmentable_files.items():
    staging_dir = os.path.join('TMP', key)
    pathlib.Path(staging_dir).mkdir(parents=True, exist_ok=True)
    # Pad the class name to 4 characters so the progress bars line up.
    for source_path in tqdm(source_paths, desc=key.ljust(4)):
        file_name = os.path.basename(source_path)
        copyfile(source_path, os.path.join(staging_dir, file_name))

CUZ : 100%|██████████| 117/117 [00:02<00:00, 53.22it/s]
RPL : 100%|██████████| 3/3 [00:00<00:00, 62.09it/s]
FL  : 100%|██████████| 84/84 [00:01<00:00, 56.65it/s]
DEE : 100%|██████████| 308/308 [00:05<00:00, 52.47it/s]
RM  : 100%|██████████| 21/21 [00:00<00:00, 62.90it/s]
DES : 100%|██████████| 82/82 [00:01<00:00, 58.90it/s]
DEF : 100%|██████████| 170/170 [00:03<00:00, 51.78it/s]
CL  : 100%|██████████| 26/26 [00:00<00:00, 54.74it/s]
FM  : 100%|██████████| 16/16 [00:00<00:00, 62.87it/s]
CUW : 100%|██████████| 99/99 [00:01<00:00, 50.06it/s]
DER : 100%|██████████| 350/350 [00:06<00:00, 55.78it/s]
RF  : 100%|██████████| 107/107 [00:01<00:00, 54.23it/s]
REM : 100%|██████████| 33/33 [00:00<00:00, 52.42it/s]


In [16]:
# Build one Augmentor pipeline per staged class folder and draw NEW_IMAGES
# augmented samples from it.
for defect_class in class_to_augment:
    pipeline = Pipeline(os.path.join('TMP', defect_class))

    # Operations are registered in this order, each with its own probability.
    pipeline.skew(probability=0.2)
    pipeline.flip_left_right(probability=0.8)
    pipeline.rotate(probability=0.3, max_left_rotation=5, max_right_rotation=5)
    pipeline.shear(probability=0.6, max_shear_left=7, max_shear_right=7)

    # NOTE(review): presumably this pause keeps the console output of
    # consecutive pipelines from interleaving - confirm before removing.
    time.sleep(0.4)
    pipeline.sample(NEW_IMAGES)

Initialised with 117 image(s) found.
Output directory set to TMP/CUZ/output.

Processing <PIL.Image.Image image mode=RGB size=352x288 at 0x7F59086C8B70>: 100%|██████████| 500/500 [00:11<00:00, 44.23 Samples/s]                  


Initialised with 3 image(s) found.
Output directory set to TMP/RPL/output.

Processing <PIL.Image.Image image mode=RGB size=352x288 at 0x7F59086583C8>: 100%|██████████| 500/500 [00:12<00:00, 39.76 Samples/s]                  


Initialised with 84 image(s) found.
Output directory set to TMP/FL/output.

Processing <PIL.Image.Image image mode=RGB size=352x288 at 0x7F5910901F60>: 100%|██████████| 500/500 [00:12<00:00, 39.19 Samples/s]                   


Initialised with 308 image(s) found.
Output directory set to TMP/DEE/output.

Processing <PIL.Image.Image image mode=RGB size=352x288 at 0x7F59086DFA58>: 100%|██████████| 500/500 [00:12<00:00, 41.39 Samples/s]                  


Initialised with 21 image(s) found.
Output directory set to TMP/RM/output.

Processing <PIL.Image.Image image mode=RGB size=352x288 at 0x7F5908F3C518>: 100%|██████████| 500/500 [00:11<00:00, 42.32 Samples/s]                  


Initialised with 82 image(s) found.
Output directory set to TMP/DES/output.

Processing <PIL.Image.Image image mode=RGB size=352x288 at 0x7F59086F24E0>: 100%|██████████| 500/500 [00:11<00:00, 43.34 Samples/s]                   


Initialised with 170 image(s) found.
Output directory set to TMP/DEF/output.

Processing <PIL.Image.Image image mode=RGB size=352x288 at 0x7F59086D3A90>: 100%|██████████| 500/500 [00:10<00:00, 48.87 Samples/s]                  


Initialised with 26 image(s) found.
Output directory set to TMP/CL/output.

Processing <PIL.Image.Image image mode=RGB size=352x288 at 0x7F590BFED978>: 100%|██████████| 500/500 [00:08<00:00, 56.22 Samples/s]                  


Initialised with 16 image(s) found.
Output directory set to TMP/FM/output.

Processing <PIL.Image.Image image mode=RGB size=352x288 at 0x7F5910051D68>: 100%|██████████| 500/500 [00:13<00:00, 36.44 Samples/s]                  


Initialised with 99 image(s) found.
Output directory set to TMP/CUW/output.

Processing <PIL.Image.Image image mode=RGB size=352x288 at 0x7F59086194A8>: 100%|██████████| 500/500 [00:13<00:00, 35.73 Samples/s]                   


Initialised with 350 image(s) found.
Output directory set to TMP/DER/output.

Processing <PIL.Image.Image image mode=RGB size=352x288 at 0x7F590862BE80>: 100%|██████████| 500/500 [00:12<00:00, 40.89 Samples/s]                  


Initialised with 107 image(s) found.
Output directory set to TMP/RF/output.

Processing <PIL.Image.Image image mode=RGB size=352x288 at 0x7F5908F34780>: 100%|██████████| 500/500 [00:11<00:00, 43.19 Samples/s]                  


Initialised with 33 image(s) found.
Output directory set to TMP/REM/output.

Processing <PIL.Image.Image image mode=RGB size=352x288 at 0x7F5908F440B8>: 100%|██████████| 500/500 [00:10<00:00, 46.75 Samples/s]                  


In [17]:
# Copy every generated image from TMP/<class>/output into the matching class
# folder of the source split.
for key in class_to_augment:
    output_dir = os.path.join('TMP', key, 'output')
    target_dir = os.path.join(IMAGES_FOLDER, DATASET_SOURCE, key)
    # Pad the class name to 4 characters so the progress bars line up.
    for file_name in tqdm(os.listdir(output_dir), desc=key.ljust(4)):
        copyfile(os.path.join(output_dir, file_name), os.path.join(target_dir, file_name))


CUZ : 100%|██████████| 500/500 [00:05<00:00, 87.20it/s] 
RPL : 100%|██████████| 500/500 [00:04<00:00, 102.08it/s]
FL  : 100%|██████████| 500/500 [00:05<00:00, 91.81it/s] 
DEE : 100%|██████████| 500/500 [00:04<00:00, 100.92it/s]
RM  : 100%|██████████| 500/500 [00:05<00:00, 93.80it/s] 
DES : 100%|██████████| 500/500 [00:04<00:00, 109.03it/s]
DEF : 100%|██████████| 500/500 [00:04<00:00, 100.94it/s]
CL  : 100%|██████████| 500/500 [00:05<00:00, 97.22it/s] 
FM  : 100%|██████████| 500/500 [00:04<00:00, 107.00it/s]
CUW : 100%|██████████| 500/500 [00:04<00:00, 109.11it/s]
DER : 100%|██████████| 500/500 [00:05<00:00, 99.26it/s] 
RF  : 100%|██████████| 500/500 [00:04<00:00, 109.29it/s]
REM : 100%|██████████| 500/500 [00:04<00:00, 105.09it/s]
