In [1]:
import os, cv2, shutil, numpy as np
from IPython.display import clear_output

def get_immediate_subdirectories(a_dir):
    """Return the names of the directories located directly inside ``a_dir``.

    Only entry names are returned (not full paths), in the order produced by
    ``os.listdir``. Nested levels are not visited.
    """
    subdirs = []
    for entry in os.listdir(a_dir):
        candidate = os.path.join(a_dir, entry)
        if os.path.isdir(candidate):
            subdirs.append(entry)
    return subdirs

def get_immediate_files(a_dir):
    """Return the names of the regular files located directly inside ``a_dir``.

    Only entry names are returned (not full paths), in the order produced by
    ``os.listdir``. Subdirectories and their contents are ignored.
    """
    files = []
    for entry in os.listdir(a_dir):
        candidate = os.path.join(a_dir, entry)
        if os.path.isfile(candidate):
            files.append(entry)
    return files

def copy_proper_species(src,dst):
    """Copy every sufficiently large species directory from ``src`` to ``dst``.

    A subdirectory of ``src`` is copied when it directly contains at least
    ``THRESHOLD`` files and a directory of the same name does not already
    exist under ``dst``. A progress line is printed after each subdirectory,
    whether or not it was copied.
    """
    species = get_immediate_subdirectories(src)
    total = len(species)
    print("Copying...")
    for done, name in enumerate(species):
        source_dir = src + '/' + name
        target_dir = dst + '/' + name
        # Species of at least THRESHOLD size
        big_enough = len(get_immediate_files(source_dir)) >= THRESHOLD
        if big_enough and not os.path.isdir(target_dir):
            shutil.copytree(source_dir, target_dir, symlinks=False, ignore=None)
        print(done, 'out of', total)
        
def resize_in(path):
    """Resize every image under ``path/<species>/`` to 160x120, in place.

    Each file found directly inside an immediate subdirectory of ``path``
    (except ``.DS_Store``) is read with OpenCV, rotated by 90 degrees when it
    is taller than wide, resized to 160x120 and written back over the
    original file. A progress line is printed after each subdirectory.

    Args:
        path: directory whose immediate subdirectories contain the images.
    """
    species = get_immediate_subdirectories(path)
    print("Resizing...")
    for counter, s in enumerate(species):
        for i in get_immediate_files(path+'/'+s):
            if i == '.DS_Store':
                continue
            filename = path+'/'+s+'/'+i
            ori_img = cv2.imread(filename)
            if ori_img is None:
                # cv2.imread signals an unreadable/non-image file by returning
                # None rather than raising; skip it instead of crashing.
                print('Skipping unreadable file:', filename)
                continue
            height, width = ori_img.shape[:2]
            if height > width:
                # Turn portrait images into landscape before resizing. The
                # previous code built a rotated copy but then resized the
                # unrotated image, so the rotation never took effect.
                # Counter-clockwise matches the positive 90-degree angle the
                # old rotation matrix used.
                ori_img = cv2.rotate(ori_img, cv2.ROTATE_90_COUNTERCLOCKWISE)
            new_img = cv2.resize(ori_img, (160, 120))
            cv2.imwrite(filename, new_img)
        print(counter, 'out of', len(species))
                
def augment_data(path):
    """Rename and augment the images of every species directory in ``path``.

    For each immediate subdirectory of ``path``:

    1. Files are renamed to ``<index>_<species>.JPG``, where ``<species>`` is
       the directory name with spaces replaced by underscores. A file is left
       untouched when the target name already exists.
    2. The stages ``make_some_noise``, ``flip_image``, ``rot_image``,
       ``change_pos``, ``change_val`` and ``change_sat`` run in that order.
       The directory is listed again before every stage, so each stage also
       processes the files written by the earlier stages and the file count
       multiplies at every step.

    ``.DS_Store`` entries are ignored throughout. A progress line is printed
    after each subdirectory.

    Args:
        path: directory whose immediate subdirectories contain the images.
    """
    species = get_immediate_subdirectories(path)
    stages = (make_some_noise, flip_image, rot_image,
              change_pos, change_val, change_sat)
    for counter, s in enumerate(species):
        species_dir = path+'/'+s
        label = s.replace(' ', '_')

        source_files = [name for name in get_immediate_files(species_dir)
                        if name != '.DS_Store']
        for index, old_name in enumerate(source_files):  # index = id of the parent image
            new_name = str(index)+'_'+label+'.JPG'
            if not os.path.isfile(species_dir+'/'+new_name):
                os.rename(species_dir+'/'+old_name, species_dir+'/'+new_name)

        for stage in stages:
            for name in get_immediate_files(species_dir):
                if name == '.DS_Store':
                    continue
                # splitext removes only the final extension, so names that
                # contain extra dots (e.g. a species ending in "sp.") keep
                # their full stem; split('.')[0] used to truncate them.
                stage(species_dir, os.path.splitext(name)[0])

        clear_output()
        print("Augmenting...")
        print(counter, 'out of', len(species))
            
def simple_augment_data(path):
    """Rename and lightly augment the images of every species directory.

    For each immediate subdirectory of ``path``:

    1. Files are renamed to ``<index>_<species>.JPG``, where ``<species>`` is
       the directory name with spaces replaced by underscores. A file is left
       untouched when the target name already exists.
    2. The stages ``make_some_noise``, ``flip_image``, ``rot_image`` and
       ``change_pos`` run in that order. The directory is listed again before
       every stage, so each stage also processes the files written by the
       earlier stages.

    ``.DS_Store`` entries are ignored throughout. A progress line is printed
    after each subdirectory.

    Args:
        path: directory whose immediate subdirectories contain the images.
    """
    species = get_immediate_subdirectories(path)
    stages = (make_some_noise, flip_image, rot_image, change_pos)
    for counter, s in enumerate(species):
        species_dir = path+'/'+s
        label = s.replace(' ', '_')

        source_files = [name for name in get_immediate_files(species_dir)
                        if name != '.DS_Store']
        for index, old_name in enumerate(source_files):  # index = id of the parent image
            new_name = str(index)+'_'+label+'.JPG'
            if not os.path.isfile(species_dir+'/'+new_name):
                os.rename(species_dir+'/'+old_name, species_dir+'/'+new_name)

        for stage in stages:
            for name in get_immediate_files(species_dir):
                if name == '.DS_Store':
                    continue
                # splitext removes only the final extension, so names that
                # contain extra dots (e.g. a species ending in "sp.") keep
                # their full stem; split('.')[0] used to truncate them.
                stage(species_dir, os.path.splitext(name)[0])

        clear_output()
        print("Augmenting...")
        print(counter, 'out of', len(species))
            
        
def flip_image(dir_path,img_name):
    """Write a flipped copy of ``<dir_path>/<img_name>.JPG``.

    The image is flipped with ``cv2.flip`` using flip code 0 (a flip around
    the x-axis per the OpenCV documentation) and stored next to the source
    as ``<img_name>_flip.JPG``.
    """
    stem = dir_path + '/' + img_name
    source = cv2.imread(stem + '.JPG')
    cv2.imwrite(stem + '_flip' + '.JPG', cv2.flip(source, 0))
        
def rot_image(dir_path,img_name):
    """Write seven rotated copies of ``<dir_path>/<img_name>.JPG``.

    The image is rotated about its centre by 45, 90, ..., 315 degrees at
    scale 1.0. The output canvas keeps the source width and height, and each
    result is saved as ``<img_name>_rot<angle>.JPG``.
    """
    stem = dir_path + '/' + img_name
    image = cv2.imread(stem + '.JPG')
    size = image.shape[1::-1]  # (width, height), the order OpenCV expects
    center = tuple(np.array(size) / 2)
    for angle in (45, 90, 135, 180, 225, 270, 315):
        matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
        rotated = cv2.warpAffine(image, matrix, size, flags=cv2.INTER_LINEAR)
        cv2.imwrite(stem + '_rot' + str(angle) + '.JPG', rotated)

def change_pos(dir_path,img_name):
    """Write eight translated copies of ``<dir_path>/<img_name>.JPG``.

    For each direction label (D, DL, L, UL, U, UR, R, DR) the image is
    shifted by 20 pixels along x and/or y with an affine warp that keeps the
    original canvas size, and saved as ``<img_name>_moved<label>.JPG``.

    Args:
        dir_path: directory containing the source image.
        img_name: file name of the source image without the ``.JPG`` suffix.
    """
    image = cv2.imread(dir_path+'/'+img_name+'.JPG')
    rows, cols = image.shape[:2]
    shift = 20
    # (label, tx, ty): positive ty is used for the "D" labels and positive tx
    # for the "R" labels.
    offsets = (
        ('D', 0, shift),
        ('DL', -shift, shift),
        ('L', -shift, 0),
        ('UL', -shift, -shift),
        ('U', 0, -shift),
        ('UR', shift, -shift),
        ('R', shift, 0),
        ('DR', shift, shift),
    )
    for direction, tx, ty in offsets:
        # The warp and write must run once per direction. They used to sit
        # after the loop, so only the last direction (DR) was ever saved.
        M = np.float32([[1, 0, tx], [0, 1, ty]])
        result = cv2.warpAffine(image, M, (cols, rows))
        cv2.imwrite(dir_path+'/'+img_name+'_moved'+direction+'.JPG', result)
    
def change_val(dir_path,img_name): #Value = lightness in HSV model
    """Write three brightened copies of ``<dir_path>/<img_name>.JPG``.

    For each increment (30, 60, 90) the image is converted to HSV, the
    increment is added to channel 2 with the result capped at 255, and the
    image is converted back to BGR and saved as ``<img_name>_val<n>.JPG``.
    """
    stem = dir_path + '/' + img_name
    image = cv2.imread(stem + '.JPG')
    for value in (30, 60, 90):
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        channel = hsv[:, :, 2]
        headroom = 255 - channel
        # Pixels with less headroom than the increment are pinned to 255.
        hsv[:, :, 2] = np.where(headroom < value, 255, channel + value)
        brightened = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
        cv2.imwrite(stem + '_val' + str(value) + '.JPG', brightened)
    
def change_sat(dir_path,img_name):
    """Write three more-saturated copies of ``<dir_path>/<img_name>.JPG``.

    For each increment (10, 20, 30) the image is converted to HSV, the
    increment is added to channel 1 with the result capped at 255, and the
    image is converted back to BGR and saved as ``<img_name>_sat<n>.JPG``.
    """
    stem = dir_path + '/' + img_name
    image = cv2.imread(stem + '.JPG')
    for saturation in (10, 20, 30):
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        channel = hsv[:, :, 1]
        headroom = 255 - channel
        # Pixels with less headroom than the increment are pinned to 255.
        hsv[:, :, 1] = np.where(headroom < saturation, 255, channel + saturation)
        boosted = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
        cv2.imwrite(stem + '_sat' + str(saturation) + '.JPG', boosted)
        
def make_some_noise(dir_path,img_name):
    """Write a Gaussian-blurred copy of ``<dir_path>/<img_name>.JPG``.

    Despite the name, no noise is added: the image is smoothed with a 3x3
    Gaussian kernel (sigma 0 is passed to ``cv2.GaussianBlur``) and saved as
    ``<img_name>_noisy.JPG``.
    """
    stem = dir_path + '/' + img_name
    source = cv2.imread(stem + '.JPG')
    blurred = cv2.GaussianBlur(source, (3, 3), 0)
    cv2.imwrite(stem + '_noisy' + '.JPG', blurred)
    

# Source directory: one subdirectory per species, passed as `src` to copy_proper_species.
originals = '../res/Originals'
# Working directory that receives the copied species; resized and augmented in place.
proper = '../res/Proper'
THRESHOLD = 10 # Minimal amount of pictures in species to take them into account



        


In [2]:
# Copy the species that have at least THRESHOLD pictures, clear the copy
# progress output, then resize the copied images in place.
copy_proper_species(originals,proper)
clear_output()
resize_in(proper)

Resizing...
78 out of 79


In [None]:
# Rename and augment the resized copies (blur, flip, rotation and shift stages).
simple_augment_data(proper)

Augmenting...
37 out of 79
