In [8]:
# Copyright 2020 Dennis Adelved.. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================


In [None]:
import os
import numpy as np
import shutil
from PIL import Image
import glob
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
import bruges
import random
import cv2
import math
from scipy import signal
import matplotlib.patches as patch
import tensorflow as tf
import functools
import inspect
import sys
import six

In [3]:


#Convolve an image with a Ricker wavelet along one axis of the pixel array
def convolve_image_from_path(image_path,frequency,axis=0,rgb='True'):
    """Load an image and convolve it with a Ricker wavelet along ``axis``.

    Parameters
    ----------
    image_path : path of the image file, opened with PIL.
    frequency : forwarded to ``bruges.filters.ricker`` as ``f``.
    axis : axis of the pixel array along which every 1-D convolution runs.
    rgb : truthy -> every channel of the image is convolved separately;
        falsy -> the image is converted with mode 'LA' and only the first
        channel of that conversion is convolved. The strings 'True' and
        'False' (any case) are accepted as well, because the default of this
        parameter is the string 'True'.

    Returns
    -------
    The convolved pixel array. In the per-channel branch the result is
    written back into a copy of the loaded image, so it keeps that array's
    dtype; in the single-channel branch the output of the convolution is
    returned directly.

    Raises
    ------
    ValueError : if ``rgb`` is a string other than 'True'/'False'.
    """
    # The default used to be compared with `== True` / `== False`; the string
    # 'True' matches neither, which left the result unbound. Normalise first.
    if isinstance(rgb, str):
        flag = rgb.strip().lower()
        if flag not in ('true', 'false'):
            raise ValueError("rgb must be a boolean or 'True'/'False', got %r" % (rgb,))
        per_channel = flag == 'true'
    else:
        per_channel = bool(rgb)

    wavelet = bruges.filters.ricker(duration=0.100, dt=0.001, f=frequency) #wavelet

    def convolve_plane(plane):
        # mode='same' keeps the length of every trace unchanged
        return np.apply_along_axis(lambda t: np.convolve(t,wavelet,mode='same'),
                                   axis=axis,arr=plane)

    if per_channel:
        with Image.open(image_path) as handle:
            new_im = np.asarray(handle).copy()

        for c in range(new_im.shape[2]):
            new_im[:,:,c] = convolve_plane(new_im[:,:,c])
    else:
        with Image.open(image_path) as handle:
            plane = np.asarray(handle.convert('LA'))[:,:,0].copy()
        new_im = convolve_plane(plane)

    return new_im




#rewrite the leading entries (folder, file name, path) of an xml outputted from LabelImg
def change_xml(xml_path,new_folder,new_fname,new_path,channels=3):
    """Parse an annotation xml and overwrite its first three root children.

    Elements are addressed by position, not by tag name: the text of root
    children 0, 1 and 2 becomes ``new_folder``, ``new_fname`` and ``new_path``.
    When ``channels`` equals 1, the text of the third child of the fifth root
    child is set to '1' (presumably the size/depth entry of a LabelImg file --
    confirm against your annotations).

    Returns the modified ``ElementTree``; nothing is written to disk here.
    """
    document = ET.parse(xml_path)
    top = document.getroot()

    for position, text in enumerate((new_folder, new_fname, new_path)):
        top[position].text = text

    if channels == 1:
        fifth_child = top[4]
        fifth_child[2].text = '1'

    return document


    
#convolve a grayscale image with Ricker wavelets of random integer frequency in [min_freq, max_freq]
def convolve_gray_image_random(im_path, min_freq,max_freq, multiple_axes=True):
    """Convolve a grayscale version of an image with random-frequency wavelets.

    Two frequencies are always drawn with ``random.randint(min_freq, max_freq)``
    and a wavelet is built for each, whether or not both end up being used.

    Returns
    -------
    ``[vert, hor]`` when ``multiple_axes == True``: the image convolved along
    axis 0 with the first wavelet and along axis 1 with the second one.
    ``[hor]`` otherwise: the image convolved along axis 1 with the first wavelet.
    """
    gray = np.asarray(Image.open(im_path).convert('L'))

    freqs = [random.randint(min_freq,max_freq) for _ in range(2)]
    wavelets = [bruges.filters.ricker(duration=0.100, dt=0.001, f=freq) for freq in freqs]

    def along(axis, kernel):
        # mode='same' keeps every trace at its original length
        return np.apply_along_axis(lambda trace: np.convolve(trace,kernel,mode='same'),
                                   axis=axis,arr=gray)

    if multiple_axes == True:
        vertical = along(0, wavelets[0])
        horizontal = along(1, wavelets[1])
        return [vertical,horizontal]

    return [along(1, wavelets[0])]
        

        

#augmentation gray
def convert_to_gray(image_path):
    """Open ``image_path``, convert it to PIL mode 'L' and return a copy of the pixels as a numpy array."""
    grayscale = Image.open(image_path).convert('L')
    return np.asarray(grayscale).copy()
#augmentation sobel
def augment_sobel(image_path):
    """Return the Sobel gradient magnitude of the image at ``image_path``.

    First-order derivatives in x and in y are computed with a 3x3 kernel and
    64-bit float output, then combined as sqrt(gx**2 + gy**2).
    """
    pixels = np.asarray(Image.open(image_path))
    grad_x, grad_y = (
        cv2.Sobel(pixels,cv2.CV_64F,dx,dy,ksize=3)
        for dx, dy in ((1,0),(0,1))
    )
    return np.sqrt(grad_x**2 + grad_y**2)
    
#augmentation canny
def augment_canny(image_path):
    """Run Canny edge detection on the image at ``image_path``.

    The upper threshold is picked at random from 80, 85, 90 and 95; the lower
    threshold is half of the picked value. Returns the output of ``cv2.Canny``.
    """
    upper_choices = np.array([80, 85, 90, 95])
    pixels = np.asarray(Image.open(image_path))
    upper = upper_choices[random.randrange(len(upper_choices))]
    lower = upper/2
    return cv2.Canny(pixels,lower,upper)
    

#collect the jpg and xml files of a directory and return them as a list of (jpg, xml) pairs
def import_img_xml_paths(data_dir):
    """List ``(jpg_path, xml_path)`` tuples for the files found in ``data_dir``.

    Both file lists are sorted and then paired by position (the file names
    themselves are not compared), so surplus files of either kind are dropped
    by ``zip``.
    """
    def sorted_matches(pattern):
        found = glob.glob(os.path.join(data_dir,pattern))
        found.sort()
        return found

    images = sorted_matches('*.jpg')
    annotations = sorted_matches('*.xml')
    return [pair for pair in zip(images,annotations)]
    
    
#splits training and validation data, simple hold-out split
def simple_holdout_split(image_xml_pairs,split=0.2):
    """Randomly split the pairs into a training and a validation part.

    Parameters
    ----------
    image_xml_pairs : sequence (or any iterable) of items to split. It is
        copied before shuffling, so the caller's object is left untouched.
    split : fraction of the items that goes to the validation part; the
        validation size is ``floor(len(pairs) * split)``.

    Returns
    -------
    ``(train, val)`` -- two lists that together hold every input item once.
    """
    # Shuffle a private copy: shuffling the argument itself would silently
    # reorder the caller's list and would fail for tuples.
    shuffled = list(image_xml_pairs)
    random.shuffle(shuffled)

    num_val = math.floor(len(shuffled)*split)

    val = shuffled[:num_val]
    train = shuffled[num_val:]

    return train,val
    


    

In [384]:
#Augmentation script: convolve every image with wavelets of random frequency
#INPUT_DATA is the folder (inside the working directory) that is scanned for images and xmls,
#OUTPUT_DIR the folder name handed to the augmentation as its output directory
INPUT_DATA, OUTPUT_DIR = 'folder_name', 'output_directory'

#absolute path of the input folder
data_dir = os.path.join(os.getcwd(), INPUT_DATA)


def convolve_image_data(data_dir,OUTPUT_DIR,min_freq=30,max_freq=100):
    """Write wavelet-convolved copies of every image in ``data_dir``.

    For each (jpg, xml) pair -- paired by sorted position -- the image is
    convolved vertically and horizontally with random-frequency wavelets via
    ``convolve_gray_image_random``. Each result is saved in the output folder
    as ``wavelet_vert_<name>`` / ``wavelet_hor_<name>``, together with a copy
    of the xml whose folder, file name and path entries point at the new image.

    Parameters
    ----------
    data_dir : directory that is searched for ``*.jpg`` and ``*.xml`` files.
    OUTPUT_DIR : output folder, resolved against the current working
        directory; it is created when missing.
    min_freq, max_freq : bounds of the random wavelet frequency, forwarded to
        ``convolve_gray_image_random``.
    """
    out = os.path.join(os.getcwd(), OUTPUT_DIR)
    # Do not depend on a global `cwd` defined in some other cell; also copes
    # with an already existing or nested output folder.
    os.makedirs(out, exist_ok=True)
    out_folder = os.path.basename(os.path.normpath(out))

    used_cores = sorted(glob.glob(os.path.join(data_dir,'*.jpg')))
    used_xml = sorted(glob.glob(os.path.join(data_dir,'*.xml')))

    for im_path, xml_path in zip(used_cores,used_xml):

        vert_aug, hor_aug = convolve_gray_image_random(im_path, min_freq, max_freq,
                                                       multiple_axes=True)

        # basename instead of split('/') so other path separators work too
        source_name = os.path.basename(im_path)

        for prefix, aug in (('wavelet_vert_', vert_aug), ('wavelet_hor_', hor_aug)):
            new_name = prefix + source_name
            new_path = os.path.join(out,new_name)

            tree = change_xml(xml_path,out_folder,new_name,new_path,1)

            # splitext only strips the extension; splitting on the first '.'
            # would cut the path at any dot in a directory or file name.
            new_xml_path = os.path.splitext(new_path)[0] + '.xml'

            plt.imsave(new_path,Image.fromarray(aug),cmap='gray')
            tree.write(new_xml_path)

#run the augmentation; both arguments are required by convolve_image_data
convolve_image_data(data_dir,OUTPUT_DIR)


In [6]:
#Define paths for input and output
cwd = os.getcwd()

#Source directories, each containing the images with their respective xml files
SRC_TRAIN, SRC_VALID, SRC_TEST = (
    os.path.join(cwd, folder)
    for folder in ('training_directory', 'validation_directory', 'test_directory')
)

#names of the output directories for the augmented images and xmls
SAVE_DIR_TRAIN = 'save_directory_augmented_training'
SAVE_DIR_VALID = 'save_directory_augmented_validation'
SAVE_DIR_TEST = 'save_directory_augmented_test'

#choose type of augmentation ['gray','sobel','canny']
augmentation_type = 'gray'


In [7]:
#Apply the selected augmentation to every image of one source directory
data_dir = SRC_TEST

#images and xmls are paired by sorted position
used_cores = sorted(glob.glob(os.path.join(data_dir,'*.jpg')))
used_xml = sorted(glob.glob(os.path.join(data_dir,'*.xml')))
paths = list(zip(used_cores,used_xml))

#the destination follows from the source directory that was chosen
if data_dir == SRC_VALID:
    aug_dest = os.path.join(cwd,SAVE_DIR_VALID)
elif data_dir == SRC_TEST:
    aug_dest = os.path.join(cwd,SAVE_DIR_TEST)
else:
    aug_dest = os.path.join(cwd,SAVE_DIR_TRAIN)

#resolve the augmentation once instead of re-checking it for every image
augmenters = {
    'gray': convert_to_gray,
    'canny': augment_canny,
    'sobel': augment_sobel,
}
augment = augmenters.get(augmentation_type)

if augment is None:
    print('choose a valid augmentation type')
else:
    #imsave does not create missing folders, so make sure the destination exists
    os.makedirs(aug_dest, exist_ok=True)
    dest_folder = os.path.basename(os.path.normpath(aug_dest))

    for im_path, xml_path in paths:

        augmented = augment(im_path)

        #basename instead of split('/') so other path separators work too
        new_name = os.path.basename(im_path)

        new_path = os.path.join(aug_dest,new_name)
        print(new_path)

        tree = change_xml(xml_path,dest_folder,new_name,new_path,1)

        #splitext only strips the extension; splitting on the first '.'
        #would cut the path at any dot in a directory or file name
        new_xml_path = os.path.splitext(new_path)[0] + '.xml'

        print(new_name)

        plt.imsave(new_path,augmented,cmap='gray')

        tree.write(new_xml_path)
    



In [91]:
#Fold-wise validation split
#every fold gets its own training, validation and test folder; each fold is an
#independent random split of the complete data set, so folds can share files

#define number of folds
samples = ['fold1','fold2','fold3','fold4','fold5']

data_dir = 'test_data' #data directory containing the xml and jpg files

#split sizes: the first N_TRAIN_VAL shuffled pairs are divided into training and
#validation data (N_TRAIN of them for training), all remaining pairs are test data
N_TRAIN_VAL = 15
N_TRAIN = 10

#folder names per fold, always in the order train, valid, test
samps = [['train_'+s, 'valid_'+s, 'test_'+s] for s in samples]

#the directory listing does not change between folds, so read it once
xmls = sorted(glob.glob(os.path.join(data_dir,'*.xml')))
ims = sorted(glob.glob(os.path.join(data_dir,'*.jpg')))

for s in samps:
    #start every fold from the same sorted pairing
    data = list(zip(ims,xmls))
    random.shuffle(data)
    split = data[0:N_TRAIN_VAL]
    test = data[N_TRAIN_VAL:]
    random.shuffle(split)
    train = split[0:N_TRAIN]
    val = split[N_TRAIN:]

    #pair folders and subsets by position rather than by substring of the folder
    #name, so a fold name containing 'train', 'valid' or 'test' cannot misroute files
    for f, subset in zip(s, (train, val, test)):
        #os.mkdir raises if the folder already exists, which keeps a re-run
        #from mixing a new split into the files of an old one
        os.mkdir(f)
        for pair in subset:
            for src in pair:
                src_path = os.path.join(os.getcwd(),src)
                dst_path = os.path.join(os.getcwd(),f,os.path.basename(src))
                shutil.copy(src_path,dst_path)
    
    