In [None]:
import json
import sys
import numpy as np
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt
import glob
import os
import shutil
import re
import pandas as pd

from PIL import Image, ImageEnhance

from imutils import contours
import imutils
from skimage import measure
import cv2

from tqdm.notebook import tqdm_notebook
from multiprocessing import Pool

from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

IMAGE_REGEX = re.compile(".*?\.(png|jpe?g)$", flags = re.IGNORECASE)
TOP_LEVEL = os.path.expanduser("~/Desktop/jmk/ArthroplastyID/HIPS/")
_IMAGE_EXTENSION = 'png'

def _show(image):
    """Draw `image` in a new matplotlib figure (returns None)."""
    plt.figure()
    pixels = np.asarray(image)
    imshow(pixels)

def _convert_rgba_rgb(image):
    image.load()
    background = Image.new("RGB", image.size, (255, 255, 255))
    background.paste(image, mask=image.split()[3]) # 3 is the alpha channel
    return background

def _load(path):
    """Open the image file at `path` with PIL and return the Image object."""
    return Image.open(path)

def _crop(image, coords):
    return image.crop(coords)

def _smart_crop_coordinates(image, x_lim=300, y_lim = 300):
    """Ask smartcrop for its best crop of `image` and return it as a box.

    Args:
        image: image object handed to `SmartCrop.crop`.
        x_lim, y_lim: width and height passed to `SmartCrop.crop`.

    Returns:
        (left, top, right, bottom) built from the 'top_crop' entry of the result.
    """
    # `smartcrop` is used here but never imported by the notebook's import cell,
    # so this function used to fail with NameError on a fresh kernel.
    import smartcrop

    cropper = smartcrop.SmartCrop(rule_of_thirds = False,
                                  skin_weight = 0,
                                  saturation_weight = 1)
    best = cropper.crop(image, x_lim, y_lim)['top_crop']
    left, top = best['x'], best['y']
    return (left, top, left + best['width'], top + best['height'])

def auto_crop_main(image, _SIZE_THRESHOLD = 300, debug = False):
    """Compute a crop box for `image`.

    Args:
        image: a PIL-style image (anything exposing `.size`) or a path string.
            Paths are loaded and flattened to RGB first.
        _SIZE_THRESHOLD: side length the crop box is padded towards.
        debug: when True, print the sizes and the raw smart-crop box.

    Returns:
        (left, top, right, bottom). Images whose sides are both at most
        `_SIZE_THRESHOLD` get the full-image box (0, 0, width, height).
    """
    # if both coord > 300 -> image detection at 300x300
    # if largest coord > 300 but smallest coord < 300 -> object detection at 300x300
    # if max (coords) > 300, pad small coord with black to make a square
    # if both coord < 300 -> box to 300
    # All, then downsize to 200x200
    label = '<image>'
    if isinstance(image, str):
        # The original read an undefined name `image_path` here (NameError).
        label = os.path.basename(image)
        image = _load(image)
        image = _convert_rgba_rgb(image)
    width, height = image.size
    if debug:
        print ("{}: {}x{}".format(label, height, width))
    if max(height, width) <= _SIZE_THRESHOLD:
        return (0, 0, width, height)

    coords = _smart_crop_coordinates(image, x_lim = _SIZE_THRESHOLD, y_lim = _SIZE_THRESHOLD)
    left, top, right, bottom = coords
    new_height = bottom - top
    new_width = right - left
    if debug:
        print('Height: ({} -> {}); Width: ({} -> {})'.format(height, new_height, width, new_width))
        print(coords)
    y_pad = _SIZE_THRESHOLD - new_height
    x_pad = _SIZE_THRESHOLD - new_width
    # prioritize symmetric x padding, clamped to the left image edge
    new_left = max(left - x_pad/2, 0)
    # Clamp the padded right edge to the image width. The original discarded the
    # padded value and fell back to the unpadded `right`.
    new_right = min(new_left + _SIZE_THRESHOLD, width)
    # prioritize bottom y padding
    new_bottom = bottom + y_pad
    if new_bottom > height:
        new_bottom = height
        # if not all padding used, remove from new_top
        # NOTE(review): this expression is kept as written; it looks like it was
        # meant to be `new_bottom - _SIZE_THRESHOLD` -- confirm the intent.
        new_top = top - (_SIZE_THRESHOLD - new_bottom)
        new_top = new_top if new_top > 0 else 0
    else:
        new_top = top
    return (new_left, new_top, new_right, new_bottom)

def _resize(image, DIMENSION = 200):
    """Return `image` resized to DIMENSION x DIMENSION with the LANCZOS filter.

    `image` may be an image object or a path string; paths are opened with _load.
    """
    source = _load(image) if isinstance(image, str) else image
    target_size = (DIMENSION, DIMENSION)
    return source.resize(target_size, Image.LANCZOS)

# Get the pixel from the given image
def get_pixel(image, i, j):
    """Return the pixel of `image` at column `i`, row `j`, or None past the far edges.

    Valid columns are 0..width-1 and valid rows 0..height-1. The original check
    used `>` and therefore let `i == width` / `j == height` through to
    `getpixel`, which is out of range.
    """
    width, height = image.size
    if i >= width or j >= height:
        return None
    return image.getpixel((i, j))

# Create a new image with the given size
def create_image(i, j):
    """Return a new white RGB image of size (i, j)."""
    size = (i, j)
    return Image.new("RGB", size, "white")

# Create a Grayscale version of the image
# Uses the ITU-R Recommendation BT.601-7 for converting 
def convert_grayscale(image):
    """Return a new RGB image whose three channels all hold the luma of `image`.

    Luma is 0.299*R + 0.587*G + 0.114*B (the BT.601 weights), truncated to int.
    The input is first normalised to RGB so that single-band or palette images,
    whose pixels are plain ints rather than tuples, no longer crash on
    `pixel[0]`. For RGBA input the alpha band is ignored, as before.
    """
    rgb = image if image.mode == "RGB" else image.convert("RGB")
    width, height = rgb.size

    # Pixel-access objects for the source and the freshly created output image
    source = rgb.load()
    new = create_image(width, height)
    pixels = new.load()

    for i in range(width):
        for j in range(height):
            red, green, blue = source[i, j]
            gray = int((red * 0.299) + (green * 0.587) + (blue * 0.114))
            pixels[i, j] = (gray, gray, gray)

    return new

In [None]:
# Get classes of all files in AP and Lateral Folders
# regex: match any string that ends in .png, .jpeg, .jpg


def image_search(files, regex):
    """Return, in their original order, the entries of `files` that `regex.search` matches."""
    return [candidate for candidate in files if regex.search(candidate)]

def parse_raw_class_names(files, view):
    """Group image paths by the class name embedded in their file name.

    The class name is the leading part of the base name, before any trailing
    digits, dashes, "(n)" or "copy" markers and the image extension. It is
    upper-cased, and every run of whitespace or of dashes becomes a single '+'.

    Args:
        files: iterable of image paths.
        view: value stored under 'VIEW' for every file (callers pass "AP" or "LATERAL").

    Returns:
        dict mapping class name -> list of {'PATH': path, 'BASE': base name, 'VIEW': view}.

    Raises:
        Exception: if a base name does not match the expected pattern.
    """
    # Raw strings: `\s`, `\d` and `\(` are invalid escapes in normal string literals.
    class_regex = re.compile(r"(.*?)\s*?(?:\d|-)*\s*?(?:\(\d\)|\d*|copy)+\.(?:png|jpe?g)$", flags = re.IGNORECASE)
    # One precompiled pass replaces the two per-file re.sub calls; the result is
    # the same because the replacement '+' is matched by neither alternative.
    separator_regex = re.compile(r"\s+|-+")
    d = {}
    for f in files:
        base = os.path.basename(f)
        s = class_regex.search(base)
        if not s:
            raise Exception("{} is not a valid image name".format(f))
        class_name = separator_regex.sub('+', s.group(1).upper())
        d.setdefault(class_name, []).append({'PATH':f, 'BASE': base, 'VIEW': view})
    return d

def get_filename_data(path):
    """Split a pipeline file name into its fields.

    Expected base name: '{CLASS}_{VIEW}_{INDEX}_{RESIZE}_{SET}_{AUGMENTID}.png'.
    INDEX, RESIZE and SET are returned as ints; CLASS, VIEW and AUGMENTID stay strings.
    """
    # everything before the last '.' of the base name ('' when there is no '.')
    stem = os.path.basename(path).rpartition('.')[0]
    fields = stem.split('_')
    _class, _view, _index, _resize, _set, _augment = (fields[k] for k in range(6))
    return {
        'CLASS': _class,
        'VIEW': _view,
        'INDEX': int(_index),
        'RESIZE': int(_resize),
        'SET': int(_set),
        'AUGMENTID': _augment,
    }

def make_filename(dictionary):
    """Build '{CLASS}_{VIEW}_{INDEX}_{RESIZE}_{SET}_{AUGMENTID}.<ext>' from `dictionary`.

    The extension is the module-level _IMAGE_EXTENSION; the six fields are read
    from `dictionary` by key.
    """
    field_names = ('CLASS', 'VIEW', 'INDEX', 'RESIZE', 'SET', 'AUGMENTID')
    fields = {name: dictionary[name] for name in field_names}
    return '{CLASS}_{VIEW}_{INDEX}_{RESIZE}_{SET}_{AUGMENTID}.{EXTENSION}'.format(
        EXTENSION = _IMAGE_EXTENSION, **fields)

def get_final_dict(AP, LATERAL, image_regex):
    """Return {class name: [file records]} for the images in both view directories.

    Files directly inside `AP` and `LATERAL` that match `image_regex` are parsed
    with parse_raw_class_names; for each class the LATERAL records come first,
    followed by the AP records.
    """
    ap_files = image_search(glob.glob(AP + "/*"), image_regex)
    lateral_files = image_search(glob.glob(LATERAL + "/*"), image_regex)

    by_class_ap = parse_raw_class_names(ap_files, "AP")
    by_class_lateral = parse_raw_class_names(lateral_files, "LATERAL")

    # start from the lateral records and append the AP records class by class
    final_dict = by_class_lateral.copy()
    for class_name, records in by_class_ap.items():
        final_dict.setdefault(class_name, []).extend(records)
    return final_dict


In [None]:
#####
# 1. Copy every source image into one flat directory (1_RENAME) under its
#    canonical pipeline name; a later step sorts them into per-class directories
#####

# File name layout: CLASS_VIEW_INDEX_RESIZE_SET_AUGMENTID (class is the dict key)
# RESIZE 0 = not resized; SET and AUGMENTID also start at 0
# NOTE(review): `final_dict` is not assigned in any cell of this notebook --
# presumably it came from get_final_dict(...) in a cell that was removed; confirm.
_RENAME_DEST = os.path.join(TOP_LEVEL, '1_RENAME')

for key, class_images in final_dict.items():
    # INDEX counts from 1 within each class, across both views
    for index, image in enumerate(class_images, start=1):
        _name = make_filename({
            'CLASS': key,
            'VIEW': image['VIEW'],
            'INDEX': index,
            'RESIZE': 0,
            'SET': 0,
            'AUGMENTID': 0,
        })
        if not os.path.isdir(_RENAME_DEST):
            os.mkdir(_RENAME_DEST)
        shutil.copyfile(image['PATH'], os.path.join(_RENAME_DEST, _name))

In [None]:
#####
# 2. Copy files into one directory per class
# New file structure:
# TOP_LEVEL
#     2_ORGANIZED
#          CLASS
#               CLASS_VIEW_1_RESIZE...
#               CLASS_VIEW_2_RESIZE...
#####
_SRC = os.path.join(TOP_LEVEL, '1_RENAME')
_DEST = os.path.join(TOP_LEVEL, '2_ORGANIZED')

# parsed file-name fields of every image in _SRC, grouped by class name
classes = {}
for file in image_search(glob.glob(os.path.join(_SRC,'*')), IMAGE_REGEX):
    name = get_filename_data(file)
    classes.setdefault(name['CLASS'], []).append(name)

# plan the (source, destination) pairs, creating each class directory on the way
tocopy = []
for _class, items in classes.items():
    _newdir = os.path.join(_DEST, _class)
    # makedirs also creates _DEST itself; the previous os.mkdir failed with
    # FileNotFoundError because nothing in the notebook creates 2_ORGANIZED.
    os.makedirs(_newdir, exist_ok=True)
    for item in items:
        _name = make_filename(item)
        tocopy.append((os.path.join(_SRC, _name), os.path.join(_newdir, _name)))

for _oldfilename, _newfilename in tqdm_notebook(tocopy):
    shutil.copyfile(_oldfilename, _newfilename)

In [None]:
# add new images from bryan to the repo
# from BRYAN_ADDITIONS
_src = os.path.expanduser("~/Desktop/jmk/ArthroplastyID/HIPS/BRYAN_ADDITIONS/Mixed 4.24.20")
_dest = os.path.expanduser("~/Desktop/jmk/ArthroplastyID/HIPS/2_ORGANIZED")

# every .png in _src or any directory below it
src_files = []
for _directory, _subdirs, _filenames in os.walk(_src):
    src_files.extend(glob.glob(_directory + '/*.png'))

# number of .png files already present per class directory in _dest;
# new files continue numbering from there
index_d = {}
for f in [x[0] for x in os.walk(_dest)][1:]:
    index_d[f.split(os.sep)[-1]] = len(glob.glob(f+'/*.png'))

# copy each new image under its canonical name (no pixel conversion happens here)
for f in tqdm_notebook(src_files):
    # the class is the name of the directory that directly contains the file
    _class = f.split(os.sep)[-2]
    index_d[_class] = index_d.get(_class, 0) + 1
    # exist_ok: safe whether or not the class directory is already there
    os.makedirs(os.path.join(_dest, _class), exist_ok=True)
    # NOTE(review): this searches the WHOLE path for "ap", so a directory or class
    # name containing those letters marks every file in it as AP -- confirm
    # whether only the file name should be inspected.
    _view = 'AP' if re.search('(ap)', f, flags = re.IGNORECASE) else 'LATERAL'
    _newname = make_filename({'CLASS': _class,
                              'VIEW': _view,
                              'INDEX': index_d[_class],
                              'RESIZE': 0,
                              'SET': 0,
                              'AUGMENTID': 0})
    shutil.copyfile(f, os.path.join(_dest, _class, _newname))
    

In [None]:
#####
# 2.5 - Build 2_ORGANIZED_AP: a copy of 2_ORGANIZED restricted to AP images
#####
_SRC = os.path.join(TOP_LEVEL, '2_ORGANIZED')
_DEST = os.path.join(TOP_LEVEL, '2_ORGANIZED_AP')
dirs = [entry[0] for entry in os.walk(_SRC)][1:] # drop the first entry, which is _SRC itself
for d in dirs:
    candidates = image_search(glob.glob(os.path.join(d, '*')), IMAGE_REGEX)
    for image in candidates:
        if get_filename_data(image)['VIEW'] != 'AP':
            continue
        # class directory = name of the directory holding the image
        _class = os.path.dirname(image).split(os.sep)[-1]
        _newdir = os.path.join(_DEST, _class)
        if not os.path.isdir(_newdir):
            os.makedirs(_newdir)
        shutil.copyfile(image, os.path.join(_newdir, os.path.basename(image)))

In [None]:
#####
# 3. Make spreadsheet of images
#####
column_names = ['Implant', 'Num_AP', 'Num_LATERAL', 'Total', 'GT40']

# NOTE(review): this reads 2_ORGANIZED_AP, which step 2.5 fills with AP images
# only, so Num_LATERAL will be 0 -- confirm whether 2_ORGANIZED was intended.
_SRC = os.path.join(TOP_LEVEL, '2_ORGANIZED_AP')
dirs = [x[0] for x in os.walk(_SRC)][1:] # first is src, remove

rows = []
for d in dirs:
    num_ap = 0
    num_lateral = 0
    for image in image_search(glob.glob(os.path.join(d, '*')), IMAGE_REGEX):
        if get_filename_data(image)['VIEW'] == 'LATERAL':
            num_lateral += 1
        else:
            num_ap += 1
    # Computed after the loop: previously `total` was only assigned inside it, so
    # an empty directory raised NameError or reused the previous directory's total.
    total = num_ap + num_lateral
    rows.append({'Implant' : os.path.basename(d).replace('+',' '),
                 'Num_AP' : num_ap,
                 'Num_LATERAL' : num_lateral,
                 'Total': total,
                 'GT40': total >= 40})  # note: true for 40 or more

# Build the frame once from the collected rows; DataFrame.append no longer
# exists in pandas 2.x and copied the whole frame on every call.
df = pd.DataFrame(rows, columns = column_names)
df.to_csv('2_ORGANIZED_LIST.csv', index = False)

In [None]:
#####
# 4. Convert all images to grayscale and png
#####

# NOTE(review): no cell in this notebook creates 3_GT20 -- presumably it is
# assembled by hand from 2_ORGANIZED; confirm.
_SRC = os.path.join(TOP_LEVEL, '3_GT20')
_DEST = os.path.join(TOP_LEVEL, '4_GRAYSCALE')

if not os.path.isdir(_DEST):
    os.mkdir(_DEST)

# (input path, output path) pairs consumed by the worker pool below
calls = []
dirs = [entry[0] for entry in os.walk(_SRC)][1:]
for d in dirs:
    # only the directory path of each os.walk entry is used; its files are globbed here
    for image in image_search(glob.glob(os.path.join(d, '*')), IMAGE_REGEX):
        _class = os.path.dirname(image).split(os.sep)[-1]
        # swap the original extension for _IMAGE_EXTENSION so the output is saved as png
        _stem_parts = os.path.basename(image).split('.')[:-1]
        _base = '.'.join(_stem_parts + [_IMAGE_EXTENSION])
        _class_dir = os.path.join(_DEST, _class)
        calls.append((image, os.path.join(_class_dir, _base)))

        if not os.path.isdir(_class_dir):
            os.mkdir(_class_dir)
        
def wrapper(call):
    """Convert one image to grayscale and save it.

    Args:
        call: (input path, output path) pair.

    Returns:
        1, so the caller can sum the results to count processed images.
    """
    src, dest = call[0], call[1]
    # Image.open keeps the file open until the image is closed; the context
    # manager releases the handle once the pixels have been read.
    with _load(src) as image:
        gray = convert_grayscale(image)
    gray.save(dest)
    return 1

# The context manager shuts the worker processes down when the loop finishes;
# the pool was previously left running for the life of the kernel.
_sum = 0
with Pool(processes=16) as pool:
    # `done` instead of `_`, which IPython uses for the last cell output
    for done in tqdm_notebook(pool.imap_unordered(wrapper, calls), total = len(calls)):
        _sum += done
print("Total images: {}".format(str(_sum)))

In [None]:
#####
# There is no #5...
# 6. After moving all included images to a separate directory, time to change the SET variable in the filenames
# SET variable is 0 by default, definitions below
# 0 - default, not set. Synonymous with NULL
# 1 - training, needs to have augmentation done on this (next step)
# 2 - validation, no augmentation needed
# 3 - testing, no augmentation needed
# NOTE: I'm going to ignore imbalance for now, max imbalance is 10:1, which might be okay
#####

def split_train_val_test(list_of_images, train_frac = 0.8, val_frac = 0.1, random_state = 42):
    """Split `list_of_images` into (train, val, test) with two train_test_split calls.

    The first call holds out 1 - train_frac of the items; the second divides
    that remainder between validation and test. Both use `random_state`.
    """
    assert (train_frac + val_frac) < 1.0
    from sklearn.model_selection import train_test_split
    test_frac = 1 - (train_frac + val_frac)
    train, rest = train_test_split(
        list_of_images, test_size = 1-train_frac, random_state = random_state)
    # share of the held-out remainder that goes to the test set
    test_share_of_rest = test_frac/(test_frac + val_frac)
    val, test = train_test_split(
        rest, test_size = test_share_of_rest, random_state = random_state)
    return train, val, test

def copy_train_val_test(files, DESTINATION, IDENTIFIER):
    """Copy `files` into DESTINATION/<class dir>/ with the SET field set to IDENTIFIER.

    The class directory is the name of the directory that contains each source
    file; it is created under DESTINATION when missing.
    """
    for src in tqdm_notebook(files):
        fields = get_filename_data(src)
        # second-to-last component of the normalised path = containing directory
        class_dir = os.path.normpath(src).split(os.sep)[-2]
        fields['SET'] = IDENTIFIER
        target = os.path.join(DESTINATION, class_dir, make_filename(fields))
        target_dir = os.path.dirname(target)
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        shutil.copyfile(src, target)

_SRC = os.path.join(TOP_LEVEL, '4_GRAYSCALE')
_DEST = os.path.join(TOP_LEVEL, '5_EXPERIMENT_NAMES')
dirs = [entry[0] for entry in os.walk(_SRC)][1:] # drop the first entry, which is _SRC itself
trains, vals, tests = [], [], []
# optional filter: when non-empty, only directories ending in one of these run
to_run = []
for d in dirs:
    selected = len(to_run) == 0 or any(d.endswith(suffix) for suffix in to_run)
    if not selected:
        continue
    print('RUNNING ON: {}'.format(d))
    files = list(image_search(glob.glob(os.path.join(d,'*')), IMAGE_REGEX))
    train, val, test = split_train_val_test(files)
    trains.extend(train)
    vals.extend(val)
    tests.extend(test)
# rewrite the SET field while copying: 1 = train, 2 = validation, 3 = test
for _identifier, _subset in ((1, trains), (2, vals), (3, tests)):
    copy_train_val_test(_subset, _DEST, _identifier)


In [None]:
#####
# 7. Check that the number of train, validation and test images per class is as expected
#####
_out_string = 'DIRECTORY: {DIR}\n\tTRAIN: {train}\n\tVALIDATE: {val}\n\tTEST: {test}\n\tTOTAL: {total}'
_SRC = os.path.join(TOP_LEVEL, '5_EXPERIMENT_NAMES')
dirs = [entry[0] for entry in os.walk(_SRC)][1:] # drop the first entry, which is _SRC itself
for d in dirs:
    files = list(image_search(glob.glob(os.path.join(d,'*')), IMAGE_REGEX))
    # tally by SET value: 1 = train, 2 = validation, 3 = test; anything else is ignored
    set_counts = {1: 0, 2: 0, 3: 0}
    for f in files:
        set_id = get_filename_data(f)['SET']
        if set_id in set_counts:
            set_counts[set_id] += 1
    num_train, num_val, num_test = set_counts[1], set_counts[2], set_counts[3]
    print(_out_string.format(
        DIR = os.path.basename(d),
        train = num_train,
        val = num_val,
        test = num_test,
        total = num_train + num_val + num_test
    ))

In [None]:
#####
# 8. Augment training images, copy all images to 6_EXPERIMENT_FINAL_ORIGINAL_SIZE
#####
# Input: images whose file names already carry their SET value
_SRC = os.path.join(TOP_LEVEL, '5_EXPERIMENT_NAMES')
# Output: originals plus augmented training images
_DEST = os.path.join(TOP_LEVEL, "6_EXPERIMENT_FINAL_ORIGINAL_SIZE")

def make_augments(path, num, seed = None):
    """Return `num` randomly augmented PIL images generated from the image at `path`.

    Args:
        path: image file to augment.
        num: number of augmented images to produce.
        seed: optional random seed forwarded to `ImageDataGenerator.flow`; the
            default None keeps the previous unseeded behaviour.

    Returns:
        list of `num` PIL images.
    """
    datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        brightness_range=(0.5, 1.2),
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

    # Read the pixels inside a context manager so the file handle is released;
    # it was previously left open for every augmented source image.
    with _load(path) as source:
        img = img_to_array(source)
    img = img.reshape((1,) + img.shape)  # prepend a batch axis of length 1
    imgen = datagen.flow(img, seed = seed) # yields a generator object

    # make {num} random augmentations
    return [Image.fromarray(np.uint8(next(imgen)[0].astype(int))) for _ in range(num)]

# number of augmented copies generated per training image
_NUM_AUGMENTS = 100

to_save = []  # (src, dest) pairs for the unaugmented originals, copied at the end
dirs = [x[0] for x in os.walk(_SRC)][1:] # first is src, remove

# optional filter: when non-empty, only directories ending in one of these run
to_run = []
for d in tqdm_notebook(dirs):
    # Hidden-directory check on the directory NAME; the old check tested the
    # full path, which never starts with '.'.
    if os.path.basename(d).startswith('.'):
        continue
    if len(to_run) != 0 and not any(d.endswith(x) for x in to_run):
        continue
    print("RUNNING ON: {}".format(d))
    files = image_search(glob.glob(os.path.join(d,'*')), IMAGE_REGEX)
    files = [x for x in files if not os.path.basename(x).startswith('.')]
    for f in tqdm_notebook(files):
        _n = get_filename_data(f)
        _class_dir = os.path.normpath(f).split(os.sep)[-2]
        _new_dir = os.path.join(_DEST, _class_dir)
        to_save.append((f, os.path.join(_new_dir, make_filename(_n)))) # (src, dest)
        if _n['SET'] != 1: # only training images are augmented
            continue
        os.makedirs(_new_dir, exist_ok=True)
        for _index, a in enumerate(make_augments(f, _NUM_AUGMENTS), start=1):
            # Change AUGMENTID on a copy; the original code copied the dict but
            # then mutated `_n` itself.
            _new_n = _n.copy() # shallow
            _new_n['AUGMENTID'] = _index
            a.save(os.path.join(_new_dir, make_filename(_new_n)))
print('Saving files...')
for _src_path, _dest_path in to_save:
    # A class without training images never had its directory created above.
    os.makedirs(os.path.dirname(_dest_path), exist_ok=True)
    shutil.copyfile(_src_path, _dest_path)
        

In [None]:
#####
# 9. Count the images per class and per SET after augmentation
#####
_out_string = 'DIRECTORY: {DIR}\n\tTRAIN: {train}\n\tVALIDATE: {val}\n\tTEST: {test}\n\tTOTAL: {total}'
_SRC = os.path.join(TOP_LEVEL, '6_EXPERIMENT_FINAL_ORIGINAL_SIZE')
dirs = [entry[0] for entry in os.walk(_SRC)][1:] # drop the first entry, which is _SRC itself
for d in dirs:
    files = list(image_search(glob.glob(os.path.join(d,'*')), IMAGE_REGEX))
    # tally by SET value: 1 = train, 2 = validation, 3 = test; anything else is ignored
    set_counts = {1: 0, 2: 0, 3: 0}
    for f in files:
        set_id = get_filename_data(f)['SET']
        if set_id in set_counts:
            set_counts[set_id] += 1
    num_train, num_val, num_test = set_counts[1], set_counts[2], set_counts[3]
    print(_out_string.format(
        DIR = os.path.basename(d),
        train = num_train,
        val = num_val,
        test = num_test,
        total = num_train + num_val + num_test
    ))

In [None]:
#####
# 10. Make sure all images are valid
#####
_SRC = os.path.join(TOP_LEVEL, '6_EXPERIMENT_FINAL_ORIGINAL_SIZE')
dirs = [x[0] for x in os.walk(_SRC)][1:] # first is src, remove
all_files = []
for d in dirs:
    all_files.extend(image_search(glob.glob(os.path.join(d,'*')), IMAGE_REGEX))

# (CLASS, VIEW, INDEX) of every image that could not be read
s = set()
for f in tqdm_notebook(all_files):
    try:
        # Image.open only reads the header; load() forces the pixel data to be
        # decoded, which is what exposes truncated or corrupted files.
        with _load(f) as _img:
            _img.load()
    except Exception:
        _n = get_filename_data(f)
        s.add((_n['CLASS'], _n['VIEW'],_n['INDEX']))
# some images didn't save correctly / got corrupted, time to prune the weeds
for a in s:
    print(a)

In [None]:
#####
# 11. Resize all images
#####

_SRC = os.path.join(TOP_LEVEL, '6_EXPERIMENT_FINAL_ORIGINAL_SIZE')
_DEST = os.path.join(TOP_LEVEL, '7_EXPERIMENT_FINAL_{}')  # {} is filled with the target size
_TARGET_SIZES = [224, 299]
dirs = [x[0] for x in os.walk(_SRC)][1:] # first is src, remove

# optional filter: when non-empty, only directories ending in one of these run
to_run = []

all_files = []
for d in dirs:
    if len(to_run) != 0 and not any(d.endswith(x) for x in to_run):
        continue
    all_files.extend(image_search(glob.glob(os.path.join(d,'*')), IMAGE_REGEX))

for f in tqdm_notebook(all_files):
    _b = os.path.basename(f)
    _class_dir = os.path.normpath(f).split(os.sep)[-2]
    _data = get_filename_data(_b)
    # Open each source once and reuse it for every size; it used to be
    # re-opened per size and the file handles were never closed.
    with _load(f) as _original:
        for i in _TARGET_SIZES:
            _newdir = os.path.join(_DEST.format(i), _class_dir)
            os.makedirs(_newdir, exist_ok=True)
            _data['RESIZE'] = i
            _newname = os.path.join(_newdir, make_filename(_data))
            _resized = _resize(_original, DIMENSION=i)
            _resized.save(_newname)

In [None]:
#####
# 12. Make spreadsheet of numbers gathered in final_dict
#####

_src = os.path.join(TOP_LEVEL, "7_EXPERIMENT_FINAL_224")
column_names = ['Implant', 'Num_AP', 'Num_LATERAL', 'Total']
df = pd.DataFrame(columns = column_names)

# show only the first os.walk entry of _src (nothing is printed if there is none)
images = os.walk(_src)
_first_entry = next(images, None)
if _first_entry is not None:
    print(_first_entry)
# for k in final_dict.keys():
#     num_ap = 0
#     num_lateral = 0
#     for image in final_dict[k]:
#         if image['VIEW'] == 'AP':
#             num_ap += 1
#         elif image['VIEW'] == 'LATERAL':
#             num_lateral += 1
#         else:
#             assert 1==0
#     toAdd = {'Implant' : k,
#             'Num_AP' : num_ap,
#             'Num_LATERAL' : num_lateral,
#             'Total': num_ap + num_lateral}
#     df = df.append(toAdd,ignore_index = True)

# df.to_csv('ImplantNumbers.csv', index = False)

In [None]:
#####
# 13. Make Keras style train, validate, test directories
#####
# NOTE(review): the source holds the 299-pixel images while the destination is
# named 8_VGG_224 -- confirm which size is intended.
_SRC = os.path.join(TOP_LEVEL, "7_EXPERIMENT_FINAL_299")
_DEST = os.path.join(TOP_LEVEL, "8_VGG_224")

dirs = [entry[0] for entry in os.walk(_SRC)][1:]
all_files = []

# target directory tree:
# _DEST
#     train
#         class_1
#            images
#     validation
#         class_1
#            images
#     test
#         class_1
#            images

for d in dirs:
    all_files.extend(image_search(glob.glob(os.path.join(d,'*')), IMAGE_REGEX))

# SET value in the file name -> name of the split directory
_SET_DIRECTORIES = {1: "train", 2: "validation", 3: "test"}

for image in tqdm_notebook(all_files):
    _class = os.path.dirname(image).split(os.sep)[-1]
    _data = get_filename_data(image)
    if _data['SET'] not in _SET_DIRECTORIES:
        raise Exception("SET not valid")
    _type = _SET_DIRECTORIES[_data['SET']]
    # augmented copies (AUGMENTID other than '0') are left out
    if _data['AUGMENTID'] != '0':
        continue
    _newdir = "{}/{}/{}".format(_DEST, _type, _class)
    if not os.path.exists(_newdir):
        os.makedirs(_newdir)
    shutil.copyfile(image, os.path.join(_newdir, os.path.basename(image)))

In [None]:
%matplotlib inline
img = Image.open('/Volumes/MEDIA/ArthoplastyID/HIPS/5_EXPERIMENT_NAMES/BIOMET+ARCOS/BIOMET+ARCOS_AP_11_0_1_0.png')

from keras.preprocessing.image import *
#         rotation_range=40,
#         width_shift_range=0.2,
#         height_shift_range=0.2,
#         brightness_range=(0.5, 1.2),
#         shear_range=0.2,
#         zoom_range=0.2,
#         horizontal_flip=True,
#         fill_mode='nearest')
img = img_to_array(img)
img = img.reshape((1,) + img.shape)  # this is a Numpy array with shape (1, 3, 150, 150)
imgen = ImageDataGenerator(fill_mode='nearest').flow(img)
fig, ax = plt.subplots(1, 10, figsize=(20, 10))
ax = ax.ravel()
ax[0].imshow(img[0].astype(int))
for i in range(1,10):
    ax[i].imshow(next(imgen)[0].astype(int))
    

In [None]:
##########
# Old, one-off functions
##########

def rename_bhr(root_dir = None):
    """Rename .png files whose path contains "BHR": '-' -> ' ' and 'BHR' -> 'Birmingham'.

    Args:
        root_dir: directory tree to process. When omitted, the notebook-level
            global `dir_path` is used, as the function originally did.

    Every directory below the root is processed. The original rebuilt its file
    list on each os.walk step and so only renamed files in the last directory
    visited.
    """
    if root_dir is None:
        root_dir = dir_path  # legacy behaviour: relies on a global set elsewhere
    r = re.compile(r'[^\.].*?\.png$', flags = re.IGNORECASE)
    # collect all candidates first so renaming does not interfere with the walk
    l = []
    for root, dirs, files in os.walk(root_dir):
        l.extend(os.path.join(root, x) for x in files if r.match(x))
    for f in l:
        if "BHR" not in f:
            continue
        d = os.path.dirname(f)
        n = os.path.basename(f)
        n = n.replace('-',' ')
        n = n.replace('BHR', 'Birmingham')
        new_name = os.path.join(d,n)
        print('{}: {} -> {}'.format(os.path.dirname(f),
                        os.path.basename(f),
                        os.path.basename(new_name)))
        os.rename(f, new_name)
            
def contour_test():
    """One-off experiment: show blur, threshold and smart-crop results for `image_path`.

    Reads the notebook-level global `image_path`. Displays, in order, the
    blurred image, its binary threshold, the inverted binary threshold of the
    grayscale image, the unmodified image and its smart crop.
    """
    bgr = cv2.imread(image_path)
    blurred = cv2.GaussianBlur(bgr, (11,11),0)
    _, thresh = cv2.threshold(blurred, 160, 255, cv2.THRESH_BINARY)
    for stage in (blurred, thresh):
        _show(stage)

    # OpenCV loads BGR: convert to RGB, then to grayscale
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    binary = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)[1]
    _show(binary)
    found, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # draw all contours (the annotated array is not displayed afterwards)
    rgb = cv2.drawContours(rgb, found, -1, (0, 255, 0), 2)
    _show(_load(image_path))
    crop_box = _smart_crop_coordinates(_load(image_path))
    _show(_crop(_load(image_path), crop_box))
    

def failed_buffer():
    """One-off experiment: pad `image_path` with a black right border, then smart-crop it.

    Reads the notebook-level global `image_path`. Shows the padded image,
    prints the crop box, runs auto_crop_main in debug mode and shows the crop.
    """
    _BLACK = [0,0,0]

    # add 171 pixels of black on the right-hand side only
    padded = cv2.copyMakeBorder(cv2.imread(image_path), 0, 0, 0, 171,
                                cv2.BORDER_CONSTANT, value = _BLACK)
    # OpenCV arrays are BGR while PIL expects RGB, so swap the channels first
    image = Image.fromarray(cv2.cvtColor(padded, cv2.COLOR_BGR2RGB))

    _show(image)
    # NOTE(review): this used to call `auto_crop_coordinates`, which is not defined
    # anywhere in the notebook; `_smart_crop_coordinates` is presumed to be its
    # current name -- confirm.
    coords = _smart_crop_coordinates(image)
    print(coords)

    # run for its debug printout only; the returned box is not used
    auto_crop_main(image, debug = True)

    _show(_crop(image,coords))

In [None]:
# One-off: in every .png below LATERAL whose file name contains "M:L"
# (any case), replace '-' with ' ' and "M:L" with "ML".
# NOTE(review): `LATERAL` is not assigned in any cell of this notebook -- confirm its value.
r = re.compile(r'[^\.].*?\.png$', flags = re.IGNORECASE)
# Collect from EVERY walked directory; the list used to be overwritten on each
# os.walk step, leaving only the last directory's files.
l = []
for root, dirs, files in os.walk(LATERAL):
    l.extend(os.path.join(root,x) for x in files if r.match(x))
for f in l:
    d = os.path.dirname(f)
    n = os.path.basename(f)
    if "M:L" not in n.upper():
        continue
    n = n.replace('-',' ')
    # Substitute in the file name only. The original substituted in the full path
    # `f`, which discarded the '-' replacement and could point into a directory
    # that does not exist.
    n = re.sub('M:L', 'ML', n, flags = re.IGNORECASE)
    new_name = os.path.join(d,n)
    print('{}: {} -> {}'.format(os.path.dirname(f),
                        os.path.basename(f),
                        os.path.basename(new_name)))
    os.rename(f, new_name)