In [1]:
import os
import glob
from PIL import Image
import shutil
import random
import numpy as np
import tarfile

# Base folder for the raw dataset and the generated archive. Code in this
# notebook prefixes this string directly onto file names, so keep the
# trailing path separator.
ROOT_DIR = 'D:\\datasets\\'

In [2]:
def create_subimages(image_path, destination_dir, patch_width=160, patch_height=160):
    """Tile an image into non-overlapping patches and save each one as a JPEG.

    The image is scanned row by row starting at the top-left corner. Only
    complete patches are written: leftover pixels on the right and bottom
    edges that do not fill a whole patch are dropped. Patches are numbered
    from 1 in scan order and saved as ``<destination_dir><image_name>_<k>.jpg``.

    Args:
        image_path: Path of the source image.
        destination_dir: Output directory, used as a plain string prefix, so
            it must already end with a path separator.
        patch_width: Width of each patch in pixels.
        patch_height: Height of each patch in pixels.
    """
    image_name = os.path.splitext(os.path.basename(image_path))[0]
    # Context manager releases the underlying file as soon as tiling is done
    with Image.open(image_path) as img:
        img_width, img_height = img.size
        # Largest area that can be covered by whole patches
        usable_height = img_height - img_height % patch_height
        usable_width = img_width - img_width % patch_width
        k = 1
        for top in range(0, usable_height, patch_height):
            for left in range(0, usable_width, patch_width):
                box = (left, top, left + patch_width, top + patch_height)
                # PIL sizes are (width, height); the canvas must match the
                # crop box or non-square patches end up with the wrong shape.
                # Pasting onto an RGB canvas also converts the crop to RGB.
                patch = Image.new('RGB', (patch_width, patch_height), 255)
                patch.paste(img.crop(box))
                patch.save(f'{destination_dir}{image_name}_{k}.jpg')
                k += 1

def copy_image(image_path, destination_dir):
    """Re-save an image as ``<destination_dir><image_name>.jpg``.

    The image is opened with PIL and saved again under a ``.jpg`` name, so it
    is decoded and re-encoded rather than copied byte for byte.

    Args:
        image_path: Path of the source image.
        destination_dir: Output directory, used as a plain string prefix, so
            it must already end with a path separator.
    """
    image_name = os.path.splitext(os.path.basename(image_path))[0]
    # Context manager guarantees the source file is closed after saving
    with Image.open(image_path) as img:
        img.save(f'{destination_dir}{image_name}.jpg')

def create_compressed(img, destination_path, level, rotation):
    """Save a JPEG-compressed version of an image.

    Args:
        img: Image object exposing ``rotate`` and ``save``.
        destination_path: Where the compressed image is written.
        level: Value passed to the encoder as ``quality``.
        rotation: Rotation angle passed to ``img.rotate``; a falsy value
            (0 or None) skips the rotation.
    """
    source = img.rotate(rotation) if rotation else img
    save_options = {'format': 'jpeg', 'quality': level}
    source.save(destination_path, **save_options)

def create_noisified(img, destination_path, level, rotation):
    """Save a copy of an image corrupted with additive Gaussian noise.

    Pixel values are scaled to [0, 1], zero-mean Gaussian noise with standard
    deviation ``level`` is added, the result is clipped back to [0, 1] and
    converted to 8-bit before saving. Noise is drawn from NumPy's global
    random state.

    Args:
        img: Image to corrupt; must be convertible with ``np.array`` and
            expose ``rotate``.
        destination_path: Where the noisy image is written.
        level: Noise standard deviation on the [0, 1] intensity scale.
        rotation: Rotation angle passed to ``img.rotate``; a falsy value
            (0 or None) skips the rotation.
    """
    if rotation:
        img = img.rotate(rotation)
    pixels = np.array(img).astype(np.float32)
    pixels /= 255.
    pixels += level * np.random.normal(size=pixels.shape)
    pixels = np.clip(pixels, 0., 1.)
    # Round to the nearest level before casting: a plain uint8 cast truncates,
    # which biases every pixel downward and can turn a float32 round trip of
    # value k into k - 1.
    quantized = np.rint(pixels * 255).astype(np.uint8)
    Image.fromarray(quantized).save(destination_path)

def create_downscaled(img, destination_path, factor, rotation):
    """Save a blurred copy of an image made by shrinking and re-enlarging it.

    The image is resized down by ``factor`` with bicubic resampling and then
    resized back to its original dimensions, so the saved image has the same
    size as the input but less detail.

    Args:
        img: Image object exposing ``size``, ``rotate``, ``resize`` and ``save``.
        destination_path: Where the degraded image is written.
        factor: Positive divisor applied to both width and height.
        rotation: Rotation angle passed to ``img.rotate``; a falsy value
            (0 or None) skips the rotation.
    """
    if rotation:
        img = img.rotate(rotation)
    img_width, img_height = img.size
    # Clamp to 1 pixel: a side shorter than ``factor`` would otherwise
    # truncate to 0, which is not a valid resize target.
    small_size = (max(1, int(img_width / factor)), max(1, int(img_height / factor)))
    shrunk = img.resize(small_size, Image.BICUBIC)
    restored = shrunk.resize((img_width, img_height), Image.BICUBIC)
    restored.save(destination_path)

def _random_right_angle(enabled):
    """Return a random multiple of 90 degrees when enabled, otherwise 0."""
    return random.randint(0, 3) * 90 if enabled else 0


def preprocess_bsds500(tar_filename, seed=None):
    """
    Complete pre-process of the whole dataset and saving in a tar file.
    Not optimized for easier understanding (run only once anyway).

    Steps, all carried out in a scratch ``temp`` folder under ``ROOT_DIR``
    (any existing folder of that name is deleted first):

    1. Targets: train images are cut into patches with ``create_subimages``;
       test and val images are re-saved whole with ``copy_image``.
    2. Train/val inputs: for every target, one noisy (``_n``), one downscaled
       (``_d``) and one JPEG-compressed (``_c``) variant with a randomly
       chosen degradation level. Train variants also get a random rotation
       by a multiple of 90 degrees.
    3. Test inputs: every test target is degraded at each fixed level, one
       sub-folder per setting (``noise_15`` ... ``compress_40``).
    4. The ``targets`` and ``data`` folders are stored at the root of a
       gzip-compressed tar archive and the scratch folder is removed.

    Args:
        tar_filename: Path of the archive to create.
        seed: Optional integer (0 to 2**32 - 1) used to seed both ``random``
            and ``np.random`` so the generated data is reproducible. When
            None, the current global random states are used unchanged.
    """
    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)

    dataset_dir = os.path.join(ROOT_DIR, 'bsds500')
    temp_dir = os.path.join(ROOT_DIR, 'temp')
    # Start from an empty temp dir
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir)

    # Write sub-images in temp/targets/train dir and copy others in corresponding temp/dirs
    for local_dir in ['train', 'test', 'val']:
        # The trailing '' keeps a final separator: the helpers build output
        # paths by plain string concatenation.
        destination_dir = os.path.join(temp_dir, 'targets', local_dir, '')
        os.makedirs(destination_dir)
        # Sorted so the processing order does not depend on the file system
        for image_path in sorted(glob.glob(os.path.join(dataset_dir, local_dir, '*.jpg'))):
            if local_dir == 'train':
                create_subimages(image_path, destination_dir)
            else:
                copy_image(image_path, destination_dir)

    # Create a downscaled, noisified and compressed version of each train and val target
    # NOTE(review): train inputs are rotated here but the matching targets are
    # saved unrotated and the angle is not recorded - confirm the loader does
    # not rely on pixel-aligned input/target pairs.
    for local_dir in ['train', 'val']:
        augment = local_dir == 'train'
        destination_dir = os.path.join(temp_dir, 'data', local_dir, '')
        os.makedirs(destination_dir)
        for image_path in sorted(glob.glob(os.path.join(temp_dir, 'targets', local_dir, '*.jpg'))):
            image_name = os.path.splitext(os.path.basename(image_path))[0]
            with Image.open(image_path) as img:
                create_noisified(img, f'{destination_dir}{image_name}_n.jpg',
                                 random.choice([.15, .25, .50]), _random_right_angle(augment))
                create_downscaled(img, f'{destination_dir}{image_name}_d.jpg',
                                  random.randint(2, 4), _random_right_angle(augment))
                create_compressed(img, f'{destination_dir}{image_name}_c.jpg',
                                  random.randint(1, 4) * 10, _random_right_angle(augment))

    # Preprocess all test sets: (sub-folder name, degradation function, level)
    test_variants = (
        [(f'noise_{percent}', create_noisified, percent / 100.) for percent in (15, 25, 50)]
        + [(f'upscale_{factor}', create_downscaled, factor) for factor in (2, 3, 4)]
        + [(f'compress_{quality}', create_compressed, quality) for quality in (10, 20, 30, 40)]
    )
    for image_path in sorted(glob.glob(os.path.join(temp_dir, 'targets', 'test', '*.jpg'))):
        image_name = os.path.splitext(os.path.basename(image_path))[0]
        with Image.open(image_path) as img:
            for variant_dir, degrade, level in test_variants:
                destination_dir = os.path.join(temp_dir, 'data', 'test', variant_dir, '')
                os.makedirs(destination_dir, exist_ok=True)
                degrade(img, f'{destination_dir}{image_name}.jpg', level, 0)

    # Create the tar file from the temp dir content. Each top-level folder is
    # added under its own name so the archive root holds 'data/' and
    # 'targets/' directly, without a nameless root entry. The archive is
    # written under a temporary name and renamed only once complete, because
    # callers treat an existing archive file as a finished cache.
    partial_tar = f'{tar_filename}.part'
    with tarfile.open(partial_tar, "w:gz") as tar:
        for entry in sorted(os.listdir(temp_dir)):
            tar.add(os.path.join(temp_dir, entry), arcname=entry)
    os.replace(partial_tar, tar_filename)
    # Delete temp dir
    shutil.rmtree(temp_dir)

def load_bsds500(batch_size):
    """Make sure the pre-processed BSDS500 archive exists under ``ROOT_DIR``.

    The archive ``bsds500.tgz`` is generated with ``preprocess_bsds500`` only
    when nothing exists at that path yet. ``batch_size`` is accepted but not
    used so far, and nothing is returned: building datasets from the archive
    is not implemented in this function.
    """
    archive_path = f'{ROOT_DIR}bsds500.tgz'
    archive_is_cached = os.path.exists(archive_path)
    # Only run the pre-processing when no archive is cached
    if not archive_is_cached:
        preprocess_bsds500(archive_path)
    # TODO: create tf datasets from tar file

In [3]:
# Runs the pre-processing if the archive is missing; 32 is the batch size argument
load_bsds500(32)