# Preprocess Dataset/Generate Dataset

In [1]:
import os
import torch
import pathlib
import PIL
from fastai.vision import *
from torchvision import transforms

In [2]:
# Dataset root: the notebook's working directory as an absolute path.
# `Path('.').parent` is still `'.'`, so the previous expression already resolved
# to the current working directory; `Path.cwd()` states that directly.
DIV2K_path = pathlib.Path.cwd()

## Get Image Info

In [3]:
# Training images: collect file names relative to the HR folder and report the
# smallest side length found across all images.
train_HR = DIV2K_path/'DIV2K_train_HR'
train_image_list = ImageList.from_folder(train_HR)
train_image_name_list = [img_path.relative_to(train_HR) for img_path in train_image_list.items]
shapes = []
for img_path in train_image_list.items:
    # Context manager closes each file as soon as its size has been read,
    # instead of leaving one open handle per image.
    with PIL.Image.open(img_path) as img:
        shapes.append(img.size)
print(f"min dimension of all images={torch.min(torch.tensor(shapes))}")

min dimension of all images=648


In [4]:
# Validation images: collect file names relative to the HR folder and report the
# smallest side length found across all images.
valid_HR = DIV2K_path/'DIV2K_valid_HR'
valid_image_list = ImageList.from_folder(valid_HR)
valid_image_name_list = [img_path.relative_to(valid_HR) for img_path in valid_image_list.items]
shapes = []
for img_path in valid_image_list.items:
    # Context manager closes each file as soon as its size has been read,
    # instead of leaving one open handle per image.
    with PIL.Image.open(img_path) as img:
        shapes.append(img.size)
print(f"min dimension of all images={torch.min(torch.tensor(shapes))}")

min dimension of all images=816


## Crop HR Images and save

In [5]:
# Crop / downscale configuration used by the cells below.
high_res: int = 600  # side length of the square HR crops (also part of the output folder name)
low_res_factor: int = 4  # integer divisor applied to high_res to obtain the LR side length

In [6]:
# Output folders for the cropped HR images; the folder name records the crop size.
train_HR_crop = DIV2K_path/f'DIV2K_train_HR_crop_{high_res}'
valid_HR_crop = DIV2K_path/f'DIV2K_valid_HR_crop_{high_res}'
for crop_dir in (train_HR_crop, valid_HR_crop):
    # exist_ok keeps the cell re-runnable when the folders are already there
    crop_dir.mkdir(parents=True, exist_ok=True)

In [7]:
def crop_HR_image_and_save(src_path, dest_path, image_name_list, crop_size=None):
    """Randomly crop every listed image to a square and save it under `dest_path`.

    Sequential loop that applies the same two-step crop as `HR_Cropper`. The
    previous version relied on a global `HR_crop_transforms` that is not defined
    anywhere in this notebook, so it raised `NameError` on the first image.

    Args:
        src_path: Directory (a `pathlib.Path`) holding the source images.
        dest_path: Directory (a `pathlib.Path`) the crops are written to; the
            relative image names are preserved.
        image_name_list: Iterable of image paths relative to `src_path`.
        crop_size: Side length of the square crop. When omitted, the
            notebook-level `high_res` setting is used.
    """
    if crop_size is None:
        crop_size = high_res  # notebook-level configuration value
    for image_name in image_name_list:
        src_image_path = src_path/image_name
        target_image_path = dest_path/image_name
        # Relative names may contain sub-folders; make sure the target folder exists.
        target_image_path.parent.mkdir(parents=True, exist_ok=True)
        # Context manager releases the source file handle once the crop is saved.
        with PIL.Image.open(src_image_path) as src_img:
            # Built per image: the first crop squares the image to its shorter side,
            # the second cuts the final crop (reflect-padding if the image is smaller).
            hr_crop = transforms.Compose([
                transforms.RandomCrop(min(src_img.size)),
                transforms.RandomCrop(crop_size, pad_if_needed=True, padding_mode='reflect'),
            ])
            hr_crop(src_img).save(target_image_path)

In [8]:
class HR_Cropper(object):
    """Callable that saves a random square crop of one image.

    An instance is handed to `parallel` together with a list of image names;
    each call reads `src_path/image_name` and writes the crop to
    `dest_path/image_name`.

    NOTE(review): the crops are random and no seed is set here, so re-running
    the cell produces a different dataset.
    """

    def __init__(self, src_path, dest_path, high_res):
        """Store the source folder, destination folder and crop side length.

        Args:
            src_path: Directory (a `pathlib.Path`) holding the source images.
            dest_path: Directory (a `pathlib.Path`) the crops are written to.
            high_res: Side length of the square crop.
        """
        self.src_path = src_path
        self.dest_path = dest_path
        self.high_res = high_res

    def __call__(self, image_name, i):
        """Crop one image and save it under the same relative name.

        Args:
            image_name: Image path relative to `src_path`.
            i: Unused. Presumably the item index that fastai's `parallel`
                passes as second argument — confirm against the fastai version.
        """
        src_image_path = self.src_path/image_name
        target_image_path = self.dest_path/image_name
        # Relative names may contain sub-folders; make sure the target folder exists.
        target_image_path.parent.mkdir(parents=True, exist_ok=True)
        # Context manager releases the source file handle once the crop is saved.
        with PIL.Image.open(src_image_path) as src_img:
            # First crop squares the image to its shorter side; second crop cuts the
            # final high_res square, reflect-padding if the image is smaller than that.
            hr_crop = transforms.Compose([
                transforms.RandomCrop(min(src_img.size)),
                transforms.RandomCrop(self.high_res, pad_if_needed=True, padding_mode='reflect'),
            ])
            hr_crop(src_img).save(target_image_path)

In [9]:
# Crop every HR image and write the result: validation set first, then training set.
valid_cropper = HR_Cropper(valid_HR, valid_HR_crop, high_res)
parallel(valid_cropper, valid_image_name_list)

train_cropper = HR_Cropper(train_HR, train_HR_crop, high_res)
parallel(train_cropper, train_image_name_list)

In [10]:
# Cropped training images: collect relative file names and confirm the smallest
# side length after cropping.
train_HR_crop_image_list = ImageList.from_folder(train_HR_crop)
train_HR_crop_image_name_list = [img_path.relative_to(train_HR_crop) for img_path in train_HR_crop_image_list.items]
shapes = []
for img_path in train_HR_crop_image_list.items:
    # Context manager closes each file as soon as its size has been read,
    # instead of leaving one open handle per image.
    with PIL.Image.open(img_path) as img:
        shapes.append(img.size)
print(f"min dimension of all images={torch.min(torch.tensor(shapes))}")

min dimension of all images=600


In [11]:
# Cropped validation images: collect relative file names and confirm the smallest
# side length after cropping.
valid_HR_crop_image_list = ImageList.from_folder(valid_HR_crop)
valid_HR_crop_image_name_list = [img_path.relative_to(valid_HR_crop) for img_path in valid_HR_crop_image_list.items]
shapes = []
for img_path in valid_HR_crop_image_list.items:
    # Context manager closes each file as soon as its size has been read,
    # instead of leaving one open handle per image.
    with PIL.Image.open(img_path) as img:
        shapes.append(img.size)
print(f"min dimension of all images={torch.min(torch.tensor(shapes))}")

min dimension of all images=600


## Generate Low Resolution Data

In [12]:
# Integer division would silently truncate; fail fast when the factor does not
# divide high_res evenly, so that low_res_size * low_res_factor == high_res.
if high_res % low_res_factor != 0:
    raise ValueError(
        f"high_res={high_res} is not divisible by low_res_factor={low_res_factor}"
    )
low_res_size = high_res // low_res_factor
print(f"low_res_size={low_res_size}")

low_res_size=150


In [13]:
# Output folders for the low-resolution images; the folder name records the LR size.
train_LR = DIV2K_path/f'DIV2K_train_LR_{low_res_size}'
valid_LR = DIV2K_path/f'DIV2K_valid_LR_{low_res_size}'
for lr_dir in (train_LR, valid_LR):
    # exist_ok keeps the cell re-runnable when the folders are already there
    lr_dir.mkdir(parents=True, exist_ok=True)

In [14]:
class Downscaler(object):
    """Callable that saves a bilinearly downscaled RGB copy of one image.

    An instance is handed to `parallel` together with a list of image names;
    each call reads `src_path/image_name` and writes a
    `low_res_size` x `low_res_size` image to `dest_path/image_name`.
    """

    def __init__(self, src_path, dest_path, low_res_size):
        """Store the source folder, destination folder and output side length.

        Args:
            src_path: Directory (a `pathlib.Path`) holding the source images.
            dest_path: Directory (a `pathlib.Path`) the downscaled images are written to.
            low_res_size: Side length of the square output image.
        """
        self.src_path = src_path
        self.dest_path = dest_path
        self.low_res_size = low_res_size

    def __call__(self, image_name, i):
        """Downscale one image and save it under the same relative name.

        Args:
            image_name: Image path relative to `src_path`.
            i: Unused. Presumably the item index that fastai's `parallel`
                passes as second argument — confirm against the fastai version.
        """
        src_image_path = self.src_path/image_name
        target_image_path = self.dest_path/image_name
        # Relative names may contain sub-folders; make sure the target folder exists.
        target_image_path.parent.mkdir(parents=True, exist_ok=True)
        target_size = (self.low_res_size, self.low_res_size)
        # Context manager releases the source file handle once the result is saved.
        with PIL.Image.open(src_image_path) as src_img:
            # Convert to RGB *before* resizing: Pillow falls back to nearest-neighbour
            # for "1"/"P" mode images, so resizing first would ignore BILINEAR there.
            downscaled_img = src_img.convert('RGB').resize(target_size, resample=PIL.Image.BILINEAR)
            downscaled_img.save(target_image_path)

In [15]:
# Downscale every cropped HR image and write the LR copy: validation set first, then training set.
valid_downscaler = Downscaler(valid_HR_crop, valid_LR, low_res_size)
parallel(valid_downscaler, valid_HR_crop_image_name_list)

train_downscaler = Downscaler(train_HR_crop, train_LR, low_res_size)
parallel(train_downscaler, train_HR_crop_image_name_list)