In [7]:
# author: Lino Grossano lino.grossano@gmail.com
# author: Stefano Manzini stefano.manzini@gmail.com
from PIL import Image
from pathlib import Path
from glob import glob
import os
import numpy as np

In [8]:
# Demo input: the image file to tile and the folder that contains it.
# Both values are handed to load_pic() further down.
image_name = "wallhaven-g78rvl.jpg"
folder_name = "imgs_to_crop"

In [9]:
def load_pic(image_name, folder_name):

    """
    Open the image <image_name> stored inside the
    directory <folder_name> and hand it back.

    params
    ======

    image_name: <str> file name of the picture
    folder_name: <str> directory holding the picture

    returns
    =======

    whatever Image.open() returns for the joined path
    """

    return Image.open(Path(folder_name) / image_name)

In [10]:
def crop_in_tiles_and_save(
    image, img_filename=None, tile_size=28, shift=0, img_type="jpg",
    ):
    
    """
    Crop an image into square tiles and save every tile to disk.

    Every tile is a square of *tile_size* pixels. *shift* = 0 crops the
    image from coordinates (0,0), while *shift* = tile_size/2
    (or other values) crops tiles that overlap the edges of the
    unshifted grid. Tiles that would extend past the right or bottom
    border of the image are skipped.

    All tiles of one image go into a folder called
    "<img_filename>_tiles", created in the current working directory if
    it does not exist yet. Each tile is saved as
    "<img_filename>_<x>_<y>_offset_<shift>.<img_type>", where <x> and
    <y> are the zero-padded upper-left coordinates of the tile and the
    offset field is left empty when *shift* is 0.
    
    
    params
    ======
    
    image: a Pillow open image
    img_filename: <str> name of the input image. If None, the last path
        component of image.filename is used; when that does not look
        like a file name a warning is printed and "image.jpg" is used
    tile_size: <int> pixels; size of the tile side, must be > 0
    shift: <int>: the offset from 0,0 in pixels
    img_type: <str>: extension given to the output files (no explicit
        format is passed to save())

    raises
    ======

    AssertionError: tile_size / shift are not int, or img_type is not str
    ValueError: tile_size is not positive
    """
    
    assert isinstance(tile_size, int)
    assert isinstance(shift, int)
    assert isinstance(img_type, str)
    if tile_size <= 0:
        raise ValueError(f"tile_size must be a positive integer, got {tile_size}")
    
    if img_filename is None:
        # Objects without a usable `filename` attribute end up on the
        # warning path below instead of raising AttributeError.
        full_name = str(getattr(image, "filename", "") or "")

        # Keep what follows the last separator: "/" when the path has
        # one, "\\" otherwise (Windows-style paths). A bare file name
        # with no separator at all is kept unchanged.
        separator = "/" if "/" in full_name else "\\"
        img_filename = full_name.rsplit(separator, 1)[-1]
        
        if "." not in img_filename or len(img_filename) <= 1:
            print("**Warning**: something went wrong guessing the image filename.")
            print(f"I think this is the image name: {img_filename}")
            print(f"This is the full path: {full_name}")
            img_filename = "image.jpg"
    
    width, height = image.size
       
    folder = Path(img_filename + "_tiles")
    folder.mkdir(exist_ok=True) #skip errors if folder already exists

    # same for every tile, so computed once
    offset = "" if shift == 0 else str(shift)

    # The upper bound stops at the last origin where a full tile still
    # fits, so partial tiles are never produced.
    for x in range(shift, width - tile_size + 1, tile_size):
        for y in range(shift, height - tile_size + 1, tile_size):
            
            # tile coord ===
            tile_coord = (
                x, y, # upper left coords
                x + tile_size, y + tile_size # lower right coords
            )
            
            # output filename and path ===
            outfile_name = f"{img_filename}_{x:04d}_{y:04d}_offset_{offset}.{img_type}"
            
            image.crop(tile_coord).save(folder / outfile_name)

In [11]:
def crop_in_tiles(
    image, tile_size=28, shift=0):

    """
    Generator that walks over an image and yields square
    crops of tile_size × tile_size pixels, one per iteration,
    column by column (x outer, y inner).

    Only full tiles are produced: whenever fewer than
    tile_size pixels remain towards the right or bottom
    border, that position is skipped.

    params
    ======

    image: a Pillow open image
    tile_size: <int> pixels; size of the tile side
    shift: <int>: the offset from 0,0 in pixels
    """

    assert isinstance(tile_size, int)
    assert isinstance(shift, int)

    img_w, img_h = image.size

    # upper-left origins along each axis that still leave room for a full tile
    lefts = [
        left for left in range(shift, img_w, tile_size)
        if img_w - left >= tile_size
    ]
    tops = [
        top for top in range(shift, img_h, tile_size)
        if img_h - top >= tile_size
    ]

    for left in lefts:
        for top in tops:
            # (left, upper, right, lower)
            box = (left, top, left + tile_size, top + tile_size)
            yield image.crop(box)

In [12]:
# Open the demo image located at <folder_name>/<image_name>.
pic = load_pic(image_name, folder_name)

In [13]:
# Save the tiles of the demo image with the default settings (28-pixel tiles,
# no shift); with no img_filename given, the name is derived from pic.filename.
crop_in_tiles_and_save(pic) # 3942 files (OK!)

In [14]:
class Dataset:

    """
    Paired clean / noisy pictures, cut into single-channel tiles.

    The constructor scans <folder_name> for "*.JPG" files, splits them
    into a clean and a noisy group according to a tag contained in the
    file name, and opens every picture. make_dataset() then cuts the
    pictures into square tiles and stores each tile's R, G and B channel
    as a separate numpy array.

    attributes
    ==========

    folder_name: the folder given to the constructor
    tile_size: <int> tile side in pixels used by make_dataset()
    basedir: <Path> working directory at construction time
    clean_pics_filenames / noise_pics_filenames: sorted file names
    clean_pics / noise_pics: opened images, same order as the names
    clean_tiles_ / noise_tiles_: lists of channel arrays, only
        available after make_dataset() has been called
    """

    def __init__(self, folder_name, tile_size=28, clean_tag="ISO200", noise_tag="ISO1600"):

        """
        params
        ======

        folder_name: <str> directory containing the *.JPG pictures
        tile_size: <int> pixels; size of the tile side
        clean_tag: <str> substring identifying clean picture file names
        noise_tag: <str> substring identifying noisy picture file names

        raises
        ======

        FileNotFoundError / NotADirectoryError: folder_name is missing
            or is not a directory
        AssertionError: the number of clean and noisy pictures differs
        """

        self.folder_name = folder_name
        self.tile_size = tile_size

        # kept for backward compatibility; the working directory itself
        # is never changed
        self.basedir = Path(os.getcwd())

        # loading image names from dataset directory ===
        folder = Path(folder_name)
        if not folder.exists():
            raise FileNotFoundError(f"dataset folder not found: {folder_name}")
        if not folder.is_dir():
            raise NotADirectoryError(f"not a directory: {folder_name}")

        # Path.glob() lists the folder without os.chdir(), so no
        # process-wide state is touched. Dot-files are filtered out
        # explicitly because glob("*.JPG") never returned them.
        img_files = [
            entry.name
            for entry in folder.glob("*.JPG")
            if not entry.name.startswith(".")
        ]

        self.clean_pics_filenames = sorted([x for x in img_files if clean_tag in x])
        self.noise_pics_filenames = sorted([x for x in img_files if noise_tag in x])

        assert len(self.clean_pics_filenames) == len(self.noise_pics_filenames)

        self.clean_pics = [self._load_pic(x, folder_name) for x in self.clean_pics_filenames]
        self.noise_pics = [self._load_pic(x, folder_name) for x in self.noise_pics_filenames]


    def _load_pic(self, image_name, folder_name):

        """
        Assumes a subdirectory <folder name> containing the
        image <image_name> to load.

        params
        ======

        image_name: <str>
        folder_name: <str>
        """

        fullpath = Path(folder_name, image_name)
        picture = Image.open(fullpath)
        return picture


    def _crop_in_tiles(self, image, tile_size=28, shift=0):

        """
        This function crops an image in several tiles
        tile_size × tile_size squares, yielding a tile
        every iteration.

        If the input image is not a perfect multiple of
        a(tile_size) × b(tile_size), non-square tiles are NOT
        YIELDED.

        params
        ======

        image: a Pillow open image
        tile_size: <int> pixels; size of the tile side
        shift: <int>: the offset from 0,0 in pixels
        """

        assert isinstance(tile_size, int)
        assert isinstance(shift, int)

        width, height = image.size

        # The upper bound stops at the last origin where a full tile
        # still fits, so partial tiles are never produced.
        for x in range(shift, width - tile_size + 1, tile_size):
            for y in range(shift, height - tile_size + 1, tile_size):

                # tile coord ===
                tile_coord = (
                    x, y, # upper left coords
                    x + tile_size, y + tile_size # lower right coords
                )

                yield image.crop(tile_coord)


    def _split_into_channels(self, image, as_array=False):

        """
        Return the R, G and B channels of <image> as a 3-item list,
        either as returned by getchannel() or, with as_array=True,
        converted to numpy arrays.
        """

        if not as_array:
            return [image.getchannel(x) for x in "RGB"]
        else:
            return [np.array(image.getchannel(x)) for x in "RGB"]


    def _channel_tiles(self, pictures):

        """
        Tile every picture with self.tile_size and return the flat list
        tile1_R, tile1_G, tile1_B, tile2_R, tile2_G, .. of channel arrays.
        """

        channel_arrays = []
        for picture in pictures:
            for tile in self._crop_in_tiles(picture, tile_size=self.tile_size):
                channel_arrays.extend(self._split_into_channels(tile, as_array=True))
        return channel_arrays


    def make_dataset(self):

        """
        Fill self.clean_tiles_ and self.noise_tiles_ with the channel
        arrays of all tiles of the clean and noisy pictures.
        """

        # these will store tile1_R, tile1_G, tile1_B, tile2_R, tile2_G, ..
        self.clean_tiles_ = self._channel_tiles(self.clean_pics)
        self.noise_tiles_ = self._channel_tiles(self.noise_pics)

In [15]:
# Load the picture pairs found in the "standard_dataset" folder; tiles will be
# 56 pixels wide, and the default ISO200 / ISO1600 file-name tags are used.
ds = Dataset("standard_dataset", tile_size=56)

In [16]:
# Sorted file names that contain the clean tag.
ds.clean_pics_filenames

['dataset_0001_ISO200.JPG',
 'dataset_0002_ISO200.JPG',
 'dataset_0003_ISO200.JPG',
 'dataset_0004_ISO200.JPG',
 'dataset_0005_ISO200.JPG',
 'dataset_0006_ISO200.JPG',
 'dataset_0007_ISO200.JPG',
 'dataset_0008_ISO200.JPG']

In [17]:
# Sorted file names that contain the noise tag.
ds.noise_pics_filenames

['dataset_0001_ISO1600.JPG',
 'dataset_0002_ISO1600.JPG',
 'dataset_0003_ISO1600.JPG',
 'dataset_0004_ISO1600.JPG',
 'dataset_0005_ISO1600.JPG',
 'dataset_0006_ISO1600.JPG',
 'dataset_0007_ISO1600.JPG',
 'dataset_0008_ISO1600.JPG']

In [18]:
# Cut every picture into tiles and store each tile's R, G and B channel
# as a separate array in ds.clean_tiles_ / ds.noise_tiles_.
ds.make_dataset()

In [19]:
# Number of single-channel arrays: three (R, G, B) per clean tile.
len(ds.clean_tiles_)

134688

In [20]:
# Number of single-channel arrays obtained from the noisy pictures.
len(ds.noise_tiles_)

134688

In [21]:
# First entry: the R channel of the first tile of the first noisy picture.
tile = ds.noise_tiles_[0]

In [22]:
# Inspect the pixel values of that channel.
tile

array([[ 5,  6, 13, ..., 38, 36, 32],
       [ 5,  8, 17, ..., 36, 38, 41],
       [16, 20, 27, ..., 40, 39, 38],
       ...,
       [29, 29, 35, ..., 42, 49, 51],
       [29, 33, 36, ..., 40, 33, 41],
       [27, 29, 34, ..., 37, 23, 31]], dtype=uint8)

In [23]:
# One channel of one tile: tile_size × tile_size values.
tile.shape

(56, 56)

In [24]:
# ghe sem! (Lombard dialect: "here we are!" - the dataset is ready)