In [1]:
import os
import glob
import pandas as pd
import cv2
from joblib import Parallel, delayed

In [2]:
# Number of workers handed to joblib later on (os.cpu_count() may return None).
CPU_COUNT = os.cpu_count()

# Dataset layout: <INRIA_DIR>/data/images and <INRIA_DIR>/data/labels.
CURR_DIR = os.getcwd()
INRIA_DIR = os.path.abspath('../data/inria')
INRIA_EXTRACTED_PATH = os.path.join(INRIA_DIR, 'data')
INRIA_IMAGES_PATH, INRIA_LABELS_PATH = (
    os.path.join(INRIA_EXTRACTED_PATH, subdir) for subdir in ('images', 'labels')
)

# Build a DataFrame that pairs each image with its mask

In [3]:
# Find every source image, normalise Windows separators to '/', and use the
# file name up to its first '.' as the image name.
images_paths = [
    path.replace('\\', '/')
    for path in glob.glob(os.path.join(INRIA_IMAGES_PATH, '*.tif'))
]
images_names = [path.rpartition('/')[2].partition('.')[0] for path in images_paths]

In [4]:
# Find every label mask, normalise Windows separators to '/', and use the
# file name up to its first '.' as the mask name.
masks_paths = [
    path.replace('\\', '/')
    for path in glob.glob(os.path.join(INRIA_LABELS_PATH, '*.tif'))
]
masks_names = [path.rpartition('/')[2].partition('.')[0] for path in masks_paths]

In [5]:
# Column name -> values, one dict per DataFrame built in the next cell.
images_dict = dict(images_paths=images_paths, images_names=images_names)
masks_dict = dict(masks_paths=masks_paths, masks_names=masks_names)

In [6]:
# Inner-join on the bare file name: only images that have a mask with the
# same name end up in df_inria.
df_images = pd.DataFrame(images_dict)
df_masks = pd.DataFrame(masks_dict)
df_inria = df_images.merge(
    df_masks,
    how='inner',
    left_on='images_names',
    right_on='masks_names',
)
df_inria

Unnamed: 0,images_paths,images_names,masks_paths,masks_names
0,C:/Users/MZ/Documents/python-projects/housing-...,austin1,C:/Users/MZ/Documents/python-projects/housing-...,austin1
1,C:/Users/MZ/Documents/python-projects/housing-...,austin10,C:/Users/MZ/Documents/python-projects/housing-...,austin10
2,C:/Users/MZ/Documents/python-projects/housing-...,austin11,C:/Users/MZ/Documents/python-projects/housing-...,austin11
3,C:/Users/MZ/Documents/python-projects/housing-...,austin12,C:/Users/MZ/Documents/python-projects/housing-...,austin12
4,C:/Users/MZ/Documents/python-projects/housing-...,austin13,C:/Users/MZ/Documents/python-projects/housing-...,austin13
...,...,...,...,...
175,C:/Users/MZ/Documents/python-projects/housing-...,vienna5,C:/Users/MZ/Documents/python-projects/housing-...,vienna5
176,C:/Users/MZ/Documents/python-projects/housing-...,vienna6,C:/Users/MZ/Documents/python-projects/housing-...,vienna6
177,C:/Users/MZ/Documents/python-projects/housing-...,vienna7,C:/Users/MZ/Documents/python-projects/housing-...,vienna7
178,C:/Users/MZ/Documents/python-projects/housing-...,vienna8,C:/Users/MZ/Documents/python-projects/housing-...,vienna8


# Tile images and masks

In [7]:
# Output folders for the tiles, placed next to the extracted images/labels.
INRIA_TILED_IMAGES_PATH = os.path.join(INRIA_EXTRACTED_PATH, 'images_tiled')
INRIA_TILED_MASKS_PATH = os.path.join(INRIA_EXTRACTED_PATH, 'masks_tiled')

# makedirs(exist_ok=True) keeps the cell re-runnable without a separate
# exists() check (no check-then-create race) and also creates missing parents.
os.makedirs(INRIA_TILED_IMAGES_PATH, exist_ok=True)
os.makedirs(INRIA_TILED_MASKS_PATH, exist_ok=True)

In [8]:
def augment_image_and_mask(image_path, mask_path, image_name, n_tiles=10):
    """Split one image and its mask into an ``n_tiles`` x ``n_tiles`` grid of tiles.

    Image tiles are written to ``INRIA_TILED_IMAGES_PATH`` as
    ``<image_name>_image_<i>.tif`` and mask tiles to ``INRIA_TILED_MASKS_PATH``
    as ``<image_name>_mask_<i>.tif``. ``i`` numbers the tiles row by row from
    the top-left corner (``i = row * n_tiles + col``).

    Every tile is ``height // n_tiles`` by ``width // n_tiles`` pixels, so when
    the image size is not a multiple of ``n_tiles`` the leftover rows/columns at
    the bottom/right edge are not exported.

    Both files are loaded with ``cv2.imread`` using its default flags.

    Args:
        image_path: Path of the source image.
        mask_path: Path of the matching mask.
        image_name: Prefix used for the output tile file names.
        n_tiles: Number of tiles per axis (default 10, i.e. 100 tiles).

    Raises:
        ValueError: If ``n_tiles`` is smaller than 1, if image and mask differ
            in height/width, or if the image is too small for the grid.
        OSError: If the image or mask cannot be read, or a tile cannot be
            written.
    """
    if n_tiles < 1:
        raise ValueError(f'n_tiles must be >= 1, got {n_tiles}')

    # cv2.imread signals failure by returning None instead of raising.
    img = cv2.imread(image_path)
    if img is None:
        raise OSError(f'Could not read image: {image_path}')
    mask = cv2.imread(mask_path)
    if mask is None:
        raise OSError(f'Could not read mask: {mask_path}')

    h, w = img.shape[:2]
    if mask.shape[:2] != (h, w):
        raise ValueError(
            f'Image {image_path} is {h}x{w} but mask {mask_path} is '
            f'{mask.shape[0]}x{mask.shape[1]}'
        )

    h_tile, w_tile = h // n_tiles, w // n_tiles
    if h_tile == 0 or w_tile == 0:
        raise ValueError(
            f'Image {image_path} ({h}x{w}) is too small for a '
            f'{n_tiles}x{n_tiles} grid'
        )

    for row in range(n_tiles):
        y_start = row * h_tile
        y_end = y_start + h_tile

        for col in range(n_tiles):
            x_start = col * w_tile
            x_end = x_start + w_tile
            tile_index = row * n_tiles + col

            img_tile_path = os.path.join(
                INRIA_TILED_IMAGES_PATH, f'{image_name}_image_{tile_index}.tif'
            )
            mask_tile_path = os.path.join(
                INRIA_TILED_MASKS_PATH, f'{image_name}_mask_{tile_index}.tif'
            )

            # Save the split image and mask; cv2.imwrite reports failure by
            # returning False rather than raising.
            if not cv2.imwrite(img_tile_path, img=img[y_start:y_end, x_start:x_end]):
                raise OSError(f'Could not write image tile: {img_tile_path}')
            if not cv2.imwrite(mask_tile_path, img=mask[y_start:y_end, x_start:x_end]):
                raise OSError(f'Could not write mask tile: {mask_tile_path}')


In [9]:
# Tile every image/mask pair in parallel, one task per row of df_inria.
_ = Parallel(n_jobs=CPU_COUNT)(
    delayed(augment_image_and_mask)(image_path, mask_path, image_name)
    for image_path, mask_path, image_name in zip(
        df_inria['images_paths'],
        df_inria['masks_paths'],
        df_inria['images_names'],
    )
)

# Create a file that maps each image tile to its mask tile. It will be used for model training.

In [10]:
# Find every image tile, normalise Windows separators to '/', and recover the
# source image name: the part of the file name before its first '_'.
images_paths = [
    path.replace('\\', '/')
    for path in glob.glob(os.path.join(INRIA_TILED_IMAGES_PATH, '*.tif'))
]
images_names = [path.rpartition('/')[2].partition('_')[0] for path in images_paths]

In [11]:
# Find every mask tile, normalise Windows separators to '/', and recover the
# source image name: the part of the file name before its first '_'.
masks_paths = [
    path.replace('\\', '/')
    for path in glob.glob(os.path.join(INRIA_TILED_MASKS_PATH, '*.tif'))
]
masks_names = [path.rpartition('/')[2].partition('_')[0] for path in masks_paths]

In [12]:
# Column name -> values for the tile-level DataFrames.
images_dict = dict(images_paths=images_paths, images_names=images_names)
masks_dict = dict(masks_paths=masks_paths, masks_names=masks_names)

In [13]:
# Image tiles and mask tiles are paired afterwards, so both folders must hold
# the same number of files; fail here rather than produce a broken mapping.
assert len(images_names) == len(masks_names), (
    f'{len(images_names)} image tiles vs {len(masks_names)} mask tiles'
)
len(masks_names)

18000

In [14]:
def _tile_key(path):
    """Return '<name>_<i>' for a tile named '<name>_image_<i>.tif' or '<name>_mask_<i>.tif'."""
    stem = os.path.splitext(os.path.basename(path))[0]
    parts = stem.rsplit('_', 2)
    if len(parts) != 3:
        raise ValueError(f'Unexpected tile file name: {path!r}')
    name, _kind, index = parts
    return f'{name}_{index}'


df_images = pd.DataFrame(data=images_dict)
df_masks = pd.DataFrame(data=masks_dict)

# Pair tiles through an explicit key instead of by row position: glob() gives
# no ordering guarantee, so a positional concat could match an image tile with
# the wrong mask without any error.
df_inria = (
    df_images.assign(tile_key=df_images['images_paths'].map(_tile_key))
    .merge(
        df_masks.assign(tile_key=df_masks['masks_paths'].map(_tile_key)),
        on='tile_key',
        how='inner',
        validate='one_to_one',
    )
    .drop(columns='tile_key')
)

# Every image tile must have found exactly one mask tile and vice versa.
if not len(df_inria) == len(df_images) == len(df_masks):
    raise ValueError(
        f'Unpaired tiles: {len(df_images)} image tiles, {len(df_masks)} mask tiles, '
        f'{len(df_inria)} matched pairs'
    )
df_inria

Unnamed: 0,images_paths,images_names,masks_paths,masks_names
0,C:/Users/MZ/Documents/python-projects/housing-...,austin10,C:/Users/MZ/Documents/python-projects/housing-...,austin10
1,C:/Users/MZ/Documents/python-projects/housing-...,austin10,C:/Users/MZ/Documents/python-projects/housing-...,austin10
2,C:/Users/MZ/Documents/python-projects/housing-...,austin10,C:/Users/MZ/Documents/python-projects/housing-...,austin10
3,C:/Users/MZ/Documents/python-projects/housing-...,austin10,C:/Users/MZ/Documents/python-projects/housing-...,austin10
4,C:/Users/MZ/Documents/python-projects/housing-...,austin10,C:/Users/MZ/Documents/python-projects/housing-...,austin10
...,...,...,...,...
17995,C:/Users/MZ/Documents/python-projects/housing-...,vienna9,C:/Users/MZ/Documents/python-projects/housing-...,vienna9
17996,C:/Users/MZ/Documents/python-projects/housing-...,vienna9,C:/Users/MZ/Documents/python-projects/housing-...,vienna9
17997,C:/Users/MZ/Documents/python-projects/housing-...,vienna9,C:/Users/MZ/Documents/python-projects/housing-...,vienna9
17998,C:/Users/MZ/Documents/python-projects/housing-...,vienna9,C:/Users/MZ/Documents/python-projects/housing-...,vienna9


In [15]:
# Persist the tile -> mask mapping (row index included as the first column).
df_inria.to_csv(path_or_buf='../inria_tiled.csv', index=True)