In [7]:
import os
import glob
import pandas as pd
import cv2
from joblib import Parallel, delayed

In [38]:
# Number of CPUs reported by the OS; used as the worker count for the parallel tiling step.
CPU_COUNT = os.cpu_count()

# Working directory at the time this cell runs (relative paths such as '../data' resolve against it).
CURR_DIR = os.getcwd()

# Raw dataset layout: <DATA_DIR>/AerialImageDataset/{images,gt}
DATA_DIR = os.path.abspath(os.path.join(os.pardir, 'data'))
INRIA_DIR = os.path.join(DATA_DIR, 'AerialImageDataset')
INRIA_IMAGES_PATH, INRIA_LABELS_PATH = (
    os.path.join(INRIA_DIR, sub_dir) for sub_dir in ('images', 'gt')
)

# Root folder that receives the preprocessed (tiled) dataset.
INRIA_EXTRACTED_PATH = os.path.join(DATA_DIR, 'PreprocessedAerialImageDataset')


# Make dataframe with images and masks

In [39]:
# Collect every source image (*.tif), normalising Windows separators to '/'.
images_paths = [
    found.replace('\\', '/')
    for found in glob.glob(os.path.join(INRIA_IMAGES_PATH, '*.tif'))
]
# Image name = file name up to its first dot (e.g. '.../austin1.tif' -> 'austin1').
images_names = [path.rsplit('/', 1)[-1].partition('.')[0] for path in images_paths]

In [40]:
# Collect every ground-truth mask (*.tif), normalising Windows separators to '/'.
masks_paths = [
    found.replace('\\', '/')
    for found in glob.glob(os.path.join(INRIA_LABELS_PATH, '*.tif'))
]
# Mask name = file name up to its first dot; it is the join key against the image names.
masks_names = [path.rsplit('/', 1)[-1].partition('.')[0] for path in masks_paths]

In [41]:
# Column-oriented records for the image and mask dataframes built in the next cell.
images_dict = dict(images_paths=images_paths, images_names=images_names)
masks_dict = dict(masks_paths=masks_paths, masks_names=masks_names)

In [42]:
# One row per file: path + name.
df_images = pd.DataFrame(images_dict)
df_masks = pd.DataFrame(masks_dict)

# Pair each image with the mask of the same name; files without a counterpart are dropped.
df_inria = df_images.merge(
    df_masks,
    how='inner',
    left_on='images_names',
    right_on='masks_names',
)
df_inria

Unnamed: 0,images_paths,images_names,masks_paths,masks_names


# Tile images and masks

In [43]:
# Output folders for the tiled images and the tiled masks.
INRIA_TILED_IMAGES_PATH = os.path.join(INRIA_EXTRACTED_PATH, 'images_tiled')
INRIA_TILED_MASKS_PATH = os.path.join(INRIA_EXTRACTED_PATH, 'masks_tiled')

# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create race
# of a separate os.path.exists() test.
os.makedirs(INRIA_TILED_IMAGES_PATH, exist_ok=True)
os.makedirs(INRIA_TILED_MASKS_PATH, exist_ok=True)

In [44]:
def augment_image_and_mask(image_path, mask_path, image_name, n_tiles=10):
    """Split an image and its mask into an ``n_tiles x n_tiles`` grid of PNG tiles.

    Both files are read with OpenCV's default ``imread`` flags. Tiles are numbered
    row by row (``i = row * n_tiles + col``) and written as
    ``<image_name>_image_<i>.png`` under ``INRIA_TILED_IMAGES_PATH`` and
    ``<image_name>_mask_<i>.png`` under ``INRIA_TILED_MASKS_PATH``.

    Tile size is ``height // n_tiles`` by ``width // n_tiles``; when a dimension is
    not divisible by ``n_tiles`` the remaining rows/columns at the bottom/right
    edge are not written to any tile.

    Args:
        image_path: Path of the source image.
        mask_path: Path of the matching mask.
        image_name: Prefix used in the output tile file names.
        n_tiles: Number of tiles along each axis (default 10).

    Raises:
        ValueError: If ``n_tiles`` is smaller than 1 or larger than the image's
            height or width.
        FileNotFoundError: If the image or the mask cannot be read.
        AssertionError: If the image and the mask differ in height or width.
        OSError: If a tile cannot be written.
    """
    if n_tiles < 1:
        raise ValueError(f'n_tiles must be >= 1, got {n_tiles}')

    # cv2.imread signals failure by returning None instead of raising.
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    mask = cv2.imread(mask_path)
    if mask is None:
        raise FileNotFoundError(f'Could not read mask: {mask_path}')

    h, w = img.shape[:2]
    assert (h, w) == tuple(mask.shape[:2]), (
        f'Image/mask size mismatch for {image_name}: '
        f'{(h, w)} vs {tuple(mask.shape[:2])}'
    )

    h_tile, w_tile = h // n_tiles, w // n_tiles
    if h_tile == 0 or w_tile == 0:
        raise ValueError(
            f'{image_name}: image of size {h}x{w} is too small for a '
            f'{n_tiles}x{n_tiles} grid'
        )

    for i in range(n_tiles * n_tiles):
        row, col = divmod(i, n_tiles)
        rows = slice(row * h_tile, (row + 1) * h_tile)
        cols = slice(col * w_tile, (col + 1) * w_tile)

        outputs = (
            (os.path.join(INRIA_TILED_IMAGES_PATH, f'{image_name}_image_{i}.png'), img[rows, cols]),
            (os.path.join(INRIA_TILED_MASKS_PATH, f'{image_name}_mask_{i}.png'), mask[rows, cols]),
        )
        for out_path, tile in outputs:
            # cv2.imwrite reports failure through its boolean return value.
            if not cv2.imwrite(out_path, tile):
                raise OSError(f'Failed to write tile: {out_path}')


In [45]:
# Tile every image/mask pair, spreading the work across CPU_COUNT workers.
_ = Parallel(n_jobs=CPU_COUNT)(
    delayed(augment_image_and_mask)(image_path, mask_path, image_name)
    for image_path, mask_path, image_name in zip(
        df_inria['images_paths'], df_inria['masks_paths'], df_inria['images_names']
    )
)

# Make a file with the image-tile to mask-tile mapping. It will be used in model training.

In [46]:
# Collect every image tile (*.png), normalising Windows separators to '/'.
images_paths = [
    found.replace('\\', '/')
    for found in glob.glob(os.path.join(INRIA_TILED_IMAGES_PATH, '*.png'))
]
# Name = file name up to its first underscore (e.g. 'austin1_image_0.png' -> 'austin1').
images_names = [path.rsplit('/', 1)[-1].partition('_')[0] for path in images_paths]

In [47]:
# Collect every mask tile (*.png), normalising Windows separators to '/'.
masks_paths = [
    found.replace('\\', '/')
    for found in glob.glob(os.path.join(INRIA_TILED_MASKS_PATH, '*.png'))
]
# Name = file name up to its first underscore (e.g. 'austin1_mask_0.png' -> 'austin1').
masks_names = [path.rsplit('/', 1)[-1].partition('_')[0] for path in masks_paths]

In [48]:
# Column-oriented records for the tile dataframes built below.
images_dict = dict(images_paths=images_paths, images_names=images_names)
masks_dict = dict(masks_paths=masks_paths, masks_names=masks_names)

In [49]:
# Sanity check: number of entries in masks_names (displayed as the cell output).
len(masks_names)

0

In [50]:
df_images = pd.DataFrame(data=images_dict)
df_masks = pd.DataFrame(data=masks_dict)

# glob returns files in arbitrary order, so rows must not be paired by position.
# Tile files are named '<name>_image_<i>.png' and '<name>_mask_<i>.png'; dropping the
# last '_image_' / '_mask_' marker yields a key shared by a tile and its mask.
images_keys = [
    '_'.join(os.path.basename(path).rsplit('_image_', 1))
    for path in df_images['images_paths']
]
masks_keys = [
    '_'.join(os.path.basename(path).rsplit('_mask_', 1))
    for path in df_masks['masks_paths']
]

# Inner merge keeps only tiles that have both an image and a mask; the helper key is
# dropped so the resulting columns match the original layout. Sorting makes the row
# order (and therefore the exported file) reproducible.
df_inria = (
    pd.merge(
        df_images.assign(tile_key=images_keys),
        df_masks.assign(tile_key=masks_keys),
        how='inner',
        on='tile_key',
    )
    .drop(columns='tile_key')
    .sort_values('images_paths')
    .reset_index(drop=True)
)
df_inria

Unnamed: 0,images_paths,images_names,masks_paths,masks_names


In [51]:
# Persist the tile mapping next to the project root (path is relative to the working directory).
df_inria.to_csv(path_or_buf='../inria_tiled.csv')