# Imgage Handler

Create 400 x 400 images from higher resolution images.
Filter out any images that contain empty space.

In [31]:
# some basic imports

import time
import math
import os
import re
import cv2
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
from random import sample
from PIL import Image
import gc

In [32]:
def convert_to_png(path, filetype, mode='RGB'):
    directory = os.listdir(path)
    dir_size = len(directory)
    print('Size of directory: ' + str(dir_size))
    i = 0
    for f in directory:
        file_path = os.path.join(path, f)
        file_name = os.path.basename(f).removesuffix(filetype)
        save_path = os.path.join(path, 'png', file_name + '.png')
        image = Image.open(file_path).convert(mode)
        image.save(save_path)
        i += 1
        print('Files converted: ' + str(i) + ' / ' + str(dir_size))

def load_all_from_path(path, block = None):
    # loads all HxW .pngs contained in path as a 4D np.array of shape (n_images, H, W, 3)
    # images are loaded as floats with values in the interval [0., 1.]
    files = sorted(glob(path + '\*.png'))
    if block is None:
        print('block_size_all: ' + str(len(files)))
        return np.stack([np.array(Image.open(f)) for f in files]).astype(np.float32) / 255.
    else:
        print('block_size: ' + str(len(files[block[0]:min(block[1],len(files)-1)])))
        return np.stack([np.array(Image.open(f)) for f in files[block[0]:min(block[1],len(files)-1)]]).astype(np.float32) / 255.

def show_first_n(imgs, masks, n=5):
    # visualizes the first n elements of a series of images and segmentation masks
    imgs_to_draw = min(5, len(imgs))
    fig, axs = plt.subplots(2, imgs_to_draw, figsize=(18.5, 6))
    for i in range(imgs_to_draw):
        axs[0, i].imshow(imgs[i])
        axs[1, i].imshow(masks[i])
        axs[0, i].set_title(f'Image {i}')
        axs[1, i].set_title(f'Mask {i}')
        axs[0, i].set_axis_off()
        axs[1, i].set_axis_off()
    plt.show()



In [33]:
def count_white_im(im_np):
    # count how many pixles are white
    count = 0
    for i in range(0, len(im_np), CHECK_STEP_SIZE):
        for j in range(0, len(im_np[i]), CHECK_STEP_SIZE):
            if np.array_equal(im_np[i,j], [1., 1., 1.]):
                count += 1
    return count

def count_white_mask(mask_np):
    # count how many pixles are white
    count = 0
    for i in range(0, len(mask_np), CHECK_STEP_SIZE):
        for j in range(0, len(mask_np[i]), CHECK_STEP_SIZE):
            if mask_np[i,j] == 1.:
                count += 1
    return count

def split_images(images, masks, target_train_path, target_mask_path, iteration):
    print(len(images))
    total_accepted = total_rejected = 0

    for idx in range(len(images)):
        accepted = rejected = 0
        for i in range(len(SLICES)):
            for j in range(len(SLICES)):
                im_np = images[idx, SLICES[i],SLICES[j]]
                mask_np = masks[idx, SLICES[i],SLICES[j]] * 255
                if count_white_mask(mask_np) < WHITE_THRESHOLD_MASK or count_white_im(im_np) > WHITE_THRESHOLD_IM:
                    rejected += 1
                    continue
                accepted += 1
                file_idx = idx + iteration * BLOCK_SIZE
                file_name = str(file_idx) + '_' + str(i) + '_' + str(j) + '.png'
                im_png = Image.fromarray((im_np * 255).astype('uint8'))
                im_png.save(os.path.join(target_train_path, file_name))
                mask_png = Image.fromarray((mask_np * 255).astype('uint8'))
                mask_png.save(os.path.join(target_mask_path, file_name))
        print('im_ind: ' + str(idx) + '/' + str(len(images)-1) + ' | accepted: ' + str(accepted) + ' | rejected: ' + str(rejected))
        total_accepted += accepted
        total_rejected += rejected

    print('total_accepted: ' + str(total_accepted) + ' | total_rejected: ' + str(total_rejected))

In [34]:
BLOCK_SIZE = 400
SLICES = [slice(100,500), slice(550,950), slice(1000,1400)]
WHITE_THRESHOLD_IM = 5
WHITE_THRESHOLD_MASK = 50
CHECK_STEP_SIZE = 10

In [35]:
data_path = 'mass_roads'
train_path = os.path.join(data_path, 'validation')

train_path_images = os.path.join(train_path, 'images')
train_path_masks = os.path.join(train_path, 'groundtruth')

target_train_path = 'test_im'
target_mask_path = 'test_mask'

In [36]:
start_handle_data = time.time()

In [37]:
for i in range(len(os.listdir(train_path_images)) // BLOCK_SIZE + 1):
    train_images = load_all_from_path(train_path_images, block=(i*BLOCK_SIZE,(i+1)*BLOCK_SIZE))
    train_masks = load_all_from_path(train_path_masks, block=(i*BLOCK_SIZE,(i+1)*BLOCK_SIZE))
    split_images(train_images, train_masks, target_train_path, target_mask_path, i)
    train_images = []
    train_masks = []
    gc.collect()
    print(gc.get_stats())

block_size: 13
block_size: 13
13
im_ind: 0/12 | accepted: 6 | rejected: 3
im_ind: 1/12 | accepted: 9 | rejected: 0
im_ind: 2/12 | accepted: 9 | rejected: 0
im_ind: 3/12 | accepted: 4 | rejected: 5
im_ind: 4/12 | accepted: 7 | rejected: 2
im_ind: 5/12 | accepted: 2 | rejected: 7
im_ind: 6/12 | accepted: 8 | rejected: 1
im_ind: 7/12 | accepted: 9 | rejected: 0
im_ind: 8/12 | accepted: 9 | rejected: 0
im_ind: 9/12 | accepted: 5 | rejected: 4
im_ind: 10/12 | accepted: 5 | rejected: 4
im_ind: 11/12 | accepted: 8 | rejected: 1
im_ind: 12/12 | accepted: 8 | rejected: 1
total_accepted: 89 | total_rejected: 28
[{'collections': 716, 'collected': 27272, 'uncollectable': 0}, {'collections': 64, 'collected': 7363, 'uncollectable': 0}, {'collections': 11, 'collected': 5470, 'uncollectable': 0}]


In [38]:
stop_handle_data = time.time()
print(f"Time to handle data: {round(stop_handle_data - start_handle_data, 2)}")

Time to handle data: 4.87
