# Setup

In [1]:
from pathlib import Path
import numpy as np
from sklearn.model_selection import train_test_split

# Preprocessing

Load the training images and masks, and stack each set into its own `numpy` array.

In [2]:
# Read the 40 per-sample .npy files of each kind and stack them along a new leading axis.
images = np.stack([np.load(f'../data/images_train/image_{index:03d}.npy') for index in range(40)])
masks = np.stack([np.load(f'../data/masks_train/mask_{index:03d}.npy') for index in range(40)])

# Both stacks must be 4-D; the summary printed below labels the axes as
# (image, channel, height, width).
if images.ndim != 4 or masks.ndim != 4:
    raise Exception(f"Unexpected shape of images/masks: {images.shape}, {masks.shape}")

# Compare sample count, height and width. Axis 1 (channels) is not compared,
# so images and masks may differ in channel count.
images_layout = (images.shape[0], images.shape[2], images.shape[3])
masks_layout = (masks.shape[0], masks.shape[2], masks.shape[3])
if images_layout != masks_layout:
    raise Exception(f"Shape mismatch between images and masks: {images.shape} != {masks.shape}")

print(f"Number of images: {images.shape[0]}\nChannels: {images.shape[1]}\nHeight: {images.shape[2]}\nWidth: {images.shape[3]}")

Number of images: 40
Channels: 10
Height: 1024
Width: 1024


Obtain the indices of the instances for which labels are given, i.e. the positions of all non-zero mask entries.

In [3]:
# One row per non-zero mask entry; the columns are that entry's index along each mask axis.
indices = np.transpose(np.nonzero(masks))

Perform the train-test split.

In [4]:
# Keep 90% of the index rows for training and the rest for testing;
# the fixed random_state makes the split repeatable across runs.
indices_train, indices_test = train_test_split(indices, random_state=0, train_size=0.9)

# Persist both index sets so the same split can be reloaded later.
for target_path, index_rows in (
    ('../data/indices_train.npy', indices_train),
    ('../data/indices_test.npy', indices_test),
):
    np.save(target_path, index_rows)

summary = (f"Generated {indices_train.shape[0]} training and {indices_test.shape[0]} testing indices "
            "and saved them to 'indices_train.npy' and 'indices_test.npy'")
print(summary)

Generated 34976 training and 3887 testing indices and saved them to 'indices_train.npy' and 'indices_test.npy'


Create the datasets for training/testing, with instances in the rows and features in the columns.

In [5]:
# Name the index columns once: column 0 selects the image, columns 2 and 3 the
# two spatial axes. Column 1 (the mask channel of each index row) is not used.
train_img, train_row, train_col = indices_train[:, 0], indices_train[:, 2], indices_train[:, 3]
test_img, test_row, test_col = indices_test[:, 0], indices_test[:, 2], indices_test[:, 3]

# The index arrays are separated by a slice, so NumPy puts the indexed
# dimension first: one row per instance, one column per image channel.
X_train = images[train_img, :, train_row, train_col]
X_test = images[test_img, :, test_row, test_col]

# Labels are always read from mask channel 0.
# NOTE(review): this presumes the masks carry a single channel — confirm.
y_train = masks[train_img, 0, train_row, train_col]
y_test = masks[test_img, 0, test_row, test_col]

# Write the four arrays to disk.
for target_path, array in (
    ('../data/X_train.npy', X_train),
    ('../data/X_test.npy', X_test),
    ('../data/y_train.npy', y_train),
    ('../data/y_test.npy', y_test),
):
    np.save(target_path, array)