In [1]:
from pathlib import Path
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as pyplot

In [2]:
def load_files(directory: str):
    """Load every ``.npy`` file below ``directory`` into one stacked array.

    Files are discovered recursively and loaded in sorted (lexicographic) path
    order. ``Path.glob`` yields entries in an arbitrary, filesystem-dependent
    order, so without sorting the result of two calls on parallel directories
    (e.g. images and their masks) could not be paired by position reliably.

    Args:
        directory: Root directory that is searched recursively for ``*.npy`` files.

    Returns:
        The loaded arrays stacked along a new leading axis (one entry per file).

    Raises:
        ValueError: If no ``.npy`` file is found, or if the loaded arrays do not
            all share the same shape (raised by ``np.stack``).
    """
    paths = sorted(Path(directory).glob('**/*.npy'))
    if not paths:
        # np.stack would also raise ValueError here, but with a message that
        # does not mention the directory.
        raise ValueError(f"No .npy files found in '{directory}'")
    return np.stack([np.load(path) for path in paths])

# Load the training images and their label masks; the two stacks are paired by position.
images = load_files('../data/images_train')
# NOTE(review): squeeze() without an axis drops *every* size-1 axis, so a
# dataset with a single mask file (or a size-1 spatial axis) would also lose
# that axis and fail the shape check below.
masks = load_files('../data/masks_train').squeeze()  # squeeze removes the dimension of size 1

# The images must be 4-D; the summary printed below reads the axes as
# (image, channel, height, width).
if images.ndim != 4:
    raise ValueError(f"Unexpected shape of images: {images.shape}")

# The masks must have the image shape without the channel axis (axis 1).
expected_mask_shape = images.shape[:1] + images.shape[2:]
if masks.shape != expected_mask_shape:
    raise ValueError(f"Shape mismatch between images and masks: {images.shape} != {masks.shape}")

print(f"Number of images: {images.shape[0]}\nChannels: {images.shape[1]}\nHeight: {images.shape[2]}\nWidth: {images.shape[3]}")

Number of images: 40
Channels: 10
Height: 1024
Width: 1024


Find the indices of all pixels with a non-zero label. Each index is a triple (image, row, column) into `masks`.

In [3]:
# One row per non-zero mask entry; the columns are the entry's position along
# each axis of `masks`.
indices = np.transpose(np.nonzero(masks))

Split the indices into a training and a test set. We use 90% for training and (cross-)validation and 10% for testing (the final evaluation).

In [4]:
# Shuffle the index rows and hold out 10% of them for the final evaluation.
# The fixed random_state makes the split reproducible.
indices_train, indices_test = train_test_split(
    indices,
    train_size=0.9,
    random_state=42,
)

# Persist both index sets.
np.save('../data/indices_train.npy', indices_train)
np.save('../data/indices_test.npy', indices_test)

n_train = len(indices_train)
n_test = len(indices_test)
print(f"Generated {n_train} training and {n_test} testing indices "
       "and saved them to 'indices_train.npy' and 'indices_test.npy'")

Generated 34976 training and 3887 testing indices and saved them to 'indices_train.npy' and 'indices_test.npy'


Obtain the feature vectors $\mathbf X$ (the channel values of each selected pixel) and the labels $\mathbf y$ at the given indices.

In [5]:
def _pixels_at(pixel_indices):
    """Return ``(features, labels)`` for the given index rows.

    Column 0 of ``pixel_indices`` selects along the first axis of ``images``
    and ``masks``; columns 1 and 2 select along their last two axes. The
    features keep every entry of ``images`` axis 1 for each selected position.
    """
    img = pixel_indices[:, 0]
    row = pixel_indices[:, 1]
    col = pixel_indices[:, 2]
    return images[img, :, row, col], masks[img, row, col]


X_train, y_train = _pixels_at(indices_train)
X_test, y_test = _pixels_at(indices_test)

# Persist the extracted feature vectors and labels.
np.save('../data/X_train.npy', X_train)
np.save('../data/X_test.npy', X_test)
np.save('../data/y_train.npy', y_train)
np.save('../data/y_test.npy', y_test)