In [1]:
import rasterio
import matplotlib.pyplot as plt
import pathlib as Path
import random
from scipy.ndimage import label
from skimage.color import label2rgb
import numpy as np
import os

from PIL import Image as im

In [2]:
# Root directory of the dataset; every other location is derived from it.
root_folder = "belgium"

# Image tiles live under "s2_images", in one sub-folder per window.
s2_images_folder = Path.Path(root_folder, "s2_images")
window_a_folder = s2_images_folder / "window_a"
window_b_folder = s2_images_folder / "window_b"

# Label masks live under "label_masks", in one sub-folder per mask flavour.
label_masks_folder = Path.Path(root_folder, "label_masks")
label_masks_2_class = label_masks_folder / "semantic_2class"
label_masks_3_class = label_masks_folder / "semantic_3class"
label_masks_instance = label_masks_folder / "instance"

# Bare file names (not full paths) of everything inside window_a.
img_files = os.listdir(window_a_folder)
# Full paths of every *.tif file in the 2-class mask folder.
semantic_2_class_files = [tif_path for tif_path in label_masks_2_class.glob("*.tif")]

In [3]:
# Keep only the masks that contain at least one non-zero pixel: empty masks
# may cause issues later on during training/testing.
#
# The listing is sorted because os.listdir() returns entries in arbitrary
# order, and the seeded train/test split indexes into `valid_masks` by
# position; without a stable order the same seed yields a different split
# on another machine or filesystem.
valid_masks = []
for mask_name in sorted(os.listdir(label_masks_3_class)):
    with rasterio.open(os.path.join(label_masks_3_class, mask_name)) as src:
        mask_bands = src.read()[:3, :, :]  # at most the first 3 bands

    # A strictly positive maximum means the mask is not empty. No rescaling is
    # needed for this test: dividing by a positive constant cannot change
    # whether the maximum is greater than zero.
    if np.max(mask_bands) > 0:
        valid_masks.append(mask_name)


## TRAIN TEST SPLIT

In [36]:
# Seed NumPy's global random generator so the split drawn next is repeatable.
np.random.seed(42)
# Number of masks assigned to training: 80 % of the valid ones, truncated to an int.
train_size = int(len(valid_masks) * 0.80)

In [37]:
# Shuffle every position of `valid_masks` exactly once and cut the shuffled
# order at `train_size`: the first part is the training set, the remainder is
# the test set. Drawing a permutation (instead of sampling random integers)
# guarantees that no mask is picked twice and that the two sets are disjoint.
# The result depends on the state of NumPy's global random generator.
shuffled_idxs = np.random.permutation(len(valid_masks))
idxs = shuffled_idxs[:train_size]
test_idxs = shuffled_idxs[train_size:]

mask_names = np.array(valid_masks)

# name of every training image/mask in the folder
train_img = mask_names[idxs]

# same for the test dataset
test_img = mask_names[test_idxs]

# Sanity checks: the split must cover every valid mask exactly once.
assert len(train_img) + len(test_img) == len(valid_masks)
assert not set(train_img) & set(test_img), "train and test sets overlap"

In [38]:
## MAKE TRAINING FOLDERS: IMAGES + GROUND TRUTH
# Writes one PNG image and one PNG ground-truth mask per entry of `train_img`
# into dataset/training/images and dataset/training/gt (relative to the
# current working directory).

# NOTE(review): imports belong in the first cell of the notebook; they are kept
# here only because cells outside this view may rely on `glob` being imported
# at this point.
import os
import glob

train_images_dir = Path.Path(os.getcwd()) / "dataset" / "training" / "images"
train_gt_dir = Path.Path(os.getcwd()) / "dataset" / "training" / "gt"

for out_dir in (train_images_dir, train_gt_dir):
    # exist_ok=True lets the cell be re-run without raising FileExistsError.
    os.makedirs(out_dir, exist_ok=True)
    # Remove PNGs left by a previous run so that files belonging to an older
    # split cannot linger next to the new ones.
    for stale_png in out_dir.glob("*.png"):
        stale_png.unlink()


for image_name in train_img:
    # .stem drops only the final extension, so names containing several dots
    # stay distinct.
    tile_stem = Path.Path(str(image_name)).stem

    # source paths (image and mask share the same file name)
    from_img = window_a_folder / str(image_name)
    from_gt = label_masks_3_class / str(image_name)

    # destination paths
    to_img = train_images_dir / (tile_stem + '.png')
    to_gt = train_gt_dir / (tile_stem + '.png')

    with rasterio.open(from_img) as src:
        window_a = src.read()[:3, :, :]  # Reading first 3 bands

    # Bands-first -> bands-last, then scale by 3000. Values above 3000 are
    # clipped to 1 so they saturate at 255 instead of overflowing the uint8
    # cast below.
    window_a = np.clip(window_a.transpose(1, 2, 0) / 3000, 0, 1)
    image_png = im.fromarray((window_a * 255).astype(np.uint8))
    image_png.save(str(to_img), 'PNG')

    with rasterio.open(from_gt) as src:
        semantic_3_class = src.read()

    # Scale labels by 127.5 and keep only the pixels that reach 255, i.e.
    # label value 2; everything else becomes 0.
    # NOTE(review): assumes mask values are in {0, 1, 2} - confirm.
    semantic_3_class = (semantic_3_class * 127.5).astype(np.uint8)
    semantic_3_class[semantic_3_class < 255] = 0
    # Only the first band is written, replicated to three channels by convert('RGB').
    gt_png = im.fromarray(semantic_3_class[0]).convert('RGB')
    gt_png.save(str(to_gt), 'PNG')

In [39]:
# SAME FOR TEST DATASET
# Writes one PNG image and one PNG ground-truth mask per entry of `test_img`
# into dataset/test/images and dataset/test/gt (relative to the current
# working directory).

test_images_dir = Path.Path(os.getcwd()) / "dataset" / "test" / "images"
test_gt_dir = Path.Path(os.getcwd()) / "dataset" / "test" / "gt"

for out_dir in (test_images_dir, test_gt_dir):
    # exist_ok=True lets the cell be re-run without raising FileExistsError.
    os.makedirs(out_dir, exist_ok=True)
    # Remove PNGs left by a previous run so that files belonging to an older
    # split cannot linger next to the new ones.
    for stale_png in out_dir.glob("*.png"):
        stale_png.unlink()


for image_name in test_img:
    # .stem drops only the final extension, so names containing several dots
    # stay distinct.
    tile_stem = Path.Path(str(image_name)).stem

    # source paths (image and mask share the same file name)
    from_img = window_a_folder / str(image_name)
    from_gt = label_masks_3_class / str(image_name)

    # destination paths
    to_img = test_images_dir / (tile_stem + '.png')
    to_gt = test_gt_dir / (tile_stem + '.png')

    with rasterio.open(from_img) as src:
        window_a = src.read()[:3, :, :]  # Reading first 3 bands

    # Bands-first -> bands-last, then scale by 3000. Values above 3000 are
    # clipped to 1 so they saturate at 255 instead of overflowing the uint8
    # cast below.
    window_a = np.clip(window_a.transpose(1, 2, 0) / 3000, 0, 1)
    image_png = im.fromarray((window_a * 255).astype(np.uint8))
    image_png.save(str(to_img), 'PNG')

    with rasterio.open(from_gt) as src:
        semantic_3_class = src.read()

    # Scale labels by 127.5 and keep only the pixels that reach 255, i.e.
    # label value 2; everything else becomes 0.
    # NOTE(review): assumes mask values are in {0, 1, 2} - confirm.
    semantic_3_class = (semantic_3_class * 127.5).astype(np.uint8)
    semantic_3_class[semantic_3_class < 255] = 0
    # Only the first band is written, replicated to three channels by convert('RGB').
    gt_png = im.fromarray(semantic_3_class[0]).convert('RGB')
    gt_png.save(str(to_gt), 'PNG')