In [93]:
import os
import re
from glob import glob
import yaml
import numpy as np
from patoolib import extract_archive
import importlib
import utils  # the module you want to reload


importlib.reload(utils)

from utils import wkt2masc, load_images_from_folder

In [45]:
# Which training set the notebook works with: "full" or "sampled".
DATA = "full"

# The project root is taken to be the parent of the current working
# directory; the data folder and the YAML config are resolved from it.
current_dir = os.getcwd()
BASE_DIR = os.path.split(current_dir)[0]
data_dir = os.path.join(BASE_DIR, "data")
config_file = os.path.join(BASE_DIR, "config.yml")

In [16]:
# Unpacking the .rar archive relies on the external `unrar` program:
#   - Linux:   sudo apt-get install unrar
#   - Windows: download it from https://www.rarlab.com/rar_add.htm (unrarw32.exe)

dataset_path = os.path.join(data_dir, "BurnedAreaUAV_dataset")
rar_archive_path = os.path.join(data_dir, "BurnedAreaUAV_dataset_v1.rar")

# Unpack only when the extracted dataset folder is not on disk yet.
dataset_already_extracted = os.path.exists(dataset_path)
if not dataset_already_extracted:
    extract_archive(rar_archive_path, program="unrar", outdir=data_dir)

# Project settings (data locations, input size, ...) come from the YAML config.
with open(config_file, mode="r", encoding="utf-8") as config_stream:
    config = yaml.safe_load(config_stream)

data_cfg = config["data"]
sampled_masks_txt_path = os.path.join(BASE_DIR, data_cfg["sampled_masks_txt"])

# Move the extracted PNG frames into the train / test image folders named in
# the config.
dataset_train_imgs_path = os.path.join(dataset_path, "PNG", "train", "frames")
dataset_test_imgs_path = os.path.join(dataset_path, "PNG", "test", "frames")

# File names (not full paths) of the PNG frames currently in the extracted
# dataset. The pattern is built with os.path.join rather than "/" concatenation.
train_imgs_list = [os.path.basename(x) for x in glob(os.path.join(dataset_train_imgs_path, "*.png"))]
test_imgs_list = [os.path.basename(x) for x in glob(os.path.join(dataset_test_imgs_path, "*.png"))]

train_imgs_dir = os.path.join(BASE_DIR, config["data"]["train_dir"], "images")
test_imgs_dir = os.path.join(BASE_DIR, config["data"]["test_dir"], "images")

# exist_ok=True replaces the check-then-create pattern and keeps the cell
# safe to re-run.
os.makedirs(train_imgs_dir, exist_ok=True)
os.makedirs(test_imgs_dir, exist_ok=True)

# Same move logic for both splits: (source folder, destination folder, names).
for src_dir, dst_dir, names in (
    (dataset_train_imgs_path, train_imgs_dir, train_imgs_list),
    (dataset_test_imgs_path, test_imgs_dir, test_imgs_list),
):
    for file_name in names:
        source = os.path.join(src_dir, file_name)
        destination = os.path.join(dst_dir, file_name)
        # os.replace overwrites an existing destination on every platform;
        # os.rename raises on Windows when the destination already exists.
        os.replace(source, destination)

# The last component used to be "/PNG/test/frames"; a component starting with
# a separator makes os.path.join drop everything before it, so the result was
# the filesystem-root path "/PNG/test/frames".
# NOTE(review): the frames above are read from <dataset_path>/PNG/test/frames,
# without the extra "BurnedAreaUAV_dataset_v1" level - confirm which layout
# this path is meant to point at.
images_test_path = os.path.join(dataset_path, "BurnedAreaUAV_dataset_v1", "PNG", "test", "frames")

# Read the sampled-masks text file; every non-blank line is expected to start
# with an integer index followed by a comma.
with open(sampled_masks_txt_path, "r", encoding="utf-8") as f:
    polygons = f.readlines()

# Leading integer of each line. Whitespace-only lines (e.g. a trailing empty
# line at the end of the file) are skipped instead of raising ValueError in int().
indexes = [int(polygon.split(",")[0]) for polygon in polygons if polygon.strip()]
# The remainder of each line (everything after the first comma) is not parsed
# here; `polygons` keeps the raw lines.

# Convert the WKT files to segmentation masks: full train, test and sampled train.
train_masks_dir = os.path.join(BASE_DIR, config["data"]["train_dir"], "masks")
test_masks_dir = os.path.join(BASE_DIR, config["data"]["test_dir"], "masks")
train_sampled_masks_dir = os.path.join(BASE_DIR, config["data"]["train_sampled_dir"], "masks")

# exist_ok=True replaces the repeated check-then-create blocks and keeps the
# cell safe to re-run.
for masks_dir in (train_masks_dir, test_masks_dir, train_sampled_masks_dir):
    os.makedirs(masks_dir, exist_ok=True)

# One wkt2masc call per (config key of the WKT file, masks folder) pair, in the
# same order as before: train, test, sampled train.
# NOTE(review): `images_path` is presumably the folder wkt2masc writes the mask
# PNGs to (they are globbed from these folders later) - confirm in utils.
for wkt_key, masks_dir in (
    ("train_wkt", train_masks_dir),
    ("test_wkt", test_masks_dir),
    ("sampled_masks_wkt", train_sampled_masks_dir),
):
    wkt2masc(
        wkt_file=os.path.join(BASE_DIR, config["data"][wkt_key]),
        images_path=masks_dir,
        orig_dims=config["data"]["original_vid_dims"],
        height=config["data"]["input_size"][0],
        width=config["data"]["input_size"][1],
    )


    --------------------------------------
    # [1mProperties of the resulting masks[0m
    # Width: 512, Height: 512
    # Number of masks to create: 226
    --------------------------------------
    


100%|██████████| 226/226 [00:00<00:00, 649.78it/s]



    --------------------------------------
    # [1mProperties of the resulting masks[0m
    # Width: 512, Height: 512
    # Number of masks to create: 23
    --------------------------------------
    


100%|██████████| 23/23 [00:00<00:00, 620.94it/s]



    --------------------------------------
    # [1mProperties of the resulting masks[0m
    # Width: 512, Height: 512
    # Number of masks to create: 13
    --------------------------------------
    


100%|██████████| 13/13 [00:00<00:00, 540.55it/s]


In [41]:
# Rebuild the numeric label attached to every mask.
def _mask_number(mask_path):
    """Return the integer between the first "_" and the following "." of the file name."""
    second_token = os.path.basename(mask_path).split("_")[1]
    return int(second_token.split(".")[0])


# Full training masks: label = 100 * number parsed from the file name.
msks_train_paths = sorted(glob(os.path.join(train_masks_dir, "*.png")))
msks_train_labels = []
for mask_path in msks_train_paths:
    msks_train_labels.append(_mask_number(mask_path) * 100)

# Sampled training masks: labels come from the indexes read from the
# sampled-masks text file, not from the file names.
msks_train_sampled_paths = sorted(glob(os.path.join(train_sampled_masks_dir, "*.png")))
msks_train_sampled_labels = [100 * i for i in indexes]

# Test masks: same rule as the training masks plus a fixed offset of 20250.
msks_test_paths = sorted(glob(os.path.join(test_masks_dir, "*.png")))
msks_test_labels = []
for mask_path in msks_test_paths:
    msks_test_labels.append(_mask_number(mask_path) * 100 + 20250)

print("Train masks:", len(msks_train_paths))
print("Train sampled masks:", len(msks_train_sampled_paths))
print("Test masks:", len(msks_test_paths))

In [94]:
# Load the images and build the normalised label arrays.

# Pick the training image folder AND the matching labels together. Previously
# the labels were always the full-train ones, so DATA == "sampled" paired the
# sampled images with the full-train label list.
if DATA == 'full':
    TRAIN_DIR = os.path.join(BASE_DIR, config["data"]["train_dir"], "images")
    selected_train_labels = msks_train_labels
else:
    TRAIN_DIR = os.path.join(BASE_DIR, config["data"]["train_sampled_dir"], "images")
    selected_train_labels = msks_train_sampled_labels

train_imgs = load_images_from_folder(TRAIN_DIR, target_size=config["data"]["input_size"])
test_imgs = load_images_from_folder(os.path.join(BASE_DIR, config["data"]["test_dir"], "images"),
                                    target_size=config["data"]["input_size"])

# Labels are scaled by the largest full-train label (unchanged from before), so
# train and test labels share one scale whichever training set is selected.
# Test labels carry an extra offset, so they are not bounded by 1 after scaling.
max_val = np.max(msks_train_labels)

# Explicit array conversion instead of relying on list / numpy-scalar coercion;
# the trailing axis turns each label vector into an (N, 1) column.
train_labels = (np.asarray(selected_train_labels) / max_val).astype(np.float32)
train_labels = np.expand_dims(train_labels, axis=-1)

test_labels = (np.asarray(msks_test_labels) / max_val).astype(np.float32)
test_labels = np.expand_dims(test_labels, axis=-1)

print(f"train_imgs.shape: {train_imgs.shape}, train_labels.shape: {train_labels.shape}")
print(f"test_imgs.shape: {test_imgs.shape}, test_labels.shape: {test_labels.shape}")

# train_imgs = train_imgs.reshape((-1, config["data"]["input_size"][0]*config["data"]["input_size"][1]))
# test_imgs = test_imgs.reshape((-1, config["data"]["input_size"][0]*config["data"]["input_size"][1]))

train_imgs.shape: (226, 512, 512, 3), train_labels.shape: (226, 1)
test_imgs.shape: (23, 512, 512, 3), test_labels.shape: (23, 1)


In [None]:
# train_imgs.shape: (13, 262144), train_labels.shape: (13, 1)
# test_imgs.shape: (23, 262144), test_labels.shape: (23, 1)

### Models