In [2]:
import numpy as np
import os
from PIL import Image
from tqdm import tqdm
from utils import *
import glob

In [21]:
def subdivs_train(mdgm_path, folder_path, split_mdgm=668):
    '''
    Saves the arrays for subdivided mdgm-cloudmask pairs of the inputted mdgm to folder_path.

    The padded mdgm is cut into overlapping split_mdgm x split_mdgm tiles. Each tile is paired with a
    split_mask x split_mask window of the cloudmask, where split_mask = int(check_UNET_num(split_mdgm))
    (presumably the model's output size for that input size -- confirm in utils). Each pair is written as
    <folder_path>/img/<name>_<xx><yy>.npy and <folder_path>/mask/<name>_<xx><yy>.npy, where <name> is the
    first 8 characters of the mdgm file name and xx / yy are the zero-padded x / y tile indices.
    The img and mask subfolders are created if they do not exist yet.

    Parameters:
    mdgm_path -- path to an mdgm image in a file directory that resembles https://doi.org/10.7910/DVN/WU6VZ8. See getInfo_train() in utils for description of folder structure
    folder_path -- path to an output folder for the mdgm-cloudmask pairs
    split_mdgm -- integer equal to the input size of the model (default 668)
    '''
    split_mask = int(check_UNET_num(split_mdgm))

    # basename copes with either path separator, unlike splitting on os.sep alone
    im_name = os.path.basename(mdgm_path)[:8]

    img_dir = os.path.join(folder_path, "img")
    mask_dir = os.path.join(folder_path, "mask")

    # keep every use of the image inside the with-block so the file handle is released deterministically
    with Image.open(mdgm_path) as mdgm:
        cloudmask = get_cloudmask(get_cloudmask_train(mdgm_path))

        (ylow, yhigh) = get_cloudmask_bounds(cloudmask)
        (xlow, xhigh) = get_black_bounds(mdgm, 'lr', (ylow, yhigh))

        # NOTE(review): the tiling below assumes pad_mdgm crops to these bounds and pads so that an
        # mdgm tile at (xmin, ymin) is centred on the mask window at (xlow + xmin, ylow + ymin) -- confirm in utils
        padded_mdgm = pad_mdgm(mdgm, xhigh, xlow, yhigh, ylow)

        # clamp the mask in place: values <= 0 become 0, values >= 1 become 1
        cloudmask[cloudmask <= 0] = 0
        cloudmask[cloudmask >= 1] = 1

        # number of tiles per axis for a stride of roughly half a mask window, and the resulting stride
        x_splits = int(np.ceil((xhigh + 1 - xlow) / split_mask * 2 - 1))
        y_splits = int(np.ceil((yhigh + 1 - ylow) / split_mask * 2 - 1))
        xSize = (xhigh + 1 - xlow) / (x_splits + 1)
        ySize = (yhigh + 1 - ylow) / (y_splits + 1)

        # np.save does not create missing directories
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(mask_dir, exist_ok=True)

        for i in range(x_splits):
            # x bounds depend only on i, so compute them once per column of tiles
            xmin = round(xSize * i)
            xmax = xmin + split_mdgm

            # safety: final subdivisions are based on ends of mdgm, not the running split count
            if i == x_splits - 1:
                xmax = padded_mdgm.width
                xmin = xmax - split_mdgm

            for j in range(y_splits):
                ymin = round(ySize * j)
                ymax = ymin + split_mdgm

                if j == y_splits - 1:
                    ymax = padded_mdgm.height
                    ymin = ymax - split_mdgm

                # the mask window is indexed in original cloudmask coordinates, hence the ylow / xlow offsets;
                # the trailing axis of length 1 is added by expand_dims
                sub_mask = np.expand_dims(cloudmask[ylow + ymin : ylow + ymin + split_mask, xmin + xlow : xmin + xlow + split_mask], axis=2)
                sub_mdgm = padded_mdgm.crop((xmin, ymin, xmax, ymax))

                # img and mask files of a pair share one name so they can be matched later
                tile_name = "{im}_{x:02d}{y:02d}.npy".format(im=im_name, x=i, y=j)

                np.save(os.path.join(img_dir, tile_name), np.array(sub_mdgm))
                np.save(os.path.join(mask_dir, tile_name), np.array(sub_mask))

In [27]:
# Point the glob at the folder where the cloudmask training data is stored. It should be a directory
# containing data organized by martian subphase, where each subphase folder resembles https://doi.org/10.7910/DVN/WU6VZ8.
# all_images = glob.glob("./data/train/**/*.jpeg", recursive=True)

# a small subset for the sample in trainProcessed containing P01day01, P01day11, and P01day21. Remove if all_images is defined above
all_images = glob.glob("./data/train/P01/mdgms/P01day*1.jpeg", recursive=True)

# folder to save the mdgm-cloudmask subdivisions
folder_path = "./data/train_processed"

# model size: input (mdgm tile) and output (mask tile) dimensions
(in_dim, out_dim) = (668,484)

# Create the output tree. exist_ok=True makes re-running the cell safe without hiding real failures
# (e.g. permission errors), and each subfolder is created independently so an existing 'img' folder
# cannot prevent 'mask' from being created.
for subfolder in ('img', 'mask'):
    os.makedirs(os.path.join(folder_path, subfolder), exist_ok=True)

# pass the configured input size explicitly instead of relying on the function default
for img_path in tqdm(all_images):
    subdivs_train(img_path, folder_path, split_mdgm=in_dim)

100%|██████████| 3/3 [00:01<00:00,  2.85it/s]
