In [2]:
import numpy as np
import os
from PIL import Image
from tqdm import tqdm
from utils import *
import glob

In [21]:
def subDivs_train(mdgmPath, folderPath, splitMDGM=668):
    '''
    Saves the arrays for subdivided mdgm-cloudmask pairs of the inputted mdgm to folderPath.

    Every tile is written twice under the same file name "<image>_<xx><yy>.npy"
    (xx / yy are the zero-padded column / row indices of the tile): the mdgm crop
    goes to folderPath/img and the matching cloudmask crop goes to folderPath/mask.
    Both sub-folders are created if they do not exist yet.

    Parameters:
    mdgmPath -- path to an mdgm image in a file directory that resembles https://doi.org/10.7910/DVN/WU6VZ8. See getInfo_train() in utils for description of folder structure
    folderPath -- path to an output folder for the mdgm-cloudmask pairs
    splitMDGM -- integer equal to the input size of the model (default 668)
    '''
    # side length of the mask tiles, derived from the mdgm tile size by checkUNETNum
    # (presumably the model's output size -- see utils to confirm)
    splitMask = int(checkUNETNum(splitMDGM))

    # first 8 characters of the file name identify the image; basename copes with
    # either path separator, unlike splitting on os.sep
    imName = os.path.basename(mdgmPath)[:8]

    imgDir = os.path.join(folderPath, "img")
    maskDir = os.path.join(folderPath, "mask")

    # keep the source image open only for as long as it (or its padded version) is used
    with Image.open(mdgmPath) as mdgm:
        cloudMask = getCloudMask(getCloudMask_train(mdgmPath))

        (lowY, highY) = getCloudMaskBounds(cloudMask)
        (lowX, highX) = getBlackBounds(mdgm, 'lr', (lowY, highY))

        paddedMDGM = padMDGM(mdgm, highX, lowX, highY, lowY)

        # clamp the mask to the two values 0 and 1
        cloudMask[cloudMask <= 0] = 0
        cloudMask[cloudMask >= 1] = 1

        # extent of the bounded region and the number / spacing of tiles along each axis
        width = highX + 1 - lowX
        height = highY + 1 - lowY
        xSplits = int(np.ceil(width / splitMask * 2 - 1))
        ySplits = int(np.ceil(height / splitMask * 2 - 1))
        xSize = width / (xSplits + 1)
        ySize = height / (ySplits + 1)

        os.makedirs(imgDir, exist_ok=True)
        os.makedirs(maskDir, exist_ok=True)

        for i in range(xSplits):
            # safety: final subdivisions are based on ends of mdgm, not the running split count
            if i == xSplits - 1:
                maxX = paddedMDGM.width
                minX = maxX - splitMDGM
            else:
                minX = round(xSize * i)
                maxX = minX + splitMDGM
            maskX = lowX + minX

            for j in range(ySplits):
                if j == ySplits - 1:
                    maxY = paddedMDGM.height
                    minY = maxY - splitMDGM
                else:
                    minY = round(ySize * j)
                    maxY = minY + splitMDGM
                maskY = lowY + minY

                # mask tile gets a trailing singleton axis; mdgm tile is cut from the padded image
                subMask = np.expand_dims(cloudMask[maskY : maskY + splitMask, maskX : maskX + splitMask], axis=2)
                subMDGM = paddedMDGM.crop((minX, minY, maxX, maxY))

                fileName = "{}_{:02d}{:02d}.npy".format(imName, i, j)
                np.save(os.path.join(imgDir, fileName), np.array(subMDGM))
                np.save(os.path.join(maskDir, fileName), subMask)

In [27]:
# Complete the glob with the folder where the cloudmask training data is stored. It should be a directory which contains data organized by Martian subphase. Each subphase folder should resemble https://doi.org/10.7910/DVN/WU6VZ8.
# allImages = glob.glob("./data/train/**/*.jpeg", recursive=True)

# A small subset for the sample in trainProcessed containing P01day01, P01day11, and P01day21. Remove this line if allImages is defined above.
allImages = glob.glob("./data/train/P01/mdgms/P01day*1.jpeg", recursive=True)

# folder to save the mdgm-cloudmask subdivisions
folderPath = "./data/trainProcessed"

# model size
(inDim, outDim) = (668,484)

# Create the output folders (and folderPath itself). exist_ok=True tolerates folders
# left over from a previous run while still surfacing real errors such as permission
# problems, and guarantees 'mask' is created even when 'img' already exists.
for subFolder in ('img', 'mask'):
    os.makedirs(os.path.join(folderPath, subFolder), exist_ok=True)

for imgPath in tqdm(allImages):
    subDivs_train(imgPath, folderPath, splitMDGM=inDim)

100%|██████████| 3/3 [00:01<00:00,  2.85it/s]
