In [1]:
import numpy as np
import os
from PIL import Image
from netCDF4 import Dataset
from tqdm import tqdm
from utils import *
from model import *
import glob
from shutil import copy

In [2]:
# Weight window for one 484x484 predicted tile, built once and reused by subDivs_compute when it
# stitches overlapping tiles together (484 is the mask size noted there for the default 668 input).
# NOTE(review): the second argument (2) is presumably the window's power/shape parameter -- confirm
# against window_2D's definition.
cachedWindow2D = window_2D(484, 2)

In [3]:
# Load the saved model from 'save_cloudModel'. The custom object is passed by name so that Keras
# can resolve it while deserializing the model.
# NOTE(review): `tf` is not imported explicitly in this notebook; presumably it arrives through one
# of the star imports above -- confirm.
model = tf.keras.models.load_model(
    'save_cloudModel',
    custom_objects={
        'staticWeightedBincrossentropy': staticWeightedBincrossentropy,
    },
)

In [None]:
def subDivs_compute(mdgmPath, splitMDGM=668, probs=False, c=3, bounding=None):
    '''
    Computes a cloudMask for a given mdgm.

    The mdgm is passed through padMDGM(), cut into overlapping splitMDGM x splitMDGM subdivisions, and each
    subdivision is run through the module-level `model`. Where the predicted tiles overlap, every pixel keeps
    the prediction of the tile whose `cachedWindow2D` weight is highest at that pixel (the earliest tile wins ties).

    Parameters:
    mdgmPath -- path to an mdgm image in a file directory that resembles https://doi.org/10.7910/DVN/U3766S. See getInfo_compute() in utils for description of folder structure
    splitMDGM -- integer equal to the input size of the model (default 668)
    probs -- returns the cloud probability if True, returns the binary classification if False (default False)
    c -- integer that alters the automated polar extents by c degrees. See getPolarBoundsNew() in utils for usage. Not used if bounding is not None (default 3)
    bounding -- optional tuple of four coordinates to manually dictate the bounding box instead of using the ls value. Tuple should be in format (minX, maxX, minY, maxY) (default None)

    Returns:
    (mdgm, cloudMask) -- the opened PIL image and a float array of shape (mdgm.height, mdgm.width) holding the
    prediction for each pixel, with -999 wherever no prediction was made or the pixel was masked out

    Raises:
    ValueError -- if the bounded region is too small to hold a single subdivision
    '''
    # gets output mask size (668 --> 484)
    splitMask = int(checkUNETNum(splitMDGM))

    mdgm = Image.open(mdgmPath)

    if bounding is not None:
        (lowX, highX, lowY, highY) = bounding
        # the code below treats highX/highY as inclusive (it uses high + 1 - low as the extent)
        highX, highY = highX - 1, highY - 1
    else:
        latN, latS = getPolarBoundsNew(getInfo_compute(mdgmPath)[0], c)

        (lowY, highY) = (boundToPixel(latN), boundToPixel(latS))
        (lowX, highX) = getBlackBounds(mdgm, 'lr', (lowY, highY))

    paddedMDGM = padMDGM(mdgm, highX, lowX, highY, lowY)

    # calculates subdivision data: xSplits, ySplits = number of splits for each axis; xSize, ySize = size of each subdivision
    regionWidth = highX + 1 - lowX
    regionHeight = highY + 1 - lowY
    xSplits = int(np.ceil(regionWidth / splitMask * 2 - 1))
    ySplits = int(np.ceil(regionHeight / splitMask * 2 - 1))
    if xSplits < 1 or ySplits < 1:
        raise ValueError(
            "bounded region ({} x {} px) is too small to subdivide with a mask size of {}".format(
                regionWidth, regionHeight, splitMask))
    xSize = regionWidth / (xSplits + 1)
    ySize = regionHeight / (ySplits + 1)

    # Running stitch state, both defaulting to -999: the best weight seen so far for every pixel and
    # the prediction that came with it. This replaces one full-size (prediction, weight) layer per
    # subdivision, so memory no longer grows with the number of subdivisions.
    cloudMask = np.ndarray((mdgm.height, mdgm.width))
    cloudMask.fill(-999)
    bestWeight = np.ndarray((mdgm.height, mdgm.width))
    bestWeight.fill(-999)

    # per-tile weights, viewed as a plain (splitMask, splitMask) grid
    tileWeights = np.array(cachedWindow2D).reshape((splitMask, splitMask))

    for i in range(xSplits):
        for j in range(ySplits):
            minX = round(xSize * i)
            maxX = minX + splitMDGM

            minY = round(ySize * j)
            maxY = minY + splitMDGM

            # safety: final subdivisions are based on ends of mdgm, not the running split count
            if i == xSplits - 1:
                maxX = paddedMDGM.width
                minX = maxX - splitMDGM

            if j == ySplits - 1:
                maxY = paddedMDGM.height
                minY = maxY - splitMDGM

            subMDGM = np.expand_dims(np.array(paddedMDGM.crop((minX, minY, maxX, maxY))), axis=0)

            # model prediction for each subdivision
            prediction = model.predict(subMDGM)
            if not probs:
                prediction = binaryCloudMask(prediction)
            prediction = prediction.reshape((splitMask, splitMask))

            # where this tile's predictions land in full-image coordinates
            rows = slice(lowY + minY, lowY + minY + splitMask)
            cols = slice(lowX + minX, lowX + minX + splitMask)

            # Stitching: keep this tile's prediction only where its weight strictly beats the best so far.
            # The strict comparison lets the earliest tile win ties, matching np.argmax over stacked tiles.
            better = tileWeights > bestWeight[rows, cols]
            cloudMask[rows, cols][better] = prediction[better]
            bestWeight[rows, cols][better] = tileWeights[better]

    # allThreeOnly mask, faster implementation
    nanImg = np.array(mdgm)
    rgbProduct = nanImg.prod(axis=2)
    # NOTE(review): `&` binds tighter than `|`, so the first clause (channels 1 and 2 both < 20) is already
    # covered by the second (channel 2 < 30). The expression is kept as written; confirm the intended thresholds.
    invalid = ((nanImg[..., 1] < 20) & (nanImg[..., 2] < 20)) | (nanImg[..., 2] < 30) | (rgbProduct == 0)
    cloudMask[invalid] = -999

    return mdgm, cloudMask

In [None]:
# Runs through all mdgms located under masterPath and stores the model's output under savePath.

# masterPath must follow the format of https://doi.org/10.7910/DVN/U3766S. The folder should contain one or
# more year folders; the loop in this cell expects each year folder to hold one folder per subphase, and each
# subphase folder to hold the mdgm .jpg files plus a "<subphase>_ls.txt" file.
masterPath = "./data/raw"
savePath = "./data/preds"

def makeNetCDF(cloudMask, saveMaskPath):
    '''
    Creates NetCDF file for an inputted cloudmask array.

    Parameters:
    cloudMask -- array written to the 'cloudmask' variable, whose dimensions are (y, x) = (1801, 3600)
    saveMaskPath -- path of the NetCDF file to create; it is opened in 'w' mode with format NETCDF4_CLASSIC

    The file also gets a 'longitude' variable (3600 values from -180 to 179.9) and a 'latitude'
    variable (1801 values from -90 to 90), all stored as float32 with zlib compression.
    '''
    # the context manager closes the file even when one of the writes below raises
    with Dataset(saveMaskPath, 'w', format='NETCDF4_CLASSIC') as root:
        root.set_fill_off()

        # dimensions
        root.createDimension('x', 3600)
        root.createDimension('y', 1801)

        # variables
        longitude = root.createVariable('longitude', 'float32', ('x',), zlib=True)
        latitude = root.createVariable('latitude', 'float32', ('y',), zlib=True)
        cloudmask = root.createVariable('cloudmask', 'float32', ('y', 'x'), zlib=True)

        # data
        longitude[:] = np.linspace(-180, 179.9, 3600, dtype=np.float32)
        latitude[:] = np.linspace(-90, 90, 1801, dtype=np.float32)
        cloudmask[...] = cloudMask

# Create the output root (and any missing parents). An already-existing directory is fine;
# any other failure (e.g. permissions) is raised here instead of being silently swallowed.
os.makedirs(savePath, exist_ok=True)

# Walk masterPath/<year>/<subphase>/ and write the results to the mirrored folders under savePath:
#   <subphase>_ls.txt               copied unchanged
#   mdgms/<subAndDay>.jpg           copy of each source mdgm
#   cloudmasks/cloudmask_<subAndDay>.ncdf   the model's cloud mask for that mdgm
for year in [i for i in os.listdir(masterPath) if os.path.isdir(os.path.join(masterPath, i))]:
    print(year)
    yearPath = os.path.join(masterPath, year)
    subphases = [i for i in os.listdir(yearPath) if os.path.isdir(os.path.join(yearPath, i))]
    for subphase in tqdm(subphases):
        rawDir = os.path.join(yearPath, subphase)
        outDir = os.path.join(savePath, year, subphase)
        mdgmOutDir = os.path.join(outDir, 'mdgms')
        maskOutDir = os.path.join(outDir, 'cloudmasks')

        # exist_ok lets the cell be re-run over an existing output tree instead of raising FileExistsError
        os.makedirs(mdgmOutDir, exist_ok=True)
        os.makedirs(maskOutDir, exist_ok=True)

        lsName = "{}_ls.txt".format(subphase)
        copy(os.path.join(rawDir, lsName), os.path.join(outDir, lsName))

        for mdgmPath in glob.glob(os.path.join(rawDir, '*.jpg')):
            # the first 9 characters of the file name identify the mdgm in the output names
            subAndDay = os.path.basename(mdgmPath)[:9]
            (mdgm, clouds) = subDivs_compute(mdgmPath)

            # glob already returns the path including rawDir; joining it onto the directory again
            # only works when masterPath is absolute and breaks for a relative masterPath
            copy(mdgmPath, os.path.join(mdgmOutDir, '{}.jpg'.format(subAndDay)))

            # NOTE(review): rows are flipped before writing, presumably so row 0 lines up with
            # latitude -90 (makeNetCDF writes latitude ascending) -- confirm.
            makeNetCDF(np.flip(clouds, axis=0), os.path.join(maskOutDir, "cloudmask_{}.ncdf".format(subAndDay)))

B


100%|██████████| 19/19 [50:11<00:00, 158.52s/it]


D


100%|██████████| 22/22 [58:17<00:00, 158.97s/it]


F


100%|██████████| 23/23 [1:03:15<00:00, 165.02s/it]


G


100%|██████████| 23/23 [1:02:48<00:00, 163.83s/it]


J


100%|██████████| 8/8 [22:21<00:00, 167.74s/it]


P


100%|██████████| 22/22 [58:35<00:00, 159.81s/it]
