In [1]:
import spectral as spy
from matplotlib import pyplot
import numpy as np
import h5py
import sys, os
from PIL import Image

Define functions for reading TIF and HDR files, and for zeroing pixel values according to the mask data

In [2]:
def readTifImage(file_name):
    """Load an image file and return its pixels as a NumPy array.

    Parameters
    ----------
    file_name : str
        Path of the image to read. If the path does not exist as given, it is
        looked up inside 'dataset/CloudMaskExamples/', so a bare file name taken
        from a listing of that directory also works.

    Returns
    -------
    numpy.ndarray
        The result of ``np.array`` applied to the opened image.

    Raises
    ------
    FileNotFoundError
        Propagated from ``Image.open`` if the file cannot be found in either place.
    """
    # The previous version ignored `file_name` and always opened Mask1.tif,
    # so every requested mask silently resolved to the same image.
    path = file_name
    if not os.path.exists(path):
        path = os.path.join('dataset/CloudMaskExamples/', file_name)

    # Convert to an array while the file is still open, then release the handle.
    with Image.open(path) as im:
        return np.array(im)

In [3]:
def readHDRFile(file_name, band):
    """Read a single band from an image opened with ``spectral.open_image``.

    Parameters
    ----------
    file_name : str
        Path handed to ``spy.open_image`` (the notebook passes ``.hdr`` files).
    band : int
        Index of the band, forwarded unchanged to ``read_band``.

    Returns
    -------
    The value returned by ``read_band`` for the requested band.
    """
    return spy.open_image(file_name).read_band(band)

In [4]:
def addCloudMaskOverData(data, mask):
    """Return a copy of ``data`` with every position where ``mask`` equals 0 set to 0.

    Parameters
    ----------
    data : array-like
        Values to mask; the input itself is left untouched.
    mask : array-like
        Converted with ``np.asarray``; positions comparing equal to 0 are cleared.
        It is used as a boolean index into the copy, so it must be index-compatible
        with ``data``.

    Returns
    -------
    numpy.ndarray
        The masked copy.
    """
    result = np.array(data, copy=True)
    zero_positions = (np.asarray(mask) == 0)
    result[zero_positions] = 0
    return result

In [5]:
def get2DMaskArray(mask):
    """Collapse a 3-D mask image to 2-D by summing over axis 2 and subtracting 255.

    Parameters
    ----------
    mask : array-like
        Array with at least three dimensions; axis 2 is summed away.

    Returns
    -------
    numpy.ndarray
        ``np.sum(mask, axis=2) - 255``.

    Notes
    -----
    The placeholder ``np.zeros`` array that used to be allocated first was dead
    code (overwritten on the next line) and has been removed.
    """
    # NOTE(review): presumably the constant 255 cancels a fully opaque alpha
    # channel - confirm against the mask files. For unsigned integer input the
    # subtraction wraps around if a pixel's channel sum is below 255.
    return np.sum(mask, axis=2) - 255

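Read the list of input scenes and cloud masks. The last 5 (L8) scenes × 9 masks are intended for validation; note that the code below takes folders 0–9 for training and folders 15–24 for testing.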

In [6]:
CLOUD_FREE_DIR = 'dataset/CloudFree/'
CLOUD_MASK_DIR = 'dataset/CloudMaskExamples/'

# Fail with a clear message instead of an undefined-name error further down.
for _dataDir in (CLOUD_FREE_DIR, CLOUD_MASK_DIR):
    if not os.path.isdir(_dataDir):
        raise FileNotFoundError('Dataset directory not found: ' + _dataDir)

# Only the top level of each directory is needed: take the first os.walk entry.
# `root` is reused later to build the path of every scene folder.
root, folders, files = next(os.walk(CLOUD_FREE_DIR))
# Directory listings come back in arbitrary order; sort so the train/test split
# is the same on every run and every machine.
folders.sort()
listInputTrain = folders[:10]
listInputTest = folders[15:25]

# `files` is rebound to the mask file names, as before.
_maskRoot, _maskDirs, files = next(os.walk(CLOUD_MASK_DIR))
files.sort()
listMask = files

## HDF5 structures


**label**: Original image

**mask**: Mask used

**masked**: Masked image

**ref**: Reference image

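**ratio**: Per-band divisor used for normalization (the maximum over the label and reference bands). All stored values are divided by it; multiply by it to recover the original values.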

## Create the HDF5 file containing the training set

### Initialization

In [7]:
# Pair each training image with a reference image through a random permutation
nTrain, nMask = len(listInputTrain), len(listMask)

refImg = np.arange(nTrain)
np.random.shuffle(refImg)

# One row per (mask, image) combination; images are 400x400 with 3 bands
nSample = nMask*nTrain
imageShape = (nSample, 400, 400, 3)

lLabel = np.zeros(imageShape, dtype=np.float32)
lMask = np.zeros((nSample, 400, 400), dtype=np.float32)
lMasked = np.zeros(imageShape, dtype=np.float32)
lRef = np.zeros(imageShape, dtype=np.float32)
lRatio = np.zeros((nSample, 3), dtype=np.float32)

For each mask, create one training sample per training image

In [8]:
# Fill the training arrays: one sample for every (mask, training image) pair.
for j in range(nMask):
    # 2-D mask for this mask file; positions equal to 0 are zeroed in the masked image
    _mask = get2DMaskArray(readTifImage(listMask[j]))

    for i in range(nTrain):
        # Samples are stored mask-major, so each mask owns a contiguous run of
        # nTrain rows. The stride must be nTrain: with nMask as the stride, runs
        # of different masks overlapped and the trailing rows were never written.
        nSet = j*nTrain + i

        labelPath = os.path.join(root, listInputTrain[i], 'input_without_mask.hdr')
        refPath = os.path.join(root, listInputTrain[refImg[i]], 'input_without_mask.hdr')

        # Read and normalize each band separately
        for b in range(3):
            # Read cloud-free data
            label = readHDRFile(labelPath, b)
            # Add mask over label
            masked = addCloudMaskOverData(data=label, mask=_mask)
            # Read reference
            ref = readHDRFile(refPath, b)
            # Common scale for this band: the larger of the two maxima
            ratio = max(np.max(label), np.max(ref))
            # Normalize out-of-place so integer-typed bands do not fail on in-place
            # true division.
            # NOTE(review): a band that is entirely zero gives ratio == 0 and
            # produces NaNs here - confirm whether that can occur in the data.
            label = label / ratio
            masked = masked / ratio
            ref = ref / ratio

            # The target arrays are float32; assignment performs the cast
            lLabel[nSet,:,:,b] = label
            lMasked[nSet,:,:,b] = masked
            lRef[nSet,:,:,b] = ref
            lRatio[nSet,b] = ratio

        lMask[nSet,:,:] = _mask

In [9]:
# Write the training arrays to a single HDF5 file. The context manager closes
# the file even if one of the datasets fails to write, so a re-run of this cell
# does not trip over a handle left open by a failed attempt.
with h5py.File('small_train.hdf5', 'w') as f:
    f.create_dataset('label', data=lLabel)
    f.create_dataset('mask', data=lMask)
    f.create_dataset('masked', data=lMasked)
    f.create_dataset('ref', data=lRef)
    f.create_dataset('ratio', data=lRatio)

## Create the HDF5 file containing the test set

### Initialization

In [10]:
# Pair each test image with a reference image through a random permutation
nTest, nMask = len(listInputTest), len(listMask)

refImg = np.arange(nTest)
np.random.shuffle(refImg)

# One row per (mask, image) combination; images are 400x400 with 3 bands
nSample = nMask*nTest
imageShape = (nSample, 400, 400, 3)

lLabel = np.zeros(imageShape, dtype=np.float32)
lMask = np.zeros((nSample, 400, 400), dtype=np.float32)
lMasked = np.zeros(imageShape, dtype=np.float32)
lRef = np.zeros(imageShape, dtype=np.float32)
lRatio = np.zeros((nSample, 3), dtype=np.float32)

For each mask, create one test sample per test image

In [11]:
# Fill the test arrays: one sample for every (mask, test image) pair.
for j in range(nMask):
    # 2-D mask for this mask file; positions equal to 0 are zeroed in the masked image
    _mask = get2DMaskArray(readTifImage(listMask[j]))

    for i in range(nTest):
        # Samples are stored mask-major, so each mask owns a contiguous run of
        # nTest rows. The stride must be nTest: with nMask as the stride, runs
        # of different masks overlapped and the trailing rows were never written.
        nSet = j*nTest + i

        labelPath = os.path.join(root, listInputTest[i], 'input_without_mask.hdr')
        refPath = os.path.join(root, listInputTest[refImg[i]], 'input_without_mask.hdr')

        # Read and normalize each band separately
        for b in range(3):
            # Read cloud-free data
            label = readHDRFile(labelPath, b)
            # Add mask over label
            masked = addCloudMaskOverData(data=label, mask=_mask)
            # Read reference
            ref = readHDRFile(refPath, b)
            # Common scale for this band: the larger of the two maxima
            ratio = max(np.max(label), np.max(ref))
            # Normalize out-of-place so integer-typed bands do not fail on in-place
            # true division.
            # NOTE(review): a band that is entirely zero gives ratio == 0 and
            # produces NaNs here - confirm whether that can occur in the data.
            label = label / ratio
            masked = masked / ratio
            ref = ref / ratio

            # The target arrays are float32; assignment performs the cast
            lLabel[nSet,:,:,b] = label
            lMasked[nSet,:,:,b] = masked
            lRef[nSet,:,:,b] = ref
            lRatio[nSet,b] = ratio

        lMask[nSet,:,:] = _mask

In [12]:
# Write the test arrays to a single HDF5 file. The context manager closes the
# file even if one of the datasets fails to write, so a re-run of this cell
# does not trip over a handle left open by a failed attempt.
with h5py.File('small_test.hdf5', 'w') as f:
    f.create_dataset('label', data=lLabel)
    f.create_dataset('mask', data=lMask)
    f.create_dataset('masked', data=lMasked)
    f.create_dataset('ref', data=lRef)
    f.create_dataset('ratio', data=lRatio)