In [24]:
import spectral as spy
from matplotlib import pyplot
import numpy as np
import h5py
import sys, os
from PIL import Image
import cv2

Define helper functions for reading TIFF and HDR files, and for zeroing out pixel values according to the mask data

In [25]:
def readTifImage(file_name, target_size=(400, 400)):
    """Load an image file into a NumPy array with a fixed spatial size.

    Parameters
    ----------
    file_name : str
        Path of the image file, opened with ``PIL.Image.open``.
    target_size : tuple of int, optional
        Required ``(height, width)`` of the returned array. Defaults to
        ``(400, 400)``.

    Returns
    -------
    numpy.ndarray
        The pixel data. It is passed through ``cv2.resize`` only when its
        two leading dimensions differ from ``target_size``.
    """
    # Release the file handle as soon as the pixels have been copied out.
    with Image.open(file_name) as im:
        data = np.array(im)

    # ``data.shape`` is a tuple, so the previous comparison against the list
    # ``[400, 400]`` was always unequal and every image got resized.
    # Only the two leading axes are compared so that multi-channel images
    # of the right size are left untouched as well.
    height, width = target_size
    if data.shape[:2] != (height, width):
        # cv2.resize takes the destination size as (width, height).
        data = cv2.resize(data, (width, height))
    return data

In [26]:
def readHDRFile(file_name, band):
    """Read a single band from an image opened with ``spectral``.

    Parameters
    ----------
    file_name : str
        Path handed to ``spy.open_image``.
    band : int
        Index of the band passed to ``read_band``.

    Returns
    -------
    The object returned by ``read_band`` for the requested band.
    """
    return spy.open_image(file_name).read_band(band)

In [27]:
def addCloudMaskOverData(data, mask):
    """Return a copy of ``data`` with the masked-out pixels set to zero.

    Every position where ``mask`` equals 0 is written as 0 in the copy;
    all other positions keep their value. ``data`` itself is not modified.
    """
    result = np.array(data, copy=True)
    zeroPositions = np.asarray(mask) == 0
    result[zeroPositions] = 0
    return result

In [28]:
def get2DMaskArray(mask):
    """Collapse a multi-channel mask image into one 2-D array.

    The values along axis 2 (the channel axis) are summed for every pixel
    and 255 is subtracted from the sum.
    NOTE(review): the 255 offset presumably cancels a fully opaque alpha
    channel so that black pixels map to 0 - confirm against the mask files.

    Parameters
    ----------
    mask : array-like with at least three dimensions
        Mask image whose third axis holds the channels.

    Returns
    -------
    numpy.ndarray
        Array with axis 2 removed, holding ``sum(channels) - 255``.
    """
    channelSum = np.sum(mask, axis=2)
    # Summing an unsigned image (e.g. uint8) yields an unsigned result, so
    # subtracting 255 from a sum below 255 would wrap around to a huge
    # positive number. Switch to a signed type before subtracting.
    if np.issubdtype(channelSum.dtype, np.unsignedinteger):
        channelSum = channelSum.astype(np.int64)
    return channelSum - 255

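Read the list of inputs. The last 5 (L8) × 9 (masks) samples will be used for validation. (Note: the code below holds out the last 10 folders — confirm the intended count.)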

In [29]:
# List the scene folders directly inside the cloud-free directory. Only the
# first os.walk() entry (the directory itself) is needed.
_topLevel = next(os.walk('dataset/CloudFree/'), None)
if _topLevel is None:
    # os.walk() silently yields nothing for a missing/unreadable directory,
    # which previously left `folders` undefined (or stale from an old run).
    raise FileNotFoundError("Cannot list directory 'dataset/CloudFree/'")
# `root` is kept at module level: later cells join scene paths onto it.
root, folders, files = _topLevel
# Directory listings come back in arbitrary order; sort so that the
# train/test split below is the same on every machine and every run.
folders = sorted(folders)
listInputTrain = folders[:-10]
listInputTest = folders[-10:]

# List the cloud-mask files, again in a deterministic order.
_topLevel = next(os.walk('dataset/CloudMaskExamples/'), None)
if _topLevel is None:
    raise FileNotFoundError("Cannot list directory 'dataset/CloudMaskExamples/'")
files = sorted(_topLevel[2])
listMask = files

## HDF5 structures


**label**: Original image

**mask**: Mask used

**masked**: Masked image

**ref**: Referenced image

**ratio**: All values are scaled to [0, 1]; this is the original ratio the values were divided by

## Create the HDF5 file containing the training set

### Initialization

In [30]:
# Pair every training scene with a randomly chosen reference scene.
nTrain = len(listInputTrain)
nMask = len(listMask)

# refImg[i] is the index (into listInputTrain) of the reference for scene i.
# A privately seeded generator makes the pairing identical on every
# "Restart & Run All" without touching the global np.random state.
# NOTE(review): a scene can still be paired with itself - confirm that this
# is acceptable.
refImg = np.arange(nTrain)
np.random.RandomState(0).shuffle(refImg)

nBands = 6      # number of bands read from every scene
imgSize = 400   # height and width of every sample, in pixels

# One sample per (mask, scene, band) combination.
nSamples = nMask * nTrain * nBands
lLabel = np.zeros((nSamples, imgSize, imgSize, 1), dtype=np.float32)
lMask = np.zeros((nSamples, imgSize, imgSize), dtype=np.float32)
lMasked = np.zeros((nSamples, imgSize, imgSize, 1), dtype=np.float32)
lRef = np.zeros((nSamples, imgSize, imgSize, 1), dtype=np.float32)

For each mask, create one training-set

In [31]:
# Load every cloud mask once, in listMask order.
masks = [
    get2DMaskArray(readTifImage(os.path.join('dataset', 'CloudMaskExamples', listMask[j])))
    for j in range(nMask)
]

p = -1  # index of the last sample written; stays -1 if nothing is written

for i in range(nTrain):
    labelHdr = os.path.join(root, listInputTrain[i], 'input_without_mask.hdr')
    refHdr = os.path.join(root, listInputTrain[refImg[i]], 'input_without_mask.hdr')

    # The cloud-free scene and its reference do not depend on the mask, so
    # each band is read once here instead of once per mask.
    labelBands = [readHDRFile(labelHdr, b) for b in range(nBands)]
    refBands = [readHDRFile(refHdr, b) for b in range(nBands)]

    for j, _mask in enumerate(masks):
        for b in range(nBands):
            # Same sample layout as a mask -> scene -> band nesting.
            p = (j * nTrain + i) * nBands + b
            # Values are stored unscaled; assignment casts them to float32.
            lLabel[p, :, :, 0] = labelBands[b]
            lMask[p, :, :] = _mask[:, :]
            lMasked[p, :, :, 0] = addCloudMaskOverData(data=labelBands[b], mask=_mask)
            lRef[p, :, :, 0] = refBands[b]

In [32]:
# Write the training arrays to disk. The context manager closes the file
# even when one of the writes raises, so no half-open handle is left behind.
with h5py.File('train1608_unscale.hdf5', 'w') as f:
    f.create_dataset('label', data=lLabel)
    f.create_dataset('mask', data=lMask)
    f.create_dataset('masked', data=lMasked)
    f.create_dataset('ref', data=lRef)

## Create the HDF5 file containing the test set

### Initialization

In [33]:
# Pair every test scene with a randomly chosen reference scene.
nTest = len(listInputTest)
nMask = len(listMask)

# refImg[i] is the index (into listInputTest) of the reference for scene i.
# A privately seeded generator makes the pairing identical on every
# "Restart & Run All" without touching the global np.random state.
# NOTE(review): a scene can still be paired with itself - confirm that this
# is acceptable.
refImg = np.arange(nTest)
np.random.RandomState(0).shuffle(refImg)

nBands = 6      # number of bands read from every scene
imgSize = 400   # height and width of every sample, in pixels

# One sample per (mask, scene, band) combination.
nSamples = nMask * nTest * nBands
lLabel = np.zeros((nSamples, imgSize, imgSize, 1), dtype=np.float32)
lMask = np.zeros((nSamples, imgSize, imgSize), dtype=np.float32)
lMasked = np.zeros((nSamples, imgSize, imgSize, 1), dtype=np.float32)
lRef = np.zeros((nSamples, imgSize, imgSize, 1), dtype=np.float32)


For each mask, create one test set

In [34]:
# Load every cloud mask once, in listMask order.
masks = [
    get2DMaskArray(readTifImage(os.path.join('dataset', 'CloudMaskExamples', listMask[j])))
    for j in range(nMask)
]

p = -1  # index of the last sample written; stays -1 if nothing is written

for i in range(nTest):
    labelHdr = os.path.join(root, listInputTest[i], 'input_without_mask.hdr')
    refHdr = os.path.join(root, listInputTest[refImg[i]], 'input_without_mask.hdr')

    # The cloud-free scene and its reference do not depend on the mask, so
    # each band is read once here instead of once per mask.
    labelBands = [readHDRFile(labelHdr, b) for b in range(nBands)]
    refBands = [readHDRFile(refHdr, b) for b in range(nBands)]

    for j, _mask in enumerate(masks):
        for b in range(nBands):
            # Same sample layout as a mask -> scene -> band nesting.
            p = (j * nTest + i) * nBands + b
            # Values are stored unscaled; assignment casts them to float32.
            lLabel[p, :, :, 0] = labelBands[b]
            lMask[p, :, :] = _mask[:, :]
            lMasked[p, :, :, 0] = addCloudMaskOverData(data=labelBands[b], mask=_mask)
            lRef[p, :, :, 0] = refBands[b]
            

In [35]:
# Write the test arrays to disk. The context manager closes the file even
# when one of the writes raises, so no half-open handle is left behind.
with h5py.File('test1608_unscale.hdf5', 'w') as f:
    f.create_dataset('label', data=lLabel)
    f.create_dataset('mask', data=lMask)
    f.create_dataset('masked', data=lMasked)
    f.create_dataset('ref', data=lRef)