In [84]:
'''
/*Copyright (c) 2021 SERVIR-Mekong

https://mygeoblog.com/2021/04/12/using-convolutional-neural-networks-part-1/
   
Permission is hereby granted, free of charge, to any person obtaining a copy
of the data and associated documentation files, to deal in the data
without restriction, including without limitation the rights to use, copy, modify,
merge, publish, distribute, sublicense, and/or sell copies, and to permit persons
to whom the data is furnished to do so, subject to the following conditions:
   
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
   
THE DATA IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''
 
import ee
import geemap
from time import sleep
import math
import numpy as np
import random
from numpy.random import seed
from numpy.random import rand
 
ee.Initialize()
 

In [85]:
# Load the label tiles (an Earth Engine image collection asset).
sparcs = ee.ImageCollection('users/andreanascetti/IEEE/Nice_Labels')

# Interactive map, centred on the label collection.
Map = geemap.Map()
Map.centerObject(sparcs)

# Sixteen colours, stretched over label values 0..15 by `label_params` below.
# The bracketed numbers are the author's class ids; they are offset from the
# list positions because of the leading black entry and the two unnumbered rows.
palette = [
    '000000',
    'FF5733',  # (0)  Urban fabric
    'F98A04',  # (1)  Industrial, commercial, public, military, private and transport units
    'FBDB10',  # (2)  Mine, dump and construction sites
    'B7E603',  # (3)  Artificial non-agricultural vegetated areas
    'B6FF45',  # (4)  Arable land (annual crops)
    '5FB331',  # (5)  Permanent crops
    '3FAF17',  # (6)  Pastures
    'FCE5CD',  # ()   complex and mixed...
    'C6ECB6',  # ()   orchards at the fringe
    '237605',  # (7)  Forests
    '43B680',  # (8)  Herbaceous vegetation associations
    '9B621A',  # (9)  Open spaces with little or no vegetation
    '0691C3',  # (10) Wetlands
    '085CB8',  # (11) Water
    '000000',
]

# Visualisation parameters shared by every label layer in this notebook.
label_params = dict(min=0, max=15, palette=palette)
Map.addLayer(sparcs, label_params, "Labels_Nice")

# Last expression of the cell: render the map widget.
Map

Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Togg…

In [86]:
# Date filter applied to the Sentinel-2 collection further down.
period_of_interest = ee.Filter.date('2017-01-01', '2019-12-31')
# Map.user_rois.getInfo()

# Closed lon/lat ring (first vertex repeated at the end) describing a
# rectangular area of interest.
Area_Nice = ee.Geometry.Polygon(
    [
        [
            [6.560201, 43.526958],
            [6.560201, 44.371294],
            [7.51888, 44.371294],
            [7.51888, 43.526958],
            [6.560201, 43.526958],
        ]
    ]
)

In [87]:
# This is the cloud masking function provided by GEE but adapted for use in Python.
def maskS2clouds(image):
    """Mask cloud and cirrus pixels using the QA60 band and rescale by 1/10000.

    Args:
        image: An ee.Image that has a 'QA60' band.

    Returns:
        An ee.Image in which pixels flagged as cloud or cirrus are masked,
        all band values are divided by 10000, and the
        'system:time_start' property is copied over from ``image``.
    """
    qa = image.select('QA60')

    # Bits 10 and 11 are clouds and cirrus, respectively.
    cloudBitMask = 1 << 10  # 1024
    cirrusBitMask = 1 << 11  # 2048

    # Both flags must be zero for a pixel to count as clear. Each bit has to be
    # tested against the QA band itself: testing the cirrus bit against the
    # 0/1 cloud mask (as the previous version did) is always zero, which made
    # the final mask all-ones and disabled the masking entirely.
    cloud_free = qa.bitwiseAnd(cloudBitMask).eq(0)
    cirrus_free = qa.bitwiseAnd(cirrusBitMask).eq(0)
    mask = cloud_free.And(cirrus_free)

    helper = image.updateMask(mask).divide(10000)
    # divide() returns a new image, so the timestamp is copied back explicitly.
    helper = ee.Image(helper.copyProperties(image, properties=["system:time_start"]))

    return helper

In [88]:
# Builds a square kernel of the given size, for neighbourhood sampling in GEE
def get_kernel(kernel_size):
    """Return a ``kernel_size`` x ``kernel_size`` fixed kernel whose weights are all 1.

    Args:
        kernel_size: Width and height of the kernel.

    Returns:
        The ee.Kernel created by ``ee.Kernel.fixed``.
    """
    ones_row = ee.List.repeat(1, kernel_size)
    weights = ee.List.repeat(ones_row, kernel_size)
    return ee.Kernel.fixed(kernel_size, kernel_size, weights)

In [99]:
# Define kernel size
kernel_size = 256
image_kernel = get_kernel(kernel_size)  # 256x256 matrix of ones

# Specify inputs (Sentinel-2 bands) to the model and the response variable.
opticalBands = ["B2", "B3", "B4", "B5", "B6", "B7", "B8", "B8A", "B11", "B12"]

BANDS = opticalBands
# NOTE(review): the band list printed inside the loop below ends with a single
# label band 'b1', not these five names. The rename that would produce them is
# commented out on the sparcIm line -- confirm before enabling the export.
RESPONSE = ['cloud', 'shadow', 'snow', 'water', 'land']
FEATURES = BANDS + RESPONSE

Map = geemap.Map()
vis_yellow = {'color': 'f5e105ff'}  # yellow
vis_green = {'color': '00FF00'}  # green
vis_blue = {'color': '0000FF'}  # blue

# True-colour rendering of the reflectance image (values were divided by 10000
# in maskS2clouds).
vis_s2 = {
    'bands': ['B4', 'B3', 'B2'],
    'min': 0,
    'max': 0.3,
}

# Only label tile 14 is processed; widen the range to cover more tiles.
for i in range(14, 15):  # ,80,1):

    # get the sparc image
    sparcIm = ee.Image(sparcs.toList(100).get(i))  # .select(['b1','b2','b3','b4','b5'],RESPONSE) # select([inputBandNames],[newBandNames])
    Map.centerObject(sparcIm)

    # Shrink the tile footprint so sample points stay away from the edge.
    # NOTE(review): the original comment asks for at least 128 pixels x 30 m,
    # but the buffer is only 40 m -- confirm which one is intended.
    geom = sparcIm.geometry().buffer(-40)  # in meter, negative means the geom is contracted
    # Map.addLayer(geom,{'color': 'FF0000'},'geom {}'.format(i))

    # create training, testing and validation points; i, i*31 and i*17 are the random seeds.
    pointsTrain = ee.FeatureCollection.randomPoints(geom, 7, i)  # (region, number of points, seed, maxError)
    pointsTest = ee.FeatureCollection.randomPoints(geom, 2, i * 31)
    pointsVal = ee.FeatureCollection.randomPoints(geom, 1, i * 17)

    # get the S2 imagery covering the label tile
    dataset = (
        ee.ImageCollection('COPERNICUS/S2_SR')
        .filterBounds(sparcIm.geometry())
        .filter(period_of_interest)
        .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10))
        .map(maskS2clouds)
        .select(opticalBands)
    )
    # `.clip` has to be *called*: the previous code stored the bound method
    # itself, so the addLayer/addBands calls below could not work. The first
    # matching scene is clipped to the label tile's footprint.
    s2Image = ee.Image(dataset.first()).clip(sparcIm.geometry())
    Map.addLayer(s2Image, vis_s2, 's2 image {}'.format(i))

    # combine the image with the sparc image
    image = s2Image.addBands(sparcIm)  # .unmask(0,False)

    # create the neighborhood kernel for sampling
    neighborhood = image.neighborhoodToArray(image_kernel)  # image
    print(neighborhood.getInfo().keys())
    print(neighborhood.bandNames().getInfo())  # ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12', 'b1']
    # Map.addLayer(neighborhood,{},'neighborhood')

    # sample the training, testing and validation patches
    trainingDataTrain = neighborhood.sample(region=pointsTrain, scale=30, tileScale=16, geometries=True)  # <class 'ee.featurecollection.FeatureCollection'>
    trainingDataTest = neighborhood.sample(region=pointsTest, scale=30, tileScale=16, geometries=True)
    trainingDataVal = neighborhood.sample(region=pointsVal, scale=30, tileScale=16, geometries=True)

    # print(trainingDataVal.getInfo().keys())

    # Map.addLayer(image,vis_s2,'image {}'.format(i))

    Map.addLayer(sparcIm, label_params, 'labels {}'.format(i))

    Map.addLayer(pointsTrain, vis_yellow, 'points Train {}'.format(i))
    # Map.addLayer(pointsTest,vis_green,'points Test {}'.format(i))
    # Map.addLayer(pointsVal,vis_blue,'points Val {}'.format(i))

    # export training, test, val patches as TFRecord (disabled: `folder`,
    # `trainFilePrefix` and `outputBucket` are not defined in this notebook)
    # trainingTaskTrain = ee.batch.Export.table.toCloudStorage(collection= ee.FeatureCollection(trainingDataTrain),
    #                     description= "trainpatch"+str(i),
    #                     fileNamePrefix= folder+ trainFilePrefix,
    #                     bucket= outputBucket,
    #                     fileFormat= 'TFRecord',
    #                     selectors= FEATURES)
    # execute the tasks
    # trainingTaskTrain.start()
    # trainingTaskTest.start()
    # trainingTaskVal.start()

Map
    

dict_keys(['type', 'bands', 'properties'])
['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12', 'b1']


Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Togg…

In [None]:
# Tensorflow setup.
import tensorflow as tf
print(tf.__version__)
 
from tensorflow.python.keras import layers
from tensorflow.python.keras import losses
from tensorflow.python.keras import models
from tensorflow.python.keras import metrics
from tensorflow.python.keras import optimizers
 

In [82]:
def parse_tfrecord(example_proto):
    """Parse one serialized Example using the module-level FEATURES_DICT spec.

    Args:
        example_proto: a serialized Example.

    Returns:
        A dictionary of tensors, keyed by feature name.
    """
    parsed = tf.io.parse_single_example(
        serialized=example_proto,
        features=FEATURES_DICT,
    )
    return parsed

def to_tuple(inputs):
    """Split a parsed example into a (features, labels) pair of HWC tensors.

    The tensors are looked up in FEATURES order, stacked on a new leading
    axis, and moved to channels-last. The first ``len(BANDS)`` channels become
    the model inputs and the remaining channels the outputs.

    Args:
        inputs: A dictionary of tensors, keyed by feature name.

    Returns:
        A tuple of (inputs, outputs).
    """
    n_bands = len(BANDS)

    channels = []
    for key in FEATURES:
        channels.append(inputs.get(key))

    # Stack as CHW, then reorder the axes to HWC.
    chw = tf.stack(channels, axis=0)
    hwc = tf.transpose(chw, [1, 2, 0])

    model_inputs = hwc[:, :, :n_bands]
    model_outputs = hwc[:, :, n_bands:]
    return model_inputs, model_outputs

def get_dataset(pattern):
    """Read, parse and convert to tuples all TFRecord files matching a pattern.

    Args:
        pattern: A file pattern to match (e.g. in a Cloud Storage bucket).

    Returns:
        A tf.data.Dataset yielding (inputs, outputs) tuples, as produced by
        ``parse_tfrecord`` followed by ``to_tuple``.

    Raises:
        FileNotFoundError: If no file matches ``pattern``. Without this check
            an empty match list is handed on silently and the failure only
            shows up later, far from the wrong path that caused it.
    """
    files = tf.io.gfile.glob(pattern)
    if not files:
        raise FileNotFoundError(
            'No TFRecord files match pattern: {!r}'.format(pattern))

    # The records are read as GZIP-compressed TFRecord files.
    dataset = tf.data.TFRecordDataset(files, compression_type='GZIP')
    dataset = dataset.map(parse_tfrecord, num_parallel_calls=5)
    dataset = dataset.map(to_tuple, num_parallel_calls=5)
    return dataset

def get_training_dataset(glob,eval=True):
    """Build a shuffled, batched dataset from the files matching ``glob``.

    Args:
        glob: File pattern forwarded to ``get_dataset``.
        eval: When true (the default) the batched dataset is repeated
            indefinitely; when false it is a single pass over the data.

    Returns:
        A tf.data.Dataset shuffled with BUFFER_SIZE and batched with BATCH_SIZE.
    """
    batched = get_dataset(glob).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
    return batched.repeat() if eval else batched

def conv_block(input_tensor, num_filters):
    """Apply two successive (3x3 Conv2D -> BatchNormalization -> ReLU) stages.

    Args:
        input_tensor: Tensor fed to the first convolution.
        num_filters: Number of filters used by both convolutions.

    Returns:
        The output tensor of the second ReLU activation.
    """
    x = input_tensor
    for _ in range(2):
        x = layers.Conv2D(num_filters, (3, 3), padding='same')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation('relu')(x)
    return x

def encoder_block(input_tensor, num_filters):
    """Run a conv block, then halve the spatial size with 2x2 max pooling.

    Args:
        input_tensor: Tensor entering this encoder level.
        num_filters: Number of filters for the conv block.

    Returns:
        A tuple ``(pooled, features)``: the max-pooled tensor passed to the
        next level, and the un-pooled conv-block output.
    """
    features = conv_block(input_tensor, num_filters)
    downsample = layers.MaxPooling2D((2, 2), strides=(2, 2))
    return downsample(features), features
 
def decoder_block(input_tensor, concat_tensor, num_filters):
    """Upsample, concatenate with ``concat_tensor`` and refine with two convs.

    Args:
        input_tensor: Tensor coming from the previous (coarser) level.
        concat_tensor: Tensor concatenated with the upsampled input along the
            last axis.
        num_filters: Number of filters for the transposed and regular convolutions.

    Returns:
        The output tensor of the final ReLU activation.
    """
    # Stride-2 transposed convolution doubles the spatial resolution.
    upsampled = layers.Conv2DTranspose(
        num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)

    x = layers.concatenate([concat_tensor, upsampled], axis=-1)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)

    for _ in range(2):
        x = layers.Conv2D(num_filters, (3, 3), padding='same')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation('relu')(x)
    return x
 
def get_model():
    """Build and compile the encoder/decoder segmentation network.

    The network has five encoder levels with n, 2n, 4n, 8n and 16n filters
    (n = 4), a centre conv block with 32n filters, and five mirrored decoder
    levels that each receive the matching encoder output. A 1x1 softmax
    convolution produces one channel per entry of RESPONSE.

    Returns:
        The compiled Keras model (Adam optimizer, categorical cross-entropy
        loss, categorical accuracy metric).
    """
    n = 4
    depth = 5
    inputs = layers.Input(shape=[None, None, len(BANDS)])

    # Encoder path: remember each level's un-pooled output for the decoder.
    skips = []
    x = inputs
    for level in range(depth):
        x, skip = encoder_block(x, n * 2 ** level)
        skips.append(skip)

    x = conv_block(x, n * 2 ** depth)  # center

    # Decoder path: walk the levels back up, deepest first.
    for level in reversed(range(depth)):
        x = decoder_block(x, skips[level], n * 2 ** level)

    # One output channel per response class. This equals the previous
    # hard-coded 5 and stays in step with the label channels that to_tuple
    # slices off after the first len(BANDS) channels.
    outputs = layers.Conv2D(len(RESPONSE), (1, 1), activation='softmax')(x)

    model = models.Model(inputs=[inputs], outputs=[outputs])

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    model.summary()

    model.compile(
        optimizer='ADAM',
        loss=losses.categorical_crossentropy,
        metrics=[metrics.categorical_accuracy])

    return model

In [None]:
# Patch size of the exported records. `KERNEL_SIZE` was never defined in this
# notebook (NameError on a fresh kernel); it must equal the `kernel_size` used
# when the patches were sampled.
KERNEL_SIZE = kernel_size
KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]

# Every feature is parsed as a float32 tensor of shape KERNEL_SHAPE.
COLUMNS = [tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for _ in FEATURES]
FEATURES_DICT = dict(zip(FEATURES, COLUMNS))

# Specify model training parameters.
# NOTE(review): the original comment gives 80*100*0.7 (= 5600) as the train
# size, which does not match 6000 -- confirm the intended value.
TRAIN_SIZE = 6000
BATCH_SIZE = 16
EPOCHS = 16
BUFFER_SIZE = 4000

# location of the gcp bucket
data_path = "/path/to/data/"
# Drop trailing slashes so the patterns below do not contain '//', which does
# not address the same object on Cloud Storage as a single '/'.
data_root = data_path.rstrip('/')

# import the training, testing and validation records
training_files = data_root + '/training/train*'
testing_files = data_root + '/testing/test*'
validation_files = data_root + '/validation/val*'

# Training and testing datasets repeat indefinitely (bounded by the step
# counts passed to fit); the validation dataset is a single pass.
training_ds = get_training_dataset(training_files)
testing_ds = get_training_dataset(testing_files)
validation_ds = get_training_dataset(validation_files, False)

model = get_model()

model.fit(
    x=training_ds,
    epochs=EPOCHS,
    steps_per_epoch=TRAIN_SIZE // BATCH_SIZE,
    validation_data=testing_ds,
    validation_steps=100)

# Final score on the held-out validation records.
print(model.evaluate(x=validation_ds))

# save model
MODEL_DIR = '/path/to/save/qamodel/'
tf.saved_model.save(model, MODEL_DIR)