In [23]:
# TensorFlow setup: import the library and print the installed version.
import tensorflow as tf
print(tf.__version__)

2.11.0


Set global variables

In [38]:
# Google Cloud Storage bucket/folder and the file-name prefixes of the patch files.
BUCKET = 'cs6140'
FOLDER = 'dataset'
TRAINING_BASE = 'training_patches'
EVAL_BASE = 'eval_patches'

# Input bands fed to the model, followed by the response variable to predict.
MODIS_BANDS = ['EVI']
COPERNICUS_BANDS = ['discrete_classification', 'forest_type']
TERRA_BANDS = ['Percent_Tree_Cover', 'Percent_NonTree_Vegetation']
BANDS = [*MODIS_BANDS, *COPERNICUS_BANDS, *TERRA_BANDS]
RESPONSE = 'annualNPP'
FEATURES = [*BANDS, RESPONSE]

# Each patch is a square image of KERNEL_SIZE x KERNEL_SIZE pixels.
KERNEL_SIZE = 256
KERNEL_SHAPE = [KERNEL_SIZE] * 2

# One fixed-length float32 parsing spec per feature (input bands and response alike).
COLUMNS = [
  tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32)
  for _ in range(len(FEATURES))
]
# Pair every feature name with its parsing spec.
FEATURES_DICT = dict(zip(FEATURES, COLUMNS))

# Number of patches in the training and evaluation datasets.
# TODO: modify as needed
TRAIN_SIZE = 10000
EVAL_SIZE = 5000

# Training hyper-parameters and the Keras identifiers used at compile time.
# TODO: modify as needed
BATCH_SIZE = 16
EPOCHS = 2
BUFFER_SIZE = 500
OPTIMIZER = 'Adam'
LOSS = 'MeanSquaredError'
METRICS = ['RootMeanSquaredError']


Load the data exported from Earth Engine into a tf.data.Dataset.

In [25]:
def parse_tfrecord(example_proto):
  """Decode one serialized example into named tensors.

  The expected layout of the example is given by FEATURES_DICT.

  Args:
    example_proto: a serialized Example.
  Returns:
    A dictionary of tensors, keyed by feature name.
  """
  parsed = tf.io.parse_single_example(
      serialized=example_proto, features=FEATURES_DICT)
  return parsed


def to_tuple(inputs):
  """Split a parsed example into a (model inputs, response) pair.

  The per-feature tensors are stacked in FEATURES order and rearranged so the
  feature axis comes last (HWC); the first len(BANDS) channels form the model
  inputs and the remaining channel(s) form the response.

  Args:
    inputs: A dictionary of tensors, keyed by feature name.
  Returns:
    A tuple of (inputs, outputs).
  """
  n_bands = len(BANDS)
  channels_first = tf.stack([inputs.get(name) for name in FEATURES], axis=0)
  # Move the channel axis to the end: CHW -> HWC.
  channels_last = tf.transpose(channels_first, perm=[1, 2, 0])
  predictors = channels_last[:, :, :n_bands]
  response = channels_last[:, :, n_bands:]
  return predictors, response


def get_dataset(pattern):
  """Load GZIP-compressed TFRecord files as a dataset of (inputs, outputs) tuples.

  Every file matching the pattern is read, each record is parsed with
  parse_tfrecord and then converted with to_tuple.

  Args:
    pattern: A file pattern to match in a Cloud Storage bucket.
  Returns:
    A tf.data.Dataset
  """
  matching_files = tf.io.gfile.glob(pattern)
  records = tf.data.TFRecordDataset(matching_files, compression_type='GZIP')
  return (
      records
      .map(parse_tfrecord, num_parallel_calls=5)
      .map(to_tuple, num_parallel_calls=5)
  )

Use the helpers to read in the training dataset. Print the first record to check.

In [26]:
def get_training_dataset():
	"""Build the shuffled, batched and endlessly repeating training dataset.

	Returns:
		A tf.data.Dataset of training data.
	"""
	# Training files share the TRAINING_BASE prefix inside gs://BUCKET/FOLDER/.
	pattern = f'gs://{BUCKET}/{FOLDER}/{TRAINING_BASE}*'
	patches = get_dataset(pattern)
	# Shuffle through a buffer of BUFFER_SIZE elements, group the result into
	# batches of BATCH_SIZE, and start over once every element has been consumed.
	shuffled = patches.shuffle(BUFFER_SIZE)
	batched = shuffled.batch(BATCH_SIZE)
	return batched.repeat()

training = get_training_dataset()
# Pull a single batch to sanity-check its shapes and values. The built-in
# next() is used rather than the iterator's Python 2 style .next() method.
print(next(iter(training.take(1))))

(<tf.Tensor: shape=(16, 256, 256, 5), dtype=float32, numpy=
array([[[[0.6507627 , 0.57      , 0.8       , 0.85714287, 0.21428572],
         [0.65923625, 0.57      , 0.8       , 0.86904764, 0.20408164],
         [0.67189103, 0.57      , 0.8       , 0.79761904, 0.26530612],
         ...,
         [0.70453155, 0.57      , 0.8       , 0.9047619 , 0.1632653 ],
         [0.70277715, 0.57      , 0.8       , 0.9047619 , 0.14285715],
         [0.6945665 , 0.57      , 0.8       , 0.9047619 , 0.15306123]],

        [[0.66343766, 0.57      , 0.8       , 0.8452381 , 0.2244898 ],
         [0.6745504 , 0.57      , 0.8       , 0.8333333 , 0.23469388],
         [0.6855772 , 0.57      , 0.8       , 0.85714287, 0.21428572],
         ...,
         [0.702039  , 0.57      , 0.8       , 0.6904762 , 0.33673468],
         [0.7004313 , 0.57      , 0.8       , 0.89285713, 0.1632653 ],
         [0.7054416 , 0.57      , 0.8       , 0.9047619 , 0.14285715]],

        [[0.6737162 , 0.57      , 0.8       , 0.8333333 

In [27]:
def get_eval_dataset():
	"""Build the evaluation dataset: unshuffled, one patch per batch, repeating.

	Returns:
		A tf.data.Dataset of evaluation data.
	"""
	# Evaluation files share the EVAL_BASE prefix inside gs://BUCKET/FOLDER/.
	pattern = f'gs://{BUCKET}/{FOLDER}/{EVAL_BASE}*'
	patches = get_dataset(pattern)
	single_patch_batches = patches.batch(1)
	return single_patch_batches.repeat()

# Build the evaluation dataset; the bare expression on the last line displays
# it as the cell output.
evaluation = get_eval_dataset()
evaluation

<RepeatDataset element_spec=(TensorSpec(shape=(None, 256, 256, 5), dtype=tf.float32, name=None), TensorSpec(shape=(None, 256, 256, 1), dtype=tf.float32, name=None))>

Keras implementation of the U-Net model.

In [28]:
from keras.models import *
from keras.layers import *
from keras import metrics
from keras import optimizers
from keras import losses

# TODO: test other activation functions

def conv_block(input_tensor, num_filters):
	"""Apply two 3x3 'same'-padded convolutions, each followed by batch norm and ReLU.

	Args:
		input_tensor: The tensor to convolve.
		num_filters: Number of filters used by both convolutions.
	Returns:
		The output tensor of the second ReLU activation.
	"""
	features = input_tensor
	for _ in range(2):
		features = Conv2D(num_filters, (3, 3), padding='same')(features)
		features = BatchNormalization()(features)
		features = Activation('relu')(features)
	return features

def encoder_block(input_tensor, num_filters):
	"""Run a conv_block and then a 2x2, stride-2 max pooling over its output.

	Args:
		input_tensor: The tensor entering this encoder level.
		num_filters: Number of filters for the convolutions of this level.
	Returns:
		A tuple (pooled, unpooled): the max-pooled tensor passed to the next
		level, and the conv_block output before pooling.
	"""
	skip = conv_block(input_tensor, num_filters)
	downsampled = MaxPooling2D((2, 2), strides=(2, 2))(skip)
	return downsampled, skip

def decoder_block(input_tensor, concat_tensor, num_filters):
	"""Upsample, merge with an encoder tensor, and refine with two convolutions.

	Args:
		input_tensor: The tensor coming from the previous (deeper) level.
		concat_tensor: The encoder tensor concatenated along the last axis.
		num_filters: Number of filters for the transposed and regular convolutions.
	Returns:
		The output tensor of the final ReLU activation.
	"""
	# A stride-2 transposed convolution performs the upsampling.
	upsampled = Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)
	merged = concatenate([concat_tensor, upsampled], axis=-1)
	merged = BatchNormalization()(merged)
	merged = Activation('relu')(merged)
	# Two identical conv -> batch norm -> ReLU stages.
	for _ in range(2):
		merged = Conv2D(num_filters, (3, 3), padding='same')(merged)
		merged = BatchNormalization()(merged)
		merged = Activation('relu')(merged)
	return merged

def get_model():
	"""Build and compile the U-Net.

	Five encoder levels (32 to 512 filters) feed a 1024-filter center block,
	followed by five decoder levels that mirror the encoder and reuse its
	pre-pooling outputs. A 1x1 convolution with sigmoid activation produces the
	single output channel. The optimizer, loss and metrics are looked up from
	the OPTIMIZER, LOSS and METRICS settings.

	Returns:
		The compiled Keras Model.
	"""
	encoder_filters = (32, 64, 128, 256, 512)

	# Spatial dimensions are left unspecified; only the channel count is fixed.
	inputs = Input(shape=[None, None, len(BANDS)])

	# Encoder path: keep each level's pre-pooling tensor for the decoder.
	skips = []
	tensor = inputs
	for filters in encoder_filters:
		tensor, skip = encoder_block(tensor, filters)
		skips.append(skip)

	tensor = conv_block(tensor, 1024)  # center

	# Decoder path: walk back up, deepest encoder level first.
	for filters, skip in zip(reversed(encoder_filters), reversed(skips)):
		tensor = decoder_block(tensor, skip, filters)

	# NOTE(review): sigmoid limits predictions to (0, 1) — presumably the
	# response is normalized to that range; confirm against the export.
	outputs = Conv2D(1, (1, 1), activation='sigmoid')(tensor)

	model = Model(inputs=[inputs], outputs=[outputs])

	compiled_metrics = [metrics.get(metric) for metric in METRICS]
	model.compile(
		optimizer=optimizers.get(OPTIMIZER),
		loss=losses.get(LOSS),
		metrics=compiled_metrics)

	return model

The model summary

In [29]:
# Build the compiled U-Net and print its layer-by-layer summary.
m = get_model()
m.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, None, None,  0           []                               
                                 5)]                                                              
                                                                                                  
 conv2d_46 (Conv2D)             (None, None, None,   1472        ['input_3[0][0]']                
                                32)                                                               
                                                                                                  
 batch_normalization_54 (BatchN  (None, None, None,   128        ['conv2d_46[0][0]']              
 ormalization)                  32)                                                         

Path where the model parameters are saved.

In [30]:
import os
# File path given to the checkpoint callback for the saved weights.
checkpoint_path = 'training_1/cp.ckpt'
# Directory part of the checkpoint path; displayed as the cell output.
checkpoint_dir = os.path.dirname(checkpoint_path)
checkpoint_dir

'training_1'

Callback function that saves the model weights.

In [31]:
# Checkpoint callback: writes only the weights (not the full model) to
# checkpoint_path and logs a message when it does so.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1,
)

Train the model

In [None]:
# Both datasets repeat indefinitely, so the step counts below define how many
# batches make up one training epoch and one validation pass. Evaluation
# batches hold a single patch, hence validation_steps=EVAL_SIZE.
# cp_callback is passed so that weights are checkpointed during training;
# without it the callback defined above is never used.
m.fit(
    x=training,
    epochs=EPOCHS,
    steps_per_epoch=TRAIN_SIZE // BATCH_SIZE,
    validation_data=evaluation,
    validation_steps=EVAL_SIZE,
    callbacks=[cp_callback])

# Save the final weights separately from the training checkpoints.
m.save_weights('./final/normalized')

Epoch 1/2
Epoch 2/2

In [37]:
# Restore the weights saved at the end of training.
m.load_weights('./final/normalized')

# Evaluate the model. It is compiled with LOSS = 'MeanSquaredError' and
# METRICS = ['RootMeanSquaredError'], so evaluate() returns (MSE, RMSE);
# the second value is an error magnitude, not a classification accuracy.
# NOTE(review): evaluation batches hold one patch, so steps=625 scores 625
# patches rather than all EVAL_SIZE — confirm this is intended.
loss, rmse = m.evaluate(x=evaluation, verbose=2, steps=625)
print("Restored model, loss (MSE): {:.4f}, RMSE: {:.4f}".format(loss, rmse))

625/625 - 12s - loss: 0.0132 - root_mean_squared_error: 0.1148 - 12s/epoch - 20ms/step
Restored model, accuracy: 11.48%
