In [4]:
# TensorFlow setup: import the library and print the installed version.
import tensorflow as tf
print(tf.__version__)

2.11.0


Set global variables

In [5]:
# Names and locations of the dataset files in Google Cloud Storage.
BUCKET = 'cs6140'
FOLDER = 'dataset'
TRAINING_BASE = 'training_patches'
EVAL_BASE = 'eval_patches'

# Feature bands fed to the model, and the response variable it predicts.
# TODO: explore more bands
BANDS = ['NDVI', 'EVI']
RESPONSE = 'annualNPP'
FEATURES = [*BANDS, RESPONSE]

# Patches expected by the model are square: KERNEL_SIZE x KERNEL_SIZE pixels.
KERNEL_SIZE = 256
KERNEL_SHAPE = [KERNEL_SIZE] * 2

# One fixed-length float32 parsing spec per feature (bands and response alike).
COLUMNS = [
  tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32)
  for _ in FEATURES
]
# Map every feature name to its parsing spec.
FEATURES_DICT = {name: spec for name, spec in zip(FEATURES, COLUMNS)}

# Number of patches in the training and evaluation datasets.
# TODO: modify as needed
TRAIN_SIZE = 16000
EVAL_SIZE = 8000

# Training hyper-parameters and the Keras identifiers used at compile time.
# TODO: modify as needed
BATCH_SIZE = 16
EPOCHS = 10
BUFFER_SIZE = 2000
OPTIMIZER = 'Adam'
LOSS = 'MeanSquaredError'
METRICS = ['RootMeanSquaredError']

Load the data exported from Earth Engine into a tf.data.Dataset.

In [6]:
def parse_tfrecord(example_proto):
  """Parse one serialized example into a dictionary of tensors.

  The expected layout of the example is taken from FEATURES_DICT.

  Args:
    example_proto: a serialized Example.
  Returns:
    A dictionary of tensors, keyed by feature name.
  """
  parsed = tf.io.parse_single_example(
      serialized=example_proto, features=FEATURES_DICT)
  return parsed


def to_tuple(inputs):
  """Split a parsed example into an (inputs, outputs) pair of HWC tensors.

  The per-feature tensors are stacked in FEATURES order, moved to
  channels-last layout, and then split: the first len(BANDS) channels are the
  model inputs and the remaining channels are the outputs.

  Args:
    inputs: A dictionary of tensors, keyed by feature name.
  Returns:
    A tuple of (inputs, outputs).
  """
  num_bands = len(BANDS)
  per_feature = [inputs.get(name) for name in FEATURES]
  # Stacking on the leading axis yields CHW; the permutation moves channels last.
  chw = tf.stack(per_feature, axis=0)
  hwc = tf.transpose(chw, perm=[1, 2, 0])
  band_channels = hwc[:, :, :num_bands]
  response_channels = hwc[:, :, num_bands:]
  return band_channels, response_channels


def get_dataset(pattern):
  """Read, parse and convert to tuples every TFRecord file matching a pattern.

  Args:
    pattern: A file pattern to match in a Cloud Storage bucket.
  Returns:
    A tf.data.Dataset
  """
  matching_files = tf.io.gfile.glob(pattern)
  records = tf.data.TFRecordDataset(matching_files, compression_type='GZIP')
  # Apply the two per-record transforms in order: parse first, then split
  # the parsed dictionary into an (inputs, outputs) tuple.
  for transform in (parse_tfrecord, to_tuple):
    records = records.map(transform, num_parallel_calls=5)
  return records

Use the helpers to read in the training dataset. Print the first record to check.

In [7]:
def get_training_dataset():
	"""Get the preprocessed training dataset.

	Returns:
		A tf.data.Dataset of training data.
	"""
	# Alternative location when reading from Google Drive instead:
	# 'drive/My Drive/' + FOLDER + '/' + 'training_patches' + '*'
	pattern = 'gs://{}/{}/{}*'.format(BUCKET, FOLDER, TRAINING_BASE)
	patches = get_dataset(pattern)
	# Shuffle through a buffer of BUFFER_SIZE elements (each step picks one
	# element from the buffer at random), then group into batches of BATCH_SIZE.
	batches = patches.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
	# Start over once all elements are consumed.
	return batches.repeat()

# Build the training pipeline and print its first batch as a sanity check.
training = get_training_dataset()
print(next(iter(training.take(1))))

2023-07-26 00:26:48.307385: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-07-26 00:26:48.391188: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-07-26 00:26:48.392887: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-07-26 00:26:48.397908: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild

(<tf.Tensor: shape=(16, 256, 256, 2), dtype=float32, numpy=
array([[[[4386., 2099.],
         [3586., 1647.],
         [3367., 1418.],
         ...,
         [2900., 1570.],
         [2916., 1802.],
         [5015., 1972.]],

        [[2874., 1594.],
         [2807., 1381.],
         [3077., 1439.],
         ...,
         [4507., 1814.],
         [4507., 1814.],
         [4507., 1814.]],

        [[2463., 1327.],
         [2463., 1327.],
         [2570., 1250.],
         ...,
         [4583., 2001.],
         [4637., 2238.],
         [4637., 2238.]],

        ...,

        [[4078., 2165.],
         [4302., 2294.],
         [4602., 2238.],
         ...,
         [3833., 1800.],
         [3833., 1800.],
         [3660., 1700.]],

        [[5387., 2658.],
         [5387., 2658.],
         [5132., 2534.],
         ...,
         [3702., 1754.],
         [3832., 2146.],
         [3832., 2146.]],

        [[5858., 2696.],
         [5940., 2789.],
         [5940., 2789.],
         ...,
       

In [11]:
def get_eval_dataset():
	"""Get the preprocessed evaluation dataset.

	Returns:
		A tf.data.Dataset of evaluation data.
	"""
	# Alternative location when reading from Google Drive instead:
	# 'drive/My Drive/' + FOLDER + '/' + 'eval_patches' + '*'
	pattern = 'gs://{}/{}/{}*'.format(BUCKET, FOLDER, EVAL_BASE)
	patches = get_dataset(pattern)
	# One patch per batch, repeated once all patches are consumed.
	single_patch_batches = patches.batch(1)
	return single_patch_batches.repeat()

# Build the evaluation pipeline; the bare expression on the last line makes
# the notebook display the dataset's repr (its element spec).
evaluation = get_eval_dataset()
evaluation

<RepeatDataset element_spec=(TensorSpec(shape=(None, 256, 256, 2), dtype=tf.float32, name=None), TensorSpec(shape=(None, 256, 256, 1), dtype=tf.float32, name=None))>

Keras implementation of the U-Net model.

In [8]:
from keras.models import *
from keras.layers import *
from keras import metrics
from keras import optimizers
from keras import losses

# TODO: test other activation functions

def conv_block(input_tensor, num_filters):
	"""Apply two Conv2D(3x3, 'same') -> BatchNormalization -> ReLU stages.

	Args:
		input_tensor: Tensor fed to the first convolution.
		num_filters: Number of filters used by both convolutions.
	Returns:
		The output tensor of the second ReLU activation.
	"""
	features = input_tensor
	for _ in range(2):
		features = Conv2D(num_filters, (3, 3), padding='same')(features)
		features = BatchNormalization()(features)
		features = Activation('relu')(features)
	return features

def encoder_block(input_tensor, num_filters):
	"""Run a conv_block, then downsample it with 2x2 max pooling (stride 2).

	Args:
		input_tensor: Tensor fed to the convolution block.
		num_filters: Number of filters for the convolution block.
	Returns:
		A tuple of (pooled tensor, un-pooled convolution block output).
	"""
	skip = conv_block(input_tensor, num_filters)
	pooled = MaxPooling2D((2, 2), strides=(2, 2))(skip)
	return pooled, skip

def decoder_block(input_tensor, concat_tensor, num_filters):
	"""Upsample, concatenate with an encoder tensor, and refine with two convs.

	Args:
		input_tensor: Tensor to upsample with a stride-2 transposed convolution.
		concat_tensor: Tensor concatenated with the upsampled one on the last axis.
		num_filters: Number of filters for the transposed and regular convolutions.
	Returns:
		The output tensor of the final ReLU activation.
	"""
	upsampled = Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)
	merged = concatenate([concat_tensor, upsampled], axis=-1)
	# Normalize and activate the concatenation before the convolutions.
	merged = BatchNormalization()(merged)
	merged = Activation('relu')(merged)
	for _ in range(2):
		merged = Conv2D(num_filters, (3, 3), padding='same')(merged)
		merged = BatchNormalization()(merged)
		merged = Activation('relu')(merged)
	return merged

def get_model():
	"""Build and compile the U-Net model.

	Five encoder blocks (32 to 512 filters) feed a 1024-filter center block,
	followed by five decoder blocks that mirror the encoder and consume its
	un-pooled outputs in reverse order. A final 1x1 convolution produces a
	single-channel output.

	Returns:
		A Keras Model compiled with OPTIMIZER, LOSS and METRICS.
	"""
	encoder_filters = (32, 64, 128, 256, 512)
	inputs = Input(shape=[None, None, len(BANDS)])

	# Encoder path: each block halves the spatial size (256 -> 8 for a
	# 256-pixel patch) and keeps its un-pooled output for the decoder.
	skips = []
	tensor = inputs
	for num_filters in encoder_filters:
		tensor, skip = encoder_block(tensor, num_filters)
		skips.append(skip)

	tensor = conv_block(tensor, 1024) # center

	# Decoder path: each block doubles the spatial size (8 -> 256 for a
	# 256-pixel patch), pairing with the encoder outputs deepest first.
	for num_filters, skip in zip(reversed(encoder_filters), reversed(skips)):
		tensor = decoder_block(tensor, skip, num_filters)

	# The sigmoid bounds every prediction to the (0, 1) interval.
	# NOTE(review): this assumes the response is scaled to [0, 1] - confirm.
	outputs = Conv2D(1, (1, 1), activation='sigmoid')(tensor)

	model = Model(inputs=[inputs], outputs=[outputs])
	model.compile(
		optimizer=optimizers.get(OPTIMIZER),
		loss=losses.get(LOSS),
		metrics=[metrics.get(name) for name in METRICS])
	return model

The model summary

In [9]:
# Build the compiled U-Net and print its layer-by-layer summary.
m = get_model()
m.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, None,  0           []                               
                                 2)]                                                              
                                                                                                  
 conv2d (Conv2D)                (None, None, None,   608         ['input_1[0][0]']                
                                32)                                                               
                                                                                                  
 batch_normalization (BatchNorm  (None, None, None,   128        ['conv2d[0][0]']                 
 alization)                     32)                                                           

Train the model

In [12]:
# Train for EPOCHS epochs. Both datasets repeat indefinitely, so the number of
# steps per epoch and per validation pass is given explicitly: the training
# set is batched by BATCH_SIZE, the evaluation set one patch per batch.
m.fit(
    x=training,
    epochs=EPOCHS,
    steps_per_epoch=TRAIN_SIZE // BATCH_SIZE,
    validation_data=evaluation,
    validation_steps=EVAL_SIZE)

# Persist the trained weights so they can be restored later.
m.save_weights(filepath='./params/normalized')

Epoch 1/10


2023-07-26 00:35:45.107647: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 1439 of 2000
2023-07-26 00:35:49.055869: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.
2023-07-26 00:35:51.219446: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8200
2023-07-26 00:36:02.507282: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7f1ce4007420 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-07-26 00:36:02.507333: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2023-07-26 00:36:02.557073: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-07-26 00:36:03.406221: I tensorflow/compiler/jit/xla_compilation_cache.cc:477] Compiled cluster usin



2023-07-26 00:41:42.642749: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 1015 of 2000




2023-07-26 00:41:49.183904: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.


Epoch 2/10

2023-07-26 00:49:32.877956: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 1283 of 2000
2023-07-26 00:49:37.135057: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.


Epoch 3/10
 220/1000 [=====>........................] - ETA: 5:32 - loss: 0.0133 - root_mean_squared_error: 0.1155

2023-07-26 00:57:12.949364: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 1011 of 2000


 229/1000 [=====>........................] - ETA: 5:28 - loss: 0.0133 - root_mean_squared_error: 0.1154

2023-07-26 00:57:20.274686: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.




2023-07-26 01:02:20.566877: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 1100 of 2000




2023-07-26 01:02:28.557129: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.


Epoch 4/10

2023-07-26 01:09:59.603406: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 1100 of 2000




2023-07-26 01:10:08.837919: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.


Epoch 5/10

2023-07-26 01:17:38.489294: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 1106 of 2000




2023-07-26 01:17:46.538199: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.


Epoch 6/10
 161/1000 [===>..........................] - ETA: 5:56 - loss: 0.0127 - root_mean_squared_error: 0.1128

2023-07-26 01:24:54.444970: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 1100 of 2000


 181/1000 [====>.........................] - ETA: 5:48 - loss: 0.0127 - root_mean_squared_error: 0.1129

2023-07-26 01:25:02.912463: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.




2023-07-26 01:30:11.279791: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 917 of 2000




2023-07-26 01:30:20.810722: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.


Epoch 7/10

2023-07-26 01:37:50.484579: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 1087 of 2000




2023-07-26 01:37:58.866174: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.


Epoch 8/10

2023-07-26 01:45:06.798235: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 818 of 2000




2023-07-26 01:45:16.799730: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 1740 of 2000




2023-07-26 01:45:19.248689: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.


Epoch 9/10
 132/1000 [==>...........................] - ETA: 6:14 - loss: 0.0121 - root_mean_squared_error: 0.1099

2023-07-26 01:52:46.643252: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 1055 of 2000


 153/1000 [===>..........................] - ETA: 6:05 - loss: 0.0121 - root_mean_squared_error: 0.1101

2023-07-26 01:52:55.339778: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.




2023-07-26 01:58:03.834192: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 892 of 2000




2023-07-26 01:58:13.825523: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 1730 of 2000




2023-07-26 01:58:16.144478: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.


Epoch 10/10

2023-07-26 02:05:44.255915: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 551 of 2000




2023-07-26 02:05:53.232844: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 1236 of 2000




2023-07-26 02:06:03.233074: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 1928 of 2000
2023-07-26 02:06:03.827760: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.




In [15]:
# Restore the weights written by the training cell.
m.load_weights('./params/normalized')

# Evaluate the restored model. The model is compiled with a single metric,
# RootMeanSquaredError, so the second value returned by evaluate() is an RMSE
# and not a classification accuracy; report it as an error value rather than
# as a percentage.
# NOTE(review): with one patch per batch, steps=1000 covers 1000 patches,
# fewer than EVAL_SIZE - confirm that evaluating a subset is intended.
loss, rmse = m.evaluate(x=evaluation, verbose=2, steps=1000)
print("Restored model, loss: {:.4f}, RMSE: {:.4f}".format(loss, rmse))

1000/1000 - 16s - loss: 0.0166 - root_mean_squared_error: 0.1290 - 16s/epoch - 16ms/step
Restored model, accuracy: 12.90%
