In [1]:
# Copyright 2024 NASA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Rice mapping in Bhutan with U-Net using high resolution satellite imagery

### This notebook shows an example of running prediction on TFRecord images using a saved U-Net model.

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/SERVIR/servir-aces/blob/main/notebook/prediction_unet.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/SERVIR/servir-aces/blob/main/notebook/prediction_unet.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
</table>
<br>
<br>
<br>

This notebook is also available in this GitHub repo: https://github.com/SERVIR/servir-aces. Navigate to the `notebook` folder.

## Setup environment

In [2]:
!pip install servir-aces

Collecting servir-aces
  Downloading servir_aces-0.0.14-py2.py3-none-any.whl (32 kB)
Collecting python-dotenv>=1.0.0 (from servir-aces)
  Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv, servir-aces
Successfully installed python-dotenv-1.0.1 servir-aces-0.0.14


In [3]:
# Clone the repository to get the `.env.example` template that a later cell copies.
!git clone https://github.com/SERVIR/servir-aces

Cloning into 'servir-aces'...
remote: Enumerating objects: 731, done.[K
remote: Counting objects: 100% (90/90), done.[K
remote: Compressing objects: 100% (50/50), done.[K
remote: Total 731 (delta 55), reused 40 (delta 40), pack-reused 641[K
Receiving objects: 100% (731/731), 3.35 MiB | 10.63 MiB/s, done.
Resolving deltas: 100% (478/478), done.


Now the repo is downloaded. We will create an environment file to point to our training data and customize parameters for the model. To do this, we make a copy of the `.env.example` file provided.

Under the hood, all the configuration provided via the environment file is parsed into a config object and can be accessed programmatically.

Note that the current version does not expose all the model intricacies through the environment file, but future versions may include those depending on the need.

In [4]:
# Work on a copy (`config.env`) so the template shipped with the repo stays untouched.
!cp servir-aces/.env.example servir-aces/config.env

In [5]:
# Mount Google Drive at /content/drive; the OUTPUT_DIR configured below points inside it.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


## Setup config file variables

Okay, now we have the `config.env` file, we will use this to provide our environments and parameters.

Note there are several parameters that can be changed. Let's start by changing the BASEDIR as below. Also since we already have the model, let's specify that path using the `OUTPUT_DIR`.

```
BASEDIR = "/content/"
OUTPUT_DIR = "/content/drive/MyDrive/Colab Notebooks/DL_Book/Chapter_1/output"
```

For the prediction, we are using growing season and pre-growing season information. Thus, we have 8 optical bands, namely `red_before`, `green_before`, `blue_before`, `nir_before`, `red_during`, `green_during`, `blue_during`, and `nir_during`. In addition, you can use the `USE_ELEVATION` and `USE_S1` config values to include the topographic and radar information. Since we are currently not including these, we will set both of them to `False`.

Similarly, we are using 256x256 pixels, so let's also change that. In addition, if you want to keep a buffer around the exported images for prediction purposes, you can use `KERNEL_BUFFER` to specify it. Half of this buffer extends on each side of every patch. You can specify the size as a tuple (e.g. 72 x 72). If zero is used, no buffer is applied. I will keep this at zero for this one.

```
# For model training, USE_ELEVATION extends FEATURES with "elevation" & "slope"
# USE_S1 extends FEATURES with "vv_asc_before", "vh_asc_before", "vv_asc_during", "vh_asc_during",
# "vv_desc_before", "vh_desc_before", "vv_desc_during", "vh_desc_during"
# In case these are not useful and you have other bands in your training data, you can do set
# USE_ELEVATION and USE_S1 to False and update FEATURES to include needed bands
USE_ELEVATION = False
USE_S1 = False

PATCH_SHAPE = (256, 256)

KERNEL_BUFFER = 0
```

Next, we will specify the `MODEL_DIR_NAME`. The `MODEL_DIR` is then constructed as
`MODEL_DIR = OUTPUT_DIR / MODEL_DIR_NAME`. The `MODEL_DIR_NAME` in my case is `unet_v1`, so we will use that. Similarly, you can specify the name of the prediction output file using the `OUTPUT_NAME` variable. Other config values to change are `GCS_PROJECT`, `GCS_BUCKET`, `GCS_IMAGE_DIR`, and `GCS_IMAGE_PREFIX` (for the prediction image directory, see this [notebook](https://colab.research.google.com/drive/1MZexam3GZKsQySQO9Jk_RPNyyMLmciEq?usp=drive_link)). To export our prediction to a GEE asset, we will set `EE_OUTPUT_ASSET`.

```
MODEL_DIR_NAME = "unet_v1"
OUTPUT_NAME = "prediction_unet_v1"
GCS_BUCKET = "dl-book"

# GCS settings
GCS_PROJECT = "servir-ee"
# prediction image directory
GCS_IMAGE_DIR = "chapter-1/images"
# prediction image prefix
GCS_IMAGE_PREFIX = "image_2021"

# where the prediction output will be stored
EE_OUTPUT_ASSET = "projects/servir-ee/assets/dl-book/chapter-1/prediction"
```

## Update the config file programmatically

Let's make a dictionary so we can change these config settings programmatically.

In [6]:
# Base working directory and the Drive folder that holds the trained model output.
BASEDIR = "/content/" # @param {type:"string"}
OUTPUT_DIR = "/content/drive/MyDrive/Colab Notebooks/DL_Book/Chapter_1/output" # @param {type:"string"}
# DATADIR = "datasets/dnn_planet_wo_indices" # @param {type:"string"}
# All values are given as strings because they are written to the env file.
# Typed settings such as PATCH_SHAPE, USE_ELEVATION, USE_S1 (and BATCH_SIZE, EPOCHS)
# are converted to their appropriate type when the config is loaded.
USE_ELEVATION = "False" # @param {type:"string"}
USE_S1 = "False" # @param {type:"string"}
PATCH_SHAPE = "(256, 256)" # @param {type:"string"}
KERNEL_BUFFER = "0" # @param {type:"string"}

# Model sub-directory under OUTPUT_DIR and the base name of the prediction file.
MODEL_DIR_NAME = "unet_v1" # @param {type:"string"}
OUTPUT_NAME = "prediction_unet_v1" # @param {type:"string"}
GCS_BUCKET = "dl-book" # @param {type:"string"}
MODEL_TYPE = "unet" # @param {type:"string"}

# GCS settings
GCS_PROJECT = "servir-ee" # @param {type:"string"}
# prediction image directory
GCS_IMAGE_DIR = "chapter-1/images" # @param {type:"string"}
# prediction image prefix
GCS_IMAGE_PREFIX = "image_2021" # @param {type:"string"}

# where the prediction output will be stored
EE_OUTPUT_ASSET = "projects/servir-ee/assets/dl-book/chapter-1/prediction" # @param {type:"string"}



In [7]:
# Map each env-file key to the value chosen in the form cell above.
# The keyword names are the exact keys written to the env file.
config_settings = dict(
    BASEDIR=BASEDIR,
    OUTPUT_DIR=OUTPUT_DIR,
    USE_ELEVATION=USE_ELEVATION,
    USE_S1=USE_S1,
    PATCH_SHAPE=PATCH_SHAPE,
    KERNEL_BUFFER=KERNEL_BUFFER,
    MODEL_DIR_NAME=MODEL_DIR_NAME,
    OUTPUT_NAME=OUTPUT_NAME,
    GCS_PROJECT=GCS_PROJECT,
    MODEL_TYPE=MODEL_TYPE,
    GCS_BUCKET=GCS_BUCKET,
    GCS_IMAGE_DIR=GCS_IMAGE_DIR,
    GCS_IMAGE_PREFIX=GCS_IMAGE_PREFIX,
    EE_OUTPUT_ASSET=EE_OUTPUT_ASSET,
)


In [8]:
import dotenv

config_file = "servir-aces/config.env"

# Write every override into the env file, one key at a time.
for setting_name, setting_value in config_settings.items():
    dotenv.set_key(
        dotenv_path=config_file,
        key_to_set=setting_name,
        value_to_set=setting_value,
    )


## Load config file variables

In [9]:
from aces import Config, EEUtils

import json
import os
import tensorflow as tf
import numpy as np
import subprocess


In [10]:
# Parse the env file into a config object; later cells read the settings as attributes.
config = Config(config_file)

BASEDIR: /content
DATADIR: /content/data
using features: ['red_before', 'green_before', 'blue_before', 'nir_before', 'red_during', 'green_during', 'blue_during', 'nir_during']
using labels: ['class']


In [11]:
# The prediction TFRecord goes to <MODEL_DIR>/prediction/<OUTPUT_NAME>.TFRecord.
prediction_file_name = f"{config.OUTPUT_NAME}.TFRecord"
OUTPUT_IMAGE_FILE = str(config.MODEL_DIR / "prediction" / prediction_file_name)
print(f"OUTPUT_IMAGE_FILE: {OUTPUT_IMAGE_FILE}")


OUTPUT_IMAGE_FILE: /content/drive/MyDrive/Colab Notebooks/DL_Book/Chapter_1/output/unet_v1/prediction/prediction_unet_v1.TFRecord


## Get files for export

In [12]:
# List everything under the prediction-image directory of the bucket.
ls = f"gsutil ls gs://{config.GCS_BUCKET}/{config.GCS_IMAGE_DIR}"
print(f"ls >> : {ls}")

# check_output returns bytes: decode them, then split into one entry per line.
files_list = subprocess.check_output(ls, shell=True).decode("utf-8").split("\n")

ls >> : gsutil ls gs://dl-book/chapter-1/images


In [13]:
# Inspect the raw listing; it also contains the directory entry and a trailing empty string.
files_list

['gs://dl-book/chapter-1/images/',
 'gs://dl-book/chapter-1/images/image_202100000.tfrecord.gz',
 'gs://dl-book/chapter-1/images/image_202100001.tfrecord.gz',
 'gs://dl-book/chapter-1/images/image_202100002.tfrecord.gz',
 'gs://dl-book/chapter-1/images/image_202100003.tfrecord.gz',
 'gs://dl-book/chapter-1/images/image_202100004.tfrecord.gz',
 'gs://dl-book/chapter-1/images/image_202100005.tfrecord.gz',
 'gs://dl-book/chapter-1/images/image_2021mixer.json',
 '']

In [14]:
# Keep only the listing entries whose path contains the image export prefix.
image_prefix = config.GCS_IMAGE_PREFIX
exported_files_list = [path for path in files_list if image_prefix in path]

print(f"exported_files_list: {exported_files_list}")

exported_files_list: ['gs://dl-book/chapter-1/images/image_202100000.tfrecord.gz', 'gs://dl-book/chapter-1/images/image_202100001.tfrecord.gz', 'gs://dl-book/chapter-1/images/image_202100002.tfrecord.gz', 'gs://dl-book/chapter-1/images/image_202100003.tfrecord.gz', 'gs://dl-book/chapter-1/images/image_202100004.tfrecord.gz', 'gs://dl-book/chapter-1/images/image_202100005.tfrecord.gz', 'gs://dl-book/chapter-1/images/image_2021mixer.json']


## Get the list of image files and the JSON mixer file.

In [15]:
# Split the export into the gzipped TFRecord shards and the JSON mixer file.
image_files_list = [
    path for path in exported_files_list if path.endswith(".tfrecord.gz")
]
mixer_candidates = [path for path in exported_files_list if path.endswith(".json")]
# If several JSON files match, keep the last one listed.
json_file = mixer_candidates[-1] if mixer_candidates else None

# Fail here with a clear message rather than later, when `gsutil cat None`
# would be run or an empty dataset would be fed to the model.
gcs_location = f"gs://{config.GCS_BUCKET}/{config.GCS_IMAGE_DIR}"
if not image_files_list:
    raise FileNotFoundError(
        f"No '.tfrecord.gz' files containing {config.GCS_IMAGE_PREFIX!r} found in {gcs_location}"
    )
if json_file is None:
    raise FileNotFoundError(
        f"No JSON mixer file containing {config.GCS_IMAGE_PREFIX!r} found in {gcs_location}"
    )

In [16]:
# Put the shard paths in the right order. The numeric suffixes in the listing
# above are zero-padded, so a plain lexicographic sort matches their numbering.
image_files_list = sorted(image_files_list)

In [17]:
# Show the shard list and the mixer file on separate lines.
print(
    f"image_files_list: {image_files_list}",
    f"json_file: {json_file}",
    sep="\n",
)

image_files_list: ['gs://dl-book/chapter-1/images/image_202100000.tfrecord.gz', 'gs://dl-book/chapter-1/images/image_202100001.tfrecord.gz', 'gs://dl-book/chapter-1/images/image_202100002.tfrecord.gz', 'gs://dl-book/chapter-1/images/image_202100003.tfrecord.gz', 'gs://dl-book/chapter-1/images/image_202100004.tfrecord.gz', 'gs://dl-book/chapter-1/images/image_202100005.tfrecord.gz']
json_file: gs://dl-book/chapter-1/images/image_2021mixer.json


In [18]:
# The saved model is read from the `trained-model` sub-directory of MODEL_DIR.
model_path = f"{str(config.MODEL_DIR)}/trained-model"
print(f"Loading model from {model_path}")
this_model = tf.keras.models.load_model(model_path)


Loading model from /content/drive/MyDrive/Colab Notebooks/DL_Book/Chapter_1/output/unet_v1/trained-model


In [19]:
# Print the layer-by-layer architecture of the loaded model.
this_model.summary()

Model: "unet"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, None, None, 8)]      0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, None, None, 32)       2336      ['input_1[0][0]']             
                                                                                                  
 batch_normalization (Batch  (None, None, None, 32)       128       ['conv2d[0][0]']              
 Normalization)                                                                                   
                                                                                                  
 activation (Activation)     (None, None, None, 32)       0         ['batch_normalization[0][0]

## Get relevant info from the JSON mixer file.

In [20]:
# Fetch the mixer file from Cloud Storage; check_output returns its content as bytes.
cat = f"gsutil cat {json_file}"
read_t = subprocess.check_output(cat, shell=True).decode("utf-8")

# The mixer is a JSON document describing how the image was split into patches.
mixer = json.loads(read_t)

# Patch geometry and patch count recorded in the mixer.
patch_dims = mixer["patchDimensions"]
patch_width = patch_dims[0]
patch_height = patch_dims[1]
patches = mixer["totalPatches"]
patch_dimensions_flat = [patch_width * patch_height, 1]

## Load the buffer size for prediction

In [21]:
kernel_buffer = config.KERNEL_BUFFER

if not kernel_buffer:
    # No buffer configured: patches are used at their nominal size.
    x_buffer = 0
    y_buffer = 0
    buffered_shape = config.PATCH_SHAPE
else:
    # Half of the buffer lies on each side of the patch.
    x_buffer = kernel_buffer[0] // 2
    y_buffer = kernel_buffer[1] // 2

    # The exported patches are larger than PATCH_SHAPE by the full buffer.
    buffered_shape = [
        config.PATCH_SHAPE[0] + kernel_buffer[0],
        config.PATCH_SHAPE[1] + kernel_buffer[1],
    ]

print(f"buffered_shape: {buffered_shape}")

buffered_shape: (256, 256)


## Setup features

In [22]:
# Collect the optional bands requested through the config flags.
optional_features = []
if config.USE_ELEVATION:
    optional_features += ["elevation", "slope"]
if config.USE_S1:
    optional_features += [
        "vv_asc_before", "vh_asc_before", "vv_asc_during", "vh_asc_during",
        "vv_desc_before", "vh_desc_before", "vv_desc_during", "vh_desc_during",
    ]

# Append only the bands that are not listed yet, so re-running this cell
# does not duplicate entries in config.FEATURES.
missing_features = [name for name in optional_features if name not in config.FEATURES]
config.FEATURES.extend(missing_features)

print(f"Config.FEATURES: {config.FEATURES}")


Config.FEATURES: ['red_before', 'green_before', 'blue_before', 'nir_before', 'red_during', 'green_during', 'blue_during', 'nir_during']


### Some useful functions

In [23]:
def parse_image(example_proto):
    """Parse one serialized example into a dict of per-feature tensors.

    Args:
        example_proto: a single serialized record read from the TFRecord files.

    Returns:
        A dict mapping every name in `config.FEATURES` to a float32 tensor
        of shape `buffered_shape`.
    """
    feature_spec = {
        feature_name: tf.io.FixedLenFeature(shape=buffered_shape, dtype=tf.float32)
        for feature_name in config.FEATURES
    }
    return tf.io.parse_single_example(example_proto, feature_spec)

def to_tuple_image(inputs):
    """Stack the per-feature tensors into a single channels-last tensor.

    Args:
        inputs: mapping from feature name to tensor, as produced by `parse_image`.

    Returns:
        One tensor whose last axis follows the order of `config.FEATURES`.
        Despite the function name, a single tensor is returned, not a tuple.
    """
    bands = [inputs.get(feature_name) for feature_name in config.FEATURES]
    # Stack with the feature axis first, then move that axis to the end.
    channels_first = tf.stack(bands, axis=0)
    return tf.transpose(channels_first, [1, 2, 0])


## Create a dataset

In [24]:
# Build the input pipeline from the gzipped TFRecord shards in Cloud Storage:
# parse each record, stack its bands into one tensor, and emit one patch per batch.
image_dataset = (
    tf.data.TFRecordDataset(image_files_list, compression_type="GZIP")
    .map(parse_image, num_parallel_calls=5)
    .map(to_tuple_image)
    .batch(1)
)


## Perform Inference

In [25]:
# The dataset yields one patch per batch, so `steps=patches` runs the model once
# for each patch counted in the mixer file.
predictions = this_model.predict(image_dataset, steps=patches, verbose=1)
print(f"predictions shape: {predictions.shape}")


predictions shape: (252, 256, 256, 5)


## Write Predictions

In [26]:
from pathlib import Path

# Create the directory that will hold the prediction file, including any
# missing parent directories; do nothing if it already exists.
Path(OUTPUT_IMAGE_FILE).parent.mkdir(parents=True, exist_ok=True)

In [None]:
# Output band name for each channel of the model's last axis, in channel order.
class_bands = ["cropland_etc", "rice", "forest", "urban", "others_etc"]

print(f"Writing predictions to {OUTPUT_IMAGE_FILE} ...")

# The context manager closes the writer even if a patch fails to serialize.
with tf.io.TFRecordWriter(OUTPUT_IMAGE_FILE) as writer:
    for i, prediction_patch in enumerate(predictions):
        if i == 0:
            print(f"Starting with patch {i}...")
            print(f"predictionPatch: {prediction_patch.shape}")

        if i % 50 == 0:
            print(f"Writing patch {i}...")

        # Drop the kernel buffer so only the central PATCH_SHAPE window is written.
        prediction_patch = prediction_patch[
            x_buffer: x_buffer + config.PATCH_SHAPE[0],
            y_buffer: y_buffer + config.PATCH_SHAPE[1],
        ]

        # "prediction" stores the index of the highest-scoring channel per pixel.
        feature = {
            "prediction": tf.train.Feature(
                int64_list=tf.train.Int64List(
                    value=np.argmax(prediction_patch, axis=-1).flatten()
                )
            ),
        }
        # One float band per channel, holding that channel's raw model output.
        for channel, band_name in enumerate(class_bands):
            feature[band_name] = tf.train.Feature(
                float_list=tf.train.FloatList(
                    value=prediction_patch[:, :, channel:channel + 1].flatten()
                )
            )

        example = tf.train.Example(features=tf.train.Features(feature=feature))

        # Write the example.
        writer.write(example.SerializeToString())

Writing predictions to /content/drive/MyDrive/Colab Notebooks/DL_Book/Chapter_1/output/unet_v1/prediction/prediction_unet_v1.TFRecord ...
Starting with patch 0...
predictionPatch: (256, 256, 5)
Writing patch 0...
Writing patch 50...
Writing patch 100...
Writing patch 150...
Writing patch 200...
Writing patch 250...


## Upload to Google Earth Engine (GEE)

Now we have written the predictions to `OUTPUT_IMAGE_FILE`. You can upload this to GEE for visualization. To do this, you will need to upload it to GCP first and then to GEE.

Make sure you have proper permission

In [None]:
# Cloud authentication: sign in from Colab before the gsutil upload below.
from google.colab import auth
auth.authenticate_user()

In [None]:
# Destination object in the bucket. The "chapter-1/prediction" folder is
# hard-coded here rather than read from the config.
OUTPUT_GCS_PATH = f"gs://{config.GCS_BUCKET}/chapter-1/prediction/{config.OUTPUT_NAME}.TFRecord"
print(f"OUTPUT_GCS_PATH: {OUTPUT_GCS_PATH}")

OUTPUT_GCS_PATH: gs://dl-book/chapter-1/prediction/prediction_unet_v1.TFRecord


In [None]:
# Upload the local prediction file to Cloud Storage.
# Both paths are double-quoted because the local path contains spaces and the
# command is run through the shell.
upload_to_gcp = f'gsutil cp "{OUTPUT_IMAGE_FILE}" "{OUTPUT_GCS_PATH}"'
print(f"upload_to_gcp: {upload_to_gcp}")
# check_output raises CalledProcessError if the command exits with a non-zero
# status; the captured stdout may be empty, as in the recorded run.
result = subprocess.check_output(upload_to_gcp, shell=True)
print(f"uploading classified image to gcp: {result}")

upload_to_gcp: gsutil cp "/content/drive/MyDrive/Colab Notebooks/DL_Book/Chapter_1/output/unet_v1/prediction/prediction_unet_v1.TFRecord" "gs://dl-book/chapter-1/prediction/prediction_unet_v1.TFRecord"
uploading classified image to gcp: b''


Next we will upload this to the GEE asset.

In [None]:
# Show the asset folder and the Cloud Storage object used by the Earth Engine upload below.
config.EE_OUTPUT_ASSET, OUTPUT_GCS_PATH

('projects/servir-ee/assets/dl-book/chapter-1/prediction',
 'gs://dl-book/chapter-1/prediction/prediction_unet_v1.TFRecord')

Make sure you have proper permission.

In [None]:
# Import and authenticate the Earth Engine library, then start a session for
# the configured project.
import ee
ee.Authenticate()
# Alternative kept for reference: ee.Initialize(project=f"{config.GCS_PROJECT}")
# Session setup is delegated to the aces helper, with its `use_highvolume` option turned on.
EEUtils.initialize_session(use_highvolume=True, project=config.GCS_PROJECT)


In [None]:
# Save the project id for the `earthengine` command-line tool used in the next cell.
!earthengine set_project {config.GCS_PROJECT}

Successfully saved project id


In [None]:
# Ingest the prediction TFRecord together with the mixer JSON as the asset
# <EE_OUTPUT_ASSET>/<OUTPUT_NAME>.
# NOTE(review): `mode` pyramiding is presumably chosen because the "prediction"
# band is categorical; confirm that it is also intended for the per-class float bands.
upload_image = f"earthengine upload image --asset_id={config.EE_OUTPUT_ASSET}/{config.OUTPUT_NAME} --pyramiding_policy=mode {OUTPUT_GCS_PATH} {json_file}"
# The command only starts an upload task; the printed output carries the task ID.
result = subprocess.check_output(upload_image, shell=True)
print(f"uploading classified image to earth engine: {result}")


uploading classified image to earth engine: b'Started upload task with ID: EC4KXAWNTUH2MFR6YEWTPVE7\n'
