### 0. Import libraries and authenticate

In [1]:
import ee
import os
import sys
import geemap
import shutil
import contextlib
from tqdm import tqdm

# --- notebook configuration ---
# Folder that is scanned for the ".tif" files whose dates select the ground-truth months.
DATA_PATH = "../data/processed/"
# Folder that receives the exported ground-truth GeoTIFFs (it is wiped and recreated below).
GROUND_TRUTH_PATH = "../data/ground_truth/"
# Prefix used in the name of every exported file.
LAKE_NAME = "george"

In [2]:
# Authenticate with Google Earth Engine (GEE), then initialise the client library.
ee.Authenticate()

try:
    # Initialize the library.
    ee.Initialize()
    print('Google Earth Engine has initialized successfully!')
except ee.EEException as e:
    # Show the reason and stop here: every later cell calls the Earth Engine
    # API, so continuing would only fail later with a less helpful error.
    print('Google Earth Engine has failed to initialize!', e)
    raise
except Exception:
    # Anything else is unexpected: report the exception type and re-raise.
    print("Unexpected error:", sys.exc_info()[0])
    raise

Google Earth Engine has initialized successfully!


In [3]:
# Year-month keys of the rasters already present in DATA_PATH.
# The slice keeps the 7 characters that start 14 characters before the end of the
# name, i.e. "YYYY-MM" when the name ends in "YYYY-MM-DD.tif" (assumed naming
# scheme - the export loop compares these keys against 'YYYY-MM-dd'[:-3]).
# Only names that really end in ".tif" are considered, so sidecar files such as
# "*.tif.aux.xml" do not contribute meaningless keys.
water_index_dates = [
    file_name[-14:-7]
    for file_name in os.listdir(DATA_PATH)
    if file_name.endswith(".tif")
]

### 1. Get the ground truth data and store it

In [4]:
# Region of interest: a rectangle described by four numbers (two corner points).
# NOTE(review): the Earth Engine docs give the four-number form as
# xMin, yMin, xMax, yMax; here the second value (-34.98) is larger than the
# fourth (-35.206...), so the y values are in max/min order - confirm that
# Earth Engine treats the two corners as intended.
coordinates = [
    149.35704470513303,
    -34.98,
    149.49550371751621,
    -35.20614998677668,
]
region = ee.Geometry.Rectangle(coordinates)

In [5]:
# Build the collection for the whole study period from 'JRC/GSW1_4/MonthlyHistory',
# restricted to images that intersect the region of interest.
# Note: this dataset carries no cloud-cover metadata, so no cloud filter is applied.
image_collection = (
    ee.ImageCollection('JRC/GSW1_4/MonthlyHistory')
    .filterBounds(region)
    .filterDate('1984-03-01', '2021-12-31')
)

In [6]:
# Count the images on the server and fetch only the resulting integer,
# instead of downloading every 'system:index' value just to take its length.
number_of_images = image_collection.size().getInfo()
print('Number of images in the collection: ', number_of_images)

Number of images in the collection:  454


In [7]:
# Start from an empty output directory (this is done because geemap does not
# overwrite the images).
# The directory is only removed if it exists, so the cell also works on a fresh
# checkout; makedirs creates missing parent folders as well.
if os.path.isdir(GROUND_TRUTH_PATH):
    shutil.rmtree(GROUND_TRUTH_PATH)
os.makedirs(GROUND_TRUTH_PATH, exist_ok=True)

In [8]:
# convert the image collection to a server-side list so images can be fetched by index
images = image_collection.toList(image_collection.size())

# year-month keys to export, as a set for constant-time membership tests
wanted_months = set(water_index_dates)
# capture dates whose export did not leave a file on disk
failed_exports = []

for image_index in tqdm(range(number_of_images)):
    # stdout is redirected to None inside this block, so anything printed by the
    # calls below (including error messages) is discarded
    with contextlib.redirect_stdout(None):
        # get the water band
        image = ee.Image(images.get(image_index)).select("water")

        # get the capture date
        date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd').getInfo()
        if date[:-3] not in wanted_months:
            continue

        # store the band as a tif file
        output_file = f"{GROUND_TRUTH_PATH}{LAKE_NAME}_{date}.tif"
        geemap.ee_export_image(image, filename=output_file, scale=30, region=region, file_per_band=False)

        # printed messages are suppressed above, so verify the result explicitly
        if not os.path.isfile(output_file):
            failed_exports.append(date)

# surface exports that silently produced nothing
if failed_exports:
    print(f"{len(failed_exports)} export(s) did not produce a file:", ", ".join(failed_exports))


100%|██████████| 454/454 [06:00<00:00,  1.26it/s]
