# Monitor the active tasks on Google Earth Engine

Run this notebook to monitor download tasks in progress on Google Earth Engine.

In [None]:
# Necessary imports
import os
import json
from tqdm.notebook import tqdm
import random
import pandas as pd
import time
from db_utils import DB
from dotenv import load_dotenv

## Load environment and project details

As with the other notebooks, we load credentials and project details from a hidden ```.env``` file.

In [2]:
# Load environment variables (including path to credentials) from '.env' file.
# NOTE: this path is machine-specific; point it at your own '.env' file.
env_file_path = "C:/Users/User/floodmapper/.env"

# Stop immediately if the '.env' file could not be loaded.
assert load_dotenv(dotenv_path=env_file_path), "[ERR] Failed to load environment!"

# The Google credential variable must be set and must point at an existing key file.
assert "GOOGLE_APPLICATION_CREDENTIALS" in os.environ, "[ERR] Missing $GOOGLE_APPLICATION_CREDENTIALS!"
assert "GS_USER_PROJECT" in os.environ, "[ERR] Missing $GS_USER_PROJECT!"
key_file_path = os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
assert os.path.exists(key_file_path), f"[ERR] Google credential key file does not exist: \n{key_file_path} "

# The base directory must be set and must exist on disk.
assert "ML4FLOODS_BASE_DIR" in os.environ, "[ERR] Missing $ML4FLOODS_BASE_DIR!"
base_path = os.environ["ML4FLOODS_BASE_DIR"]
assert os.path.exists(base_path), f"[ERR] Base path does not exist: \n{base_path} "
print("[INFO] Successfully loaded FloodMapper environment.")

[INFO] Successfully loaded FloodMapper environment.


**Set the path to the JSON task file here**

In [3]:
# Path to the JSON task file output by the download script
json_path = "C:/Users/User/floodmapper/scripts/2023-03-28_11.52.00.json"
# Shown as the cell output: True if the path exists, False otherwise
os.path.exists(json_path)

True

## Display a progress bar

The cells here can be quickly run in sequence to produce progress bars for the tasks being tracked by the database. Note that the ```01_download_images.py``` script must remain running for this notebook to work. 

In [4]:
# Connect to the database: DB (imported from db_utils) is constructed with
# the path to the '.env' file set earlier in this notebook.
# NOTE(review): presumably DB reads its connection settings from that file - confirm in db_utils.
db_conn = DB(env_file_path)

[INFO] Connecting to DB 'floodmapper-db'.
[INFO] Connection successfully established.


In [5]:
# Load the JSON task list; the context manager closes the file handle
# as soon as parsing has finished.
with open(json_path, "r") as json_file:
    task_list = json.load(json_file)
n_tasks = len(task_list)
print(f"JSON currently contains {n_tasks} task entries.")

# Convert to a DataFrame and derive the grid name, i.e. the text before
# the first underscore of each task description.
tasks_df = pd.DataFrame(task_list)
tasks_df["gridname"] = tasks_df["description"].str.split("_").str[0]

# Cell output: number of tasks whose state is "COMPLETED"
len(tasks_df[tasks_df["state"] == "COMPLETED"])

JSON currently contains 155 task entries.


49

In [6]:
# Query the DB for the download status of the images
image_ids = tuple(tasks_df['description'].unique())
query = ("SELECT image_id, status "
         "FROM image_downloads "
         "WHERE image_id IN %s;")

# The whole ID tuple is passed as the single parameter of the 'IN %s' placeholder.
data = (image_ids,)
image_db = db_conn.run_query(query, data, fetch=True)

# Cell output: only the images with status -1, which the monitoring loop
# further down treats as "still in progress".
image_db[image_db["status"] == -1]

Unnamed: 0,image_id,status
0,GRID31996_S2_2022-06-13,-1
1,GRID31996_Landsat_2022-06-15,-1
2,GRID31996_S2_2022-07-08,-1
3,GRID31996_Landsat_2022-07-09,-1
4,GRID31996_Landsat_2022-07-10,-1
...,...,...
150,GRID32843_S2_2022-07-15,-1
151,GRID32843_Landsat_2022-07-18,-1
152,GRID32843_S2_2022-07-18,-1
153,GRID32843_S2_2022-07-20,-1


In [None]:
# Seconds to wait between two polls of the database.
POLL_INTERVAL_S = 5

# Status codes in the 'image_downloads' table as used by this notebook:
# -1 means the download is still in progress, 0 or 1 means it no longer is.
FINISHED_STATUSES = (0, 1)

status_query = ("SELECT image_id, status "
                "FROM image_downloads "
                "WHERE image_id IN %s;")

# Work on a copy so that 'tasks_df' stays intact and this cell can be re-run.
pending_df = tasks_df.copy()

# Initialise progress bar for all available tasks.
batch_bar = tqdm(total=len(pending_df),
                 dynamic_ncols=True,
                 leave=False,
                 position=0,
                 desc="All Tasks",
                 colour="GREEN")

# Logic: poll the database for the status of every pending task and remove
# tasks from the pending set once they are no longer marked as in progress.
# The status table is re-queried on every pass; a snapshot taken once before
# the loop would never change and the loop could never terminate.
try:
    while len(pending_df) >= 1:
        pending_ids = tuple(pending_df["description"].unique())
        status_df = db_conn.run_query(status_query, (pending_ids,), fetch=True)

        # IDs whose download is no longer in progress.
        # NOTE(review): IDs without a row in the table, or with any other
        # status code, stay pending - confirm that this is the desired handling.
        finished_ids = status_df.loc[
            status_df["status"].isin(FINISHED_STATUSES), "image_id"
        ]
        is_finished = pending_df["description"].isin(finished_ids)

        # Advance the bar by the number of task rows that just finished.
        n_finished = int(is_finished.sum())
        if n_finished:
            batch_bar.update(n_finished)
            pending_df = pending_df[~is_finished]

        # Only wait when there is still something left to monitor.
        if len(pending_df) >= 1:
            time.sleep(POLL_INTERVAL_S)
finally:
    # Release the progress bar even if the cell is interrupted.
    batch_bar.close()

All Tasks:   0%|          | 0/155 [00:00<?, ?it/s]