# Observation Selector Notebook

This notebook allows you to select regions of interest from a set of observations detailed in this [Google Spreadsheet](https://docs.google.com/spreadsheets/d/1Q1VfZWmh_BubeTz9Umjofx6Xz8bSb46xvrxBGNJupaE/edit#gid=0)


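To access the GCP bucket (only for those with credentials), you will need to create a service-account key by following [these instructions](https://cloud.google.com/iam/docs/keys-create-delete).
Once you've done that, move the downloaded `*.json` key file to `<home>/gcloud_keys/`. Keep only that one key file in the folder, because the notebook loads its credentials from there.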

In [2]:
import ee
import geemap
import os
import numpy as np
from datetime import datetime
import logging
import matplotlib.pyplot as plt

import gc
gc.enable()

import ipyleaflet
import ipywidgets as widgets

from tqdm.notebook import tqdm

import pandas as pd
import geopandas as gpd

import json
    
from ipyleaflet import WidgetControl
from google.cloud import storage
from google.oauth2 import service_account
    
project_id = 'gee-sand'
home_dir = os.environ['HOME']

# Service-account keys live in <home>/gcloud_keys/.
# Only *.json files are considered so that stray entries (e.g. .DS_Store) are
# never mistaken for a key, and the listing is sorted because os.listdir()
# returns entries in arbitrary order.
# Individual users can also set cred_file_name directly.
key_dir = os.path.join(home_dir, "gcloud_keys")
key_files = sorted(f for f in os.listdir(key_dir) if f.lower().endswith(".json"))
if not key_files:
    raise FileNotFoundError(f"No service-account *.json key found in {key_dir}")
cred_file_name = key_files[0]

with open(os.path.join(key_dir, cred_file_name)) as source:
    info = json.load(source)

# Credentials and client used for every Cloud Storage listing in this notebook.
storage_credentials = service_account.Credentials.from_service_account_info(info)

storage_client = storage.Client(project=project_id, credentials=storage_credentials)


In [3]:
# Try to initialize Earth Engine directly; if that fails for any reason,
# run the authentication flow once and initialize again.
try:
    ee.Initialize()
except Exception:
    ee.Authenticate()
    ee.Initialize()


Successfully saved authorization token.


In [4]:
#### Define Globals
# Year/month window of the overview "S2 Median" layer built in the map-setup cell.
# get_tasks() defines locals with the same names from the date pickers; it does
# not read these module-level values.
start_year = 2022
end_year = 2022
start_month = 1
end_month = 12

# Band selections used by the exports in get_tasks():
# Sentinel-1 bands, Sentinel-2 bands for the RGB visualisation, and the
# Sentinel-2 bands written to the multi-band export.
s1_bands = ['VV', 'VH']
s2_bands_rgb = ['B4', 'B3' , 'B2']
s2_bands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12']

# Buffer applied around the AOI centroid when get_s2_median() filters the
# Sentinel-2 collection by location.
# NOTE(review): presumably metres (Earth Engine's default buffer unit) - confirm.
display_buffer = 50000

In [5]:

def gcs_list_folders(bucket, prefix="", delimeter="/", guess_lexicographically_last_item="~", gcs_client=None):
    """
    List the "folders" found at the depth of ``prefix`` in a GCS bucket.

    The bucket is scanned one blob at a time: after a folder is found, the
    next listing starts at ``folder + guess_lexicographically_last_item`` so
    that the remaining blobs inside that folder are skipped.

    Parameters
    ----------
    bucket : str or bucket object
        Passed to ``gcs_client.list_blobs`` as ``bucket_or_name``.
    prefix : str
        Only blobs whose name starts with this prefix are considered. The
        number of ``delimeter``-separated parts in the prefix fixes the depth
        of the returned folder names.
    delimeter : str
        Path separator used in blob names.
    guess_lexicographically_last_item : str
        A string expected to sort after every blob name inside a folder.
    gcs_client : object
        Client exposing ``list_blobs(bucket_or_name, prefix, start_offset,
        max_results)``, e.g. ``google.cloud.storage.Client``. Required.

    Returns
    -------
    set of str
        Folder names, each ending with ``delimeter``.

    Raises
    ------
    ValueError
        If ``gcs_client`` is not provided.
    Exception
        If the same blob is returned twice, which means a blob name sorts
        after ``guess_lexicographically_last_item`` inside its folder.
    """
    if gcs_client is None:
        raise ValueError("gcs_client is required: pass a google.cloud.storage Client.")

    folders = set()
    # Number of path components that make up a folder name at this depth.
    depth = len(prefix.split(delimeter))
    # Start the scan at the prefix itself (independent of the delimiter in use).
    start_offset = prefix
    last_blob_name = None
    while True:
        blobs = list(gcs_client.list_blobs(
            bucket_or_name=bucket,
            prefix=prefix,
            start_offset=start_offset,
            max_results=1
        ))
        if not blobs:
            break
        blob_name = blobs[0].name
        if blob_name == last_blob_name:
            raise Exception("Saw blob {} twice, try setting a different guess_lexicographically_last_item={}.".format(
                repr(blob_name), repr(guess_lexicographically_last_item)
            ))
        folder = delimeter.join(blob_name.split(delimeter)[:depth] + [""])
        folders.add(folder)
        # Jump past everything that lives inside the folder just recorded.
        start_offset = folder + guess_lexicographically_last_item
        last_blob_name = blob_name

    return folders

def toDb(image):
    """
    Return `image` with its VV and VH bands converted to decibels
    (10 * log10 of the linear value).
    """
    linear_bands = image.select(['VV', 'VH'])
    decibel_bands = ee.Image().expression('10 * log10(linear)', {'linear': linear_bands})
    # names=None, overwrite=True: replace the bands to keep the image properties.
    return image.addBands(decibel_bands, None, True)

def dbNorm(image):
    """
    Normalize an S1 image by computing ``image / 30 + 1`` and returning the
    result as an ee.Image.
    """
    scaled = image.divide(30.0)
    shifted = scaled.add(1.0)
    return ee.Image(shifted)

def getCover(image, aoi, scale):
    """
    Percentage of `aoi` that `image` covers, rounded to the nearest integer.

    Pixels are counted over `aoi` at `scale`, once with masked pixels filled
    in (``unmask(1)``) and once on the image as is. The ratio of the two
    counts (taken from the first value of each reduceRegion result) is
    converted to a percentage and fetched from the server with getInfo().
    """
    def _pixel_count(img):
        # Count of pixels of `img` inside the aoi, as a server-side number.
        stats = img.reduceRegion(
            reducer=ee.Reducer.count(),
            scale=scale,
            geometry=aoi,
        )
        return ee.Number(stats.values().get(0))

    # Every pixel position inside the aoi (masked pixels filled with 1).
    possible = _pixel_count(image.unmask(1))
    # Pixels that actually carry data.
    present = _pixel_count(image)

    coverage = present.divide(possible).multiply(100).round()
    return coverage.getInfo()

def get_s1_median(year, month, aoi, clip = True):
    """
    Median Sentinel-1 composite (converted to dB by toDb) for one area, year
    and month.

    Only IW-mode scenes that list both the VV and VH polarisations and
    intersect `aoi` are used. The result is clipped to `aoi` unless `clip`
    is False, and carries a 'month' property set to the first day of the month.
    """
    yr = int(year)
    mo = int(month)

    scenes = (
        ee.ImageCollection('COPERNICUS/S1_GRD_FLOAT')
        .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV'))
        .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VH'))
        .filter(ee.Filter.eq('instrumentMode', 'IW'))
        .filterBounds(aoi)
        .filter(ee.Filter.calendarRange(yr, yr, 'year'))
        .filter(ee.Filter.calendarRange(mo, mo, 'month'))
    )
    composite = scenes.median()

    if clip:
        composite = composite.clip(aoi)

    composite = toDb(composite)
    return composite.set({'month': ee.Date.fromYMD(yr, mo, 1)})

def get_s2_median(year, month, aoi, clip = True):
    """
    Median Sentinel-2 surface-reflectance composite for one area, year and month.

    Scenes are taken from a buffer of `display_buffer` around the centroid of
    `aoi` and must have CLOUDY_PIXEL_PERCENTAGE below 20. The result carries
    a 'month' property set to the first day of the month and is clipped to
    `aoi` unless `clip` is False.
    """
    yr = int(year)
    mo = int(month)

    search_area = ee.FeatureCollection(aoi).geometry().centroid().buffer(display_buffer)
    scenes = (
        ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
        .filterBounds(search_area)
        .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
        .filter(ee.Filter.calendarRange(yr, yr, 'year'))
        .filter(ee.Filter.calendarRange(mo, mo, 'month'))
    )

    composite = scenes.median().set({'month': ee.Date.fromYMD(yr, mo, 1)})
    return composite.clip(aoi) if clip else composite

def get_progress_bar(task_dict):
    """
    Display a progress bar of completed tasks out of all tasks in `task_dict`.

    The status of every task is queried, so this takes a couple of seconds
    each time. Nothing is returned.
    """
    finished = [1 for task in task_dict.values() if task.status()['state'] == 'COMPLETED']
    bar = tqdm(finished, total=len(task_dict), desc="Progress")
    # Exhaust the iterator so the bar advances once per completed task.
    for _ in bar:
        pass
    

def remove_layers(layer_list):
    """
    Remove every layer named in `layer_list` from the notebook's map.
    """
    for layer_name in layer_list:
        Map.remove_ee_layer(layer_name)
        
def get_new_feats(new_aoi_name):
    """
    Look up an AOI by name in `label_candidates` and return its point
    geometry (longitude, latitude), river name and district name, taken from
    the first matching row.
    """
    matches = label_candidates[label_candidates['Name'] == new_aoi_name]

    lon_lat = matches[['longitude', 'latitude']].values[0]
    point = ee.Geometry.Point(list(lon_lat))

    river_name = matches['River'].values[0]
    district_name = matches['District'].values[0]

    return point, river_name, district_name

# Layer name -> ee.FeatureCollection of the stored AOIs for the current
# dropdown selection; rebuilt by on_dropdown_change (None until then).
layer_map = None
# Every export task started so far, keyed by task name; extended by callback1.
all_tasks = {}

def _band_count(img):
    """Return the number of bands in `img`, or 0 when Earth Engine cannot evaluate it."""
    try:
        # If the expected bands are not available, getInfo() raises.
        return img.bandNames().length().getInfo()
    except Exception as err:
        logging.info(f"Could not read band names: {err}")
        return 0


def get_tasks(aoi1):
    """
    Create and start all export tasks for an AOI.

    For every month between the dates picked in `date1` and `date2`
    (inclusive) the Sentinel-1 and Sentinel-2 median composites of the AOI
    are built. When both composites have bands and both cover 100% of the
    AOI, three Cloud Storage exports are started: S1 bands, an S2 RGB
    visualisation and the S2 bands. When `aoi_is_new` is set, the AOI outline
    is exported as GeoJSON as well.

    Reads the notebook globals `curr_aoi_name`, `label_candidates`, `river`,
    `district`, `date1`, `date2`, `aoi_is_new`, `s1_bands`, `s2_bands_rgb`
    and `s2_bands`.

    Parameters
    ----------
    aoi1 : ee.Feature
        The area of interest to export.

    Returns
    -------
    dict
        Task name -> started Earth Engine task.

    Raises
    ------
    ValueError
        If the start or end date has not been picked.
    """
    start_val = date1.value
    end_val = date2.value
    if start_val is None or end_val is None:
        raise ValueError("Pick both a start date and an end date before exporting.")

    # For these AOIs, the lat lon values in the location key are based on the
    # values provided in the Google Spreadsheet, not on the centroid of the AOI.
    check_names = ['Betwa RIver, Hamirpur',
                   'Ken river, Banda district',
                   'Kathajodi River, Cuttack',
                   'Narmada River, Sehore, MP',
                   'Chambal 2',
                   'Sone, Dehri',
                   'Tawa river, Hoshangabad, MP']

    if curr_aoi_name in check_names:
        sheet_row = label_candidates[label_candidates['Name'] == curr_aoi_name]
        lon = sheet_row['longitude'].values[0]
        lat = sheet_row['latitude'].values[0]
    else:
        cent = aoi1.centroid().geometry().coordinates()
        lon = cent.get(0).getInfo()
        lat = cent.get(1).getInfo()

    # Location key shared by every export of this AOI.
    stub = f"{river}_{district}_{str(np.round(lon, 2)).replace('.', '-')}_{str(np.round(lat, 2)).replace('.', '-')}"

    bucket = 'sand_mining_median'
    region = aoi1.geometry()

    task_dict = {}
    months = pd.date_range(start = f"{start_val.year}-{start_val.month}-01",
                           end = f"{end_val.year}-{end_val.month}-01",
                           freq = 'MS')
    for dt in months:
        year = int(dt.year)
        month = int(dt.month)
        mstub = f"{month:02d}"  # zero-padded month used in descriptions and file names

        s1_img = get_s1_median(year, month, region)
        s2_img = get_s2_median(year, month, region)

        # Export images only if both the S1 and the S2 composite have data.
        l1 = _band_count(s1_img)
        l2 = _band_count(s2_img)

        if l1 > 0 and l2 > 0:
            percCover_s1 = getCover(s1_img, region, 100)
            percCover_s2 = getCover(s2_img, region, 100)

            if percCover_s1 == 100 and percCover_s2 == 100:
                # (task_dict key suffix, folder / file suffix, image to export)
                exports = [
                    ('s1', 's1', s1_img.select(s1_bands)),
                    ('s2_rgb', 'rgb', s2_img.select(s2_bands_rgb).visualize(**{"bands": ['B4', 'B3', 'B2'],
                                                                               "min": 0,
                                                                               "max": 3500})),
                    ('s2_bs', 's2', s2_img.select(s2_bands)),
                ]
                # Build all three tasks first so nothing is started if one cannot be created.
                month_tasks = [
                    (key_suffix, ee.batch.Export.image.toCloudStorage(
                        image = image,
                        description = f"{stub}_{year}_{mstub}_{kind}",
                        bucket = bucket,
                        fileNamePrefix = f"labels/{stub}_median/{kind}/{stub}_{year}-{mstub}-01_{kind}",
                        region = region,
                        scale = 10,
                        crs = 'EPSG:4326',
                        maxPixels = 1e13
                    ))
                    for key_suffix, kind, image in exports
                ]
                for key_suffix, task in month_tasks:
                    task.start()
                    task_dict[f'{stub}_{year}_{month}_{key_suffix}'] = task
            else:
                logging.info(f"{stub}_{year}_{month} not Exported!")
                logging.info(f"""S2 has {percCover_s2}% Coverage, and S1 has {percCover_s1}% Coverage.""")

        else:
            logging.info(f"{stub}_{year}_{month} not Exported!")
            # l1 is the S1 band count and l2 the S2 band count.
            logging.info(f"""S2 has {l2} bands, and S1 has {l1} bands.""")

    if aoi_is_new:
        logging.info("AOI does not exist -- exporting to GC Bucket")
        task_shp = ee.batch.Export.table.toCloudStorage(
                          collection = ee.FeatureCollection(aoi1),
                          description = stub,
                          bucket = bucket,
                          fileNamePrefix = 'labels/' + stub+'_median/shp/'+stub,
                          fileFormat = 'GeoJSON')

        task_shp.start()
        task_dict[f'{stub}_shp'] = task_shp

    return task_dict


def on_dropdown_change(change):
    """
    Event handler for the dropdown.

    Each time a new value is selected: resets the buttons and date pickers,
    updates the globals describing the current selection, removes the layers
    of the previous selection, moves the map to the new point and adds the
    AOIs already stored in the bucket for the selected river and district.

    Notifications that are not a change of the dropdown's `value` are ignored
    entirely, so they no longer reset the date pickers and buttons.

    Parameters
    ----------
    change : dict
        Change notification with the keys 'type', 'name', 'old' and 'new'.
    """
    global old_river_name
    global old_district_name
    global river
    global district
    global layer_map
    global curr_aoi_name
    global curr_aoi

    if not (change['type'] == 'change' and change['name'] == 'value'):
        return

    clear_button.disabled = True
    button.disabled = True
    date1.value = None
    date2.value = None

    new_aoi_name = change['new']
    old_aoi_name = change['old']

    curr_aoi_name = new_aoi_name

    new_pt, river, district = get_new_feats(new_aoi_name)
    curr_aoi = new_pt

    if old_aoi_name is not None:
        Map.remove_ee_layer(old_aoi_name)

    if old_river_name:
        # Drop the stored-AOI layers that belonged to the previous selection.
        stale_layers = [
            layer.name for layer in list(Map.layers)
            if old_river_name in layer.name and old_district_name in layer.name
        ]
        remove_layers(stale_layers)

    Map.remove_last_drawn()

    Map.addLayer(new_pt, {}, new_aoi_name)
    Map.centerObject(new_pt, zoom = 10)

    folderlist = gcs_list_folders(bucket="sand_mining_median",
                                  prefix = f'labels/{river}_{district}',
                                  gcs_client=storage_client)

    layer_map = {}

    for f in folderlist:
        fname = f.split("/")[1].replace("_median", "")
        try:
            tmp1 = gpd.read_file(f"https://storage.googleapis.com/sand_mining_median/{f}shp/{fname}.geojson")
            featureCollection = ee.FeatureCollection(json.loads(tmp1.to_json()))
            layer_map[fname] = featureCollection
            Map.addLayer(featureCollection, {}, fname)
        except Exception as err:
            # Best effort: a folder without a readable outline is skipped,
            # but the reason is recorded instead of being silently dropped.
            logging.warning(f"Could not load stored AOI {fname}: {err}")

    old_river_name = river
    old_district_name = district


def get_final_aoi():
    """
    Return the AOI to export as an ee.Feature.

    The last drawn feature takes priority. Otherwise the first feature of the
    first visible stored-AOI layer of the current river and district is used.
    Sets the global `aoi_is_new` to True for a drawn feature, False otherwise.

    Raises
    ------
    ValueError
        If there is neither a drawn feature nor a stored AOI layer. The
        message "Select an AOI!" is printed as well.
    """
    global aoi_is_new

    new_aoi = Map.draw_last_feature
    if new_aoi is not None:
        aoi_is_new = True
        return ee.Feature(new_aoi)

    aoi_is_new = False

    # Stored AOIs are only looked up when nothing was drawn, and only layers
    # that are actually present in layer_map qualify.
    stored = layer_map or {}
    keep = [
        layer.name for layer in Map.layers
        if layer.visible
        and river in layer.name
        and district in layer.name
        and curr_aoi_name != layer.name
        and layer.name in stored
    ]
    if keep:
        return ee.Feature(stored[keep[0]].first())

    print("Select an AOI!")
    raise ValueError("Select an AOI!")
        
def callback1(b):
    """
    Click handler for the export button.

    Resolves the AOI to export, checks that its geometry is a Polygon, then
    creates and starts all export tasks and records them in `all_tasks`.
    """
    print("Exporting to GC Bucket, please wait...")
    selected_aoi = get_final_aoi()
    geometry_kind = selected_aoi.geometry().type().getInfo()
    assert geometry_kind == 'Polygon', 'Pick/Draw a Polygon!'
    # all_tasks is mutated in place, so no global declaration is needed.
    all_tasks.update(get_tasks(selected_aoi))
    
    
# Handle click event
def on_button_clicked(b):
    """
    Click handler for the "Clear drawings" button.

    Clears the map's draw control (`dc`) and then calls
    Map.remove_last_drawn(). The argument `b` passed by the button is not used.
    """
    dc.clear()
    Map.remove_last_drawn()
    
def handle_draw(self, action, geo_json):
    """
    Draw-control callback (registered with dc.on_draw): enables the
    "Clear drawings" button once the draw control reports a drawing event.

    None of the arguments supplied by the draw control are used.
    """
    clear_button.disabled = False

def on_start_date_change(change):
    """
    Handler for the start-date picker.

    On a real date selection, replaces the previous "Start" preview layer
    with the S2 median of the newly picked month and disables the export
    button. When the picker is reset, removes every "Start" preview layer.
    """
    picked_a_date = (
        change['type'] == 'change'
        and change['name'] == 'value'
        and change['new'] is not None
    )

    if not picked_a_date:
        # The date picker was reset: drop all start preview layers.
        stale_names = [layer.name for layer in Map.layers if 'Start: S2 - Median' in layer.name]
        for stale_name in stale_names:
            Map.remove_ee_layer(stale_name)
        return

    picked = change['new']
    previous = change['old']

    if previous is not None:
        Map.remove_ee_layer(f"Start: S2 - Median {previous.year} - {previous.month}")

    preview = get_s2_median(picked.year, picked.month, curr_aoi, clip = False)
    vis_params = {"bands":['B4', 'B3', 'B2'], 'min':0, 'max':3500}
    Map.addLayer(preview, vis_params, f"Start: S2 - Median {picked.year} - {picked.month}")
    button.disabled = True
            
        
    
def on_end_date_change(change):
    """
    Handler for the end-date picker.

    On a real date selection, replaces the previous "End" preview layer with
    the S2 median of the newly picked month and enables the export button.
    When the picker is reset, removes every "End" preview layer.
    """
    picked_a_date = (
        change['type'] == 'change'
        and change['name'] == 'value'
        and change['new'] is not None
    )

    if not picked_a_date:
        # The date picker was reset: drop all end preview layers.
        stale_names = [layer.name for layer in Map.layers if 'End: S2 - Median' in layer.name]
        for stale_name in stale_names:
            Map.remove_ee_layer(stale_name)
        return

    picked = change['new']
    previous = change['old']

    if previous is not None:
        Map.remove_ee_layer(f"End: S2 - Median {previous.year} - {previous.month}")

    preview = get_s2_median(picked.year, picked.month, curr_aoi, clip = False)
    vis_params = {"bands":['B4', 'B3', 'B2'], 'min':0, 'max':3500}
    Map.addLayer(preview, vis_params, f"End: S2 - Median {picked.year} - {picked.month}")

    button.disabled = False
            

# from google.cloud.storage.client import Client

# gcs_client = Client()

# print(gcs_list_folders(bucket="sand_mining_median", gcs_client=gcs_client))
# print(gcs_list_folders(bucket="my-bucket", prefix="foo/", gcs_client=gcs_client))
# print(gcs_list_folders(bucket="my-bucket", prefix="foo/bar/", gcs_client=gcs_client))

## Get observations from Google Sheet

In [6]:
# Identify the observation spreadsheet and the tab to read.
sheet_id = "1Q1VfZWmh_BubeTz9Umjofx6Xz8bSb46xvrxBGNJupaE"
sheet_name = "Sheet1"
# URL that asks Google Sheets for the tab as CSV (tqx=out:csv).
url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"

# Candidate observations; the rest of the notebook reads the columns
# Name, longitude, latitude, River and District.
label_candidates = pd.read_csv(url)

In [7]:
# Resetting logger (important for Jupyter)
logger = logging.getLogger()
for handler in logger.handlers[:]:
    handler.close()
    logger.removeHandler(handler)

# Configure logging settings
logging.basicConfig(filename=f'{str(datetime.now().replace(second=0, microsecond = 0)).replace(" ", "_")}.log',  # the file where logs will be saved
                    level=logging.INFO,  # capture INFO and above
                    format='%(asctime)s - %(levelname)s - %(message)s')  # format for each log entry


# Folders currently stored under labels/ in the bucket.
folders = gcs_list_folders(bucket="sand_mining_median", prefix = 'labels/' , gcs_client=storage_client)

# State shared between the widget handlers.
old_names = None
old_river_name = None
old_district_name = None
curr_aoi_name = None

river, district = None, None
aoi_is_new = False
curr_aoi = None

names1  = label_candidates.Name.values
dropdown = widgets.Dropdown(
    options=names1, value=None, description='Label candidates'
)
# Observe only the selected value (as the date pickers below do), so the
# handler is not invoked for every other trait of the widget.
dropdown.observe(on_dropdown_change, names='value')
dropdown_ctrl = WidgetControl(widget=dropdown, position='topright')


button = widgets.Button(
    description='Export Medians',
    button_style='info',  # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click me',
    icon='check',  # (FontAwesome names without the `fa-` prefix)
    disabled = True
)
button.on_click(callback1)

date1 = widgets.DatePicker(
    description='Pick Start Date',
    disabled=False
)
date1.observe(on_start_date_change, names='value')

date2 = widgets.DatePicker(
    description='Pick End Date',
    disabled=False
)
date2.observe(on_end_date_change, names='value')

clear_button = widgets.Button(description="Clear drawings", disabled = True)
clear_button.on_click(on_button_clicked)

btn_control = WidgetControl(widget=clear_button, position='topright')


Map = geemap.Map(center = (25, 82), zoom = 10)
Map.add_basemap('SATELLITE')


# Enable the "Clear drawings" button whenever something is drawn.
dc = Map.draw_control
dc.on_draw(handle_draw)


# Overview layer: S2 median over the configured year/month window.
s2_median = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")\
        .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))\
        .filter(ee.Filter.calendarRange(start_year, end_year, 'year'))\
        .filter(ee.Filter.calendarRange(int(start_month), int(end_month), 'month'))\
        .median()

Map.addLayer(s2_median, {"bands":['B4', 'B3', 'B2'], 'min':0, 'max':3500}, 'S2 Median')

Map.add_control(dropdown_ctrl)
Map.add_control(btn_control)

Map.add_widget(button, position = "bottomright")
Map.add_widget(date2, position = "bottomright")
Map.add_widget(date1, position = "bottomright")

Map

Map(center=[25, 82], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Tog…

## Check progress status

In [8]:
# Monitor progress
get_progress_bar(all_tasks)

for key, task in all_tasks.items():
    # Query the status once per task so that the printed state and the
    # failure check always refer to the same snapshot.
    status = task.status()
    print(key, status['state'])
    # display error messages per failed task
    if status['state'] == 'FAILED':
        print(status.get('error_message', 'no error message reported'))

Progress: 0it [00:00, ?it/s]