# Exercise

### Add project directory to Python path

This code defines two functions and retrieves the project directory path. It's useful when we want to define some generic functions that can be imported. Retrieving the project directory path this way works in both IPython and plain Python environments.

- `is_interactive()`: Checks if the code is running in an interactive environment.
- `get_proj_dir()`: Determines the project directory path based on the execution context. If running interactively, it infers the project directory from the Jupyter kernel. Otherwise, it infers it from the Python file. The function returns the project directory as a `pathlib.Path` object.

In [1]:
import os
import pathlib
import sys


def is_interactive() -> bool:
    """
    Report whether the code runs inside a Jupyter notebook or JupyterLab kernel.

    The check looks up ``get_ipython``; when that name does not exist a
    ``NameError`` is raised and the code is treated as running outside any
    interactive shell.

    Returns:
        True only when the shell class is ``ZMQInteractiveShell``; False for
        the IPython terminal shell, any other shell, or when ``get_ipython``
        is not defined.
    """
    try:
        shell_name = get_ipython().__class__.__name__
    except NameError:
        # `get_ipython` is not defined, so no IPython shell is active.
        print("Not in an interactive shell. ")
        return False

    if shell_name == "ZMQInteractiveShell":
        # Jupyter Notebook or JupyterLab
        print("In Jupyter notebook or lab shell. ")
        return True

    if shell_name == "TerminalInteractiveShell":
        # Terminal or IPython console
        print("In IPython console shell. ")

    # IPython console and every other shell type count as non-interactive here.
    return False


def get_proj_dir() -> pathlib.Path:
    """
    Get the project directory path.

    In a Jupyter kernel the project directory is taken to be the parent of the
    current working directory. Otherwise it is taken to be two levels above
    this Python file (the parent of the folder that contains the file).

    Returns:
        A `pathlib.Path` object representing the project directory path.
    """
    if is_interactive():
        print("Inferring project directory from the Jupyter kernel.")
        # Assumes the kernel's working directory sits one level below the
        # project root.
        return pathlib.Path().resolve().parent

    print("Inferring project directory from the Python file.")
    try:
        # Resolve before walking up: `__file__` can be a relative path, and
        # then `.parent.parent` would collapse to "." instead of the real
        # directory two levels up.
        file_path = pathlib.Path(__file__).resolve()
    except NameError:
        # `__file__` is undefined in an IPython console / REPL session, where
        # `is_interactive()` also returns False; use the working directory
        # the same way the Jupyter branch does instead of crashing.
        return pathlib.Path().resolve().parent

    return file_path.parent.parent


# Make modules under <project>/src importable from this notebook or script.
proj_dir: pathlib.Path = get_proj_dir()
src_dir = str(proj_dir / "src")

# Re-running this cell must not pile up duplicate entries on sys.path.
if src_dir not in sys.path:
    print(f"Adding {src_dir} to system path.")
    sys.path.append(src_dir)

In Jupyter notebook or lab shell. 
Inferring project directory from the Jupyter kernel.
Adding /Users/calkoen/dev/py-sense/src to system path.


### Very extensive list of libraries - I'll clean up later

In [2]:
import time
import warnings
from copy import deepcopy
from typing import Any, Dict, List, Union

import cartopy.crs as crs

# import adlfs
# import azure.storage.blob
import colorcet as cc
import dask
import dask.array as da
import dask.bag as db
import dask.dataframe as dd

# import dask_gateway
import dask_geopandas
import geopandas as gpd

# import geoviews.tile_sources as gvts
import holoviews as hv
import hvplot.pandas  # noqa
import hvplot.xarray  # noqa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import panel as pn
import planetary_computer
import pooch
import pyproj
import pystac
import pystac_client
import rasterio
import rioxarray
import rioxarray as rio
import shapely
import stackstac
import tqdm
import xarray as xr

# from azure.storage.blob import BlobServiceClient
from dask.distributed import Client
from geopandas.array import GeometryDtype
from ipyleaflet import Map, basemaps
from matplotlib.colors import ListedColormap
from odc.stac import configure_rio, stac_load
from xrspatial.multispectral import true_color

## 01 - Data Access

In [3]:
# Interactive imagery map; its viewport is read back later to define the
# region of interest.
m = Map(
    basemap=basemaps.Esri.WorldImagery,
    center=(53.2, 5.5),
    zoom=9,
    scroll_wheel_zoom=True,
)
m.layout.height = "800px"
m

Map(center=[53.2, 5.5], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_…

### EO team discussion point 

Do we show how to import generic functions from a project directory? Otherwise I'll just redefine the functions here in the notebook

### Extract the coords from the interactive map -- IMPORTANT: wait 2 seconds until the map is rendered, otherwise you cannot extract the coords

In [4]:
from coastmonitor.geo.geometries import bbox_to_geometry, geo_bbox, geometry_to_bbox

# Current map viewport as [west, south, east, north]. The heading of this cell
# warns that the coordinates cannot be extracted until the map has rendered.
bbox = [m.west, m.south, m.east, m.north]
# NOTE(review): bbox_geom is not used in the visible cells — confirm it is needed later.
bbox_geom = bbox_to_geometry(bbox)
# src_crs and dst_crs are both 4326, so presumably no reprojection takes place
# here — confirm against geo_bbox. The result exposes `.explore()`.
roi = geo_bbox(*bbox, src_crs=4326, dst_crs=4326)
# Show the region of interest on an interactive map.
roi.explore()

## Discussion: Unfortunately the full STAC catalog for the Coastal mask is not available yet, so maybe we should take a smaller roi in the NL?

In [5]:
# Open the catalog straight from its catalog.json. The warnings emitted by this
# cell show that the server advertises no conformance classes, so pystac_client
# falls back to plain pystac, which it says may be slow.
catalog = pystac_client.Client.open(
    "https://storage.googleapis.com/dgds-data-public/coclico/coclico-stac/catalog.json"
)
# "cm" is presumably the coastal mask collection (the asset hrefs point to
# .../coastal_mask/...) — confirm.
cm_collection = catalog.get_collection("cm")
# Materialize all items in a list so they can be indexed by position later.
# NOTE(review): newer pystac releases deprecate get_all_items() in favour of
# get_items(recursive=True) — check against the installed version.
cm_items = list(cm_collection.get_all_items())

/Users/calkoen/mambaforge/envs/pysense/lib/python3.11/site-packages/pystac_client/client.py:187: NoConformsTo: Server does not advertise any conformance classes.
/Users/calkoen/mambaforge/envs/pysense/lib/python3.11/site-packages/pystac_client/client.py:406: FallbackToPystac: Falling back to pystac. This might be slow.
  self._warn_about_fallback("COLLECTIONS", "FEATURES")
/Users/calkoen/mambaforge/envs/pysense/lib/python3.11/site-packages/pystac_client/collection_client.py:138: FallbackToPystac: Falling back to pystac. This might be slow.
  root._warn_about_fallback("ITEM_SEARCH")


In [6]:
# Build one table of item bounding boxes. The index is reset so that each row
# number matches the item's position in `cm_items`.
cm_bboxes = pd.concat(
    [geo_bbox(*item.to_dict()["bbox"]) for item in cm_items]
).reset_index(drop=True)
cm_bboxes.explore()

In [7]:
# Spatially join the item bounding boxes with the ROI, then keep only the
# original bounding-box columns (dropping whatever the join added).
joined_with_roi = gpd.sjoin(cm_bboxes, roi)
cm_bboxes_roi = joined_with_roi[cm_bboxes.columns]
cm_bboxes_roi.explore()

In [8]:
# Select the STAC items that cover the ROI. The index labels of cm_bboxes_roi
# are used as positions in cm_items.
items_roi = [cm_items[position] for position in cm_bboxes_roi.index]
# Collect the href of each selected item's "cm" asset.
cm_hrefs = [item.assets["cm"].href for item in items_roi]

In [9]:
cm_hrefs

['https://storage.googleapis.com/dgds-data-public/coclico/coastal_mask/minx=4.17-miny=52.66.tif',
 'https://storage.googleapis.com/dgds-data-public/coclico/coastal_mask/minx=4.17-miny=53.08.tif',
 'https://storage.googleapis.com/dgds-data-public/coclico/coastal_mask/minx=4.17-miny=53.51.tif',
 'https://storage.googleapis.com/dgds-data-public/coclico/coastal_mask/minx=4.59-miny=52.66.tif',
 'https://storage.googleapis.com/dgds-data-public/coclico/coastal_mask/minx=4.59-miny=53.08.tif',
 'https://storage.googleapis.com/dgds-data-public/coclico/coastal_mask/minx=4.59-miny=53.51.tif',
 'https://storage.googleapis.com/dgds-data-public/coclico/coastal_mask/minx=5.02-miny=52.66.tif',
 'https://storage.googleapis.com/dgds-data-public/coclico/coastal_mask/minx=5.02-miny=53.08.tif',
 'https://storage.googleapis.com/dgds-data-public/coclico/coastal_mask/minx=5.02-miny=53.51.tif',
 'https://storage.googleapis.com/dgds-data-public/coclico/coastal_mask/minx=5.45-miny=52.66.tif',
 'https://storage.go

### Local Dask cluster

Here we launch a local Dask cluster. Dask is a Python library for parallel computing, which will speed up the computation. The cluster we create here is local; when you want to scale up your computations you should use a Dask Gateway, hosted on a remote server, close to the data.

In [10]:
# when running locally (parallel)
# No scheduler address is given, so a local cluster is started (the cell output
# reports a distributed.LocalCluster). Worker scratch space is placed under /tmp.
# NOTE(review): the hard-coded "/tmp" presumably only suits Unix-like systems — confirm.
client = Client(local_directory="/tmp")
client

# asking for plots (.plot()) or numerical values (.compute()) will trigger the computation, which you can see in the dask dashboard

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 5
Total threads: 10,Total memory: 64.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:59497,Workers: 5
Dashboard: http://127.0.0.1:8787/status,Total threads: 10
Started: Just now,Total memory: 64.00 GiB

0,1
Comm: tcp://127.0.0.1:59511,Total threads: 2
Dashboard: http://127.0.0.1:59512/status,Memory: 12.80 GiB
Nanny: tcp://127.0.0.1:59500,
Local directory: /tmp/dask-worker-space/worker-h96zqhug,Local directory: /tmp/dask-worker-space/worker-h96zqhug

0,1
Comm: tcp://127.0.0.1:59510,Total threads: 2
Dashboard: http://127.0.0.1:59513/status,Memory: 12.80 GiB
Nanny: tcp://127.0.0.1:59501,
Local directory: /tmp/dask-worker-space/worker-naz717gf,Local directory: /tmp/dask-worker-space/worker-naz717gf

0,1
Comm: tcp://127.0.0.1:59515,Total threads: 2
Dashboard: http://127.0.0.1:59517/status,Memory: 12.80 GiB
Nanny: tcp://127.0.0.1:59502,
Local directory: /tmp/dask-worker-space/worker-av09lywo,Local directory: /tmp/dask-worker-space/worker-av09lywo

0,1
Comm: tcp://127.0.0.1:59519,Total threads: 2
Dashboard: http://127.0.0.1:59520/status,Memory: 12.80 GiB
Nanny: tcp://127.0.0.1:59503,
Local directory: /tmp/dask-worker-space/worker-2rp2r5f7,Local directory: /tmp/dask-worker-space/worker-2rp2r5f7

0,1
Comm: tcp://127.0.0.1:59522,Total threads: 2
Dashboard: http://127.0.0.1:59523/status,Memory: 12.80 GiB
Nanny: tcp://127.0.0.1:59504,
Local directory: /tmp/dask-worker-space/worker-aku6yw5o,Local directory: /tmp/dask-worker-space/worker-aku6yw5o


In [11]:
%%time
@dask.delayed
def lazy_open(href):
    """Open the raster at *href* as a chunked xarray dataset.

    The function is wrapped in ``dask.delayed``, so calling it only builds a
    task; the file is opened when that task is computed.
    """
    return xr.open_dataset(
        href,
        chunks={"band": 1, "x": 512, "y": 512},
        engine="rasterio",
    )


# Run every delayed open in a single dask.compute call, one task per href.
das = dask.compute(*map(lazy_open, cm_hrefs))
print(f"len das: {len(das)}")
das[0]

len das: 18
CPU times: user 175 ms, sys: 173 ms, total: 349 ms
Wall time: 2.23 s


Unnamed: 0,Array,Chunk
Bytes,1.00 MiB,1.00 MiB
Shape,"(1, 512, 512)","(1, 512, 512)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.00 MiB 1.00 MiB Shape (1, 512, 512) (1, 512, 512) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",512  512  1,

Unnamed: 0,Array,Chunk
Bytes,1.00 MiB,1.00 MiB
Shape,"(1, 512, 512)","(1, 512, 512)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [12]:
%%time
# Merge the opened tiles into one dataset based on their coordinates, then
# compute the result so later cells work on loaded values.
cm = xr.combine_by_coords(das).compute()

CPU times: user 113 ms, sys: 31 ms, total: 144 ms
Wall time: 966 ms


## DISCUSSION/TODO: mask land/water values to see the Esri tiles?

In [13]:
%%time
import hvplot.xarray  # noqa: F401 -- imported only to register the .hvplot accessor

# Drop the "band" dimension once and build the mask from the same squeezed
# data. Masking with the unsqueezed `cm > 0` would hand `where` a condition
# that still carries the "band" dimension the data no longer has.
cm_2d = cm.squeeze("band")

# Keep only values greater than 0; everything else is masked out, presumably so
# the basemap tiles show through there.
cm_2d.where(cm_2d > 0).hvplot(
    rasterize=True, x="x", y="y", aspect="equal", tiles="EsriImagery"
)

CPU times: user 112 ms, sys: 27.8 ms, total: 140 ms
Wall time: 137 ms


OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


## Discussion: Add S2 from Planetary/ODC Stac?

## Discussion/Etienne: add ERA