In [None]:
import geopandas
import geoviews as gv
import holoviews as hv
import hvplot.xarray
import hvplot.pandas
import panel as pn
import stmtools
import xarray as xr

from dask.distributed import Client, LocalCluster
from holoviews import opts
from holoviews import streams

In [None]:
# HoloViews / GeoViews configuration
hv.extension('bokeh')  # load the Bokeh plotting backend

# by default, give every Points element the box- and lasso-selection tools
opts.defaults(
    opts.Points(tools=['box_select', 'lasso_select']),
)

gv.output(dpi=120)

# Visualize Space-Time Matrix (STM) & Contextual Data

## 1. Introduction

The goal of this notebook is to build an interactive visualization of a space-time matrix (STM) dataset for data exploration. Specifically, we want to visualize results from radar interferometry measurements together with some space- and time-dependent contextual information.

We make use of packages from the [HoloViz](https://holoviz.org) Python ecosystem, in particular [hvPlot](https://hvplot.holoviz.org), [HoloViews](https://holoviews.org) and [Panel](https://panel.holoviz.org).


## 2. Input variables and paths

In [None]:
# Root directory holding all input data
DATA_DIR = '/project/caroline/Public/demo_mobyle/data'

# Space-time matrix (STM): the CSV product and the Zarr store share one stem
DATA_STEM = 'full-pixel_psi_amsterdam_tsx_asc_t116_v4_ampl_std_H_c16643'
CSV_STM_PATH = DATA_DIR + '/depsi_products/' + DATA_STEM + '.csv'
ZARR_STM_PATH = DATA_DIR + '/depsi_products/' + DATA_STEM + '.zarr'

# Contextual data
# - time-dependent variable: total precipitation
TP_DATA_PATH = DATA_DIR + '/ERA5/ERA5-land-monthly_2015-2023_NL.nc'
# - space-dependent variable: BAG dataset for AMS
BAG_DATA_PATH = DATA_DIR + '/BAG/bag_light_AMS_WGS84.gpkg'

## 3. Setup Dask cluster

NOTE: if the notebook is running inside a container, only the `LocalCluster` works (no `SLURMCluster`):

In [None]:
# start a local Dask cluster (one worker, four threads) and connect to it
cluster = LocalCluster(
    n_workers=1,
    threads_per_worker=4,
)
client = Client(cluster)

# display the client summary as the cell output
client

## 4. Convert STM data format: CSV -> Zarr

We use the [STMTools](https://github.com/MotionbyLearning/stmtools/tree/main) package to load the STM dataset from a CSV file and convert it to the Zarr format. **This step needs to run only once!** Using a `LocalCluster` with 2 workers with 4 threads each on 8 cores on Spider, the conversion cell below (commented out by default) runs in ~6 min. Note that the cluster configured above uses a single worker, so the run time may differ.

In [None]:
! du -h $CSV_STM_PATH

In [None]:
# %%time
# stm = stmtools.from_csv(CSV_STM_PATH)
# stm.to_zarr(ZARR_STM_PATH, mode='w')
#
# NOTE: one-off CSV -> Zarr conversion, intentionally left commented out so
# that "Run All" does not repeat it. Uncomment the three lines above to
# (re)create the Zarr store at ZARR_STM_PATH; mode='w' overwrites an existing
# store. When uncommenting, `%%time` must stay on the first line of the cell.

## 5. STM and contextual data

We consider three datasets:

* The **STM** dataset, with space- and time-dependent variables (e.g. deformation);
* ERA5-land monthly **total precipitation** data, of which we consider only the time dependence;
* **Building footprints** from the BAG dataset (space dependence only).

In the following cells we open and manipulate the datasets:

In [None]:
# Space-time matrix: open the Zarr store written in the conversion step
stm = xr.open_zarr(store=ZARR_STM_PATH)

In [None]:
# Total precipitation: the dataset is gridded in space and time, but we keep
# only the time series of the grid cell nearest to the STM centroid
ds = xr.open_dataset(TP_DATA_PATH)
tp = ds['tp'].sel(
    indexers={
        'latitude': stm['lat'].mean(),
        'longitude': stm['lon'].mean(),
        'expver': 1,
    },
    method='nearest',
)

In [None]:
# BAG building footprints (space dependent only): read only the features
# that fall within the bounding box below
bbox = (
    4.88, 52.36,  # presumably (min lon, min lat) — TODO confirm
    4.92, 52.38,  # presumably (max lon, max lat) — TODO confirm
)
bag = geopandas.read_file(BAG_DATA_PATH, bbox=bbox)

## 6. Visualizing the datasets

We create a scatter plot on a base map for the STM data points:

In [None]:
# create points plot
xy = stm[['lat', 'lon', 'pnt_linear']].to_dataframe()
# Randomly select 20% of the points to keep the plot responsive. The fixed
# seed makes the subset (and therefore the figure) identical after a kernel
# restart; change `random_state` to draw a different subset.
xy = xy.sample(frac=0.2, random_state=0)
points = xy.hvplot.points(
    'lon',
    'lat',
    geo=True,
    c='red',
    size=5,
    tiles='ESRI',
    hover=False,
)

In [None]:
# fix the size of the map frame, then display the plot
points = points.opts(
    frame_width=500,
    frame_height=500,
)
points

Let's add some information and color the points using the slope of the line fitting the points:

In [None]:
# create points plot, coloured by the `pnt_linear` attribute
xy = stm[['lat', 'lon', 'pnt_linear']].to_dataframe()
# Randomly select 20% of the points to keep the plot responsive. The fixed
# seed makes the subset (and therefore the figure) identical after a kernel
# restart; change `random_state` to draw a different subset.
xy = xy.sample(frac=0.2, random_state=0)
points = xy.hvplot.points(
    'lon',
    'lat',
    geo=True,
    c='pnt_linear',
    clim=(-5e-3, 5e-3),  # symmetric colour limits around zero
    cmap='jet_r',
    size=5,
    tiles='ESRI',
    hover=False,
)

In [None]:
# fix the size of the map frame, then display the plot
points = points.opts(
    frame_width=500,
    frame_height=500,
)
points

Let's add a separate panel to visualize the deformation associated with each point as a function of time. Let's also add a selection stream, so that only the points selected in the previous panel are considered!

In [None]:
TOO_MANY_POINTS = 10  # maximum number of selected points that is plotted
VARIABLE = 'deformation'

# create stream for a selection of points
selection = streams.Selection1D(source=points)


def _space_positions(index):
    """
    Map selected rows of the plotted table `xy` to positions along `space`.

    The selection stream reports row positions in the (sub-sampled) table
    `xy` that backs the points plot, not positions in the full STM. The
    index of `xy` still identifies the original STM points, so it is used to
    translate the selection back.
    """
    labels = xy.index[list(index)]
    if 'space' in stm.indexes:
        # `space` carries labels: translate them back to integer positions
        return [int(pos) for pos in stm.indexes['space'].get_indexer(labels)]
    # no label index on `space`: the index of `xy` holds the positions
    return [int(label) for label in labels]


def plot_variable(index):
    """
    Plot STM variable vs time for a sub-set of points.

    Parameters
    ----------
    index : list of int
        Row positions of the selected points in the plotted table `xy`, as
        emitted by the `Selection1D` stream. May be empty.

    Returns
    -------
    hv.Overlay
        One curve of `VARIABLE` vs time per selected point. When nothing is
        selected, or more than `TOO_MANY_POINTS` points are selected, the
        curve of STM point 0 is returned as a placeholder.
    """
    if not index or len(index) > TOO_MANY_POINTS:
        # for no or too many points, plot point 0
        positions = [0]
    else:
        positions = _space_positions(index)
    lines = [
        stm.isel(space=pos).hvplot(x='time', y=VARIABLE)
        for pos in positions
    ]
    return hv.Overlay(lines)


# create interactive variable plot
deformation = hv.DynamicMap(plot_variable, streams=[selection])

In [None]:
# give the time-series panel the same width as the map, then stack the two
# panels in a single column
deformation = deformation.opts(
    frame_width=500,
)
(points + deformation).cols(1)

Let's add two more panels to the plot to visualize the contextual data. In a new panel, we plot the total precipitation as a function of time:

In [None]:
# line plot of total precipitation as a function of time
precipitation = tp.hvplot.line(
    x='time',
    y='tp',
)

Finally, the last panel will show the building footprints. We'll color these using the year of construction:

In [None]:
# building footprints, carrying the construction year as value dimension
buildings = gv.Polygons(
    bag,
    vdims=[hv.Dimension('bouwjaar', label='Year Built')],
)

We compose the final layout:

In [None]:
# size the contextual panels like the map and time-series panels
precipitation = precipitation.opts(frame_width=500)
buildings = buildings.opts(
    tools=['hover'],
    frame_width=500,
    frame_height=500,
)

# arrange the four panels in two columns and display the result
plot = (points + buildings + deformation + precipitation).cols(2)
plot

You can also run the visualization with a dedicated Panel server:

In [None]:
# serve the composed layout with Panel on a fixed port
server = pn.serve(
    plot,
    port=5006,
)