In [None]:
#Imports
import os
import xarray as xr
import folium
import numpy as np
import datetime as dt

from eodag import EODataAccessGateway
from eodag import setup_logging

from rasterio.crs import CRS

import eotools.shortcut as eoshort
import eotools.loading as eoload

# Logging verbosity passed to eodag's setup_logging:
# 0: no logging and no progress bar
# 1: no logging but progress bars displayed
# 2: log at the INFO level
# 3: log at the DEBUG level (even more information)

# Level 1 is used here: progress bars only, no log messages.
setup_logging(verbose=1)


# EODAG - deserialized post process

This notebook demonstrates how to deserialize the results of a previous search, download the corresponding data, and work with it.

## Step 1
### Configuration
In the configuration we pass the username and password from the Copernicus Dataspace Ecosystem (CDSE) to eodag. Also we define the path for the downloads.

In [None]:
# Read the workspace paths from paths.yml via the eotools shortcut helper (kept in a separate
# python script so that the notebooks stay short and simple).
# NOTE(review): the original comment says credentials are read from a .env file and directories are
# created, but no credential handling is visible in this cell; presumably this happens inside the
# helper or eodag's own configuration. Confirm.
workspace = eoshort.read_paths(filepath='paths.yml')
# Create the eodag gateway and prefer the "cop_dataspace" provider for subsequent requests.
dag = EODataAccessGateway()
dag.set_preferred_provider("cop_dataspace")

## Step 2
### Deserialize
Since the search has already been done (see notebook `eodag_search`) and its result has been serialized, we now deserialize the search result and register it with eodag. Registering is required: products that are only deserialized, but not registered, cannot be downloaded.

In [None]:
# Deserialize the search results
# The serialized search is read from "search_results.geojson" inside the workspace's 'serialize' directory.
output_file = os.path.join(workspace['serialize'], "search_results.geojson")
# Deserialize AND register in one step, so that the products can be downloaded later on.
deserialized_search_results = dag.deserialize_and_register(output_file)

print(f"Got {len(deserialized_search_results)} deserialized products.")

In [None]:
# Plot the quicklooks of all deserialized search results (eotools shortcut helper)
eoshort.plot_quicklooks(deserialized_search_results)

## Step 3
### Download 
Now either a single product or multiple products from the search are downloaded. A product that has already been downloaded is not downloaded again, provided it is stored in the expected workspace directory.

In [None]:
# Download multiple products
# `products` is the list of all deserialized search results; it is reused by the later cells.
products = deserialized_search_results
# `paths` holds the return value of download_all; paths[0] is later used as a root directory
# to search for band files.
paths = dag.download_all(products)


In [None]:
# Set the bounding box (in degrees latitude/longitude) for the area of interest inside the tile.
latmin, latmax = 48.1, 48.35
lonmin, lonmax = 16.1, 16.6
# NOTE(review): `extent` is not referenced again in the visible cells; the later cells build their
# own (lonmin, latmin, lonmax, latmax) tuple.
extent = {'lonmin': lonmin, 'latmin': latmin, 'lonmax': lonmax, 'latmax': latmax}

# Folium map centred on the middle of the bounding box, with the bounding box drawn in red
# and the product footprints added as a GeoJSON layer.
fmap = folium.Map(location=(np.array([latmin, latmax]).mean(), np.array([lonmin, lonmax]).mean()), zoom_start=9, tiles='OpenStreetMap')
folium.Rectangle(bounds=[[latmin, lonmin],[latmax, lonmax]], color="red").add_to(fmap)
folium.GeoJson(
    # SearchResult has a __geo_interface__ used by folium to get its GeoJSON representation.
    # A single product does not work here: pass a slice (e.g. [2:3]) instead of an index (e.g. [2]).
    data=deserialized_search_results[:],
    tooltip=folium.GeoJsonTooltip(fields=["title"])
).add_to(fmap)
# Last expression of the cell: display the map.
fmap

In [None]:
# Common parameters for all further image processing (forwarded to product.get_data)
common_params = dict(
    crs=CRS.from_epsg(4326),               # target CRS EPSG:4326; NOTE(review): original comment says the downloaded images are already in 4326 and are not reprojected - confirm
    resolution=0.0001,                     # target pixel size, presumably in CRS units (degrees); original note: 0.0006 should correspond to about 60 m - TODO confirm
    extent=(lonmin,latmin,lonmax,latmax)   # crop to the area of interest, ordered (lonmin, latmin, lonmax, latmax)
)

# Define basic Functions for future operations
def normalized_difference(a, b):
    """Return the normalized difference ``(a - b) / (a + b)``.

    Both operands are promoted to floating point before any arithmetic. Without this,
    the sum ``a + b`` would be computed in the input dtype and could wrap around for
    unsigned integer data (e.g. two uint16 values whose sum exceeds 65535).

    Parameters
    ----------
    a, b
        Scalars or array-likes supporting element-wise arithmetic
        (e.g. numpy arrays or xarray DataArrays).

    Returns
    -------
    Floating point result with the broadcast shape of ``a`` and ``b``.
    Where ``a + b`` is zero the result follows the usual floating point rules of the
    array library (inf/nan); plain Python numbers raise ZeroDivisionError.
    """
    # Multiplying by 1. promotes integer input to float (same trick the numerator already relied on).
    a = a * 1.
    b = b * 1.
    return (a - b) / (a + b)

def normalize(a):
    """Linearly rescale ``a`` so that its minimum maps to 0 and its maximum to 1."""
    lowest = a.min()
    value_range = a.max() - lowest
    return (a - lowest) / value_range
    

## Step 4 
### Post Process

#### Simple NDVI Image

In [None]:
# Load Bands 4 and 8 into memory
def band_regex(band, res=60):
    """Build a regular expression selecting the file of one band at one resolution.

    The pattern matches any name that contains ``{band}_{res}m`` and does not contain
    ``MSK`` anywhere (negative lookahead), e.g. ``..._B04_10m.jp2`` for
    ``band='B04', res=10``.

    Parameters
    ----------
    band : str
        Band identifier as it appears in the file name, e.g. ``'B04'``.
    res : int or str, optional
        Resolution in metres as it appears in the file name (default 60).

    Returns
    -------
    str
        The regular expression pattern.
    """
    # The requested resolution is inserted into the pattern; previously `res` was ignored
    # and a hard-coded (and mistyped) alternation was used, so only 60m could ever match.
    return rf'^(?!.*MSK).*{band}_{res}m.*$'

# Load the red band (B04) and the near-infrared band (B08) of the first product,
# using the common parameters (CRS, resolution, extent) defined above.
RED = products[0].get_data(band=band_regex('B04', res=10), **common_params)
NIR = products[0].get_data(band=band_regex('B08', res=10), **common_params)

# NDVI = (NIR - RED) / (NIR + RED)
NDVI = normalized_difference(NIR, RED)

In [None]:
# Plot the NDVI image with a red-yellow-green colormap; center=False is passed so the
# colormap is not centred around a fixed value.
NDVI.plot(cmap="RdYlGn", center=False, size=6, aspect='equal')

#### More complex datasets
`EODAG` only provides a function to load a single band of a single product into an `xarray` DataArray, so a new function is needed to build more complex `xarray` Datasets.

In [None]:
# Function for loading multiple bands of a product into a single xarray Dataset. Each Band corresponds to a Variable in the dataset.
def load_single_product(product, bands:list[str], params=common_params):
    """Load several bands of one product into a single xarray Dataset.

    Parameters
    ----------
    product
        Object providing ``get_data(band=..., **params)`` and a ``properties`` mapping
        containing ``'startTimeFromAscendingNode'``.
    bands : list[str]
        Band identifiers; each one is passed to ``product.get_data`` and becomes the
        name of one variable in the returned Dataset.
    params : dict, optional
        Keyword arguments forwarded to ``product.get_data`` (defaults to ``common_params``).

    Returns
    -------
    xr.Dataset
        One variable per band, each with an added ``time`` dimension of length 1.

    Raises
    ------
    KeyError
        If the product has no ``'startTimeFromAscendingNode'`` property.
    ValueError
        If that property does not match the format ``'%Y-%m-%dT%H:%M:%S.%f%z'``.
    """
    # The acquisition time belongs to the product, not to a band: parse it once up front
    # instead of once per band.
    time_str = product.properties['startTimeFromAscendingNode']
    acquisition_date = dt.datetime.strptime(time_str,'%Y-%m-%dT%H:%M:%S.%f%z').date()

    loaded_data = {}
    for band in bands:
        # Load the band into an xarray DataArray and drop dimensions of size 1
        # [e.g.: shapes from (1,300,500) to (300,500)]
        data = product.get_data(band=band, **params).squeeze()

        # Add the acquisition date as a new 'time' dimension
        data = data.expand_dims(dim={'time':[acquisition_date]})

        # The Dataset uses the DataArray name to name its variables
        data.name = band
        loaded_data[band] = data

    # Create an xarray Dataset from the dictionary of DataArrays
    return xr.Dataset(loaded_data)

Now that we have a function to load multiple bands into a single Dataset, we can build on it and write another function that allows us to load multiple timestamps of a single tile into one Dataset.

In [None]:
# Function for loading multiple datasets
def load_multiple_timestamps(products, bands:list, *args, **kwargs):
    """Load the given bands of several products and merge them into one Dataset.

    Parameters
    ----------
    products
        Iterable of products; each is loaded with ``load_single_product``.
    bands : list
        Band identifiers to load from every product.
    *args, **kwargs
        Extra arguments forwarded unchanged to ``load_single_product``
        (e.g. ``params=...``).

    Returns
    -------
    xr.Dataset
        Result of ``xr.merge`` over the per-product Datasets.
    """
    # product and bands are passed positionally: combining `product=`/`bands=` keywords with
    # *args would bind the first extra positional argument to `product` a second time and
    # raise "TypeError: got multiple values for argument 'product'".
    single_ds = [
        load_single_product(product, bands, *args, **kwargs)
        for product in products
    ]
    # Merge the per-product datasets
    return xr.merge(single_ds)

In [None]:
# Function for finding all assets of a product
def load_assets(root:str, res=60, only_spectral:bool=True, include_tci:bool=False):
    """Collect the asset names of all ``*_{res}m.jp2`` files found below ``root``.

    Only files whose name starts with ``'T'`` are considered. The asset name is the
    third underscore-separated part of the file name (index 2), e.g. ``'B04'`` for
    ``T33UWP_20230101T000000_B04_60m.jp2``. Files with fewer than three such parts
    are skipped instead of raising ``IndexError``.

    Parameters
    ----------
    root : str
        Directory that is walked recursively.
    res : int or str, optional
        Resolution in metres as it appears in the file name suffix (default 60).
    only_spectral : bool, optional
        If true, keep only assets starting with ``'B'`` (plus ``'T'`` when
        ``include_tci`` is true). If false, all assets are returned.
    include_tci : bool, optional
        Only used when ``only_spectral`` is true: additionally keep assets starting
        with ``'T'`` (e.g. ``'TCI'``).

    Returns
    -------
    list[str]
        Asset names in the order the files are encountered by ``os.walk``
        (duplicates are not removed).
    """
    suffix = f"_{res}m.jp2"
    assets = []
    for _dirpath, _dirnames, filenames in os.walk(root, topdown=True):
        for file in filenames:
            if not (file.startswith('T') and file.endswith(suffix)):
                continue
            parts = file.split('_')
            # Guard against names too short to carry an asset part at index 2.
            if len(parts) < 3:
                continue
            assets.append(parts[2])

    if only_spectral:
        # str.startswith is safe for empty asset names, unlike indexing a[0].
        wanted = ('B', 'T') if include_tci else ('B',)
        assets = [a for a in assets if a.startswith(wanted)]
    return assets

In [None]:
# Collect the names of all 60m spectral bands (assets starting with 'B', TCI excluded)
# found below the first download path.
bands_2_load = load_assets(paths[0], res=60, only_spectral=True, include_tci=False)

# NOTE(review): this calls the eotools helper `load_multiple_timestamps_regex`, not the
# `load_multiple_timestamps` function defined in this notebook; presumably it is a regex-based
# variant of the same idea. Confirm against eotools.loading.
ds = eoload.load_multiple_timestamps_regex(products=products, bands=bands_2_load)
# Last expression of the cell: display the resulting dataset.
ds