In [1]:
import rioxarray as rxr
import xarray as xr
from dataclasses import asdict
from dscreator.cfarray.attributes import DatasetAttrsGrid, VariableAttrs
from datetime import datetime
from dscreator import utils
from dscreator.storage import get_storage_handler

To run this notebook on the hub in a virtual environment of its own, use:

```bash
curl -sSL https://install.python-poetry.org | python3 -
export PATH="/home/jovyan/.local/bin:$PATH"

# inside datasetcreator directory
poetry install
python -m ipykernel install --user --name=dscreator 

```

You can now select a kernel named `dscreator`. It is also possible to install the dependencies in the default kernel.

# Introduction

For [S-ENDA](https://s-enda.met.no/), NILU suggested that we could use the project `2022 Tålegrenseprosjekt` as a use case for data sharing. The sharepoint site can be found [here](https://niva365.sharepoint.com/:f:/r/sites/projects/3383/Shared%20Documents/Projects/2022%20T%C3%A5legrenseprosjekt?csf=1&web=1&e=Udp351). 

Notes:

* The Steady-State Water Chemistry (SSWC) model calculates critical loads for strong acid and their exceedance, assuming that nitrogen leaching remains constant at today's level.
* The First-order Acidity Balance (FAB) model calculates limits for sulphur and nitrogen.

This notebook adds metadata according to S-ENDA [acdd+cf](https://metno.github.io/data-management-handbook/#acdd-elements) and converts the `GeoTIFF` files to `CF-NetCDF`, which we can share over `OPeNDAP` and `WMS`. The ISO 19115-2 metadata can then be generated from the NetCDF or NcML.

The source code for the datasets used in this notebook can be found in [critical_loads_2](https://github.com/JamesSample/critical_loads_2), while NILU's example dataset can be found [here](https://dev-thredds.nilu.no/thredds/catalog/SENDA/catalog.html?dataset=SENDA/xso4_2021.nc).

## Source files

These files are required:
* fab_exns_meqpm2pyr_1721_metgrid_120m.tif
* sswc_ex_meqpm2pyr_1721_metgrid_120m.tif
* veg_ex_meqpm2pyr_1721_metgrid_120m.tif

They can be found in `~/shared/common/critical_loads/raster/for_espen/exceedance` on hub.p.niva.no or in [Raster_Output](https://niva365.sharepoint.com/sites/projects/3383/Shared%20Documents/Forms/AllItems.aspx?csf=1&web=1&e=Udp351&cid=5437584c%2D4eea%2D4b43%2Db7cc%2Dd30200cd9d23&FolderCTID=0x01200023B9EEB8E9C0094580920107C7844708&id=%2Fsites%2Fprojects%2F3383%2FShared%20Documents%2FProjects%2F2022%20T%C3%A5legrenseprosjekt%2FRaster%5FOutput&viewid=aa29078f%2D5cb7%2D41dd%2Da376%2D38bbe9dc6fa4).

### Load source datasets

Currently using the ones sent to `espen` :), but other files can be used as well.

In [None]:
data_path = "/home/jovyan/shared/common/critical_loads/raster/for_espen/exceedance"
unit, unit_long = "meqpm2pyr", "mEkv/m^2/year"
dep_serie = "1721_metgrid"  # new deposition grid
cell_size = "120m"


def load_exceedance(name: str, file_tag: str, long_name: str):
    """Open one exceedance GeoTIFF, attach variable metadata and reproject it.

    The file is looked up as
    ``{data_path}/{name}_{file_tag}_{unit}_{dep_serie}_{cell_size}.tif``.

    Args:
        name: Variable name; also the file-name prefix and the ``short_name``
            attribute.
        file_tag: File-name token between the prefix and the unit ("exns" for
            the FAB raster, "ex" for the other two).
        long_name: Human-readable name stored in the ``long_name`` attribute.

    Returns:
        The raster, named ``name`` and reprojected to EPSG:4326.
    """
    raster = rxr.open_rasterio(
        f"{data_path}/{name}_{file_tag}_{unit}_{dep_serie}_{cell_size}.tif"
    ).drop_vars("band")
    raster.name = name
    # No CF standard name has been looked up for these variables yet, so only
    # `short_name` is set.
    # NOTE(review): this assignment replaces every attribute read from the
    # file. If the GeoTIFF declares a nodata value through its attributes, it
    # is gone before the reprojection below -- confirm that is intended.
    raster.attrs = asdict(VariableAttrs(long_name=long_name, units=unit_long, short_name=name))
    return raster.rio.reproject("EPSG:4326")


da_fab = load_exceedance("fab", "exns", "First-order Acidity Balance")
da_sswc = load_exceedance("sswc", "ex", "Steady-State Water Chemistry")
da_veg = load_exceedance("veg", "ex", "Vegetation")

Merge the dataarrays into a dataset

In [None]:
# Combine the three rasters into one dataset, one data variable per raster.
# The `band` coordinate labels are dropped when the rasters are loaded, so the
# single band is picked by position. `isel` states that explicitly instead of
# relying on `sel` falling back to positional indexing, and keeps working if
# the band labels are ever kept.
ds = xr.merge([da_fab, da_sswc, da_veg]).isel(band=0)

In [None]:
# Take the timestamp once so that `date_created` and `history` refer to the
# same instant, and format it as ISO 8601 rather than the space-separated
# `str(datetime)` form.
# NOTE(review): this is naive local time without a UTC offset -- confirm
# whether the metadata catalogue expects UTC.
created_at = datetime.now().isoformat(timespec="seconds")

# Global (dataset-level) attributes; replaces any attributes already on `ds`.
ds.attrs = asdict(
    DatasetAttrsGrid(
        title="Test exceedence limits water and vegetation 2017-2021",
        title_no="Test overskridelser av tålegrenser vann og vegetasjon 2017-2021",
        summary="Exceedence limits for water using the SSWCoaa or FABoaa models, and vegetation using emperical derived data on the new deposition grid from Met.no.",
        summary_no="Overskridelser av tålegrenser for vann ved SSWCoaa or FABoaa modellene og vegetasjon ved empirisk data på nytt rutenett fra Met.no. ",
        # Keywords are taken from https://gcmd.earthdata.nasa.gov/KeywordViewer
        keywords=",".join(
            [
                "GCMDSK:EARTH SCIENCE > LAND SURFACE > SOILS > NITROGEN",
                "GCMDSK:EARTH SCIENCE > LAND SURFACE > SOILS > SULFUR",
                "GCMDLOC:CONTINENT > EUROPE > NORTHERN EUROPE > SCANDINAVIA > NORWAY",
            ]
        ),
        # One entry per keyword prefix used above (GCMDSK, GCMDLOC).
        keywords_vocabulary=",".join(
            [
                "GCMDSK:GCMD Science Keywords:https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/sciencekeywords",
                "GCMDLOC:GCMD Locations:https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/locations",
            ]
        ),
        iso_topic_category="environment",
        spatial_representation="grid",
        date_created=created_at,
        project="Tålegrense",
        time_coverage_start=utils.to_isoformat(datetime(2017, 1, 1)),
        time_coverage_end=utils.to_isoformat(datetime(2021, 12, 31)),
        # Bounding box read from the dataset's own x/y coordinates.
        geospatial_lon_min=float(ds.x.min()),
        geospatial_lon_max=float(ds.x.max()),
        geospatial_lat_min=float(ds.y.min()),
        geospatial_lat_max=float(ds.y.max()),
        history=f"dscreator({created_at}) reprojected to EPSG:4326,populated metadata",
    )
)
# TODO: placeholder URL -- replace with the link to the published report.
ds.attrs["references"] = "https://LINK_TO_REPORT.pdf"

In [None]:
# Display the dataset to inspect its variables, coordinates and attributes before saving.
ds

In [None]:
# Optional visual check of the `fab` variable; left commented out because
# plotting can be a bit slow.
# ds.fab.plot(vmin=0, vmax=50, cmap="coolwarm")

In [None]:
# Resolve the storage handler for this project/dataset, then write `ds` through it.
storage_handler = get_storage_handler(
    project_name="exceedance-limits",
    dataset_name="fab-sswc-veg-exceedence",
    filename_prefix="2017-2021",
)
storage_handler.save_dataset(ds)

This stores the dataset locally. To upload the dataset automatically, set the environment variable

```.env
STORAGE_PATH=gs://nivatest-1-senda
```

and generate default credentials using gcloud; the storage handler will then store the dataset in the object store.