In [None]:
import glob
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import nivapy3 as nivapy
import numpy as np
import pandas as pd
import seaborn as sn
import xarray as xr

plt.style.use("ggplot")

# ICP Waters GP review 2022

This notebook compiles data for the 2022 GP Review for the WGE (most likely the review of the Gothenburg Protocol for the Working Group on Effects, although this has not been confirmed). The data required are described by Øyvind G in e-mails received 24.03.2022 at 11.18 and 18.17.

## 1. Get data from RESA2

**Note:** Code in this section requires a direct connection to Nivabasen.

In [None]:
# Open a connection to Nivabasen. 'eng' is passed to the nivapy.da.select_resa_* queries below
eng = nivapy.da.connect()

In [None]:
# Start and end dates passed to the RESA queries below
st_dt = "2000-01-01"
end_dt = "2019-12-31"
# Names of the water chemistry parameters to extract
par_list = ["Ca", "Mg", "Na", "K", "SO4", "NO3-N", "Cl"]

### 1.1. Get stations

The previous analysis in 2015 considered 61 stations. Some of these have been replaced in ICPW and others removed from the dataset completely (see e-mail sent to Heleen and Øyvind on 09.05.2022 at 15:41 for details). For the current review, we will use 59 sites (see reply from Heleen received 09.05.2022 at 16.09).

In [None]:
# Stations used in the 2015 analysis. Rows with no 'Tr18_Code' are dropped.
# A new frame is built (rather than mutating in place) so that re-running the
# cell always starts from the file contents
oga_df = pd.read_excel("stations_used_2015.xlsx").dropna(subset=["Tr18_Code"])
stn_cds = list(oga_df["Tr18_Code"].unique())

# Keep only the matching stations from the full RESA station list
stn_df = nivapy.da.select_resa_stations(eng)
stn_df = stn_df.query("station_code in @stn_cds")

# Every row in the 2015 list must match exactly one RESA station. On failure,
# report the counts and any codes without a match to make the problem easy to find
missing_cds = sorted(str(cd) for cd in set(stn_cds) - set(stn_df["station_code"]))
assert len(oga_df) == len(stn_df), (
    "Not all stations identified in RESA. "
    f"Expected {len(oga_df)} stations, but found {len(stn_df)}. "
    f"Codes without a match: {missing_cds}."
)
stn_df.to_csv("stations_used_2022.csv", index=False)
stn_df.head()

In [None]:
# Map the selected stations as a visual check, using the station name as the popup text
nivapy.spatial.quickmap(stn_df, popup="station_name", cluster=True, kartverket=True)

### 1.2. Get parameters

In [None]:
# Parameters recorded at the selected stations during the period, restricted
# to the parameters of interest
par_df = nivapy.da.select_resa_station_parameters(stn_df, st_dt, end_dt, eng)
par_df = par_df.query("parameter_name in @par_list")

# Each parameter of interest must be matched exactly once. On failure, name
# the parameters that were not found
missing_pars = sorted(set(par_list) - set(par_df["parameter_name"]))
assert len(par_df) == len(par_list), (
    "Not all parameters available. "
    f"Expected {len(par_list)} parameters, but found {len(par_df)}. "
    f"Parameters without a match: {missing_pars}."
)
par_df

### 1.3. Get water chemistry

In [None]:
def convert_to_microequivalents(df, sea_salt_corr=False):
    """Basic conversion from mass/l to microequivalents/l.

    Chemical properties are read from 'chemical_properties.csv' in the current
    working directory (semicolon-separated; columns 'par', 'valency',
    'molar_mass' and 'cl_ratio' are used). For each row whose 'par' value
    (e.g. 'Ca_mg/l') is a column in 'df', a new column '{par}_µekv/l' is added.
    If 'sea_salt_corr' is True, a corrected column '{par}*_µekv/l' is also
    added, calculated as '{par}_µekv/l' minus 'cl_ratio' times chloride in
    µekv/l. The rows of 'chemical_properties.csv' may be in any order: chloride
    does not need to be listed before the parameters that are corrected.

    Arguments:
        df {DataFrame} -- Raw water chemistry from Nivabasen. Modified in place
        sea_salt_corr {bool} -- Whether to also apply sea-salt correction

    Raises:
        ValueError: If the appropriate unit factor cannot be identified.
        KeyError: If sea_salt_corr is True, but chloride in µekv/l is neither
                  already in 'df' nor derivable from 'chemical_properties.csv'.
        Assertion error if sea_salt_corr is True and 'Cl_mg/l' is not in 'df'

    Returns:
        DataFrame -- 'df' is returned with additional columns added.
    """
    if sea_salt_corr:
        assert (
            "Cl_mg/l" in df.columns
        ), "Column 'Cl_mg/l' is required if 'sea_salt_corr' is True."

    chem_prop_df = pd.read_csv(r"chemical_properties.csv", sep=";")

    def _to_microequivalents(par_unit, valency, molar_mass):
        """Return the parameter name and column 'par_unit' converted to µekv/l."""
        par, unit = par_unit.split("_")

        # The first character of the unit gives the mass prefix (milli or micro).
        # Both the micro sign and the Greek letter mu are accepted
        if unit.startswith("m"):
            factor = 1000
        elif unit.startswith(("µ", "\u03bc")):
            factor = 1
        else:
            raise ValueError("Unit factor could not be identified.")

        return par, df[par_unit] * valency * factor / molar_mass

    # Derive chloride in µekv/l up front so that the sea-salt correction does
    # not depend on 'Cl_mg/l' being listed before the other parameters
    cl_ueq = None
    if sea_salt_corr:
        cl_props = chem_prop_df[chem_prop_df["par"] == "Cl_mg/l"]
        if len(cl_props) > 0:
            cl_row = cl_props.iloc[0]
            _, cl_ueq = _to_microequivalents(
                "Cl_mg/l", cl_row["valency"], cl_row["molar_mass"]
            )

    for _, row in chem_prop_df.iterrows():
        par_unit = row["par"]
        cl_ratio = row["cl_ratio"]

        if par_unit not in df.columns:
            continue

        par, par_ueq = _to_microequivalents(
            par_unit, row["valency"], row["molar_mass"]
        )
        df[f"{par}_µekv/l"] = par_ueq

        # NOTE: a zero 'cl_ratio' skips the correction, but a missing (NaN)
        # 'cl_ratio' is truthy and therefore produces an all-NaN corrected column
        if sea_salt_corr and cl_ratio:
            # Prefer a chloride column already in 'df'; otherwise use the
            # values derived above
            if "Cl_µekv/l" in df.columns:
                cl_term = df["Cl_µekv/l"]
            elif cl_ueq is not None:
                cl_term = cl_ueq
            else:
                raise KeyError("Cl_µekv/l")

            df[f"{par}*_µekv/l"] = df[f"{par}_µekv/l"] - (cl_ratio * cl_term)

    # Remove unwanted columns
    for col in ["Cl*_µekv/l", "NO3-N*_µekv/l"]:
        if col in df.columns:
            del df[col]

    return df

In [None]:
# Raw water chemistry for the selected stations, parameters and period
wc_df, dup_df = nivapy.da.select_resa_water_chemistry(
    stn_df, par_df, st_dt, end_dt, eng, lod_flags=False, drop_dups=True
)

# Strip the trailing ' N' from the nitrate column so that it matches the
# 'par_unit' pattern split on '_' in 'convert_to_microequivalents'
wc_df = wc_df.rename(columns={"NO3-N_µg/l N": "NO3-N_µg/l"})
wc_df = convert_to_microequivalents(wc_df, sea_salt_corr=True)

# Keep samples where both depths are zero. An explicit copy is taken so that
# adding 'year' below writes to an independent frame (this avoids pandas'
# SettingWithCopyWarning)
wc_df = wc_df.query("(depth1 == 0) and (depth2 == 0)").copy()
wc_df["year"] = wc_df["sample_date"].dt.year

# Annual means per station. 'numeric_only=True' restricts the average to
# numeric columns, so the result does not depend on how the installed pandas
# version treats non-numeric columns such as 'sample_date'
wc_df = (
    wc_df.groupby(["station_id", "station_code", "station_name", "year"])
    .mean(numeric_only=True)
    .reset_index()
)
wc_df.to_csv("water_chem.csv", index=False)
wc_df.head()

## 2. Get deposition

Code from here onwards should be run on JupyterHub.

In [None]:
# Read data from Nivabasen
# Read the station list and annual water chemistry saved to CSV in Section 1
# (originally extracted from Nivabasen), so no database connection is needed here
stn_df = pd.read_csv("stations_used_2022.csv")
wc_df = pd.read_csv("water_chem.csv")

### 2.1. "Standard" EMEP data from 2000 to 2019

Available from [here](https://www.emep.int/mscw/mscw_moddata.html) (`2000-2018 (Type2) and 2019 (Type1)`).

In [None]:
# List files to process
# URL pattern shared by the files for 2000 to 2018
url_template = "https://thredds.met.no/thredds/dodsC/data/EMEP/2021_Reporting/EMEP01_rv4.42_year.{year}met_{year}emis_rep2021.nc"

# List files to process
file_paths = [url_template.format(year=year) for year in range(2000, 2019)]

# Data for 2019 have a different naming convention/url
file_paths.append(
    r"https://thredds.met.no/thredds/dodsC/data/EMEP/2021_Reporting/EMEP01_rv4.42_year.2019met_2019emis.nc"
)

In [None]:
# Open datasets
# Open datasets (one remote file per year, listed in 'file_paths') and combine
# them into a single dataset using their coordinates
ds = xr.open_mfdataset(
    file_paths,
    combine="by_coords",
)
ds

In [None]:
# Load data from URL into memory to improve performance later
# Load data from URL into memory to improve performance later
# NOTE(review): this loads every variable for all years, not just the six
# deposition variables used below - confirm this fits in memory
ds = ds.load()

In [None]:
# Pars of interest
# Pars of interest
par_list = [
    "DDEP_SOX_m2Grid",
    "WDEP_SOX",
    "DDEP_OXN_m2Grid",
    "WDEP_OXN",
    "DDEP_RDN_m2Grid",
    "WDEP_RDN",
]

# Check that units are as expected for each variable: 'mgS/m2' for the SOX
# variables and 'mgN/m2' for the OXN and RDN variables. These are the units
# assigned to the totals calculated from these variables
for par in par_list:
    unit = ds[par].attrs["units"]
    print(f"{par: <20}", unit)
    expected_unit = "mgS/m2" if "SOX" in par else "mgN/m2"
    assert (
        unit == expected_unit
    ), f"Units not consistent. '{par}' has units '{unit}', but expected '{expected_unit}'."

In [None]:
# Calculate total oxidised S
# Totals for oxidised S, oxidised N and reduced N. Each total is the sum of
# two component variables, and the units are set explicitly on the new variable.
# Entries are (total, first component, second component, units)
totals = [
    ("DEP_SOX", "WDEP_SOX", "DDEP_SOX_m2Grid", "mgS/m2"),
    ("DEP_OXN", "WDEP_OXN", "DDEP_OXN_m2Grid", "mgN/m2"),
    ("DEP_RDN", "WDEP_RDN", "DDEP_RDN_m2Grid", "mgN/m2"),
]
for total, first, second, units in totals:
    ds[total] = ds[first] + ds[second]
    ds[total].attrs["units"] = units

# Calculate total N from the four N components. The components are added in a
# fixed left-to-right order
totn = ds["WDEP_OXN"] + ds["WDEP_RDN"]
totn = totn + ds["DDEP_OXN_m2Grid"]
totn = totn + ds["DDEP_RDN_m2Grid"]
ds["DEP_TOTN"] = totn
ds["DEP_TOTN"].attrs["units"] = "mgN/m2"

In [None]:
%%time

par_list = ["DEP_SOX", "DEP_TOTN"]
df_list = []
for idx, row in stn_df.iterrows():
    lat = row["latitude"]
    lon = row["longitude"]
    stn_id = row["station_id"]

    # Get time series
    df = (
        ds[par_list]
        .sel(lat=lat, lon=lon, method="nearest")
        .to_dataframe()
        .reset_index()
    )
    df["station_id"] = stn_id
    df["year"] = df["time"].dt.year

    df_list.append(df)

# Combine results
df = pd.concat(df_list, sort=True).reset_index(drop=True)
df = df[["station_id", "year"] + par_list]
par_unit = [i + "_mgpm2" for i in par_list]
df.columns = ["station_id", "year"] + par_unit

df.head()

In [None]:
%%time

par_list = ["DEP_SOX", "DEP_TOTN"]
df_list = []

# Get time series
df2 = (
    ds[par_list]
    .sel(lat=stn_df["latitude"], lon=stn_df["longitude"], method="nearest")
    .to_dataframe()
    .reset_index()
)
df2["station_id"] = stn_df["station_id"]

# Combine results
df2["year"] = df2["time"].dt.year
df2 = df2[["station_id", "year"] + par_list]
par_unit = [i + "_mgpm2" for i in par_list]
df2.columns = ["station_id", "year"] + par_unit

df2.head()

In [None]:
# Line plot of total N deposition ('DEP_TOTN_mgpm2') against year, using the
# per-station time series in 'df'
# NOTE(review): no 'hue' or 'units' argument is given, so seaborn presumably
# aggregates all stations into a single line - confirm this is intended
sn.relplot(
    data=df,
    x="year",
    y="DEP_TOTN_mgpm2",
    height=4,
    aspect=3,
    kind="line",
    legend=False,
    alpha=0.5,
)

In [None]:
# Line plot of total oxidised S deposition ('DEP_SOX_mgpm2') against year,
# using the per-station time series in 'df'
# NOTE(review): no 'hue' or 'units' argument is given, so seaborn presumably
# aggregates all stations into a single line - confirm this is intended
sn.relplot(
    data=df,
    x="year",
    y="DEP_SOX_mgpm2",
    height=4,
    aspect=3,
    kind="line",
    legend=False,
    alpha=0.5,
)

### 2.2. EMEP Scenarios

Prepared for the GP review and downloaded from [here](https://aerocom-classic.met.no/DATA/download/GP_review_WGE/) (see e-mail from Heleen received 05.05.2022 at 12:25 for details).

In [None]:
# Pars of interest
# Pars of interest (the same six deposition variables checked for the
# "standard" EMEP data). Note that this rebinds 'par_list'
par_list = [
    "DDEP_SOX_m2Grid",
    "WDEP_SOX",
    "DDEP_OXN_m2Grid",
    "WDEP_OXN",
    "DDEP_RDN_m2Grid",
    "WDEP_RDN",
]

In [None]:
# Scenario files. The list is sorted so that the processing order (and the
# file left bound to 'ds' afterwards) is reproducible
search_path = r"../../../gp_review_2022/emep_data/*.nc"
flist = sorted(glob.glob(search_path))

# Station coordinates as DataArrays sharing a 'station_id' dimension. Indexing
# with DataArrays that share a dimension selects one grid cell per station
# (pointwise selection). Passing the pandas Series directly would instead
# select every latitude/longitude combination
stn_ids = stn_df["station_id"].values
stn_lats = xr.DataArray(
    stn_df["latitude"].values, dims="station_id", coords={"station_id": stn_ids}
)
stn_lons = xr.DataArray(
    stn_df["longitude"].values, dims="station_id", coords={"station_id": stn_ids}
)

scen_df_list = []
for fpath in flist:
    # Third to fifth '_'-separated fields of the file name
    # NOTE(review): presumably meteorology, emissions and scenario labels -
    # confirm against the file naming convention
    met, emis, scen = os.path.split(fpath)[1].split("_")[2:5]

    # NOTE: this rebinds 'ds' (previously the "standard" EMEP data). The
    # dataset is left open so that the last file can be inspected afterwards
    ds = xr.open_dataset(fpath)
    df2 = (
        ds[par_list]
        .sel(lat=stn_lats, lon=stn_lons, method="nearest")
        .to_dataframe()
        .reset_index()
    )

    # Label the rows with the fields from the file name and keep the result,
    # so that values from each file are not overwritten by the next
    df2["met"] = met
    df2["emis"] = emis
    df2["scen"] = scen
    scen_df_list.append(df2)

# Results for all scenario files in one frame (empty if no files were found)
scen_df = (
    pd.concat(scen_df_list, ignore_index=True) if scen_df_list else pd.DataFrame()
)

In [None]:
# Display the dataset currently bound to 'ds' (the last scenario file opened
# by the loop over 'flist', if any files were found)
ds

## 3. Get runoff

In [None]:
# Table of parameters returned by the NVE GTS API helper (presumably the
# parameters available for querying - confirm)
# NOTE: this rebinds 'par_df', which holds the RESA parameters in Section 1
par_df = nivapy.da.get_nve_gts_api_parameters()
par_df

In [None]:
# NVE GTS parameter(s) to query
# NOTE(review): 'gwb_q' is presumably the runoff parameter - confirm against
# the parameter table returned by 'get_nve_gts_api_parameters'
pars = ["gwb_q"]

# Period of interest (same dates as used for the water chemistry in Section 1;
# redefined here because this part of the notebook is run separately)
st_dt = "2000-01-01"
end_dt = "2019-12-31"

# Query time series at the station locations. Stations are identified by
# 'station_id', with coordinates in 'longitude'/'latitude' (EPSG:4326)
res_df = nivapy.da.get_nve_gts_api_time_series(
    stn_df,
    pars,
    st_dt,
    end_dt,
    id_col="station_id",
    xcol="longitude",
    ycol="latitude",
    crs="epsg:4326",
)
res_df.head()