In [1]:
# Uncomment and run the line below, then restart the kernel, at the start of
# each session to install 'teotil3' in development mode
# !pip install -e /home/jovyan/projects/teotil3/

In [2]:
import warnings

import matplotlib.pyplot as plt
import networkx as nx
import nivapy3 as nivapy
import numpy as np
import pandas as pd
import seaborn as sn
import teotil3 as teo
from sqlalchemy import text

# Silence FutureWarnings only; all other warning categories are still shown
warnings.simplefilter(action="ignore", category=FutureWarning)
# Use seaborn's "notebook" plotting context
sn.set_context("notebook")

# Estimating loads in unmonitored regions - 2023
# Using TEOTIL3

The standard workflow for OSPAR reporting uses TEOTIL2. This year (i.e. the 2024 analysis reporting 2023 data) we will switch to using TEOTIL3, because NIBIO are no longer able to generate the required input data for TEOTIL2.

The most recent notebook generating OSPAR data using TEOTIL2 is [here](https://nbviewer.org/github/JamesSample/rid/blob/master/notebooks/prog_2021-25/2022/10_loads_unmonitored_regions_2022.ipynb) (reporting of the 2022 data, undertaken during 2023). **This notebook translates the workflow for TEOTIL3**.

In [3]:
# Connect to db. The resulting 'engine' is passed to the teotil3 I/O functions
# below when reading regine polygons and reported point discharges
engine = nivapy.da.connect_postgis()

Connection successful.


## 1. Read model results

In [4]:
# Year of interest. Used to filter the model results and to build the
# input/output file paths below
year = 2023

In [5]:
# Read the model results and keep only the rows for the year of interest
mod_csv = f"/home/jovyan/shared/common/teotil3/evaluation/teo3_results_nve{year+1}_2013-{year}_agri-annual-loss.csv"
mod_df = pd.read_csv(mod_csv)
mod_df = mod_df[mod_df["year"] == year]
mod_df.head()

Unnamed: 0,regine,regine_down,accum_agriculture-background_din_kg,accum_agriculture-background_ss_kg,accum_agriculture-background_tdp_kg,accum_agriculture-background_toc_kg,accum_agriculture-background_ton_kg,accum_agriculture-background_totn_kg,accum_agriculture-background_totp_kg,accum_agriculture-background_tpp_kg,...,local_urban_tpp_kg,local_wood_din_kg,local_wood_ss_kg,local_wood_tdp_kg,local_wood_toc_kg,local_wood_ton_kg,local_wood_totn_kg,local_wood_totp_kg,local_wood_tpp_kg,year
242020,001.10,001.,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,44.2,849.2,0.8,12180.8,228.2,272.4,6.7,5.9,2023
242021,001.1A2B,001.1A2A,134.896662,0.989844,4.190777,7769.727724,74.879508,209.77617,4.421938,0.23116,...,6.8,1614.9,29798.4,29.3,470665.6,8746.0,10360.9,257.3,228.0,2023
242022,001.1A4D,001.1A4C,16.342464,0.382333,0.143554,569.571004,6.909292,23.251756,0.183389,0.039835,...,0.0,290.7,5272.5,5.3,85814.9,1588.9,1879.6,46.9,41.6,2023
242023,001.1M,001.1L,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,580.1,11104.0,10.7,174941.2,3236.4,3816.5,95.9,85.2,2023
242024,001.21,001.,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,6.1,117.0,0.1,1699.0,31.7,37.8,0.9,0.8,2023


In [6]:
# Save version with main catchments only (regine IDs '001.' to '315.'),
# ordered by regine ID
main_list = [f"{i:03d}." for i in range(1, 316)]
main_mod_df = mod_df[mod_df["regine"].isin(main_list)].sort_values("regine")
main_csv = f"/home/jovyan/shared/common/JES/teotil3_data/results/unmon_loads/teotil3_results_{year}_main_catchs.csv"
main_mod_df.to_csv(main_csv, index=False, encoding="utf-8")

## 4. Explore results

### 4.1. Identify areas with monitoring data

Where observations are available, we want to use them in preference to the model output. This means identifying all the catchments with observed data and subtracting the model results for these locations. This is more complicated than it appears, because a small number of observed catchments are upstream of others, so subtracting all the loads for the 155 monitored catchments involves "double accounting", which we want to avoid. The first step is therefore to identify the downstream-most nodes for the monitored areas i.e. for the cases where one monitoring site/catchment is upstream of another, we just want the downstream node.

In [7]:
# Get regine polygons
reg_gdf = teo.io.get_regine_geodataframe(engine, year)

# Determine hydrological connectivity
reg_gdf = teo.io.assign_regine_hierarchy(
    reg_gdf,
    regine_col="regine",
    regine_down_col="regine_down",
    order_coastal=False,
    nan_to_vass=True,
    land_to_vass=True,
    add_offshore=True,
).dropna(subset="geometry")

# Read RID station data
in_xlsx = r"/home/jovyan/shared/common/JES/teotil2_data/RID_Sites_List_2017-2020.xlsx"
stn_df = pd.read_excel(in_xlsx, sheet_name="RID_All")

# Assign stations to regines
stn_df = nivapy.spatial.identify_point_in_polygon(
    stn_df,
    reg_gdf,
    pt_col="station_id",
    poly_col="regine",
    lat_col="lat",
    lon_col="lon",
)
assert (
    stn_df["regine"].isna().sum() == 0
), "Some RID stations were not assigned to a regine."

# Get just cols of interest and drop duplicates
# (some sites are in the same regine)
stn_df = stn_df[["ospar_region", "regine"]].drop_duplicates()

# Get regine IDs with observed data
obs_nds = set(stn_df["regine"].values)

# Build catchment network
g = teo.model.build_graph(
    reg_gdf[["regine", "regine_down"]], id_col="regine", next_down_col="regine_down"
)

# Get nodes upstream of each monitored site. A depth-first search from a site
# on the reversed graph returns the site itself plus everything draining to it.
# The reversed graph is the same for every site, so build it once rather than
# copying the whole network on each iteration
g_rev = g.reverse()
nd_set = set()
for nd in obs_nds:
    nd_set.update(nx.dfs_tree(g_rev, nd).nodes())

# Get subgraph and ordered node list for all upstream nodes.
# 'nd_list' (all regines within monitored catchments) is reused later when
# filtering the reported metal discharges
g = g.subgraph(nd_set).copy()
nd_list = list(nx.topological_sort(g))

# Get downstream-most nodes in the subgraph i.e. those with no outgoing edges
ds_nds = [node for node in g if g.out_degree(node) == 0]

# Get just the downstream catchments. This avoids "double accounting" where one
# monitored catchment is nested inside another
stn_df = stn_df[stn_df["regine"].isin(ds_nds)]

100.00 % of regines assigned.


### 4.2. Sum model results for monitored locations

In [8]:
# Accumulated model outputs of interest: flow plus the accumulated loads for
# each parameter in 'pars' (column names look like 'accum_<source>_<par>_<unit>')
pars = ["totn", "ton", "din", "totp", "tdp", "tpp", "toc", "ss"]
cols = ["accum_q_m3/s"] + [
    i
    for i in mod_df.columns
    if (i.split("_")[0] == "accum") and (i.split("_")[-2] in pars)
]

# Join accumulated outputs to stns of interest => modelled results for monitored areas
mod_mon_df = pd.merge(stn_df, mod_df, how="left", on="regine", indicator=True)

# With a left join, monitored regines missing from the model output become rows
# of NaN, which 'sum' silently treats as zero. Flag this instead of hiding it
n_missing = (mod_mon_df["_merge"] == "left_only").sum()
if n_missing > 0:
    warnings.warn(
        f"{n_missing} monitored regine(s) have no model results for {year}. "
        "Modelled loads for monitored areas will be underestimated."
    )

# Groupby OSPAR region, summing just the accum cols (not IDs or text columns)
mod_mon_df = mod_mon_df.groupby("ospar_region")[cols].sum()

# kg => tonnes (converted in a single step; column order is unchanged)
kg_cols = [col for col in cols if col.endswith("_kg")]
mod_mon_df[kg_cols] = mod_mon_df[kg_cols] / 1000
mod_mon_df = mod_mon_df.rename(
    columns={col: col.replace("_kg", "_tonnes") for col in kg_cols}
)

mod_mon_df

Unnamed: 0_level_0,accum_q_m3/s,accum_agriculture-background_din_tonnes,accum_agriculture-background_ss_tonnes,accum_agriculture-background_tdp_tonnes,accum_agriculture-background_toc_tonnes,accum_agriculture-background_ton_tonnes,accum_agriculture-background_totn_tonnes,accum_agriculture-background_totp_tonnes,accum_agriculture-background_tpp_tonnes,accum_agriculture_din_tonnes,...,accum_urban_totp_tonnes,accum_urban_tpp_tonnes,accum_wood_din_tonnes,accum_wood_ss_tonnes,accum_wood_tdp_tonnes,accum_wood_toc_tonnes,accum_wood_ton_tonnes,accum_wood_totn_tonnes,accum_wood_totp_tonnes,accum_wood_tpp_tonnes
ospar_region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
LOFOTEN-BARENTS SEA,988.431487,21.808215,153.957717,0.197331,251.294244,6.153001,27.961216,0.574467,0.377136,196.273932,...,3.129397,1.170179,139.446188,5102.968089,9.738273,29216.705042,1459.06367,1598.509858,30.488293,20.75002
NORTH SEA,1568.938802,348.159079,2750.500664,3.808399,4366.209212,131.254823,479.413902,9.135189,5.32679,3136.348499,...,24.417848,7.541944,630.987257,1675.711133,13.94102,40016.903061,1678.565207,2309.552464,30.86369,16.922669
NORWEGIAN SEA2,1809.086871,280.75799,3164.129624,3.163844,4385.234409,81.638631,362.396621,8.579579,5.415735,2526.821914,...,20.92947,7.664675,360.604121,7260.463133,20.663089,79636.21776,2944.887359,3305.49148,64.00797,43.344881
SKAGERAK,2722.28981,1310.122412,7492.696299,12.354076,24903.24439,396.509395,1706.631808,26.206189,13.852114,11791.101711,...,73.358462,18.095606,2136.990657,5174.573702,51.761016,222221.601006,8494.530716,10631.521374,110.179885,58.418869


This table gives the **modelled** inputs to each OSPAR region **from catchments for which we have observed data**. We want to subtract these values from the overall modelled inputs to each region and substitute the observed data instead.

### 4.3. Aggregate model output according to OSPAR regions

This code sums the model output for all regions (i.e. including the monitored areas).

In [9]:
# OSPAR regions, each defined by an inclusive range of main catchment numbers
os_dict = {
    "SKAGERAK": (1, 23),
    "NORTH SEA": (24, 90),
    "NORWEGIAN SEA2": (91, 170),
    "LOFOTEN-BARENTS SEA": (171, 247),
}

# Accumulated columns of interest (the same for every region)
cols = ["accum_q_m3/s"] + [
    i
    for i in mod_df.columns
    if (i.split("_")[0] == "accum") and (i.split("_")[-2] in pars)
]

# Collect the rows for the main catchment outlets in each region
df_list = []
for reg, (min_id, max_id) in os_dict.items():
    # Outlet IDs for this region, e.g. '001.'
    vassoms = [f"{i:03d}." for i in range(min_id, max_id + 1)]

    # Accum cols for this region's outlets, labelled with the region name
    os_reg_df = mod_df.loc[mod_df["regine"].isin(vassoms), cols].assign(
        ospar_region=reg
    )
    df_list.append(os_reg_df)

# Combine all regions and total the loads per region
os_df = pd.concat(df_list, axis=0).groupby("ospar_region").sum()

# kg => tonnes
kg_cols = [col for col in os_df.columns if col.endswith("_kg")]
os_df[kg_cols] = os_df[kg_cols] / 1000
os_df = os_df.rename(columns={col: col.replace("_kg", "_tonnes") for col in kg_cols})

os_df.head()

Unnamed: 0_level_0,accum_q_m3/s,accum_agriculture-background_din_tonnes,accum_agriculture-background_ss_tonnes,accum_agriculture-background_tdp_tonnes,accum_agriculture-background_toc_tonnes,accum_agriculture-background_ton_tonnes,accum_agriculture-background_totn_tonnes,accum_agriculture-background_totp_tonnes,accum_agriculture-background_tpp_tonnes,accum_agriculture_din_tonnes,...,accum_urban_totp_tonnes,accum_urban_tpp_tonnes,accum_wood_din_tonnes,accum_wood_ss_tonnes,accum_wood_tdp_tonnes,accum_wood_toc_tonnes,accum_wood_ton_tonnes,accum_wood_totn_tonnes,accum_wood_totp_tonnes,accum_wood_tpp_tonnes
ospar_region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
LOFOTEN-BARENTS SEA,2542.634654,200.809696,2937.794851,2.447637,2356.263175,63.303271,264.112967,7.53283,5.085193,1808.943236,...,41.900725,16.3266,427.433141,12719.88873,27.916742,87411.136516,3428.738082,3856.171223,90.861783,62.945041
NORTH SEA,3349.471533,1073.010303,13513.451262,12.292723,13886.168939,403.497179,1476.507481,34.025276,21.732553,9671.958146,...,170.653199,63.226042,2138.47148,8745.837486,46.430149,135792.586456,5052.799517,7191.270997,112.31001,65.879861
NORWEGIAN SEA2,3859.262123,1052.432324,15745.335807,12.655361,19490.387195,312.115555,1364.547879,36.800403,24.145042,9472.294332,...,122.467032,46.638014,943.500112,16877.598888,50.924103,232736.87487,7106.046273,8049.546386,163.223587,112.299484
SKAGERAK,2905.710908,1561.233117,10286.401096,14.721484,30637.654365,463.923183,2025.156299,33.018535,18.297051,14051.098049,...,138.412102,42.235119,2465.785355,7113.096251,57.002433,260421.247762,9526.210551,11991.995906,129.651219,72.648786


### 4.4. Estimate loads in unmonitored areas

We can now calculate the unmonitored component by subtracting the values modelled upstream of monitoring stations (`mod_mon_df`) from the overall modelled inputs to each OSPAR region (`os_df`).

In [10]:
# Calc unmonitored loads. The two tables must have identical regions and
# columns (in the same order) so that the subtraction lines up element-wise.
# 'Index.equals' also copes with tables of different sizes, where an
# element-wise '==' would raise a ValueError instead of a clear assertion
assert mod_mon_df.columns.equals(
    os_df.columns
), "Columns differ between monitored and overall model results."
assert mod_mon_df.index.equals(
    os_df.index
), "OSPAR regions differ between monitored and overall model results."
unmon_df = os_df - mod_mon_df

# Save
out_csv = f"/home/jovyan/shared/common/JES/teotil3_data/results/unmon_loads/teotil3_raw_unmonitored_loads_{year}.csv"
unmon_df.to_csv(out_csv, encoding="utf-8", index_label="ospar_region")

### 4.5. Aggregate to required quantities for OSPAR reporting

In [11]:
# Sources considered in OSPAR template
srcs = ["wastewater", "industry", "aquaculture", "diffuse"]

# Some variables with values of zero are not included in the model output.
# Add columns of zeros for completeness, but only where the column is actually
# missing, so that simulated values are never overwritten
for zero_col in ["accum_spredt_ss_tonnes", "accum_aquaculture_ss_tonnes"]:
    if zero_col not in unmon_df.columns:
        unmon_df[zero_col] = 0

# Aggregate
unmon_df["flow_1000m3pday"] = (
    unmon_df["accum_q_m3/s"] * 60 * 60 * 24 / 1000.0
)  # m3/s => 1000s m3/day
for par in pars:
    # Lake deposition is zero for most pars, but not explicitly included in the
    # model output. Add here for completeness
    lake_par_col = f"accum_lake_{par}_tonnes"
    if lake_par_col not in unmon_df.columns:
        unmon_df[lake_par_col] = 0

    # Combine model outputs into the four OSPAR source categories
    # Wastewater = large treatment plants + 'spredt'
    unmon_df[f"wastewater_{par}"] = (
        unmon_df[f"accum_large-wastewater_{par}_tonnes"]
        + unmon_df[f"accum_spredt_{par}_tonnes"]
    )
    unmon_df[f"industry_{par}"] = unmon_df[f"accum_industry_{par}_tonnes"]
    unmon_df[f"aquaculture_{par}"] = unmon_df[f"accum_aquaculture_{par}_tonnes"]
    # Diffuse = all remaining land-cover and background sources
    unmon_df[f"diffuse_{par}"] = (
        unmon_df[f"accum_agriculture-background_{par}_tonnes"]
        + unmon_df[f"accum_agriculture_{par}_tonnes"]
        + unmon_df[f"accum_lake_{par}_tonnes"]
        + unmon_df[f"accum_upland_{par}_tonnes"]
        + unmon_df[f"accum_urban_{par}_tonnes"]
        + unmon_df[f"accum_wood_{par}_tonnes"]
    )

# Keep just the flow and the aggregated '<source>_<par>' columns
cols = ["flow_1000m3pday"] + [f"{src}_{par}" for src in srcs for par in pars]
unmon_df = unmon_df[cols].copy()

# Total for Norway
unmon_df.loc["NORWAY"] = unmon_df.sum(axis=0)

## 5. Other N and P species

Originally, TEOTIL2 only simulated TOTN and TOTP, and the table below was used to divide these into relevant subfractions for OSPAR reporting.

|   Source    | Phosphate | Nitrate | Ammonium |
|:-----------:|:---------:|:-------:|:--------:|
|    Sewage   |     0.600 |   0.050 |    0.750 |
|   Industry  |     0.600 |   0.050 |    0.750 |
| Aquaculture |     0.690 |   0.110 |    0.800 |
|   Diffuse   |     0.246 |   0.625 |    0.055 |

In addition, discharges from point sources of TOC, SS and metals were included where reported, and it was assumed that all these discharges reach the coast. 

TEOTIL3 explicitly includes DIN, TDP, TOC and SS. I will therefore use simulated values from TEOTIL3 where possible, and will only use directly reported point discharges for metals. To match the OSPAR parameters, I will assume that:

 * TDP in TEOTIL3 is approximately equal to PO4.
   
 * DIN in TEOTIL3 can be split into NO3 and NH4 using the same proportions as defined previously for the TEOTIL2 workflow (see table above). For example, for sewage and industry I will assume that $\frac{5}{80} \times DIN = NO3$ and $\frac{75}{80} \times DIN = NH4$, and similarly for the other sources using their own proportions.

In [12]:
# Fraction of DIN assigned to NO3 and NH4 for each source. The two fractions
# for a given source sum to one
din_conv_dict = {
    ("wastewater", "no3"): 5 / 80,
    ("wastewater", "nh4"): 75 / 80,
    ("industry", "no3"): 5 / 80,
    ("industry", "nh4"): 75 / 80,
    ("aquaculture", "no3"): 11 / 91,
    ("aquaculture", "nh4"): 80 / 91,
    ("diffuse", "no3"): 625 / 680,
    ("diffuse", "nh4"): 55 / 680,
}

# Walk over the '<source>_<par>' columns (everything except flow)
po4_names = {}
for col in unmon_df.drop(columns="flow_1000m3pday").columns:
    src, par = col.split("_")
    if par == "tdp":
        # Assume TDP = PO4. Renames are applied together after the loop
        po4_names[col] = f"{src}_po4"
    elif par == "din":
        # Assume DIN = NO3 + NH4 and keep same proportions per source as previously.
        # The new columns are appended and the DIN column is then removed
        for species in ("no3", "nh4"):
            unmon_df[f"{src}_{species}"] = (
                din_conv_dict[(src, species)] * unmon_df[col]
            )
        unmon_df = unmon_df.drop(columns=col)

# Renaming does not move columns, so the column order is unaffected
unmon_df = unmon_df.rename(columns=po4_names)

unmon_df.round().astype(int).T

ospar_region,LOFOTEN-BARENTS SEA,NORTH SEA,NORWEGIAN SEA2,SKAGERAK,NORWAY
flow_1000m3pday,134283,153838,177135,15848,481104
wastewater_totn,1378,4503,2935,5136,13953
wastewater_ton,435,941,864,1062,3301
wastewater_totp,168,482,384,96,1130
wastewater_po4,78,213,172,33,496
wastewater_tpp,90,269,213,63,634
wastewater_toc,2581,8004,6899,5613,23097
wastewater_ss,2284,6045,5265,2927,16521
industry_totn,74,435,650,845,2004
industry_ton,74,348,326,132,880


## 6. Metals

All reported discharges of metals are assumed to reach the coast. We only want data for catchments that are not monitored i.e. for regine IDs **not** in the graph of monitored catchments created above (not in `nd_list`).

In [13]:
def assign_ospar_region(vassom):
    """Return the OSPAR region name for a main catchment number.

    Args
        vassom: Int. Main catchment number (the integer formed from the first
                three characters of a regine ID)

    Returns
        Str. One of 'SKAGERAK' (1 to 23), 'NORTH SEA' (24 to 90),
        'NORWEGIAN SEA2' (91 to 170) or 'LOFOTEN-BARENTS SEA' (171 to 247).
        NaN is returned for values outside all of these ranges.
    """
    # Upper bounds of each 'range' are exclusive
    region_ranges = (
        (range(1, 24), "SKAGERAK"),
        (range(24, 91), "NORTH SEA"),
        (range(91, 171), "NORWEGIAN SEA2"),
        (range(171, 248), "LOFOTEN-BARENTS SEA"),
    )
    for id_range, region in region_ranges:
        if vassom in id_range:
            return region

    return np.nan

In [14]:
# Sectors with reported point discharges, and the metals of interest
sectors = ["aquaculture", "industry", "large wastewater"]
metal_pars = [
    "as_kg",
    "cd_kg",
    "cr_kg",
    "cu_kg",
    "hg_kg",
    "ni_kg",
    "pb_kg",
    "zn_kg",
]
df_list = []
for sector in sectors:
    # Get reported discharges
    sec_pt_df = teo.io.get_annual_point_data(
        engine,
        year,
        sector,
        par_list=metal_pars,
    )

    # Filter to only consider unmonitored regions. Take an explicit copy so the
    # columns added below are written to an independent frame rather than to a
    # filtered slice (avoids pandas' SettingWithCopyWarning)
    sec_pt_df = sec_pt_df.query("regine not in @nd_list").copy()

    # Group to OSPAR region, using the first three characters of the regine ID.
    # Rows where the region is NaN are dropped by 'groupby'
    sec_pt_df["vassom"] = sec_pt_df["regine"].str[:3].astype(int)
    sec_pt_df["osp_reg"] = sec_pt_df["vassom"].apply(assign_ospar_region)
    sec_pt_df = sec_pt_df.groupby("osp_reg").sum(numeric_only=True)

    # The summed catchment numbers are meaningless, so drop them
    sec_pt_df = sec_pt_df.drop(columns="vassom")
    df_list.append(sec_pt_df)

# Combine sectors side by side, aligned on OSPAR region
pt_df = pd.concat(df_list, axis="columns")

# Rename
col_map = {
    col: col.replace("large-wastewater_", "wastewater_") for col in pt_df.columns
}
pt_df.rename(columns=col_map, inplace=True)

# kg => tonnes and remove units
for col in pt_df.columns:
    if col.endswith("_kg"):
        new_col = col.replace("_kg", "")
        pt_df[new_col] = pt_df[col] / 1000
        del pt_df[col]

# Total for Norway
pt_df.loc["NORWAY"] = pt_df.sum(axis=0)

pt_df.T

osp_reg,LOFOTEN-BARENTS SEA,NORTH SEA,NORWEGIAN SEA2,SKAGERAK,NORWAY
,,,,,
aquaculture_cu,102.3907,105.2336,166.1824,0.1147,373.9214
industry_as,0.0002,1.7158,0.2429,0.2131,2.172
industry_cd,0.0,0.0316,0.0236,0.0115,0.0667
industry_cr,0.0,0.3243,0.2219,0.5171,1.0633
industry_cu,0.0003,1.3483,0.9166,2.7785,5.0437
industry_hg,0.0001,0.0036,0.0022,0.0027,0.0086
industry_ni,0.0007,3.3297,0.2757,1.2442,4.8503
industry_pb,0.0,0.3266,0.0818,0.0852,0.4936
industry_zn,0.6087,3.9162,2.7645,2.846,10.1354


In [15]:
# Join reported discharges of metals to model results. The join is on the
# index (region name) of each table
unmon_df = unmon_df.join(pt_df)
# Display rounded to whole numbers.
# NOTE(review): 'astype(int)' will presumably fail if the join leaves any NaN
# (e.g. a region with no reported discharges for a sector) - confirm
unmon_df.round(0).astype(int).T

ospar_region,LOFOTEN-BARENTS SEA,NORTH SEA,NORWEGIAN SEA2,SKAGERAK,NORWAY
flow_1000m3pday,134283,153838,177135,15848,481104
wastewater_totn,1378,4503,2935,5136,13953
wastewater_ton,435,941,864,1062,3301
wastewater_totp,168,482,384,96,1130
wastewater_po4,78,213,172,33,496
wastewater_tpp,90,269,213,63,634
wastewater_toc,2581,8004,6899,5613,23097
wastewater_ss,2284,6045,5265,2927,16521
industry_totn,74,435,650,845,2004
industry_ton,74,348,326,132,880


In [16]:
# Save the combined table (aggregated model loads plus reported metal
# discharges) for the year of interest
out_csv = f"/home/jovyan/shared/common/JES/teotil3_data/results/unmon_loads/teotil3_ospar_unmonitored_loads_{year}.csv"
unmon_df.to_csv(out_csv)

This dataset can be used to create Table 3 in the report and/or the OSPAR template.