In [1]:
import os
import shutil
import warnings

import nivapy3 as nivapy
import numpy as np
import pandas as pd
from openpyxl import load_workbook
from sqlalchemy import text

warnings.simplefilter("ignore")

In [2]:
# Connect to the database that is queried further down through the `resa2`
# schema (station metadata and observed discharge). Prompts for a username
# and password.
ora_eng = nivapy.da.connect()

Username:  ········
Password:  ········


Connection successful.


In [3]:
# Connect to the PostGIS database; used further down to query the modelled
# discharge table `teotil3.nve_hbv_discharge`.
pg_eng = nivapy.da.connect_postgis()

Connection successful.


# RID OSPAR Reporting Template

# Using TEOTIL3

In [4]:
# Set year of interest. This value is used to build the input CSV paths and the
# output workbook name, is written to cell B2 of the template sheets, and is the
# last year included in the discharge queries.
year = 2023

## 1. Get summary data

### 1.1. Monitored areas

In [5]:
# Load the measured loads for all sites; the "new" RID grouping is not used
in_csv = f"/home/jovyan/shared/common/elveovervakingsprogrammet/results/measured_loads/loads_and_flows_all_sites_{year}.csv"
mon_df = pd.read_csv(in_csv).drop(columns="new_rid_group")

# Sum the numeric columns for each OSPAR region and RID group
region_sums = mon_df.groupby(["ospar_region", "old_rid_group"]).sum(numeric_only=True)

# Sum per RID group across all regions and label the result as "NORWAY"
norway_sums = (
    mon_df.groupby("old_rid_group")
    .sum(numeric_only=True)
    .reset_index()
    .assign(ospar_region="NORWAY")
    .set_index(["ospar_region", "old_rid_group"])
)

# Stack the regional and national sums into one frame
mon_df = pd.concat([region_sums, norway_sums], axis=0)

# Keep columns whose second "_"-separated token is not "Est", then drop the
# summed station IDs and flows, which are not needed here
cols = [name for name in mon_df.columns if name.split("_")[1] != "Est"]
mon_df = mon_df[cols].drop(columns=["station_id", "mean_q_1000m3/day"])

# Rescale to the template units: Hg from kg to tonnes; the others from tonnes
# to ktonnes. All are a division by 1000
per_thousand_cols = [
    "Hg_kg",
    "NH4-N_tonnes",
    "NO3-N_tonnes",
    "TOTN_tonnes",
    "TOTP_tonnes",
    "PO4-P_tonnes",
    "SPM_tonnes",
]
for name in per_thousand_cols:
    mon_df[name] = mon_df[name] / 1000.0

# The unit suffixes no longer apply, so keep just the parameter names
mon_df = mon_df.rename(columns=lambda name: name.split("_")[0])

mon_df.round(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,Ag,As,Cd,Cr,Cu,Hg,NH4-N,NO3-N,Ni,PO4-P,Pb,SPM,SiO2,Si,TOC,TOTN,TOTP,Zn
ospar_region,old_rid_group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
LOFOTEN-BARENTS SEA,rid_108,0.0,1.0,0.0,1.0,5.0,0.0,0.0,0.0,4.0,0.0,0.0,8.0,18616.0,8570.0,21556.0,1.0,0.0,5.0
LOFOTEN-BARENTS SEA,rid_11,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,8.0,15172.0,6936.0,9739.0,0.0,0.0,1.0
LOFOTEN-BARENTS SEA,rid_36,0.0,1.0,0.0,3.0,11.0,0.0,0.0,0.0,17.0,0.0,0.0,25.0,71260.0,34762.0,40202.0,2.0,0.0,12.0
NORTH SEA,rid_108,0.0,1.0,0.0,1.0,7.0,0.0,0.0,2.0,5.0,0.0,2.0,18.0,19039.0,8963.0,20722.0,4.0,0.0,21.0
NORTH SEA,rid_11,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,3515.0,1577.0,5201.0,1.0,0.0,4.0
NORTH SEA,rid_36,0.0,2.0,0.0,2.0,8.0,0.0,0.0,3.0,3.0,0.0,3.0,34.0,28055.0,12993.0,35253.0,5.0,0.0,40.0
NORWEGIAN SEA2,rid_108,0.0,1.0,0.0,1.0,5.0,0.0,0.0,1.0,3.0,0.0,1.0,17.0,14704.0,6939.0,18358.0,2.0,0.0,7.0
NORWEGIAN SEA2,rid_11,0.0,1.0,0.0,1.0,11.0,0.0,0.0,0.0,4.0,0.0,0.0,19.0,13239.0,6156.0,15315.0,1.0,0.0,27.0
NORWEGIAN SEA2,rid_36,0.0,3.0,0.0,8.0,24.0,0.0,0.0,2.0,18.0,0.0,2.0,100.0,45387.0,27209.0,67002.0,6.0,0.0,57.0
SKAGERAK,rid_108,0.0,1.0,0.0,1.0,4.0,0.0,0.0,2.0,3.0,0.0,1.0,19.0,11798.0,5634.0,20269.0,3.0,0.0,26.0


### 1.2. Unmonitored areas

In [6]:
# Load the TEOTIL3 loads for unmonitored areas (first CSV column is the index)
in_csv = f"/home/jovyan/shared/common/JES/teotil3_data/results/unmon_loads/teotil3_ospar_unmonitored_loads_{year}.csv"
umon_df = pd.read_csv(in_csv, index_col=0)

# Columns for these parameters are divided by 1000 to match the template.
# Every column name must be of the form '<prefix>_<parameter>'
kt_pars = ["nh4", "no3", "po4", "totn", "totp", "ss"]
for col in list(umon_df.columns):
    prefix, par = col.split("_")
    if par not in kt_pars:
        continue
    umon_df[col] = umon_df[col].div(1000)

umon_df.round(0)

Unnamed: 0_level_0,flow_1000m3pday,wastewater_totn,wastewater_ton,wastewater_totp,wastewater_po4,wastewater_tpp,wastewater_toc,wastewater_ss,industry_totn,industry_ton,...,industry_pb,industry_zn,wastewater_as,wastewater_cd,wastewater_cr,wastewater_cu,wastewater_hg,wastewater_ni,wastewater_pb,wastewater_zn
ospar_region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
LOFOTEN-BARENTS SEA,134283.0,1.0,435.0,0.0,0.0,90.0,2581.0,2.0,0.0,74.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NORTH SEA,153838.0,5.0,941.0,0.0,0.0,269.0,8004.0,6.0,0.0,348.0,...,0.0,4.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,3.0
NORWEGIAN SEA2,177135.0,3.0,864.0,0.0,0.0,213.0,6899.0,5.0,1.0,326.0,...,0.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0
SKAGERAK,15848.0,5.0,1062.0,0.0,0.0,63.0,5613.0,3.0,1.0,132.0,...,0.0,3.0,0.0,0.0,0.0,3.0,0.0,1.0,0.0,7.0
NORWAY,481104.0,14.0,3301.0,1.0,0.0,634.0,23097.0,17.0,2.0,880.0,...,0.0,10.0,0.0,0.0,0.0,5.0,0.0,2.0,0.0,13.0


### 1.3. Loads for 11 main rivers

In [7]:
# Load the measured loads (first CSV column is the index), keep only the
# "rid_11" group and drop the descriptive columns that are not needed
in_csv = f"/home/jovyan/shared/common/elveovervakingsprogrammet/results/measured_loads/loads_and_flows_all_sites_{year}.csv"
rid11_df = (
    pd.read_csv(in_csv, index_col=0)
    .drop(columns="new_rid_group")
    .query('old_rid_group == "rid_11"')
    .drop(
        columns=[
            "station_code",
            "station_name",
            "old_rid_group",
            "ospar_region",
            "mean_q_1000m3/day",
        ]
    )
)

# Keep columns whose second "_"-separated token is not "Est"
cols = [name for name in rid11_df.columns if name.split("_")[1] != "Est"]
rid11_df = rid11_df[cols]

# Rescale to the template units: Hg from kg to tonnes; the others from tonnes
# to ktonnes. All are a division by 1000
per_thousand_cols = [
    "Hg_kg",
    "NH4-N_tonnes",
    "NO3-N_tonnes",
    "TOTN_tonnes",
    "TOTP_tonnes",
    "PO4-P_tonnes",
    "SPM_tonnes",
]
for name in per_thousand_cols:
    rid11_df[name] = rid11_df[name] / 1000.0

# The unit suffixes no longer apply, so keep just the parameter names
rid11_df = rid11_df.rename(columns=lambda name: name.split("_")[0])

rid11_df.head()

Unnamed: 0_level_0,Ag,As,Cd,Cr,Cu,Hg,NH4-N,NO3-N,Ni,PO4-P,Pb,SPM,SiO2,Si,TOC,TOTN,TOTP,Zn
station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
29612,0.0,2.901886,0.127727,3.72057,15.850077,0.034208,0.093415,3.661175,10.186484,0.121232,3.724548,46.620606,53550.921714,24303.803218,66699.068929,6.431795,0.209949,39.761753
29779,0.0,0.339333,0.0,0.581519,1.405242,0.002143,0.001564,0.089994,0.829775,0.009197,0.035711,7.795914,15172.366963,6935.728989,9739.307321,0.494673,0.020191,0.503985
29821,0.0,0.264006,0.011211,0.175816,1.204422,0.002792,0.010831,0.22115,0.824147,0.002463,0.205009,1.699197,3076.086093,1366.598244,3834.161005,0.42402,0.009124,2.780113
29782,0.0,0.546891,0.013823,0.727245,2.334445,0.00442,0.012568,0.101146,1.829102,0.007136,0.41695,15.449643,6391.073901,2961.028051,8348.398197,0.468453,0.0174,3.818701
36225,0.000359,0.025148,0.003137,0.045917,0.239493,0.000196,0.00315,0.042684,0.079957,0.002876,0.065383,1.470662,395.026175,177.822329,286.46383,0.081384,0.004512,0.853038


## 2. Fill-in template

The template is usually sent each year by Csilla at NIBIO. However, it doesn't seem to change, so the code below copies an old version.

In [8]:
# Make a copy of the template
# 'template_path' is the blank workbook; 'osp_xlsx' is the year-specific copy
# that all later cells open, fill in and save
template_path = r"/home/jovyan/shared/common/elveovervakingsprogrammet/results/ospar/blank_template/blank_ospar_template.xlsx"
osp_xlsx = f"/home/jovyan/shared/common/elveovervakingsprogrammet/results/ospar/01_OSPAR_Norway_{year}_teotil3.xlsx"
# Refuse to overwrite an existing workbook for this year
if os.path.exists(osp_xlsx):
    raise FileExistsError(f"The file '{osp_xlsx}' already exists.")
# NOTE: the message is printed before the copy is attempted. The destination
# path returned by shutil.copy is displayed as the cell output
print("Template copied to:")
shutil.copy(template_path, osp_xlsx)

Template copied to:


'/home/jovyan/shared/common/elveovervakingsprogrammet/results/ospar/01_OSPAR_Norway_2023_teotil3.xlsx'

In [9]:
def update_spreadsheet_unmonitored_areas(xlsx, sheet, pars, src, df, year):
    """Write values for unmonitored areas into one sheet of the OSPAR template.

    For each OSPAR region and each requested parameter, the value in column
    '{src}_{parameter}' of 'df' is written two rows below the row whose
    column-B label matches the region, in the column whose row-9 heading
    matches the parameter.

    Args:
        xlsx:  Str. Path to the Excel workbook to update
        sheet: Str. Name of the worksheet to update
        pars:  List. Template headings to fill-in (e.g. 'N-Total', 'Total Cr')
        src:   Str. Prefix of the columns in 'df' to read
               (e.g. 'wastewater', 'industry', 'aquaculture')
        df:    Dataframe. Values to fill-in, indexed by OSPAR region name
        year:  Int. Year being processed. Written to cell B2

    Returns:
        None. The workbook is updated and saved back to 'xlsx'. If the file
        or the sheet cannot be found, a message is printed and nothing is
        written.
    """
    # Template heading -> parameter suffix used in the dataframe columns
    heading_to_par = {
        "SPM": "ss",
        "TOC": "toc",
        "PO4-P": "po4",
        "P-Total": "totp",
        "NO3-N": "no3",
        "NH4-N": "nh4",
        "N-Total": "totn",
        "As": "as",
        "Pb": "pb",
        "Cd": "cd",
        "Cu": "cu",
        "Zn": "zn",
        "Ni": "ni",
        "Total Cr": "cr",
        "Hg": "hg",
    }

    # Region label in the template -> region label in the dataframe index
    template_to_df_region = {
        "Norwegian Sea (NO)": "NORWEGIAN SEA2",
        "Barents Sea (NO)": "LOFOTEN-BARENTS SEA",
        "Skagerrak (NO)": "SKAGERAK",
        "North Sea (NO)": "NORTH SEA",
        "Norway Total": "NORWAY",
    }

    # Open the workbook and pick the sheet; report and give up if either is missing
    try:
        wb = load_workbook(filename=xlsx)
        ws = wb[sheet]
    except FileNotFoundError:
        print(f"Error: The file '{xlsx}' was not found.")
        return
    except KeyError:
        print(f"Error: The sheet '{sheet}' does not exist in the workbook.")
        return

    # Record the reporting year
    ws["B2"] = year

    # Labels in column B (row 12 downwards) -> row number
    label_rows = {
        row_cells[0].value: row_cells[0].row
        for row_cells in ws[f"B12:B{ws.max_row}"]
    }

    # Headings in row 9 (columns E to AK) -> column number
    heading_cols = {cell.value: cell.column for cell in ws["E9:AK9"][0]}

    # Fill-in one cell per (region, parameter) pair
    for template_region, df_region in template_to_df_region.items():
        for par in pars:
            val = df.loc[df_region, f"{src}_{heading_to_par[par]}"]

            # Values go two rows below the region's label row
            target_row = label_rows[template_region] + 2
            target_col = heading_cols[par]
            ws.cell(row=target_row, column=target_col).value = val

    wb.save(xlsx)


def update_spreadsheet_monitored_rivers(xlsx, sheet, pars, df_dict, year):
    """Write values for monitored rivers into one sheet of the OSPAR template.

    Each template row label is mapped to a row of one of the dataframes in
    'df_dict'. For every requested parameter, the value is written two rows
    below the row whose column-B label matches, in the column whose row-9
    heading matches the parameter.

    Args:
        xlsx:    Str. Path to the Excel workbook to update
        sheet:   Str. Name of the worksheet to update
        pars:    List. Template headings to fill-in (e.g. 'N-Total', 'Total Cr')
        df_dict: Dict. {'tot':tot_df, 'trib':trib_df, 'main':rid11_df}
                 'tot' and 'trib' are indexed by OSPAR region name; 'main' is
                 indexed by station ID
        year:    Int. Year being processed. Written to cell B2

    Returns:
        None. The workbook is updated and saved back to 'xlsx'. If the file
        or the sheet cannot be found, a message is printed and nothing is
        written.
    """
    # Template heading -> column name in the dataframes
    heading_to_col = {
        "SPM": "SPM",
        "TOC": "TOC",
        "PO4-P": "PO4-P",
        "P-Total": "TOTP",
        "NO3-N": "NO3-N",
        "NH4-N": "NH4-N",
        "N-Total": "TOTN",
        "As": "As",
        "Pb": "Pb",
        "Cd": "Cd",
        "Cu": "Cu",
        "Zn": "Zn",
        "Ni": "Ni",
        "Total Cr": "Cr",
        "Hg": "Hg",
    }

    # Row label in the template -> (index label, key of the dataframe in df_dict)
    label_to_source = {
        "Norwegian Sea (NO)": ("NORWEGIAN SEA2", "tot"),
        "Barents Sea (NO)": ("LOFOTEN-BARENTS SEA", "tot"),
        "Skagerrak (NO)": ("SKAGERAK", "tot"),
        "North Sea (NO)": ("NORTH SEA", "tot"),
        "Norway Total": ("NORWAY", "tot"),
        "Tributary Rivers - Norwegian Sea": ("NORWEGIAN SEA2", "trib"),
        "Tributary Rivers - Barents Sea": ("LOFOTEN-BARENTS SEA", "trib"),
        "Tributary Rivers - Skagerak": ("SKAGERAK", "trib"),
        "Tributary Rivers - North Sea": ("NORTH SEA", "trib"),
        "Orkla": (29778, "main"),
        "Vefsna": (29782, "main"),
        "Alta": (29779, "main"),
        "Glomma": (29617, "main"),
        "Drammenselva": (29612, "main"),
        "Numedalslågen": (29615, "main"),
        "Skienselva": (29613, "main"),
        "Otra": (29614, "main"),
        "Orreelva": (29783, "main"),
        "Vosso": (29821, "main"),
    }

    # Open the workbook and pick the sheet; report and give up if either is missing
    try:
        wb = load_workbook(filename=xlsx)
        ws = wb[sheet]
    except FileNotFoundError:
        print(f"Error: The file '{xlsx}' was not found.")
        return
    except KeyError:
        print(f"Error: The sheet '{sheet}' does not exist in the workbook.")
        return

    # Record the reporting year
    ws["B2"] = year

    # Labels in column B (row 12 downwards) -> row number
    label_rows = {
        row_cells[0].value: row_cells[0].row
        for row_cells in ws[f"B12:B{ws.max_row}"]
    }

    # Headings in row 9 (columns E to AK) -> column number
    heading_cols = {cell.value: cell.column for cell in ws["E9:AK9"][0]}

    # Fill-in one cell per (row label, parameter) pair
    for template_label, (df_label, df_name) in label_to_source.items():
        source_df = df_dict[df_name]
        for par in pars:
            val = source_df.loc[df_label, heading_to_col[par]]

            # Values go two rows below the label row
            target_row = label_rows[template_label] + 2
            target_col = heading_cols[par]
            ws.cell(row=target_row, column=target_col).value = val

    wb.save(xlsx)

### 2.1. Sheet 5a: Sewage effluents

Note that these are the values for **unmonitored areas** and not the total inputs to each OSPAR area.

In [10]:
# Select the columns whose prefix (text before the first "_") is "wastewater"
cols = umon_df.columns[umon_df.columns.str.split("_").str[0] == "wastewater"]
sew_df = umon_df[cols].copy()

# Template headings to fill-in on sheet 5a
pars = [
    "Cd", "Hg", "Cu", "Pb", "Zn",
    "NH4-N", "NO3-N", "PO4-P", "N-Total", "P-Total",
    "SPM", "As", "Ni", "TOC", "Total Cr",
]
update_spreadsheet_unmonitored_areas(
    xlsx=osp_xlsx,
    sheet="5a",
    pars=pars,
    src="wastewater",
    df=sew_df,
    year=year,
)

### 2.2. Sheet 5b: Industrial effluents

Note that these are the values for **unmonitored areas** and not the total inputs to each OSPAR area.

In [11]:
# Select the columns whose prefix (text before the first "_") is "industry"
cols = umon_df.columns[umon_df.columns.str.split("_").str[0] == "industry"]
ind_df = umon_df[cols].copy()

# Template headings to fill-in on sheet 5b
pars = [
    "Cd", "Hg", "Cu", "Pb", "Zn",
    "NH4-N", "NO3-N", "PO4-P", "N-Total", "P-Total",
    "SPM", "As", "Ni", "TOC", "Total Cr",
]
update_spreadsheet_unmonitored_areas(
    xlsx=osp_xlsx,
    sheet="5b",
    pars=pars,
    src="industry",
    df=ind_df,
    year=year,
)

### 2.3. Sheet 5c: Aquaculture discharges

Note that these are the values for **unmonitored areas** and not the total inputs to each OSPAR area.

In [12]:
# Select the columns whose prefix (text before the first "_") is "aquaculture"
cols = umon_df.columns[umon_df.columns.str.split("_").str[0] == "aquaculture"]
aqu_df = umon_df[cols].copy()

# Template headings to fill-in on sheet 5c (a shorter list than for 5a and 5b)
pars = ["Cu", "NH4-N", "NO3-N", "PO4-P", "N-Total", "P-Total", "TOC"]
update_spreadsheet_unmonitored_areas(
    xlsx=osp_xlsx,
    sheet="5c",
    pars=pars,
    src="aquaculture",
    df=aqu_df,
    year=year,
)

### 2.4. Sheet 5d: Other discharges

This sheet is left blank

### 2.5. Sheet 5e: Total direct discharges

The sum of sewage, industrial and fish-farm discharges from **unmonitored areas**.

In [13]:
# Combine sew, ind and aqu, then aggregate.
# The source prefix is stripped from the column names of a *copy* of each frame,
# so sew_df, ind_df and aqu_df are left untouched and this cell can be re-run
# safely (modifying them in place made a second run fail on the renamed columns)
td_parts = []
for df in [sew_df, ind_df, aqu_df]:
    part = df.copy()
    part.columns = [i.split("_")[1] for i in part.columns]
    td_parts.append(part)

# Stack the three sources and sum per OSPAR region (the index). Parameters
# missing from a source are NaN after stacking and are skipped by the sum
td_df = pd.concat(td_parts, axis=0, sort=True)
td_df = td_df.groupby(level=0).sum()
td_df.columns = ["tot-dir_" + i for i in td_df.columns]

# Update sheet 5e
pars = [
    "Cd",
    "Hg",
    "Cu",
    "Pb",
    "Zn",
    "NH4-N",
    "NO3-N",
    "PO4-P",
    "N-Total",
    "P-Total",
    "SPM",
    "As",
    "Ni",
    "TOC",
    "Total Cr",
]
update_spreadsheet_unmonitored_areas(osp_xlsx, "5e", pars, "tot-dir", td_df, year)

### 2.6. Sheet 6a: Monitored rivers

**Does "Inner Oslofjord" in the template correspond to "Alna"?** If so, we can fill-in one additional row in this table.

In [14]:
# Work on a flattened copy so that mon_df itself is not modified and the cell
# can be re-run without side effects
mon_flat_df = mon_df.reset_index()

# Total monitored loads per OSPAR region (all RID groups). numeric_only=True
# stops the 'old_rid_group' labels from being concatenated into the result
tot_df = mon_flat_df.groupby("ospar_region").sum(numeric_only=True)

# Tributary loads per OSPAR region: everything except the "rid_11" group
trib_df = (
    mon_flat_df[mon_flat_df["old_rid_group"] != "rid_11"]
    .groupby("ospar_region")
    .sum(numeric_only=True)
)

In [15]:
# Template headings to fill-in on sheet 6a
pars = [
    "Cd", "Hg", "Cu", "Pb", "Zn",
    "NH4-N", "NO3-N", "PO4-P", "N-Total", "P-Total",
    "SPM", "As", "Ni", "TOC", "Total Cr",
]

# Regional totals, the RID11 rivers and regional tributary sums
df_dict = dict(tot=tot_df, main=rid11_df, trib=trib_df)
update_spreadsheet_monitored_rivers(
    xlsx=osp_xlsx,
    sheet="6a",
    pars=pars,
    df_dict=df_dict,
    year=year,
)

### 2.7. Sheet 6b: Unmonitored areas

Diffuse discharges for unmonitored areas only (i.e. results from TEOTIL3 excluding point discharges).

In [16]:
# Select the columns whose prefix (text before the first "_") is "diffuse"
cols = umon_df.columns[umon_df.columns.str.split("_").str[0] == "diffuse"]
diff_df = umon_df[cols].copy()

# Template headings to fill-in on sheet 6b
pars = ["NH4-N", "NO3-N", "PO4-P", "N-Total", "P-Total", "SPM", "TOC"]
update_spreadsheet_unmonitored_areas(
    xlsx=osp_xlsx,
    sheet="6b",
    pars=pars,
    src="diffuse",
    df=diff_df,
    year=year,
)

### 2.8. Sheet 6c: Total inputs

**Note:** See e-mail from Csilla received 07/11/2017 at 13.43. This table should **not** include "point" discharges (`td_df`) - it's just the sum of tables 6a and 6b.

In [17]:
# Map the diffuse column names to the names used in tot_df
col_map = {
    "diffuse_totn": "TOTN",
    "diffuse_totp": "TOTP",
    "diffuse_po4": "PO4-P",
    "diffuse_no3": "NO3-N",
    "diffuse_nh4": "NH4-N",
    "diffuse_ss": "SPM",
    "diffuse_toc": "TOC",
}
diff_6c_df = diff_df.rename(columns=col_map)

# Add the diffuse loads to the monitored totals. The sum is stored in a new
# frame rather than back in tot_df, so re-running this cell cannot add the
# diffuse loads a second time. Rows are aligned on the OSPAR region index
tot_6c_df = tot_df.copy()
for col in col_map.values():
    tot_6c_df[col] = tot_df[col] + diff_6c_df[col]

# Update sheet 6c
pars = [
    "Cd",
    "Hg",
    "Cu",
    "Pb",
    "Zn",
    "NH4-N",
    "NO3-N",
    "PO4-P",
    "N-Total",
    "P-Total",
    "SPM",
    "As",
    "Ni",
    "TOC",
    "Total Cr",
]
df_dict = {"tot": tot_6c_df, "main": rid11_df, "trib": trib_df}
update_spreadsheet_monitored_rivers(osp_xlsx, "6c", pars, df_dict, year)

The following two cells are no longer used as I originally misunderstood what table 6c represents.

### 2.9. Sheet 7: Concentrations

In [18]:
# Read data (first CSV column, the station ID, becomes the index)
in_csv = f"/home/jovyan/shared/common/elveovervakingsprogrammet/results/measured_loads/concs_and_flows_rid_20_{year}.csv"
conc_df = pd.read_csv(in_csv, index_col=0, encoding="utf-8")

# Convert units
conc_df["Hg_ng/l"] = conc_df["Hg_ng/l"] / 1000  # ng to ug
conc_df["NH4-N_µg/l N"] = conc_df["NH4-N_µg/l N"] / 1000  # ug to mg
conc_df["NO3-N_µg/l N"] = conc_df["NO3-N_µg/l N"] / 1000  # ug to mg
conc_df["TOTN_µg/l N"] = conc_df["TOTN_µg/l N"] / 1000  # ug to mg
conc_df["TOTP_µg/l P"] = conc_df["TOTP_µg/l P"] / 1000  # ug to mg
conc_df["PO4-P_µg/l P"] = conc_df["PO4-P_µg/l P"] / 1000  # ug to mg
conc_df["TOC_mg C/l"] = conc_df["TOC_mg C/l"] * 1000  # mg to ug

# Get flags. An explicit copy is taken so that the column changes below are
# made on an independent frame rather than on a slice of conc_df
cols = [i for i in conc_df.columns if i.split("_")[1] == "flag"]
lod_df = conc_df[cols].copy()
lod_df.columns = [i.split("_")[0] for i in lod_df.columns]

# Get vals (the non-flag column for every parameter that has a flag column)
cols = [
    i
    for i in conc_df.columns
    if ((i.split("_")[0] in lod_df.columns) and (i.split("_")[1] != "flag"))
]
conc_df = conc_df[cols].copy()
conc_df.columns = [i.split("_")[0] for i in conc_df.columns]

# Rename some parameters in both frames
rename_dict = {
    "SPM": "S.P.M.",
    "TOTN": "n",
    "NH4-N": "nh4",
    "NO3-N": "no3",
    "TOTP": "p",
    "PO4-P": "po4",
}
lod_df = lod_df.rename(columns=rename_dict)
conc_df = conc_df.rename(columns=rename_dict)

# Map Excel headings to df cols
par_dict = {
    "SPM": "S.P.M.",
    "TOC": "TOC",
    "PO4-P": "po4",
    "P-Total": "p",
    "NO3-N": "no3",
    "NH4-N": "nh4",
    "N-Total": "n",
    "As": "As",
    "Pb": "Pb",
    "Cd": "Cd",
    "Cu": "Cu",
    "Zn": "Zn",
    "Ni": "Ni",
    "Total Cr": "Cr",
    "Hg": "Hg",
}

# Map names to stns
names_dict = {
    "Orkla": 29778,
    "Vefsna": 29782,
    "Alta": 29779,
    "Glomma": 29617,
    "Drammenselva": 29612,
    "Numedalslågen": 29615,
    "Skienselva": 29613,
    "Otra": 29614,
    "Orreelva": 29783,
    "Vosso": 29821,
}

# Open new file and get sheet
wb = load_workbook(filename=osp_xlsx)
ws = wb["7"]

# Get row numbers: labels in column B (row 12 downwards) -> row number
row_dict = {}
for item in ws["B12" : "B%s" % ws.max_row]:
    cell = item[0]
    row_dict[cell.value] = cell.row

# Get col numbers: headings in row 9 (columns E to AK) -> column number
col_dict = {}
for cell in ws["E9":"AK9"][0]:
    col_dict[cell.value] = cell.column

pars = [
    "Cd",
    "Hg",
    "Cu",
    "Pb",
    "Zn",
    "NH4-N",
    "NO3-N",
    "PO4-P",
    "N-Total",
    "P-Total",
    "SPM",
    "As",
    "Ni",
    "TOC",
    "Total Cr",
]

# Update spreadsheet
for stn_name, stn_id in names_dict.items():
    for par in pars:
        # All samples and LOD flags for this station and parameter
        samples = conc_df.loc[stn_id, par_dict[par]]
        flags = lod_df.loc[stn_id, par_dict[par]]

        # Lower average: values flagged "<" are counted as zero
        lower_vals = samples.values.copy()
        lower_vals[flags.fillna("0").values == "<"] = 0

        # Number of samples and percentage of samples carrying a flag
        n_samp = len(samples.dropna())
        n_lod = (~pd.isnull(flags)).sum()
        pct_lod = 100 * float(n_lod) / float(n_samp)

        # Row offset (relative to the station's label row) -> value to write.
        # NOTE(review): the upper average used to be written to offset 0, which
        # overwrote the lower average. It is now written to offset 1, the only
        # unused row between the lower average and the minimum - confirm this
        # against the row labels in the template
        stats = {
            0: np.nanmean(lower_vals),  # 1a. Lower average
            1: samples.mean(),  # 1b. Upper average (flagged values as reported)
            2: samples.min(),  # 2. Min
            3: samples.max(),  # 3. Max
            4: "Yes" if pct_lod < 30 else "No",  # 5. Fewer than 30% flagged?
            5: n_samp,  # 4. N
            7: samples.std(),  # 6. Std
        }

        row = row_dict[stn_name]
        col = col_dict[par]
        for offset, val in stats.items():
            ws.cell(row=row + offset, column=col).value = val

# Save
wb.save(osp_xlsx)

### 2.10. Sheet 9: Discharge

Adapted from the original notebook [here](http://nbviewer.jupyter.org/github/JamesSample/rid/blob/master/notebooks/recalculate_ospar_flows.ipynb).

#### 2.10.1. Discharges from monitored locations

The code below extracts summary statistics for 10 of the `RID_11` stations from 1990 to the year of interest (inclusive). Suldalslågen is also included from the `RID_36` stations.

In [19]:
# Station lists: all of the RID_11 sheet, plus station 29781 (Suldalslågen)
# from the RID_36 sheet
in_xlsx = r"/home/jovyan/shared/common/JES/teotil2_data/RID_Sites_List.xlsx"
rid_11_df = pd.read_excel(in_xlsx, sheet_name="RID_11")
rid_36_df = pd.read_excel(in_xlsx, sheet_name="RID_36").query("station_id == 29781")
mon_df = pd.concat([rid_11_df, rid_36_df], axis=0)

# OSPAR region for each station, as stored in the database
sql = "SELECT station_id, value FROM resa2.stations_par_values WHERE var_id = 262"
ospar_reg = pd.read_sql_query(sql, ora_eng)
ospar_reg.columns = ["station_id", "ospar_region"]

# Attach the database regions to the station list. Both inputs have an
# 'ospar_region' column, so the merge yields 'ospar_region_x' (from the Excel
# file) and 'ospar_region_y' (from the database)
mon_df = mon_df.merge(ospar_reg, how="left", on="station_id")

# NOTE(review): the region kept here is the one from the Excel file ('_x'); the
# values queried from the database ('_y') are not used - confirm this is intended
mon_df = mon_df.assign(ospar_region=mon_df["ospar_region_x"])[
    ["station_id", "station_code", "station_name", "ospar_region"]
]

mon_df

Unnamed: 0,station_id,station_code,station_name,ospar_region
0,29615,VESENUM,Numedalslågen,SKAGERAK
1,29821,HOREVOS,Vosso(Bolstadelvi),NORTH SEA
2,29783,ROGEORR,Orreelva,NORTH SEA
3,29613,TELESKI,Skienselva,SKAGERAK
4,29614,VAGEOTR,Otra,SKAGERAK
5,29782,NOREVEF,Vefsna,NORWEGIAN SEA2
6,36225,OSLEALN,Alna,SKAGERAK
7,29617,ØSTEGLO,Glomma ved Sarpsfoss,SKAGERAK
8,29779,FINEALT,Altaelva,LOFOTEN-BARENTS SEA
9,29612,BUSEDRA,Drammenselva,SKAGERAK


In [20]:
# Annual flow statistics from 1990 to 'year' for each monitored station
df_list = []
for stn_id in mon_df["station_id"]:
    # Catchment area of the chemistry station
    sql = f"SELECT catchment_area FROM resa2.stations WHERE station_id = {stn_id}"
    wc_area = pd.read_sql_query(sql, ora_eng)["catchment_area"].iloc[0]

    # Discharge station linked to the chemistry station
    sql = f"SELECT * FROM resa2.default_dis_stations WHERE station_id = {stn_id}"
    dis_stn_id = pd.read_sql_query(sql, ora_eng)["dis_station_id"].iloc[0]

    # Catchment area of the discharge station
    sql = (
        "SELECT area FROM resa2.discharge_stations "
        "WHERE dis_station_id = %s" % dis_stn_id
    )
    dis_area = pd.read_sql_query(sql, ora_eng)["area"].iloc[0]

    # Annual mean, min and max of the discharge series
    sql = (
        "SELECT TO_CHAR(xdate, 'YYYY') as year, "
        "       AVG(xvalue) as mean, "
        "       MIN(xvalue) as min, "
        "       MAX(xvalue) as max "
        "FROM resa2.discharge_values "
        "WHERE dis_station_id = %s "
        "AND xdate >= date '1990-01-01' "
        "AND xdate <= date '%s-12-31' "
        "GROUP BY TO_CHAR(xdate, 'YYYY') "
        "ORDER BY year" % (dis_stn_id, year)
    )
    q_df = pd.read_sql_query(sql, ora_eng)

    # Move the year into the index so that only the flow columns are rescaled
    q_df["year"] = q_df["year"].astype(int)
    q_df = q_df.set_index("year")

    # Scale from the discharge station to the chemistry station by area ratio
    q_df = q_df * wc_area / dis_area

    # Convert m3/s to 1000 m3/d
    q_df = q_df * 60 * 60 * 24 / 1000

    # Bring the year back as a column
    q_df = q_df.reset_index()

    # Long-term average of the annual means, number of years, and the
    # identifying columns expected by the template
    q_df = q_df.assign(
        lta=q_df["mean"].mean(),
        n_yrs=len(q_df),
        area_id=stn_id,
        stat="Mean",
        n_sites=1,
    )

    # Store with the columns in template order
    df_list.append(
        q_df[
            ["area_id", "year", "mean", "lta", "min", "max", "n_yrs", "n_sites", "stat"]
        ]
    )

# Combine to single df
q_mon_df = pd.concat(df_list, axis=0)

# Round every numeric column to the nearest integer
num_cols = q_mon_df.select_dtypes(include=["number"]).columns
q_mon_df[num_cols] = q_mon_df[num_cols].round().astype(int)

q_mon_df.head()

Unnamed: 0,area_id,year,mean,lta,min,max,n_yrs,n_sites,stat
0,29615,1990,10119,10363,3413,39300,34,1,Mean
1,29615,1991,7885,10363,1353,30882,34,1,Mean
2,29615,1992,7838,10363,3106,35247,34,1,Mean
3,29615,1993,9868,10363,3257,29220,34,1,Mean
4,29615,1994,10823,10363,3106,54601,34,1,Mean


#### 2.10.2. Modelled discharges for vassdragsområder

Calculate summary statistics for each OSPAR region based on the modelled data from HBV.

In [21]:
def combine_flow_data(vassom_list, ospar_reg):
    """Sum the modelled daily flows for a set of vassdragsområder and
    summarise the combined series by year.

    The query reads 'teotil3.nve_hbv_discharge' through the notebook-level
    'pg_eng' connection, from 1990-01-01 to the end of the notebook-level
    'year', using the rows with 'data_supply_year' equal to 'year' + 1.

    Args:
        vassom_list: List of strings. Vassdragsområder to combine
        ospar_reg:   Str. Name of OSPAR region. Stored in the 'area_id' column

    Returns:
        Dataframe. One row per year with columns 'area_id', 'year', 'mean',
        'lta', 'min', 'max', 'n_yrs', 'n_sites' and 'stat'. Flows are in
        1000 m3/day
    """
    # Daily sum over the vassdragsområder, then annual mean, min and max
    sql = text(
        """
        SELECT TO_CHAR(date, 'YYYY') as year,
               AVG("flow_m3/s") as mean,
               MIN("flow_m3/s") as min,
               MAX("flow_m3/s") as max
        FROM (
            SELECT DATE_TRUNC('day', date) AS date,
                   SUM("flow_m3/s") AS "flow_m3/s"
            FROM teotil3.nve_hbv_discharge
            WHERE vassom IN :vassom_list
            AND data_supply_year = :supply_year
            AND date >= DATE '1990-01-01'
            AND date <= DATE :end_dt
            GROUP BY DATE_TRUNC('day', date)
            ORDER BY DATE_TRUNC('day', date)
        ) subquery
        WHERE date >= DATE '1990-01-01'
          AND date <= DATE :end_dt
        GROUP BY TO_CHAR(date, 'YYYY')
        ORDER BY year;
    """
    )
    query_params = {
        "vassom_list": tuple(vassom_list),
        "supply_year": year + 1,
        "end_dt": f"{year}-12-31",
    }
    annual_df = pd.read_sql_query(sql, pg_eng, params=query_params)

    # Move the year into the index so that only the flow columns are converted
    annual_df["year"] = annual_df["year"].astype(int)
    annual_df = annual_df.set_index("year")

    # Convert m3/s to 1000 m3/d
    annual_df = annual_df * 60 * 60 * 24 / 1000

    # Bring the year back as a column
    annual_df = annual_df.reset_index()

    # Long-term average of the annual means, number of years, and the
    # identifying columns expected by the template
    annual_df = annual_df.assign(
        lta=annual_df["mean"].mean(),
        n_yrs=len(annual_df),
        area_id=ospar_reg,
        stat="Mean",
        n_sites=len(vassom_list),
    )

    # Return with the columns in template order
    template_order = [
        "area_id",
        "year",
        "mean",
        "lta",
        "min",
        "max",
        "n_yrs",
        "n_sites",
        "stat",
    ]
    return annual_df[template_order]

In [22]:
# First and last vassdragsområde number (both inclusive) for each OSPAR region
osp_dict = {
    "SKAGERAK": (1, 23),
    "NORTH SEA": (24, 90),
    "NORWEGIAN SEA2": (91, 170),
    "LOFOTEN-BARENTS SEA": (171, 247),
    "NORWAY": (1, 247),
}

# Flow stats for each region. Vassdragsområde IDs are zero-padded to 3 digits
df_list = [
    combine_flow_data(
        [f"{vassom:03d}" for vassom in range(first_vassom, last_vassom + 1)],
        reg,
    )
    for reg, (first_vassom, last_vassom) in osp_dict.items()
]
q_mod_df = pd.concat(df_list, axis=0)

# Round every numeric column to the nearest integer
num_cols = q_mod_df.select_dtypes(include=["number"]).columns
q_mod_df[num_cols] = q_mod_df[num_cols].round().astype(int)

q_mod_df.head()

Unnamed: 0,area_id,year,mean,lta,min,max,n_yrs,n_sites,stat
0,SKAGERAK,1990,177983,165581,51505,512511,34,23,Mean
1,SKAGERAK,1991,131958,165581,40951,374739,34,23,Mean
2,SKAGERAK,1992,139415,165581,60185,529073,34,23,Mean
3,SKAGERAK,1993,161933,165581,53587,740406,34,23,Mean
4,SKAGERAK,1994,183136,165581,37551,736947,34,23,Mean


#### 2.10.3. Write results to template

In [23]:
# Dict mapping names in template to IDs in dfs. Integer IDs are stations in
# q_mon_df (monitored); string IDs are regions in q_mod_df (modelled)
name_dict = {
    "Orkla": 29778,
    "Vefsna": 29782,
    "Norwegian Sea (NO)": "NORWEGIAN SEA2",
    "Alta": 29779,
    "Barents Sea (NO)": "LOFOTEN-BARENTS SEA",
    "Glomma": 29617,
    "Drammenselva": 29612,
    "Numedalslågen": 29615,
    "Skienselva": 29613,
    "Otra": 29614,
    "Inner Oslofjord": 36225,  # Assume just Alna for now(?)
    "Skagerrak (NO)": "SKAGERAK",
    "Orreelva": 29783,
    "Suldalslågen": 29781,
    "Vosso": 29821,
    "North Sea (NO)": "NORTH SEA",
    "Norway Total": "NORWAY",
}

# Template column number for each statistic
stat_cols = {
    "mean": 5,
    "lta": 7,
    "min": 9,
    "max": 11,
    "n_yrs": 13,
    "n_sites": 15,
    "stat": 17,
}

# Open new file and get sheet
wb = load_workbook(filename=osp_xlsx)
ws = wb["9"]

# Set year
ws["B2"] = year

# Loop over the area labels in B12:B28
for item in ws["B12":"B28"]:
    # Get cell properties
    cell = item[0]
    area = cell.value
    row = cell.row

    # Get area ID, failing with a clear message if the label is not mapped
    if area not in name_dict:
        raise KeyError(
            f"Cell B{row} of sheet '9' contains '{area}', which is not in name_dict."
        )
    ar_id = name_dict[area]

    # Get data from relevant df
    if isinstance(ar_id, int):
        # Monitored df
        df = q_mon_df.query("(area_id == @ar_id) and (year == @year)")
    else:
        # Modelled df
        df = q_mod_df.query("(area_id == @ar_id) and (year == @year)")

    assert len(df) == 1, (
        f"Expected exactly one row of flow statistics for '{area}' "
        f"(area_id={ar_id}) in {year}, but found {len(df)}."
    )

    # Write each statistic to its column on this row
    for stat_name, col in stat_cols.items():
        ws.cell(column=col, row=row, value=df.iloc[0][stat_name])

# Save
wb.save(osp_xlsx)