In [1]:
%matplotlib inline
import os
import shutil
from pathlib import Path

import matplotlib.pyplot as plt
import nivapy3 as nivapy
import numpy as np
import pandas as pd
import seaborn as sn
import useful_rid_code as rid

# Apply seaborn's "notebook" plotting context to figures drawn in this notebook
sn.set_context("notebook")

# RID

## Data processing for "monitored rivers" (parameterised)

This notebook is "parameterised" for use with Papermill. The cell below has the tag `parameters`, which means the entire notebook can be called from `01_recalculate_ospar_1990-2016_main.ipynb`.

**Note:** Some settings in this notebook are specific to the RID programme from 1990 to 2016. Take care for years outside this range.

In [2]:
# Default parameter values. The cell carries the 'parameters' tag so that
# Papermill can supply run-specific values when the notebook is executed.
# https://papermill.readthedocs.io/en/latest/index.html
user = ""
pw = ""
year = 1990

In [3]:
# Parameters
# SECURITY: this cell previously held a plaintext database username and
# password. Credentials must never be saved in the notebook (or in its
# executed copies), so they are now read from the environment. The variable
# names NIVABASE_USER / NIVABASE_PW are a local convention - set them before
# running. The password that was exposed here should be considered
# compromised and rotated.
user = os.environ.get("NIVABASE_USER", "")
pw = os.environ.get("NIVABASE_PW", "")

# Year of interest for this run
year = 2000


In [4]:
# Load the full station list from the 'RID_All' sheet
in_xlsx = r"../../../Data/RID_Sites_List_2017-2020.xlsx"
rid_156_df = pd.read_excel(in_xlsx, sheet_name="RID_All")

# Drop TROEMÅL2 (station_id 38005), leaving the 155 stations used below
rid_155_df = rid_156_df[rid_156_df["station_id"] != 38005]

# Subsets selected on the 'old_rid_group' column
rid_11_df = rid_155_df[rid_155_df["old_rid_group"] == "rid_11"]
rid_47_df = rid_155_df[rid_155_df["old_rid_group"] != "rid_108"]

## 2. Tabulate raw water chemistry and flow

### 2.1. Data for year of interest

Just for the RID 11 sites.

In [5]:
%%capture
# NB: the %%capture cell magic above must stay on the first line of the cell;
# it silences the output produced by the calls below.

# Connect to db using the `user` / `pw` notebook parameters
engine = rid.connect_to_nivabase(user=user, pw=pw)

# Output CSV for the RID 11 sites; the file name includes the year of interest
out_csv = f"../../../Results/Loads_CSVs/concs_and_flows_rid_11_{year}.csv"

# Presumably extracts water chemistry and flow for `year` at the RID 11 sites
# and writes it to `out_csv` (inferred from the function name and the section
# heading - confirm in useful_rid_code). The returned object is kept as `df`.
df = rid.write_csv_water_chem(rid_11_df, year, out_csv, engine)

### 2.2. Data for all years

Not necessary for OSPAR reporting, so commented out below.

In [6]:
# %%capture

# # Connect to db
# engine = rid.connect_to_nivabase(user=user, pw=pw)

# # Container for data
# df_list = []

# # Dummy path for intermediate output (which isn't needed here)
# out_csv = r"../../../Results/Loads_CSVs/cons_and_flows_intermed.csv"

# # Loop over years
# for data_yr in range(1990, year + 1):
#     # Get data
#     df = rid.write_csv_water_chem(rid_155_df, data_yr, out_csv, engine)

#     # Add to output
#     df_list.append(df)

# # Delete intermediate
# os.remove(out_csv)

# # Combine
# df = pd.concat(df_list, axis=0)

# # Reorder cols and tidy
# st_cols = [
#     "station_id",
#     "station_code",
#     "station_name",
#     "old_rid_group",
#     "new_rid_group",
#     "ospar_region",
#     "sample_date",
#     "Qs_m3/s",
# ]
# par_cols = [i for i in df.columns if i not in st_cols]
# par_cols.sort()
# df = df[st_cols + par_cols]

# # Output CSV
# out_csv = f"../../../Results/Loads_CSVs/concs_and_flows_rid_155_1990-{year}.csv"
# df.to_csv(out_csv, encoding="utf-8", index=False)

## 3. Estimate observed loads

### 3.1. Annual flows

First get a dataframe of annual flow volumes to join to the summary output. **NB:** This dataframe isn't actually used in the loads calculations - they are handled separately - it's just for the output CSVs.

In [7]:
# Connect to db using the `user` / `pw` notebook parameters
engine = rid.connect_to_nivabase(user=user, pw=pw)

# Get flow data for the 155 stations, from 1990 up to the year of interest
# (whether the end year is inclusive is decided inside useful_rid_code -
# TODO confirm). The preview below shows one row per station and year with a
# 'mean_q_1000m3/day' column. `q_df` is reused later in the notebook, where it
# is filtered to `year` and joined to the loads.
q_df = rid.get_flow_volumes(rid_155_df, 1990, year, engine)

# Preview the first rows
q_df.head()

Connection successful.


Unnamed: 0,station_id,year,mean_q_1000m3/day
0,29612,1990,25891.13466
1,29612,1991,19274.318392
2,29612,1992,22209.901227
3,29612,1993,28155.888465
4,29612,1994,27384.945933


### 3.2. Loads for all rivers

In [8]:
%%capture

# Connect to db
engine = rid.connect_to_nivabase(user=user, pw=pw)

# Pars of interest
par_list = [
    "SPM",
    "TOC",
    "PO4-P",
    "TOTP",
    "NO3-N",
    "NH4-N",
    "TOTN",
    "SiO2",
    "Ag",
    "As",
    "Pb",
    "Cd",
    "Cu",
    "Zn",
    "Ni",
    "Cr",
    "Hg",
]

# Container for results from each site
loads_list = []

# Loop over sites
for stn_id in rid_155_df["station_id"].values:
    # Estimate loads at this site
    loads_list.append(
        rid.estimate_loads(stn_id, par_list, year, engine, infer_missing=True)
    )

# Concatenate to new df
lds_all = pd.concat(loads_list, axis=0)
lds_all.index.name = "station_id"
lds_all.reset_index(inplace=True)

# Get flow data for year
q_yr = q_df.query("year == @year")

# Join
lds_all = pd.merge(lds_all, rid_155_df, how="left", on="station_id")
lds_all = pd.merge(lds_all, q_yr, how="left", on="station_id")

# Reorder cols and tidy
st_cols = [
    "station_id",
    "station_code",
    "station_name",
    "old_rid_group",
    "new_rid_group",
    "ospar_region",
    "mean_q_1000m3/day",
]
unwant_cols = [
    "nve_vassdrag_nr",
    "lat",
    "lon",
    "utm_north",
    "utm_east",
    "utm_zone",
    "station_type",
    "year",
]
par_cols = [i for i in lds_all.columns if i not in (st_cols + unwant_cols)]

for col in unwant_cols:
    del lds_all[col]

lds_all = lds_all[st_cols + par_cols]

# Write output
out_csv = f"../../../Results/Loads_CSVs/loads_and_flows_all_sites_{year}.csv"
lds_all.to_csv(out_csv, encoding="utf-8", index=False)

### 3.3. Loads through time

The code below is taken from Section 3 of [notebook 3](http://nbviewer.jupyter.org/github/JamesSample/rid/blob/master/notebooks/estimate_loads.ipynb).

Not necessary for OSPAR reporting, so commented out below.

In [9]:
# %%capture

# # Connect to db
# engine = rid.connect_to_nivabase(user=user, pw=pw)

# # Period of interest
# st_yr, end_yr = 1990, year

# # Container for results
# loads_list = []

# # Loop over sites
# for stn_id in rid_155_df["station_id"].values:
#     # Loop over years
#     for data_yr in range(st_yr, end_yr + 1):
#         print("Processing Station ID %s for %s" % (stn_id, data_yr))

#         # Get loads
#         l_df = rid.estimate_loads(stn_id, par_list, data_yr, engine, infer_missing=True)

#         if l_df is not None:
#             # Name and reset index
#             l_df.index.name = "station_id"
#             l_df.reset_index(inplace=True)

#             # Add year
#             l_df["year"] = data_yr

#             # Add to output
#             loads_list.append(l_df)

# # Concatenate to new df
# lds_ts = pd.concat(loads_list, axis=0)

# # Join
# lds_q_ts = pd.merge(lds_ts, rid_155_df, how="left", on="station_id")
# lds_q_ts = pd.merge(lds_q_ts, q_df, how="left", on=["station_id", "year"])

# # Reorder cols and tidy
# st_cols = [
#     "station_id",
#     "station_code",
#     "station_name",
#     "old_rid_group",
#     "new_rid_group",
#     "ospar_region",
#     "mean_q_1000m3/day",
# ]
# unwant_cols = [
#     "nve_vassdrag_nr",
#     "lat",
#     "lon",
#     "utm_north",
#     "utm_east",
#     "utm_zone",
#     "station_type",
# ]
# par_cols = [i for i in lds_q_ts.columns if i not in (st_cols + unwant_cols)]

# for col in unwant_cols:
#     del lds_q_ts[col]

# lds_q_ts = lds_q_ts[st_cols + par_cols]

# # Save output
# out_csv = f"../../../Results/Loads_CSVs/loads_and_flows_rid_155_{st_yr}-{end_yr}.csv"
# lds_q_ts.to_csv(out_csv, encoding="utf-8", index=False)

# # Build multi-index on lds_ts for further processing
# lds_ts.set_index(["station_id", "year"], inplace=True)

In [10]:
# %%capture

# # Output folder for plots
# out_fold = f"../../../Results/TS_Plots/RID_Plots_To_{year}"
# if os.path.isdir(out_fold) == False:
#     os.mkdir(out_fold)

# # Loop over df
# for stn_id in rid_11_df["station_id"].values:
#     # Get data for this station
#     df = lds_ts.loc[stn_id]

#     # Separate est and val cols to two dfs
#     cols = df.columns
#     est_cols = [i for i in cols if i.split("_")[1] == "Est"]
#     val_cols = [i for i in cols if i.split("_")[1] != "Est"]
#     val_df = df[val_cols]
#     est_df = df[est_cols]

#     # Convert to "long" format
#     val_df.reset_index(inplace=True)
#     val_df = pd.melt(val_df, id_vars="year", var_name="par_unit")
#     est_df.reset_index(inplace=True)
#     est_df = pd.melt(est_df, id_vars="year", var_name="par_est", value_name="est")

#     # Get just par for joining
#     val_df["par"] = val_df["par_unit"].str.split("_", expand=True)[0]
#     est_df["par"] = est_df["par_est"].str.split("_", expand=True)[0]

#     # Join
#     df = pd.merge(val_df, est_df, how="left", on=["year", "par"])

#     # Extract cols of interest
#     df = df[["year", "par_unit", "value", "est"]]

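#     # Plot (NB: `sn.factorplot` was renamed `sn.catplot` in seaborn 0.9 - update the call before re-enabling this cell)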
#     g = sn.factorplot(
#         x="year",
#         y="value",
#         hue="est",
#         col="par_unit",
#         col_wrap=3,
#         data=df,
#         kind="bar",
#         dodge=False,
#         sharex=False,
#         sharey=False,
#         alpha=0.5,
#         aspect=2,
#         legend=False,
#     )

#     # Rotate tick labels and tidy
#     for ax in g.axes.flatten():
#         for tick in ax.get_xticklabels():
#             tick.set(rotation=45)
#     plt.tight_layout()

#     # Save
#     out_path = os.path.join(out_fold, f"{stn_id}.png")
#     plt.savefig(out_path, dpi=200)
#     plt.close()

## 4. Generate output tables for Word

Not necessary for OSPAR reporting, so commented out below.

### 4.1. Table 1: Raw water chemistry

The code below is based on Section 2 of [notebook 5](http://nbviewer.jupyter.org/github/JamesSample/rid/blob/master/notebooks/word_data_tables.ipynb).

In [11]:
# engine = rid.connect_to_nivabase(user=user, pw=pw)
# tab_path = rid.copy_word_template(1, year)
# rid.write_word_water_chem_tables(rid_47_df, year, tab_path, engine)

### 4.2. Table 2: Estimated loads at each site

The code below is based on Section 3 of [notebook 5](http://nbviewer.jupyter.org/github/JamesSample/rid/blob/master/notebooks/word_data_tables.ipynb).

In [12]:
# engine = rid.connect_to_nivabase(user=user, pw=pw)
# tab_path = rid.copy_word_template(2, year)
# loads_csv = f"../../../Results/Loads_CSVs/loads_and_flows_all_sites_{year}.csv"
# rid.write_word_loads_table(rid_155_df, loads_csv, tab_path, engine)