In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
from matplotlib.backends.backend_pdf import PdfPages

# Base every figure on the "ggplot" style sheet, then override the colour
# cycle so successive series are drawn with the eight colours listed below.
plt.style.use("ggplot")
custom_palette = [
    "#E24A33", "#348ABD", "#988ED5", "#777777",
    "#FBC15E", "#8EBA42", "#FFB5B8", "#17BECF",
]
plt.rcParams["axes.prop_cycle"] = plt.cycler("color", custom_palette)

In [2]:
def get_teotil2_results_for_vassoms(st_yr, end_yr, vassom_list):
    """Read annual TEOTIL2 result files and keep the accumulated columns
    for the requested vassdragsområder.

    One CSV per year is downloaded from the NIVANorge/teotil2 repository
    on GitHub, so network access is required.

    Args
        st_yr:       Int. First year to read (inclusive)
        end_yr:      Int. Last year to read (inclusive)
        vassom_list: Iterable of ints. Vassdragsområde numbers. Each is
                     formatted as a zero-padded, three-digit ID followed
                     by '.' (e.g. 5 -> '005.') and matched against the
                     'regine' column

    Returns
        Dataframe with columns 'regine', 'År' and every column whose name
        begins with 'accum' (the text before the first underscore). Yearly
        frames are stacked without resetting the index.
    """
    # NOTE: the name 'main_catches' is referenced from the query string below
    main_catches = [f"{vassom:03d}." for vassom in vassom_list]

    annual_frames = []
    for year in range(st_yr, end_yr + 1):
        base_url = f"https://raw.githubusercontent.com/NIVANorge/teotil2/main/data/norway_annual_output_data/teotil2_results_{year}.csv"
        annual = pd.read_csv(base_url).query("regine in @main_catches").copy()
        annual["År"] = year
        accum_cols = [
            name for name in annual.columns if name.partition("_")[0] == "accum"
        ]
        annual_frames.append(annual[["regine", "År"] + accum_cols])

    return pd.concat(annual_frames)


def get_teotil3_results_for_vassoms(st_yr, end_yr, vassom_list, agri_loss_model):
    """Read TEOTIL3 results from the shared evaluation CSV and keep the
    accumulated columns for the requested vassdragsområder and years.

    Args
        st_yr:           Int. First year to keep (inclusive)
        end_yr:          Int. Last year to keep (inclusive)
        vassom_list:     Iterable of ints. Vassdragsområde numbers. Each is
                         formatted as a zero-padded, three-digit ID followed
                         by '.' (e.g. 5 -> '005.') and matched against the
                         'regine' column
        agri_loss_model: Str. Inserted into the results file name
                         ('..._agri-{agri_loss_model}-loss.csv')

    Returns
        Dataframe with columns 'regine', 'År' and every column whose name
        begins with 'accum' (the text before the first underscore). Columns
        ending in '_kg' are divided by 1000, renamed with '_tonnes' in place
        of '_kg' and moved to the end of the frame.
    """
    # NOTE: 'main_catches', 'st_yr' and 'end_yr' are referenced from the
    # query string below
    main_catches = [f"{vassom:03d}." for vassom in vassom_list]

    # The file name contains the fixed period 2013-2022; presumably years
    # outside that range simply return no rows - TODO confirm
    res = pd.read_csv(
        f"/home/jovyan/shared/common/teotil3/evaluation/teo3_results_nve2023_2013-2022_agri-{agri_loss_model}-loss.csv"
    )
    res = res.query(
        "(regine in @main_catches) and (year >= @st_yr) and (year <= @end_yr)"
    ).copy()
    res["År"] = res["year"]

    accum_cols = [name for name in res.columns if name.partition("_")[0] == "accum"]
    res = res[["regine", "År"] + accum_cols]

    # List the kg columns first so the frame is not iterated while it changes
    kg_cols = [name for name in res.columns if name.endswith("_kg")]
    for kg_col in kg_cols:
        res[kg_col.replace("_kg", "_tonnes")] = res.pop(kg_col) / 1000

    return res


def get_aggregation_dict_for_columns(par, model="teotil2"):
    """Build the mapping from report headings to the TEOTIL output columns
    that are summed to produce each heading.

    Args
        par:   Str. Either 'n' or 'p'
        model: Str. Either 'teotil2' (default) or 'teotil3'. The two models
               use different column naming, so each has its own mapping

    Returns
        Dict whose keys are the output headings (in report order) and whose
        values are lists of column names to aggregate.

    Raises
        AssertionError if 'par' or 'model' is not one of the allowed values.
    """
    assert par in ("n", "p")
    assert model in ("teotil2", "teotil3")

    if model == "teotil3":
        return {
            "Akvakultur": [f"accum_aquaculture_tot{par}_tonnes"],
            "Jordbruk": [f"accum_agriculture_tot{par}_tonnes"],
            "Avløp": [
                f"accum_large-wastewater_tot{par}_tonnes",
                f"accum_spredt_tot{par}_tonnes",
            ],
            "Industri": [f"accum_industry_tot{par}_tonnes"],
            "Bebygd": [f"accum_urban_tot{par}_tonnes"],
            "Bakgrunn": [
                f"accum_agriculture-background_tot{par}_tonnes",
                f"accum_upland_tot{par}_tonnes",
                f"accum_wood_tot{par}_tonnes",
                f"accum_lake_tot{par}_tonnes",
            ],
        }

    # model == "teotil2"
    return {
        "Akvakultur": [f"accum_aqu_tot-{par}_tonnes"],
        "Jordbruk": [
            f"accum_agri_diff_tot-{par}_tonnes",
            f"accum_agri_pt_tot-{par}_tonnes",
        ],
        "Avløp": [f"accum_ren_tot-{par}_tonnes", f"accum_spr_tot-{par}_tonnes"],
        "Industri": [f"accum_ind_tot-{par}_tonnes"],
        "Bebygd": [f"accum_urban_tot-{par}_tonnes"],
        "Bakgrunn": [f"accum_nat_diff_tot-{par}_tonnes"],
    }


def aggregate_parameters(df, par, model):
    """Aggregate columns in TEOTIL output to headings used in the report.

    The input dataframe is left unchanged: summary columns are built in a
    new frame, so the same raw results can safely be aggregated repeatedly
    (e.g. once per parameter).

    Args
        df:    Dataframe of TEOTIL results. Must contain 'regine', 'År' and
               the source columns listed for 'par' and 'model' by
               get_aggregation_dict_for_columns
        par:   Str. Either 'n' or 'p'
        model: Str. Either 'teotil2' or 'teotil3'

    Returns
        New dataframe with columns 'regine', 'År' and one column per report
        heading, each the row-wise sum of its source columns. The index of
        'df' is preserved.

    Raises
        KeyError if any required column is missing from 'df'.
    """
    agg_dict = get_aggregation_dict_for_columns(par, model=model)

    # Start from a copy of the ID columns so nothing is written back to 'df'
    agg_df = df[["regine", "År"]].copy()
    for group, cols in agg_dict.items():
        agg_df[group] = df[cols].sum(axis=1)

    return agg_df

In [3]:
# Parameters of interest ('n' and 'p'; used to pick the TOTN/TOTP columns)
par_list = ["n", "p"]

# Periods of interest. TEOTIL2 output is read for teo2_st_yr to teo2_end_yr;
# TEOTIL3 output is read for teo3_st_yr to teo3_end_yr
teo2_st_yr = 1996
teo2_end_yr = 2022
teo3_st_yr = 2013
teo3_end_yr = 2022

# Define vassdragsområder for Oslofjord
#  - Inner Oslofjord vassoms 005 to 009
#  - Outer Oslofjord vassoms 001 to 004 and 010 to 017
#  - Whole Oslofjord vassoms 001 to 017 (inner + outer)
# NOTE(review): an earlier version of this comment gave 014 as the upper
# bound, but the ranges below run to 017 - confirm which is intended.
inner_vassoms = list(range(5, 10))
outer_vassoms = list(range(1, 5)) + list(range(10, 18))

# Keys end with "." like regine IDs, so the same string handling can be
# applied to both when building sheet names and plot titles
vassom_dict = {
    "Indre_Oslofjord.": inner_vassoms,
    "Ytre_Oslofjord.": outer_vassoms,
    # Derived from the two lists above so the three definitions cannot drift
    "Hele_Oslofjord.": sorted(inner_vassoms + outer_vassoms),
}

# Selects the TEOTIL3 results file and is embedded in the output file names
agri_loss_model = "risk"

In [4]:
# Read both model outputs for every vassdragsområde in the whole-fjord list
whole_fjord_vassoms = vassom_dict["Hele_Oslofjord."]
teo2_df = get_teotil2_results_for_vassoms(
    teo2_st_yr, teo2_end_yr, whole_fjord_vassoms
)
teo3_df = get_teotil3_results_for_vassoms(
    teo3_st_yr, teo3_end_yr, whole_fjord_vassoms, agri_loss_model
)

# Build one combined TEOTIL2 + TEOTIL3 series per parameter
res_dict = {}
for par in par_list:
    # Collapse the model-specific columns to the shared report headings
    teo2_par_df = aggregate_parameters(teo2_df, par, "teotil2")
    teo3_par_df = aggregate_parameters(teo3_df, par, "teotil3")

    # Mean per vassom over the TEOTIL3 period, for each model
    teo2_overlap = teo2_par_df.query("@teo3_st_yr <= `År` <= @teo3_end_yr")
    teo3_overlap = teo3_par_df.query("@teo3_st_yr <= `År` <= @teo3_end_yr")
    teo2_avg_par_df = teo2_overlap.drop(columns="År").groupby("regine").mean()
    teo3_avg_par_df = teo3_overlap.drop(columns="År").groupby("regine").mean()

    # Bias factors (TEOTIL3 mean / TEOTIL2 mean) per vassom and source:
    # undefined ratios (NaN) become 0, infinite ratios become 1, and the
    # result is limited to the range [0, 10]
    bias_df = (
        (teo3_avg_par_df / teo2_avg_par_df)
        .fillna(0)
        .replace([np.inf, -np.inf], 1)
        .clip(lower=0, upper=10)
        .reset_index()
    )

    # Scale the TEOTIL2 years that precede the TEOTIL3 period. Factor
    # columns arrive from the merge with a "_fac" suffix and are removed
    # as soon as they have been applied
    teo2_par_df = pd.merge(
        teo2_par_df.query("@teo2_st_yr <= `År` < @teo3_st_yr"),
        bias_df,
        how="left",
        on="regine",
        suffixes=("", "_fac"),
    )
    for src in bias_df.columns.drop("regine"):
        teo2_par_df[src] = teo2_par_df[src] * teo2_par_df.pop(f"{src}_fac")

    # Stack the bias-corrected TEOTIL2 years on top of the TEOTIL3 years
    # and order the result by vassom and year
    par_df = (
        pd.concat([teo2_par_df, teo3_par_df], axis="rows")
        .sort_values(["regine", "År"])
        .reset_index(drop=True)
    )

    # Totals: "Menneskeskapt" is the sum of the five non-background
    # sources (added left to right); "Totalt" adds "Bakgrunn" to that
    menneskeskapt = par_df["Akvakultur"]
    for src in ("Jordbruk", "Avløp", "Industri", "Bebygd"):
        menneskeskapt = menneskeskapt + par_df[src]
    par_df["Menneskeskapt"] = menneskeskapt
    par_df["Totalt"] = par_df["Menneskeskapt"] + par_df["Bakgrunn"]

    res_dict[par] = par_df

In [5]:
# Totals for Oslofjord: for each named area, sum its vassdragsområder per
# year and append the result to the per-parameter tables, using the area
# name as the 'regine' value.
for area, vassom_list in vassom_dict.items():
    catches = [f"{i:03d}." for i in vassom_list]
    for par in par_list:
        # Drop rows for this area left by a previous run of this cell, so
        # re-executing it replaces the totals instead of duplicating them
        base_df = res_dict[par].query("regine != @area")

        area_df = (
            base_df.query("regine in @catches")
            .groupby("År")
            .sum(numeric_only=True)
            .reset_index()
        )
        area_df["regine"] = area

        # ignore_index avoids repeated index labels in the combined table
        res_dict[par] = pd.concat(
            [base_df, area_df], axis="rows", ignore_index=True
        )

In [6]:
# Save to Excel: one worksheet per (vassom or area, parameter) combination.
# The trailing "." of the regine/area ID is stripped from the sheet name and
# values are rounded to two decimals.
xlsx_path = (
    f"teotil2-3_oslofjord_data_by_vassom_1996-2022_agri-{agri_loss_model}-loss.xlsx"
)
with pd.ExcelWriter(xlsx_path) as writer:
    for par in par_list:
        for vassom, vassom_df in res_dict[par].groupby("regine"):
            sheet_name = f"{vassom[:-1]}_TOT{par.upper()}_tonnes"
            vassom_df.drop(columns="regine").round(2).to_excel(
                writer, sheet_name=sheet_name, index=False
            )

In [7]:
# Create summary PDF with one page per vassdragsområde (and per named area).
# Each page has two columns (TOTN, TOTP) and three chart rows: line chart of
# all series, stacked bars by source, and percentage share by source.
sources = [
    "Akvakultur",
    "Jordbruk",
    "Avløp",
    "Industri",
    "Bebygd",
    "Bakgrunn",
]

with PdfPages(
    f"teotil2-3_oslofjord_summary_by_vassom_1996-2022_agri-{agri_loss_model}-loss.pdf"
) as pdf:
    for regine in res_dict["n"]["regine"].unique():
        # Rows 0, 2 and 4 hold the charts. The thin rows 1 and 3 are removed
        # below; presumably they leave room for the figure-level legends
        fig, axs = plt.subplots(
            5, 2, figsize=(10, 12), gridspec_kw={"height_ratios": [1, 0.2, 1, 0.2, 1]}
        )
        if regine.endswith("Oslofjord."):
            fig.suptitle(f"{regine[:-1]}\n", fontsize=16)
        else:
            fig.suptitle(f"Vassdragsområde {regine[:-1]}\n", fontsize=16)
        for spacer_row in (1, 3):
            for col_idx in (0, 1):
                fig.delaxes(axs[spacer_row, col_idx])

        for i, par in enumerate(["n", "p"]):
            df = res_dict[par][res_dict[par]["regine"] == regine].copy()
            df = df.drop(columns="regine").set_index("År")

            # Line chart of every column, with a dashed line at the first
            # TEOTIL3 year (taken from the config rather than hard-coded)
            df.plot(ax=axs[0, i], marker="o", legend=False)
            axs[0, i].axvline(teo3_st_yr, c="k", ls="--", lw=1)
            axs[0, i].set_title(f"TOT{par.upper()}")
            axs[0, i].set_xlabel("")
            axs[0, i].set_ylabel(f"TOT{par.upper()} (tonn)")

            # Stacked bar chart of the individual sources
            df[sources].plot(kind="bar", stacked=True, ax=axs[2, i], legend=False)
            axs[2, i].set_title(f"TOT{par.upper()}")
            axs[2, i].set_xlabel("")
            axs[2, i].set_ylabel(f"TOT{par.upper()} (tonn)")

            # Horizontal bar chart: each source's share of the summed load
            # over the TEOTIL3 period (shares of sums equal shares of means)
            df_period = df[(df.index >= teo3_st_yr) & (df.index <= teo3_end_yr)][
                sources
            ]
            total_sources = df_period.sum()
            percentages = (total_sources / total_sources.sum()) * 100
            percentages = percentages.sort_values()
            percentages.plot(kind="barh", ax=axs[4, i])
            # Title years follow the config so they always match df_period
            axs[4, i].set_title(
                f"TOT{par.upper()} i prosent\n(gjennomsnitt {teo3_st_yr}-{teo3_end_yr})"
            )
            axs[4, i].set_xlabel("Prosent")
            axs[4, i].set_ylabel("")
            for index, value in enumerate(percentages):
                axs[4, i].text(value, index, f"{value:.1f}%", va="center")

        # Add figure-level legends, positioned in figure coordinates, using
        # the handles from the left-hand line chart and stacked bar chart
        handles, labels = axs[0, 0].get_legend_handles_labels()
        fig.legend(
            handles, labels, loc="upper center", bbox_to_anchor=(0.5, 0.69), ncol=3
        )

        handles, labels = axs[2, 0].get_legend_handles_labels()
        fig.legend(
            handles, labels, loc="upper center", bbox_to_anchor=(0.5, 0.34), ncol=3
        )

        fig.tight_layout()
        pdf.savefig(fig)
        # Close each page's figure so figures do not accumulate in memory
        plt.close(fig)