In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
from matplotlib.backends.backend_pdf import PdfPages

plt.style.use("ggplot")
custom_palette = [
    "#E24A33",
    "#348ABD",
    "#988ED5",
    "#777777",
    "#FBC15E",
    "#8EBA42",
    "#FFB5B8",
    "#17BECF",
]
plt.rcParams["axes.prop_cycle"] = plt.cycler(color=custom_palette)

In [2]:
def get_teotil3_results_for_vassoms(st_yr, end_yr, vassom_list, agri_loss_model):
    """ """
    main_catches = [f"{i:03d}." for i in vassom_list]
    df = pd.read_csv(
        f"/home/jovyan/shared/common/teotil3/evaluation/teo3_results_nve2023_2013-2022_agri-{agri_loss_model}-loss.csv"
    )
    df = df.query(
        "(regine in @main_catches) and (year >= @st_yr) and (year <= @end_yr)"
    ).copy()
    df["År"] = df["year"]
    cols = [i for i in df.columns if i.split("_")[0] == "accum"]
    df = df[["regine", "År"] + cols]
    for col in df.columns:
        if col.endswith("_kg"):
            new_col = col.replace("_kg", "_tonnes")
            df[new_col] = df[col] / 1000
            del df[col]

    return df


def get_aggregation_dict_for_columns(par):
    """Make a dict mapping TEOTIL column names to summary columns with
    aggregation where necessary.

    Args
        par: Str. Either 'n' or 'p'

    Returns
        Dict with key's equal to output headings and values are lists
        of columns to aggregate.
    """
    if par.endswith("n"):
        agg_dict = {
            "Akvakultur": [f"accum_aquaculture_{par}_tonnes"],
            "Jordbruk": [f"accum_agriculture_{par}_tonnes"],
            "Avløp": [
                f"accum_large-wastewater_{par}_tonnes",
                f"accum_spredt_{par}_tonnes",
            ],
            "Industri": [f"accum_industry_{par}_tonnes"],
            "Bebygd": [f"accum_urban_{par}_tonnes"],
            "Bakgrunn": [
                f"accum_agriculture-background_{par}_tonnes",
                f"accum_upland_{par}_tonnes",
                f"accum_wood_{par}_tonnes",
                f"accum_lake_{par}_tonnes",
            ],
        }
    else:
        # No lake dep
        agg_dict = {
            "Akvakultur": [f"accum_aquaculture_{par}_tonnes"],
            "Jordbruk": [f"accum_agriculture_{par}_tonnes"],
            "Avløp": [
                f"accum_large-wastewater_{par}_tonnes",
                f"accum_spredt_{par}_tonnes",
            ],
            "Industri": [f"accum_industry_{par}_tonnes"],
            "Bebygd": [f"accum_urban_{par}_tonnes"],
            "Bakgrunn": [
                f"accum_agriculture-background_{par}_tonnes",
                f"accum_upland_{par}_tonnes",
                f"accum_wood_{par}_tonnes",
            ],
        }

    return agg_dict


def aggregate_parameters(df, par):
    """Aggregate columns in TEOTIL output to headings used in the report.

    Args
        df:    Dataframe of TEOTIL results
        par:   Str. Either 'n' or 'p'
        model: Str. Either 'teotil2' or 'teotil3'

    Returns
        Dataframe.
    """
    agg_dict = get_aggregation_dict_for_columns(par)
    for group, cols in agg_dict.items():
        df[group] = df[cols].sum(axis=1)

    df = df[["regine", "År"] + list(agg_dict.keys())]

    return df

In [3]:
# Pars of interest
par_list = ["totn", "totp"]

# Periods of interest
teo3_st_yr = 2013
teo3_end_yr = 2022

# Ares of interest
vassom_list = [21]

# Agri model
agri_loss_model = "annual"

In [4]:
# Get TEOTIL results
teo3_df = get_teotil3_results_for_vassoms(
    teo3_st_yr, teo3_end_yr, vassom_list, agri_loss_model
)

# Process data for each vassom
res_dict = {}
for par in par_list:
    # Standardise and aggregate output params
    par_df = aggregate_parameters(teo3_df, par)

    # Calculate totals
    par_df["Menneskeskapt"] = (
        par_df["Akvakultur"]
        + par_df["Jordbruk"]
        + par_df["Avløp"]
        + par_df["Industri"]
        + par_df["Bebygd"]
    )
    par_df["Totalt"] = par_df["Menneskeskapt"] + par_df["Bakgrunn"]
    res_dict[par] = par_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  par_df["Menneskeskapt"] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  par_df["Totalt"] = par_df["Menneskeskapt"] + par_df["Bakgrunn"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  par_df["Menneskeskapt"] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row

In [6]:
# Save to Excel
with pd.ExcelWriter(
    f"teotil3_output_by_vassom_{teo3_st_yr}-{teo3_end_yr}_agri-{agri_loss_model}-loss.xlsx"
) as writer:
    for par in par_list:
        par_df = res_dict[par].copy()
        for vassom, vassom_df in par_df.groupby("regine"):
            vassom_df = vassom_df.drop(columns="regine")
            vassom_df.round(2).to_excel(
                writer,
                sheet_name=f"{vassom[:-1]}_{par.upper()}_tonnes",
                index=False,
            )

In [10]:
# Create summary PDF with one page per vassdragsområde
sources = [
    "Akvakultur",
    "Jordbruk",
    "Avløp",
    "Industri",
    "Bebygd",
    "Bakgrunn",
]

with PdfPages(f"teotil3_output_by_vassom_{teo3_st_yr}-{teo3_end_yr}_agri-{agri_loss_model}-loss.pdf") as pdf:
    for regine in sorted(res_dict[par_list[0]]["regine"].unique()):
        fig, axs = plt.subplots(
            5, 2, figsize=(10, 12), gridspec_kw={"height_ratios": [1, 0.2, 1, 0.2, 1]}
        )
        if regine.endswith("Oslofjord."):
            fig.suptitle(f"{regine[:-1]}\n", fontsize=16)
        else:
            fig.suptitle(f"Vassdragsområde {regine[:-1]}\n", fontsize=16)
        fig.delaxes(axs[1, 0])
        fig.delaxes(axs[1, 1])
        fig.delaxes(axs[3, 0])
        fig.delaxes(axs[3, 1])

        for i, par in enumerate(par_list):
            df = res_dict[par][res_dict[par]["regine"] == regine].copy()
            df = df.drop(columns="regine").set_index("År")

            # Line chart
            df.plot(ax=axs[0, i], marker="o", legend=False)
            axs[0, i].set_title(f"{par.upper()}")
            axs[0, i].set_xlabel("")
            axs[0, i].set_ylabel(f"{par.upper()} (tonn)")

            # Stacked bar chart
            df[sources].plot(kind="bar", stacked=True, ax=axs[2, i], legend=False)
            axs[2, i].set_title(f"{par.upper()}")
            axs[2, i].set_xlabel("")
            axs[2, i].set_ylabel(f"{par.upper()} (tonn)")

            # Horizontal bar chart
            df_period = df[(df.index >= teo3_st_yr) & (df.index <= teo3_end_yr)][
                sources
            ]
            total_sources = df_period[sources].sum()
            percentages = (total_sources / total_sources.sum()) * 100
            percentages = percentages.sort_values()
            percentages.plot(kind="barh", ax=axs[4, i])
            axs[4, i].set_title(f"{par.upper()} i prosent\n(gjennomsnitt 2013-2022)")
            axs[4, i].set_xlabel("Prosent")
            axs[4, i].set_ylabel("")
            for index, value in enumerate(percentages):
                axs[4, i].text(value, index, f"{value:.1f}%", va="center")

        # Add legends below the specified subplots
        handles, labels = axs[0, 0].get_legend_handles_labels()
        fig.legend(
            handles, labels, loc="upper center", bbox_to_anchor=(0.5, 0.69), ncol=3
        )

        handles, labels = axs[2, 0].get_legend_handles_labels()
        fig.legend(
            handles, labels, loc="upper center", bbox_to_anchor=(0.5, 0.34), ncol=3
        )

        plt.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)