In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.backends.backend_pdf import PdfPages

# Use matplotlib's "ggplot" style sheet for the figures created in this notebook
plt.style.use("ggplot")

# Task 2.16: Historic time series for TOTN and TOTP

In [2]:
def aggregate_regions(df, par, out_fold=None):
    """Sum TEOTIL output for the main catchments in each region defined in the
       report.

    Every region is defined by a half-open range of main catchment numbers,
    optionally followed by extra individual catchments. For each region, the
    rows of 'df' whose 'regine' code belongs to the region are summed per year
    and rounded to whole numbers.

    Args
        df:       Dataframe of results aggregated to the correct column names for
                  the report. Must contain the columns 'regine' (main catchment
                  codes formatted like '001.') and 'År', plus the numeric
                  columns to be summed
        par:      Str. Either 'n' or 'p'. Only used to name the CSV files
        out_fold: None or str. Default None. Folder to save CSVs to, if desired.
                  The folder is created if it does not already exist

    Returns
        Dict of dataframes keyed by region name, with one row per year.
        Optionally, results for each region are saved to CSV as
        '{region}_{par}.csv' in 'out_fold'.
    """
    assert par in ("n", "p")

    # Map regions used in report to main catchments
    # Intervals are "Python-style" i.e. include first but not last element in range.
    # Any elements after the first two are extra catchments added individually
    regions_dict = {
        # Definerte kystavsnitt (chapter 5)
        "Norges kystområder": [1, 248, 315],
        "Sverige – Strømtangen fyr": [1, 3],
        "Indre Oslofjord": [5, 10],
        "Svenskegrensa – Lindesnes": [1, 24],
        "Lindesnes – Stad": [24, 92],
        "Stad – Russland": [92, 248],
        # Norske vannregioner (chapter 6)
        "Glomma": [1, 11],
        "Vest-Viken": [11, 18],
        "Agder": [18, 27],
        "Rogaland": [27, 41],
        "Hordaland": [41, 68],
        "Sogn og Fjordane": [68, 92],
        "Møre og Romsdal": [92, 117],
        "Trøndelag": [117, 144],
        "Nordland": [144, 186],
        "Troms": [186, 211],
        "Finnmark": [211, 248],
        # Norske forvaltingsplanområder (chapter 7)
        "Nordsjøen": [1, 91, 315],  # 315 is included here too in John Rune's Access db
        "Norskehavet": [91, 171],
        "Barentshavet": [171, 248],
    }

    # Create the output folder once, before the loop. 'exist_ok' avoids the
    # check-then-create race if the folder appears in the meantime
    if out_fold:
        os.makedirs(out_fold, exist_ok=True)

    result_dict = {}
    for region, catches in regions_dict.items():
        catch_nums = list(range(catches[0], catches[1])) + list(catches[2:])
        catch_list = [f"{i:03d}." for i in catch_nums]

        # 'regine' is non-numeric, so it is dropped by the numeric-only sum
        reg_df = (
            df[df["regine"].isin(catch_list)]
            .groupby("År")
            .sum(numeric_only=True)
            .reset_index()
            .round(0)
            .astype(int)
        )
        result_dict[region] = reg_df

        if out_fold:
            csv_path = os.path.join(out_fold, f"{region}_{par}.csv")
            reg_df.to_csv(csv_path, index=False)

    return result_dict


def get_aggregation_dict_for_columns(par, model="teotil2"):
    """Build the mapping from report headings to the TEOTIL columns behind them.

    Args
        par:   Str. Either 'n' or 'p'
        model: Str. Either 'teotil2' or 'teotil3'. Default 'teotil2'. Selects
               which model's column naming scheme to use

    Returns
        Dict with keys equal to the headings used in the report. Each value is
        the list of TEOTIL output columns to sum to get that heading.
    """
    assert par in ("n", "p")
    assert model in ("teotil2", "teotil3")

    if model == "teotil2":
        # TEOTIL2 already provides combined columns for the total and for the
        # anthropogenic diffuse / all point sources
        return {
            "Akvakultur": [f"accum_aqu_tot-{par}_tonnes"],
            "Jordbruk": [
                f"accum_agri_diff_tot-{par}_tonnes",
                f"accum_agri_pt_tot-{par}_tonnes",
            ],
            "Avløp": [f"accum_ren_tot-{par}_tonnes", f"accum_spr_tot-{par}_tonnes"],
            "Industri": [f"accum_ind_tot-{par}_tonnes"],
            "Bakgrunn": [
                f"accum_nat_diff_tot-{par}_tonnes",
                f"accum_urban_tot-{par}_tonnes",
            ],
            "Totalt": [f"accum_all_sources_tot-{par}_tonnes"],
            "Menneskeskapt": [
                f"accum_anth_diff_tot-{par}_tonnes",
                f"accum_all_point_tot-{par}_tonnes",
            ],
        }

    # TEOTIL3: name each source column once, then compose the headings from them
    agriculture = f"accum_agriculture_tot{par}_tonnes"
    agri_background = f"accum_agriculture-background_tot{par}_tonnes"
    aquaculture = f"accum_aquaculture_tot{par}_tonnes"
    industry = f"accum_industry_tot{par}_tonnes"
    lake = f"accum_lake_tot{par}_tonnes"
    large_wastewater = f"accum_large-wastewater_tot{par}_tonnes"
    spredt = f"accum_spredt_tot{par}_tonnes"
    urban = f"accum_urban_tot{par}_tonnes"
    upland = f"accum_upland_tot{par}_tonnes"
    wood = f"accum_wood_tot{par}_tonnes"

    # NOTE(review): 'urban' is listed under both "Bakgrunn" and "Menneskeskapt",
    # so those two headings do not add up to "Totalt" - confirm this is intended
    return {
        "Akvakultur": [aquaculture],
        "Jordbruk": [agriculture],
        "Avløp": [large_wastewater, spredt],
        "Industri": [industry],
        "Bakgrunn": [agri_background, urban, upland, wood, lake],
        "Totalt": [
            agriculture,
            agri_background,
            aquaculture,
            industry,
            lake,
            large_wastewater,
            spredt,
            urban,
            upland,
            wood,
        ],
        "Menneskeskapt": [
            agriculture,
            aquaculture,
            industry,
            large_wastewater,
            spredt,
            urban,
        ],
    }


def aggregate_parameters(df, par, model="teotil2"):
    """Aggregate columns in TEOTIL output to headings used in the report.

    Each report heading is the row-wise sum of the TEOTIL columns listed for it
    by get_aggregation_dict_for_columns(). The input dataframe is not modified.

    Args
        df:    Dataframe of TEOTIL results. Must contain 'regine', 'År' and the
               TEOTIL columns required for the chosen 'par' and 'model'
        par:   Str. Either 'n' or 'p'
        model: Str. Either 'teotil2' or 'teotil3'. Default 'teotil2'

    Returns
        New dataframe with columns 'regine', 'År' and one column per report
        heading.
    """
    agg_dict = get_aggregation_dict_for_columns(par, model=model)

    # Build the result in a fresh frame. Assigning the heading columns onto 'df'
    # itself would leave them behind on the caller's dataframe, where a later
    # call for the other parameter would silently overwrite them
    report_df = df[["regine", "År"]].copy()
    for group, cols in agg_dict.items():
        report_df[group] = df[cols].sum(axis=1)

    return report_df


def get_teotil2_results_main_catchments(st_yr, end_yr):
    """Download accumulated TEOTIL2 results for the main catchments.

    One CSV per year is read from the NIVANorge/teotil2 repository on GitHub.
    Rows are kept for main catchments '001.' to '247.' plus '315.', and only
    the 'regine' column, the year and the 'accum_*' columns are retained.

    Args
        st_yr:  Int. First year to read
        end_yr: Int. Last year to read (inclusive)

    Returns
        Dataframe with columns 'regine', 'År' and the 'accum_*' columns, with the
        rows for all years stacked.
    """
    # List of catchments flowing to coast. 315 flows into Skagerrak
    main_catches = [f"{i:03d}." for i in range(1, 248)] + ["315."]

    yearly_dfs = []
    for year in range(st_yr, end_yr + 1):
        url = f"https://raw.githubusercontent.com/NIVANorge/teotil2/main/data/norway_annual_output_data/teotil2_results_{year}.csv"
        year_df = pd.read_csv(url)
        year_df = year_df[year_df["regine"].isin(main_catches)].copy()
        year_df["År"] = year
        accum_cols = [
            col for col in year_df.columns if col.partition("_")[0] == "accum"
        ]
        yearly_dfs.append(year_df[["regine", "År"] + accum_cols])

    return pd.concat(yearly_dfs)


def get_teotil3_results_main_catchments(st_yr, end_yr, csv_path=None):
    """Read accumulated TEOTIL3 results for the main catchments from a CSV file.

    Rows are kept for main catchments '001.' to '247.' plus '315.' and for years
    between 'st_yr' and 'end_yr' (both inclusive). Only 'regine', the year
    (returned as 'År') and the 'accum_*' columns are retained. Accumulated
    columns ending in '_kg' are divided by 1000 and renamed to end in '_tonnes'.

    Args
        st_yr:    Int. First year to include
        end_yr:   Int. Last year to include
        csv_path: None or str. Default None. Path to a CSV of TEOTIL3 results
                  containing (at least) the columns 'regine' and 'year'. If
                  None, the evaluation file on the shared drive is used

    Returns
        Dataframe with columns 'regine' and 'År', followed by any 'accum_*'
        columns that are not in kg, followed by the converted '*_tonnes' columns.
    """
    if csv_path is None:
        csv_path = r"/home/jovyan/shared/common/teotil3/evaluation/teo3_results_nve2023_2013-2022.csv"

    # List of catchments flowing to coast. 315 flows into Skagerrak
    main_catches = [f"{i:03d}." for i in range(1, 248)] + ["315."]

    df = pd.read_csv(csv_path)
    keep_rows = df["regine"].isin(main_catches) & df["year"].between(st_yr, end_yr)
    df = df.loc[keep_rows]

    accum_cols = [col for col in df.columns if col.split("_")[0] == "accum"]
    kg_cols = [col for col in accum_cols if col.endswith("_kg")]
    other_cols = [col for col in accum_cols if not col.endswith("_kg")]

    id_df = df[["regine", "year"] + other_cols].rename(columns={"year": "År"})

    # Convert kg to tonnes in one step, building new columns rather than editing
    # the frame column by column. Only the trailing '_kg' is swapped, so a '_kg'
    # appearing elsewhere in a column name is left alone
    tonnes_df = (df[kg_cols] / 1000).rename(
        columns=lambda col: col[: -len("_kg")] + "_tonnes"
    )

    return pd.concat([id_df, tonnes_df], axis=1)

In [3]:
# Nutrient parameters to process
pars = ["n", "p"]

# Period to read for TEOTIL2
teo2_st_yr = 1990
teo2_end_yr = 2021

# Period to read for TEOTIL3
teo3_st_yr = 2013
teo3_end_yr = 2022

# Accumulated results for the main catchments from each model
teo2_df = get_teotil2_results_main_catchments(teo2_st_yr, teo2_end_yr)
teo3_df = get_teotil3_results_main_catchments(teo3_st_yr, teo3_end_yr)

In [4]:
# TEOTIL2: for each parameter, map the model columns to the report headings and
# then sum over the report regions (nothing is written to disk)
teo2_res_dict = {}
for par in pars:
    teo2_report_df = aggregate_parameters(teo2_df, par, model="teotil2")
    teo2_res_dict[par] = aggregate_regions(teo2_report_df, par, out_fold=None)

# Show one region as a quick check
teo2_res_dict["n"]["Norges kystområder"]

Unnamed: 0,År,Akvakultur,Jordbruk,Avløp,Industri,Bakgrunn,Totalt,Menneskeskapt
0,1990,7422,25330,0,3542,63245,99539,36649
1,1991,0,24985,0,3266,52614,80864,28606
2,1992,6518,24497,0,2498,61502,95015,33868
3,1993,8126,23560,17304,2876,58257,110122,52221
4,1994,10692,22947,14287,3094,55076,106096,51376
5,1995,13653,22932,18756,3088,62405,120834,58785
6,1996,15781,22770,18853,3582,46515,107500,61340
7,1997,18010,22605,18403,4408,61809,125235,63782
8,1998,20300,22441,18223,3559,56360,120884,64879
9,1999,20201,22279,17113,3109,56669,119371,63057


In [5]:
# TEOTIL3: for each parameter, map the model columns to the report headings and
# then sum over the report regions (nothing is written to disk)
teo3_res_dict = {}
for par in pars:
    teo3_report_df = aggregate_parameters(teo3_df, par, model="teotil3")
    teo3_res_dict[par] = aggregate_regions(teo3_report_df, par, out_fold=None)

# Show one region as a quick check
teo3_res_dict["n"]["Norges kystområder"]

Unnamed: 0,År,Akvakultur,Jordbruk,Avløp,Industri,Bakgrunn,Totalt,Menneskeskapt
0,2013,50472,43629,16577,2568,58653,171898,119129
1,2014,54726,47046,16999,2380,63816,184967,127587
2,2015,54543,45713,17784,2448,69966,190454,127845
3,2016,53579,41039,18020,2339,57700,172676,120864
4,2017,56944,45962,18488,2577,68383,192355,130895
5,2018,57149,51819,17571,2597,62710,191846,135499
6,2019,60572,45293,17913,2527,61411,187716,132551
7,2020,65198,53918,18580,2593,79737,220027,148389
8,2021,66835,42531,17383,2368,57401,186518,134847
9,2022,65344,45671,17655,2969,67502,199141,138124


In [6]:
# Save PNGs: one figure per region, with one panel per parameter comparing
# TEOTIL2 (dashed) against TEOTIL3 (solid)
plot_fold = r"/home/jovyan/shared/common/teotil3/teotil2_vs_teotil3_line_plots"

# savefig does not create missing folders, so make sure the target exists
os.makedirs(plot_fold, exist_ok=True)

for area in teo2_res_dict["n"].keys():
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
    for idx, par in enumerate(pars):
        # Tag the column names with the model so the series can be told apart
        df2 = teo2_res_dict[par][area].set_index("År")
        df2.columns = [f"{col} (TEO2)" for col in df2.columns]
        df3 = teo3_res_dict[par][area].set_index("År")
        df3.columns = [f"{col} (TEO3)" for col in df3.columns]

        # NOTE(review): df2 is drawn with legend=False, so the legend added by
        # df3.plot() presumably lists only the TEO3 series - confirm this is
        # intended
        df2.plot(ax=axes[idx], ls="--", legend=False)
        df3.plot(ax=axes[idx])
        axes[idx].set_title(f"{par.capitalize()} (tonnes)")

    # Work on the figure object directly instead of pyplot's "current figure"
    fig.tight_layout()
    png_path = os.path.join(plot_fold, area + ".png")
    fig.savefig(png_path, dpi=200, bbox_inches="tight")
    plt.close(fig)

In [7]:
# Save PDF: one page per region, with one panel per parameter comparing
# TEOTIL2 (dashed) against TEOTIL3 (solid)
plot_fold = r"/home/jovyan/shared/common/teotil3/teotil2_vs_teotil3_line_plots"
pdf_path = os.path.join(plot_fold, "teo2_vs_teo3_1990-2022.pdf")

# The PDF cannot be opened for writing if its folder is missing
os.makedirs(plot_fold, exist_ok=True)

with PdfPages(pdf_path) as pdf:
    for area in teo2_res_dict["n"].keys():
        fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
        for idx, par in enumerate(pars):
            # Tag the column names with the model so the series can be told apart
            df2 = teo2_res_dict[par][area].set_index("År")
            df2.columns = [f"{col} (TEO2)" for col in df2.columns]
            df3 = teo3_res_dict[par][area].set_index("År")
            df3.columns = [f"{col} (TEO3)" for col in df3.columns]
            df2.plot(ax=axes[idx], ls="--", legend=False)
            df3.plot(ax=axes[idx], legend=False)
            axes[idx].set_title(f"{par.capitalize()} (tonnes)")

        # Single legend placed outside the right-hand panel. It is attached to
        # that axes explicitly rather than to pyplot's "current axes"
        axes[1].legend(bbox_to_anchor=(1.05, 1), loc="upper left")
        fig.suptitle(area)
        fig.tight_layout()
        pdf.savefig(fig, dpi=200, bbox_inches="tight")
        plt.close(fig)

In [8]:
# Save to Excel: one sheet per region and parameter, with the TEOTIL2 and
# TEOTIL3 columns side by side
xl_path = r"/home/jovyan/shared/common/teotil3/teotil2_vs_teotil3_line_plots/teo2_vs_teo3_data_tables.xlsx"

# The workbook cannot be written if its folder is missing
os.makedirs(os.path.dirname(xl_path), exist_ok=True)

with pd.ExcelWriter(xl_path, engine="openpyxl") as writer:
    for area in teo2_res_dict["n"].keys():
        for par in pars:
            # Tag the column names with the model so the columns stay distinct
            df2 = teo2_res_dict[par][area].set_index("År")
            df2.columns = [f"{col} (TEO2)" for col in df2.columns]
            df3 = teo3_res_dict[par][area].set_index("År")
            df3.columns = [f"{col} (TEO3)" for col in df3.columns]

            # Outer join keeps every year present in either model
            df_combined = df2.join(df3, how="outer")

            # NOTE(review): Excel limits sheet names to 31 characters - check
            # this if longer region names are added
            df_combined.to_excel(writer, sheet_name=f"{area} ({par.capitalize()})")