In [None]:
# # Run this and then restart the kernel at the start of each session to install
# # 'teotil3' in development (editable) mode. '%pip' is used instead of '!pip' so
# # that the package is installed into the environment of the running kernel
# %pip install -e /home/jovyan/projects/teotil3/

In [1]:
import glob
import os

import geopandas as gpd
import nivapy3 as nivapy
import pandas as pd
import teotil3 as teo
from sqlalchemy import text
from tqdm.notebook import tqdm

In [2]:
# Connect to the PostGIS database (username and password are prompted for
# interactively).
# NOTE(review): the admin login is presumably required because later cells
# delete from (and optionally append to) tables in the 'teotil3' schema -
# confirm. The non-admin alternative is kept below for reference
eng = nivapy.da.connect_postgis(admin=True)
# eng = nivapy.da.connect_postgis()

Username:  ········
Password:  ········


Connection successful.


# TEOTIL3: Upload annual data

First run notebooks 01 to 07 to clean and prepare the raw annual data. **This notebook will upload the processed annual data for the specified time period** (removing any older data previously uploaded for the same period). It is usually a good idea to **upload an entire cleaned dataset** for the period from 2013 to the final year of interest, as this ensures that TEOTIL3 remains up-to-date with changes in primary data sources, like Miljødirektoratet's database.

In [3]:
# Period of interest. Both the first and the final year are included
st_yr = 2013
end_yr = 2023

# 'range' excludes its upper bound, so add one to include 'end_yr'
years = range(st_yr, end_yr + 1)

In [4]:
# Delete data already in the database for these years (i.e. remove any older
# data previously uploaded for the same period).
# NOTE(review): the upload calls in the sections further down this notebook are
# currently commented out. Running this cell without re-enabling them removes
# the data for the period without replacing it - confirm before running.

# Tables to clear, in the order the deletes are issued (point source values
# before point source locations)
tables_to_clear = [
    "point_source_values",
    "point_source_locations",
    "spredt_inputs",
    "agri_inputs",
]
year_range = {"st_yr": st_yr, "end_yr": end_yr}

# Use one connection and one transaction for all tables: either every delete
# is committed or, if any statement fails, none are
with eng.connect() as conn:
    with conn.begin():
        for table in tables_to_clear:
            # Table names come from the fixed list above (not from user input),
            # so interpolating them is safe. The years are bound parameters
            sql = text(
                f"""DELETE FROM teotil3.{table}
                    WHERE year >= :st_yr
                    AND year <= :end_yr
                """
            )
            result = conn.execute(sql, year_range)

            # Report what was removed, since this step is destructive
            print(f"Deleted {result.rowcount} rows from teotil3.{table}.")

## 1. Large wastewater treatment plants and industry

This step processes the raw data from three files:

 * `large_wastewater_{year}_raw.xlsx`
 * `metals_{year}_raw.xlsx`
 * `industry_{year}_raw.xlsx`

Files for each year are hosted on `shared`:

    shared/common/teotil3/point_data/{year}

In [5]:
# Read the large wastewater, metals and industry data for each year in the
# period. The files are expected in a separate folder for each year
for year in years:
    print("#", year, "#################")
    data_fold = f"/home/jovyan/shared/common/teotil3/point_data/{year}"
    # Two objects are returned: 'loc_gdf' is destined for the
    # 'point_source_locations' table and 'df' for 'point_source_values' (see
    # the upload calls below)
    loc_gdf, df = teo.preprocessing.read_large_wastewater_and_industry_data(
        data_fold, year, eng
    )

    # NOTE(review): both uploads below are commented out, so as written this
    # cell only reads the data and prints any warnings. Uncomment to write to
    # the database

    # # Add new sites to the database
    # loc_gdf.to_postgis(
    #     "point_source_locations",
    #     con=eng,
    #     schema="teotil3",
    #     if_exists="append",
    #     index=False,
    # )

    # # Add values to database
    # df.to_sql(
    #     "point_source_values",
    #     con=eng,
    #     schema="teotil3",
    #     if_exists="append",
    #     index=False,
    # )

# 2013 #################
# 2014 #################
# 2015 #################
# 2016 #################
# 2017 #################
# 2018 #################
# 2019 #################
1 locations do not have outlet co-ordinates in this year's data.
         site_id                                      name
90  2311.0001.01  Hav Line - Slakteskipet Norwegian Gannet
# 2020 #################
1 locations do not have outlet co-ordinates in this year's data.
         site_id                                      name
83  2311.0001.01  Hav Line - Slakteskipet Norwegian Gannet
# 2021 #################
1 locations do not have outlet co-ordinates in this year's data.
         site_id                                      name
87  2311.0001.01  Hav Line - Slakteskipet Norwegian Gannet
# 2022 #################
1 locations do not have outlet co-ordinates in this year's data.
         site_id                                      name
90  2311.0001.01  Hav Line - Slakteskipet Norwegian Gannet
# 2023 ###########

## 2. Small wastewater treatment plants

Process files named `small_wastewater_{year}_raw.xlsx`.

In [6]:
# Read the small wastewater data for each year in the period. Each workbook is
# stored in the folder for its year and the data are read from "Sheet1"
for year in years:
    print("#", year, "#################")
    xl_path = f"/home/jovyan/shared/common/teotil3/point_data/{year}/small_wastewater_{year}_raw.xlsx"
    df = teo.preprocessing.read_raw_small_wastewater_data(xl_path, "Sheet1", year, eng)

    # NOTE(review): the upload below is commented out, so as written this cell
    # only reads the data. Uncomment to append to 'teotil3.spredt_inputs'

    # # Add to database
    # df.to_sql(
    #     "spredt_inputs",
    #     con=eng,
    #     schema="teotil3",
    #     if_exists="append",
    #     index=False,
    # )

# 2013 #################
# 2014 #################
# 2015 #################
# 2016 #################
# 2017 #################
# 2018 #################
# 2019 #################
# 2020 #################
# 2021 #################
# 2022 #################
# 2023 #################


## 3. Aquaculture

In [7]:
# Read the raw aquaculture data for each year in the period and estimate
# nutrient inputs from it
for year in years:
    print("#", year, "#################")
    # NOTE: the workbook name ("fiske_oppdret") and the worksheet name
    # ("fiskeoppdrett") are spelled differently
    xl_path = f"/home/jovyan/shared/common/teotil3/point_data/{year}/fiske_oppdret_{year}_raw.xlsx"
    # Annual copper usage for the year, passed to the estimation step below
    cu_tonnes = teo.preprocessing.get_annual_copper_usage_aquaculture(year)
    loc_gdf, df = teo.preprocessing.read_raw_aquaculture_data(
        xl_path,
        f"fiskeoppdrett_{year}",
        year,
    )
    # NOTE(review): 'species_ids' presumably restricts the calculation to
    # specific species codes - confirm what 71401 and 71101 represent
    df = teo.preprocessing.estimate_aquaculture_nutrient_inputs(
        df, year, eng, cu_tonnes=cu_tonnes, species_ids=[71401, 71101]
    )

    # NOTE(review): both uploads below are commented out, so as written this
    # cell only reads and processes the data. Uncomment to write to the
    # database

    # # Add new sites to the database
    # loc_gdf.to_postgis(
    #     "point_source_locations",
    #     con=eng,
    #     schema="teotil3",
    #     if_exists="append",
    #     index=False,
    # )

    # # Add values to database
    # df.to_sql(
    #     "point_source_values",
    #     con=eng,
    #     schema="teotil3",
    #     if_exists="append",
    #     index=False,
    # )

# 2013 #################
The total annual copper lost to water from aquaculture is 923.1 tonnes.
# 2014 #################
The total annual copper lost to water from aquaculture is 960.5 tonnes.
# 2015 #################
The total annual copper lost to water from aquaculture is 980.9 tonnes.
# 2016 #################
The total annual copper lost to water from aquaculture is 1134.8 tonnes.
# 2017 #################
The total annual copper lost to water from aquaculture is 1217.2 tonnes.
# 2018 #################
The total annual copper lost to water from aquaculture is 1382.1 tonnes.
# 2019 #################
The total annual copper lost to water from aquaculture is 1443.3 tonnes.
# 2020 #################
The total annual copper lost to water from aquaculture is 1308.1 tonnes.
# 2021 #################
The total annual copper lost to water from aquaculture is 932.4 tonnes.
# 2022 #################
The total annual copper lost to water from aquaculture is 374.0 tonnes.
# 2023 #################


## 4. Agriculture

In [8]:
# Version of agri data to use. This is the suffix of the data folder name
# ('agri_data_v{agri_version}')
agri_version = "20241122"

# Process agricultural data
agri_fold = r"/home/jovyan/shared/common/teotil3/agri_data"
data_fold = os.path.join(agri_fold, f"agri_data_v{agri_version}")
for year in years:
    print("#", year, "#################")

    # Read NIBIO data
    df = teo.preprocessing.read_raw_agri_data(year, data_fold)

    # NOTE(review): the upload below is commented out, so as written this cell
    # only reads the data. Uncomment to append to 'teotil3.agri_inputs'

    # # Add values to database
    # df.to_sql(
    #     "agri_inputs",
    #     con=eng,
    #     schema="teotil3",
    #     if_exists="append",
    #     index=False,
    # )

# 2013 #################
# 2014 #################
# 2015 #################
# 2016 #################
# 2017 #################
# 2018 #################
# 2019 #################
# 2020 #################
# 2021 #################
# 2022 #################
# 2023 #################


## 5. HBV modelled discharge from NVE

In [None]:
# Period of data to upload (i.e. final year in dataset; the 'data_supply_year'
# stored with the data is final_year + 1)
final_year = 2023

In [None]:
# # Folder containing modelled data
# data_fold = r"/home/jovyan/shared/common/teotil3/nve_hbv_data"

# df_list = []
# search_path = os.path.join(data_fold, f"RID_{final_year}", "hbv_*.var")
# flist = glob.glob(search_path)

# # Get number of days between 1990 and year of interest
# days = len(pd.date_range(start="1990-01-01", end="%s-12-31" % final_year, freq="D"))

# for fpath in flist:
#     name = os.path.split(fpath)[1]
#     vassom = name.split("_")[1][-7:-4]

#     # NOTE: 'delim_whitespace' is deprecated from pandas 2.2. Use sep=r"\s+"
#     # instead if running with a recent version of pandas
#     df = pd.read_csv(
#         fpath, delim_whitespace=True, header=None, names=["date", "flow_m3/s"]
#     )
#     df["date"] = pd.to_datetime(df["date"], format="%Y%m%d/1200")
#     df["vassom"] = vassom
#     df["data_supply_year"] = final_year + 1
#     df = df[["data_supply_year", "vassom", "date", "flow_m3/s"]]

#     # Check st, end and length
#     assert df["date"].iloc[0] == pd.Timestamp(
#         "1990-01-01"
#     ), "Series does not start on 01/01/1990."
#     assert df["date"].iloc[-1] == pd.Timestamp("%s-12-31" % final_year), (
#         "Series does not end on 31/12/%s." % final_year
#     )
#     assert len(df) == days, "Unexpected length for new series."

#     df_list.append(df)

# df = pd.concat(df_list, axis="rows")
# assert df.duplicated(["data_supply_year", "vassom", "date"], keep=False).sum() == 0

# print(f"{len(df)/1e6:.1f} million rows to insert.")

In [None]:
# %%time

# # The database can't cope with writing millions of rows directly from pandas
# # Instead, manually split the dataframe into chunks and write one
# # at a time
# chunk_size = 100000

# table_name = "nve_hbv_discharge"

# # Write chunks in append mode
# chunks = [df[i : i + chunk_size] for i in range(0, df.shape[0], chunk_size)]
# for chunk in tqdm(chunks):
#     chunk.to_sql(
#         table_name,
#         eng,
#         schema="teotil3",
#         index=False,
#         if_exists="append",
#         method="multi",
#     )

## 6. Checking

### 6.1. Check point source locations

The code below plots the point source locations for a specific year. The map provides a useful way to identify obvious co-ordinate errors.

**It is a good idea to check one year at a time, because errors often overlap**.

See also notebook `T2-12b_check_outlet_locs.ipynb`, which checks the outlet co-ordinates provided by MDir during autumn 2023. This notebook identifies several issues with the industry and wastewater data in MDir's databases. These issues have been sent to Torstein for checking and some of them have been corrected, but not all. Once all issues are fixed, a complete new set of site and outlet co-ordinates should be requested and the database updated. See the issue [here](https://github.com/NIVANorge/teotil3/issues/27) for details.

In [None]:
# Year to plot
year = 2023
teo.vis.point_sources_map(year, eng, loc_type="outlet")