In [1]:
import time

import matplotlib.pyplot as plt
import nivapy3 as nivapy
import pandas as pd

# Select matplotlib's "ggplot" style sheet for the rest of the session
plt.style.use("ggplot")

# Sensitivity to eutrophication

# Part 1: Data download

Query the Vannmiljø API to download monitoring data for the waterbodies of interest, and save the results to an Excel file for later use.

## 1. Waterbodies of interest

In [2]:
# Load the waterbody list supplied by MDir and trim stray whitespace around the
# waterbody IDs, so that otherwise-identical IDs compare equal
xl_path = r"../data/mdir_waterbodies_list.xlsx"
wb_df = pd.read_excel(xl_path).assign(
    VannforekomstID=lambda frame: frame["VannforekomstID"].str.strip()
)

# Show every row that shares its ID with another row, then keep only the last
# occurrence of each ID
is_dup = wb_df.duplicated("VannforekomstID", keep=False)
print("The following waterbodies are duplicated. Only the last will be kept:")
display(wb_df[is_dup])
wb_df = wb_df.drop_duplicates("VannforekomstID", keep="last")

The following waterbodies are duplicated. Only the last will be kept:


Unnamed: 0,VannforekomstID,Id,Navn.vannforekomst,Breddegrad,Lengdegrad,Zero,Areal (km2),Type,Opphold.bunnvann,NIVA.data,oppdrag
141,050-106-R,,,,,,,R,,,Hovedoppdrag
142,050-105-R,,,,,,,R,,,Hovedoppdrag
153,050-105-R,R01,Bjoreio overføring fra Sysenvatnet - dam Tveito,60.4217,7.2528,0.0,17.369,R,,,
154,050-106-R,R02,Isdølo Isdalsvatnet - Bjoreio,60.4302,7.2762,0.0,3.356,R,,Vannforekomsten oppstrøms er med i referanseel...,


Note that many of the waterbodies are missing co-ordinate information in the file from MDir, so they cannot be plotted on the map below.

In [3]:
# Preview the first rows of the waterbody table
display(wb_df.head())

# Map the waterbodies using the latitude/longitude columns from the MDir file;
# markers are clustered and the popup shows the waterbody name
wb_map_kwargs = {
    "lat_col": "Breddegrad",
    "lon_col": "Lengdegrad",
    "cluster": True,
    "popup": "Navn.vannforekomst",
}
nivapy.spatial.quickmap(wb_df, **wb_map_kwargs)

Unnamed: 0,VannforekomstID,Id,Navn.vannforekomst,Breddegrad,Lengdegrad,Zero,Areal (km2),Type,Opphold.bunnvann,NIVA.data,oppdrag
0,0120000034-C,C01,Merdø - Hasseltangen,58.3998,8.7617,0.0,8.39,C,Kort,,Hovedoppdrag
1,0121000300-1-C,C02,Grosfjorden - indre,58.3196,8.5912,0.0,5.574,C,Oksygenfattig (og moderat),,Hovedoppdrag
2,0121010500-1-C,C03,Lillesandsfjorden,58.245,8.3883,0.0,2.064,C,Moderat,Ja,Hovedoppdrag
3,0130010301-2-C,C04,Østergapet - indre,58.1141,8.0348,0.0,22.523,C,Kort,,Hovedoppdrag
4,0131010200-C,C05,Høllefjorden,58.0703,7.8092,0.0,1.729,C,Moderat,,Hovedoppdrag


## 2. User options

In [4]:
# Define parameters and time period of interest
# `pars` is the list of parameter IDs sent to the API as "ParameterIDFilter"
# in the download cell further down.
pars = [
    "ANDEKNPR",
    "ANDEKNSK",
    "ANDEKNSK_PKT",
    "ANEQR_E",
    "ANNEQR_E",
    "ANNVGR",
    "ANNVGRTO",
    "ANTIZM",
    "ASPT",
    "BENEQR_E",
    "BENEQR_G",
    "CYANOM",
    "FIEINDW3",
    "HBDEKNPS",
    "HBI2",
    "KLFA",
    "LBNEQR_E",
    "LBNEQR_G",
    "MAEQR_E",
    "MAH",
    "MANEQR_E",
    "MBDEKNPS",
    "MBDEKNSK",
    "MBDEKNSK_PKT",
    "MBNEQR_G",
    "MBNEQR_MID",
    "MFNEQR_E",
    "MHDEKNPS",
    "MHH",
    "MSMDI1",
    "MSMDI2",
    "MSMDI3",
    "N-NH3",
    "N-NH4",
    "N-NO2",
    "N-NO3",
    "N-SNOX",
    "N-TOT",
    "NQI1",
    "NQI1_MID",
    "O2",
    "P-ORTO",
    "P-PART",
    "P-PO4",
    "P-TOT",
    "PIT",
    "PON",
    "POP",
    "PPBIOMTOVO",
    "PPNEQR_E",
    "PPTI",
    "RSL4",
    "RSL5",
    "RSLA1",
    "RSLA2",
    "RSLA3",
    "SI-SIO2",
    "SIO2",
    "STS",
    "TIANTL",
    "TOC",
    "TON",
    "TSM",
]
# First and last year of the sampling period; the download cell expands these
# to "<st_yr>-01-01" and "<end_yr>-12-31".
st_yr, end_yr = 1960, 2024
# Excel file that the tidied download is written to.
res_xlsx = r"../data/vannmiljo_export_for_waterbodies.xlsx"

In [5]:
# NOTE: Disabled alternative that requests all waterbodies in one API call
# (with the parameter filter switched off). Kept for reference only; the
# per-waterbody loop further down is the code that actually runs.
# The output path below uses `res_xlsx`, the name defined in the user-options
# cell (this snippet previously referred to an undefined `res_xl_path`).
# # Query API
# wb_list = wb_df['VannforekomstID'].dropna().tolist()
# filter_dict = {
#     "WaterBodyIDFilter": wb_list,
#     "FromDateSamplingTime": f"{st_yr}-01-01",
#     "ToDateSamplingTime": f"{end_yr}-12-31",
#     # "ParameterIDFilter": pars,
#     "FromRegDate": "1900-01-01",
# }
# df = nivapy.da.post_data_to_vannmiljo("GetRegistrations", data=filter_dict)
# df.rename(columns={'CoordX':'UTM33East', 'CoordY':'UTM33North'}, inplace=True)
# df.to_excel(res_xlsx)

In [6]:
def query_api_with_retries(data, retries=3, delay=5, verbose=False):
    """Call the Vannmiljø "GetRegistrations" endpoint, retrying on KeyError.

    Args:
        data:    Filter dictionary passed unchanged to
                 nivapy.da.post_data_to_vannmiljo.
        retries: Maximum number of attempts. Values below 1 mean that no
                 request is made at all.
        delay:   Seconds to wait between consecutive attempts.
        verbose: If True, print the error raised by each failed attempt.

    Returns:
        Whatever nivapy.da.post_data_to_vannmiljo returns on the first
        successful attempt. If every attempt raises KeyError (or no attempt is
        made), an empty DataFrame is returned, so callers can always use
        len() on the result.
    """
    for attempt in range(retries):
        try:
            return nivapy.da.post_data_to_vannmiljo("GetRegistrations", data=data)
        except KeyError as err:
            if verbose:
                print(f"  Attempt {attempt + 1} failed: {err}")
            # Only pause if another attempt will follow
            if attempt < retries - 1:
                time.sleep(delay)

    # Reached when all attempts failed, or when retries < 1 (previously the
    # latter fell off the end of the function and returned None)
    print("  All retry attempts failed.")
    return pd.DataFrame()

In [7]:
%%time
# Query API
wb_list = wb_df["VannforekomstID"].dropna().tolist()
df_list = []
for wb_id in wb_list:
    print("Processing:", wb_id)
    filter_dict = {
        "WaterBodyIDFilter": [wb_id],
        "FromDateSamplingTime": f"{st_yr}-01-01",
        "ToDateSamplingTime": f"{end_yr}-12-31",
        "ParameterIDFilter": pars,
        "FromRegDate": "1900-01-01",
    }
    df = query_api_with_retries(data=filter_dict, retries=3, delay=5)
    if len(df) > 0:
        df_list.append(df)
    else:
        print("  No data available.")
df = pd.concat(df_list, axis="rows")

# Tidy
df["SamplingTime"] = pd.to_datetime(df["SamplingTime"])
df.rename(columns={"CoordX": "UTM33East", "CoordY": "UTM33North"}, inplace=True)
drop_cols = [
    "WaterRegistrationID",
    "WaterLocationID",
    "Name_Alternative",
    "SourceID",
    "ListName",
    "UnitID",
    "SampleID",
    "Username",
    "RegDate",
    "LastEditDate",
    "Archive",
    "FeatureType",
    "WaterCategory",
]
df.drop(columns=drop_cols, inplace=True)

# Save
df.to_excel(res_xlsx, index=False)

Processing: 0120000034-C
Processing: 0121000300-1-C
Processing: 0121010500-1-C
Processing: 0130010301-2-C
Processing: 0131010200-C
Processing: 0132010100-C
Processing: 0201000030-2-C
Processing: 0201010300-C
Processing: 0240010201-C
Processing: 0241000030-C
Processing: 0242000030-C
Processing: 0242000032-C
Processing: 0242010101-3-C
Processing: 0242011101-C
Processing: 0242040102-C
Processing: 0242050100-2-C
Processing: 0260010501-6-C
Processing: 0260020801-C
Processing: 0260020802-C
Processing: 0260020900-C
Processing: 0260021000-C
Processing: 0260040101-C
Processing: 0260040800-C
Processing: 0260050500-C
Processing: 0261010500-2-C
Processing: 0261010600-C
Processing: 0261010702-3-C
Processing: 0261010800-2-C
Processing: 0261010800-4-C
Processing: 0261010800-9-C
Processing: 0261020100-2-C
Processing: 0261030201-2-C
Processing: 0261030202-C
Processing: 0261030300-2-C
Processing: 0281010201-3-C
  No data available.
Processing: 0281010700-C
Processing: 0281010900-2-C
Processing: 02810110



CPU times: user 6min 35s, sys: 24 s, total: 6min 59s
Wall time: 39min 22s


In [8]:
# Collect the distinct parameters, stations and waterbodies present in the data
par_ids = df["ParameterID"].unique().astype(str).tolist()
site_ids = df["WaterLocationCode"].unique().tolist()
data_wbs = df["WaterBodyID"].unique().tolist()

# Requested waterbodies that returned no rows at all
no_data_wbs = list(set(wb_list) - set(data_wbs))

# Report coverage
print(f"{len(data_wbs)} out of {len(wb_list)} waterbodies have at least some data.")
print(
    f"\nThe following {len(no_data_wbs)} out of {len(wb_list)} waterbodies have no data:",
    no_data_wbs,
    sep="\n",
)
print(f"\nData are available from {len(site_ids)} monitoring stations.")
print(
    f"\nThe following {len(par_ids)} parameters have been measured:",
    sorted(par_ids),
    sep="\n",
)

143 out of 153 waterbodies have at least some data.

The following 10 out of 153 waterbodies have no data:
['0364000030-3-C', '0281010201-3-C', '0365010602-1-C', '156-480-R', '0365010601-2-C', '0401020100-3-C', '0242041000-C', '0361040400-C', '196-49-R', '0364040300-5-C']

Data are available from 3282 monitoring stations.

The following 53 parameters have been measured:
['ANDEKNSK', 'ANDEKNSK_PKT', 'ANEQR_E', 'ANNVGR', 'ANNVGRTO', 'ANTIZM', 'ASPT', 'BENEQR_E', 'BENEQR_G', 'CYANOM', 'HBDEKNPS', 'KLFA', 'LBNEQR_E', 'LBNEQR_G', 'MAH', 'MANEQR_E', 'MBDEKNSK', 'MBDEKNSK_PKT', 'MBNEQR_G', 'MBNEQR_MID', 'MFNEQR_E', 'MHH', 'MSMDI2', 'MSMDI3', 'N-NH3', 'N-NH4', 'N-NO2', 'N-NO3', 'N-SNOX', 'N-TOT', 'NQI1', 'NQI1_MID', 'O2', 'P-ORTO', 'P-PART', 'P-PO4', 'P-TOT', 'PIT', 'PON', 'POP', 'PPBIOMTOVO', 'PPNEQR_E', 'PPTI', 'RSL4', 'RSLA1', 'RSLA2', 'RSLA3', 'SI-SIO2', 'SIO2', 'STS', 'TIANTL', 'TOC', 'TSM']


In [9]:
# Plot stations
# Build the table of unique stations with .assign() rather than setting a column
# on the de-duplicated slice, which can raise SettingWithCopyWarning in pandas.
# The zone is set to 33 to match the UTM33East/UTM33North column names.
stn_df = (
    df[["WaterLocationCode", "UTM33East", "UTM33North"]]
    .drop_duplicates()
    .assign(utm_zone=33)
)

# Convert to WGS84 decimal degrees (presumably adds the "lat"/"lon" columns used
# for the map below - confirm against the nivapy documentation)
stn_df = nivapy.spatial.utm_to_wgs84_dd(
    stn_df, zone="utm_zone", east="UTM33East", north="UTM33North"
)
nivapy.spatial.quickmap(
    stn_df,
    lat_col="lat",
    lon_col="lon",
    cluster=True,
    popup="WaterLocationCode",
)