In [1]:
import time
import warnings

import altair as alt
import folium
import nivapy3 as nivapy
import pandas as pd
from altair import Chart
from folium import IFrame
from folium.plugins import MarkerCluster
from IPython.display import display

# Ignore FutureWarning messages from this point on, so they do not clutter the cell output
warnings.simplefilter(action="ignore", category=FutureWarning)

# Get $O_2$ data from Vannmiljø

Download all $O_2$ data for Oslofjorden (vassdragsområder 001–017) from Vannmiljø. See e-mails from Anette and Elianne on 31.03.2025.

In [2]:
# Vassdragsområde IDs "001" to "017", zero-padded to three characters. As defined
# by MDir, these cover the whole Oslofjord
vassoms = [str(vassom_no).zfill(3) for vassom_no in range(1, 18)]

# Parameter IDs to download
pars = ["O2"]

In [3]:
def query_api_with_retries(data, retries=3, delay=5):
    """Post a query to the Vannmiljø 'GetRegistrations' endpoint, retrying on KeyError.

    Args:
        data: Filter dict passed as the request payload to
            nivapy.da.post_data_to_vannmiljo.
        retries: Maximum number of attempts. Default 3.
        delay: Seconds to sleep between failed attempts. Default 5.

    Returns:
        Whatever nivapy.da.post_data_to_vannmiljo returns on the first successful
        attempt (the caller treats it as a dataframe). An empty dataframe is
        returned if every attempt raises KeyError, or if 'retries' is less than 1
        (in which case no request is made).
    """
    for attempt in range(retries):
        try:
            return nivapy.da.post_data_to_vannmiljo("GetRegistrations", data=data)
        except KeyError:
            # Only KeyError is treated as retryable; any other exception propagates.
            # NOTE(review): presumably nivapy raises KeyError when the response
            # lacks the expected payload (e.g. no matching data) - confirm.
            if attempt < retries - 1:
                time.sleep(delay)
            else:
                print("  All retry attempts failed.")

    # Always hand back a dataframe, so callers can safely use len() on the result
    # even when the loop body never ran (retries < 1)
    return pd.DataFrame()

In [4]:
# Fetch the water locations for every vassdragsområde listed in 'vassoms'
data = dict(VassdragsomradeIDFilter=vassoms)
stn_df = nivapy.da.post_data_to_vannmiljo("GetWaterLocations", data=data)

In [5]:
%%time
# Query API. One request per vassdragsområde; non-empty results are collected
# in a list and combined afterwards
df_list = []
for vassom in vassoms:
    print("Processing:", vassom)
    filter_dict = {
        "VassdragsomradeIDFilter": [vassom],
        "FromDateSamplingTime": "1900-01-01",
        "ToDateSamplingTime": "2025-12-31",
        "ParameterIDFilter": pars,
        "FromRegDate": "1900-01-01",
    }
    # Separate name for the per-request result, so 'df' only ever refers to the
    # combined dataset
    vassom_df = query_api_with_retries(data=filter_dict, retries=3, delay=5)
    if len(vassom_df) > 0:
        df_list.append(vassom_df)
    else:
        print("  No data available.")

# pd.concat fails with an opaque "No objects to concatenate" message when given an
# empty list, so report the real problem instead
if not df_list:
    raise ValueError("No data returned for any vassdragsområde; nothing to save.")

# ignore_index=True gives the combined frame a fresh 0..n-1 index, rather than
# carrying over the index labels of the individual frames (which may repeat)
df = pd.concat(df_list, axis="rows", ignore_index=True)

# Tidy
df["SamplingTime"] = pd.to_datetime(df["SamplingTime"])

# Save
res_xlsx = r"../data/o2_data/o2_data_vassoms_001-017.xlsx"
df.to_excel(res_xlsx, index=False)

Processing: 001
Processing: 002
Processing: 003
Processing: 004
Processing: 005
Processing: 006
Processing: 007
Processing: 008
Processing: 009
Processing: 010
Processing: 011
Processing: 012
Processing: 013
Processing: 014
Processing: 015
Processing: 016
Processing: 017
CPU times: user 4min 26s, sys: 6.49 s, total: 4min 32s
Wall time: 6min 25s


In [6]:
# Keep only the stations whose code appears in the O2 dataset, then save them
o2_stn_list = df["WaterLocationCode"].unique().tolist()
stn_df = stn_df[stn_df["WaterLocationCode"].isin(o2_stn_list)]
stn_xlsx = r"../data/o2_data/o2_stns_vassoms_001-017.xlsx"
stn_df.to_excel(stn_xlsx, index=False)

In [7]:
# Mean RegValue for each combination of station, sampling time and unit
avg_df = (
    df.loc[:, ["WaterLocationCode", "SamplingTime", "Unit", "RegValue"]]
    .groupby(["WaterLocationCode", "SamplingTime", "Unit"], as_index=False)
    .mean()
)

# Station codes and coordinates only, as an independent copy
avg_stn_df = stn_df.loc[:, ["WaterLocationCode", "CoordY_dg", "CoordX_dg"]].copy()

In [8]:
def create_chart(data):
    """Build a layered Altair chart (line plus circles) of RegValue against SamplingTime.

    Args:
        data: Dataframe with columns 'WaterLocationCode', 'Unit', 'SamplingTime'
            and 'RegValue'. The first row supplies the chart title (location
            code) and the unit shown in the y-axis title, so the frame must
            not be empty.

    Returns:
        Layered Altair chart, 300 wide by 200 high, titled with the location code.
    """
    code = data["WaterLocationCode"].iloc[0]
    unit = data["Unit"].iloc[0]

    # Both layers share the same data and encodings; only the mark type differs
    base = alt.Chart(data).encode(
        x="SamplingTime:T",
        y=alt.Y("RegValue:Q", title=f"O2 ({unit})"),
        tooltip=["SamplingTime:T", "RegValue:Q"],
    )
    layered = base.mark_line() + base.mark_circle()
    return layered.properties(title=code, width=300, height=200)

In [9]:
# Map, centred on the mean of the station coordinates
m = folium.Map(
    location=[avg_stn_df["CoordY_dg"].mean(), avg_stn_df["CoordX_dg"].mean()],
    zoom_start=6,
)
marker_cluster = MarkerCluster().add_to(m)

# Split the averaged data by station once, instead of filtering the whole
# dataframe again for every marker
data_by_stn = {code: grp for code, grp in avg_df.groupby("WaterLocationCode")}

# Add markers
for row in avg_stn_df.itertuples(index=False):
    location_code = row.WaterLocationCode
    lat = row.CoordY_dg
    lon = row.CoordX_dg

    # A station can be listed without having any rows in 'avg_df' (groupby drops
    # rows with missing keys, e.g. no sampling time or unit, when averaging).
    # create_chart cannot handle an empty dataframe, so skip these stations
    location_data = data_by_stn.get(location_code)
    if location_data is None:
        print(f"  No averaged data for station {location_code}; marker skipped.")
        continue

    # Create chart
    chart = create_chart(location_data)

    # Convert to HTML and embed it in the marker's popup
    html = chart.to_html()
    iframe = IFrame(html, width=400, height=300)
    popup = folium.Popup(iframe, max_width=400)

    # Add to marker cluster
    folium.Marker(location=[lat, lon], popup=popup).add_to(marker_cluster)

m

In [10]:
# Count the averaged values falling in each calendar year
avg_df["Year"] = avg_df["SamplingTime"].dt.year
yearly_counts = avg_df.groupby("Year").size().rename("Count").reset_index()

# Bar chart with one bar per year
bar_chart = alt.Chart(yearly_counts).mark_bar()
bar_chart = bar_chart.encode(x=alt.X("Year:O"), y=alt.Y("Count:Q"))
bar_chart = bar_chart.properties(
    title="Number of values per Year", width=600, height=400
)

bar_chart