In [1]:
import folium
import geopandas as gpd
import nivapy3 as nivapy
import pandas as pd
from folium.plugins import MarkerCluster

# Numedalslågen VK

## Notebook 01: Get data from Vannmiljø and Vann-Nett

Initial data search for the Numedalslågen VK project ("Trendanalyse og evaluering av vannovervåking i Numedalslågen").

## 1. User options

We just want data for Numedalslågen upstream of the outflow in Larvik. The code below first gets all data for vassdragsområde 015, then calculates the catchment boundary upstream of the outflow and filters results to only include sites within this watershed.

In [2]:
# Code of the vassdragsområde to query
vassom = "015"

# Outlet of Numedalslågen at the RV303 bridge in Larvik, as x/y in EPSG 25833
outlet_x = 216986
outlet_y = 6555370

## 2. Get data from Vannmiljø

Download all registrations for the whole of vassdragsområde 015.

In [3]:
# Request every registration for the vassdragsområde of interest
# (the 1900 start date is presumably early enough to include all records)
data = dict(FromRegDate="1900-01-01", VassdragsomradeIDFilter=[vassom])
wc_df = nivapy.da.post_data_to_vannmiljo("GetRegistrations", data=data)

# Vannmiljø field name -> tidy column name. The mapping is written as two
# groups for readability; merging them keeps the original key order, which
# also sets the column order of the tidied table.

# Fields describing the station / location
stn_fields = {
    "WaterLocationID": "station_id",
    "WaterLocationCode": "station_code",
    "Name": "station_name",
    "WaterCategory": "category",
    "CoordX": "utm33_east",
    "CoordY": "utm33_north",
    "FylkeID": "fylke_id",
    "Fylke": "fylke_name",
    "KommuneID": "kommune_id",
    "Kommune": "kommune_name",
    "VassdragsomradeID": "vassom_id",
    "Vassdragsomrade": "vassom_name",
    "VannomradeID": "vannom_id",
    "Vannomrade": "vannom_name",
    "VannregionID": "vannreg_id",
    "Vannregion": "vannreg_name",
    "WaterBodyID": "waterbody_id",
    "WaterBody": "waterbody_name",
    "FeatureType": "feature_type",
}

# Fields describing each registration (activity, sample, parameter, value)
reg_fields = {
    "ActivityID": "activity_id",
    "ActivityName": "activity_name",
    "Employer": "employer",
    "Contractor": "contractor",
    "MediumName": "medium_name",
    "SamplingTime": "sample_date",
    "UpperDepth": "upper_depth",
    "LowerDepth": "lower_depth",
    "FilteredSample": "filtered",
    "ParameterID": "par_id",
    "ParameterName": "par_name",
    "ValueOperator": "flag",
    "RegValue": "value",
    "Unit": "unit",
    "DetectionLimit": "lod",
    "QuantificationLimit": "loq",
}

names_dict = {**stn_fields, **reg_fields}

# Columns that make up the station table, in output order
stn_cols = [
    # Identity and type
    "station_id", "station_code", "station_name", "feature_type", "category",
    # Administrative units
    "fylke_id", "fylke_name", "kommune_id", "kommune_name",
    # Water management units
    "vassom_id", "vassom_name", "vannom_id", "vannom_name",
    "vannreg_id", "vannreg_name", "waterbody_id", "waterbody_name",
    # Co-ordinates
    "utm33_east", "utm33_north",
]
# Keep only the mapped fields (in mapping order) under their tidy names,
# then parse the sampling times to datetimes
wc_df = wc_df[list(names_dict)].rename(columns=names_dict)
wc_df["sample_date"] = pd.to_datetime(wc_df["sample_date"])

# Station table: the first row encountered for each station code
stn_df = wc_df[stn_cols].drop_duplicates(subset="station_code")

# Point geometries built from the station co-ordinates (EPSG 25833)
stn_points = gpd.points_from_xy(
    x=stn_df["utm33_east"], y=stn_df["utm33_north"], crs="epsg:25833"
)
stn_gdf = gpd.GeoDataFrame(stn_df, geometry=stn_points)

## 3. Filter to Numedalslågen

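The code below delineates the catchment upstream of the specified outlet co-ordinates and uses this to filter the data from Vannmiljø. The filtered data are then reshaped to wide format (duplicate values are saved for checking and then averaged), and the station list and the long- and wide-format datasets are saved to Excel.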

In [4]:
# Single-row table holding the outlet point to delineate from
outlet_df = pd.DataFrame(
    {"site_id": 1, "east": outlet_x, "north": outlet_y},
    index=[0],
)

# Derive the catchment boundary upstream of the outlet
watershed_kwargs = dict(
    id_col="site_id",
    xcol="east",
    ycol="north",
    crs="epsg:25833",
    min_size_km2=1000,
    dem_res_m=40,
    buffer_km=None,
    temp_fold=None,
    reproject=False,
)
cat_gdf = nivapy.spatial.derive_watershed_boundaries(outlet_df, **watershed_kwargs)

# Catchment area, measured after projecting to cylindrical equal-area ("cea")
# (dividing by 1e6 presumably converts m2 to km2 - confirm projection units)
cat_cea = cat_gdf.to_crs({"proj": "cea"})
cat_gdf["area_km2"] = cat_cea["geometry"].area / 1e6

# Keep only the stations lying within the catchment boundary
stn_in_cat = gpd.sjoin(stn_gdf, cat_gdf, predicate="within", how="inner")
stn_gdf = stn_in_cat[stn_cols + ["geometry"]].reset_index(drop=True)
stn_df = stn_gdf.drop(columns="geometry")

# Restrict the water chemistry data to those stations
stn_list = stn_gdf["station_id"].tolist()
wc_df = wc_df[wc_df["station_id"].isin(stn_list)].reset_index(drop=True)

n_stns, n_vals = len(stn_df), len(wc_df)
print(
    f"There are {n_stns} stations with {n_vals} water chemistry values within the catchment of interest."
)

# Prepare to convert to wide: one column per parameter/unit combination
wc_df_wide = wc_df.copy()

# Fill missing units *before* building the column label. Concatenating a
# string with NaN gives NaN, which the blanket fillna("none") further down
# would turn into a bare "none" label - collapsing every unit-less parameter
# into a single column and averaging unrelated parameters as "duplicates".
wc_df_wide["par_unit"] = (
    wc_df_wide["par_id"] + "_" + wc_df_wide["unit"].fillna("none")
)
wc_df_wide = wc_df_wide.drop(
    columns=["par_id", "par_name", "flag", "unit", "lod", "loq"]
)

# Missing depths are treated as 0
wc_df_wide["upper_depth"] = wc_df_wide["upper_depth"].fillna(0)
wc_df_wide["lower_depth"] = wc_df_wide["lower_depth"].fillna(0)

# Drop rows without a value, then fill remaining gaps with "none" so that
# every other column can be used as a grouping/index key
wc_df_wide = wc_df_wide.dropna(subset=["value"]).fillna("none")
id_cols = [col for col in wc_df_wide.columns if col != "value"]

# Save duplicated values for checking (rows sharing every identifier column)
dup_df = wc_df_wide[wc_df_wide.duplicated(subset=id_cols, keep=False)].sort_values(
    id_cols
)
if len(dup_df) > 0:
    print(
        "The dataset contains duplicates. These will be saved for checking and then averaged."
    )
    dup_xlsx = r"../data/vannmiljo_duplicates.xlsx"
    dup_df.to_excel(dup_xlsx, index=False)

    # Average duplicates. "value" is the only non-key column, so select it
    # explicitly.
    # NOTE(review): "flag" was dropped above, so any flagged values are
    # averaged at face value - confirm this is acceptable.
    wc_df_wide = wc_df_wide.groupby(id_cols)["value"].mean().reset_index()

# Convert to wide: par_unit labels become the columns
wc_df_wide = wc_df_wide.set_index(id_cols).unstack("par_unit")
wc_df_wide.columns = wc_df_wide.columns.get_level_values(1)
wc_df_wide = wc_df_wide.reset_index()

# Output workbooks (relative paths)
stn_xlsx = r"../data/vannmiljo_stations.xlsx"
wc_xlsx_long = r"../data/vannmiljo_data_long.xlsx"
wc_xlsx_wide = r"../data/vannmiljo_data_wide.xlsx"

# Write stations, long-format data and wide-format data, without the index
for xlsx_path, frame in (
    (stn_xlsx, stn_df),
    (wc_xlsx_long, wc_df),
    (wc_xlsx_wide, wc_df_wide),
):
    frame.to_excel(xlsx_path, index=False)

Connection successful.


Looping over vassdragsområder:   0%|          | 0/1 [00:00<?, ?it/s]

Looping over outlets in vassdragsområder 015:   0%|          | 0/1 [00:00<?, ?it/s]

There are 1945 stations with 210091 water chemistry values within the catchment of interest.
The dataset contains duplicates. These will be saved for checking and then averaged.


## 4. Map

In [5]:
# Convert the UTM zone 33 co-ordinates to WGS84 decimal degrees; the helper
# is expected to add "lon" and "lat" columns
stn_df["zone"] = 33
stn_wgs84 = nivapy.spatial.utm_to_wgs84_dd(
    stn_df,
    zone="zone",
    east="utm33_east",
    north="utm33_north",
)

# Stations lacking either co-ordinate cannot be mapped
stn_df = stn_wgs84.dropna(subset=["lon", "lat"])

# Base map centred on the mean station position
avg_lat, avg_lon = stn_df["lat"].mean(), stn_df["lon"].mean()
m = folium.Map(location=[avg_lat, avg_lon], zoom_start=4, tiles="OpenStreetMap")


def boundary_style(feature):
    """Return the outline-only style (red line, no fill) used for every catchment feature."""
    return {
        "fillColor": "none",
        "color": "red",
        "weight": 2,
        "fillOpacity": 0,
    }


# Layer 1: catchment boundary, reprojected to EPSG 4326 for display
cat_grp = folium.FeatureGroup(name="Catchment Boundary")
cat_layer = folium.GeoJson(cat_gdf.to_crs("epsg:4326"), style_function=boundary_style)
cat_grp.add_child(cat_layer)
m.add_child(cat_grp)

# Layer 2: clustered station markers, with the station code as popup text
stn_grp = folium.FeatureGroup(name="Stations")
locs = list(zip(stn_df["lat"].to_numpy(), stn_df["lon"].to_numpy()))
popups = list(stn_df["station_code"].to_numpy())
marker_cluster = MarkerCluster(locations=locs, popups=popups)
stn_grp.add_child(marker_cluster)
m.add_child(stn_grp)

# Layer control so each group can be toggled
m.add_child(folium.LayerControl())

# Zoom to the bounding box of the stations
lon, lat = stn_df["lon"], stn_df["lat"]
xmin, ymin = lon.min(), lat.min()
xmax, ymax = lon.max(), lat.max()

# fit_bounds takes [lat, lon] pairs: south-west corner, then north-east
south_west, north_east = [ymin, xmin], [ymax, xmax]
m.fit_bounds([south_west, north_east])

m