In [1]:
import os
import numpy as np
import nivapy3 as nivapy
import pandas as pd
import xarray as xr

# Download daily river discharge data from NVE

Stations of interest for AquaInfra use case:

* **Numedalslågen**: Holmfoss i Numedalslågen, 15.61.0, from 1970. About 15 km upstream of land-sea outflow.
* **Drammenselva**
    - Mjøndalen bru, 12.534.0, from 2004. Most downstream (D/S) station.
    - Døvikfoss, 12.285.0, from 1912. About 25 km upstream (U/S) of Mjøndalen bru, with no major tributaries between the two. Just downstream of Tyrifjorden and Gravfoss Kraftverk.
* **Glomma**
    - Solbergfoss, 2.605.0, from 1964. Just downstream of Øyeren. Was the most downstream station for a long time. Estimated rather than measured? «Data ved Solbergfoss beregnes ut fra kraftverksproduksjon og tapping i luker»
    - Sarpsborg, 2.489.0, from 2009. Most downstream station. Not in Sildre though, only in NVE Atlas. Let's see if we can get it; otherwise go for Solbergfoss. **Update: Not available in HydAPI, so drop for now.**

## 0. User inputs

In [2]:
# Stations to fetch, grouped by river: {river name: {station ID: station name}}
stn_id_name_dict = {
    "Numedalslågen": {
        "15.61.0": "Holmfoss",
    },
    "Drammenselva": {
        "12.534.0": "Mjøndalen bru",
        "12.285.0": "Døvikfoss",
    },
    "Glomma": {
        "2.605.0": "Solbergfoss",
        "2.489.0": "Sarpsborg",
    },
}

# Parameter IDs to request
par_ids = [1001]  # Daily mean discharge

# Requested period: first and last date of the download
st_dt, end_dt = "1900-01-01", "2024-08-28"

# Station positions keyed by station ID, listed as (latitude, longitude) and
# expanded into {"latitude": ..., "longitude": ...} records.
# Note that there is no entry for station 2.489.0.
station_coords = {
    stn_id: {"latitude": lat, "longitude": lon}
    for stn_id, (lat, lon) in {
        "15.61.0": (59.18906, 9.99414),
        "12.534.0": (59.75292, 10.00727),
        "12.285.0": (59.88624, 9.90977),
        "2.605.0": (59.63733, 11.15354),
    }.items()
}

# Global attributes shared by every NetCDF file written by this notebook.
# Per-station attributes (title, summary, coverage, ...) are added on top of
# a copy of this dictionary when each dataset is built.
# Each key appears exactly once: a repeated key in a dict literal silently
# discards the earlier value.
global_metadata_config = {
    "naming_authority": "no.nve",
    "project": "AquaINFRA",
    "iso_topic_category": "inlandWaters",
    "featureType": "timeSeries",
    "spatial_representation": "point",
    "creator_type": "institution",
    "creator_institution": "Norwegian Water Resources and Energy Directorate (NVE)",
    "creator_name": "Norwegian Water Resources and Energy Directorate (NVE)",
    "institution": "Norwegian Water Resources and Energy Directorate (NVE)",
    "institution_short_name": "NVE",
    "creator_email": "hydrology@nve.no",
    "creator_url": "https://www.nve.no",
    "data_owner": "Norwegian Water Resources and Energy Directorate (NVE)",
    "source": "NVE HydAPI (https://hydapi.nve.no/)",
    "processing_level": "Raw retrieved from NVE HydAPI",
    "Conventions": "CF-1.7, ACDD-1.3",
    "publisher_name": "Norwegian Water Resources and Energy Directorate (NVE)",
    "publisher_email": "hydrology@nve.no",
    "publisher_institution": "Norwegian Water Resources and Energy Directorate (NVE)",
    "publisher_url": "https://www.nve.no",
    "license": "https://data.norge.no/nlod/en",
    "license_comment": (
        "Licensed under the Norwegian License for Open Government Data (NLOD), "
        "compatible with CC BY 3.0 Norge. Data is provided as-is from NVE HydAPI. "
        "NVE assumes no responsibility for errors. Cite NVE HydAPI as the source."
    ),
    "keywords": "GCMD:EARTH SCIENCE > HYDROLOGY > STREAMFLOW, GCMDLOC:CONTINENT > EUROPE > NORWAY",
    "keywords_vocabulary": "GCMD:GCMD Science Keywords, GCMDLOC:GCMD Locations",
    "history": (
        "Data retrieved using NVE HydAPI (https://hydapi.nve.no/) using 1440-minute resolution. "
        "No modifications applied beyond reshaping to NetCDF. Correction and quality flags follow NVE's documentation."
    ),
    # Legend for the integer codes stored in the "quality" and "correction" variables
    "correction_quality_notes": (
        "See https://hydapi.nve.no/UserDocumentation/ for full details.\n\n"
        "Quality types:\n"
        "  0: Unknown\n"
        "  1: Uncontrolled\n"
        "  2: PrimaryControlled\n"
        "  3: SecondaryControlled\n\n"
        "Correction types:\n"
        "  0: No changes\n"
        "  1: Manual- or ice correction\n"
        "  2: Interpolation\n"
        "  3: Model/series-based estimate\n"
        "  4: Daily mean by arithmetic mean\n"
        " 13: Based on nearby/similar station\n"
        " 14: Statistically estimated missing value"
    )
}

# Output locations (relative to the notebook) and the date stamp used in file names
metadata_folder = "../../data/river/discharge"  # station metadata CSV
out_folder = metadata_folder + "/raw"  # per-station CSV and NetCDF files
download_date = "2024-08-28"  # (for file naming)

## Check whether stations are available in HydAPI & save metadata for stations with data

In [3]:
# List all stations
stn_df = nivapy.da.get_nve_hydapi_stations()
print(len(stn_df), "stations available in HydAPI.")

# IDs present in the station table; a set makes the membership tests below cheap
available_station_ids = set(stn_df["station_id"])

# Check whether desired stations are available (order follows the user dictionary)
requested_station_ids = [
    stn_id for stations in stn_id_name_dict.values() for stn_id in stations
]
missing_station_ids = [
    stn_id for stn_id in requested_station_ids if stn_id not in available_station_ids
]

print(f"Desired stations missing from HydAPI: {missing_station_ids}")

# Drop missing stations from the dictionary of stations we want to download data for.
# The dictionary is rebuilt rather than deleted from while iterating; a river whose
# stations are all missing is kept with an empty station dictionary.
stn_id_name_dict = {
    river: {
        stn_id: stn_name
        for stn_id, stn_name in stations.items()
        if stn_id not in missing_station_ids
    }
    for river, stations in stn_id_name_dict.items()
}

# Station IDs that remain after dropping the missing ones
station_id_li = [
    stn_id for stn_id in requested_station_ids if stn_id not in missing_station_ids
]

print("New river & station_id dictionary:")
print(stn_id_name_dict)

# Extract metadata for these stations & save to csv.
# Create the target folder first so a fresh checkout does not fail on the write.
select_stn_df = stn_df[stn_df['station_id'].isin(station_id_li)]
os.makedirs(metadata_folder, exist_ok=True)
fpath = os.path.join(metadata_folder, "discharge_stations_metadata.csv")
select_stn_df.to_csv(fpath)

1870 stations available in HydAPI.
Desired stations missing from HydAPI: ['2.489.0']
New river & station_id dictionary:
{'Numedalslågen': {'15.61.0': 'Holmfoss'}, 'Drammenselva': {'12.534.0': 'Mjøndalen bru', '12.285.0': 'Døvikfoss'}, 'Glomma': {'2.605.0': 'Solbergfoss'}}


## Download data

See here for an interpretation of the correction and quality integer values in the data: https://hydapi.nve.no/UserDocumentation/

In [4]:
def _build_discharge_dataset(df, river, station_id, station_name):
    """Turn one station's discharge table into an annotated xarray Dataset.

    Parameters
    ----------
    df : pandas.DataFrame
        Non-empty query result for a single station, indexed by "datetime".
        The columns "value", "correction", "quality", "unit", "method",
        "parameter", "parameter_name" and "parameter_name_eng" are read.
    river : str
        River name, used in the dataset summary.
    station_id : str
        Station ID, used for the coordinate lookup and stored in the dataset.
    station_name : str
        Station name, used in the title/summary and stored in the dataset.

    Returns
    -------
    xarray.Dataset
        Variables "discharge", "correction" and "quality" along "time", scalar
        "latitude"/"longitude" coordinates, scalar "station_id"/"station_name"
        variables, and global attributes.

    Notes
    -----
    Reads the notebook-level settings ``station_coords``,
    ``global_metadata_config`` and ``download_date``.
    """
    # Rename and reduce to necessary columns
    df_netcdf = df[["value", "correction", "quality"]].rename(columns={"value": "discharge"})

    # Convert to xarray dataset
    ds = df_netcdf.to_xarray()
    ds = ds.rename({"datetime": "time"})
    ds["time"] = pd.to_datetime(ds["time"].values).astype("datetime64[ns]")
    ds["time"].attrs.update({
        "standard_name": "time",
        "long_name": "Date of observation",
        "axis": "T"
    })

    # Get coordinates; stations without an entry get NaN rather than failing
    coords = station_coords.get(station_id, {"latitude": np.nan, "longitude": np.nan})
    lat = float(coords["latitude"])
    lon = float(coords["longitude"])

    # Add scalar coordinate variables
    ds = ds.assign_coords(
        latitude=xr.DataArray(lat, dims=(), attrs={
            "standard_name": "latitude", "long_name": "Latitude", "units": "degrees_north"
        }),
        longitude=xr.DataArray(lon, dims=(), attrs={
            "standard_name": "longitude", "long_name": "Longitude", "units": "degrees_east"
        }),
    )

    # Add scalar station info
    ds["station_id"] = xr.DataArray(station_id, dims=())
    ds["station_name"] = xr.DataArray(station_name, dims=(), attrs={"cf_role": "timeseries_id"})

    # Add attributes to discharge. "discharge" is not an entry in the CF standard
    # name table; the CF name for river discharge is used so the file matches the
    # CF convention declared in the global attributes.
    ds["discharge"].attrs.update({
        "units": df["unit"].iloc[0],
        "long_name": "Daily mean river discharge",
        "standard_name": "water_volume_transport_in_river_channel"
    })

    # Global metadata: shared settings plus station-specific entries
    dataset_metadata = global_metadata_config.copy()
    dataset_metadata.update({
        "title": f"Discharge at {station_name}",
        "summary": f"Daily mean discharge time series from NVE HydAPI at station {station_name} ({station_id}) on river {river}.",
        "method": str(df["method"].iloc[0]),
        "parameter_id": int(df["parameter"].iloc[0]),
        "parameter_name_no": str(df["parameter_name"].iloc[0]),
        "parameter_name_eng": str(df["parameter_name_eng"].iloc[0]),
        "geospatial_lat_min": lat,
        "geospatial_lat_max": lat,
        "geospatial_lon_min": lon,
        "geospatial_lon_max": lon,
        "time_coverage_start": str(df.index.min().date()),
        "time_coverage_end": str(df.index.max().date()),
        "download_date": download_date,
    })
    ds.attrs.update(dataset_metadata)

    return ds


# Make sure the output folder exists before any file is written
os.makedirs(out_folder, exist_ok=True)

for river, station_dict in stn_id_name_dict.items():
    for station_id, station_name in station_dict.items():
        # resolution is given in minutes: 1440 = daily values
        df = nivapy.da.query_nve_hydapi([station_id], par_ids, st_dt, end_dt, resolution=1440)

        # Nothing to save (and no first row to read the metadata from) without data
        if df.empty:
            print(f"No data returned for {station_name}, {station_id}; skipping")
            continue

        df = df.set_index("datetime")
        df.index = pd.to_datetime(df.index)

        # File names: same stem for the CSV and the NetCDF file
        file_stem = f"Q_daily-mean_{river}_{station_name}_{station_id}_download-{download_date}"

        ds = _build_discharge_dataset(df, river, station_id, station_name)

        # Save files
        df.to_csv(os.path.join(out_folder, f"{file_stem}.csv"))
        # NOTE(review): the saved cell output shows a warning raised from this call,
        # but its message is not visible. Check whether the integer "days since"
        # encoding loses sub-daily timestamp information - TODO confirm.
        ds.to_netcdf(
            os.path.join(out_folder, f"{file_stem}.nc"),
            encoding={
                "time": {
                    "dtype": "int32",
                    "units": "days since 1970-01-01",
                    "calendar": "standard"
                }
            }
        )

        print(f"Downloaded & saved data for {station_name}, {station_id}")

  ds.to_netcdf(


Downloaded & saved data for Holmfoss, 15.61.0


  ds.to_netcdf(


Downloaded & saved data for Mjøndalen bru, 12.534.0


  ds.to_netcdf(


Downloaded & saved data for Døvikfoss, 12.285.0
Downloaded & saved data for Solbergfoss, 2.605.0


  ds.to_netcdf(
