In [None]:
%matplotlib inline
import calendar
import glob
import os

import matplotlib.pyplot as plt
import nivapy3 as nivapy
import numpy as np
import pandas as pd
import seaborn as sn

plt.style.use("ggplot")

# Read the NVE HydAPI key from the environment instead of storing it in the
# notebook, so the secret is never committed or shared along with the outputs.
# Any key that was previously saved in this notebook should be treated as
# exposed and replaced.
api_key = os.environ.get("NVE_HYDAPI_KEY")
if not api_key:
    raise RuntimeError(
        "NVE HydAPI key not found. Set the 'NVE_HYDAPI_KEY' environment "
        "variable before starting the notebook kernel."
    )

In [None]:
# Connect to db
# `engine` is the connection handle passed to every `pd.read_sql` call and to
# `nivapy.da.extract_resa_discharge` further down in this notebook.
# NOTE(review): presumably this prompts for database credentials - confirm.
engine = nivapy.da.connect()

# Update FELTFO discharge

This notebook uses the NVE API to update RESA with the latest discharge data for the FELTFO stations.

In [None]:
# Define parameters and time period of interest
# NVE station IDs to download. Each ID has three dot-separated parts; only the
# first two are used later when looking up the matching RESA2 discharge station.
nve_stn_ids = ["20.11.0", "12.188.0", "27.26.0", "19.96.0", "247.3.0", "111.5.0"]
# Parameter IDs passed to the HydAPI query.
# NOTE(review): 1001 is presumably the HydAPI code for discharge - confirm.
par_ids = [1001]
# Calendar year to download, check against RESA2 and area-scale
year = 2022

## 1. Get NVE data

In [None]:
# Fetch every station known to HydAPI, then keep only the rows whose
# station_id is one of the stations of interest
stn_df = nivapy.da.get_nve_hydapi_stations(api_key=api_key)
stn_df = stn_df.loc[stn_df["station_id"].isin(nve_stn_ids)]
stn_df

In [None]:
# Get discharge
# The request window runs from 1 January of `year` to 1 January of the
# following year.
st_dt = f"{year}-01-01"
end_dt = f"{year + 1}-01-01"
# NOTE(review): resolution=1440 is presumably in minutes (i.e. daily values),
# which would match the one-record-per-station-per-day check applied to the
# result afterwards - confirm against the HydAPI documentation.
q_df = nivapy.da.query_nve_hydapi(
    nve_stn_ids, par_ids, st_dt, end_dt, resolution=1440, api_key=api_key
)
q_df.head()

In [None]:
# Check number of records as expected: one value per station, per parameter,
# per day of the year. The expected count is derived from the configured
# station and parameter lists so the check stays valid if either list changes.
days = 366 if calendar.isleap(year) else 365
n_expected = len(nve_stn_ids) * len(par_ids) * days
assert (
    len(q_df) == n_expected
), f"Number of records is not as expected (got {len(q_df)}, expected {n_expected})."

# Check quality control level
print("The following records have not completed quality control (i.e. 'quality' < 3)")
print("(see https://hydapi.nve.no/UserDocumentation/ for details):")
q_df.query("quality != 3")

## 2. Get corresponding discharge station codes from RESA

In [None]:
# Build the mapping {NVE station ID: RESA2 discharge station ID}.
# A station is only added to the mapping when exactly one RESA2 match is found;
# ambiguous or missing stations are reported and left out, so they can never
# inherit the ID of the station processed before them.
stn_id_dict = {}
for nve_id in nve_stn_ids:
    # NVE IDs have three dot-separated parts; RESA2 is matched on the first two
    vnr1, vnr2, _ = nve_id.split(".")

    # Get RESA2 station ID
    sql = (
        "SELECT dis_station_id FROM resa2.discharge_stations "
        "WHERE nve_serienummer LIKE '%s.%s%%'" % (vnr1, vnr2)
    )
    df = pd.read_sql(sql, engine)

    if len(df) > 1:
        print("More than one station found matching NVE ID %s.%s." % (vnr1, vnr2))
        print(df)
        continue

    if len(df) == 0:
        print("NVE ID %s.%s not found." % (vnr1, vnr2))
        continue

    stn_id = df.iloc[0]["dis_station_id"]

    # Check whether data already exist for this year
    sql = (
        "SELECT count(*) FROM resa2.discharge_values "
        "WHERE dis_station_id = %s "
        "AND EXTRACT(YEAR FROM xdate) = %s " % (stn_id, year)
    )
    df = pd.read_sql(sql, engine)
    cnt = df.iloc[0, 0]

    if cnt > 0:
        print(
            "%s data already exist for NVE "
            "station %s.%s (RESA2 ID %s)." % (cnt, vnr1, vnr2, stn_id)
        )

    # Add useful results to dict for later (unique matches only)
    stn_id_dict[nve_id] = stn_id

# Make any gaps in the mapping obvious before moving on
unmapped_ids = [nve_id for nve_id in nve_stn_ids if nve_id not in stn_id_dict]
if unmapped_ids:
    print(
        "WARNING: no unique RESA2 station for NVE ID(s): %s" % ", ".join(unmapped_ids)
    )

print("Finished checking.")
print("The NVE:RESA station code mapping is:")
stn_id_dict

## 3. Update discharge values

Once any issues above have been dealt with, the code below adds the new discharge data to the database.

In [None]:
assert pd.isna(q_df["value"]).sum() == 0, "q_df contains NaNs."

# Every NVE station in the data must have a RESA2 discharge station code;
# otherwise `.map` below would silently produce NaN station IDs for upload.
unmapped = sorted(set(q_df["station_id"]) - set(stn_id_dict))
assert not unmapped, f"No RESA2 discharge station ID for NVE station(s): {unmapped}"

# Build the frame in the column layout used for resa2.discharge_values
q_df = q_df.assign(
    dis_station_id=q_df["station_id"].map(stn_id_dict),
    xvalue=q_df["value"],
    xdate=q_df["datetime"],
    xcomment=np.nan,
)[["dis_station_id", "xdate", "xvalue", "xcomment"]]
q_df.head()

In [None]:
# # Add new rows to database
# q_df.to_sql(
#     "discharge_values", con=engine, schema="resa2", if_exists="append", index=False
# )

## 4. Area-scaling for `FELTFO` stations

The `FELTFO` project involves 6 NVE stations, listed in the table below:

| RESA2 chem code | RESA2 discharge code | NVE code |
|:---------------:|---------------------:|-----------------------:|
| BIE01 | 1 | 20.11 |
| LAE01 | 2 | 12.188 |
| OVELV 19 23 | 3 | Area-scaled from 27.26 |
| STE01 | 21 | 19.96 |
| DALELV | 23 | Area-scaled from 247.3 |
| KAE01 | 527 | Area-scaled from 111.5 |

The first step is to use the code above to upload NVE data for these stations. Once this has been done, data for 3 of the stations can be calculated by area-scaling, and the scaled series are then added to the database as separate datasets.

**Note (added 13/09/2018):** There is some confusion regarding which discharge datasets to use for estimating flows at `'KAE01'`. The nearest NVE station is Naustaa (111.10), which has a similar drainage area to the chemistry monitoring station and a largely natural flow regime. However, there are data gaps in the Naustaa record, especially during late 2008 and early 2009. It appears that Tore therefore switched to using data from Toaa (111.5), which is further downstream (catchment area of 150 km2, compared to 25 km2) and more heavily regulated. Rather than simply area-scaling, Tore derived a regression equation relating flows at Toaa to those at `'KAE01'` (i.e. Naustaa), and this relationship has been used subsequently to estimate flows for `'KAE01'` for the period from 2009 to 2015 inclusive. In 2016, I estimated flows by area-scaling the data from Toaa, because this is the general approach applied by RESA2 and Tore's regression methodology was not documented.

I have now compared the various discharge records for 111.5, 111.10 and KAE01 - see `'compare_kae01_flows.ipynb'`. It seems to me that area-scaling from Toaa actually works better than the regression approach (see e-mail sent to Liv Bente 13/09/2018 for details). The simplest method for the future would probably be to switch back to using Naustaa. However, because of the potential for future data gaps, I propose that we keep area-scaling from Toaa, which seems to do a reasonable job.

In [None]:
def get_catchment_area(dis_stn_id, engine):
    """Return the catchment area stored in RESA2 for a discharge station.

    Args:
        dis_stn_id: RESA2 discharge station ID (`dis_station_id`).
        engine: Database connection passed through to `pd.read_sql`.

    Returns:
        The value of the `area` column for the station, as a float.

    Raises:
        ValueError: If the station is not found, or its area is missing or
            not positive (area-scaling would be meaningless in that case).
    """
    sql = (
        "SELECT area from resa2.discharge_stations "
        "WHERE dis_station_id = %s" % dis_stn_id
    )
    area_df = pd.read_sql(sql, engine)

    if area_df.empty or pd.isna(area_df.iloc[0, 0]):
        raise ValueError("No catchment area found for RESA2 station %s." % dis_stn_id)

    area = float(area_df.iloc[0, 0])
    if area <= 0:
        raise ValueError(
            "Catchment area for RESA2 station %s is not positive (%s)."
            % (dis_stn_id, area)
        )

    return area


# Dict mapping RESA2 discharge station IDs to the "parent" RESA2 discharge
# station used for scaling
# {resa_id : parent_resa_id}
scale_dict = {3: 32, 527: 526, 23: 25}

# List to store output
df_list = []

# Loop over stations
for stn_id, par_id in scale_dict.items():
    # Conversion factor: ratio of the FELTFO station area to the parent area
    stn_area = get_catchment_area(stn_id, engine)
    par_area = get_catchment_area(par_id, engine)
    fac = stn_area / par_area

    # Get data for parent for year of interest
    sql = (
        "SELECT * from resa2.discharge_values "
        "WHERE dis_station_id = %s "
        "AND EXTRACT(YEAR FROM xdate) = %s " % (par_id, year)
    )
    df = pd.read_sql(sql, engine)

    # An empty result means nothing can be scaled for this station; flag it
    # rather than letting it disappear silently in the concatenation
    if df.empty:
        print(
            "WARNING: no %s data found for parent station %s; "
            "nothing to scale for station %s." % (year, par_id, stn_id)
        )

    # Apply correction
    df["xvalue"] = df["xvalue"] * fac

    # Change station ID so the scaled series is stored as its own dataset
    df["dis_station_id"] = stn_id

    # Append to output
    df_list.append(df)

# Stack data
df = pd.concat(df_list, axis=0)

df.head()

In [None]:
# # Add new rows to database
# df.to_sql(
#     "discharge_values", con=engine, schema="resa2", if_exists="append", index=False
# )

In [None]:
# # Resa stores daily values as YYYY-MM-DD 00:00:00
# # NVE data has YYYY-MM-DD 12:00:00. Remove HH part
# sql = (
#     "UPDATE resa2.discharge_values "
#     "SET xdate = TRUNC(xdate) "
#     "WHERE EXTRACT(YEAR FROM xdate) = %s "
#     "AND dis_station_id IN (1, 2, 3, 21, 23, 25, 32, 526, 527)" % year
# )

# res = engine.execute(sql)

## 5. Test series uploaded correctly

**Note:** You need to specify the ID for the *chemistry* station, not the `dis_station_id` of the discharge station.

In [None]:
# Plot data for specified station
stn_id = 108

# Restrict the check to the year that was just uploaded: 1 January to
# 31 December of `year`
st_dt = f"{year}-01-01"
end_dt = f"{year}-12-31"

q_df = nivapy.da.extract_resa_discharge(stn_id, st_dt, end_dt, engine, plot=True)