In [None]:
%matplotlib inline
import calendar
import configparser
import datetime
import os

import matplotlib.pyplot as plt
import nivapy3 as nivapy
import pandas as pd
import seaborn as sn

# Use the "ggplot" style for all matplotlib figures drawn in this notebook
plt.style.use("ggplot")

# Get API key for HydAPI. The key is passed to the HydAPI station lookup and
# data query in the NVE section of this notebook
api_key = nivapy.da.authenticate_nve_hydapi()

# Elveovervåkingsprogrammet - average monthly temperatures

**This notebook has not been run for 2023, as the data were already processed manually by Øyvind.**

This notebook calculates average monthly temperatures for the 20 main rivers in the 2021-22 monitoring programme. Data come from a variety of sources, so the workflow is a bit messy. See e-mail from Liv Bente received 31.08.2021 at 12.26 for more details. In addition, the spreadsheet here shows which data sources are usually used for which stations:

    K:\Prosjekter\Ferskvann\16384 Elveovervåkingsprogrammet\2019\4. Data\6. Vanntemperatur\Grunnprogrammet\2018_TempData_Overview_oppdat for 2019_28aug20.xlsx

**Note:** In this notebook I have attempted to switch to HydAPI instead of Hydra-II for the temperature data.

## 1. Get manual data from RESA2

The code below gets all the temperature data from RESA2 and calculates monthly averages.

In [None]:
# Connect to db. `eng` is passed to all the RESA2 queries in the cells below
eng = nivapy.da.connect()

In [None]:
# Year of interest. Used for the query date ranges and to build the input/output
# folder and file names (e.g. "temp_{year}-{year - 1999}") in the cells below
year = 2023

In [None]:
# List all RESA2 projects and keep those whose name contains "lveovervåking"
# (the pattern omits the leading letter, so both "E..." and "e..." match).
# Projects with a missing name are excluded
all_projects = nivapy.da.select_resa_projects(eng)
name_matches = all_projects["project_name"].str.contains("lveovervåking", na=False)
prj_df = all_projects[name_matches]
prj_df

In [None]:
# Get stations for project 4551
# NOTE(review): 4551 is presumably the project_id of the Elveovervåkingsprogrammet
# project listed by the project search in the previous cell - confirm
stn_df = nivapy.da.select_resa_project_stations([4551], eng)
stn_df

In [None]:
# Drop station 29848 (TROEMÅL), which has now been replaced by TROEMÅL2
stn_df = stn_df[stn_df["station_id"] != 29848]

In [None]:
# List the parameters available at these stations during the year, then show
# the row(s) for "Temp" so its parameter ID can be read off
par_grid = nivapy.da.select_resa_station_parameters(
    stn_df,
    f"{year}-01-01",
    f"{year}-12-31",
    eng,
)
par_grid[par_grid["parameter_name"] == "Temp"]

In [None]:
# Get temp data for the selected stations for the whole of `year`
# NOTE(review): 125 is presumably the parameter ID for "Temp" found in the
# previous cell - confirm. The second return value (`dup_df`) is not used
# anywhere else in this notebook
wc_df, dup_df = nivapy.da.select_resa_water_chemistry(
    stn_df,
    [125],
    f"{year}-01-01",
    f"{year}-12-31",
    eng,
)

wc_df.head()

In [None]:
# Number of temperature measurements in the year at each site
n_obs = wc_df.groupby("station_id")[["Temp_oC"]].count()
print(n_obs)

# Monthly mean temperature per station (this adds a "month" column to wc_df)
wc_df["month"] = wc_df["sample_date"].dt.month
mon_df = (
    wc_df.groupby(["station_id", "month"])[["Temp_oC"]]
    .mean()
    .reset_index()
)

In [None]:
# Plot monthly means from the manual (RESA2) samples, one panel per station
sn.catplot(
    x="month",
    y="Temp_oC",
    data=mon_df,
    col="station_id",
    col_wrap=4,
    kind="point",
    height=3,
)

# Save. The season folder is derived from `year`, like the other input/output
# paths in this notebook, instead of being hard-coded to "temp_2022-23"
out_path = f"/home/jovyan/shared/common/elveovervakingsprogrammet/temperature_data/temp_{year}-{year - 1999}/other/temps_manual_{year}.png"
plt.savefig(out_path, dpi=300)

In [None]:
# Pivot to "wide" format: one row per station_id, one column per month present
# in the data, values are the monthly mean temperatures
man_df = mon_df.pivot(index="station_id", columns="month", values="Temp_oC")

man_df

Based on Liv Bente's spreadsheet here

    K:\Prosjekter\Ferskvann\16384 Elveovervåkingsprogrammet\2019\4. Data\6. Vanntemperatur\Grunnprogrammet\2018_TempData_Overview_oppdat for 2019_28aug20.xlsx
    
we will use the data from RESA for 9 stations. Filter to just these.

In [None]:
# Keep only the stations for which the manual RESA data are used
# (man_df is indexed by station_id)
resa_list = [29617, 36225, 29612, 29832, 29842, 29822, 29844, 29820, 29819]
man_df = man_df[man_df.index.isin(resa_list)]

## 2. TinyTag data

TinyTags are deployed in 7 rivers and the sensors are swapped in the middle of the year, so we need to splice together two data files for each location. The TinyTags are often recording even when they're out of the water, so it's important to know the date when each logger was taken in/set out. Liv Bente stores details in Excel files on the Project Portalen [here](https://niva365.sharepoint.com/sites/projects1/4076/SitePages/home.aspx#nav=InvoDocumentsRecent) under the folder

    {year}/Data/Vanntemperatur/TinyTag
    
These files provide a rough guide, but it is often more obvious from the data exactly when changes have taken place.
    
**Note:** The date format in the text files sometimes changes (e.g. from `%d.%m.%Y %H.%M.%S,%f` to `%d.%m.%Y %H:%M:%S,%f`), so it might be necessary to modify the code below. The raw files may also need converting to UTF-8.

In [None]:
# Dates from Liv Bente's spreadsheet.
# Maps RESA2 station_id -> [river name used in the TinyTag file names,
#                           date/time at which the logger was swapped]
# NOTE(review): every swap date below is in 2022, but `year` is set to 2023 in
# this notebook. The TinyTag loop truncates the first series to the period from
# 1 January of `year` to the swap date, so these entries presumably need
# replacing with the swap dates for `year` before this section is run - confirm
swap_dates = {
    29615: ["Numedalslagen", "2022-06-07 09:30"],
    29613: ["Skienselva", "2022-06-07 11:00"],
    29614: ["Otra", "2022-06-07 10:34"],
    29783: ["Orreelva", "2022-06-07 09:15"],
    29821: ["Vosso", "2022-06-07 12:25"],
    29782: ["Vefsna", "2022-09-02 11:00"],
    29779: ["Altaelva", "2022-09-30 09:32"],
}

# Base folder (with tidied file names)
data_fold = r"/home/jovyan/shared/common/elveovervakingsprogrammet/temperature_data"

# Define date and decimal format for the current and previous year.
# "prev" applies to the file from the logger deployed in the previous year and
# "curr" to the file from the logger deployed in `year`
curr_dt_fmt = "%d.%m.%Y %H:%M:%S.%f"
prev_dt_fmt = "%d.%m.%Y %H:%M:%S.%f"
curr_dec_sep = "."
prev_dec_sep = "."

In [None]:
def _read_tinytag(path, dt_fmt, dec_sep):
    """Read one TinyTag text export into a dataframe of temperatures.

    Args:
        path:    Str. Path to the whitespace-delimited text file. The first two
                 rows are skipped and the remaining rows are read as date, time
                 and temperature
        dt_fmt:  Str. strptime format of the combined "<date> <time>" string
        dec_sep: Str. Decimal separator used in the file

    Returns:
        Dataframe with a DatetimeIndex named "datetime" and a single "temp" column.
    """
    raw = pd.read_csv(
        path,
        sep=r"\s+",  # Same parsing as the deprecated 'delim_whitespace=True'
        skiprows=2,
        names=["date", "time", "temp"],
        decimal=dec_sep,
    )
    raw["datetime"] = pd.to_datetime(raw["date"] + " " + raw["time"], format=dt_fmt)
    return raw.set_index("datetime")[["temp"]]


# Fail early with a clear message if the swap dates have not been updated for
# `year`. Otherwise the truncation below raises a much less obvious ValueError
# (the start of the window would be after its end)
wrong_year = [
    f"{stn} ({swap_str})"
    for stn, swap_str in swap_dates.values()
    if pd.to_datetime(swap_str).year != year
]
if wrong_year:
    raise ValueError(
        f"Logger swap dates must be in {year}; update 'swap_dates' for: "
        + ", ".join(wrong_year)
    )

# Container for output (one monthly dataframe per station)
df_list = []

# Setup plot: one panel per station. 'squeeze=False' keeps `axes` 2D even if
# only one station is listed
fig, axes = plt.subplots(
    nrows=len(swap_dates), ncols=1, figsize=(10, 15), sharex=True, squeeze=False
)

# Loop over stations
for idx, (stn_id, (stn, swap_str)) in enumerate(swap_dates.items()):
    print(stn)

    # Get data paths. The first file comes from the logger deployed in the
    # previous year, the second from the logger deployed in `year`
    path_prev_yr = os.path.join(
        data_fold,
        f"temp_{year - 1}-{year - 2000}/tiny_tag/{stn}_{year - 1}-{year - 2000}.txt",
    )
    path_yr = os.path.join(
        data_fold, f"temp_{year}-{year - 1999}/tiny_tag/{stn}_{year}-{year - 1999}.txt"
    )

    # Get date logger changed. Skip 3 hrs after the swap to allow time for
    # re-equilibration of the new logger
    swap_dt = pd.to_datetime(swap_str)
    swap_dt_plus1 = swap_dt + pd.DateOffset(hours=3)

    # Series 1: start of `year` up to the swap
    df_prev_yr = _read_tinytag(path_prev_yr, prev_dt_fmt, prev_dec_sep).truncate(
        before=f"{year}-01-01 00:00", after=swap_dt
    )

    # Series 2: from 3 hrs after the swap to the end of `year`
    df_yr = _read_tinytag(path_yr, curr_dt_fmt, curr_dec_sep).truncate(
        before=swap_dt_plus1, after=f"{year}-12-31 23:59"
    )

    # Plot the two series as separate lines (the outer merge keeps them in
    # separate columns) so the splice point is visible
    ax = axes[idx, 0]
    pd.merge(
        df_prev_yr, df_yr, how="outer", left_index=True, right_index=True
    ).plot(ax=ax, legend=False)
    ax.set_title(stn)
    ax.set_xlim([datetime.date(year, 1, 1), datetime.date(year, 12, 31)])

    # Concat to a single series and calculate monthly avgs.
    monthly = pd.concat([df_prev_yr, df_yr], axis=0, sort=True).resample("M").mean()

    # Add to output
    monthly["station_id"] = stn_id
    df_list.append(monthly)

plt.tight_layout()

In [None]:
# Combine results. The temperature column is renamed by name rather than by
# assigning a positional list of labels, so the result does not depend on the
# column order returned by pd.concat
df = pd.concat(df_list, axis=0, sort=True).rename(columns={"temp": "Temp_oC"})
df["month"] = df.index.month
df = df.reset_index()

# Pivot to "wide" format: one row per station_id, one column per month
tt_df = df.pivot(index="station_id", columns="month", values="Temp_oC")

tt_df

## 3. Other logger data

Other temperature data are also available for Målselva and Vegårdselva (= Storelva/Lundevann).

### 3.1. Målselva

In [None]:
# Read the "Temp" sheet and discard the station name column
in_xlsx = f"/home/jovyan/shared/common/elveovervakingsprogrammet/temperature_data/temp_{year}-{year - 1999}/other/malselva_{year}.xlsx"
raw = pd.read_excel(in_xlsx, sheet_name="Temp").drop(columns="StationName")
raw["Date"] = pd.to_datetime(raw["Date"], format="%d.%m.%Y %H:%M:%S")

# Monthly means. Exactly one data column is expected to remain at this point
monthly = raw.set_index("Date").resample("M").mean()
monthly.columns = ["Temp_oC"]

# Tidy: label each row with its month and the station ID, then drop the dates
monthly = monthly.assign(month=monthly.index.month, station_id=38005)
monthly = monthly.reset_index(drop=True)

# Pivot to "wide" format (single row for station 38005)
mal_df = monthly.pivot(index="station_id", columns="month", values="Temp_oC")

mal_df

### 3.2. Vegårdselva

In [None]:
# Read the "Temp" sheet and discard the station name column
in_xlsx = f"/home/jovyan/shared/common/elveovervakingsprogrammet/temperature_data/temp_{year}-{year - 1999}/other/vegardselva_{year}.xlsx"
raw = pd.read_excel(in_xlsx, sheet_name="Temp").drop(columns="StationName")
raw["Date"] = pd.to_datetime(raw["Date"], format="%d.%m.%Y %H:%M:%S")

# Monthly means. Exactly one data column is expected to remain at this point
monthly = raw.set_index("Date").resample("M").mean()
monthly.columns = ["Temp_oC"]

# Tidy: label each row with its month and the station ID, then drop the dates
monthly = monthly.assign(month=monthly.index.month, station_id=30019)
monthly = monthly.reset_index(drop=True)

# Pivot to "wide" format (single row for station 30019)
veg_df = monthly.pivot(index="station_id", columns="month", values="Temp_oC")

veg_df

## 4. NVE data

In 2019, NVE datasets were only required for Vikedalselva and Orkla.

**Note:** For Orkla, we usually use data from 121.62.0, as this site has a long temperature record. However, in 2022 the data from this location are not complete, so for this year I will use data from 121.22.0, which is just downstream. Similarly, data for Vikedalselva (`38.2.0`) are not complete for 2022, but the series from Holmen (`38.1.0`) looks OK and is only about 50 m upstream.

In [None]:
# NVE temperature station codes -> RESA station IDs. Only the uncommented
# entries are queried; the others are kept as a record of the alternatives
stn_id_dict = {
    # "2.1087.0": 29617,
    # "12.298.0": 29612,
    # "15.115.0": 29615,
    # "16.207.0": 29613,
    # "21.79.0": 29614,
    # "27.29.0": 29832,
    # "38.2.0": 29837,  # Vikedalselva utløp. Use this if possible
    "38.1.0": 29837,  # Holmen, about 50 m upstream of Vikedalselva utløp. Use this if problems with Vikedalselva utløp
    # "62.30.0": 29821,
    # "84.23.0": 29842,
    # "121.62.0": 29778,  # Orkla (with long time series, not good in 2020)
    "121.22.0": 29778,  # Orkla (with OK data for 2020, but nothing before 2014)
    # "151.32.0": 29782,
    # "246.11.0": 29819,
    # "212.11.0": 29779,  # Alta
}
nve_stn_ids = stn_id_dict.keys()

# Look the selected codes up in the full HydAPI station list and report how
# many of them were found
all_nve_stns = nivapy.da.get_nve_hydapi_stations(api_key=api_key)
nve_stn_df = all_nve_stns.query("station_id in @nve_stn_ids")
n_found, n_wanted = len(nve_stn_df), len(nve_stn_ids)
print(f"{n_found} out of {n_wanted} stations found in HydAPI:")
nve_stn_df

In [None]:
# Get temperature for the selected NVE stations
# NOTE(review): 1003 is presumably the HydAPI parameter ID for water temperature,
# and resolution=1440 presumably requests daily values (1440 minutes) - confirm
par_ids = [1003]
st_dt = f"{year}-01-01"
end_dt = f"{year + 1}-01-01"  # 1 January of the following year
nve_df = nivapy.da.query_nve_hydapi(
    nve_stn_ids, par_ids, st_dt, end_dt, resolution=1440, api_key=api_key
)
nve_df.head()

In [None]:
# Expect one record per station for each day of the year
days_in_year = 366 if calendar.isleap(year) else 365
expected_records = len(nve_stn_df) * days_in_year
if len(nve_df) != expected_records:
    print("Number of records is not as expected.\n\n")

# List the series that have at least one record with a quality flag other than 3
print("The following series have not completed quality control (i.e. 'quality' < 3;")
print("see https://hydapi.nve.no/UserDocumentation/ for details):\n")
not_final = nve_df[nve_df["quality"] != 3]
print(not_final[["station_id", "station_name"]].drop_duplicates())

# List the series that contain missing values
is_missing = nve_df["value"].isna()
if is_missing.any():
    print("\n\nThe following records contain NaN values:\n")
    print(nve_df.loc[is_missing, ["station_id", "station_name"]].drop_duplicates())

In [None]:
# Plot the NVE series as lines, one row of panels per station. Both axes are
# shared between panels so the stations can be compared directly
sn.relplot(
    x="datetime",
    y="value",
    row="station_name",
    data=nve_df,
    kind="line",
    aspect=3,
    height=2,
    facet_kws={"sharey": True, "sharex": True},
)

In [None]:
# Monthly means per NVE station, built as one "wide" row per RESA station ID.
# Note: this cell replaces `nve_df` (the raw HydAPI records) with the wide
# monthly table, so the HydAPI query cell must be re-run before re-running it
monthly_rows = []

for nve_code, resa_id in stn_id_dict.items():
    # Values for this station, indexed by time
    stn_values = nve_df[nve_df["station_id"] == nve_code].set_index("datetime")[
        ["value"]
    ]
    stn_monthly = stn_values.resample("M").mean()

    # Index by month number and label the single column with the RESA ID, so
    # that transposing gives one row per station and one column per month
    stn_monthly.index = stn_monthly.index.month
    stn_monthly.index.name = "month"
    stn_monthly.columns = [resa_id]

    monthly_rows.append(stn_monthly.T)

# Combine
nve_df = pd.concat(monthly_rows, axis=0, sort=True)
nve_df.index.name = "station_id"
nve_df

## Combine

In [None]:
# "Wide" monthly tables from each section, paired with the label of their source
wide_by_source = [
    (man_df, "Manual"),  # RESA
    (tt_df, "TinyTag"),
    (mal_df, "Other"),
    (veg_df, "Other"),
    (nve_df, "NVE"),
]

# Melt each table to long format and tag it with its source. The melted column
# names are given explicitly so they do not depend on how each table was built
long_parts = [
    wide.reset_index()
    .melt(id_vars="station_id", var_name="month", value_name="value")
    .assign(source=source)
    for wide, source in wide_by_source
]

# Combine
df = pd.concat(long_parts, axis=0, sort=True)

# Join stn codes
df = pd.merge(df, stn_df[["station_id", "station_code"]], how="left", on="station_id")

# Select and rename columns by name, so a label can never be attached to the
# wrong column (the station ID is dropped in favour of the station code)
df = df[["month", "source", "value", "station_code"]].rename(
    columns={
        "month": "Month",
        "source": "Source",
        "value": "Temperature (C)",
        "station_code": "Station code",
    }
)

df.head()

In [None]:
# Monthly means from all sources, one panel per station code.
# (Optionally pass hue="Source" and a list of linestyles to distinguish sources)
catplot_kwargs = {
    "x": "Month",
    "y": "Temperature (C)",
    "data": df,
    "col": "Station code",
    "col_wrap": 4,
    "height": 3,
    "kind": "point",
}
sn.catplot(**catplot_kwargs)

# Save the figure alongside the other outputs for this season
out_path = f"/home/jovyan/shared/common/elveovervakingsprogrammet/temperature_data/temp_{year}-{year - 1999}/temps_all_sources_{year}.png"
plt.savefig(out_path, dpi=300)

## 5. Format for output

In [None]:
# Stack the "wide" tables from all sources: one row per station, one column per
# month. Each table is indexed by station_id, which becomes a column again
df = pd.concat(
    [man_df, tt_df, mal_df, veg_df, nve_df], axis=0, sort=True
).reset_index()

# Join station details
df = pd.merge(df, stn_df, how="left", on="station_id")

# Keep the identifying columns followed by the twelve monthly means (the other
# station details are not written to the output), rounded to 2 d.p.
month_cols = list(range(1, 13))
df = df[["station_id", "station_code", "station_name"] + month_cols].round(2)

# Save output
out_csv = f"/home/jovyan/shared/common/elveovervakingsprogrammet/temperature_data/temp_{year}-{year - 1999}/monthly_avg_temps_{year}-{year - 1999}.csv"
df.to_csv(out_csv, encoding="utf-8", index=False)

df