In [1]:
import aquamonitor as am
import labware as lw
import matplotlib.pyplot as plt
import pandas as pd

# Apply matplotlib's "ggplot" style sheet to the figures drawn in this notebook
plt.style.use("ggplot")

# Elveovervåkingsprogrammet quality control
# Notebook 01: Compare Aquamonitor and Labware

This initial notebook queries data for the previous (2017 - 2020) Elveovervåkingsprogrammet from both Labware and Aquamonitor. Values are then compared to ensure results can be matched consistently between the two databases (and that my code is converting units etc. correctly).

In [2]:
# Log in to Aquamonitor (prompts for credentials). The returned token is
# passed to the am.get_* queries further down
am_token = am.login()

Please enter your credentials.


Username:  ···
Password:  ··············


In [3]:
# Log in to Labware. The same am.login() call is reused, with the module
# temporarily pointed at the "admin" site while credentials are entered
am.aqua_site = "admin"
try:
    lw_token = am.login()
finally:
    # Always reset am to 'AquaServices', even if the login above raises, so
    # the module is never left pointing at "admin" for the rest of the session
    am.aqua_site = "AquaServices"

Please enter your credentials.


Username:  ···
Password:  ··············


## 1. Get historic data from Aquamonitor

### 1.1. Find relevant project IDs

In [4]:
# Download the full Aquamonitor project list
proj_df = am.get_projects(token=am_token)

# Pick out every project whose name contains 'lveoverv' (matches the
# Elveovervåkingsprogrammet entries) and show it without truncating text
elv_proj_df = proj_df.query("project_name.str.contains('lveoverv')", engine="python")
with pd.option_context("display.max_colwidth", None):
    display(elv_proj_df)

Unnamed: 0,project_id,project_code,project_name,description
1000,10306,16384-3,Elveovervåkingsprogrammet,Etterfølgeren til RID. Overvåking av tilførsler fra elver til hav. Grunnprogrammet
1040,11126,16384-4,Elveoverv opsj3 2017,Opsjon 3 vannkjemisk overvåking utv elver i 2017
1088,11927,16384-4,Elveoverv opsj3 2018,Opsjon 3 vannkjemisk overvåking utv elver i 2018
1149,12370,16384-19,Elveoverv_Flomprøver,Flomprøver på Elveoverv
1162,12388,,Elveovervåkingen 2017,
1169,12399,16384,Elveovervåkningen Miljøgifter 2018,Miljøgifter fisk Elveovervåkningen 2018
1171,12402,"16384-4,190018",Elveoverv opsj3 2019,Opsjon 3 vannkjemisk overvåking utv elver i 2019
1199,12432,16384,Elveovervåkningen miljøgifter 2019-2020,"Alna, Miljøgifter"
1236,12475,,Elveovervåking biologi 2018,
1237,12476,,Elveovervåkingen 2018,Alle stasjoner for Elveovervåkingen 2018


Based on the above table:

 * Elveovervåkingsprogrammet 2017 - 2020 is project ID 10306 (16384-3)
 * Elveovervåkingsprogrammet 2021 - 2025 is project ID 12564 (200310-3) 

In [5]:
# Period of interest, as day.month.year strings
st_dt = "01.01.2017"
end_dt = "31.12.2019"

# Aquamonitor project ID of the historic programme
his_proj_id = 10306

### 1.2. Get historic stations

In [6]:
# Get the stations registered under the historic project, report how many
# there are and preview the first few rows
his_stn_df = am.get_project_stations(his_proj_id, token=am_token)
print(f"{len(his_stn_df)} stations in project.")
his_stn_df.head()

22 stations in project.


Unnamed: 0,project_id,station_id,station_code,station_name,type
0,10306,40352,BUSEDRA,Drammenselva,Elv
1,10306,40353,TELESKI,Skienselva,Elv
2,10306,40354,VAGEOTR,Otra,Elv
3,10306,40355,VESENUM,Numedalslågen,Elv
4,10306,40356,ØSTEGLO,Glomma ved Sarpsfoss,Elv


### 1.3. Get historic chemistry data

In [7]:
# Download the historic water chemistry for the project and period defined
# above, then cache it as CSV so the query does not need to be repeated
am_csv_path = r"../data/rid_am_historic_data.csv"
am_df = am.get_project_chemistry(his_proj_id, st_dt, end_dt, token=am_token)
am_df.to_csv(am_csv_path, index=False)

In [8]:
# Reload the cached Aquamonitor extract (faster than re-querying)
am_df = pd.read_csv(r"../data/rid_am_historic_data.csv")

# Parse sample dates as timezone-naive timestamps and shorten the
# 'parameter_name' column to 'parameter'
am_df = am_df.assign(
    sample_date=pd.to_datetime(am_df["sample_date"]).dt.tz_localize(None)
).rename(columns={"parameter_name": "parameter"})

am_df.head()

Unnamed: 0,project_id,project_name,station_id,station_code,station_name,sample_date,depth1,depth2,parameter,flag,value,unit
0,10306,Elveovervåkingsprogrammet,40352,BUSEDRA,Drammenselva,2017-01-23 12:00:00,0.0,0.0,Ca,,4.0,mg/L
1,10306,Elveovervåkingsprogrammet,40352,BUSEDRA,Drammenselva,2017-01-23 12:00:00,0.0,0.0,DOC,,2.7,mg/L C
2,10306,Elveovervåkingsprogrammet,40352,BUSEDRA,Drammenselva,2017-01-23 12:00:00,0.0,0.0,Konduktivitet,,3.5,mS/m
3,10306,Elveovervåkingsprogrammet,40352,BUSEDRA,Drammenselva,2017-01-23 12:00:00,0.0,0.0,Mg,,0.6,mg/L
4,10306,Elveovervåkingsprogrammet,40352,BUSEDRA,Drammenselva,2017-01-23 12:00:00,0.0,0.0,NH4-N,,8.0,µg/l


## 2. Get historic data from Labware

### 2.1. Get Labware projects associated with "main" project

In [9]:
# Year of interest
fut_yr = 2020

# Labware code of the main project whose sub-projects are queried below
fut_proj_id = "16384;3"

In [10]:
# Get the Labware projects associated with the main project code.
# NOTE: this rebinds proj_df, which earlier held the Aquamonitor project list
proj_df = lw.get_labware_projects(lw_token, fut_proj_id)
proj_df.head()

Unnamed: 0,name,status,closed
0,507-9322,V,True
1,507-9366,V,True
2,507-9199,V,True
3,507-8436,V,True
4,507-8437,V,True


### 2.2. Get all samples for Labware projects 

In [None]:
# Fetch the samples registered under each of the Labware projects found above
lw_proj_names = proj_df["name"]
samp_df = lw.get_labware_project_samples(lw_token, lw_proj_names)
samp_df.head()

### 2.3. Get results for Labware samples

In [None]:
# Fetch the analytical results for every Labware sample number
samp_numbers = samp_df["sampleNumber"]
res_df = lw.get_labware_sample_results(lw_token, samp_numbers)
res_df.head()

### 2.4. Tidy Labware data

In [None]:
# Tidy the sample table: keep only the attributes needed for the comparison
# and rename them to the snake_case names used for the Aquamonitor data
samp_col_map = {
    "sampleNumber": "sample_id",
    "station_id": "station_id",
    "station_name": "station_name",
    "station_type": "station_type",
    "sampledDate": "sample_date",
    "sampleDepthUpper": "depth1",
    "sampleDepthLower": "depth2",
}
samp_df2 = samp_df[list(samp_col_map)].rename(columns=samp_col_map)

# Translate the analysed fraction into a suffix for the method name. This is
# built as a new Series: a chained `res_df[col].replace(..., inplace=True)`
# does not update res_df when pandas Copy-on-Write is active
frac_suffix = res_df["test.anaFraction"].replace(
    {
        None: "",
        "Partikulært": "-part",
        "Filtrert": "-filt",
    }
)

# Append the suffix to the method name WITHOUT modifying res_df, so that
# re-running this cell cannot append the suffix a second time
res_df2 = res_df.assign(name=res_df["name"] + frac_suffix)[
    ["sample_id", "name", "status", "loq", "numericEntry", "units"]
].rename(columns={"name": "parameter", "numericEntry": "value"})

# Join sample attributes onto the results
lw_df = pd.merge(res_df2, samp_df2, how="left", on="sample_id")

# Replace the status codes with their verbose descriptions
res_status = pd.read_csv("../data/labware_result_status_codes.csv", sep=";")
lw_df = (
    pd.merge(lw_df, res_status, how="left", on="status")
    .drop(columns="status")
    .rename(columns={"description": "status"})
)

# Final column order
lw_cols = [
    "sample_id",
    "station_id",
    "station_name",
    "station_type",
    "sample_date",
    "depth1",
    "depth2",
    "parameter",
    "status",
    "loq",
    "value",
    "units",
]

# # Get only surface samples
# lw_df = lw_df.query("(depth1==0) and (depth2==0)")
# del lw_df["depth1"], lw_df["depth2"]

# Drop duplicates, then remove strange results where station ID is NaN and
# sample date is '0001-01-01' ("station_id == station_id" is False for NaN).
# The filter runs before the dates are parsed. NOTE(review): presumably
# because '0001-01-01' cannot be parsed as a pandas timestamp - confirm
lw_df = (
    lw_df[lw_cols]
    .drop_duplicates()
    .query("station_id == station_id")
    .assign(sample_date=lambda d: pd.to_datetime(d["sample_date"]))
)

# Save
lw_df.to_csv(r"../data/rid_labware_historic_data.csv", index=False)

In [None]:
# Reload the cached Labware extract (faster than re-querying) and parse the
# sample dates as timestamps
lw_df = pd.read_csv(r"../data/rid_labware_historic_data.csv")
lw_df = lw_df.assign(sample_date=pd.to_datetime(lw_df["sample_date"]))

# # Get just data for the period of interest
# lw_df = lw_df[lw_df["sample_date"].dt.year == fut_yr]

lw_df.head()

## 3. Link AM and Labware datasets

The file `../data/lw_am_par_map.csv` was generated by running the following SQL query against Nivabasen:

    SELECT a.name         AS lw_meth,
      a.unit              AS lw_unit,
      b.name              AS am_par,
      b.unit              AS am_unit,
      c.conversion_factor AS lw2am_fac
    FROM NIVADATABASE.METHOD_DEFINITIONS a,
      NIVADATABASE.WC_PARAMETER_DEFINITIONS b,
      NIVADATABASE.WC_PARAMETERS_METHODS c
    WHERE a.method_id  = c.METHOD_ID
    AND c.PARAMETER_ID = b.PARAMETER_ID
    ORDER BY a.name;

In [None]:
# Lookup table matching Labware methods/units to AM parameters/units.
# The file is semicolon-delimited and uses a decimal comma
par_map = pd.read_csv(r"../data/lw_am_par_map.csv", sep=";", decimal=",")
par_map.head()

In [None]:
# Attach the AM parameter name, unit and conversion factor to every Labware
# result by matching on Labware method name and unit.
# All results are built in NEW variables (lw_df and am_df are left untouched),
# so this cell can be re-run without first re-running the cells above
lw_matched = pd.merge(
    lw_df,
    par_map,
    left_on=["parameter", "units"],
    right_on=["lw_meth", "lw_unit"],
    how="left",
)

# Keep only results that have an AM equivalent ("am_par == am_par" is False
# for the NaNs left by the left join) and that are 'Authorised'.
# NOTE: duplicates are deliberately not dropped here
lw_matched = lw_matched.query("am_par == am_par").query("status == 'Authorised'")

# Convert Labware values to AM units and reduce timestamps to calendar dates
lw_matched = lw_matched.assign(
    value=lw_matched["value"] * lw_matched["lw2am_fac"],
    sample_date=pd.to_datetime(lw_matched["sample_date"]).dt.date,
)

# Select the join keys, status and value. The Labware 'parameter' column is
# dropped by this selection BEFORE 'am_par' is renamed, which avoids ending
# up with two columns called 'parameter'
lw_matched = lw_matched[
    ["station_id", "sample_date", "depth1", "depth2", "status", "am_par", "value"]
].rename(columns={"am_par": "parameter"})

# Standardise AM dates in the same way (calendar date only)
am_daily = am_df.assign(sample_date=pd.to_datetime(am_df["sample_date"]).dt.date)

# Join: keep only results present in both databases
df = pd.merge(
    am_daily,
    lw_matched,
    how="inner",
    on=["station_id", "sample_date", "depth1", "depth2", "parameter"],
    suffixes=("_am", "_lw"),
)

## 4. Compare values

In [None]:
# Scatter the Labware values against the Aquamonitor values (red points) and
# overlay a black line where both values are equal
fig, ax = plt.subplots(figsize=(10, 10))
ax.plot(df["value_am"], df["value_lw"], "ro")
ax.plot(df["value_am"], df["value_am"], "k-")
ax.set_xlabel("Value in Aquamonitor")
ax.set_ylabel("Value in Labware")