# Packages Installation

In [1]:
# Install the hakai-profile-qaqc package, which should also install its ioos_qc and hakai_api dependencies
#!pip install git+https://github.com/HakaiInstitute/hakai-profile-qaqc.git

In [2]:
import pandas as pd
import xarray as xr
import datetime as dt


from hakai_api import Client
from ioos_qc.config import QcConfig
import hakai_profile_qc
import ipywidgets as widgets
from ipywidgets import interact

!jupyter nbextension enable --py widgetsnbextension

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


# Download station to be reviewed
This section downloads CTD profile data from the Hakai database. You can modify the `station`, `start_time` and `end_time` variables to refine the data you want to review.

In [3]:
# Query parameters for the Hakai CTD API request
station = "QU39"
start_time = "2019-01-01"
end_time = "2020-01-01"

# Columns to request, as listed by the hakai_profile_qc package
variable_lists = hakai_profile_qc.get.hakai_api_selected_variables

# Assemble the API filter: selected station and time window, skipping casts
# whose status is MISCAST (limit=-1 presumably lifts the row limit -- confirm)
filterUrl = (
    f"station={station}"
    "&(status!=MISCAST|status==null)&limit=-1"
    f"&measurement_dt>{start_time}"
    f"&measurement_dt<{end_time}"
    f"&fields={','.join(variable_lists)}"
)
df = hakai_profile_qc.get.hakai_ctd_data(filterUrl)
print(f"{len(df)} records found")

# Keys identifying one profile and its cast direction; rows are ordered by
# these keys first and then by pressure within each group
group_variables = [
    "device_model",
    "device_sn",
    "ctd_file_pk",
    "ctd_cast_pk",
    "direction_flag",
]
df = df.sort_values(by=[*group_variables, "pressure"])

# Preview the first rows of the sorted data frame
df.head()


Retrieve data from: https://hecate.hakai.org/api/ctd/views/file/cast/data?station=QU39&(status!=MISCAST|status==null)&limit=-1&measurement_dt>2019-01-01&measurement_dt<2020-01-01&fields=ctd_file_pk,ctd_cast_pk,hakai_id,ctd_data_pk,filename,device_model,device_sn,vessel,operators,cast_comments,work_area,cruise,station,device_firmware,cast_number,latitude,longitude,start_dt,bottom_dt,end_dt,duration,start_depth,bottom_depth,direction_flag,measurement_dt,descent_rate,conductivity,conductivity_flag,temperature,temperature_flag,depth,depth_flag,pressure,pressure_flag,par,par_flag,flc,flc_flag,turbidity,turbidity_flag,ph,ph_flag,salinity,salinity_flag,spec_cond,spec_cond_flag,dissolved_oxygen_ml_l,dissolved_oxygen_ml_l_flag,rinko_do_ml_l,rinko_do_ml_l_flag,dissolved_oxygen_percent,dissolved_oxygen_percent_flag,oxygen_voltage,oxygen_voltage_flag,c_star_at,c_star_at_flag,sos_un,sos_un_flag,backscatter_beta,backscatter_beta_flag,cdom_ppb,cdom_ppb_flag
15914 records found


Unnamed: 0,ctd_file_pk,ctd_cast_pk,hakai_id,ctd_data_pk,filename,device_model,device_sn,vessel,operators,cast_comments,...,oxygen_voltage,oxygen_voltage_flag,c_star_at,c_star_at_flag,sos_un,sos_un_flag,backscatter_beta,backscatter_beta_flag,cdom_ppb,cdom_ppb_flag
5147,5505,13723,080217_2019-04-02T19:41:11.666Z,8614263,080217_20190402_1500,RBRmaestro,80217,HAKAI SPIRIT,"bryn.fedje,chris.mackenzie,emma.myers,eva.jord...",,...,,,,,,,,,,
5148,5505,13723,080217_2019-04-02T19:41:11.666Z,8614264,080217_20190402_1500,RBRmaestro,80217,HAKAI SPIRIT,"bryn.fedje,chris.mackenzie,emma.myers,eva.jord...",,...,,,,,,,,,,
5149,5505,13723,080217_2019-04-02T19:41:11.666Z,8614265,080217_20190402_1500,RBRmaestro,80217,HAKAI SPIRIT,"bryn.fedje,chris.mackenzie,emma.myers,eva.jord...",,...,,,,,,,,,,
5150,5505,13723,080217_2019-04-02T19:41:11.666Z,8614266,080217_20190402_1500,RBRmaestro,80217,HAKAI SPIRIT,"bryn.fedje,chris.mackenzie,emma.myers,eva.jord...",,...,,,,,,,,,,
5151,5505,13723,080217_2019-04-02T19:41:11.666Z,8614267,080217_20190402_1500,RBRmaestro,80217,HAKAI SPIRIT,"bryn.fedje,chris.mackenzie,emma.myers,eva.jord...",,...,,,,,,,,,,


# Review Profile
Let's have a look at the percentage of flags present for each test.

In [4]:
# Get the list of parameters (variables associated with a "<variable>_flag" column).
# Only names *ending* in "_flag" are considered, and "<variable>_qartod_flag" is
# excluded, so the helper columns added below are not mistaken for new variables
# when this cell is run again on the same data frame.
flag_suffix = "_flag"
flag_columns = df.filter(regex=r"(?<!_qartod)_flag$").columns.tolist()
# Strip only the trailing suffix; direction_flag is a grouping key, not a variable flag
var_columns = {col[: -len(flag_suffix)] for col in flag_columns} - {"direction"}

# Temporary until the database has the right setup:
# make sure every variable has its QARTOD flag and flag description columns.
# Sorted iteration keeps the order of the added columns deterministic.
for var in sorted(var_columns):
    if var + "_qartod_flag" not in df.columns:
        # Default value 2 -- presumably the QARTOD "not evaluated" code; confirm
        df[var + "_qartod_flag"] = 2
    if var + "_flag_description" not in df.columns:
        df[var + "_flag_description"] = ""

# Retrieve the list of tests (columns ending in _flag or _test), plus a "None" choice
test_list = sorted(df.filter(regex="_flag$|_test$").columns.tolist())
test_list.append("None")

# Run the interactive profile review tool
hakai_profile_qc.show.interactive_profile_viewer(df, var_columns, test_list)


HBox(children=(VBox(children=(Dropdown(description='Test to review', index=37, options=('backscatter_beta_flag…

Output()

Output()

# Define Standard Attributes


In [5]:
# Contact details of the person generating the files (replace with your own)
creator = {
    "creator_name": "Jessy Barrette",
    "creator_url": "https://hakai.org/",
    "creator_email": "jessy.barrette@hakai.org",
    "creator_institution": "Hakai Institute",
    "creator_country": "Canada",
}
# Contact details of the Hakai Institute, publisher of the data
publisher = {
    "publisher_name": "Hakai Institute",
    "publisher_url": "https://hakai.org/",
    "publisher_email": "contact@hakai.org",
    "publisher_country": "Canada",
    "publisher_institution": "Hakai Institute",
}
# General global attributes related to this dataset.
# Spelling of "acknowledgment" and "naming_authority" values corrected: these
# strings are passed on as file metadata, so typos would be published as-is.
standard_global_attributes = {
    "institution": "Hakai Institute",
    "project": "Hakai Oceanography",
    "summary": "text describing that specific data",
    "comment": "",
    "infoUrl": "https://hakai.org/",
    "keywords": "conductivity,temperature,salinity,depth,pressure,dissolved oxygen",
    "acknowledgment": "Hakai Field Technicians, research and IT groups",
    "naming_authority": "Hakai Institute",
    "standard_name_vocabulary": "CF Standard Name Table v1.3",
    "license": "unknown",
    "geospatial_lat_units": "degrees_north",
    "geospatial_lon_units": "degrees_east",
}
# Standard variable attributes (none defined yet)
standard_variable_attributes = {}

# Merge the creator and publisher details into the global attributes
standard_global_attributes.update(creator)
standard_global_attributes.update(publisher)


# Generate a NetCDF File for the approved profiles!

In [6]:
# Hakai IDs of the profiles that were reviewed and approved
approuved_hakai_id = [
    "080217_2017-01-08T18:03:05.167Z",
    "080217_2017-01-26T16:56:39.000Z",
]

# Second argument of generate_netcdf -- presumably the output location, with ""
# meaning the current working directory; confirm against hakai_profile_qc
save_to = ""
for hakai_id in approuved_hakai_id:
    # Attributes specific to this profile
    hakai_id_global_attributes = {
        "title": "Hakai Research CTD Profile: " + hakai_id,
        "id": hakai_id,
        "date_created": dt.datetime.utcnow().isoformat(),
    }
    # Build a fresh dictionary for each profile. Assigning
    # standard_global_attributes directly would only alias it, and the
    # per-profile title/id/date_created would then leak into the shared defaults.
    global_attributes = {
        **standard_global_attributes,
        **hakai_id_global_attributes,
        **creator,
        **publisher,
    }

    # Create the NetCDF Source file!
    hakai_profile_qc.research.generate_netcdf(
        hakai_id,
        save_to,
        extra_global_attributes=global_attributes,
        extra_variable_attributes=standard_variable_attributes,
    )

Retrieve data from: https://hecate.hakai.org/api/ctd/views/file/cast/data?hakai_id={080217_2017-01-08T18:03:05.167Z}&(status!=MISCAST|status==null)&limit=-1
Add derived variables
Load Default Configuration
Retrieve data from: https://hecate.hakai.org/api/eims/views/output_sites?limit=-1
Flag Bad Values: ['.isna', -9.99e-29]


Apply QARTOD Tests to individual ['hakai_id', 'direction_flag']: 100%|██████████| 2/2 [00:00<00:00,  3.24profile/s]


DO Cap Detection to dissolved_oxygen_ml_l variable
DO Cap Detection to rinko_do_ml_l variable
Flag Missing Position Records
Flag Bottom Hit Data
Flag PAR Shadow Data
Apply flag results to latitude
Apply flag results to c_star_at
Apply flag results to conductivity
Apply flag results to depth
Apply flag results to dissolved_oxygen_ml_l
Apply flag results to dissolved_oxygen_percent
Apply flag results to flc
Apply flag results to par
Apply flag results to pressure
Apply flag results to rinko_do_ml_l
Apply flag results to salinity
Apply flag results to sigma0
Apply flag results to temperature
Apply flag results to turbidity
Apply Hakai Grey List
Retrieve data from: https://hecate.hakai.org/api/ctd/views/file/cast?hakai_id=080217_2017-01-08T18:03:05.167Z&limit=-1
Retrieve data from: https://hecate.hakai.org/api/eims/views/output_sites?limit=-1
Retrieve data from: https://hecate.hakai.org/api/ctd/views/file/cast/data?hakai_id={080217_2017-01-26T16:56:39.000Z}&(status!=MISCAST|status==null)&l

Apply QARTOD Tests to individual ['hakai_id', 'direction_flag']: 100%|██████████| 2/2 [00:00<00:00,  6.09profile/s]


DO Cap Detection to dissolved_oxygen_ml_l variable
DO Cap Detection to rinko_do_ml_l variable
Flag Missing Position Records
Flag Bottom Hit Data
Flag PAR Shadow Data
Apply flag results to latitude
Apply flag results to c_star_at
Apply flag results to conductivity
Apply flag results to depth
Apply flag results to dissolved_oxygen_ml_l
Apply flag results to dissolved_oxygen_percent
Apply flag results to flc
Apply flag results to par
Apply flag results to pressure
Apply flag results to rinko_do_ml_l
Apply flag results to salinity
Apply flag results to sigma0
Apply flag results to temperature
Apply flag results to turbidity
Apply Hakai Grey List
Retrieve data from: https://hecate.hakai.org/api/ctd/views/file/cast?hakai_id=080217_2017-01-26T16:56:39.000Z&limit=-1
Retrieve data from: https://hecate.hakai.org/api/eims/views/output_sites?limit=-1


In [7]:
# Produce every QC'd NetCDF file, combining the automated QARTOD flags with the
# manual review taken from "ctd/views/file/cast/data".
output_path = "reviewed_netcdf"
hakai_profile_qc.research.update_research_dataset_with_ctd_log(output_path)

Retrieve data from: https://hecate.hakai.org/api/eims/views/output/ctd_qc?limit=-1
Save 018032_2012-06-07T21:13:39.833Z
Retrieve data from: https://hecate.hakai.org/api/ctd/views/file/cast/data?hakai_id={018032_2012-06-07T21:13:39.833Z}&(status!=MISCAST|status==null)&limit=-1
Add derived variables
Load Default Configuration
Retrieve data from: https://hecate.hakai.org/api/eims/views/output_sites?limit=-1
Flag Bad Values: ['.isna', -9.99e-29]


Apply QARTOD Tests to individual ['hakai_id', 'direction_flag']: 100%|██████████| 2/2 [00:00<00:00, 11.70profile/s]


DO Cap Detection to dissolved_oxygen_ml_l variable
DO Cap Detection to rinko_do_ml_l variable
Flag Missing Position Records
Flag Bottom Hit Data
Flag PAR Shadow Data
Apply flag results to latitude
Apply flag results to c_star_at
Apply flag results to conductivity
Apply flag results to depth
Apply flag results to dissolved_oxygen_ml_l
Apply flag results to dissolved_oxygen_percent
Apply flag results to flc
Apply flag results to par
Apply flag results to pressure
Apply flag results to rinko_do_ml_l
Apply flag results to salinity
Apply flag results to sigma0
Apply flag results to temperature
Apply flag results to turbidity
Apply Hakai Grey List
Retrieve data from: https://hecate.hakai.org/api/ctd/views/file/cast?hakai_id=018032_2012-06-07T21:13:39.833Z&limit=-1
Retrieve data from: https://hecate.hakai.org/api/eims/views/output_sites?limit=-1
Save 018032_2012-06-07T21:48:29.499Z
Retrieve data from: https://hecate.hakai.org/api/ctd/views/file/cast/data?hakai_id={018032_2012-06-07T21:48:29.4

Apply QARTOD Tests to individual ['hakai_id', 'direction_flag']: 100%|██████████| 2/2 [00:00<00:00,  4.11profile/s]


DO Cap Detection to dissolved_oxygen_ml_l variable
DO Cap Detection to rinko_do_ml_l variable
Flag Missing Position Records
Flag Bottom Hit Data
Flag PAR Shadow Data
Apply flag results to latitude
Apply flag results to c_star_at
Apply flag results to conductivity
Apply flag results to depth
Apply flag results to dissolved_oxygen_ml_l
Apply flag results to dissolved_oxygen_percent
Apply flag results to flc
Apply flag results to par
Apply flag results to pressure
Apply flag results to rinko_do_ml_l
Apply flag results to salinity
Apply flag results to sigma0
Apply flag results to temperature
Apply flag results to turbidity
Apply Hakai Grey List
Retrieve data from: https://hecate.hakai.org/api/ctd/views/file/cast?hakai_id=018032_2012-06-07T21:48:29.499Z&limit=-1
Retrieve data from: https://hecate.hakai.org/api/eims/views/output_sites?limit=-1
Save 018032_2012-06-07T22:08:17.000Z
Retrieve data from: https://hecate.hakai.org/api/ctd/views/file/cast/data?hakai_id={018032_2012-06-07T22:08:17.0

Apply QARTOD Tests to individual ['hakai_id', 'direction_flag']: 100%|██████████| 2/2 [00:00<00:00,  8.62profile/s]


DO Cap Detection to dissolved_oxygen_ml_l variable
DO Cap Detection to rinko_do_ml_l variable
Flag Missing Position Records
Flag Bottom Hit Data
Flag PAR Shadow Data
Apply flag results to latitude
Apply flag results to c_star_at
Apply flag results to conductivity
Apply flag results to depth
Apply flag results to dissolved_oxygen_ml_l
Apply flag results to dissolved_oxygen_percent
Apply flag results to flc
Apply flag results to par
Apply flag results to pressure
Apply flag results to rinko_do_ml_l
Apply flag results to salinity
Apply flag results to sigma0
Apply flag results to temperature
Apply flag results to turbidity
Apply Hakai Grey List


KeyboardInterrupt: 

# To Download Generated Files 
Click on the folder icon in the left sidebar of Google Colab and select the files to download.
