# Packages Installation

In [1]:
import pandas as pd
import xarray as xr
import datetime as dt

# Load local modules
!pip install git+https://github.com/HakaiInstitute/hakai-profile-qaqc.git
import hakai_qc


^C


# Download station to be reviewed
This section downloads CTD profile data from the Hakai Database. You can modify the `station`, `start_time` and `end_time` variables to refine the data you want to review.

In [2]:
# Query parameters for the Hakai CTD API -- edit these to choose what to review
station = 'QU39'
start_time = '2019-01-01'
end_time = '2020-01-01'

# Field names requested from the API
variable_lists = hakai_qc.get.hakai_api_selected_variables()

# Assemble the filter: the selected station, status other than MISCAST,
# measurements between start_time and end_time, and only the selected fields.
# NOTE(review): limit=-1 presumably means "no row limit" -- confirm with the API docs.
filterUrl = '&'.join([
    f'station={station}',
    'status!=MISCAST',
    'limit=-1',
    f'measurement_dt>{start_time}',
    f'measurement_dt<{end_time}',
    'fields=' + ','.join(variable_lists),
])
df, url = hakai_qc.get.hakai_ctd_data(filterUrl)
print(f'{len(df)} records found')

# Order the records by instrument, file, cast and cast direction, and by
# pressure within each of those groups
group_variables = ['device_model', 'device_sn', 'ctd_file_pk', 'ctd_cast_pk', 'direction_flag']
sort_keys = [*group_variables, 'pressure']
df = df.sort_values(by=sort_keys)

# Preview the first rows of the sorted data frame
df.head()

TypeError: an integer is required (got type str)

# Review Profile
Let's have a look at the percentage of flags present for each test.

In [34]:
# Get the list of parameters: every column named "<variable>_flag", ignoring
# direction_flag.
# Match on the *suffix* only. A substring match (filter(like='_flag')) would also
# catch the "<variable>_qartod_flag" and "<variable>_flag_description" columns
# added below, so re-running this cell would invent bogus variables such as
# "temperature_description" and keep appending new columns to df.
FLAG_SUFFIX = '_flag'
QARTOD_SUFFIX = '_qartod_flag'
DESCRIPTION_SUFFIX = '_flag_description'

flag_columns = [
    col for col in df.columns
    if col.endswith(FLAG_SUFFIX) and not col.endswith(QARTOD_SUFFIX)
]
# Strip only the trailing "_flag" (str.replace would remove every occurrence)
var_columns = {col[:-len(FLAG_SUFFIX)] for col in flag_columns} - {'direction'}

# Temporary until the database has the right setup: make sure every variable has
# a QARTOD flag column and a flag description column. Existing columns are left
# untouched, so this step is safe to re-run.
# NOTE(review): 2 is presumably the QARTOD "not evaluated" code -- confirm.
for var in sorted(var_columns):  # sorted: deterministic order of the new columns
    if var + QARTOD_SUFFIX not in df.columns:
        df[var + QARTOD_SUFFIX] = 2
    if var + DESCRIPTION_SUFFIX not in df.columns:
        df[var + DESCRIPTION_SUFFIX] = ''

# Retrieve the list of tests: every column ending in "_flag" or "_test", plus a
# "None" entry appended at the end
test_list = sorted(df.filter(regex='_flag$|_test$').columns.tolist())
test_list.append("None")

# Run the interactive profile review tool
hakai_qc.show.interactive_profile_viewer(df,var_columns,test_list)

Select Flagged Variable(s) to consider in the following list:


interactive(children=(Dropdown(description='Test to review', index=37, options=('backscatter_beta_flag', 'back…

In [50]:
from ipywidgets import widgets, interactive_output

# Dropdown used to pick one profile by its Hakai ID.
# The original `value=df[]` was a SyntaxError, so the cell could not run. The
# `value` argument is dropped: with no explicit value the Dropdown starts on its
# first option. df holds many records per profile, so the IDs are de-duplicated
# (order of first appearance is kept) to list each profile once.
hakai_id_options = df['hakai_id'].unique().tolist()
hakai_id_list = widgets.Dropdown(options=hakai_id_options)


# Generate a NetCDF File for the approved profiles!

In [30]:
# Hakai IDs of the profiles to export, one NetCDF file per ID
approuved_hakai_id = ['080217_2017-01-08T18:03:05.167Z','080217_2017-01-26T16:56:39.000Z']

# Output location passed as the second argument of research_profile_netcdf
save_to = ''

# Creator and publisher metadata, merged into the global attributes of every file
creator = {
    'creator_name': 'Jessy Barrette',
    'creator_url': 'https://hakai.org/',
    'creator_email': 'jessy.barrette@hakai.org',
    'creator_institution':'Hakai Institute',
    'creator_country':'Canada'
}
publisher = {
    'publisher_name': 'Hakai Institute',
    'publisher_url': 'https://hakai.org/',
    'publisher_email': 'contact@hakai.org',
    'publisher_country':'Canada',
    'publisher_institution':'Hakai Institute'
}

for hakai_id in approuved_hakai_id:
    # Attributes specific to this profile (title, id, creation time) together
    # with the attributes shared by all Hakai research profiles.
    # Two misspelled values that ended up in the published metadata are fixed
    # here: 'Hakai Instititute' and 'Hakai Field Techniciens'.
    global_attributes = {'institution':'Hakai Institute',
                         'project':'Hakai Oceanography',
                         'title':'Hakai Research CTD Profile: '+hakai_id,
                         'summary':'text describing that specific data',
                         'comment': '',
                         'infoUrl':'https://hakai.org/',
                         'keywords':'conductivity,temperature,salinity,depth,pressure,dissolved oxygen',
                         'acknowledgment':'Hakai Field Technicians, research and IT groups',
                         'id':hakai_id,
                         'naming_authority':'Hakai Institute',
                         # isoformat() already returns a str (naive UTC timestamp)
                         'date_created':dt.datetime.utcnow().isoformat(),
                         'standard_name_vocabulary': 'CF Standard Name Table v1.3',
                         'license':'unknown',
                         'geospatial_lat_units':'degrees_north',
                         'geospatial_lon_units':'degrees_east'}

    # Build a separate merged dict rather than aliasing global_attributes, so the
    # per-profile attributes are not mutated. Key order and precedence match the
    # previous update() calls (creator first, then publisher).
    extra_global_attributes = {**global_attributes, **creator, **publisher}

    # Create the NetCDF Source file!
    # NOTE(review): `variable_attributes` is not defined in any cell visible in
    # this notebook; it has to exist in the kernel before this cell runs --
    # confirm where it is meant to come from.
    hakai_qc.get.research_profile_netcdf(hakai_id,save_to,
                                         extra_global_attributes=extra_global_attributes,
                                         extra_variable_attributes=variable_attributes)