# Hakai Nutrient QA-QC 
---
# Start Notebook
## Install packages 
Google colab servers has already a few commonly used packages installed. To install those missing, which are specific toh Hakai let's run the following commands:

In [1]:
!pip install hakai-api
!pip install git+git://github.com/HakaiInstitute/ioos_qc.git@development
!pip install git+git://github.com/HakaiInstitute/process_ocean_timeseries

Collecting hakai-api
  Downloading hakai_api-1.1.2-py3-none-any.whl (4.0 kB)
Installing collected packages: hakai-api
Successfully installed hakai-api-1.1.2
Collecting git+git://github.com/HakaiInstitute/ioos_qc.git@development
  Cloning git://github.com/HakaiInstitute/ioos_qc.git (to revision development) to /tmp/pip-req-build-fjwjeftc
  Running command git clone -q git://github.com/HakaiInstitute/ioos_qc.git /tmp/pip-req-build-fjwjeftc
  Running command git checkout -b development --track origin/development
  Switched to a new branch 'development'
  Branch 'development' set up to track remote branch 'development' from 'origin'.
Collecting geojson
  Downloading geojson-2.5.0-py2.py3-none-any.whl (14 kB)
Collecting geopandas
  Downloading geopandas-0.9.0-py2.py3-none-any.whl (994 kB)
[K     |████████████████████████████████| 994 kB 8.1 MB/s 
Collecting pygc
  Downloading pygc-1.1.0.tar.gz (19 kB)
Collecting ruamel.yaml
  Downloading ruamel.yaml-0.17.13-py3-none-any.whl (108 kB)
[K   

## Import the python packages needed

In [2]:
# Let's load pandas for working with the data in table
import pandas as pd 
import numpy as np

# Let's load seaborn to plot the data
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# This to install the hakai api tool and be able to download some data from hakai's database
from hakai_api import Client

# Install ioos_qc which is used to qc data
from ioos_qc.config import QcConfig
from ioos_qc import qartod

from ipywidgets import interact, interactive, fixed, interact_manual
from ipywidgets import widgets, HBox, VBox
import time

from process_ocean_data.tools import qc as review

# Define some project standards

In [3]:
# Flag convention
flag_convention = 'HAKAI'
flag_color_map = {key:value['Color'] for key,value in review.flag_conventions[flag_convention].items()}

# And the interesting variables to use from the Hakai database
nutrient_variables = ['no2_no3_um','sio2','po4']

pd.options.display.max_columns = 100

# Download data from the Hakai database
For more information regarding the Hakai API, go [here](https://github.com/HakaiInstitute/hakai-api).

You can find a list of all the data type endpoints [here](http://hakaiinstitute.github.io/hakai-api/#endpoints).

In [4]:
# Let's retrieve the endpoint to retrieve nutrients data:
endpointUrl = '/eims/views/output/nutrients'
site_id = 'QU39'
start_time = '2012-01-01'
end_time = '2022-06-11'
# We'll retrieve data only associated with QU39 between January 1st 2019 to January 1st 2020
filterUrl = 'site_id={0}&collected>{1}&collected<{2}&limit=-1'.format(
    site_id, start_time, end_time
)

# Get Hakai Data    
#Get Data from Hakai API
client = Client() # Follow stdout prompts to get an API token

# Make a data request for sampling stations
url = '%s/%s?%s' % (client.api_root,endpointUrl,filterUrl)
response = client.get(url)
df = pd.DataFrame(response.json())
original_columns = df.columns
print(str(len(df))+' records downloaded')
df.head()

Please go here and authorize:
https://hecate.hakai.org/api-client-login

Copy and past your credentials from the login page:
access_token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6IjExNjMxNzQzNDYzMTA5NzUwOTczOSIsImVtYWlsIjoiamVzc3kuYmFycmV0dGVAaGFrYWkub3JnIiwidmVyaWZpZWRfZW1haWwiOnRydWUsIm5hbWUiOiJKZXNzeSBCYXJyZXR0ZSIsImdpdmVuX25hbWUiOiJKZXNzeSIsImZhbWlseV9uYW1lIjoiQmFycmV0dGUiLCJsaW5rIjoiaHR0cHM6Ly9wbHVzLmdvb2dsZS5jb20vMTE2MzE3NDM0NjMxMDk3NTA5NzM5IiwicGljdHVyZSI6Imh0dHBzOi8vbGgzLmdvb2dsZXVzZXJjb250ZW50LmNvbS9hLS9BT2gxNEdpcjRPN1hoT3JzMi1BVlozeXRWZjhQNjB3VGczX0RELWVHYko3VD1zOTYtYyIsImdlbmRlciI6Im1hbGUiLCJsb2NhbGUiOiJlbiIsImhkIjoiaGFrYWkub3JnIiwiZ3JvdXBzIjpbImplc3N5LmJhcnJldHRlQGhha2FpLm9yZyIsImNpb29zLnBhY2lmaWNAaGFrYWkub3JnIiwiZWltcy5hdXRoLmhha2FpQGhha2FpLm9yZyIsImVpbXMuYXV0aC5uYXR1cmUudHJ1c3RAaGFrYWkub3JnIiwiZWltcy5hdXRoLnBhcmtzLmNhbmFkYUBoYWthaS5vcmciLCJlaW1zLmF1dGguc2ZjQGhha2FpLm9yZyIsImVpbXMuYXV0aC51d3RAaGFrYWkub3JnIiwiZWltcy5pbnN0cnVtZW50YXRpb25AaGFrYWkub3JnIiwiZWltcy5vY2Vhbm9nc

Unnamed: 0,action,event_pk,rn,is_replicate,date,work_area,organization,survey,sampling_bout,site_id,project_specific_id,hakai_id,source,lat,long,gather_lat,gather_long,collection_method,line_out_depth,pressure_transducer_depth,filtered,filter_type,volume,installed,collected,preserved,analyzed,lab_technician,nh4_,no2_no3_um,no2_no3_ugl,no2_no3_units,tp,tdp,tn,tdn,srp,po4,sio2,po4pfilt,no3nfilt,po4punfl,no3nunfl,nh4nunfl,nh4__flag,no2_no3_flag,tp_flag,tdp_flag,tn_flag,tdn_flag,srp_flag,po4_flag,sio2_flag,po4pfilt_flag,no3nfilt_flag,po4punfl_flag,no3nunfl_flag,nh4nunfl_flag,analyzing_lab,row_flag,metadata_qc_flag,quality_level,comments,quality_log
0,,416,1,,2015-03-18,QUADRA,HAKAI,"QOMA,QOMA2,QOMA1",1,QU39,QNUT382,QNUT382,M,50.0307,-125.0992,,,,0,,,0.45nm,13,,2015-03-18T16:02:55.000Z,2015-03-18T16:02:14.000Z,2015-05-21T07:00:00.000Z,"Kate,Katie,Rebecca",,5.918382,,uM,,,,,,0.637,12.324552,,,,,,,AV,AV,AV,AV,AV,AV,AV,AV,,,,,,UBC,Results,,Technicianmr,,1: Bottom depth is 265 m\r2: Checked by KP\r3:...
1,,416,1,,2015-03-18,QUADRA,HAKAI,"QOMA,QOMA2,QOMA1",1,QU39,QNUT383,QNUT383,M,50.0307,-125.0992,,,,5,,,0.45nm,13,,2015-03-18T16:02:55.000Z,2015-03-18T16:02:14.000Z,2015-05-21T07:00:00.000Z,"Kate,Katie,Rebecca",,6.011794,,uM,,,,,,0.638,12.396646,,,,,,,AV,AV,AV,AV,AV,AV,AV,AV,,,,,,UBC,Results,,Technicianmr,,1: Bottom depth is 265 m\r2: Checked by KP\r3:...
2,,416,1,,2015-03-18,QUADRA,HAKAI,"QOMA,QOMA2,QOMA1",1,QU39,QNUT384,QNUT384,M,50.0307,-125.0992,,,,10,,,0.45nm,13,,2015-03-18T16:02:55.000Z,2015-03-18T16:02:14.000Z,2015-05-21T07:00:00.000Z,"Kate,Katie,Rebecca",,6.634283,,uM,,,,,,0.7,12.694025,,,,,,,AV,AV,AV,AV,AV,AV,AV,AV,,,,,,UBC,Results,,Technicianmr,,1: Bottom depth is 265 m\r2: Checked by KP\r3:...
3,,416,1,,2015-03-18,QUADRA,HAKAI,"QOMA,QOMA2,QOMA1",1,QU39,QNUT385,QNUT385,M,50.0307,-125.0992,,,,30,,,0.45nm,13,,2015-03-18T16:02:55.000Z,2015-03-18T16:02:14.000Z,2015-05-21T07:00:00.000Z,"Kate,Katie,Rebecca",,24.151575,,uM,,,,,,2.085,42.222115,,,,,,,AV,AV,AV,AV,AV,AV,AV,AV,,,,,,UBC,Results,,Technicianmr,,1: Bottom depth is 265 m\r2: Checked by KP\r3:...
4,,416,1,,2015-03-18,QUADRA,HAKAI,"QOMA,QOMA2,QOMA1",1,QU39,QNUT386,QNUT386,M,50.0307,-125.0992,,,,50,,,0.45nm,13,,2015-03-18T16:02:55.000Z,2015-03-18T16:02:14.000Z,2015-05-21T07:00:00.000Z,"Kate,Katie,Rebecca",,26.960609,,uM,,,,,,2.251,49.347595,,,,,,,AV,AV,AV,AV,AV,AV,AV,AV,,,,,,UBC,Results,,Technicianmr,,1: Bottom depth is 265 m\r2: Checked by KP\r3:...


In [33]:
# Let's convert the collected time to a datetime object variable called  time 
#  and extract the from those datetime objects the year and month 
df['time'] = pd.to_datetime(df['collected'])
df['year'] = df['time'].dt.year
df['month'] = df['time'].dt.month
df['dayoftheyear'] = pd.to_timedelta(df['time'].dt.dayofyear, unit='d')


# Define a depth variable which is: 
#   - pressure_transducer_depth (if available)
#   - OR line_out_depth
df['depth'] = df['pressure_transducer_depth'].fillna(df['line_out_depth'])

# Review Replicates
## Pool Standard Deviation


In [6]:
 # Let's create a pooled standard deviation function
def pooled_standard_deviation(df_to_review,count_col='count',std_col='std'):
    # Keep only records that have replicates
    df_replicates = df_to_review[df_to_review[count_col]>1]
    upper =  df_replicates[count_col].sub(-1).mul(df_replicates[std_col].pow(2)).sum()
    lower = df_replicates[count_col].sub(-1).sum()
    pooled_std = np.sqrt(upper/lower)
    return pooled_std

In [7]:
df_grouped = df.groupby(['site_id','line_out_depth','collected']).agg(['mean','std','count'])

for var in nutrient_variables:
    pool_std = pooled_standard_deviation(df_grouped[var])
    print('{0} pool.std.: {1}'.format(var,pool_std))

no2_no3_um pool.std.: 0.12386840168403754
sio2 pool.std.: 0.5301664184882673
po4 pool.std.: 0.03475953788519301


## Replicates Standard Deviation Distribution
Present the distribution of the standard deviations the replicate samples for each line_out_depth.

In [8]:
px.histogram(df_grouped['po4'].reset_index().dropna(subset=['std']).sort_values('line_out_depth'),
             x='std',color='line_out_depth', hover_name='collected',
             marginal='box')

# Apply detection limit flag
Samples with values lower than the dectection limit will be flag as BDL.



In [9]:
# If lower than detection limit, flag as BDL
df.loc[df['no2_no3_um']<0.036 ,'no2_no3_um_bdl_flag']='BDL'
df.loc[df['po4']<0.032 ,'po4_bdl_flag']='BDL'
df.loc[df['sio2']<0.1 ,'sio2_bdl_flag']='BDL'

# Time series QARTOD tests
Review each depth time series for a station and run timeseries specific test on them. The configuarion dictionary below, list of each variables and depth ranges the tests that will be applied.


## Set QARTOD Tests Configuration

In [10]:
qc_config = [{
    "depth_range":{
        "minimum":-5,
        "maximum":55
        },
    "streams":{
        "no2_no3_um":{
            "qartod": {
                "gross_range_test":{
                    "suspect_span": [0, 36],
                    "fail_span": [0, 40]
                    },
                "aggregate": {}
            }
        },
        "po4":{
            "qartod": {
                "gross_range_test":{
                    "suspect_span": [0, 3],
                    "fail_span": [0, 4]
                    },
                "aggregate": {}
            }
                    
        },
        "sio2":{
            "qartod": {
                "gross_range_test":{
                    "suspect_span": [0, 80],
                    "fail_span": [0, 100]
                    },
                "aggregate": {}
            }
                    
        }
    }
},{
    "depth_range":{
        "minimum":55,
        "maximum":300
        },
    "streams":{
        "no2_no3_um":{
            "qartod": {
                "gross_range_test":{
                    "suspect_span": [0, 36],
                    "fail_span": [0, 40]
                    },
                "spike_test": {
                    "suspect_threshold": 2,
                    "fail_threshold": 3,
                    "method": "differential",
                    "n_dev_suspect":1,
                    "n_dev_fail":2,
                    "n_records":40,
                    "min_records":20
                    },
                "aggregate": {}
            }
        },
        "po4":{
            "qartod": {
                "gross_range_test":{
                    "suspect_span": [0, 3],
                    "fail_span": [0, 4]
                    },
                "spike_test": {
                    "suspect_threshold": 0.2,
                    "fail_threshold": 0.4,
                    "method": "differential",
                    "n_dev_suspect":2,
                    "n_dev_fail":3,
                    "n_records":50,
                    "min_records":40
                    },
                "aggregate": {}
            }
                    
        },
        "sio2":{
            "qartod": {
                "gross_range_test":{
                    "suspect_span": [0, 80],
                    "fail_span": [0, 100]
                    },
                "spike_test": {
                    "suspect_threshold":8,
                    "fail_threshold": 12,
                    "method": "differential",
                    "n_dev_suspect":2,
                    "n_dev_fail":3,
                    "n_records":30,
                    "min_records":20
                    },
                "aggregate": {}
            }
                    
        }
    }
}]


## Run QARTOD Tests on Depth TimeSeries


In [11]:
# Run QARTOD tests
# We are using the deprecated QcConfig method and hopefully will move 
#  to a new stream method soon.
time = 'time'
depth = 'depth'
group_timeseries = ['line_out_depth']
for item in qc_config:
    df_depth_range = df[(df['depth']>item['depth_range']["minimum"]) & \
                        (df['depth']<item['depth_range']["maximum"])]
    for line_out_depth, timeserie in df_depth_range.groupby(['site_id','line_out_depth']):
        timeserie = timeserie.sort_values(time)
        for var in item['streams'].keys():
            qc = QcConfig(item['streams'][var])
            qc_result = qc.run(
                inp=timeserie[var],
                tinp=timeserie[time],
                zinp=timeserie[depth],
            )
            for module,tests in qc_result.items():
                for test, flag in tests.items():
                    df.loc[timeserie.index, var+'_'+module+"_"+test] = flag

# Map QARTOD Flags to Hakai Convention
for var in df.filter(like='qartod').columns:
     df[var] = df[var].replace({pd.NA:9}).astype(int).replace(review.flag_conventions['mapping']['QARTOD-HAKAI'])

## Review QARTOD Results

In [12]:
flag_color_map = {key:value['Color'] for key,value in review.flag_conventions['HAKAI'].items()}

line_out_depths = line_out_depth=df['line_out_depth'].drop_duplicates().sort_values().tolist()
line_out_depth_selector = widgets.SelectMultiple(
    options=line_out_depths,
    value=[line_out_depths[0]],
    #rows=10,
    description='Fruits',
    disabled=False
)

def make_qartod_plot_review(var,line_out_depth):
    flag_var = df.filter(regex='flag|qartod').columns.tolist()
    fig = px.scatter(df[df['line_out_depth'].isin(line_out_depth)].sort_values(['line_out_depth',var+'_qartod_aggregate']),
                x='time',y=var,
                color=var+'_qartod_aggregate', 
                symbol='line_out_depth',
                hover_data=['hakai_id']+flag_var,
                color_discrete_map=flag_color_map)
    fig.update_layout(
        height=600,
        width=1600
    )
    fig.show()

interact(make_qartod_plot_review, var=['sio2','po4','no2_no3_um'],line_out_depth=line_out_depth_selector)

interactive(children=(Dropdown(description='var', options=('sio2', 'po4', 'no2_no3_um'), value='sio2'), Select…

<function __main__.make_qartod_plot_review>

# Red field ratio

In [13]:
max_depth = 100
figs=px.scatter(df[df['line_out_depth']<max_depth],
               x='po4',
               y='no2_no3_um',
               color='line_out_depth', 
               hover_data=['hakai_id','date'], 
               template='simple_white',
               title='PO4',
               labels={'po4':'PO4 (uM)', 'line_out_depth':'Bottle Target Depth (m)'},facet_col='year')
kk=1
for item in figs.data:
  figs.add_trace(go.Scatter(x = [0, 3],
                            y =  [0, 48],
                            mode='lines',
                            line_color='red',
                            showlegend=False
                            ),
                row=1,
                col=kk)
  kk+=1
figs.show()

# Review Interannual Variability
Let's compute the average value measured for each depth and the associated standard deviation.

## Compute the seasonal variability
### Development 
Generate for each line_out_depth a seasonal variability model



In [16]:
# Generate a reference depth variable
df['reference_depth'] = df['line_out_depth']
df.loc[df['reference_depth']>230, 'reference_depth'] = 260 # Let's group all data below 230 in the same group
df = df.sort_values('reference_depth')
df['reference_depth'] = df['reference_depth'].astype(str)

In [37]:
# Apply a running window over the days  for each station and reference depth
window = '60d'
df_running_seasonal = df.groupby(['site_id','reference_depth']).apply(
    lambda x: x.sort_values('dayoftheyear').set_index('dayoftheyear')[nutrient_variables]\
       .rolling(window).agg(['mean','std']).reset_index()
)

# Resample the resulting data to a regular time interval
seasonal = df_running_seasonal.reset_index().set_index(['site_id','reference_depth','dayoftheyear']).groupby(
    [pd.Grouper(level='site_id'),
     pd.Grouper(level='reference_depth'),
     pd.Grouper(freq='7D', level=-1)]).mean().reset_index()

In [42]:
# Apply a shift a to the time series since rolling is assigning the end
#  of the window (center isn't compatible with timedelta indexes)
seasonal['dayoftheyear'] = seasonal['dayoftheyear'] - pd.to_timedelta(window)/2

In [43]:
# Generate Seasonnal limits for each variables
alpha=2 # Coefficient by which std deviation is multiply to computer lower and upper limit
years = df['year'].drop_duplicates().sort_values().tolist()

df_seasonal_timeseries = pd.DataFrame()
for year in years:
    df_temp = seasonal.copy()
    df_temp['time'] = df_temp['dayoftheyear'] + pd.to_datetime(f"{year}-01-01 00:00:00")
    df_seasonal_timeseries = df_seasonal_timeseries.append(df_temp)

# Generate limits 
for var in nutrient_variables:
    df_seasonal_timeseries[(var,'lower_limit')] = df_seasonal_timeseries[var]['mean'] - alpha*df_seasonal_timeseries[var]['std']
    df_seasonal_timeseries[(var,'upper_limit')] = df_seasonal_timeseries[var]['mean'] + alpha*df_seasonal_timeseries[var]['std']
    for test in qc_config:        
        within_range = (
            (df_seasonal_timeseries['reference_depth'].astype(float)<float(test['depth_range']['maximum'])) & 
            (df_seasonal_timeseries['reference_depth'].astype(float)>float(test['depth_range']['minimum'])) 
            )
        
        # Make sure that the limits do not exceed the fail limits of the QARTOD Test
        fail_span = test['streams'][var]['qartod']['gross_range_test']['fail_span']
        df_seasonal_timeseries.loc[(within_range & (df_seasonal_timeseries[var]['lower_limit']<fail_span[0])),(var,'lower_limit')] = fail_span[0]
        df_seasonal_timeseries.loc[(within_range & (df_seasonal_timeseries[var]['upper_limit']>fail_span[1])),(var,'lower_limit')] = fail_span[1]


In [52]:
def plot_seasonal(reference_depth,variable):
    df_filtered  = df_seasonal_timeseries.loc[df_seasonal_timeseries['reference_depth'].isin([reference_depth])]
    df_data_filtered = df.loc[df['reference_depth']==reference_depth]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df_filtered['time'],y=df_filtered[variable]['mean'],name='mean',line_color='black'))
    fig.add_trace(go.Scatter(x=df_filtered['time'],y=df_filtered[variable]['upper_limit'],line_color='lightgrey',mode=None,name='upper_limit'))
    fig.add_trace(go.Scatter(x=df_filtered['time'],y=df_filtered[variable]['lower_limit'],line_color='lightgrey',fill='tonexty',mode=None,name='lower_limit'))
    fig_data = px.scatter(df.query(f'reference_depth=="{reference_depth}"'),
                          x='time',y=variable,color=f"{variable}_qartod_aggregate",
                          hover_data=['hakai_id'],color_discrete_map=flag_color_map)
    
    for item in fig_data.data:
        fig.add_trace(item)
    fig.update_yaxes(title=variable)
    fig.update_layout(width=1500)
    fig.show()

interact(plot_seasonal,
         reference_depth=df['reference_depth'].drop_duplicates().tolist(),
         variable=nutrient_variables)

interactive(children=(Dropdown(description='reference_depth', options=('0', '5', '10', '20', '30', '40', '50',…

<function __main__.plot_seasonal>

## Make Monthly Box Plots

In [58]:
def make_boxplot_per_depth(var):
    # Show the seasonality of the data
    fig = px.box(df.sort_values('line_out_depth'),x='month',y=var,
            color='line_out_depth', animation_frame='line_out_depth',
            hover_name='hakai_id')
    fig.update_layout(width=1500)
    return fig.show()
    
interact(make_boxplot_per_depth,var=nutrient_variables)

interactive(children=(Dropdown(description='var', options=('no2_no3_um', 'sio2', 'po4'), value='no2_no3_um'), …

<function __main__.make_boxplot_per_depth>

# Create Aggregated Suggested Flag


In [59]:
flags_considered = ['_qartod_aggregate','_bdl_flag']
for var in nutrient_variables:
    flag_vars = [var+item for item in flags_considered]
    df[var+'_review_flag'] = \
       df[flag_vars].apply(lambda x: review.compare_flags(list(x),convention='HAKAI'),axis='columns')

# Update Original Flags
df['po4_flag'] = df['po4_flag'].fillna(df['po4_review_flag'])
df['sio2_flag'] = df['sio2_flag'].fillna(df['sio2_review_flag'])
df['no2_no3_flag'] = df['no2_no3_flag'].fillna(df['no2_no3_um_review_flag'])

# Review Results Manually

## Review Profiles

In [60]:
# Plot profile
group_profile_by = 'date'
profile_list = widgets.SelectMultiple(
    options=df[group_profile_by].sort_values().drop_duplicates(),
    value=(df[group_profile_by].iloc[0],),
    description="Select Profiles",
    disabled=False,
)
def make_profile_plot(profiles):
    # Filter only selected profiles
    df_selected = df[df['date'].isin(profiles)]
    df_temp = df[df['date'].isin(profile_list.value)].sort_values(['date','depth'])
    fig = []
    for ii in [0,1,2]:
        fig.append(px.line(df_temp,x=nutrient_variables[ii],y='depth',color='date',hover_data=['hakai_id']))

    subfig = make_subplots(rows=1, cols=3)
    kk=1
    for item in fig:
        for subitem in item.data:
            if kk>1:
                subitem.showlegend=False
            subfig.add_trace(subitem,row=1,col=kk)
        subfig.update_xaxes(title_text=nutrient_variables[kk-1], row=1, col=kk)
        kk += 1
    subfig.update_yaxes(title_text='Depth (m)', row=1, col=1)
    subfig.update_yaxes(autorange="reversed")
    subfig.update_xaxes(matches=None)
    subfig.update_traces(mode='markers+lines')
    subfig.update_layout(width=1500)
    return subfig

interact(make_profile_plot, profiles=profile_list)

interactive(children=(SelectMultiple(description='Select Profiles', index=(0,), options=('2015-03-18', '2015-0…

<function __main__.make_profile_plot>

## Review Timeseries

In [61]:
group_timeseries_by = 'line_out_depth'
# Plot TimeSeries
timeseries_id = widgets.SelectMultiple(
    options=df[group_timeseries_by].sort_values().drop_duplicates().values,
    value=(df[group_timeseries_by].iloc[0],),
    description="Select "+group_timeseries_by,
    disabled=False,
)

nutrient_id = widgets.ToggleButtons(
    options=nutrient_variables,
    value=nutrient_variables[0],
    description="Select "+group_timeseries_by,
    disabled=False,
)
timeseries_id
def plot_timeseries(nutrient, profile):
    flag_columns = df.filter(regex= nutrient+'.*(flag|qartod)').columns
    df_temp = df[df['line_out_depth'].isin(timeseries_id.value)].sort_values('time').copy()
    df_temp[df_temp.filter(like='flag').columns] = df_temp.filter(like='flag').astype(str)
    df_good = df_temp.loc[df_temp[nutrient.replace('_um','')+'_flag']=='AV']
    fig = px.line(df_good,
            x='time',
            y=nutrient,
            line_dash='line_out_depth',
            color=nutrient+'_review_flag',
            color_discrete_map={value:item['Color'] for value, item in review.flag_conventions['HAKAI'].items()}
           )
    fig2 = px.scatter(df_temp,
            x='time',
            y=nutrient,
            color=nutrient+'_review_flag',
            hover_data=['date','hakai_id','depth']+list(flag_columns),
            color_discrete_map={value:item['Color'] for value, item in review.flag_conventions['HAKAI'].items()}
           )
    for item in fig2.data:
        fig.add_trace(item)
    fig.update_layout(width=1500)
    return fig
    
interact(plot_timeseries,nutrient=nutrient_id, profile=timeseries_id)

interactive(children=(ToggleButtons(description='Select line_out_depth', options=('no2_no3_um', 'sio2', 'po4')…

<function __main__.plot_timeseries>

# Get List of Sample Flagged

In [274]:
# Show me the data flagged
df.loc[df[['no2_no3_flag','sio2_flag','po4_flag']].isin(['SVC','SVD','BDL']).any(axis='columns')][['date','hakai_id','no2_no3_flag','sio2_flag','po4_flag']]

Unnamed: 0,date,hakai_id,no2_no3_flag,sio2_flag,po4_flag
1610,2018-09-25,QNUT4738,BDL,AV,AV
2101,2019-09-04,QNUT5705,BDL,AV,AV
2011,2019-07-24,QNUT5593,BDL,AV,AV
730,2017-03-30,QNUT2910,SVD,SVD,SVD
396,2016-05-19,QNUT2164,AV,AV,BDL
1382,2018-05-08,QNUT4031,BDL,AV,AV
1864,2019-04-02,QNUT5099,BDL,AV,AV
1383,2018-05-08,QNUT4032,BDL,AV,AV
2556,2020-05-14,QNUT6443,SVD,AV,AV
1968,2019-07-09,QNUT5514,BDL,AV,AV


# Manual Flag

In [275]:
# Review QC Manually ( Not Compatible with Colab)
df['no2_no3'] = df['no2_no3_um']
review.manual_qc_interface(df, ['no2_no3','sio2','po4'], 'HAKAI' ,review_flag="_flag")


VBox(children=(HBox(children=(VBox(children=(VBox(children=(Dropdown(description='X Axis:', index=1, options=(…

# Save Result to Hakai Portal Compatible Excel Format

In [None]:
df_output = df[original_columns]

# Rename Columns to Match Portal Output
# Drop unchanged rows

###ATTENTION### The output has not yet been tested with the Hakai API.
df_output.to_excel('Hakai_Nutrient_Revision_{0}.xlsx'.format(pd.Timestamp.now().isoformat().replace(':','')))


# Report Figures

## Contour plot

In [291]:
# Plot contourf interpolate linearly over the x axis and maximum over two NaN values
def get_contour(var):
    df_pivot = pd.pivot_table(df,values=var,index='line_out_depth',columns='date',aggfunc='mean').interpolate(axis='index',limit=4).sort_index(axis=0).sort_index(axis=1).interpolate(axis='columns',limit=3)
    fig = go.Figure(data =
        go.Contour(z=df_pivot.values,x=df_pivot.columns,y=df_pivot.index.values,
                colorbar=dict(title=var, titleside='right'),
                colorscale='RdYlGn',
                ncontours=10,
                contours_coloring='heatmap'
                #,connectgaps=True
                ))
    fig.update_yaxes(title='Depth (m)',autorange="reversed",linecolor='black',mirror=True,
                    ticks='outside',showline=True)
    fig.update_xaxes(linecolor='black',mirror=True,ticks='outside',showline=True)
    fig.update_layout(width=1500)
    fig.show()

interact(get_contour,var=nutrient_variables)

interactive(children=(Dropdown(description='var', options=('no2_no3_um', 'sio2', 'po4'), value='no2_no3_um'), …

<function __main__.get_contour>

## Scatter with colorbar

In [277]:
## Scatter with colorbar
fig = px.scatter(df.dropna(subset=['no2_no3_um'],axis=0),x='time',y='depth',
                 color='no2_no3_um',color_continuous_scale='RdYlGn',
                 hover_name='hakai_id')
fig.update_yaxes(title='Depth (m)',autorange="reversed",linecolor='black',mirror=True,
                 ticks='outside')
fig.update_xaxes(linecolor='black',mirror=True,ticks='outside',showline=True)
fig.show()

In [289]:
# Add scatter per depth
px.scatter(df.sort_values(['line_out_depth','time']),x='time',y='no2_no3_um',color='depth',color_continuous_scale=px.colors.sequential.deep)