# Hakai Chlorophyll-a QA-QC 
---
# Start Notebook
## Install packages 
Google colab servers has already a few commonly used packages installed. To install those missing, which are specific toh Hakai let's run the following commands:

In [1]:
!pip install hakai-api
!pip install git+git://github.com/HakaiInstitute/ioos_qc.git@development

Collecting hakai-api
  Downloading https://files.pythonhosted.org/packages/10/70/248ff82cc78d9d95a0c52d670f6452e7584f41eb3fff3b0abcae3ed77d97/hakai_api-1.1.2-py3-none-any.whl
Installing collected packages: hakai-api
Successfully installed hakai-api-1.1.2
Collecting git+git://github.com/HakaiInstitute/ioos_qc.git@development
  Cloning git://github.com/HakaiInstitute/ioos_qc.git (to revision development) to /tmp/pip-req-build-6dbvwit7
  Running command git clone -q git://github.com/HakaiInstitute/ioos_qc.git /tmp/pip-req-build-6dbvwit7
  Running command git checkout -b development --track origin/development
  Switched to a new branch 'development'
  Branch 'development' set up to track remote branch 'development' from 'origin'.
Collecting geojson
  Downloading https://files.pythonhosted.org/packages/e4/8d/9e28e9af95739e6d2d2f8d4bef0b3432da40b7c3588fbad4298c1be09e48/geojson-2.5.0-py2.py3-none-any.whl
Collecting geopandas
[?25l  Downloading https://files.pythonhosted.org/packages/d7/bf/e9

## Import the python packages needed

In [48]:
# Let's load pandas for working with the data in table
import pandas as pd 
import numpy as np
# Let's load seaborn to plot the data
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# This to install the hakai api tool and be able to download some data from hakai's database
from hakai_api import Client

# Install ioos_qc which is used to qc data
from ioos_qc.config import QcConfig
from ioos_qc import qartod

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

# Download data from the Hakai database
For more information regarding the Hakai API, go [here](https://github.com/HakaiInstitute/hakai-api).

You can find a list of all the data type endpoints [here](http://hakaiinstitute.github.io/hakai-api/#endpoints).

In [111]:
# Let's first define the QARTOD flag colors to use
qartod_colors={1:'#2ECC40', 2:'#FFDC00', 3:'#FF851B', 4:'#FF4136', 9:'#85144b'}
qartod_colors_str = {"{0}".format(key):value for key,value in qartod_colors.items()}

hakai_flag_colors={'AV':'#2ECC40', 2:'#FFDC00', 'SVC':'#FF851B', 'SVD':'#FF4136', 'None':'#7d1485'}

# And the interesting variables to use from the Hakai database
variables_of_inteterest = ['chla','phaeo']

In [102]:
# Create a tool to handle Hakai Flags
def return_hakai_flag(flag_string):
    if 'SVD' in flag_string:
        return 'SVD'
    elif 'SVC' in flag_string:
        return 'SVC'
    elif 'AV' in flag_string:
        return 'AV'
    else:
        return 'None'

In [10]:
# Let's retrieve the endpoint to retrieve nutrients data:
endpointUrl = '/eims/views/output/chlorophyll'
site_id = 'QU39'
start_time = '2015-01-01'
end_time = '2022-06-11'
# We'll retrieve data only associated with QU39 between January 1st 2019 to January 1st 2020
filterUrl = 'site_id={0}&collected>{1}&collected<{2}&limit=-1'.format(
    site_id, start_time, end_time
)

# Get Hakai Data    
#Get Data from Hakai API
client = Client() # Follow stdout prompts to get an API token

# Make a data request for sampling stations
url = '%s/%s?%s' % (client.api_root,endpointUrl,filterUrl)
response = client.get(url)
df = pd.DataFrame(response.json())

print(str(len(df))+' records downloaded')
df.head()

5575 records downloaded


Unnamed: 0,action,event_pk,rn,date,work_area,organization,survey,sampling_bout,site_id,lat,long,gather_lat,gather_long,collection_method,line_out_depth,pressure_transducer_depth,volume,collected,preserved,analyzed,lab_technician,project_specific_id,hakai_id,is_blank,is_solid_standard,filter_size_mm,filter_type,acetone_volume_ml,flurometer_serial_no,calibration,acid_ratio_correction_factor,acid_coefficient,calibration_slope,before_acid,after_acid,acid_flag,dilution_factor,chla,chla_flag,chla_final,phaeo,phaeo_flag,phaeo_final,analyzing_lab,row_flag,quality_level,comments,quality_log
0,,416,1,2015-03-18,QUADRA,HAKAI,"QOMA1,QOMA2,QOMA",1,QU39,50.0307,-125.0992,,,,0,,250.0,2015-03-18T16:02:55.000Z,2015-03-18T16:02:14.000Z,2015-05-08T07:05:00.000Z,"Kate,Katie,Rebecca",,QCHL754,,,,Bulk GF/F,10.0,720001154,2015-03-19T07:00:00.000Z,2.37623,1.726623,0.000444,251996.43,174536.76,,1.0,3.266174,AV,3.266174,2.081435,AV,2.081435,HAKAI,Results,Principal Investigator,,1: Bottom depth is 265 m\r2: Calibration input...
1,,416,1,2015-03-18,QUADRA,HAKAI,"QOMA1,QOMA2,QOMA",1,QU39,50.0307,-125.0992,,,,5,,250.0,2015-03-18T16:02:55.000Z,2015-03-18T16:02:14.000Z,2015-05-08T07:10:00.000Z,"Kate,Katie,Rebecca",,QCHL755,,,,Bulk GF/F,10.0,720001154,2015-03-19T07:00:00.000Z,2.37623,1.726623,0.000444,259396.34,180222.42,,1.0,3.338458,AV,3.338458,2.183354,AV,2.183354,HAKAI,Results,Principal Investigator,,1: Bottom depth is 265 m\r2: Calibration input...
2,,416,1,2015-03-18,QUADRA,HAKAI,"QOMA1,QOMA2,QOMA",1,QU39,50.0307,-125.0992,,,,30,,250.0,2015-03-18T16:02:55.000Z,2015-03-18T16:02:14.000Z,2015-05-08T07:15:00.000Z,"Kate,Katie,Rebecca",,QCHL756,,,,Bulk GF/F,10.0,720001154,2015-03-19T07:00:00.000Z,2.37623,1.726623,0.000444,83754.03,67977.68,,1.0,0.665228,AV,0.665228,1.417532,AV,1.417532,HAKAI,Results,Principal Investigator,,1: Bottom depth is 265 m\r2: Calibration input...
3,,416,1,2015-03-18,QUADRA,HAKAI,"QOMA1,QOMA2,QOMA",1,QU39,50.0307,-125.0992,,,,10,,250.0,2015-03-18T16:02:55.000Z,2015-03-18T16:02:14.000Z,2015-05-08T07:20:00.000Z,"Kate,Katie,Rebecca",,QCHL757,,,,Bulk GF/F,10.0,720001154,2015-03-19T07:00:00.000Z,2.37623,1.726623,0.000444,595494.93,395232.68,,1.0,8.444284,AV,8.444284,3.665199,AV,3.665199,HAKAI,Results,Principal Investigator,,1: Bottom depth is 265 m\r2: Calibration input...
4,,416,1,2015-03-18,QUADRA,HAKAI,"QOMA1,QOMA2,QOMA",1,QU39,50.0307,-125.0992,,,,50,,250.0,2015-03-18T16:02:55.000Z,2015-03-18T16:02:14.000Z,2015-05-08T07:25:00.000Z,"Kate,Katie,Rebecca",,QCHL758,,,,Bulk GF/F,10.0,720001154,2015-03-19T07:00:00.000Z,2.37623,1.726623,0.000444,42202.48,34572.76,,1.0,0.321716,AV,0.321716,0.737555,AV,0.737555,HAKAI,Results,Principal Investigator,,1: Bottom depth is 265 m\r2: Calibration input...


In [11]:
# Let's convert the collected time to a datetime object variable called  time 
#  and extract the from those datetime objects the year and month 
df['time'] = pd.to_datetime(df['collected'])
df['year'] = df['time'].dt.year
df['month'] = df['time'].dt.month
df['dayoftheyear'] = pd.to_timedelta(df['time'].dt.dayofyear, unit='d')

# Define a depth variable which is: 
#   - pressure_transducer_depth (if available)
#   - OR line_out_depth
df['depth'] = df['pressure_transducer_depth'].fillna(df['line_out_depth'])

# Review Replicates
## Pool Standard Deviation


In [12]:
 # Let's create a pooled standard deviation function
def pooled_standard_deviation(df_to_review,count_col='count',std_col='std'):
    # Keep only records that have replicates
    df_replicates = df_to_review[df_to_review[count_col]>1]
    upper =  df_replicates[count_col].sub(-1).mul(df_replicates[std_col].pow(2)).sum()
    lower = df_replicates[count_col].sub(-1).sum()
    pooled_std = np.sqrt(upper/lower)
    return pooled_std

In [13]:
df_grouped = df.groupby(['site_id','line_out_depth','collected','filter_type']).agg(['mean','std','count'])

for var in variables_of_inteterest:
    pool_std = pooled_standard_deviation(df_grouped[var])
    print('{0} pool.std.: {1}'.format(var,pool_std))

chla pool.std.: 1.9777327235137088
phaeo pool.std.: 0.5259297460346936


## Replicates Standard Deviation Distribution

In [16]:
px.histogram(df_grouped['chla'].reset_index().dropna(subset=['std']).sort_values('line_out_depth'),
             x='std',color='line_out_depth', hover_name='collected',
             marginal='box')

# Apply detection limit flag
Samples with values lower than the dectection limit will be flag as BDL.



In [None]:
# # If lower than detection limit, flag as BDL
# df.loc[df['chla']<0.036 ,'chla_flag']='BDL'
# df.loc[df['phaeo']<0.032 ,'phaeo_flag']='BDL'

# Review Stacked filters vs Bulk
Hakai runs two filtrations one with stacked filters:
1. 20um
2. 2um
3. GF/F

And another filtration with just a GF/F filter. 

In theory, the cumulative sum of the elements filtrered within the stacked and simple GF/F filters should be similar. Let's see. 


In [107]:
# Set Count columns to text so that plotly use them as categories
for var in df.filter(like='flag').columns:
    df[var] = df[var].astype(str)

# Remove split Bulk filters and stacked ones.
group_samples_with = ['collected','line_out_depth']
df_bulk = df.loc[df['filter_type']=='Bulk GF/F']
df_stacked = df.loc[df['filter_type']!='Bulk GF/F']

# Group filters to averages replicates values if exist
grouping_method = {'hakai_id': ','.join, 'filter_type': ','.join, 
                   'chla_flag': ','.join, 'phaeo_flag': ','.join, 
                   'chla': 'mean', 'phaeo': 'mean'}
df_bulk = df_bulk.groupby(group_samples_with).agg(grouping_method)
df_stacked = df_stacked.groupby(group_samples_with+['filter_type']).agg(grouping_method)

# Sum stacked filters
grouping_method = {'hakai_id': ','.join, 'filter_type': ','.join, 
                   'chla_flag': ','.join, 'phaeo_flag': ','.join, 
                   'chla': ['sum','count'], 'phaeo': ['sum','count']}
df_stacked_sum = df_stacked.groupby(group_samples_with).agg(grouping_method)
df_stacked_sum.columns = [(col+'_'+method).replace('_join','') for col, method in df_stacked_sum.columns]

df_grouped = df_stacked_sum.add_suffix('_stacked').merge(
    df_bulk.add_suffix('_bulk'),
    left_index=True,right_index=True)

# Ignore data that has no value yet (count=0)
df_grouped = df_grouped[(df_grouped['chla_count_stacked']>0) & (df_grouped['phaeo_count_stacked']>0)]

# Set Count columns to text so that plotly use them as categories
for var in df_grouped.filter(like='count').columns:
    df_grouped[var] = df_grouped[var].astype(str)

# Combine flag columns
df_grouped['chla_combined_flag'] = (df_grouped['chla_flag_stacked']+','+ df_grouped['chla_flag_bulk']).apply(return_hakai_flag)
df_grouped['phaeo_combined_flag'] = (df_grouped['phaeo_flag_stacked']+','+ df_grouped['phaeo_flag_bulk']).apply(return_hakai_flag)

df_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,hakai_id_stacked,filter_type_stacked,chla_flag_stacked,phaeo_flag_stacked,chla_sum_stacked,chla_count_stacked,phaeo_sum_stacked,phaeo_count_stacked,hakai_id_bulk,filter_type_bulk,chla_flag_bulk,phaeo_flag_bulk,chla_bulk,phaeo_bulk,chla_combined_flag,phaeo_combined_flag
collected,line_out_depth,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2015-03-18T16:02:55.000Z,0,"QCHL856,QCHL854,QCHL855","20um,3um,GF/F","AV,AV,AV","AV,AV,AV",3.927663,3,2.264585,3,QCHL754,Bulk GF/F,AV,AV,3.266174,2.081435,AV,AV
2015-03-18T16:02:55.000Z,5,"QCHL858,QCHL857,QCHL859","20um,3um,GF/F","AV,AV,AV","AV,AV,AV",3.289195,3,1.730948,3,QCHL755,Bulk GF/F,AV,AV,3.338458,2.183354,AV,AV
2015-03-18T16:02:55.000Z,10,"QCHL865,QCHL863,QCHL864","20um,3um,GF/F","AV,AV,AV","AV,AV,AV",8.439128,3,5.253860,3,QCHL757,Bulk GF/F,AV,AV,8.444284,3.665199,AV,AV
2015-03-18T16:02:55.000Z,30,"QCHL862,QCHL861,QCHL860","20um,3um,GF/F","AV,AV,AV","AV,AV,AV",0.845579,3,1.868725,3,QCHL756,Bulk GF/F,AV,AV,0.665228,1.417532,AV,AV
2015-03-24T16:04:16.000Z,0,"QCHL933,QCHL934,QCHL932","20um,3um,GF/F","AV,AV,AV","AV,AV,AV",1.992064,3,1.319311,3,QCHL785,Bulk GF/F,AV,AV,1.768847,1.103087,AV,AV
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-01-13T18:35:00.000Z,0,"QCHL12126,QCHL12127,QCHL12128","20um,3um,GF/F","None,None,None","None,None,None",0.313394,3,0.273130,3,QCHL12129,Bulk GF/F,,,0.413611,0.486734,,
2021-01-13T18:35:00.000Z,5,"QCHL12130,QCHL12131,QCHL12132","20um,3um,GF/F","None,None,None","None,None,None",0.305998,3,0.485304,3,QCHL12133,Bulk GF/F,,,0.328943,0.407180,,
2021-01-13T18:35:00.000Z,10,"QCHL12134,QCHL12135,QCHL12136","20um,3um,GF/F","None,None,None","None,None,None",0.269949,3,0.330011,3,QCHL12137,Bulk GF/F,,,0.264615,0.322212,,
2021-01-13T18:35:00.000Z,20,"QCHL12138,QCHL12139,QCHL12140","20um,3um,GF/F","None,None,None","None,None,None",0.216296,3,0.438276,3,QCHL12141,Bulk GF/F,,,0.159488,0.251122,,


In [112]:
fig1 = px.scatter(df_grouped,x='chla_sum_stacked',y='chla_bulk',
                  color='chla_combined_flag', hover_data=hoverinfo,
                  symbol='chla_count_stacked',
                  color_discrete_map=hakai_flag_colors)
fig2 = px.scatter(df_grouped,x='phaeo_sum_stacked',y='phaeo_bulk',
                  color='phaeo_combined_flag',hover_data=hoverinfo,
                  symbol='phaeo_count_stacked',
                  color_discrete_map=hakai_flag_colors)


fig = make_subplots(rows=1, cols=2, shared_xaxes=False, subplot_titles=('Chla','Phaeo'))
for trace in fig1['data']:
    fig.add_trace(trace, row=1, col=1)
for trace in fig2['data']:
    fig.add_trace(trace, row=1, col=2)
fig.update_xaxes(title_text='Stacked',row=1,col=1)
fig.update_xaxes(title_text='Stacked',row=1,col=2)
fig.update_yaxes(title_text='Bulk',row=1,col=1)
fig.update_yaxes(title_text='Bulk',row=1,col=2)

# Time series QARTOD tests
Review each depth time series for a station and run timeseries specific test on them. The configuarion dictionary below, list of each variables and depth ranges the tests that will be applied.


## Set QARTOD Tests Configuration

In [114]:
qc_config = [{
    "depth_range":{
        "minimum":-5,
        "maximum":55
        },
    "streams":{
        "chla":{
            "qartod": {
                "gross_range_test":{
                    "suspect_span": [0, 36],
                    "fail_span": [0, 40]
                    },
                "aggregate": {}
            }
        },
        "phaeo":{
            "qartod": {
                "gross_range_test":{
                    "suspect_span": [0, 3],
                    "fail_span": [0, 4]
                    },
                "aggregate": {}
            }
                    
        }
    }
}]


## Run QARTOD Tests

In [115]:
# Run QARTOD tests
# We are using the deprecated QcConfig method and hopefully will move 
#  to a new stream method soon.
time = 'time'
depth = 'depth'
group_timeseries = ['line_out_depth']
for item in qc_config:
    df_depth_range = df[(df['depth']>item['depth_range']["minimum"]) & \
                        (df['depth']<item['depth_range']["maximum"])]
    for line_out_depth, timeserie in df_depth_range.groupby(['site_id','line_out_depth']):
        timeserie = timeserie.sort_values(time)
        for var in item['streams'].keys():
            qc = QcConfig(item['streams'][var])
            qc_result = qc.run(
                inp=timeserie[var],
                tinp=timeserie[time],
                zinp=timeserie[depth],
            )
            for module,tests in qc_result.items():
                for test, flag in tests.items():
                    df.loc[timeserie.index, var+'_'+module+"_"+test] = flag

# Since plotly is better threading strings as categories let's convert those values to that.
df['phaeo_qartod_aggregate'] = df['phaeo_qartod_aggregate'].replace({pd.NA:9}).astype(int).astype(str)
df['chla_qartod_aggregate'] = df['chla_qartod_aggregate'].replace({pd.NA:9}).astype(int).astype(str)

## Review QARTOD Results

In [119]:
 var = 'chla'
 line_out_depth= [5]
 px.scatter(df[df['line_out_depth'].isin(line_out_depth)].sort_values(['line_out_depth',var+'_qartod_aggregate']),
                 x='time',y=var,
                 color=var+'_qartod_aggregate', 
                 hover_data=['hakai_id',var+"_qartod_aggregate"],
                 color_discrete_map=qartod_colors_str)

# Review Interannual Variability
Let's compute the average value measured for each depth and the associated standard deviation.

## Compute the seasonal variability
### Monthly Values


In [120]:
# Compute the monthly value recorded for each station and line_out_depth 
df_seasonal = df.groupby(['site_id','line_out_depth','month']) \
  .agg(['mean','std'])[nutrient_variables] \
  .reset_index()
df_seasonal.columns = ['_'.join(filter(None,col)).strip() for col in df_seasonal.columns.values]

# Add a seasonal value to each data
df_with_seasons = pd.merge(df,df_seasonal,on=['site_id','line_out_depth','month'], suffixes=('','_seasonal'))


### Over 30 days window

In [121]:
# TODO not that pandas.rolling isn't compatible with center window yet.
# Or get the season with a 30 days running window instead
window = '60d'
df_running_seasonal = df.groupby(['site_id','line_out_depth']).apply(
    lambda x: x.sort_values('dayoftheyear').set_index('dayoftheyear')[nutrient_variables]\
       .rolling(window).agg(['mean','std']).reset_index()
)
df_running_seasonal.columns = ['_'.join(filter(None,col)).strip() for col in df_running_seasonal.columns.values]

# Since rolling is looking the days prior, we'll center the value to the middle of the window.
#   rolling(center) isn't yet available for datetime index
df_running_seasonal['dayoftheyear'] = df_running_seasonal['dayoftheyear']-pd.to_timedelta(window)/2
df_running_seasonal.loc[df_running_seasonal['dayoftheyear']<pd.to_timedelta('0d'),'dayoftheyear'] += pd.to_timedelta('365d')

# Add a seasonal value to each data
df_with_seasons = pd.merge(df_with_seasons,df_running_seasonal,on=['site_id','line_out_depth','dayoftheyear'], suffixes=('','_running_seasonal'))


## Make Monthly Box Plots

In [124]:
review_depth = 20
var = 'chla'

# Show the seasonality of the data
px.box(df.sort_values('line_out_depth'),x='month',y=var,
        color='line_out_depth', animation_frame='line_out_depth',
       hover_name='hakai_id')

In [127]:
# Review each depth timeseries and
review_depth = 5
alpha = 2
var = 'chla'

#  compare seasonal average to value recorded
for var_name in ['chla','phaeo']:
    df_with_seasons[var_name+'_seasonal_flag'] = '1'

    residual = (df_with_seasons[var_name]-df_with_seasons[var_name+'_mean']).abs()
    df_with_seasons.loc[residual> alpha*df_with_seasons[var_name+'_std'], var_name+'_seasonal_flag'] = '3'

# Isolate the review depth
df_temp = df_with_seasons[df_with_seasons['line_out_depth']==review_depth].sort_values('time')

# Plot data
fig = px.scatter(df_temp,x='time',y=var,color=var+'_seasonal_flag', color_discrete_map=qartod_colors_str,
                 hover_name='hakai_id')
fig.add_trace(go.Scatter(x=df_temp['time'],y=df_temp[var+"_mean"],name='mean',
              line = dict(color='firebrick', width=2)))
fig.add_trace(go.Scatter(x=df_temp['time'],
                         y=df_temp[var+"_mean"]+alpha*df_temp[var+"_std"],
                         name='mean+{0}*std'.format(alpha),
              line = dict(color='grey', width=2, dash='dash')))
fig.add_trace(go.Scatter(x=df_temp['time'],
                         y=df_temp[var+"_mean"]-alpha*df_temp[var+"_std"],
                         name='mean-{0}*std'.format(alpha),
                         line = dict(color='grey', width=2, dash='dash')))

fig.add_trace(go.Scatter(x=df_temp['time'],
                         y=df_temp[var+"_mean_running_seasonal"],name='running mean',
              line = dict(color='red', width=2, dash='dash')))
fig.add_trace(go.Scatter(x=df_temp['time'],
                         y=df_temp[var+"_mean_running_seasonal"]+alpha*df_temp[var+"_std_running_seasonal"],name='running mean+{0}*std'.format(alpha),
              line = dict(color='black', width=2, dash='dot')))
fig.add_trace(go.Scatter(x=df_temp['time'],
                         y=df_temp[var+"_mean_running_seasonal"]-alpha*df_temp[var+"_std_running_seasonal"],
                         name='running mean-{0}*std'.format(alpha),
                         line = dict(color='black', width=2, dash='dot')))

fig.update_layout(coloraxis_showscale=False)
fig.show()

In [128]:
#  compare seasonal average to value recorded
for var in ['chla','phaeo']:
    df_with_seasons[var+'_seasonal_flag'] = '1'

    residual = (df_with_seasons[var]-df_with_seasons[var+'_mean']).abs()
    df_with_seasons.loc[residual> alpha*df_with_seasons[var+'_std'], var+'_seasonal_flag'] = '3'

# Create Suggested Flag


In [132]:
flags_considered = ['_qartod_aggregate','_seasonal_flag']
for var in nutrient_variables:
    df_flags = df[flags_considered].astype(float).astype(int)
    df.loc[df_flags.index,var+'_qartod'] = \
       qartod.qartod_compare(df_flags.transpose().values).astype(str)


KeyError: ignored

In [131]:
review_depth = 0
var = 'chla'
fig = px.scatter(df[df['line_out_depth']==review_depth].sort_values(var+'_qartod'),
                 x='time',y=var,
                 color=var+'_qartod', 
                 hover_data=['hakai_id',var+"_qartod_aggregate"],
                 color_discrete_map=qartod_colors_str)
fig.show()

KeyError: ignored

# Pretty plot examples for report
## Scatter plots

In [134]:
px.scatter(df,x='time',y='chla',color='line_out_depth', hover_data=['hakai_id'], template='simple_white')

In [139]:
px.box(df[df['line_out_depth'].isin([0,5,10,50])],x='month',y='chla',color='year', hover_data=['hakai_id','date'], template='simple_white',facet_row='line_out_depth')

## Contour plot

In [146]:

# Plot contourf interpolate linearly over the x axis and maximum over two NaN values
var = 'chla'
df_pivot = pd.pivot_table(df,values=var,index='line_out_depth',columns='time',aggfunc='mean').interpolate(axis=0,limit=2).sort_index(axis=0).sort_index(axis=1).interpolate(axis=1,limit=3)
fig = go.Figure(data =
    go.Contour(z=df_pivot.values,x=df_pivot.columns,y=df_pivot.index.values,
               colorbar=dict(title=var, titleside='right'),
               colorscale='greens',
               contours_coloring='heatmap'
               #,connectgaps=True
              ))
fig.update_yaxes(title='Depth (m)',autorange="reversed",linecolor='black',mirror=True,
                 ticks='outside',showline=True)
fig.update_xaxes(linecolor='black',mirror=True,ticks='outside',showline=True)
fig.show()

## Scatter with colorbar

In [147]:
## Scatter with colorbar
fig = px.scatter(df.dropna(subset=['chla'],axis=0),x='time',y='depth',
                 color='chla',color_continuous_scale='greens',
                 hover_name='hakai_id')
fig.update_yaxes(title='Depth (m)',autorange="reversed",linecolor='black',mirror=True,
                 ticks='outside')
fig.update_xaxes(linecolor='black',mirror=True,ticks='outside',showline=True)
fig.show()