# Using EcoFOCIpy to process Winkler/Nutrient field data

## ____ Template for cruise (pre 2020)

## BTL Data + Nutrient Data

This is a streamlined version of generation routines to merge bottle data and Mordy Nut. Lab Nutrient Data for long term archive and Oxygen Winkler Data

<div class="warning" style='background-color:#E9D8FD; color: #69337A; border-left: solid #805AD5 4px; border-radius: 4px; padding:0.7em;'>
<span>
<p style='margin-top:1em; text-align:center'>
<b>A template for Nutrient Lab and Oxygen Winkler ASCII files to NETCDF</b></p>
<p style='margin-left:1em;'>
Populate the necessary paths in the following cells.</p>
<p style='margin-bottom:1em; margin-right:1em; text-align:right; font-family:Georgia'> <b>- Shaun Bell</b>
</p></span>
</div>


In [78]:
import yaml
import glob
import os
import pandas as pd

import ecofocipy.io.sbe_ctd_parser as sbe_ctd_parser #<- instrument specific
import ecofocipy.io.ncCFsave as ncCFsave
import ecofocipy.metaconfig.load_config as load_config

In [79]:
sample_data_dir = '/Users/bell/ecoraid/2021/CTDcasts/dy2103/' #root path to cruise directory
ecofocipy_dir = '/Users/bell/Programs/EcoFOCIpy/'

In [125]:
###############################################################
# edit to point to {cruise sepcific} raw datafiles 
datafile = sample_data_dir+'rawconverted/' #<- point to cruise and process all files within
nutdatafile = sample_data_dir+'working/Discrete_Nutrients/DY2103 Nutrient Data.txt' #<- point to cruise and process all files within
oxydatafile = sample_data_dir+'working/Discrete_Oxygens/DY2103 Oxygen Data.txt' #<- point to cruise and process all files within
cruise_name = 'dy2103' #no hyphens
cruise_meta_file = sample_data_dir+'logs/DY2103.yaml'
inst_nut_meta_file = sample_data_dir+'logs/FOCI_standard_CTDpNuts.yaml' #<- copy to each deployment for simplicity?
inst_oxy_meta_file = sample_data_dir+'logs/FOCI_standard_CTDpOxy.yaml' #<- copy to each deployment for simplicity?
group_meta_file = ecofocipy_dir+'staticdata/institutional_meta_example.yaml'
###############################################################

# Process Nutrients and Oxygen Flags
nut_proc = False
oxy_proc = True

#init and load data
cruise = sbe_ctd_parser.sbe_btl()
filename_list = sorted(glob.glob(datafile + '*.btl'))

cruise_btl_data = cruise.manual_parse(filename_list)

Processing /Users/bell/ecoraid/2021/CTDcasts/dy2103/rawconverted/ctd001.btl
Processing /Users/bell/ecoraid/2021/CTDcasts/dy2103/rawconverted/ctd002.btl
Processing /Users/bell/ecoraid/2021/CTDcasts/dy2103/rawconverted/ctd003.btl
Processing /Users/bell/ecoraid/2021/CTDcasts/dy2103/rawconverted/ctd004.btl
Processing /Users/bell/ecoraid/2021/CTDcasts/dy2103/rawconverted/ctd005.btl
Processing /Users/bell/ecoraid/2021/CTDcasts/dy2103/rawconverted/ctd006.btl
Processing /Users/bell/ecoraid/2021/CTDcasts/dy2103/rawconverted/ctd007.btl
Processing /Users/bell/ecoraid/2021/CTDcasts/dy2103/rawconverted/ctd008.btl
Processing /Users/bell/ecoraid/2021/CTDcasts/dy2103/rawconverted/ctd009.btl
Processing /Users/bell/ecoraid/2021/CTDcasts/dy2103/rawconverted/ctd010.btl
Processing /Users/bell/ecoraid/2021/CTDcasts/dy2103/rawconverted/ctd011.btl
Processing /Users/bell/ecoraid/2021/CTDcasts/dy2103/rawconverted/ctd012.btl
Processing /Users/bell/ecoraid/2021/CTDcasts/dy2103/rawconverted/ctd013.btl
Processing /

## Load csv Nutrient File

In [129]:
if nut_proc:
    nut_data = pd.read_csv(nutdatafile,delimiter='\t')
nut_data

NameError: name 'nut_data' is not defined

## Load csv Oxygen File

In [128]:
if oxy_proc:
    oxy_data = pd.read_csv(oxydatafile,delimiter='\t')
oxy_data

Unnamed: 0,Cast,Niskin,O2 (uM)
0,12,7,347.72
1,13,1,350.40
2,16,7,361.20
3,17,1,370.48
4,18,7,374.27
...,...,...,...
88,71,8,62.81
89,69,6,308.33
90,69,5,302.70
91,69,4,281.86


## Merge Bottle and Nutrient Data but drop non nutrient vars?

In [88]:
keep_param = ['bottle','prdm']

for cast,cdata in cruise_btl_data.items():
    try:
        matchcast = int((cast.split('.')[0]).lower().split('ctd')[-1])
        cruise_data[cast] = pd.merge(nut_data[nut_data['Cast']==matchcast],cdata.reset_index()[keep_param],right_on='bottle',left_on='Niskin').set_index('bottle').drop(columns=['Cast'])
    except:
            continue

## Add Deployment meta information

In [89]:
#just a dictionary of dictionaries - simple
with open(cruise_meta_file) as file:
    cruise_config = yaml.full_load(file)

## Add Instrument meta information

Time, depth, lat, lon should be added regardless (always our coordinates) but for a mooring site its going to be a (1,1,1,t) dataset
The variables of interest should be read from the data file and matched to a key for naming.  That key is in the inst_config file seen below and should represent common conversion names in the raw data

In [90]:
with open(inst_nut_meta_file) as file:
    inst_nut_config = yaml.full_load(file)

## Add institutional meta-information


In [91]:
with open(group_meta_file) as file:
    group_config = yaml.full_load(file)

## Save CF Netcdf files

Currently stick to netcdf3 classic... but migrating to netcdf4 (default) may be no problems for most modern purposes.  Its easy enough to pass the `format` kwargs through to the netcdf api of xarray.

In [92]:
#loop over all casts and perform tasks shown above

for cast in cruise_data.keys():
    try:
        cruise_data[cast] = cruise_data[cast].rename(columns={
                            'Sil (uM)':'SI',
                            'PO4 (uM)':'PO4',
                            'NO2 (uM)':'NO2', 
                            'NO3 (uM)':'NO3',
                            'NH4 (uM)':'NH4',
                            'Niskin':'BTLID',
                            'prdm':'pressure',
                            'empty':'empty', #this will be ignored
                            'flag':'flag'})

        cruise_data_nc = ncCFsave.EcoFOCI_CFnc(df=cruise_data[cast], 
                                    instrument_yaml=inst_config, 
                                    operation_yaml=cruise_config,
                                    operation_type='ctd')

        cruise_data_nc.expand_dimensions(dim_names=['latitude','longitude','time'],geophys_sort=False)

        cruise_data_nc.variable_meta_data(variable_keys=list(cruise_data[cast].columns.values),drop_missing=False)
        #adding dimension meta needs to come after updating the dimension values... BUG?
        cruise_data_nc.dimension_meta_data(variable_keys=['time','latitude','longitude'])
        cruise_data_nc.temporal_geospatioal_meta_data_ctd(positiveE=False,conscastno=cast.split('.')[0])

        #add global attributes
        cruise_data_nc.deployment_meta_add(conscastno=cast.split('.')[0].upper())

        #add instituitonal global attributes
        cruise_data_nc.institution_meta_add(group_config)

        #add creation date/time - provenance data
        cruise_data_nc.provinance_meta_add()

        #provide intial qc status field
        cruise_data_nc.qc_status(qc_status='excellent') #<- options are unknown, excellent, probably good, mixed, unqcd

        cruise_data_nc.xarray2netcdf_save(xdf = cruise_data_nc.get_xdf(),
                                   filename=cruise_name+'c'+cast.lower().split('d')[-1].split('.')[0].zfill(3)+'_nut.nc',format="NETCDF3_CLASSIC")
    except:
        print(f'Skipping & Removing {cast}')
        os.remove(path=cruise_name+'c'+cast.lower().split('d')[-1].split('.')[0].zfill(3)+'_nut.nc')

Skipping & Removing ctd001.btl


FileNotFoundError: [Errno 2] No such file or directory: 'dy2103cctd001.btl_nut.nc'

## Merge Bottle and Oxygen Winkler Data but drop non oxygen vars?

In [116]:
keep_param = ['bottle','prdm']

for cast,cdata in cruise_btl_data.items():
    # try:
    matchcast = int((cast.split('.')[0]).lower().split('ctd')[-1])
    cruise_data[cast] = pd.merge(oxy_data[oxy_data['Cast']==matchcast],cdata.reset_index()[keep_param],right_on='bottle',left_on='Niskin').set_index('bottle').drop(columns=['Cast'])
    # except:
    #         continue

## Add Deployment meta information

In [117]:
#just a dictionary of dictionaries - simple
with open(cruise_meta_file) as file:
    cruise_config = yaml.full_load(file)

## Add Instrument meta information

Time, depth, lat, lon should be added regardless (always our coordinates) but for a mooring site its going to be a (1,1,1,t) dataset
The variables of interest should be read from the data file and matched to a key for naming.  That key is in the inst_config file seen below and should represent common conversion names in the raw data

In [118]:
with open(inst_oxy_meta_file) as file:
    inst_oxy_config = yaml.full_load(file)

## Add institutional meta-information


In [119]:
with open(group_meta_file) as file:
    group_config = yaml.full_load(file)

## Save CF Netcdf files

Currently stick to netcdf3 classic... but migrating to netcdf4 (default) may be no problems for most modern purposes.  Its easy enough to pass the `format` kwargs through to the netcdf api of xarray.

In [123]:
#loop over all casts and perform tasks shown above

for cast in cruise_data.keys():
    try:
        cruise_data[cast] = cruise_data[cast].rename(columns={
                            'O2 (uM)':'O2',
                            'Niskin':'BTLID',
                            'prdm':'pressure',
                            'empty':'empty', #this will be ignored
                            'flag':'flag'})

        cruise_data_nc = ncCFsave.EcoFOCI_CFnc(df=cruise_data[cast], 
                                    instrument_yaml=inst_oxy_config, 
                                    operation_yaml=cruise_config,
                                    operation_type='ctd')

        cruise_data_nc.expand_dimensions(dim_names=['latitude','longitude','time'],geophys_sort=False)

        cruise_data_nc.variable_meta_data(variable_keys=list(cruise_data[cast].columns.values),drop_missing=False)
        #adding dimension meta needs to come after updating the dimension values... BUG?
        cruise_data_nc.dimension_meta_data(variable_keys=['time','latitude','longitude'])
        cruise_data_nc.temporal_geospatioal_meta_data_ctd(positiveE=False,conscastno=cast.split('.')[0])

        #add global attributes
        cruise_data_nc.deployment_meta_add(conscastno=cast.split('.')[0].upper())

        #add instituitonal global attributes
        cruise_data_nc.institution_meta_add(group_config)

        #add creation date/time - provenance data
        cruise_data_nc.provinance_meta_add()

        #provide intial qc status field
        cruise_data_nc.qc_status(qc_status='excellent') #<- options are unknown, excellent, probably good, mixed, unqcd

        cruise_data_nc.xarray2netcdf_save(xdf = cruise_data_nc.get_xdf(),
                                   filename=cruise_name+'c'+cast.lower().split('d')[-1].split('.')[0].zfill(3)+'_oxy.nc',format="NETCDF3_CLASSIC")
    except KeyError:
        print(f'Skipping {cast}')
    except RuntimeError:
        print(f'Skipping & Removing {cast}')
        os.remove(path=cruise_name+'c'+cast.lower().split('d')[-1].split('.')[0].zfill(3)+'_oxy.nc')

Skipping & Removing ctd001.btl
Skipping & Removing ctd014.btl
Skipping & Removing ctd015.btl
Skipping ctd021.btl
Skipping & Removing ctd030.btl
Skipping & Removing ctd060.btl
Skipping ctd079.btl
Skipping & Removing ctd081.btl
Skipping & Removing ctd082.btl
Skipping & Removing ctd085.btl
Skipping & Removing ctd086.btl
Skipping & Removing ctd087.btl
Skipping & Removing ctd088.btl
