# Using EcoFOCIpy to process raw field data

## Cruise ID - DY1904


## BTL Data + Nutrient Data

This is a streamlined version of the generation routines that merge bottle data with the Mordy Nut. Lab nutrient data for long-term archive.

In [83]:
import yaml
import glob
import pandas as pd

import EcoFOCIpy.io.sbe_ctd_parser as sbe_ctd_parser #<- instrument specific
import EcoFOCIpy.io.ncCFsave as ncCFsave
import EcoFOCIpy.metaconfig.load_config as load_config

In [84]:
# Root path to the cruise directory; the raw, working and log file paths used
# later in this notebook are built by appending to it.
sample_data_dir = '/Users/bell/ecoraid/2019/CTDcasts/dy1904/' #root path to cruise directory
# Local EcoFOCIpy checkout; used to locate the static institutional metadata file.
ecofocipy_dir = '/Users/bell/Programs/EcoFOCIpy/'

In [95]:
###############################################################
# edit to point to {cruise specific} raw datafiles
cruise_name = 'dy1904' #no hyphens
datafile = f'{sample_data_dir}rawconverted/' #<- directory of converted casts; every *.btl file in it is processed
nutdatafile = f'{sample_data_dir}working/DiscreteNutrients/DY1904 Nutrient Data.txt' #<- single tab-delimited discrete nutrient file
cruise_meta_file = f'{sample_data_dir}logs/dy1904.yaml'
inst_meta_file = f'{sample_data_dir}logs/FOCI_standard_CTDpNutsWOCE.yaml' #<- copy to each deployment for simplicity?
group_meta_file = f'{ecofocipy_dir}staticdata/institutional_meta_example.yaml'
###############################################################

#init and load data
cruise = sbe_ctd_parser.sbe_btl()

# sorted() so the bottle files are handed to the parser in filename order
filename_list = sorted(glob.glob(f'{datafile}*.btl'))
cruise_data = cruise.manual_parse(filename_list)

Processing /Users/bell/ecoraid/2019/CTDcasts/dy1904/rawconverted/ctd001.btl
Processing /Users/bell/ecoraid/2019/CTDcasts/dy1904/rawconverted/ctd003.btl
Processing /Users/bell/ecoraid/2019/CTDcasts/dy1904/rawconverted/ctd004.btl
Processing /Users/bell/ecoraid/2019/CTDcasts/dy1904/rawconverted/ctd005.btl
Processing /Users/bell/ecoraid/2019/CTDcasts/dy1904/rawconverted/ctd006.btl
Processing /Users/bell/ecoraid/2019/CTDcasts/dy1904/rawconverted/ctd007.btl
Processing /Users/bell/ecoraid/2019/CTDcasts/dy1904/rawconverted/ctd008.btl
Processing /Users/bell/ecoraid/2019/CTDcasts/dy1904/rawconverted/ctd009.btl
Processing /Users/bell/ecoraid/2019/CTDcasts/dy1904/rawconverted/ctd010.btl
Processing /Users/bell/ecoraid/2019/CTDcasts/dy1904/rawconverted/ctd011.btl
Processing /Users/bell/ecoraid/2019/CTDcasts/dy1904/rawconverted/ctd012.btl
Processing /Users/bell/ecoraid/2019/CTDcasts/dy1904/rawconverted/ctd013.btl
Processing /Users/bell/ecoraid/2019/CTDcasts/dy1904/rawconverted/ctd014.btl
Processing /

## Load csv Nutrient File

In [87]:
# The nutrient file is tab-separated text, so the separator is given explicitly.
nut_data = pd.read_csv(nutdatafile, sep='\t')
nut_data

Unnamed: 0,Cast,Niskin,PO4 (uM),PO4 Flag,Sil (uM),Sil Flag,NO3 (uM),NO3 Flag,NO2 (uM),NO2 Flag,NH4 (uM),NH4 Flag
0,11,1,0.989,2,19.6,2,10.0,2,0.22,2,0.35,2
1,11,2,0.991,2,20.0,2,9.6,2,0.21,2,0.32,2
2,11,3,0.944,2,19.6,2,9.9,2,0.22,2,0.32,2
3,11,4,0.998,2,19.6,2,10.1,2,0.22,2,0.31,2
4,11,5,1.014,2,19.7,2,10.1,2,0.22,2,0.28,2
...,...,...,...,...,...,...,...,...,...,...,...,...
211,41,3,1.264,2,20.6,2,10.5,2,0.15,2,0.71,2
212,41,4,1.239,2,20.9,2,10.2,2,0.15,2,0.65,2
213,41,5,1.223,2,21.6,2,11.3,2,0.16,2,0.71,2
214,41,6,1.221,2,22.0,2,11.5,2,0.19,2,0.83,2


## Merge Bottle and Nutrient Data but drop non nutrient vars?

In [105]:
# Bottle-file columns carried into the merged product: the bottle number
# (the join key) and the pressure column.
keep_param = ['bottle','prdm']
# keep_param = ['bottle','prsm']

# Replace each cast's bottle table with its nutrient rows joined to the kept
# bottle columns. Iterate over a snapshot because entries are reassigned below.
for cast, cdata in list(cruise_data.items()):
    try:
        # Cast number parsed from the key, e.g. 'ctd011.btl' -> 11
        # (key format assumed from the parsing here - confirm against manual_parse).
        matchcast = int(cast.split('.')[0].lower().split('ctd')[-1])

        cast_nutrients = nut_data[nut_data['Cast'] == matchcast]
        bottle_columns = cdata.reset_index()[keep_param]

        # pd.merge defaults to an inner join, so a cast with no rows in nut_data
        # ends up as an empty frame rather than raising.
        merged = pd.merge(cast_nutrients, bottle_columns,
                          left_on='Niskin', right_on='bottle')
        cruise_data[cast] = merged.set_index('bottle').drop(columns=['Cast'])
    except Exception as err:
        # Best effort: leave this cast's entry untouched and move on, but report
        # why instead of silently swallowing the error (a bare except would also
        # trap KeyboardInterrupt).
        print(f'Could not merge nutrients for {cast}: {err!r}')

## Add Deployment meta information

In [108]:
#just a dictionary of dictionaries - simple
# Read the cruise metadata YAML (cruise_meta_file) into cruise_config; it is
# passed to ncCFsave.EcoFOCI_CFnc as operation_yaml when the files are written.
with open(cruise_meta_file) as file:
    cruise_config = yaml.full_load(file)

## Add Instrument meta information

Time, depth, lat, lon should be added regardless (always our coordinates), but for a mooring site it's going to be a (1,1,1,t) dataset.
The variables of interest should be read from the data file and matched to a key for naming.  That key is in the inst_config file seen below and should represent common conversion names in the raw data.

In [109]:
# Read the instrument metadata YAML (inst_meta_file) into inst_config; it is
# passed to ncCFsave.EcoFOCI_CFnc as instrument_yaml when the files are written.
with open(inst_meta_file) as file:
    inst_config = yaml.full_load(file)

## Add institutional meta-information


In [110]:
# Read the institutional metadata YAML (group_meta_file) into group_config; it is
# passed to institution_meta_add() when the files are written.
with open(group_meta_file) as file:
    group_config = yaml.full_load(file)

## Save CF Netcdf files

Currently we stick to netcdf3 classic... but migrating to netcdf4 (default) should pose no problems for most modern purposes.  It's easy enough to pass the `format` kwarg through to the netcdf api of xarray.

In [111]:
#loop over all casts and perform tasks shown above

# Maps the nutrient-file column headers (and the bottle pressure column) to the
# short names used in the output; presumably these match keys in the instrument
# YAML - confirm. Built once because it does not depend on the cast.
column_rename = {
    'Sil (uM)':'SI',
    'PO4 (uM)':'PO4',
    'NO2 (uM)':'NO2',
    'NO3 (uM)':'NO3',
    'NH4 (uM)':'NH4',
    'Sil Flag':'SI_WOCE_FLAG',
    'PO4 Flag':'PO4_WOCE_FLAG',
    'NO2 Flag':'NO2_WOCE_FLAG',
    'NO3 Flag':'NO3_WOCE_FLAG',
    'NH4 Flag':'NH4_WOCE_FLAG',
    'Niskin':'BTLID',
    'prdm':'pressure',
    'empty':'empty', #this will be ignored
    'flag':'flag',
}

for cast in cruise_data:
    try:
        cruise_data[cast] = cruise_data[cast].rename(columns=column_rename)

        cruise_data_nc = ncCFsave.EcoFOCI_CFnc(df=cruise_data[cast],
                                    instrument_yaml=inst_config,
                                    operation_yaml=cruise_config,
                                    operation_type='ctd')

        cruise_data_nc.expand_dimensions(dim_names=['latitude','longitude','time'],geophys_sort=False)

        cruise_data_nc.variable_meta_data(variable_keys=list(cruise_data[cast].columns.values),drop_missing=False)
        #adding dimension meta needs to come after updating the dimension values... BUG?
        cruise_data_nc.dimension_meta_data(variable_keys=['time','latitude','longitude'])
        # NOTE(review): conscastno is passed as-is here but upper-cased for
        # deployment_meta_add below - confirm both lookups expect that casing.
        cruise_data_nc.temporal_geospatioal_meta_data_ctd(positiveE=False,conscastno=cast.split('.')[0])

        #add global attributes
        cruise_data_nc.deployment_meta_add(conscastno=cast.split('.')[0].upper())

        #add institutional global attributes
        cruise_data_nc.institution_meta_add(group_config)

        #add creation date/time - provenance data
        cruise_data_nc.provinance_meta_add()

        #provide initial qc status field
        cruise_data_nc.qc_status(qc_status='excellent') #<- options are unknown, excellent, probably good, mixed, unqcd

        # Text after the last 'd' and before the first '.', e.g. 'ctd011.btl' -> '011'.
        # Held in its own name so `cast` still refers to the dict key if the
        # save fails and the message below is printed.
        cast_number = cast.lower().split('d')[-1].split('.')[0]
        cruise_data_nc.xarray2netcdf_save(xdf = cruise_data_nc.get_xdf(),
                                   filename=cruise_name+'c'+cast_number.zfill(3)+'_nut.nc',format="NETCDF3_CLASSIC")
    except Exception as err:
        # Best effort: one failing cast must not stop the others, but report the
        # reason so skipped casts can be diagnosed (a bare except hid it and
        # would also trap KeyboardInterrupt).
        print(f'Skipping {cast}: {err!r}')

Skipping 001
Skipping 003
Skipping 004
Skipping 005
Skipping 006
Skipping 007
Skipping 008
Skipping 009
Skipping 010
Skipping 014
Skipping 042
Skipping 043
Skipping 044
Skipping 045
Skipping 046
Skipping 047
Skipping 048
Skipping 049
Skipping 051
Skipping 052


## Next Steps

QC of data (plot parameters with other instruments)
- be sure to update the qc_status and the history