# Using EcoFOCIpy to process raw field data

## Cruise ID - DY2206

## BTL Data + Nutrient Data

This is a streamlined version of the generation routines that merge bottle data with the Mordy Nutrient Lab nutrient data for long-term archive.

In [11]:
import yaml
import glob
import pandas as pd

import EcoFOCIpy.io.sbe_ctd_parser as sbe_ctd_parser #<- instrument specific
import EcoFOCIpy.io.ncCFsave as ncCFsave
import EcoFOCIpy.metaconfig.load_config as load_config

In [12]:
# NOTE(review): absolute, user-specific paths - edit both for the machine running the notebook.
# Both values must end with '/' because later cells build file paths by plain string concatenation.
sample_data_dir = '/Users/bell/ecoraid/2022/CTDcasts/dy2206/' #root path to cruise directory
# Base directory used below to locate 'staticdata/institutional_meta_example.yaml'.
ecofocipy_dir = '/Users/bell/Programs/EcoFOCIpy/'

In [13]:
###############################################################
# edit to point to {cruise specific} raw datafiles
datafile = sample_data_dir+'rawconverted/' #<- directory of converted casts; every *.btl file inside is processed
nutdatafile = sample_data_dir+'working/DiscreteNutrients/DY2206 Nutrient Data.txt' #<- tab-delimited nutrient results for this cruise
cruise_name = 'dy2206' #no hyphens
cruise_meta_file = sample_data_dir+'logs/dy2206.yaml'
inst_meta_file = sample_data_dir+'logs/FOCI_standard_CTDpNutsWOCE.yaml' #<- copy to each deployment for simplicity?
group_meta_file = ecofocipy_dir+'staticdata/institutional_meta_example.yaml'
###############################################################

#init and load data
cruise = sbe_ctd_parser.sbe_btl()
# glob gives no ordering guarantee, so sort to process casts in a deterministic order
filename_list = sorted(glob.glob(datafile + '*.btl'))
if not filename_list:
    # Fail here with a clear message instead of carrying an empty cruise through every later cell
    raise FileNotFoundError(f"No .btl files found in {datafile!r}; check sample_data_dir and the 'rawconverted/' folder")

# Parsed casts are iterated with .keys()/.items() in the cells below
cruise_data = cruise.manual_parse(filename_list)

Processing /Users/bell/ecoraid/2022/CTDcasts/dy2206/rawconverted/ctd001.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/dy2206/rawconverted/ctd002.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/dy2206/rawconverted/ctd003.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/dy2206/rawconverted/ctd004.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/dy2206/rawconverted/ctd005.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/dy2206/rawconverted/ctd006.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/dy2206/rawconverted/ctd007.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/dy2206/rawconverted/ctd008.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/dy2206/rawconverted/ctd009.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/dy2206/rawconverted/ctd010.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/dy2206/rawconverted/ctd011.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/dy2206/rawconverted/ctd012.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/dy2206/rawconverted/ctd013.btl
Processing /

## Load csv Nutrient File

In [14]:
# The nutrient results file is tab-delimited text; the merge cell further down
# matches its 'Cast' and 'Niskin' columns against the bottle data.
nut_data = pd.read_csv(nutdatafile, sep='\t')
nut_data

Unnamed: 0,Cast,Niskin,PO4 (uM),PO4 Flag,Sil (uM),Sil Flag,NO3 (uM),NO3 Flag,NO2 (uM),NO2 Flag,NH4 (uM),NH4 Flag
0,1,7,0.426,2,3.9,2,0.3,2,0.05,2,0.33,2
1,1,6,0.528,2,5.2,2,1.7,2,0.08,2,0.12,2
2,1,5,0.972,2,13.5,2,7.7,2,0.22,2,0.52,2
3,1,4,1.041,2,15.4,2,8.9,2,0.22,2,0.65,2
4,1,3,1.068,2,15.9,2,9.2,2,0.23,2,0.72,2
...,...,...,...,...,...,...,...,...,...,...,...,...
180,29,5,0.776,2,6.9,2,0.5,2,0.04,2,0.09,2
181,29,4,0.747,2,6.0,2,1.7,2,0.04,2,0.38,2
182,29,3,1.206,2,16.6,2,6.8,2,0.06,2,0.96,2
183,29,2,1.400,2,15.9,2,6.7,2,0.05,2,0.95,2


## Rare Bottle File Edits

<div class="warning" style='background-color:#ffcccb; color: #FF0000; border-left: solid #805AD5 4px; border-radius: 4px; padding:0.7em;'>
<span>
<p style='margin-top:1em; text-align:center'>
<b>WARNING</b></p>
<p style='margin-left:1em;'>Bottle/Niskin number and rosette position should be the same but can differ (for example, bottles are labeled sequentially but a rosette position is skipped for balancing or for other instruments). On this cruise the following rosette positions were fired, while the bottles were labeled differently:</p>
cruise_data[cast]
<style type="text/css">
.tg  {border-collapse:collapse;border-spacing:0;}
.tg td{border-color:black;border-style:solid;border-width:1px;font-family:Arial, sans-serif;font-size:14px;
  overflow:hidden;padding:10px 5px;word-break:normal;}
.tg th{border-color:black;border-style:solid;border-width:1px;font-family:Arial, sans-serif;font-size:14px;
  font-weight:normal;overflow:hidden;padding:10px 5px;word-break:normal;}
.tg .tg-0lax{text-align:left;vertical-align:top}
</style>
<table class="tg">
<thead>
  <tr>
    <th class="tg-0lax"><span style="font-weight:bold">Rosette</span></th>
    <th class="tg-0lax">1</th>
    <th class="tg-0lax">12</th>
    <th class="tg-0lax">11</th>
    <th class="tg-0lax">10</th>
    <th class="tg-0lax">9</th>
    <th class="tg-0lax">8</th>
    <th class="tg-0lax">7<br></th>
    <th class="tg-0lax">6</th>
    <th class="tg-0lax">5</th>
    <th class="tg-0lax">4</th>
    <th class="tg-0lax">3</th>
    <th class="tg-0lax">2</th>
  </tr>
</thead>
<tbody>
  <tr>
    <td class="tg-0lax"><span style="font-weight:bold">Niskin</span><br></td>
    <td class="tg-0lax">1</td>
    <td class="tg-0lax">2</td>
    <td class="tg-0lax">3</td>
    <td class="tg-0lax">4</td>
    <td class="tg-0lax">5</td>
    <td class="tg-0lax">6</td>
    <td class="tg-0lax">7</td>
    <td class="tg-0lax">8</td>
    <td class="tg-0lax">9</td>
    <td class="tg-0lax">10<br></td>
    <td class="tg-0lax">11</td>
    <td class="tg-0lax">12</td>
  </tr>
</tbody>
</table>
</div>

In [15]:
# Lookup table for this cruise: 'rosette' is the carousel position that was fired,
# 'bottle' is the Niskin label on that bottle (position 1 -> 1, 12 -> 2, ... 2 -> 12).
nisk_btl = pd.DataFrame({'bottle': list(range(1, 13)),
                         'rosette': [1] + list(range(12, 1, -1))})

# The 'bottle' key of each parsed cast is treated as the rosette position and swapped
# for the Niskin label. Each cast is overwritten in place, so re-running this cell
# applies the relabeling again - run it once per fresh load of cruise_data.
for cast, btl_df in cruise_data.items():
    relabeled = pd.merge(nisk_btl, btl_df, left_on='rosette', right_on='bottle')
    cruise_data[cast] = relabeled.drop(columns='rosette').set_index('bottle')

## Merge Bottle and Nutrient Data but drop non nutrient vars?

In [16]:
# Bottle-file columns carried into the merged product: the Niskin key plus one pressure column.
keep_param = ['bottle','prdm']
# keep_param = ['bottle','prsm']  # presumably the alternate pressure column name - confirm against the .btl header

for cast,cdata in cruise_data.items():
    try:
        # 'ctd001.btl' -> 1, to match the integer 'Cast' column of the nutrient table
        matchcast = int((cast.split('.')[0]).lower().split('ctd')[-1])
        cast_nutrients = nut_data[nut_data['Cast']==matchcast]
        # Inner join: only bottles that have a nutrient sample survive the merge
        cruise_data[cast] = (
            pd.merge(cast_nutrients, cdata.reset_index()[keep_param], right_on='bottle', left_on='Niskin')
            .set_index('bottle')
            .drop(columns=['Cast'])
        )
    except Exception as err:
        # Still best-effort (one bad cast must not stop the cruise), but say which cast
        # was left unmerged and why; that cast keeps its original bottle-only frame.
        print(f'Not merging {cast}: {type(err).__name__}: {err}')
        continue

## Add Deployment meta information

In [17]:
# Cruise (deployment) metadata: the YAML parses to just a dictionary of dictionaries - simple
# NOTE(review): yaml.full_load is acceptable for trusted local config files; prefer
# yaml.safe_load if this file could ever come from an untrusted source.
with open(cruise_meta_file) as file:
    cruise_config = yaml.full_load(file)

## Add Instrument meta information

Time, depth, lat, lon should be added regardless (always our coordinates), but for a mooring site it's going to be a (1,1,1,t) dataset.
The variables of interest should be read from the data file and matched to a key for naming.  That key is in the inst_config file seen below and should represent common conversion names in the raw data

In [18]:
# Instrument metadata: passed as instrument_yaml when the NetCDF files are built below.
# NOTE(review): yaml.full_load is acceptable for trusted local config files; prefer
# yaml.safe_load if this file could ever come from an untrusted source.
with open(inst_meta_file) as file:
    inst_config = yaml.full_load(file)

## Add institutional meta-information


In [19]:
# Institutional metadata: passed to institution_meta_add() when the NetCDF files are built below.
# NOTE(review): yaml.full_load is acceptable for trusted local config files; prefer
# yaml.safe_load if this file could ever come from an untrusted source.
with open(group_meta_file) as file:
    group_config = yaml.full_load(file)

## Save CF Netcdf files

Currently we stick to NetCDF3 classic... but migrating to NetCDF4 (the default) should pose no problems for most modern purposes.  It's easy enough to pass the `format` kwarg through to the netcdf API of xarray.

In [20]:
#loop over all casts and perform tasks shown above

# Raw nutrient/bottle column names -> archive variable names. DataFrame.rename ignores
# keys that are not present, so one map can be applied to every cast.
archive_column_names = {
    'Sil (uM)':'SI',
    'PO4 (uM)':'PO4',
    'NO2 (uM)':'NO2',
    'NO3 (uM)':'NO3',
    'NH4 (uM)':'NH4',
    'Sil Flag':'SI_WOCE_FLAG',
    'PO4 Flag':'PO4_WOCE_FLAG',
    'NO2 Flag':'NO2_WOCE_FLAG',
    'NO3 Flag':'NO3_WOCE_FLAG',
    'NH4 Flag':'NH4_WOCE_FLAG',
    'Niskin':'BTLID',
    'prdm':'pressure',
    'empty':'empty', #this will be ignored
    'flag':'flag'}

for cast in cruise_data.keys():
    try:
        # Derive both name forms up front and leave the loop key `cast` untouched, so the
        # skip message below always identifies the cast the same way.
        cast_name = cast.split('.')[0]                           # e.g. 'ctd001'
        cast_number = cast.lower().split('d')[-1].split('.')[0]  # e.g. '001'

        cruise_data[cast] = cruise_data[cast].rename(columns=archive_column_names)

        cruise_data_nc = ncCFsave.EcoFOCI_CFnc(df=cruise_data[cast],
                                    instrument_yaml=inst_config,
                                    operation_yaml=cruise_config,
                                    operation_type='ctd')

        cruise_data_nc.expand_dimensions(dim_names=['latitude','longitude','time'],geophys_sort=False)

        cruise_data_nc.variable_meta_data(variable_keys=list(cruise_data[cast].columns.values),drop_missing=False)
        #adding dimension meta needs to come after updating the dimension values... BUG?
        cruise_data_nc.dimension_meta_data(variable_keys=['time','latitude','longitude'])
        cruise_data_nc.temporal_geospatioal_meta_data_ctd(positiveE=False,conscastno=cast_name)

        #add global attributes
        cruise_data_nc.deployment_meta_add(conscastno=cast_name.upper())

        #add institutional global attributes
        cruise_data_nc.institution_meta_add(group_config)

        #add creation date/time - provenance data
        cruise_data_nc.provinance_meta_add()

        #provide initial qc status field
        cruise_data_nc.qc_status(qc_status='excellent') #<- options are unknown, excellent, probably good, mixed, unqcd

        cruise_data_nc.xarray2netcdf_save(xdf = cruise_data_nc.get_xdf(),
                                   filename=cruise_name+'c'+cast_number.zfill(3)+'_nut.nc',format="NETCDF3_CLASSIC")
    except Exception as err:
        # Best-effort: a cast that cannot be written must not stop the rest of the cruise,
        # but report which cast was skipped and why so the cause can be checked.
        print(f'Skipping {cast}: {type(err).__name__}: {err}')

Skipping 006
Skipping 014
Skipping 015


## Next Steps

QC of data (plot parameters with other instruments)
- be sure to update the qc_status and the history