# Using EcoFOCIpy to process raw field data

## BTL Data + Oxygen Data

This is a streamlined version of the generation routines that merge CTD bottle data with discrete oxygen data for long-term archive.

In [84]:
import yaml
import glob
import pandas as pd
import os
import xarray as xa

import EcoFOCIpy.io.sbe_ctd_parser as sbe_ctd_parser #<- instrument specific
import EcoFOCIpy.io.ncCFsave as ncCFsave
import EcoFOCIpy.metaconfig.load_config as load_config

In [85]:
# Root paths. Each can be overridden with an environment variable so the
# notebook can run on a machine other than the one it was written on; when the
# variable is unset the original hard-coded location is used.
# os.path.join(path, '') guarantees a trailing separator, which the plain string
# concatenations in the following cell rely on.
sample_data_dir = os.path.join(
    os.environ.get('ECOFOCI_SAMPLE_DATA_DIR', '/Users/bell/ecoraid/2013/CTDcasts/aq1301/'),
    '') #root path to cruise directory
ecofocipy_dir = os.path.join(
    os.environ.get('ECOFOCI_ECOFOCIPY_DIR', '/Users/bell/Programs/EcoFOCIpy/'),
    '') #root path to the EcoFOCIpy checkout (static metadata lives under it)

In [86]:
###############################################################
# Cruise-specific inputs - edit this cell for each cruise
cruise_name = 'aq1301' #no hyphens

# input tables: comma-separated bottle data and tab-delimited discrete oxygen
datafile = f'{sample_data_dir}working/AQ1301_Bottle_Data.csv'
oxydatafile = f'{sample_data_dir}working/DiscreteOxygen/AQ1301 Oxygen Data.txt'

# metadata yaml files: cruise, instrument and institution
cruise_meta_file = f'{sample_data_dir}logs/aq1301.yaml'
inst_meta_file = f'{sample_data_dir}logs/FOCI_standard_CTDpOxy.yaml' #<- copy to each deployment for simplicity?
group_meta_file = f'{ecofocipy_dir}staticdata/institutional_meta_example.yaml'
###############################################################

In [87]:
# Load the bottle table and discard any column that contains no data at all.
cdata = (
    pd.read_csv(datafile, sep=',')
    .dropna(axis=1, how='all')
)

In [88]:
# show the bottle table (after the all-empty columns were dropped) as a quick visual check
cdata

Unnamed: 0,cruise,Cast,yyyy-mm-dd hh:mm,bottle,prdm
0,AQ1301,1,8/23/13 21:30,1,45.616
1,AQ1301,1,8/23/13 21:30,2,40.525
2,AQ1301,1,8/23/13 21:30,3,30.490
3,AQ1301,1,8/23/13 21:30,4,20.780
4,AQ1301,1,8/23/13 21:30,5,10.462
...,...,...,...,...,...
249,AQ1301,52,9/14/13 8:00,3,40.396
250,AQ1301,52,9/14/13 8:00,4,31.057
251,AQ1301,52,9/14/13 8:00,5,20.401
252,AQ1301,52,9/14/13 8:00,6,10.470


## Load tab-delimited Oxygen File

In [89]:
# Read the tab-delimited discrete oxygen table and relabel its columns.
# The relabelling requires the file to contain exactly three columns.
# NOTE(review): an earlier revision also dropped a 'Cruise' column and scaled
# the oxygen values by 44.66; both steps are disabled here - presumably this
# file has no 'Cruise' column and is already in uM/l. Confirm for each new cruise.
oxy_data = pd.read_csv(oxydatafile, sep='\t').set_axis(
    ["Cast", "Niskin", "O2 uM/l"], axis='columns')
oxy_data

Unnamed: 0,Cast,Niskin,O2 uM/l
0,43,1,301.899159
1,45,4,338.982326
2,41,5,378.455244
3,38,1,248.92482
4,36,6,356.867349
5,33,1,270.366512
6,31,6,358.43043
7,28,8,352.419398
8,26,1,353.367925
9,16,1,297.128819


## Merge Bottle and Oxygen Data (keeping only the bottle variables needed for the oxygen files)

In [90]:
# bottle-table columns carried into the merged product
keep_param = ['bottle','prdm','Cast']

# Match each oxygen sample to its bottle record on (Cast, Niskin) == (Cast, bottle).
# The default inner join keeps only samples that have a matching bottle row.
cruise_data = (
    oxy_data
    .merge(cdata.reset_index()[keep_param],
           left_on=['Cast','Niskin'],
           right_on=['Cast','bottle'])
    .set_index('bottle')
)

In [91]:
# display only: sort_values returns a sorted copy, cruise_data itself is left unchanged
cruise_data.sort_values(by=['Cast', 'Niskin'])

Unnamed: 0_level_0,Cast,Niskin,O2 uM/l,prdm
bottle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
6,4,6,334.227517,2.885
1,7,1,344.309863,38.644
5,13,5,401.302944,1.962
1,16,1,297.128819,29.464
5,19,5,385.475708,1.96
1,22,1,261.378152,44.944
1,26,1,353.367925,91.175
8,28,8,352.419398,1.955
6,31,6,358.43043,1.76
1,33,1,270.366512,49.214


## Add Deployment meta information

In [92]:
# Cruise metadata: the yaml is read into a plain dictionary of dictionaries.
# NOTE(review): yaml.full_load resolves more YAML tags than yaml.safe_load;
# safe_load is sufficient for plain config files and preferable if this file
# could ever come from an untrusted source.
with open(cruise_meta_file) as cruise_yaml:
    cruise_config = yaml.full_load(cruise_yaml)

In [93]:
# Instrument metadata for the oxygen variables.
# NOTE(review): yaml.full_load resolves more YAML tags than yaml.safe_load;
# prefer safe_load if this file could ever come from an untrusted source.
with open(inst_meta_file) as inst_yaml:
    inst_oxy_config = yaml.full_load(inst_yaml)

## Add institutional meta-information


In [94]:
# Institutional metadata, later passed to institution_meta_add for every cast.
# NOTE(review): yaml.full_load resolves more YAML tags than yaml.safe_load;
# prefer safe_load if this file could ever come from an untrusted source.
with open(group_meta_file) as group_yaml:
    group_config = yaml.full_load(group_yaml)

## Save CF Netcdf files

The files are currently written as NetCDF3 classic, but migrating to NetCDF4 (the default) should pose no problems for most modern purposes. It's easy enough to pass the `format` kwarg through to the netCDF API of xarray.

In [95]:
# Preview: print the rows belonging to each cast, one group per 'Cast' value.
# groupby yields (cast number, sub-frame) pairs; only the sub-frame is printed.
for ind,cast in cruise_data.groupby('Cast'):
    print(cast)

        Cast  Niskin     O2 uM/l   prdm
bottle                                 
6          4       6  334.227517  2.885
        Cast  Niskin     O2 uM/l    prdm
bottle                                  
1          7       1  344.309863  38.644
        Cast  Niskin     O2 uM/l   prdm
bottle                                 
5         13       5  401.302944  1.962
        Cast  Niskin     O2 uM/l    prdm
bottle                                  
1         16       1  297.128819  29.464
        Cast  Niskin     O2 uM/l  prdm
bottle                                
5         19       5  385.475708  1.96
        Cast  Niskin     O2 uM/l    prdm
bottle                                  
1         22       1  261.378152  44.944
        Cast  Niskin     O2 uM/l    prdm
bottle                                  
1         26       1  353.367925  91.175
        Cast  Niskin     O2 uM/l   prdm
bottle                                 
8         28       8  352.419398  1.955
        Cast  Niskin    O2 uM/l

In [96]:
#loop over all casts and perform tasks shown above

# Source column headers -> names used for the rest of the processing.
# DataFrame.rename ignores keys that are not present, so several spellings of
# the oxygen header can be listed at once. The identity entries are no-ops and
# appear to be placeholders for optional columns.
column_aliases = {
    'O2 (uM)':'O2',
    'O2, uM/l':'O2',
    'O2 uM/l':'O2',
    'Niskin':'BTLID',
    'prdm':'pressure',
    'empty':'empty', #this will be ignored
    'flag':'flag'}

# Write one CF netcdf file per cast.
# A cast is skipped on KeyError; on RuntimeError its (possibly partial) output
# file is also removed.
for ind,cast in cruise_data.groupby('Cast'):
    # cast label and output filename are built once and reused below
    cast_label = f'CTD{str(ind).zfill(3)}'
    nc_filename = f'{cruise_name}c{str(ind).zfill(3)}_oxy.nc'

    try:
        cast = cast.rename(columns=column_aliases)

        cruise_data_nc = ncCFsave.EcoFOCI_CFnc(df=cast,
                                    instrument_yaml=inst_oxy_config,
                                    operation_yaml=cruise_config,
                                    operation_type='ctd')

        cruise_data_nc.expand_dimensions(dim_names=['latitude','longitude','time'],geophys_sort=False)

        cruise_data_nc.variable_meta_data(variable_keys=list(cast.columns.values),drop_missing=False)
        #adding dimension meta needs to come after updating the dimension values... BUG?
        cruise_data_nc.dimension_meta_data(variable_keys=['time','latitude','longitude'])
        # method name spelling (sic) is kept exactly as this notebook calls it
        cruise_data_nc.temporal_geospatioal_meta_data_ctd(positiveE=False,conscastno=cast_label)

        #add global attributes
        cruise_data_nc.deployment_meta_add(conscastno=cast_label)

        #add institutional global attributes
        cruise_data_nc.institution_meta_add(group_config)

        #add creation date/time - provenance data (method name spelling is (sic))
        cruise_data_nc.provinance_meta_add()

        #provide initial qc status field
        cruise_data_nc.qc_status(qc_status='excellent') #<- options are unknown, excellent, probably good, mixed, unqcd

        cruise_data_nc.xarray2netcdf_save(xdf = cruise_data_nc.get_xdf(),
                                   filename=nc_filename,format="NETCDF3_CLASSIC")
    except KeyError:
        print(f'Skipping {ind}')
    except RuntimeError:
        print(f'Skipping & Removing {ind}')
        # The error can be raised before anything was written to disk; an
        # unconditional os.remove would then raise FileNotFoundError and abort
        # the remaining casts.
        if os.path.exists(nc_filename):
            os.remove(path=nc_filename)

  xdf.to_netcdf(filename,format=kwargs['format'],encoding={'time':{'units':'days since 1900-01-01'}})
  xdf.to_netcdf(filename,format=kwargs['format'],encoding={'time':{'units':'days since 1900-01-01'}})
  xdf.to_netcdf(filename,format=kwargs['format'],encoding={'time':{'units':'days since 1900-01-01'}})
  xdf.to_netcdf(filename,format=kwargs['format'],encoding={'time':{'units':'days since 1900-01-01'}})
  xdf.to_netcdf(filename,format=kwargs['format'],encoding={'time':{'units':'days since 1900-01-01'}})
  xdf.to_netcdf(filename,format=kwargs['format'],encoding={'time':{'units':'days since 1900-01-01'}})
  xdf.to_netcdf(filename,format=kwargs['format'],encoding={'time':{'units':'days since 1900-01-01'}})
  xdf.to_netcdf(filename,format=kwargs['format'],encoding={'time':{'units':'days since 1900-01-01'}})
  xdf.to_netcdf(filename,format=kwargs['format'],encoding={'time':{'units':'days since 1900-01-01'}})
  xdf.to_netcdf(filename,format=kwargs['format'],encoding={'time':{'units':'days s

## Next Steps

QC of data (plot parameters against other instruments)
- be sure to update the qc_status and the history