# Seabird CTD (.cnv) to CF-Netcdf files for PMEL/EcoFOCI


__pyversion__ == 3.7   
__author__ == S.Bell  
__date__ == Feb 20, 2019

*Description*

Using as many community tools as are available, ingest Sea-Bird cnv files and output CF/COARDS-compliant NetCDF files for archiving and editing.  

Standards:
CF/COARDS (1.7) - http://cfconventions.org/   
ACDD (1.3) - Data Discovery (sits on top of CF standards - http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery)   

__Meta Information__ should come from the following sources, listed in order of increasing precedence:

+ cnv files
+ database files (override cnv files when flagged)
+ yaml-based config files (override database and cnv files when flagged)

In [82]:

import ctd
import pandas as pd
import xarray as xa
import yaml

# science stack
import numpy as np


## Information
Read and display the contents of our test file. The ctd software (python-ctd - https://github.com/pyoceans/python-ctd) ingests the contents of the cnv file into a pandas DataFrame.

In [148]:
# Base path (no extension) of the test cast; the extension is appended per file type.
test_file = 'data/test'
# Parse the Sea-Bird .cnv file with python-ctd.
ctd_df = ctd.DataFrame.from_cnv(f'{test_file}.cnv')

  self.header = header
  self.config = config


In [149]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
ctd_df.describe()

Unnamed: 0,c0mS/cm,c1mS/cm,flECO-AFL,sbeox0V,t090C,t190C,timeS,sbeox1V,par,turbWETntu0,...,sal11,sigma-t00,sigma-t11,sbeox0ML/L,sbox0Mm/Kg,sbeox0PS,sbeox1ML/L,sbox1Mm/Kg,sbeox1PS,nbin
count,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,...,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0
mean,29.769978,29.772047,0.789513,2.488628,3.610676,3.610854,341.44903,2.59619,22.77178,0.7787,...,32.068264,25.489587,25.491396,7.24503,315.520418,97.113104,7.308734,318.294388,97.968955,36.985075
std,0.006822,0.006822,0.089931,0.041992,0.004228,0.00412,92.84064,0.006939,54.862609,0.239176,...,0.000206,0.000442,0.000454,0.145682,6.34442,1.956144,0.00974,0.42384,0.135408,16.090174
min,29.753597,29.756555,0.4821,2.3835,3.6057,3.6059,112.124,2.5825,0.20983,0.6036,...,32.0679,25.4886,25.4902,6.878,299.536,92.19,7.2834,317.191,97.628,14.0
25%,29.765546,29.76735,0.78045,2.4788,3.6079,3.6081,287.5195,2.591,0.212345,0.64895,...,32.0681,25.4895,25.49125,7.236,315.1275,96.9875,7.3025,318.024,97.886,25.5
50%,29.768302,29.770389,0.8143,2.5041,3.609,3.6094,341.452,2.5952,1.2341,0.6851,...,32.0682,25.4897,25.4915,7.2973,317.799,97.818,7.3086,318.29,97.966,35.0
75%,29.774869,29.777149,0.83885,2.51775,3.612,3.6123,394.2645,2.6016,13.5115,0.77615,...,32.06835,25.4899,25.4917,7.328,319.135,98.227,7.31545,318.5865,98.056,44.5
max,29.783384,29.785494,0.8836,2.5467,3.6213,3.6215,543.732,2.61,304.01,1.6697,...,32.0688,25.4904,25.4923,7.5336,328.086,100.977,7.3321,319.312,98.302,95.0


In [150]:
# Read the rosette (.ros) summary that accompanies the cast.
rossum = ctd.rosette_summary(test_file+'.ros')
# Mean of the selected temperature, conductivity and depSM columns per bpos value.
# The columns are selected with a list: indexing a GroupBy with a bare tuple of
# keys is deprecated in pandas 1.x and rejected by pandas 2.x.
rossum.groupby(rossum.bpos)[['t090C', 't190C', 'c0mS/cm', 'c1mS/cm', 'depSM']].mean()

Unnamed: 0_level_0,t090C,t190C,c0mS/cm,c1mS/cm,depSM
bpos,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,3.609163,3.609482,29.782319,29.784502,64.804408
2,3.609247,3.609541,29.782231,29.784364,64.552878
3,3.609673,3.610008,29.77666,29.778806,50.764306
4,3.609122,3.609414,29.772157,29.774314,41.653102
5,3.611108,3.611592,29.768367,29.770653,29.452673
6,3.611212,3.611373,29.767733,29.76985,27.565694
7,3.611386,3.612029,29.767226,29.769664,26.311959
8,3.632188,3.632131,29.779031,29.780918,14.123327
9,3.6183,3.620363,29.765324,29.768815,9.006898
10,3.626857,3.627402,29.770237,29.772524,3.911878


## The following is the meta information in the cnv files

In [70]:
# Echo the position and time attributes that the cnv reader attached to the frame.
print("Metadata from Header")
print(f"lon:{ctd_df.longitude}")
print(f"lat:{ctd_df.latitude}")
print(f"time:{ctd_df.time}")

Metadata from Header
lon:-164.05033333333333
lat:56.871833333333335
time:2018-04-30 19:11:51


In [71]:
# Echo the name attribute carried by the frame.
print(f"name:{ctd_df.name}")

name:data


In [73]:
# Show the '# ...' configuration lines (nquan, nvalues, per-column names/units)
# kept on the frame.
ctd_df.config

['# nquan = 23',
 '# nvalues = 67',
 '# units = specified',
 '# name 0 = c0mS/cm: Conductivity [mS/cm]',
 '# name 1 = c1mS/cm: Conductivity, 2 [mS/cm]',
 '# name 2 = flECO-AFL: Fluorescence, WET Labs ECO-AFL/FL [mg/m^3]',
 '# name 3 = sbeox0V: Oxygen raw, SBE 43 [V]',
 '# name 4 = t090C: Temperature [ITS-90, deg C]',
 '# name 5 = t190C: Temperature, 2 [ITS-90, deg C]',
 '# name 6 = timeS: Time, Elapsed [seconds]',
 '# name 7 = prDM: Pressure, Digiquartz [db]',
 '# name 8 = sbeox1V: Oxygen raw, SBE 43, 2 [V]',
 '# name 9 = par: PAR/Irradiance, Biospherical/Licor',
 '# name 10 = turbWETntu0: Turbidity, WET Labs ECO [NTU]',
 '# name 11 = sal00: Salinity, Practical [PSU]',
 '# name 12 = sal11: Salinity, Practical, 2 [PSU]',
 '# name 13 = sigma-t00: Density [sigma-t, kg/m^3 ]',
 '# name 14 = sigma-t11: Density, 2 [sigma-t, kg/m^3 ]',
 '# name 15 = sbeox0ML/L: Oxygen, SBE 43 [ml/l], WS = 2',
 '# name 16 = sbox0Mm/Kg: Oxygen, SBE 43 [umol/kg], WS = 2',
 '# name 17 = sbeox0PS: Oxygen, SBE 43 [

In [75]:
# Show the '*'-prefixed Sea-Bird header lines kept on the frame.
ctd_df.header

['* Sea-Bird SBE 9 Data File:',
 '* FileName = C:\\Sea-Bird Data\\2018\\DY-18-05\\CTD\\CTD001.hex',
 '* Software Version Seasave V 7.23.2',
 '* Temperature SN = 4379',
 '* Conductivity SN = 042985',
 '* Number of Bytes Per Scan = 37',
 '* Number of Voltage Words = 4',
 '* Number of Scans Averaged by the Deck Unit = 1',
 '* System UpLoad Time = Apr 30 2018 19:11:53',
 '* NMEA Latitude = 56 52.31 N',
 '* NMEA Longitude = 164 03.02 W',
 '* NMEA UTC (Time) = Apr 30 2018  19:11:51',
 '* Store Lat/Lon Data = Append to Every Scan',
 '* SBE 11plus V 5.2',
 '* number of scans to average = 1',
 '* pressure baud rate = 9600',
 '* NMEA baud rate = 4800',
 '* Latitude/Longitude added to scan',
 '* GPIB address = 1',
 '* advance primary conductivity  0.073 seconds',
 '* advance secondary conductivity  0.073 seconds',
 '* autorun on power up is disabled',
 '* S>',
 '* System UTC = Apr 30 2018 19:11:53',
 '*END*']

## Using the file metadata, pre-fill the global/variable config data

This will primarily be -
+ From header
    - Sea-Bird system (line 0)
    - original filename (line 1)
    - Software Version (line 2)
    - (lat, lon, time if from_cnv is True)
+ From config
    - sensor serial number and cal-date -> variable meta
+ From Database of cast logs (even if piped to config file first)
    - bottom_depth, max_depth 
    - lat, lon, time if from_database is True
+ From yaml_config (see test.yaml)
    - variable names (long_name, standard_name, units)
    - institution name
    - contact information
    - lat, lon, time if from_config is True (unlikely as this would be in database)
    - title (ACDD 1.3)
    - summary (ACDD 1.3)
    - keywords 
    - conventions (ACDD 1.3, CF 1.7)
    - history
    - standard_name_vocabulary ()

**To Think About**
- Are station name, cast number, and water mass variables or attributes? (Possibly both, which may be best.)
- flags? and flag_values

## From Pandas to XArray

Use the pandas column names to look up each variable's meta information in the config file.

In [151]:
# Load the per-variable metadata mapping from the YAML config file.
# yaml.safe_load replaces yaml.load: calling yaml.load without an explicit
# Loader is deprecated in PyYAML and can construct arbitrary Python objects.
# The context manager closes the file handle once parsing is done.
with open('data/test_profile_variables.yaml') as config_file:
    var_config = yaml.safe_load(config_file)

In [152]:
# Label the index 'depth'; note that its values are in dbar.
ctd_df.index.rename('depth', inplace=True) #units are dbar

# Map each column that has an entry in the YAML config to its configured
# 'name'; columns without an entry keep their original name. The mapping is
# built once and applied in a single rename instead of one rename per column.
column_names = {
    cname: var_config[cname]['name']
    for cname in ctd_df.columns
    if cname in var_config
}
ctd_df.rename(columns=column_names, inplace=True)

# Presumably columns to be dropped later; nothing in this cell applies the list.
drop_vars = ['nbin','flag']

In [153]:
# Convert the DataFrame to an xarray object.
# NOTE(review): the recorded run of this cell raised "ValueError: cannot
# convert DataFrame with non-unique columns" -- presumably two columns ended
# up sharing a name; check ctd_df.columns for duplicates before converting.
ctd_xa = ctd_df.to_xarray()

ValueError: cannot convert DataFrame with non-unique columns

In [147]:
# Display the converted dataset; the NetCDF export below is left disabled.
ctd_xa
#ctd_xa.to_netcdf(test_file+'.nc')

<xarray.Dataset>
Dimensions:                                    (depth: 67)
Coordinates:
  * depth                                      (depth) float64 1.0 2.0 ... 67.0
Data variables:
    sea_water_electrical_conductivity_1        (depth) float64 29.75 ... 29.78
    sea_water_electrical_conductivity_2        (depth) float64 29.76 ... 29.79
    concentration_of_chlorophyll_in_sea_water  (depth) float64 0.4821 ... 0.8792
    sbeox0V                                    (depth) float64 2.52 ... 2.525
    sea_water_temperature_1                    (depth) float64 3.609 ... 3.61
    sea_water_temperature_2                    (depth) float64 3.611 ... 3.61
    timeS                                      (depth) float64 112.1 ... 543.7
    sbeox1V                                    (depth) float64 2.594 ... 2.583
    par                                        (depth) float64 304.0 ... 0.21
    turbWETntu0                                (depth) float64 0.6675 ... 1.67
    sea_water_salinity_1   