In [1]:
import pandas as pd

In [2]:
fn = 'Sta0001.cnv'

In [31]:
joinline.split()[5:]

NameError: name 'joinline' is not defined

In [33]:
str.join??

[0;31mSignature:[0m [0mstr[0m[0;34m.[0m[0mjoin[0m[0;34m([0m[0mself[0m[0;34m,[0m [0miterable[0m[0;34m,[0m [0;34m/[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Concatenate any number of strings.

The string whose method is called is inserted in between each given string.
The result is returned as a new string.

Example: '.'.join(['ab', 'pq', 'rs']) -> 'ab.pq.rs'
[0;31mType:[0m      method_descriptor

In [37]:
col_longname = ' '.join(line.split()[5:])

In [38]:
col_longname

'Pressure, Digiquartz [db]'

In [149]:
with open(fn, 'r') as f:

    hkeys = ['col_nums', 'col_names', 'col_longnames', 'SN_info', 'moon_pool', 'SBEproc_hist', 'hdr_end_line', 'latitude', 'longitude', 'time']
    
    hdict = {hkey:[] for hkey in hkeys}
    start_read_history = False # Flag that will be turned on when we read the SBE history section 

    for n_line, line in enumerate(f.readlines()):

        # Read the column header info (which variable is in which data column)
        if '# name' in line:
            hdict['col_nums'] += [int(line.split()[2])]
            hdict['col_names'] += [line.split()[4]]
            hdict['col_longnames'] += [' '.join(line.split()[5:])]            

        # Read NMEA lat/lon/time
        if 'NMEA Latitude' in line:
            hdict['latitude'] = _nmea_lat_to_decdeg(*line.split()[-3:])
        if 'NMEA Longitude' in line:
            hdict['longitude'] = _nmea_lon_to_decdeg(*line.split()[-3:])

        if 'NMEA UTC' in line:

            nmea_time_split = line.split()[-4:]
            hdict['time'] = _nmea_time_to_datetime(*nmea_time_split)
            
        # Read serial numbers
        if ' SN' in line:
            sn_info_str = ' '.join(line.split()[1:])
            hdict['SN_info'] += [sn_info_str]

        # Read moon pool info
        if 'Skuteside' in line:
            mp_str = line.split()[-1]
            if mp_str == 'M':
                hdict['moon_pool'] = True
            elif mp_str == 'S':
                hdict['moon_pool'] = False

        # At the end of the SENSORS section: read the history lines
        if '</Sensors>' in line:
            start_read_history = True
            
        if start_read_history:
            hdict['SBEproc_hist'] += [line] 
        
        # Read the line containing the END string
        # (and stop reading the file after that)
        if '*END*' in line:
            hdict['hdr_end_line'] = n_line
            break

In [150]:
hdict

{'col_nums': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
 'col_names': ['prDM:',
  't090C:',
  't190C:',
  'c0S/m:',
  'c1S/m:',
  'sbox0Mm/Kg:',
  'wetCDOM:',
  'flECO-AFL:',
  'CStarAt0:',
  'timeS:',
  'latitude:',
  'longitude:',
  'sal00:',
  'sal11:',
  'flag:'],
 'col_longnames': ['Pressure, Digiquartz [db]',
  'Temperature [ITS-90, deg C]',
  'Temperature, 2 [ITS-90, deg C]',
  'Conductivity [S/m]',
  'Conductivity, 2 [S/m]',
  'Oxygen, SBE 43 [umol/kg]',
  'Fluorescence, WET Labs CDOM [mg/m^3]',
  'Fluorescence, WET Labs ECO-AFL/FL [mg/m^3]',
  'Beam Attenuation, WET Labs C-Star [1/m]',
  'Time, Elapsed [seconds]',
  'Latitude [deg]',
  'Longitude [deg]',
  'Salinity, Practical [PSU]',
  'Salinity, Practical, 2 [PSU]',
  'flag'],
 'SN_info': ['Temperature SN = 5884', 'Conductivity SN = 2860'],
 'moon_pool': False,
 'SBEproc_hist': ['# </Sensors>\n',
  '# datcnv_date = Oct 05 2023 12:14:33, 7.26.7.129 [datcnv_vars = 12]\n',
  '# datcnv_in = Z:\\cruise_data\\ctd_classic_

In [133]:
nmeatime = '* NMEA UTC (Time) = Jan 05 2021  15:58:23'

nmea_time_str = ' '.join(nmea_time_split)
pd.to_datetime(nmea_time_str, format = '%b %d %Y %H:%M:%S')

In [141]:
nmea_time_split

['Jan', '05', '2021', '15:58:23']

In [135]:
import pandas as pd

In [140]:
pd.to_datetime(nmea_time_str)

Timestamp('2021-01-05 15:58:23')

In [139]:
pd.to_datetime(nmea_time_str, format = '%b %d %Y %H:%M:%S')

Timestamp('2021-01-05 15:58:23')

In [134]:
nmea_time_str

'Jan 05 2021 15:58:23'

In [143]:
def _nmea_time_to_datetime(mon, da, yr, hms):
    '''
    Convert NMEA time to datetime timestamp.
    
    E.g.:
    
    ['Jan', '05', '2021', '15:58:23']  --> Timestamp('2021-01-05 15:58:23')
    '''
    nmea_time_str = ' '.join(nmea_time_split)
    nmea_time_dt = pd.to_datetime(nmea_time_str, format = '%b %d %Y %H:%M:%S')
    
    return nmea_time_dt

In [146]:
_nmea_time_to_datetime(*nmea_time_split)

Timestamp('2021-01-05 15:58:23')

In [117]:
def _nmea_lon_to_decdeg(deg_str, min_str, EW_str):
    '''
    Convert NMEA longitude to decimal degrees longitude.
    
    E.g.:
    
    ['006', '02.87', 'E'] (string) --> 6.04783333 (float)
    '''

    if EW_str=='E':
        dec_sign = 1
    elif EW_str=='W':
        dec_sign = -1

    decdeg = int(deg_str) + float(min_str)/60 

    return decdeg*dec_sign


def _nmea_lat_to_decdeg(deg_str, min_str, NS_str):
    '''
    Convert NMEA latitude to decimal degrees latitude.
    
    E.g.:
    
    ['69', '03.65', 'S'] (string) --> -69.060833 (float)
    '''
    
    if NS_str=='N':
        dec_sign = 1
    elif NS_str=='S':
        dec_sign = -1

    decdeg = int(deg_str) + float(min_str)/60 

    return decdeg*dec_sign

In [127]:
hdict

{'col_nums': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
 'col_names': ['prDM:',
  't090C:',
  't190C:',
  'c0S/m:',
  'c1S/m:',
  'sbox0Mm/Kg:',
  'wetCDOM:',
  'flECO-AFL:',
  'CStarAt0:',
  'timeS:',
  'latitude:',
  'longitude:',
  'sal00:',
  'sal11:',
  'flag:'],
 'col_longnames': ['Pressure, Digiquartz [db]',
  'Temperature [ITS-90, deg C]',
  'Temperature, 2 [ITS-90, deg C]',
  'Conductivity [S/m]',
  'Conductivity, 2 [S/m]',
  'Oxygen, SBE 43 [umol/kg]',
  'Fluorescence, WET Labs CDOM [mg/m^3]',
  'Fluorescence, WET Labs ECO-AFL/FL [mg/m^3]',
  'Beam Attenuation, WET Labs C-Star [1/m]',
  'Time, Elapsed [seconds]',
  'Latitude [deg]',
  'Longitude [deg]',
  'Salinity, Practical [PSU]',
  'Salinity, Practical, 2 [PSU]',
  'flag'],
 'SN_info': ['Temperature SN = 5884', 'Conductivity SN = 2860'],
 'moon_pool': False,
 'SBEproc_hist': ['# </Sensors>\n',
  '# datcnv_date = Oct 05 2023 12:14:33, 7.26.7.129 [datcnv_vars = 12]\n',
  '# datcnv_in = Z:\\cruise_data\\ctd_classic_