In [6]:
import os
import pandas as pd
from datetime import datetime
import numpy as np

# Home folder path
home_folder = 'C:\\Users\\hanshil\\Documents\\GitHub\\biocarbon_nrt_data_viz'

# File name
fname = 'navis_102'

# Input and output folder paths
in_folder = os.path.join(home_folder, 'Data', 'navis', fname, 'raw')
csv_folder = os.path.join(home_folder, 'Data', 'navis', fname, 'csv_files')

# Change directory to input folder
os.chdir(in_folder)

# Get list of all .msg files in the directory
files = [f for f in os.listdir() if f.endswith('.msg')]

# Initialize empty DataFrames
ftab_all = pd.DataFrame()
fbintab_all = pd.DataFrame()


In [5]:

# Iterate over each file
for fix, file in enumerate(files):
    os.chdir(in_folder)
    
    # Read the content of the file
    with open(file, 'r') as f:
        bufraw = f.read()
    
    # Find start and end indices for hex data
    start_ix = bufraw.find('pH: 720-10285') + 15 + 49
    if start_ix == -1:
        start_ix = bufraw.find('pH: 720-10290') + 15 + 49
    
    end_ix = bufraw.find('Resm') - 1 - 53 - 53
    if start_ix != -1 and end_ix != -1:
        # Extract date and parse it
        date_startix = bufraw.find('terminated: ') + 16
        date_str = bufraw[date_startix:date_startix + 20]
        ptime = datetime.strptime(date_str, '%b %d %H:%M:%S %Y')
        
        # Extract latitude and longitude
        lon_startix = bufraw.find('Fix:  ') + 5
        lon_startix = bufraw.find('Fix:  ', lon_startix)
        plon = float(bufraw[lon_startix:lon_startix + 9])
        plat = float(bufraw[lon_startix + 9:lon_startix + 17])
        
        # Extract binary data section
        bindat_startix = bufraw.find('(Park Sample)') + 14
        bindat_endix = bufraw.find('#')
        buf_bin = bufraw[bindat_startix:bindat_endix].splitlines()
        
        # Convert binary data to DataFrame
        fbintab = pd.DataFrame({
            'mtime': [ptime] * len(buf_bin),
            'pnum': [fix] * len(buf_bin),
            'lat': [plat] * len(buf_bin),
            'lon': [plon] * len(buf_bin)
        })
        
        for ii, line in enumerate(buf_bin):
            fbintab.loc[ii, 'pres'] = float(line[0:9])
            fbintab.loc[ii, 'T'] = float(line[11:18])
            fbintab.loc[ii, 'S'] = float(line[19:25])
            fbintab.loc[ii, 'NO3'] = float(line[26:31])
            fbintab.loc[ii, 'O2ph'] = float(line[32:38])
            fbintab.loc[ii, 'O2tV'] = float(line[39:47])
            fbintab.loc[ii, 'mcoms1'] = float(line[49:54])
            fbintab.loc[ii, 'mcoms2'] = float(line[55:61])
            fbintab.loc[ii, 'mcoms3'] = float(line[62:68])
            fbintab.loc[ii, 'phVrs'] = float(line[69:78])
            fbintab.loc[ii, 'phVk'] = float(line[79:88])
            fbintab.loc[ii, 'phIb'] = float(line[89:99])
            fbintab.loc[ii, 'phIk'] = float(line[100:])
        
        # Convert hexadecimal data to DataFrame
        buf_hex = bufraw[start_ix:end_ix].splitlines()
        ftab = pd.DataFrame({
            'mtime': [ptime] * len(buf_hex),
            'pnum': [fix] * len(buf_hex),
            'lat': [plat] * len(buf_hex),
            'lon': [plon] * len(buf_hex)
        })
        
        for ii, line in enumerate(buf_hex):
            ftab.loc[ii, 'pres'] = int(line[0:4], 16) / 10
            if ftab.loc[ii, 'pres'] > 6000:
                ftab.loc[ii, 'pres'] -= int('FFFF', 16) / 10
            ftab.loc[ii, 'T'] = int(line[5:8], 16) / 1000
            ftab.loc[ii, 'C'] = int(line[9:14], 16)
            ftab.loc[ii, 'oxy1'] = int(line[15:20], 16)
            ftab.loc[ii, 'oxy2'] = int(line[21:26], 16)
            ftab.loc[ii, 'v1'] = int(line[27:28], 16)
            mcoms_count_offset = 500
            ftab.loc[ii, 'mcoms1'] = int(line[29:34], 16) - mcoms_count_offset
            if ftab.loc[ii, 'mcoms1'] == (int('FFFFFF', 16) - mcoms_count_offset):
                ftab.loc[ii, 'mcoms1'] = np.nan
            ftab.loc[ii, 'mcoms2'] = int(line[35:40], 16) - mcoms_count_offset
            if ftab.loc[ii, 'mcoms2'] == (int('FFFFFF', 16) - mcoms_count_offset):
                ftab.loc[ii, 'mcoms2'] = np.nan
            ftab.loc[ii, 'mcoms3'] = int(line[41:46], 16) - mcoms_count_offset
            if ftab.loc[ii, 'mcoms3'] == (int('FFFFFF', 16) - mcoms_count_offset):
                ftab.loc[ii, 'mcoms3'] = np.nan
            ftab.loc[ii, 'FChl'] = (ftab.loc[ii, 'mcoms1'] - 50) * 2.006E-03
            ftab.loc[ii, 'beta'] = (ftab.loc[ii, 'mcoms2'] - 49) * 3.524E-07
            ftab.loc[ii, 'FDOM'] = (ftab.loc[ii, 'mcoms3'] - 51) * 6.619E-03
            ftab.loc[ii, 'pH1'] = int(line[47:48], 16)
            ftab.loc[ii, 'pH2'] = int(line[49:54], 16)
            ftab.loc[ii, 'pH3'] = int(line[55:56], 16)
        
        # Convert MATLAB datenum to Python datetime
        ftab['mtime'] = pd.to_datetime(ftab['mtime'])
        
        # Change directory to CSV folder and save the table
        os.chdir(csv_folder)
        fname = file[5:-4] + '.csv'
        ftab.to_csv(fname, index=False)
        print(f'{fname} saved ! (thanks Nathan and Hans for that awesome piece of work.)')

15
ned in 22 seconds.
#


ValueError: time data 'ned in 22 seconds.\n#' does not match format '%b %d %H:%M:%S %Y'