### SBD Stitcher Program for Popup Buoys

Allows the retrieval of a specific file but not fully implemented wget

__pyversion__==3.7 ***tested***  
__author__==S.Bell

In [13]:
source_url = 'http://eclipse.pmel.noaa.gov/sbd/data/ecofoci.popupsbd/'

***Unix command line for cronjobs***

`wget http://eclipse.pmel.noaa.gov/sbd/data/ecofoci.popupsbd/ -np -r -nc -R "index.html*"`

### Test reassembly of files

In [14]:
import os
import glob #python >= 3.5

import datetime

import pandas as pd

In [15]:
# read hex incoded binary
def HexView(file):
    with open(file, 'rb') as in_file:
        while True:
            hexdata = in_file.read().hex().upper()     # Read the shortest possible line
            if len(hexdata) == 0:                      # breaks loop once no more binary data is read
                break
            
            return(hexdata.upper())

# convert identifying bytes for processing
def HexIdent(hexstr):
    if hexstr[0:4] == '0001':
        message_source = 'summary.txt'
    elif hexstr[0:4] == '0002':
        message_source = 'prodat.txt'
    elif hexstr[0:4] == '0003':
        message_source = 'icedat.txt'
    elif hexstr[0:4] == '0004':
        message_source = 'botdat.txt'
    elif hexstr[0:4] == 'FFFE':
        message_source = 'sstdat.txt'        
    else:
        message_source = 'image{}.txt'.format(int(hexstr[0:4],16))        
        
    return(message_source)        
        
def Summary(hexstr):
    '''SUMMARY.TXT'''
    
    PositionInFile = int(hexstr[4:12],16)
    ProfileDataSize = int(hexstr[12:20],16)
    IceDataSize = int(hexstr[20:28],16) 
    BottomDataSize = int(hexstr[28:36],16)
    NumberofImages = int(hexstr[36:40],16)
    
    return({'PositionInFile':PositionInFile, 'ProfileDataSize':ProfileDataSize, 
            'IceDataSize': IceDataSize, 'BottomDataSize':BottomDataSize, 'NumberofImages':NumberofImages})

def Data(hexstr):
    ''' PRODAT.TXT
        ICEDAT.TXT
        SSTDAT.TXT
        BOTDAT.TXT
    '''
    
    PositionInFile = int(hexstr[4:12],16)
    Data = hexstr[12:]
    
    return(PositionInFile, Data)

def Image(file):
    '''{IMAGENUMER}.JPG'''
    
    with open(file, 'rb') as in_file:
        while True:
            header = in_file.read(8).hex()
            hexdata = in_file.read()     # Read the shortest possible line
            if len(hexdata) == 0:                      # breaks loop once no more binary data is read
                break
            
            return(header,hexdata)
    



In [16]:
#list id's in download path
root_path = '/Users/bell/ecoraid/SBD_DataRetrieval/eclipse.pmel.noaa.gov/sbd/data/ecofoci.popupsbd/'

id_dir = os.listdir(root_path)
id_dir

['300434063823800',
 '300434063925210',
 '300434063826810',
 '300434063924230',
 '300434063921240',
 '300434063928220']

In [17]:
# cycle through each id
#  Within each ID is a sequential transmission number.  Within the transmissions there may be multiple
#   sample periods.  Unfortunately, the only way to identify these is when a file restarts and the 
#   byte position is 0.  Thus, to get the most recent (and therefore most likely desired) sample period
#   one must progress in reverse order through the files to find the transimission number with a 0 position.
#
# Filelist needs to be sorted into sequential files

year = str(datetime.datetime.now().year)

begin_index = {}
verbose = True

for msg_id_dir in id_dir:

    #find summary information which provides record information - exit loop after summary is found
    for filename in sorted(glob.iglob(root_path + msg_id_dir + '/' + year + '/*.sbd*', recursive=True), reverse=True):
        hexstr = HexView(filename)

        if (HexIdent(hexstr)) == 'summary.txt':
            begin_index.update({msg_id_dir: filename.split('.sbd')[0]})
            break



    if not os.path.exists(msg_id_dir+ '/' + year):
        os.makedirs(msg_id_dir+ '/' + year)
    else:
        files = glob.glob(msg_id_dir+ '/' + year + '/*')
        for f in files:
            os.remove(f)
            
    read_datafiles=False

    for filename in sorted(glob.iglob(root_path + msg_id_dir + '/' + year + '/*.sbd*', recursive=True), reverse=False):

        try: 
            if filename.split('.sbd')[0] == begin_index[msg_id_dir]:
                read_datafiles = True
        except KeyError:
            print("Directory {} does not have a summary file".format(msg_id_dir))

        if not read_datafiles:
            continue

        print(filename.split('ecofoci.popupsbd')[-1])

        hexstr = HexView(filename)

        if verbose:
            print(HexIdent(hexstr))
            
        if (HexIdent(hexstr)) == 'summary.txt':
            print(Summary(hexstr))

        elif (HexIdent(hexstr)) == 'prodat.txt':
            position,outstr = Data(hexstr)

            #open file, seek, write
            with open(msg_id_dir + '/' + year +'/PRODAT.TXT', 'ab+') as out_file:
                out_file.seek(position, 0)
                out_file.truncate()
                out_file.write( bytes.fromhex(outstr))               

        elif (HexIdent(hexstr)) == 'sstdat.txt':
            position,outstr = Data(hexstr)

            #open file, seek, write
            with open(msg_id_dir + '/' + year + '/SSTDAT.TXT', 'ab+') as out_file:
                out_file.seek(position, 0)
                out_file.truncate()
                out_file.write( bytes.fromhex(outstr))

        elif (HexIdent(hexstr)) == 'icedat.txt':
            position,outstr = Data(hexstr)

            #open file, seek, write
            with open(msg_id_dir + '/' + year + '/ICEDAT.TXT', 'ab+') as out_file:
                out_file.seek(position, 0)
                out_file.truncate()
                out_file.write( bytes.fromhex(outstr))

        elif (HexIdent(hexstr)) == 'botdat.txt':
            position,outstr = Data(hexstr)

            #open file, seek, write
            with open(msg_id_dir + '/' + year + '/BOTDAT.TXT', 'ab+') as out_file:
                out_file.seek(position, 0)
                out_file.truncate()
                out_file.write( bytes.fromhex(outstr))

        else:

            print("image file")
            header, hexdata = Image(filename)

            try:

                with open(msg_id_dir + '/' + year + '/{:05d}.jpg'.format(int(header[12:16],16)), 'rb+') as out_file:
                    out_file.seek(int(header[4:12],16), 0)
                    out_file.truncate()
                    out_file.write((hexdata))

            except FileNotFoundError:

                with open(msg_id_dir + '/' + year + '/{:05d}.jpg'.format(int(header[12:16],16)), 'wb+') as out_file:
                    out_file.seek(int(header[4:12],16), 0)
                    out_file.truncate()
                    out_file.write((hexdata))
                

/300434063925210/2019/300434063925210_000040.sbd.190302_160401
summary.txt
{'PositionInFile': 0, 'ProfileDataSize': 4727, 'IceDataSize': 391, 'BottomDataSize': 65501, 'NumberofImages': 1}
/300434063925210/2019/300434063925210_000041.sbd.190302_160413
prodat.txt
/300434063925210/2019/300434063925210_000042.sbd.190302_160424
prodat.txt
/300434063925210/2019/300434063925210_000043.sbd.190302_160437
prodat.txt
/300434063925210/2019/300434063925210_000044.sbd.190302_160512
prodat.txt
/300434063925210/2019/300434063925210_000045.sbd.190302_160740
prodat.txt
/300434063925210/2019/300434063925210_000046.sbd.190302_160750
prodat.txt
/300434063925210/2019/300434063925210_000047.sbd.190302_160856
prodat.txt
/300434063925210/2019/300434063925210_000048.sbd.190302_161044
prodat.txt
/300434063925210/2019/300434063925210_000049.sbd.190303_073405
image65535.txt
image file
/300434063925210/2019/300434063925210_000050.sbd.190303_073417
prodat.txt
/300434063925210/2019/300434063925210_000051.sbd.190303_0

### Build MSG summary File - latlons and status messages

example

    MOMSN: 49
    MTMSN: 0
    Time of Session (UTC): Sun Mar  3 00:13:51 2019
    Session Status: 13 - Incomplete Transfer
    Message Size (bytes): 0

    Unit Location: Lat = 59.89769 Long = -171.24297
    CEPradius = 2

In [18]:
year = str(datetime.datetime.now().year)

position_only = True
good_gps = True

for msg_id_dir in id_dir:

    #find summary information which provides record information - exit loop after summary is found
    for filename in sorted(glob.iglob(root_path + msg_id_dir + '/' + year + '/*.msg*', recursive=True), reverse=False):
        with open(filename, 'r') as in_file:
            for line in in_file:
                if 'MOMSN:' in line:
                    MOMSN = line.split()[-1]
                elif 'MTMSN:' in line:
                    MTMSN = line.split()[-1]
                elif 'Time' in line:
                    TimeUTC = line.split(': ')[-1].strip()
                    datetimeutc = datetime.datetime.strptime(TimeUTC,'%a %b  %d %H:%M:%S %Y')
                elif 'Status:' in line:
                    Status = line.split(': ')[-1].strip()
                elif 'Message Size' in line:
                    MSize = line.split()[-1]
                elif 'Unit Location:' in line:
                    lat = line.split()[4]
                    lon = line.split()[7]
                elif 'CEPradius' in line:
                    rad = line.split()[-1]
                else:
                    continue
            if position_only:
                if good_gps:
                    if int(rad) < 10:
                        print("{},{},{},{}".format(datetimeutc,lat,lon,rad))
                else:
                    print("{},{},{},{}".format(datetimeutc,lat,lon,rad))
            else:
                print(MOMSN,MTMSN,datetimeutc,Status,MSize,lat,lon,rad)
                

2019-03-03 00:03:49,59.89294,-171.17231,3
2019-03-03 00:04:00,59.89769,-171.24297,3
2019-03-03 00:04:09,59.89769,-171.24297,2
2019-03-03 00:04:21,59.89769,-171.24297,2
2019-03-03 00:04:34,59.89769,-171.24297,2
2019-03-03 00:05:02,59.89769,-171.24297,2
2019-03-03 00:05:02,59.89769,-171.24297,2
2019-03-03 00:07:36,59.86699,-171.25384,4
2019-03-03 00:07:47,59.86224,-171.18327,5
2019-03-03 00:08:54,59.89294,-171.17231,8
2019-03-03 00:13:51,59.89769,-171.24297,2
2019-03-03 15:34:05,60.03527,-171.35209,4
2019-03-03 15:34:15,60.03058,-171.28112,4
2019-03-03 15:34:27,60.03527,-171.35209,3
2019-03-03 15:34:42,60.03058,-171.28112,3
2019-03-03 15:35:19,60.03993,-171.42308,2
2019-03-03 15:38:39,60.06134,-171.27023,6
2019-03-03 15:38:49,60.06134,-171.27023,7
2019-03-03 15:38:59,60.06134,-171.27023,8
2019-03-03 15:42:50,60.06134,-171.27023,7
2019-03-03 15:43:09,60.03058,-171.28112,6
2019-03-03 15:43:18,60.03527,-171.35209,5
2019-03-03 15:43:29,60.03527,-171.35209,5
2019-03-03 15:43:37,60.03527,-171.