### SBD Stitcher Program for Popup Buoys

Allows the retrieval of a specific file, but is not a full implementation of wget.

__pyversion__==3.7 ***tested***  
__author__==S.Bell

In [1]:
source_url = 'http://eclipse.pmel.noaa.gov/sbd/data/ecofoci.popupsbd/'

***Unix command line for cronjobs***

`wget http://eclipse.pmel.noaa.gov/sbd/data/ecofoci.popupsbd/ -np -r -nc -R "index.html*"`

### Test reassembly of files

In [2]:
import os
import glob #python >= 3.5

import datetime

import pandas as pd

In [3]:
# read hex encoded binary
def HexView(file):
    '''Return the complete content of `file` as an upper-case hex string.

    The file is opened in binary mode and read in a single call.  When the
    file holds no bytes at all, None is returned instead of an empty string.
    '''
    with open(file, 'rb') as in_file:
        raw_bytes = in_file.read()

    # nothing was read: signal it with None
    if not raw_bytes:
        return None

    return raw_bytes.hex().upper()

# convert identifying bytes for processing
def HexIdent(hexstr):
    '''Name the file a transmission belongs to.

    The first four hex characters of `hexstr` are the identifier.  The
    identifiers listed below map to fixed text-file names (the comparison is
    case sensitive); any other identifier is parsed as a base-16 number and
    returned as 'image<number>.txt'.
    '''
    known_sources = {
        '0001': 'summary.txt',
        '0002': 'prodat.txt',
        '0003': 'icedat.txt',
        '0004': 'botdat.txt',
        'FFFE': 'sstdat.txt',
    }

    ident = hexstr[0:4]
    if ident in known_sources:
        return known_sources[ident]

    # not a reserved identifier: treat it as an image number
    return 'image{}.txt'.format(int(ident, 16))
        
def Summary(hexstr):
    '''SUMMARY.TXT

    Decode the fixed-position fields of a summary transmission.  Each field
    is a slice of the hex string `hexstr` parsed as a base-16 integer; the
    result is a dict keyed by field name.
    '''
    # (field name, first hex character, one past the last hex character)
    layout = (
        ('PositionInFile', 4, 12),
        ('ProfileDataSize', 12, 20),
        ('IceDataSize', 20, 28),
        ('BottomDataSize', 28, 36),
        ('NumberofImages', 36, 40),
    )

    return {name: int(hexstr[first:last], 16) for name, first, last in layout}

def Data(hexstr):
    ''' PRODAT.TXT
        ICEDAT.TXT
        SSTDAT.TXT
        BOTDAT.TXT

        Split a data transmission into (position, payload): hex characters
        4-11 parsed as a base-16 integer, and everything from hex character
        12 onwards returned unchanged as a hex string.
    '''
    position = int(hexstr[4:12], 16)
    payload = hexstr[12:]

    return position, payload

def Image(file):
    '''{IMAGENUMBER}.JPG

    Read an image transmission from `file` and return (header, payload):
    the first 8 bytes as a lower-case hex string and the remaining bytes
    untouched.  When nothing follows the header, None is returned.
    '''
    with open(file, 'rb') as in_file:
        header = in_file.read(8).hex()
        payload = in_file.read()

    # no bytes after the header: signal it with None
    if not payload:
        return None

    return header, payload
    



In [4]:
# list the ids found in the download path
# root_path is a machine-specific absolute path; presumably the local copy made
# by the wget command above - TODO confirm and adjust for your own machine
root_path = '/Users/bell/ecoraid/SBD_DataRetrieval/eclipse.pmel.noaa.gov/sbd/data/ecofoci.popupsbd/'

# every entry directly below root_path is treated as one id
id_dir = os.listdir(root_path)
id_dir

# comment out the line below to process all ids; as written it replaces the
# directory listing with a single hard-coded id
id_dir=['300434063823800']


In [5]:
# cycle through each id
#  Within each ID is a sequential transmission number.  Within the transmissions there may be multiple
#   sample periods.  Unfortunately, the only way to identify these is when a file restarts and the
#   byte position is 0.  Thus, to get the most recent (and therefore most likely desired) sample period
#   one must progress in reverse order through the files to find the transmission number with a 0 position.
#   (Implemented below as: the most recent transmission that HexIdent reports as summary.txt.)
#
# Filelist needs to be sorted into sequential files
#
# Output goes to <id>/<year>/ relative to the current working directory; files already
# in that directory are deleted before reassembly starts.

year = str(datetime.datetime.now().year)

begin_index = {}
verbose = False

# reassembled text files, keyed by the message source reported by HexIdent
data_outputs = {'prodat.txt': 'PRODAT.TXT',
                'sstdat.txt': 'SSTDAT.TXT',
                'icedat.txt': 'ICEDAT.TXT',
                'botdat.txt': 'BOTDAT.TXT'}


def _write_at(path, position, payload, mode):
    '''Open `path` with `mode`, cut the file at `position` and write `payload` (bytes) there.'''
    with open(path, mode) as out_file:
        out_file.seek(position, 0)
        # anything previously stored at or after this position is discarded
        out_file.truncate()
        out_file.write(payload)


for msg_id_dir in id_dir:

    out_dir = msg_id_dir + '/' + year

    # transmissions of this id for the current year, in filename (= sequential) order
    sbd_files = sorted(glob.glob(root_path + msg_id_dir + '/' + year + '/*.sbd*'))

    #find summary information which provides record information - exit loop after summary is found
    for filename in reversed(sbd_files):
        hexstr = HexView(filename)

        # HexView yields nothing for an empty file; such a file cannot be a summary
        if hexstr and HexIdent(hexstr) == 'summary.txt':
            begin_index.update({msg_id_dir: filename.split('.sbd')[0]})
            break

    # start from an empty output directory
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    else:
        for f in glob.glob(out_dir + '/*'):
            os.remove(f)

    # without a summary there is no starting point: report it once and move on
    if msg_id_dir not in begin_index:
        print("Directory {} does not have a summary file".format(msg_id_dir))
        continue

    read_datafiles = False

    for filename in sbd_files:

        # everything before the most recent summary belongs to an older sample period
        if filename.split('.sbd')[0] == begin_index[msg_id_dir]:
            read_datafiles = True

        if not read_datafiles:
            continue

        print(filename.split('ecofoci.popupsbd')[-1])

        hexstr = HexView(filename)
        if not hexstr:
            print("empty transmission - skipped")
            continue

        message_source = HexIdent(hexstr)

        if verbose:
            print(message_source)

        if message_source == 'summary.txt':
            print(Summary(hexstr))

        elif message_source in data_outputs:
            position, outstr = Data(hexstr)

            #open file, seek, write
            # 'ab+' creates the file when missing; after truncate() the end of
            # the file is `position`, which is where the appended bytes land
            _write_at(out_dir + '/' + data_outputs[message_source],
                      position, bytes.fromhex(outstr), 'ab+')

        else:

            print("image file")
            image = Image(filename)
            if image is None:
                # Image yields nothing when no bytes follow the 8 byte header
                print("image transmission has no payload - skipped")
                continue

            header, hexdata = image
            image_path = out_dir + '/{:05d}.jpg'.format(int(header[12:16],16))
            image_position = int(header[4:12],16)

            try:
                # update the image in place when an earlier transmission created it
                _write_at(image_path, image_position, hexdata, 'rb+')
            except FileNotFoundError:
                # first transmission for this image: create the file
                _write_at(image_path, image_position, hexdata, 'wb+')
                

/300434063823800/2019/300434063823800_000004.sbd.190507_015806
{'PositionInFile': 0, 'ProfileDataSize': 4727, 'IceDataSize': 2040, 'BottomDataSize': 108749, 'NumberofImages': 5}
/300434063823800/2019/300434063823800_000005.sbd.190507_015908
/300434063823800/2019/300434063823800_000006.sbd.190507_022423
image file
/300434063823800/2019/300434063823800_000007.sbd.190507_022529
/300434063823800/2019/300434063823800_000008.sbd.190507_022547
/300434063823800/2019/300434063823800_000009.sbd.190507_031024
image file
/300434063823800/2019/300434063823800_000010.sbd.190507_031138
/300434063823800/2019/300434063823800_000011.sbd.190507_031155
/300434063823800/2019/300434063823800_000012.sbd.190507_031208
/300434063823800/2019/300434063823800_000013.sbd.190507_051354
image file
/300434063823800/2019/300434063823800_000014.sbd.190507_053708
image file
/300434063823800/2019/300434063823800_000015.sbd.190507_053718
/300434063823800/2019/300434063823800_000016.sbd.190507_053729
/300434063823800/2019/

### Build MSG summary File - latlons and status messages

example

    MOMSN: 49
    MTMSN: 0
    Time of Session (UTC): Sun Mar  3 00:13:51 2019
    Session Status: 13 - Incomplete Transfer
    Message Size (bytes): 0

    Unit Location: Lat = 59.89769 Long = -171.24297
    CEPradius = 2

In [6]:
year = str(datetime.datetime.now().year)

# position_only: print just time, lat, lon and CEP radius instead of every parsed field
# good_gps: with position_only, print only records whose CEP radius is below 10
position_only = True
good_gps = True

for msg_id_dir in id_dir:
    print(msg_id_dir)
    # each .msg file of the current year is parsed on its own, in filename order
    for filename in sorted(glob.iglob(root_path + msg_id_dir + '/' + year + '/*.msg*', recursive=True), reverse=False):
        # reset every field so that a value missing from this file cannot be
        # inherited from the previously parsed file
        MOMSN = MTMSN = TimeUTC = datetimeutc = Status = MSize = lat = lon = rad = None

        with open(filename, 'r') as in_file:
            for line in in_file:
                if 'MOMSN:' in line:
                    MOMSN = line.split()[-1]
                elif 'MTMSN:' in line:
                    MTMSN = line.split()[-1]
                elif 'Time' in line:
                    TimeUTC = line.split(': ')[-1].strip()
                    # whitespace in the format matches any run of whitespace,
                    # so single and double digit days both parse
                    datetimeutc = datetime.datetime.strptime(TimeUTC,'%a %b  %d %H:%M:%S %Y')
                elif 'Status:' in line:
                    Status = line.split(': ')[-1].strip()
                elif 'Message Size' in line:
                    MSize = line.split()[-1]
                elif 'Unit Location:' in line:
                    # e.g. "Unit Location: Lat = 59.89769 Long = -171.24297"
                    location_fields = line.split()
                    lat = location_fields[4]
                    lon = location_fields[7]
                elif 'CEPradius' in line:
                    rad = line.split()[-1]

        if position_only:
            # an incomplete record has no usable position: skip it
            if any(value is None for value in (datetimeutc, lat, lon, rad)):
                continue
            if good_gps and int(rad) >= 10:
                continue
            print("{},{},{},{}".format(datetimeutc,lat,lon,rad))
        else:
            print(MOMSN,MTMSN,datetimeutc,Status,MSize,lat,lon,rad)
                

300434063823800
2019-05-07 08:57:57,70.77815,-167.15230,3
2019-05-07 08:58:06,70.77815,-167.15230,3
2019-05-07 08:59:05,70.77059,-167.04617,3
2019-05-07 10:10:23,70.77815,-167.15230,4
2019-05-07 10:11:35,70.77815,-167.15230,3
2019-05-07 10:11:48,70.77059,-167.04617,3
2019-05-07 10:12:04,70.77059,-167.04617,3
2019-05-07 12:13:53,70.77815,-167.15230,4
2019-05-07 12:37:07,70.74513,-167.17646,2
2019-05-07 12:37:15,70.77815,-167.15230,2
2019-05-07 12:37:25,70.77815,-167.15230,2
2019-05-07 12:37:53,70.75261,-167.28249,2
2019-05-07 12:38:15,70.77815,-167.15230,2
2019-05-07 12:38:26,70.77815,-167.15230,2
2019-05-07 12:38:47,70.77815,-167.15230,2
2019-05-07 12:38:59,70.77815,-167.15230,2
2019-05-07 12:39:14,70.77815,-167.15230,2
2019-05-07 13:05:12,70.77815,-167.15230,3
2019-05-07 13:05:22,70.77815,-167.15230,3
2019-05-07 13:06:31,70.78565,-167.25851,3
2019-05-07 13:59:59,70.75261,-167.28249,6
2019-05-07 14:00:12,70.77815,-167.15230,6
2019-05-07 14:00:25,70.77815,-167.15230,5
2019-05-07 14:00:4