### SBD Stitcher Program for Popup Buoys

Allows retrieval of a specific file, but does not fully implement wget-style mirroring.

__pyversion__==3.7 ***tested***  
__author__==S.Bell

In [12]:
source_url = 'http://eclipse.pmel.noaa.gov/sbd/data/ecofoci.popupsbd/'

***Unix command line for cronjobs***

`wget http://eclipse.pmel.noaa.gov/sbd/data/ecofoci.popupsbd/ -np -r -nc -R "index.html*"`

### Test reassembly of files

In [13]:
import os
import glob #python >= 3.5

import datetime


In [14]:
# read hex-encoded binary
def HexView(file):
    '''Return the entire contents of ``file`` as an upper-case hex string.

    The file is opened in binary mode and read in a single call.

    Returns None (implicitly, as before) when the file contains no bytes.
    '''
    with open(file, 'rb') as in_file:
        raw_bytes = in_file.read()

    # An empty file yields no hex digits; signal that with None.
    if not raw_bytes:
        return None

    return raw_bytes.hex().upper()

# convert identifying bytes for processing
def HexIdent(hexstr):
    '''Map the first four hex characters of ``hexstr`` to a message source name.

    Known identifiers map to fixed names; any other identifier is parsed as a
    base-16 integer and reported as ``image<N>.txt``.  The comparison is
    case-sensitive, so the identifiers must be upper-case hex.

    Raises ValueError if an unknown identifier is not valid hex (including
    an empty string).
    '''
    known_sources = {
        '0001': 'summary.txt',
        '0002': 'prodat.txt',
        '0003': 'icedat.txt',
        '0004': 'botdat.txt',
        'FFFE': 'sstdat.txt',
    }
    ident = hexstr[0:4]

    if ident in known_sources:
        return known_sources[ident]

    # Anything else is treated as an image number.
    return 'image{}.txt'.format(int(ident, 16))
        
def Summary(hexstr):
    '''SUMMARY.TXT

    Decode the fixed-width fields of a summary message given as a hex string.
    Each field is a slice of hex characters parsed as a base-16 integer:

        [4:12]   PositionInFile
        [12:20]  ProfileDataSize
        [20:28]  IceDataSize
        [28:36]  BottomDataSize
        [36:40]  NumberofImages

    Returns a dict keyed by the field names above.
    Raises ValueError if any slice is empty or not valid hex.
    '''
    field_layout = (
        ('PositionInFile', 4, 12),
        ('ProfileDataSize', 12, 20),
        ('IceDataSize', 20, 28),
        ('BottomDataSize', 28, 36),
        ('NumberofImages', 36, 40),
    )

    parsed = {}
    for field_name, start, stop in field_layout:
        parsed[field_name] = int(hexstr[start:stop], 16)

    return parsed

def Data(hexstr):
    ''' PRODAT.TXT
        ICEDAT.TXT
        SSTDAT.TXT
        BOTDAT.TXT

        Split a data message (hex string) into its file offset and payload.

        Hex characters [4:12] are parsed as a base-16 integer offset; all
        characters from index 12 onward are returned unchanged, still as hex.

        Returns a tuple ``(PositionInFile, payload_hex)``.
        Raises ValueError if the offset slice is empty or not valid hex.
    '''
    offset_field, payload_hex = hexstr[4:12], hexstr[12:]
    file_offset = int(offset_field, 16)

    return (file_offset, payload_hex)

def Image(file):
    '''{IMAGENUMER}.JPG

    Read an image message from ``file`` (opened in binary mode).

    The first 8 bytes are returned as a lower-case hex string (the header);
    everything after them is returned as raw bytes.

    Returns a tuple ``(header_hex, payload_bytes)``, or None (implicitly,
    as before) when there are no bytes after the header.
    '''
    with open(file, 'rb') as in_file:
        header_hex = in_file.read(8).hex()
        payload = in_file.read()

    # No payload after the header: nothing to return.
    if not payload:
        return None

    return (header_hex, payload)
    



In [15]:
# List the ids found in the download path.
# root_path is the local mirror of the SBD server directory; its entries are
# treated as one directory per id by the processing loop further down.
root_path = '/Users/bell/ecoraid/SBD_DataRetrieval/eclipse.pmel.noaa.gov/sbd/data/ecofoci.popupsbd/'

# os.listdir returns every entry name in root_path, in arbitrary order.
# NOTE(review): nothing here filters out non-directory entries -- confirm the
# mirror only ever contains id directories.
id_dir = os.listdir(root_path)
id_dir  # bare expression: shown as the notebook cell output

['300434063823800',
 '300434063925210',
 '300434063826810',
 '300434063924230',
 '300434063921240',
 '300434063928220']

In [19]:
# Cycle through each id.
#  Within each ID is a sequential transmission number.  Within the transmissions there may be multiple
#   sample periods.  Unfortunately, the only way to identify these is when a file restarts and the
#   byte position is 0.  Thus, to get the most recent (and therefore most likely desired) sample period
#   one must progress in reverse order through the files to find the transmission number with a 0 position.
#
# The file list needs to be sorted into sequential files.
#
# NOTE(review): the scan below runs in FORWARD order and stops at the first summary message, i.e. it
#   selects the earliest summary in the directory, not the most recent one described above.  Confirm
#   which behaviour is intended before changing the sort order.


def _write_at(path, position, payload):
    '''Write ``payload`` (bytes) into ``path`` starting at byte offset ``position``.

    The file is opened for in-place update ('rb+'), or created ('wb+') when it
    does not exist yet.  Append mode must not be used here: in append mode
    every write goes to the end of the file and the preceding seek is ignored,
    so packets would be stored in arrival order rather than at their offset.
    '''
    try:
        out_file = open(path, 'rb+')
    except FileNotFoundError:
        out_file = open(path, 'wb+')

    with out_file:
        out_file.seek(position, 0)
        out_file.write(payload)


# Output file name for each reassembled data stream, keyed by the HexIdent() result.
data_outputs = {
    'prodat.txt': 'PRODAT.TXT',
    'sstdat.txt': 'SSTDAT.TXT',
    'icedat.txt': 'ICEDAT.TXT',
    'botdat.txt': 'BOTDAT.TXT',
}

year = str(datetime.datetime.now().year)
# Overrides the current year computed above.
# NOTE(review): presumably pinned to the 2018 data set -- confirm before removing.
year = '2018'
begin_index = {}

for msg_id_dir in id_dir:

    # Sorted once and reused for both passes over the directory.
    sbd_files = sorted(glob.glob(root_path + msg_id_dir + '/' + year + '/*.sbd*'))

    # Pass 1: find the first summary message; processing starts from that file.
    for filename in sbd_files:
        hexstr = HexView(filename)

        # HexView yields nothing for an empty file; it cannot be a summary.
        if hexstr and HexIdent(hexstr) == 'summary.txt':
            begin_index.update({msg_id_dir: filename.split('.sbd')[0]})
            exitstatus = False
            break
        else:
            exitstatus = True

    out_dir = msg_id_dir + '/' + year
    os.makedirs(out_dir, exist_ok=True)

    # Report a missing summary once per directory rather than once per file.
    if msg_id_dir not in begin_index:
        print("Directory {} does not have a summary file".format(msg_id_dir))
        continue

    # Pass 2: skip everything before the summary file, then reassemble.
    read_datafiles = False

    for filename in sbd_files:

        if filename.split('.sbd')[0] == begin_index[msg_id_dir]:
            read_datafiles = True

        if not read_datafiles:
            continue

        print(filename.split('ecofoci.popupsbd')[-1])

        hexstr = HexView(filename)
        if not hexstr:
            # Empty transmission file: nothing to identify or write.
            continue

        message_source = HexIdent(hexstr)

        if message_source == 'summary.txt':
            print(Summary(hexstr))

        elif message_source in data_outputs:
            position, outstr = Data(hexstr)
            _write_at(out_dir + '/' + data_outputs[message_source],
                      position, bytes.fromhex(outstr))

        else:

            print("image file")
            image = Image(filename)
            if image is None:
                # Header only, no image payload in this transmission.
                continue
            header, hexdata = image

            # Header hex chars [12:16] hold the image number, [4:12] the byte offset.
            _write_at(out_dir + '/{:05d}.jpg'.format(int(header[12:16], 16)),
                      int(header[4:12], 16), hexdata)
                

/300434063925210/2018/300434063925210_000001.sbd.180831_090648
{'PositionInFile': 0, 'ProfileDataSize': 4727, 'IceDataSize': 17, 'BottomDataSize': 289, 'NumberofImages': 0}
/300434063925210/2018/300434063925210_000002.sbd.180831_090658
/300434063925210/2018/300434063925210_000003.sbd.180831_090711
/300434063925210/2018/300434063925210_000004.sbd.180831_090724
/300434063925210/2018/300434063925210_000005.sbd.180831_090746
/300434063925210/2018/300434063925210_000006.sbd.180831_103313
image file
/300434063925210/2018/300434063925210_000007.sbd.180831_103323
/300434063925210/2018/300434063925210_000008.sbd.180831_103354
/300434063925210/2018/300434063925210_000009.sbd.180831_103405
/300434063925210/2018/300434063925210_000010.sbd.180831_103419
/300434063925210/2018/300434063925210_000011.sbd.180831_103431
/300434063925210/2018/300434063925210_000012.sbd.180831_103457
/300434063925210/2018/300434063925210_000013.sbd.180831_103613
/300434063925210/2018/300434063925210_000014.sbd.180831_1036