### SBD Stitcher Program for Popup Buoys

Allows the retrieval of specific files; this is not a full reimplementation of `wget`.

__pyversion__==3.7 ***tested***  
__author__==S.Bell

In [13]:
source_url = 'http://eclipse.pmel.noaa.gov/sbd/data/ecofoci.popupsbd/'

***Unix command line for cronjobs***

`wget http://eclipse.pmel.noaa.gov/sbd/data/ecofoci.popupsbd/ -np -r -nc -R "index.html*"`

### Test reassembly of files

In [14]:
import os
import glob #python >= 3.5

import datetime

import pandas as pd

In [15]:
# read a binary file as a hex-encoded string
def HexView(file):
    '''Return the whole content of `file` as an upper-case hex string.

    The file is opened in binary mode and read in a single call.
    An empty file gives None instead of an empty string.
    '''
    with open(file, 'rb') as handle:
        raw_bytes = handle.read()

    # nothing was read: mirror the "no data" case with an implicit-None result
    if not raw_bytes:
        return None

    return raw_bytes.hex().upper()

# convert identifying bytes for processing
def HexIdent(hexstr):
    '''Map the first four hex characters of a message to its source file name.

    Known identifiers map to a fixed text file name; any other identifier is
    read as a base-16 number N and reported as 'image{N}.txt'.
    '''
    ident = hexstr[0:4]

    fixed_sources = {
        '0001': 'summary.txt',
        '0002': 'prodat.txt',
        '0003': 'icedat.txt',
        '0004': 'botdat.txt',
        'FFFE': 'sstdat.txt',
    }

    if ident in fixed_sources:
        return fixed_sources[ident]

    # anything else is treated as an image number
    return 'image{}.txt'.format(int(ident, 16))
        
def Summary(hexstr):
    '''Decode the fields of a SUMMARY.TXT message.

    Each field is a fixed slice of the hex string, converted from base 16.
    Returns a dict keyed by field name.
    '''
    # (field name, slice start, slice stop) in hex characters
    field_layout = (
        ('PositionInFile', 4, 12),
        ('ProfileDataSize', 12, 20),
        ('IceDataSize', 20, 28),
        ('BottomDataSize', 28, 36),
        ('NumberofImages', 36, 40),
    )

    return {label: int(hexstr[begin:end], 16) for label, begin, end in field_layout}

def Data(hexstr):
    '''Split a data message into its file offset and payload.

    Used for PRODAT.TXT, ICEDAT.TXT, SSTDAT.TXT and BOTDAT.TXT messages.
    Returns (offset, payload): the offset is hex characters 4-11 converted
    from base 16, the payload is the remaining hex string from character 12 on.
    '''
    offset_field, payload = hexstr[4:12], hexstr[12:]
    return (int(offset_field, 16), payload)

def Image(file):
    '''Read an image message: an 8-byte header followed by raw image bytes.

    Returns (header, payload) where header is the first 8 bytes as a
    lower-case hex string and payload is the rest of the file as bytes.
    Returns None when there is no payload after the header.
    '''
    with open(file, 'rb') as handle:
        header_bytes = handle.read(8)
        payload = handle.read()

    # a file with nothing after the header carries no image data
    if not payload:
        return None

    return (header_bytes.hex(), payload)
    



In [16]:
# list the IDs (one sub-directory per IMEI) found in the download path
# data_path: where the downloaded SBD/MSG files are read from
# results:   where the reassembled output files are written to
data_path = '..//ecofoci.popupsbd'
results = '..//results'

id_dir = os.listdir(data_path)
print(id_dir)

# year deployed - year of data - imei number - site: SBD Active/Inactive

# 2018 - 2019 - 300434063921240 - C2: Inactive
# 2018 - 2019 - 300434063823800 - C10/C11: Inactive
# 2018 - 2019 - 300434063928220 - C12: Inactive
# 2018 - 2019 - 300434063925210 - M5: Inactive

# 2019 - 2020 - 300434063470010 - S.W. of M5: Inactive
# 2019 - 2020 - 300434063477010 - N.W. of M5: Inactive
# 2019 - 2020 - 300434063861360 - N.E. of Saint Lawrence: Inactive

# NOTE: this replaces the directory listing above, so only this one IMEI is processed
id_dir=['300434063861360']


['300434063470010', '300434063477010', '300434063861360']


In [17]:
# cycle through each id
#  Within each ID is a sequential transmission number.  Within the transmissions there may be multiple
#   sample periods.  Unfortunately, the only way to identify these is when a file restarts and the
#   byte position is 0.  Thus, to get the most recent (and therefore most likely desired) sample period
#   one must progress in reverse order through the files to find the transmission number with a 0 position.
#   The code below does this by looking for the most recent summary message and starting from it.
#
# Filelist needs to be sorted into sequential files

#year = str(datetime.datetime.now().year)
year = '2020'

begin_index = {}
verbose = False

# message source (as reported by HexIdent) -> name of the reassembled output file
_DATA_OUTPUTS = {
    'prodat.txt': 'PRODAT.TXT',
    'sstdat.txt': 'SSTDAT.TXT',
    'icedat.txt': 'ICEDAT.TXT',
    'botdat.txt': 'BOTDAT.TXT',
}


def _write_at_offset(path, position, payload):
    '''Write `payload` (bytes) into `path` starting at byte `position`.

    Anything already in the file from `position` onwards is discarded first.
    The file is created when it does not exist yet.  It is opened for update
    rather than append, because append mode may send every write to the end
    of the file regardless of the seek position.
    '''
    try:
        out_file = open(path, 'rb+')
    except FileNotFoundError:
        out_file = open(path, 'wb+')

    with out_file:
        out_file.seek(position, 0)
        out_file.truncate()
        out_file.write(payload)


for msg_id_dir in id_dir:

    sbd_pattern = data_path + '/' + msg_id_dir + '/' + year + '/*.sbd*'
    out_dir = results + '/' + msg_id_dir + '/' + year

    #find summary information which provides record information - exit loop after summary is found
    for filename in sorted(glob.iglob(sbd_pattern), reverse=True):
        hexstr = HexView(filename)

        # HexView gives None for an empty file; such a file cannot be a summary
        if hexstr and HexIdent(hexstr) == 'summary.txt':
            begin_index.update({msg_id_dir: filename.split('.sbd')[0]})
            break

    if not os.path.exists(out_dir + '/' + 'camera_output'):
        os.makedirs(out_dir + '/' + 'camera_output')
    else:
        # clear the output of a previous run; the glob also matches the
        # camera_output directory, which os.remove cannot delete, so only
        # regular files are removed
        for stale in glob.glob(out_dir + '/*'):
            if os.path.isfile(stale):
                os.remove(stale)

    read_datafiles = False

    # without a summary there is no starting point: report it once and move on
    if msg_id_dir not in begin_index:
        print("Directory {} does not have a summary file".format(msg_id_dir))
        continue

    for filename in sorted(glob.iglob(sbd_pattern), reverse=False):

        # skip everything transmitted before the most recent summary
        if filename.split('.sbd')[0] == begin_index[msg_id_dir]:
            read_datafiles = True

        if not read_datafiles:
            continue

        print(filename.split('ecofoci.popupsbd')[-1])

        hexstr = HexView(filename)
        if not hexstr:
            print("empty file - skipped")
            continue

        message_source = HexIdent(hexstr)

        if verbose:
            print(message_source)

        if message_source == 'summary.txt':
            print(Summary(hexstr))

        elif message_source in _DATA_OUTPUTS:
            position, outstr = Data(hexstr)
            _write_at_offset(out_dir + '/' + _DATA_OUTPUTS[message_source],
                             position, bytes.fromhex(outstr))

        else:

            print("image file")
            image = Image(filename)

            # Image gives None when there is no payload after the header
            if image is None:
                print("image message without data - skipped")
                continue

            header, hexdata = image

            # header characters 12-15 give the image number, 4-11 the byte offset
            _write_at_offset(out_dir + '/{:05d}.jpg'.format(int(header[12:16], 16)),
                             int(header[4:12], 16), hexdata)
                

/300434063861360/2020\300434063861360_000060.sbd.201105_133042
{'PositionInFile': 0, 'ProfileDataSize': 4727, 'IceDataSize': 17, 'BottomDataSize': 161534, 'NumberofImages': 0}
/300434063861360/2020\300434063861360_000061.sbd.201105_153417
image file
/300434063861360/2020\300434063861360_000062.sbd.201106_002244
image file
/300434063861360/2020\300434063861360_000063.sbd.201106_024433
image file
/300434063861360/2020\300434063861360_000064.sbd.201106_071226
image file
/300434063861360/2020\300434063861360_000065.sbd.201106_091648
image file
/300434063861360/2020\300434063861360_000066.sbd.201106_113900
image file
/300434063861360/2020\300434063861360_000067.sbd.201106_134223
image file
/300434063861360/2020\300434063861360_000068.sbd.201106_152819
image file
/300434063861360/2020\300434063861360_000069.sbd.201106_221332
image file
/300434063861360/2020\300434063861360_000070.sbd.201107_022022
image file
/300434063861360/2020\300434063861360_000071.sbd.201107_050329
image file
/300434063

### Build MSG summary File - latlons and status messages

example

    MOMSN: 49
    MTMSN: 0
    Time of Session (UTC): Sun Mar  3 00:13:51 2019
    Session Status: 13 - Incomplete Transfer
    Message Size (bytes): 0

    Unit Location: Lat = 59.89769 Long = -171.24297
    CEPradius = 2

In [18]:
#year = str(datetime.datetime.now().year)
year = "2020"

# position_only: write time/position rows to the CSV instead of printing every field
# good_gps:      keep only rows whose CEPradius is below 10
position_only = True
good_gps = True


for msg_id_dir in id_dir:
    print(msg_id_dir)

    csv_path = results + '/' + msg_id_dir + '/' + year + '/message_position_' + year + '.csv'
    msg_pattern = data_path + '/' + msg_id_dir + '/' + year + '/*.msg*'

    # the file is opened in append mode, so only write the column header when
    # the file is new or empty; otherwise a re-run puts a header mid-file
    needs_header = (not os.path.exists(csv_path)) or os.path.getsize(csv_path) == 0

    with open(csv_path, 'a') as out_file:
        if needs_header:
            out_file.write("IMEI_message_id,datetime,latitude,longitude,CEPradius\n")

        for filename in sorted(glob.iglob(msg_pattern), reverse=False):
            # reset every field so a message that lacks a line cannot silently
            # reuse the value parsed from the previous message
            MOMSN = MTMSN = datetimeutc = Status = MSize = lat = lon = rad = None

            with open(filename, 'r') as in_file:
                for line in in_file:
                    if 'MOMSN:' in line:
                        MOMSN = line.split()[-1]
                    elif 'MTMSN:' in line:
                        MTMSN = line.split()[-1]
                    elif 'Time' in line:
                        TimeUTC = line.split(': ')[-1].strip()
                        datetimeutc = datetime.datetime.strptime(TimeUTC,'%a %b  %d %H:%M:%S %Y')
                    elif 'Status:' in line:
                        Status = line.split(': ')[-1].strip()
                    elif 'Message Size' in line:
                        MSize = line.split()[-1]
                    elif 'Unit Location:' in line:
                        # "Unit Location: Lat = <lat> Long = <lon>"
                        lat = line.split()[4]
                        lon = line.split()[7]
                    elif 'CEPradius' in line:
                        rad = line.split()[-1]

            if not position_only:
                print(MOMSN,MTMSN,datetimeutc,Status,MSize,lat,lon,rad)
                continue

            # a row needs a time, a position and a radius
            if any(value is None for value in (datetimeutc, lat, lon, rad)):
                print("{} has no complete time/position record - skipped".format(filename))
                continue

            if good_gps and int(rad) >= 10:
                continue

            out_file.write("{},{},{},{},{}\n".format(msg_id_dir,datetimeutc,lat,lon,rad))
                

300434063861360
