In [1]:
import glob
import os
import sys
import datetime
import numpy as np
import pandas as pd

# import struct
# More General: Can handle a wider variety of data types (integers, floats, strings, etc.) and complex data structures.
# Explicit Formatting: Provides explicit control over data types, byte order, and padding.
# Required for Complex Structures: Necessary when packing multiple data elements into a single byte string, or when you need fine-grained control over data alignment and padding

import matplotlib.pyplot as plt

In [2]:
def DF_tmp_data(file_name):
    
    with open(file_name, 'rb') as f:
        mpduPackets = f.read().split(b'\x1A\xCF\xFC\x1D')[1:]

    headers = [[], [], []]
    for k, packet in enumerate(mpduPackets):
        # classify the packets by the VCDU header
        if packet[:2] == b'\x55\x40':
            headers[0].append('IM')
            headers[2].append(packet[5:])
        elif packet[:2] == b'\x40\x3F':
            headers[0].append('HK')
            headers[2].append(packet[5:])
        else:
            continue
        
        headers[1].append(int.from_bytes(packet[2:5], 'big'))
        
    headerDF = pd.DataFrame({
        'VCDU': headers[0],
        'PSC': pd.Series(headers[1], dtype=int),
        'data': pd.Series(headers[2], dtype='object')  # Preserve binary data
    })
    
    return headerDF

def DF_raw_data(file_name):
    
    '''
    Read the raw data file and return a DataFrame containing the header information.
    Input:
        file_name: str
            The name of the raw data file.
    Output:
        dataDF: DataFrame
            The DataFrame containing the header information.
    '''
    
    with open(file_name, 'rb') as f:
        mpduPackets = f.read().split(b'\x1A\xCF\xFC\x1D')[1:]

    headers = [[], [], [], [], []]
    for k, packet in enumerate(mpduPackets):
        # classify the packets by the VCDU header
        if packet[28:30] == b'\x55\x40':
            headers[0].append('IM')
            headers[4].append(packet[56:-160])
            # headers[4].append(0)
        elif packet[28:30] == b'\x40\x3F':
            headers[0].append('HK')
            headers[4].append(packet[56:-160])
            # headers[4].append(0)
        else:
            continue
        
        headers[1].append(int.from_bytes(packet[30:33], 'big'))
        headers[2].append(packet[34])
        headers[3].append(packet[1])
        
    dataDF = pd.DataFrame({
        'VCDU': headers[0],
        'PSC': pd.Series(headers[1], dtype=int),
        'IB': headers[2],
        'DQ': pd.Series(headers[3], dtype=int),
        'data': pd.Series(headers[4], dtype='object')  # Preserve binary data
    })
    
    return dataDF

def encode_data(filename, VCDU, PSC_DF, data_DF, mode, sync_bytes=b'\x1A\xCF\xFC\x1D'):
    '''
    Used for store incomplete data.
    ------Parameters------
    filename: str
        The name of the file to write to.
    VCDU: bytes
        The VCDU header for identifying the data.
    PSC_DF: DataFrame
        The DataFrame containing the PSC values.
    data_DF: DataFrame
        The DataFrame containing the data values.
    mode: str
        The mode to open the file in. 'wb' and 'ab'.
    sync_bytes: bytes
        The sync bytes to use to separate records.
    '''
    try:
        PSC_list = PSC_DF.values.tolist()
        data_list = data_DF.values.tolist()
        with open(filename, mode) as f: 
            for i in range(0, len(data_DF)):
                f.write(sync_bytes)
                f.write(VCDU)
                # Pack the sequence count into bytes
                PSC_bytes = PSC_list[i].to_bytes(3, byteorder='big')
                f.write(PSC_bytes)
                f.write(data_list[i])
            if mode == 'wb':
                print(f"Data write to {filename}")
            else:
                print(f"Data append to {filename}")
    except Exception as e:
         print(f"Error writing to file: {e}")

def find_consecutive_ranges(lst):
    
    '''
    Find the consecutive ranges in a list of integers.
    Input:
        lst: a list of integers
    Output:
        ranges: a list of lists, each sublist contains the start and end of a consecutive range
    '''
    
    if not lst:
        return []
    
    ranges = []
    start = lst[0]
    
    for i in range(1, len(lst)):
        if lst[i] != lst[i - 1] + 1:
            ranges.append([start, lst[i - 1]])
            start = lst[i]
    
    ranges.append([start, lst[-1]])  # Append the last range
    
    return ranges

tmp_files = glob.glob('./tmp/*.bin')
mock_request = glob.glob('./requested_data/*.bin')

In [13]:
tmp_data = DF_tmp_data('/home/amos/VERTECS/python_program/x-band-software/mock_data/C20250110163605.bin')
requested_data = DF_raw_data('/home/amos/VERTECS/python_program/x-band-software/mock_data/F20250109155612.bin')
IM_mask = lambda x: (x['VCDU'] == 'IM')
HK_mask = lambda x: (x['VCDU'] == 'HK')
DQ_mask = lambda x: (x['DQ'] == 0)

IM_range = set(range(0, 16620))
try:
    HK_range = set(range(tmp_data[HK_mask]['PSC'].min(), tmp_data[HK_mask]['PSC'].max()+1)) # if the number of HK is fixed, please change the range 
except:
    HK_range = set(range(800, 8000))
missing_IM = IM_range - set(tmp_data[IM_mask(tmp_data)]['PSC'])
missing_HK = HK_range - set(tmp_data[HK_mask(tmp_data)]['PSC'])
requested_IM_PSC = requested_data[IM_mask(requested_data)&DQ_mask(requested_data)]['PSC'].isin(missing_IM)
requested_HK_PSC = requested_data[HK_mask(requested_data)&DQ_mask(requested_data)]['PSC'].isin(missing_HK)
requested_IM = requested_data[IM_mask(requested_data)&DQ_mask(requested_data)][requested_IM_PSC]
requested_HK = requested_data[HK_mask(requested_data)&DQ_mask(requested_data)][requested_HK_PSC]
# combine the data
combined_IM = pd.concat([tmp_data[IM_mask(tmp_data)], requested_IM[['VCDU', 'PSC', 'data']]]).sort_values(by='PSC')
combined_HK = pd.concat([tmp_data[HK_mask(tmp_data)], requested_HK[['VCDU', 'PSC', 'data']]]).sort_values(by='PSC')
missing_IM = IM_range - set(combined_IM['PSC'])
missing_HK = HK_range - set(combined_HK['PSC'])
missing_IM = sorted(list(missing_IM))
missing_HK = sorted(list(missing_HK))

In [15]:
len(combined_IM)

16620

In [8]:
3008*3003-9032970

54

In [None]:
DF = DF_tmp_data('/home/amos/VERTECS/python_program/x-band-software/optical/opt_frame_0001_C20250110163605.bin')
DF = DF_tmp_data
# DF = DF_tmp_data('/home/amos/VERTECS/python_program/image_check/optical/opt_frame_mock1_0010_F20250109130351.bin')

IM = DF[DF['VCDU']=='IM']
DATA = IM['data'] #data
data = bytes()
for i in range(len(IM)):
    data += DATA[i]
data_rs = data.rstrip(b'\0')
data_array = np.frombuffer(data_rs,dtype=np.uint16)

In [6]:
len(data_array)

9033024

In [None]:
IDs = ['01', '02', '02', '02', '05']
IDlist = list(set(IDs))

In [5]:
IDlist[0]

'05'

In [7]:
glob.glob('./requested_data/*.bin')

['./requested_data/F20250109141442_mock2.bin',
 './requested_data/F20250109155612_mock1.bin']

In [5]:
file_name = tmp_files[0]
tmp_data = DF_tmp_data(file_name)
requested_data = DF_raw_data(mock_request[0])
IM_mask = lambda x: (x['VCDU'] == 'IM')
HK_mask = lambda x: (x['VCDU'] == 'HK')
DQ_mask = lambda x: (x['DQ'] == 0)
IM_range = set(range(0, 16620))
HK_range = set(range(tmp_data[HK_mask]['PSC'].min(), tmp_data[HK_mask]['PSC'].max()+1)) # if the number of HK is fixed, please change the range 

In [6]:
missing_IM = IM_range - set(tmp_data[IM_mask]['PSC'])
missing_HK = HK_range - set(tmp_data[HK_mask]['PSC'])
requested_IM_PSC = requested_data[(IM_mask)]['PSC'].isin(missing_IM)
requested_HK_PSC = requested_data[HK_mask]['PSC'].isin(missing_HK)
requested_IM = requested_data[IM_mask][requested_IM_PSC]
requested_HK = requested_data[HK_mask][requested_HK_PSC]
combined_IM = pd.concat([tmp_data[IM_mask], requested_IM[['VCDU', 'PSC', 'data']]]).sort_values(by='PSC')
combined_HK = pd.concat([tmp_data[HK_mask], requested_HK[['VCDU', 'PSC', 'data']]]).sort_values(by='PSC')
missing_IM = IM_range - set(combined_IM['PSC'])
missing_HK = HK_range - set(combined_HK['PSC'])
missing_IM = sorted(list(missing_IM))
missing_HK = sorted(list(missing_HK))

In [14]:

output_IM_folder_path = "./optical/"
VCDU_image = b'\x55\x40'
VCDU_HK = b'\x40\x3F'

# determine normal mode report file
report_path = "./report/"
reports = glob.glob('./report/*.csv')
reports.sort()
fout_name = reports[-1]
if os.path.getsize(fout_name) > 1e7: # size limit of a report file is ~ 10MB
    print('The last report file is too large, create a new one.')
    dt_now = datetime.datetime.now()
    time_now = dt_now.strftime('%d_%H%M%S')
    fout_name = f'{report_path}report_{str(len(reports)).zfill(4)}_{time_now}.csv'
    with open(fout_name, 'w') as f:
        f.write('Filename,Type,Start_Packet_number,End_Packet_number,Incompleteness(100*missing/16621)\n')
else:
    print(f'Write to the last report file: {fout_name}')

# check if there are missing packets
missing_segment_IM = find_consecutive_ranges(list(missing_IM))
missing_segment_HK = find_consecutive_ranges(list(missing_HK))
if (len(missing_IM) == 0) and (len(missing_HK) == 0):
    # no missing packets, save the image data
    nfiles = len(glob.glob(output_IM_folder_path+'*.bin'))
    nfiles = str(nfiles).zfill(4)
    outfile = f'./optical/opt_frame_{nfiles}_{file_name.split("/")[-1][4:]}'  # output file name
    # write the image data to the optical folder
    encode_data(outfile, VCDU_image, combined_IM['PSC'], combined_IM['data'], 'wb')
    # append the HK data to the optical folder
    encode_data(outfile, VCDU_HK, combined_HK['PSC'], combined_HK['data'], 'ab')
    # output the report
    with open(fout_name, 'a') as f:
        f.write(f'{file_name.split('/')[-1]},OK,0,0,0\n')
else:
    outfile = f'./tmp/mock_2_{file_name.split("/")[-1]}'
    # store the incomplete image data
    encode_data(outfile, VCDU_image, combined_IM['PSC'], combined_IM['data'], 'wb')
    # append the incomplete HK data
    encode_data(outfile, VCDU_HK, combined_HK['PSC'], combined_HK['data'], 'ab')
    # output the report for the missing packets
    with open(fout_name, 'a') as f:
        for segment in missing_segment_IM:
            f.write(f'{file_name.split('/')[-1]},IM,{segment[0]},{segment[1]},{(len(missing_IM)/16621)*100}\n')
        for segment in missing_segment_HK:
            f.write(f'{file_name.split('/')[-1]},HK,{segment[0]},{segment[1]},-1\n')

Write to the last report file: ./report/report_0000_05_143846.csv
Data write to ./tmp/mock_2_tmp_F20250109130351.bin
Data append to ./tmp/mock_2_tmp_F20250109130351.bin


In [14]:
tmp_data[tmp_data['VCDU']=="HK"]['PSC'].min(), requested_data[requested_data['VCDU']=="HK"]['PSC'].min()

(675, 353)

## testing

In [4]:
with open(tmp_files[0], 'rb') as f:
    mpduPackets = f.read().split(b'\x1A\xCF\xFC\x1D')[1:]
mpduPackets[0]

b'@?\x00\x00\x00\x02\x04\x00\x04\x01\x04\xff\x03\xfe\x03\x01\x04\x02\x04\x02\x04\x01\x04\x02\x04\x01\x04\x01\x04\x00\x04\x03\x04\x00\x04\x02\x04\xff\x03\xfe\x03\x02\x04\x02\x04\x03\x04\x00\x04\x00\x04\x01\x04\x01\x04\x02\x04\x00\x04\x01\x04\x01\x04\x02\x04\x02\x04\x00\x04\x02\x04\x00\x04\x01\x04\x01\x04\x01\x04\x04\x04\x01\x04\x03\x04\x02\x04\x01\x04\x02\x04\x02\x04\x01\x04\x01\x04\xff\x03\x02\x04\xfe\x03\x02\x04\x00\x04\x01\x04\x00\x04\x04\x04\xfd\x03\x01\x04\x03\x04\xff\x03\x01\x04\x00\x04\x00\x04\x00\x04\x03\x04\x01\x04\x02\x04\x02\x04\x02\x04\x00\x04\x03\x04\x02\x04\x01\x04\x00\x04\xfe\x03\x00\x04\x02\x04\x00\x04\x02\x04\x01\x04\x01\x04\x03\x04\x01\x04\x00\x04\x02\x04\x01\x04\x02\x04\x02\x04\x02\x04\x02\x04\xff\x03\x02\x04\x03\x04\x01\x04\x04\x04\x01\x04\x02\x04\xff\x03\x01\x04\x01\x04\x01\x04\x00\x04\x02\x04\x00\x04\x00\x04\x02\x04\xff\x03\x01\x04\x01\x04\x03\x04\x02\x04\x03\x04\x02\x04\x02\x04\xff\x03\x02\x04\x02\x04\x02\x04\x02\x04\x03\x04\x01\x04\x03\x04\x01\x04\x00\x04\x03\x04

In [5]:
b'\x40\x3F', b'\x55\x40'

(b'@?', b'U@')

In [72]:
file_name = tmp_files[0]
request_name = mock_request[0]

IM_mask = lambda x: (x['VCDU'] == 'IM') & (x['DQ'] == 0)

incomplete_data = DF_data(file_name)
incomplete_IM = incomplete_data[IM_mask(incomplete_data)][['PSC', 'data']]

request_data = DF_data(request_name)
# please add other mask to filter the packets belongs to the corresponding request, should be in the header
request_IM = request_data[IM_mask(request_data)][['PSC', 'data']]

# last step
# imgData = imgData.rstrip(b'\0')

In [83]:
requested_PSC = set(range(0, 16620)) - set(incomplete_IM['PSC'].values)
requested_data = request_IM.loc[request_IM['PSC'].isin(requested_PSC)]
complete_data = pd.concat([incomplete_IM, requested_data]).sort_values(by='PSC')

In [89]:
complete_data['data'] = complete_data['data'].apply(lambda x: x.rstrip(b'\0'))

In [131]:
datalist = complete_data['data'].values.tolist()
datalist[0]

b'\x02\x04\x00\x04\x01\x04\xff\x03\xfe\x03\x01\x04\x02\x04\x02\x04\x01\x04\x02\x04\x01\x04\x01\x04\x00\x04\x03\x04\x00\x04\x02\x04\xff\x03\xfe\x03\x02\x04\x02\x04\x03\x04\x00\x04\x00\x04\x01\x04\x01\x04\x02\x04\x00\x04\x01\x04\x01\x04\x02\x04\x02\x04\x00\x04\x02\x04\x00\x04\x01\x04\x01\x04\x01\x04\x04\x04\x01\x04\x03\x04\x02\x04\x01\x04\x02\x04\x02\x04\x01\x04\x01\x04\xff\x03\x02\x04\xfe\x03\x02\x04\x00\x04\x01\x04\x00\x04\x04\x04\xfd\x03\x01\x04\x03\x04\xff\x03\x01\x04\x00\x04\x00\x04\x00\x04\x03\x04\x01\x04\x02\x04\x02\x04\x02\x04\x00\x04\x03\x04\x02\x04\x01\x04\x00\x04\xfe\x03\x00\x04\x02\x04\x00\x04\x02\x04\x01\x04\x01\x04\x03\x04\x01\x04\x00\x04\x02\x04\x01\x04\x02\x04\x02\x04\x02\x04\x02\x04\xff\x03\x02\x04\x03\x04\x01\x04\x04\x04\x01\x04\x02\x04\xff\x03\x01\x04\x01\x04\x01\x04\x00\x04\x02\x04\x00\x04\x00\x04\x02\x04\xff\x03\x01\x04\x01\x04\x03\x04\x02\x04\x03\x04\x02\x04\x02\x04\xff\x03\x02\x04\x02\x04\x02\x04\x02\x04\x03\x04\x01\x04\x03\x04\x01\x04\x00\x04\x03\x04\x03\x04\x00\x

In [146]:
tmp_encode_IM2hex(
    f'./tmp/{file_name.split("/")[-1]}',
    b'\x40\x3F',
    complete_data['PSC'],
    complete_data['data']
    )

True

In [151]:
file_name = '/home/amos/VERTECS/python_program/image_check/tmp/tmp_F20250109130351.bin'
with open(file_name, 'rb') as f:
        mpduPackets = f.read().split(b'\x1A\xCF\xFC\x1D')[1:]

In [147]:
with open(f'./tmp/{file_name.split("/")[-1]}', 'rb') as f:
    mpduPackets = f.read().split(b'\x1A\xCF\xFC\x1D')[1:]

In [148]:
mpduPackets

[b'@?\x00\x00\x00\x02\x04\x00\x04\x01\x04\xff\x03\xfe\x03\x01\x04\x02\x04\x02\x04\x01\x04\x02\x04\x01\x04\x01\x04\x00\x04\x03\x04\x00\x04\x02\x04\xff\x03\xfe\x03\x02\x04\x02\x04\x03\x04\x00\x04\x00\x04\x01\x04\x01\x04\x02\x04\x00\x04\x01\x04\x01\x04\x02\x04\x02\x04\x00\x04\x02\x04\x00\x04\x01\x04\x01\x04\x01\x04\x04\x04\x01\x04\x03\x04\x02\x04\x01\x04\x02\x04\x02\x04\x01\x04\x01\x04\xff\x03\x02\x04\xfe\x03\x02\x04\x00\x04\x01\x04\x00\x04\x04\x04\xfd\x03\x01\x04\x03\x04\xff\x03\x01\x04\x00\x04\x00\x04\x00\x04\x03\x04\x01\x04\x02\x04\x02\x04\x02\x04\x00\x04\x03\x04\x02\x04\x01\x04\x00\x04\xfe\x03\x00\x04\x02\x04\x00\x04\x02\x04\x01\x04\x01\x04\x03\x04\x01\x04\x00\x04\x02\x04\x01\x04\x02\x04\x02\x04\x02\x04\x02\x04\xff\x03\x02\x04\x03\x04\x01\x04\x04\x04\x01\x04\x02\x04\xff\x03\x01\x04\x01\x04\x01\x04\x00\x04\x02\x04\x00\x04\x00\x04\x02\x04\xff\x03\x01\x04\x01\x04\x03\x04\x02\x04\x03\x04\x02\x04\x02\x04\xff\x03\x02\x04\x02\x04\x02\x04\x02\x04\x03\x04\x01\x04\x03\x04\x01\x04\x00\x04\x03\x0

In [150]:
mpduPackets[10][:2] == b'\x40\x3F'

True

In [140]:
int.from_bytes(mpduPackets[10][:3], 'big')

10

In [None]:
headers = [[], [], [], []]
for k, packet in enumerate(mpduPackets):
    # classify the packets by the VCDU header
    if packet[28:30] == b'\x55\x40':
        headers[0].append('IM')
    elif packet[28:30] == b'\x40\x3F':
        headers[0].append('HK')
        hk.append(packet.hex())
    else:
        headers[0].append('UnClassified')
    
    headers[1].append(int.from_bytes(packet[30:33], 'big'))
    headers[2].append(packet[34])
    headers[3].append(packet[1])

In [121]:
# data = np.fromfile('mock_full_storage.csv', dtype=np.uint16)
# with open('your_file.csv', 'r') as file:
#     data = file.read()
#     decoded_data = codecs.decode(data.encode(), 'unicode_escape')
#     print(decoded_data)
with open('mock_full_storage.csv', 'r') as f:
    txt_data = f.read().split(',')[1:]
    
data = [codecs.decode(item.encode(), 'unicode_escape') for item in txt_data]
    

In [None]:
with open('mock_full_storage.csv', 'rb') as f:
    data = f.read()