In [1]:
import struct
import os

import numpy as np
import pandas as pd

In [44]:
# Relative location of the input file; passed to OpusParser in a later cell.
path = './data/HL_R1_1.0'

In [45]:
class OpusParser:
    """Parse the header, parameter blocks and raw data block of an OPUS file.

    The whole file is read into memory on construction. ``parse()`` then walks
    the header directory (12-byte entries stored between bytes 24 and 504),
    decodes the parameter blocks into ``params`` and stores the raw bytes of
    the data block for the selected signal channel in ``data``.

    Attributes:
        file: Path of the file being parsed.
        signal: Key into ``channel_dict`` selecting the channel to read.
        bin_data: Complete file content as bytes.
        header: Raw header bytes; after ``_parse_header`` the list of 12-byte
            directory entries.
        chunks: Empty list until ``_parse_header`` runs, afterwards a DataFrame
            with the columns offset, length, block, channel and type.
        param_chunks: Directory rows of the parameter blocks to decode.
        data_chunk: Directory row of the data block of the selected channel.
        params: Mapping of three-letter parameter tag to decoded value.
        data: Raw bytes of the data block (not decoded any further yet).
        is_map_file: True when the data block starts with two zero bytes.
    """

    # Channel byte expected in the header directory for each supported signal.
    channel_dict = {
        "raman": 40
    }

    # struct format for numeric parameters, keyed by (dtype code, size in bytes).
    type_dict = {
        (0, 4): '<I',
        (1, 4): '<f',
        (1, 8): '<d'
    }

    # Byte range of the header directory and the size of one directory entry.
    HEADER_START = 24
    HEADER_END = 504
    ENTRY_SIZE = 12

    # Values of the "block" byte identifying the chunks this parser reads.
    DATA_BLOCK = 15
    PARAM_BLOCK = 31
    ACQUISITION_BLOCK = 32
    OPTICS_BLOCK = 96
    SAMPLE_BLOCK = 160

    # Column order of the ``chunks`` table.
    CHUNK_COLUMNS = ["offset", "length", "block", "channel", "type"]

    def __init__(self, file, signal="raman"):
        """Validate the arguments and load the file content.

        Args:
            file: Path of the file to parse.
            signal: Signal type to extract; must be a key of ``channel_dict``.

        Raises:
            ValueError: If ``signal`` is not a known signal type.
            FileNotFoundError: If ``file`` does not exist.
        """
        self.file = file
        self.signal = signal

        self._check_params()

        # Read the file that was handed to the constructor, not a global name.
        with open(self.file, 'rb') as f:
            self.bin_data = f.read()
        self.header = self.bin_data[self.HEADER_START:self.HEADER_END]

        self.chunks = []
        self.param_chunks = []
        self.data_chunk = None
        self.params = {}
        self.data = None
        self.is_map_file = False

    def _check_params(self):
        """Raise if the signal type is unknown or the file is missing."""
        if self.signal not in self.channel_dict:
            raise ValueError("Unknown signal type")
        if not os.path.exists(self.file):
            raise FileNotFoundError(f"File {self.file} does not exist.")

    def _parse_header(self):
        """Decode the header directory and locate the blocks to read.

        Each 12-byte entry holds the block, channel and type bytes at positions
        0-2, the block length at bytes 4-8 and the block offset at bytes 8-12
        (both little-endian unsigned 32-bit). Reading stops at the first
        all-zero or truncated entry.

        The directory is always re-read from ``bin_data``, so calling this
        method more than once gives the same result.

        Raises:
            IndexError: If a required block is not listed in the directory.
        """
        raw_header = self.bin_data[self.HEADER_START:self.HEADER_END]
        self.header = [raw_header[i:i + self.ENTRY_SIZE]
                       for i in range(0, len(raw_header), self.ENTRY_SIZE)]

        records = []
        for entry in self.header:
            if len(entry) < self.ENTRY_SIZE or entry == b'\x00' * self.ENTRY_SIZE:
                break
            block, channel, kind = struct.unpack('<3B', entry[:3])
            length, offset = struct.unpack('<II', entry[4:12])
            records.append({"offset": offset,
                            "length": length,
                            "block": block,
                            "channel": channel,
                            "type": kind})

        # Explicit columns keep the table usable even when no entry was found.
        self.chunks = pd.DataFrame(records, columns=self.CHUNK_COLUMNS)

        block_id = self.chunks["block"]
        on_channel = self.chunks["channel"] == self.channel_dict[self.signal]

        self.data_chunk = self._first_chunk(
            (block_id == self.DATA_BLOCK) & on_channel, "data")
        self.param_chunks = [
            self._first_chunk((block_id == self.PARAM_BLOCK) & on_channel, "parameter"),
            self._first_chunk(block_id == self.ACQUISITION_BLOCK, "acquisition"),
            self._first_chunk(block_id == self.OPTICS_BLOCK, "optics"),
            self._first_chunk(block_id == self.SAMPLE_BLOCK, "sample")
        ]

    def _first_chunk(self, mask, description):
        """Return the first row of ``chunks`` selected by ``mask``.

        Args:
            mask: Boolean Series aligned with ``chunks``.
            description: Human-readable block name used in the error message.

        Raises:
            IndexError: If no row matches (same exception type that a bare
                ``.iloc[0]`` on an empty selection raises).
        """
        matches = self.chunks[mask]
        if matches.empty:
            raise IndexError(
                f"No {description} block found in the header of {self.file}")
        return matches.iloc[0]

    def _parse_param_block(self, offset, length):
        """Decode one parameter block into ``params``.

        A block is a sequence of records: a 3-character tag padded to 4 bytes,
        a 16-bit dtype code, a 16-bit size counted in 2-byte units, then the
        value. dtype codes of 2 and above are read as zero-padded UTF-8 text;
        lower codes are numeric and looked up in ``type_dict``. The tag
        ``END`` terminates the block.

        Args:
            offset: Byte offset of the block within the file.
            length: Block length counted in 4-byte words.

        Raises:
            KeyError: If a numeric record has a (dtype, size) combination that
                is not in ``type_dict``.
        """
        param_bin = self.bin_data[offset:offset + length * 4]
        i = 0

        while i < len(param_bin):
            tag = param_bin[i:i + 3].decode('utf-8')
            if tag == 'END':
                break
            i += 4
            dtype, words = struct.unpack('<HH', param_bin[i:i + 4])
            # Separate name so the block length argument is not overwritten.
            size = words * 2
            i += 4
            raw = param_bin[i:i + size]
            if dtype >= 2:
                content = raw.rstrip(b'\x00').decode('utf-8')
            else:
                content = struct.unpack(self.type_dict[dtype, size], raw)[0]
            self.params[tag] = content
            i += size

    def _parse_param_blocks(self):
        """Decode every block listed in ``param_chunks`` into ``params``."""
        for block in self.param_chunks:
            self._parse_param_block(int(block["offset"]), int(block["length"]))

    def _parse_data_block(self):
        """Store the raw bytes of the data block and flag map files.

        Raises:
            ValueError: If no parameters have been parsed yet.
        """
        if not self.params:
            raise ValueError('Parameter list is empty. Was \'_parse_param_blocks\' executed first?')

        offset = int(self.data_chunk["offset"])
        length = int(self.data_chunk["length"])
        data_bin = self.bin_data[offset:offset + length * 4]

        if data_bin.startswith(b'\x00\x00'):
            self.is_map_file = True

        # Kept as raw bytes; decoding is left to the _parse_data_* methods.
        self.data = data_bin

    def _parse_data_single(self):
        """Placeholder for decoding a single-spectrum data block (not implemented)."""
        pass

    def _parse_data_multiple(self):
        """Placeholder for decoding a multi-spectrum data block (not implemented)."""
        pass

    def parse(self):
        """Run all parsing steps, then print the parameters and the data tail.

        Prints ``params`` and the last 20 bytes of ``data``.
        """
        self._parse_header()
        self._parse_param_blocks()
        self._parse_data_block()
        print(self.params)
        print(self.data[-20:])

In [46]:
# Build a parser for the input file and run the full parse. parse() prints the
# decoded parameter dictionary followed by the last 20 bytes of the data block.
parser = OpusParser(path)
parser.parse()

{'NPT': 2741, 'FXV': 440.0, 'LXV': 1810.0, 'CSF': 1.0, 'MXY': 89734.375, 'MNY': 403.2350769042969, 'DPF': 0, 'DAT': '2022/12/06', 'TIM': '12:54:04 (GMT+1)', 'DXU': 'WN', 'RLW': 12771.82442, 'RLP': 25, 'NLP': 100.0, 'LNO': -0.911214, 'LS1': 753.7087, 'LS2': 0.070193, 'LS3': -3e-06, 'LS4': 0.0, 'RNO': -4.602314, 'SC1': 809.4419, 'SC2': 0.105175, 'SC3': -3e-06, 'SC4': 0.0, 'RLT': 25.9386, 'TCC': -64.0, 'CTS': -65.0, 'LPX': 0.0, 'LPW': 0.0, 'LPH': 1, 'INT': 4.0, 'ASS': 4, 'TDS': 61, 'TDE': 120, 'TDB': 60, 'GRT': 1, 'GRP': 140850, 'CLT': 'Tue Dec 06 12:41:14 2022', 'CRT': 'Tue Dec 06 12:41:14 2022', 'CSR': 4294967295, 'SSC': 0, 'AFR': 0, 'GRN': '785 nm, 1200d, 440-1810cm-1', 'BGA': 1, 'RC0': 'YES', 'RC1': 'YES', 'QC3': 'Shape_785 nm_1200_D_Microscope.SPC', 'SRC': '785 nm', 'CHN': 'Microscope', 'OPF': '25%', 'APT': '50x1000 um', 'BLD': '', 'CNM': 'Default', 'CPY': '', 'DPM': '', 'EXP': 'Senterra.XPM', 'SFM': 'getrocknet', 'SNM': 'HL', 'XPP': 'C:\\OPUS_7.2.139.1294\\XPM', 'IST': 'OK'}
b'\x00\

In [47]:
# Show the header directory table built by _parse_header (one row per chunk).
parser.chunks

Unnamed: 0,offset,length,block,channel,type
0,24,120,0,52,0
1,504,42,96,0,0
2,672,30,64,0,0
3,792,56,48,0,0
4,1016,42,160,0,0
5,1184,127168,15,40,80
6,510016,154,32,0,0
7,510632,58488,7,4,80
8,744744,58488,7,132,80
9,978856,58488,11,4,80


In [19]:
# Split the raw data-block bytes on commas into a list of byte strings.
tmp = parser.data.split(b',')

In [21]:
# Keep only the last 10 bytes of the first piece (modifies the list in place).
# NOTE(review): presumably this drops a non-text prefix before the first value - confirm.
tmp[0] = tmp[0][-10:]

In [38]:
# Drop the last 200 entries.
# NOTE(review): not idempotent - every re-run of this cell removes another 200 entries.
tmp = tmp[:-200]

In [39]:
# Display the last 200 entries that remain after the truncation above.
tmp[-200:]

[b'1791.545754',
 b'1788.272688',
 b'1787.753586',
 b'1788.159935',
 b'1786.546008',
 b'1780.641678',
 b'1773.213572',
 b'1768.202285',
 b'1767.989665',
 b'1771.962411',
 b'1775.962353',
 b'1775.673325',
 b'1769.70959',
 b'1761.099702',
 b'1753.35375',
 b'1748.070937',
 b'1744.316541',
 b'1739.675307',
 b'1733.137836',
 b'1725.599918',
 b'1718.528706',
 b'1712.729752',
 b'1707.815106',
 b'1703.868939',
 b'1701.122129',
 b'1700.205666',
 b'1700.515238',
 b'1701.246738',
 b'1702.07325',
 b'1703.293878',
 b'1704.822414',
 b'1705.33722',
 b'1702.940746',
 b'1698.264172',
 b'1693.50405',
 b'1691.041437',
 b'1691.04283',
 b'1690.123044',
 b'1685.200609',
 b'1676.894912',
 b'1669.564717',
 b'1668.278801',
 b'1672.746155',
 b'1678.684964',
 b'1680.895585',
 b'1676.892036',
 b'1669.7349',
 b'1664.192931',
 b'1662.918138',
 b'1664.856443',
 b'1667.01893',
 b'1666.627588',
 b'1662.039916',
 b'1655.735119',
 b'1650.36308',
 b'1647.790781',
 b'1648.375028',
 b'1650.010234',
 b'1649.902731',
 b'1646