In [1]:
import struct
import os

import numpy as np
import pandas as pd

In [2]:
# Location of the measurement file that the parsing cells below operate on.
path = "./data/HL_R1_1.0"

In [32]:
class OpusParser:
    """Parse a binary spectrum file into a parameter dict and a NumPy array.

    The file layout assumed by this parser (presumably the OPUS format, going
    by the class name -- confirm against the instrument documentation):

    * bytes 24..503 hold a directory of 12-byte entries; each entry stores a
      block code (byte 0), a channel code (byte 1), a type code (byte 2), the
      block length in 4-byte words (bytes 4..7) and the block's byte offset
      in the file (bytes 8..11). The first all-zero entry ends the directory.
    * parameter blocks are sequences of ``tag / dtype / size / payload``
      records terminated by an ``END`` tag.
    * the data block holds little-endian 32-bit floats, either one spectrum
      or several spectra preceded by a small sub-header.

    Typical use::

        parser = OpusParser("some_file")
        parser.parse()
        parser.params   # dict of parameters
        parser.data     # 1-D array (single spectrum) or 2-D array (several)
    """

    # Signal name -> channel code found in byte 1 of a directory entry.
    channel_dict = {
        "raman": 40
    }

    # (parameter dtype code, payload size in bytes) -> struct format string.
    type_dict = {
        (0, 4): '<I',
        (1, 4): '<f',
        (1, 8): '<d'
    }

    # Byte range of the block directory and the size of one directory entry.
    _HEADER_SLICE = slice(24, 504)
    _ENTRY_SIZE = 12

    def __init__(self, file, signal="raman", metadata=False):
        """Validate the arguments and read the whole file into memory.

        Args:
            file: Path of the file to parse.
            signal: Key of ``channel_dict`` selecting which channel's data
                and parameter blocks are read.
            metadata: Stored on the instance as ``self.metadata``; no method
                of this class reads it.

        Raises:
            ValueError: If ``signal`` is not a key of ``channel_dict``.
            FileNotFoundError: If ``file`` does not exist.
        """
        self.file = file
        self.signal = signal

        self._check_params()

        # Read the file given to the constructor (not a notebook-level global).
        with open(self.file, 'rb') as f:
            self.bin_data = f.read()
        self.header = self.bin_data[self._HEADER_SLICE]

        self.chunks = []
        self.param_chunks = []
        self.data_chunk = None
        self.params = {}
        self.data = None
        self.is_map_file = False
        self.metadata = metadata

    def _check_params(self):
        """Reject unknown signal names and non-existent files."""
        if self.signal not in self.channel_dict:
            raise ValueError("Unknown signal type")
        if not os.path.exists(self.file):
            raise FileNotFoundError(f"File {self.file} does not exist.")

    def _first_chunk(self, mask, description):
        """Return the first directory row selected by ``mask``.

        Raises:
            ValueError: If no row matches, naming the missing block.
        """
        matches = self.chunks[mask]
        if matches.empty:
            raise ValueError(f"No {description} block found in {self.file}.")
        return matches.iloc[0]

    def _parse_header(self):
        """Decode the block directory and pick the blocks to be parsed.

        Sets ``self.header`` (list of raw 12-byte entries), ``self.chunks``
        (DataFrame with columns offset, length, block, channel, type),
        ``self.data_chunk`` and ``self.param_chunks``.

        Raises:
            ValueError: If the directory is empty or a required block is absent.
        """
        # Always start from the raw bytes so that parse() can be called again.
        raw_header = self.bin_data[self._HEADER_SLICE]
        step = self._ENTRY_SIZE
        self.header = [raw_header[i:i + step] for i in range(0, len(raw_header), step)]

        entries = []
        for entry in self.header:
            # Stop at the all-zero terminator; a short tail (truncated file)
            # cannot be a valid entry either.
            if len(entry) < step or entry == b'\x00' * step:
                break
            # Three code bytes, one unused byte, then length and offset.
            block, channel, type_code, length, offset = struct.unpack('<BBBxII', entry)
            entries.append({"offset": offset,
                            "length": length,
                            "block": block,
                            "channel": channel,
                            "type": type_code})

        if not entries:
            raise ValueError(f"No directory entries found in {self.file}.")

        self.chunks = pd.DataFrame(entries)

        block_codes = self.chunks["block"]
        data_mask = block_codes == 15
        param_mask = block_codes == 31
        acquisition_mask = block_codes == 32
        optics_mask = block_codes == 96
        sample_mask = block_codes == 160
        channel_mask = self.chunks["channel"] == self.channel_dict[self.signal]

        self.data_chunk = self._first_chunk(data_mask & channel_mask, "data")
        self.param_chunks = [
            self._first_chunk(param_mask & channel_mask, "data parameter"),
            self._first_chunk(acquisition_mask, "acquisition parameter"),
            self._first_chunk(optics_mask, "optics parameter"),
            self._first_chunk(sample_mask, "sample parameter")
        ]

    def _parse_param_block(self, offset, length):
        """Decode one parameter block into ``self.params``.

        Args:
            offset: Byte offset of the block inside the file.
            length: Block length in 4-byte words.

        Each record is a 3-character tag (stored in 4 bytes), a 16-bit dtype
        code, a 16-bit payload size in 2-byte units, and the payload. dtype
        codes >= 2 are decoded as NUL-padded UTF-8 text; codes 0 and 1 are
        unpacked via ``type_dict``. Later tags overwrite earlier equal tags.

        Raises:
            KeyError: If a numeric record has a (dtype, size) combination
                missing from ``type_dict``.
        """
        param_bin = self.bin_data[offset:offset + length * 4]
        pos = 0

        while pos < len(param_bin):
            tag = param_bin[pos:pos + 3].decode('utf-8')
            if tag == 'END':
                break
            dtype, n_units = struct.unpack('<HH', param_bin[pos + 4:pos + 8])
            size = n_units * 2  # payload size is stored in 2-byte units
            pos += 8
            payload = param_bin[pos:pos + size]
            if dtype >= 2:
                content = payload.rstrip(b'\x00').decode('utf-8')
            else:
                content = struct.unpack(self.type_dict[dtype, size], payload)[0]
            self.params[tag] = content
            pos += size

    def _parse_param_blocks(self):
        """Decode every block selected in ``self.param_chunks``."""
        for block in self.param_chunks:
            self._parse_param_block(int(block["offset"]), int(block["length"]))

    def _parse_data_block(self):
        """Decode the data block into ``self.data``.

        Requires the parameters (in particular ``NPT``, the number of points
        per spectrum) to have been parsed already.

        Raises:
            ValueError: If ``self.params`` is empty.
        """
        if not self.params:
            raise ValueError('Parameter list is empty. Was \'_parse_param_blocks\' executed first?')

        offset = int(self.data_chunk["offset"])
        length = int(self.data_chunk["length"])
        data_bin = self.bin_data[offset:offset + length * 4]

        # A block longer than NPT + 1 floats is treated as holding several spectra.
        if len(data_bin) > (self.params['NPT'] + 1) * 4:
            self.is_map_file = True

        if self.is_map_file:
            self.data = self._parse_data_multiple(data_bin)
        else:
            self.data = self._parse_data_single(data_bin)

    def _parse_data_single(self, data_bin):
        """Return the first ``NPT`` little-endian float32 values of ``data_bin``.

        Extra trailing bytes are ignored. The result is a 1-D float64 array.

        Raises:
            struct.error: If ``data_bin`` holds fewer than ``NPT`` floats.
        """
        npt = self.params['NPT']
        return np.asarray(struct.unpack(f'<{npt}f', data_bin[:4 * npt]))

    def _parse_data_multiple(self, data_bin):
        """Return all spectra of a multi-spectrum data block as a 2-D array.

        Bytes 4..19 of the block hold four uint32 values, used here as:
        number of spectra, byte offset of the first spectrum, bytes per
        spectrum, and bytes to skip after each spectrum.
        """
        count, first_offset, spectrum_bytes, gap_bytes = struct.unpack('<4I', data_bin[4:20])

        stride = spectrum_bytes + gap_bytes
        spectra = [
            self._parse_data_single(
                data_bin[first_offset + k * stride:first_offset + k * stride + spectrum_bytes]
            )
            for k in range(count)
        ]
        return np.stack(spectra)

    def parse(self):
        """Run the full pipeline: directory, then parameters, then data.

        Results are left in ``self.chunks``, ``self.params`` and ``self.data``.
        Calling it again re-parses from the bytes read in ``__init__``.
        """
        self._parse_header()
        self._parse_param_blocks()
        self._parse_data_block()

In [33]:
# Build a parser for the configured file, then decode its directory,
# parameter blocks and data block in one go.
parser = OpusParser(file=path)
parser.parse()

{'NPT': 2741, 'FXV': 440.0, 'LXV': 1810.0, 'CSF': 1.0, 'MXY': 89734.375, 'MNY': 403.2350769042969, 'DPF': 0, 'DAT': '2022/12/06', 'TIM': '12:54:04 (GMT+1)', 'DXU': 'WN', 'RLW': 12771.82442, 'RLP': 25, 'NLP': 100.0, 'LNO': -0.911214, 'LS1': 753.7087, 'LS2': 0.070193, 'LS3': -3e-06, 'LS4': 0.0, 'RNO': -4.602314, 'SC1': 809.4419, 'SC2': 0.105175, 'SC3': -3e-06, 'SC4': 0.0, 'RLT': 25.9386, 'TCC': -64.0, 'CTS': -65.0, 'LPX': 0.0, 'LPW': 0.0, 'LPH': 1, 'INT': 4.0, 'ASS': 4, 'TDS': 61, 'TDE': 120, 'TDB': 60, 'GRT': 1, 'GRP': 140850, 'CLT': 'Tue Dec 06 12:41:14 2022', 'CRT': 'Tue Dec 06 12:41:14 2022', 'CSR': 4294967295, 'SSC': 0, 'AFR': 0, 'GRN': '785 nm, 1200d, 440-1810cm-1', 'BGA': 1, 'RC0': 'YES', 'RC1': 'YES', 'QC3': 'Shape_785 nm_1200_D_Microscope.SPC', 'SRC': '785 nm', 'CHN': 'Microscope', 'OPF': '25%', 'APT': '50x1000 um', 'BLD': '', 'CNM': 'Default', 'CPY': '', 'DPM': '', 'EXP': 'Senterra.XPM', 'SFM': 'getrocknet', 'SNM': 'HL', 'XPP': 'C:\\OPUS_7.2.139.1294\\XPM', 'IST': 'OK'}
(39, 27

In [34]:
# Show the decoded intensities; the array is 2-D when the data block held
# several spectra (one row per spectrum), 1-D otherwise.
parser.data

array([[ 6200.74951172,  6199.67773438,  6199.08740234, ...,
         1377.92883301,  1379.99450684,  1379.09667969],
       [25481.87304688, 25457.59570312, 25426.7578125 , ...,
         3272.91821289,  3285.08544922,  3281.96289062],
       [ 6189.95849609,  6190.67919922,  6186.71435547, ...,
         1672.83325195,  1672.56005859,  1669.35839844],
       ...,
       [12204.11914062, 12202.46875   , 12188.28125   , ...,
         1953.33056641,  1950.16796875,  1938.24255371],
       [ 6735.87207031,  6725.36376953,  6720.59082031, ...,
         1303.07165527,  1303.66137695,  1301.78259277],
       [ 8204.6484375 ,  8211.54882812,  8215.24121094, ...,
         1784.45947266,  1793.18737793,  1794.40942383]])