In [17]:
import struct
import os

import numpy as np
import pandas as pd

In [18]:
# Spectrum file used in this notebook, given relative to the working directory;
# it is passed to OpusParser further down.
path = './data/EXTRACT_E_G1_E.coli_785nm_V7_M7.0_000000.0'

In [94]:
class OpusParser:
    """Parse the block directory, parameter blocks and data block of an
    OPUS-style binary spectrum file.

    Typical use::

        parser = OpusParser("spectrum.0")
        parser.parse()
        parser.params   # dict: three-letter tag -> value
        parser.data     # tuple of float32 sample values
        parser.chunks   # DataFrame describing every directory entry

    Args:
        file: Path of the binary file to read.
        signal: Key into ``channel_dict`` selecting which channel's data and
            data-parameter blocks are extracted.

    Raises:
        ValueError: If ``signal`` is not a key of ``channel_dict``.
        FileNotFoundError: If ``file`` does not exist.
    """

    # Signal name -> channel code stored in byte 1 of a directory entry.
    channel_dict = {
        "raman": 40
    }

    # (parameter dtype code, payload size in bytes) -> struct format.
    type_dict = {
        (0, 4): '<I',
        (1, 4): '<f',
        (1, 8): '<d'
    }

    # The block directory occupies this byte range of the file and is made of
    # fixed-size entries.
    HEADER_START = 24
    HEADER_END = 504
    ENTRY_SIZE = 12

    # Column order of the ``chunks`` DataFrame.
    CHUNK_COLUMNS = ["offset", "length", "block", "channel", "type"]

    def __init__(self, file, signal="raman"):
        self.file = file
        self.signal = signal

        self.check_params()

        # Read the file that was actually requested (and validated above),
        # not a module-level global.
        with open(self.file, 'rb') as f:
            self.bin_data = f.read()
        self.header = self.bin_data[self.HEADER_START:self.HEADER_END]

        self.chunks = []
        self.param_chunks = []
        self.data_chunk = None
        self.params = {}
        self.data = None

    def check_params(self):
        """Validate the constructor arguments.

        Raises:
            ValueError: If ``self.signal`` is not a known signal type.
            FileNotFoundError: If ``self.file`` does not exist.
        """
        if self.signal not in self.channel_dict:
            raise ValueError("Unknown signal type")
        if not os.path.exists(self.file):
            raise FileNotFoundError(f"File {self.file} does not exist.")

    def _first_chunk(self, mask, description):
        """Return the first directory row selected by ``mask``.

        Raises:
            IndexError: If no row matches; the message names the missing block.
        """
        matches = self.chunks[mask]
        if matches.empty:
            raise IndexError(f"No {description} block found in {self.file}")
        return matches.iloc[0]

    def parse_header(self):
        """Decode the block directory and locate the blocks to extract.

        Each 12-byte entry holds: byte 0 block code, byte 1 channel code,
        byte 2 type code, one unused byte, then two little-endian uint32
        values (length, offset). An all-zero entry ends the directory.

        Sets ``self.header`` (list of raw entries), ``self.chunks``
        (DataFrame), ``self.data_chunk`` and ``self.param_chunks``. The
        directory is always re-read from ``self.bin_data``, so calling this
        method more than once is safe.

        Raises:
            IndexError: If one of the required blocks is absent.
        """
        raw_header = self.bin_data[self.HEADER_START:self.HEADER_END]
        entries = [raw_header[i:i + self.ENTRY_SIZE]
                   for i in range(0, len(raw_header), self.ENTRY_SIZE)]

        records = []
        for entry in entries:
            # Stop at the terminating all-zero entry, or at a trailing entry
            # cut short because the file ends inside the directory.
            if entry == b'\x00' * self.ENTRY_SIZE or len(entry) < self.ENTRY_SIZE:
                break
            block, channel, type_code, length, offset = struct.unpack('<BBBxII', entry)
            records.append({"offset": offset,
                            "length": length,
                            "block": block,
                            "channel": channel,
                            "type": type_code})

        self.header = entries
        # Explicit columns keep the frame well-formed even with zero entries.
        self.chunks = pd.DataFrame(records, columns=self.CHUNK_COLUMNS)

        blocks = self.chunks["block"]
        data_mask = blocks == 15
        param_mask = blocks == 31
        acquisition_mask = blocks == 32
        optics_mask = blocks == 96
        sample_mask = blocks == 160
        channel_mask = self.chunks["channel"] == self.channel_dict[self.signal]

        self.data_chunk = self._first_chunk(data_mask & channel_mask, "data")
        self.param_chunks = [
            self._first_chunk(param_mask & channel_mask, "data parameter"),
            self._first_chunk(acquisition_mask, "acquisition parameter"),
            self._first_chunk(optics_mask, "optics parameter"),
            self._first_chunk(sample_mask, "sample parameter"),
        ]

    def parse_param_block(self, offset, length):
        """Decode one parameter block into ``self.params``.

        Args:
            offset: Byte offset of the block inside the file.
            length: Block length in 4-byte units.

        Each parameter is a 3-character tag followed by one skipped byte, a
        uint16 dtype code, a uint16 payload size in 2-byte units, and the
        payload. The tag ``END`` terminates the block. dtype codes of 2 and
        above are decoded as NUL-padded UTF-8 text; codes 0 and 1 are
        numeric and decoded via ``type_dict``.

        Raises:
            KeyError: If a numeric parameter has a (dtype, size) combination
                missing from ``type_dict``.
        """
        param_bin = self.bin_data[offset:offset + length * 4]
        i = 0

        while i < len(param_bin):
            tag = param_bin[i:i + 3].decode('utf-8')
            if tag == 'END':
                break
            i += 4
            dtype, size_units = struct.unpack('<HH', param_bin[i:i + 4])
            # Kept separate from the ``length`` argument, which is the size
            # of the whole block.
            size = size_units * 2
            i += 4
            payload = param_bin[i:i + size]
            if dtype >= 2:
                content = payload.rstrip(b'\x00').decode('utf-8')
            else:
                content = struct.unpack(self.type_dict[dtype, size], payload)[0]
            self.params[tag] = content
            i += size

    def parse_param_blocks(self):
        """Decode every block listed in ``self.param_chunks``."""
        for block in self.param_chunks:
            self.parse_param_block(int(block["offset"]), int(block["length"]))

    def parse_data_block(self):
        """Decode the data block into ``self.data`` as a tuple of floats.

        The directory length is taken as the number of little-endian float32
        values to read.
        """
        # NOTE(review): the chunk table shown in this notebook lists 6822
        # words for the data block while the printed 'NPT' parameter is 6821;
        # every word is returned untrimmed - confirm whether the extra value
        # should be dropped.
        offset = int(self.data_chunk["offset"])
        length = int(self.data_chunk["length"])
        data_bin = self.bin_data[offset:offset + length * 4]
        self.data = struct.unpack(f'<{length}f', data_bin)

    def parse(self):
        """Run the full parse and print the parameters and the last 20 data
        values."""
        self.parse_header()
        self.parse_param_blocks()
        self.parse_data_block()
        print(self.params)
        print(self.data[-20:])

In [95]:
# Build a parser for the file at `path` and run the full parse; parse() prints
# the collected parameter dict and the last 20 data values.
parser = OpusParser(path)
parser.parse()

{'NPT': 6821, 'FXV': 90.0, 'LXV': 3500.0, 'CSF': 1.0, 'MXY': 2974.1318359375, 'MNY': 4.041933536529541, 'DPF': 1, 'DAT': '2020/05/27', 'TIM': '17:03:13 (GMT+2)', 'DXU': 'WN', 'RLW': 12765.66143, 'RLP': 100, 'NLP': 100.0, 'LNO': 0.569516, 'LS1': 753.7087, 'LS2': 0.070193, 'LS3': -3e-06, 'LS4': 0.0, 'RNO': 0.899705, 'SC1': 784.4013, 'SC2': 0.32821, 'SC3': -0.0, 'SC4': -0.0, 'RLT': 25.9612, 'TCC': -62.0, 'CTS': -65.0, 'LPX': 0.0, 'LPW': 0.0, 'LPH': 1, 'INT': 2.0, 'ASS': 5, 'TDS': 61, 'TDE': 120, 'TDB': 60, 'GRT': 2, 'GRP': 35330, 'CLT': 'Wed May 27 17:01:20 2020', 'CRT': 'Wed May 27 17:01:20 2020', 'CSR': 4294967295, 'SSC': 0, 'AFR': 0, 'GRN': '785 nm, 400a, 90-3500cm-1', 'BGA': 2, 'RC0': 'YES', 'RC1': 'YES', 'QC3': 'Shape_785 nm_400_A_Microscope.SPC', 'GFW': 0, 'GBW': 0, 'BFW': 0, 'BBW': 0, 'PKA': 0, 'PRA': 0, 'PKL': 0, 'PRL': 0, 'SRT': 0.0, 'DUR': 0.0, 'ERT': 0.0, 'SRC': '785 nm', 'CHN': 'Microscope', 'OPF': '100%', 'APT': '25x1000 um', 'BLD': '', 'CNM': 'Default', 'CPY': '', 'DPM': '',

In [87]:
# Display the directory table built by parse_header(): one row per header entry.
parser.chunks

Unnamed: 0,offset,length,block,channel,type
0,24,120,0,52,0
1,504,43,23,4,0
2,680,43,23,132,0
3,856,43,23,56,0
4,1120,56,48,0,0
5,1344,30,64,0,0
6,1464,20,96,0,0
7,1544,42,160,0,0
8,1712,44,31,40,0
9,1888,6822,15,40,0
