In [2]:
from pathlib import Path
from binascii import hexlify, b2a_hex
from io import BufferedReader

folder = Path("data")
file = folder / "timvoz.bin"

In [2]:
BUFFER_SIZE = None

######################
#  global variables  #
######################

# ASN.1 class
UNIVERSAL = 0
APPLICATION = 1
CONTEXT = 2
PRIVATE = 3
className = ["Universal", "Application", "Context-specific", "Private"]

# encoding P/C
PRIMITIVE = 0
CONSTRUCTOR = 1
encodeName = ["Primitive", "Constructor"]

# define the attributes of end-of-contents
EOC = [0, 0, 0, 0]

CLASS_SHIFT = 6
ENCODE_SHIFT = 5
CLASSNUM_MASK = 0x1F

BITS7_MASK = 0x7F
BIT8_SHIFT = 7

HIGH_CLASS_NUM = 0x1F

In [3]:
raw_data = file.read_bytes()
print(f"Read {len(raw_data)} bytes")

Read 8385524 bytes


In [4]:
raw_data[:1]

b'\xa0'

In [5]:
raw_data[0]

160

In [6]:
b2a_hex(raw_data[:1])

b'a0'

In [7]:
int(b2a_hex(raw_data[:1]), 16)

160

In [8]:
current_tag = raw_data[:1].hex()
current_tag

'a0'

In [9]:
hex_string = b2a_hex(raw_data).decode("utf-8")
hex_string[:2]

'a0'

In [10]:
current_pos = 0
bytes_read = 1

In [11]:
data = raw_data[current_pos:]

In [12]:
start = data[0]
start

160

In [13]:
start >> CLASS_SHIFT

2

In [14]:
tag_class = (start >> CLASS_SHIFT) & 0x03
tag_class

2

In [15]:
constructed = bool((start >> ENCODE_SHIFT) & 0x01)
constructed

True

In [16]:
tag_num = start & CLASSNUM_MASK
tag_num

0

In [17]:
tag_num == HIGH_CLASS_NUM

False

In [18]:
def get_tag_id(data):
    position = 0
    bytes_read = 1

    start = data[position]
    tag_class = start >> CLASS_SHIFT
    constructed = bool((start >> ENCODE_SHIFT) % 2)
    tag_number = start & CLASSNUM_MASK

    if tag_number == HIGH_CLASS_NUM:
        tag_number = 0
        position += 1
        while True:
            bytes_read = bytes_read + 1
            byte = data[position] & BITS7_MASK
            tag_number = tag_number * 128 + byte
            if data[position] >> BIT8_SHIFT == 0:
                break
            position += 1

    return (tag_class, constructed, tag_number, bytes_read)

In [19]:
get_tag_id(raw_data)

(2, True, 0, 1)

In [20]:
current_pos += 1
data = raw_data[current_pos:]
first_byte = data[0]

In [21]:
first_byte

129

In [22]:
first_byte >> BIT8_SHIFT

1

In [23]:
length_size = first_byte & 0x7F
length_size

1

In [24]:
length_bytes = data[current_pos + 1 : current_pos + 1 + length_size]
len(length_bytes)

1

In [25]:
length = 0
for b in length_bytes:
    length = (length << 8) | b
length

162

In [26]:
def getLength(data):
    bytesConsumed = 1
    first_byte = data[0]

    # definite short form
    if first_byte >> BIT8_SHIFT == 0:
        return (first_byte, bytesConsumed)

    length = 0
    length_size = first_byte & BITS7_MASK

    # indefinite form
    if length_size == 0:
        return (length_size, bytesConsumed)

    # definite long form
    for ptr in range(1, length_size + 1):
        length = length * 256 + data[ptr]

    bytesConsumed += length_size

    return (length, bytesConsumed)

In [27]:
getLength(data)

(191, 2)

In [5]:
from dataclasses import dataclass
from enum import Enum
from typing import Optional, Tuple
from io import BytesIO


class BerClass(Enum):
    UNIVERSAL = 0
    APPLICATION = 1
    CONTEXT = 2
    PRIVATE = 3


class BerTag:
    def __init__(self, tag_bytes: bytes):
        first_byte = tag_bytes[0]
        self.tag_string = hexlify(
            tag_bytes[:1]
        ).decode(
            "utf-8"
        )  # This doesn't belong to the original ber encoding, it's specific to this implementation
        self.tag_class = BerClass((first_byte >> 6) & 0x03)
        self.constructed = bool((first_byte >> 5) & 0x01)
        self.tag_number = first_byte & 0x1F

        if self.tag_number == 0x1F:
            # Handle multi-byte tag
            self.tag_number = 0
            for b in tag_bytes[1:]:
                self.tag_number = (self.tag_number << 7) | (b & 0x7F)


@dataclass
class TlvObject:
    """Tag-Length-Value object for BER encoding"""

    tag: BerTag
    length: int
    value: bytes
    offset: int
    children: list["TlvObject"] = None


class BerDecoder:
    """Basic Encoding Rules decoder"""

    def __init__(self):
        self.max_depth = None  # Prevent stack overflow

    def decode_tlv(
        self, stream: BufferedReader, offset: int = 0, depth: int = 0
    ) -> Optional[TlvObject]:
        if self.max_depth is not None and depth > self.max_depth:
            raise ValueError("Maximum decoding depth exceeded")

        start_offset = offset
        tag_bytes = self._read_tag(stream)
        if not tag_bytes:
            return None

        tag = BerTag(tag_bytes)
        length, length_size = self._read_length(stream)

        # Update offset after tag and length
        offset += len(tag_bytes) + length_size

        # Read value
        value = stream.read(length)
        if len(value) != length:
            raise ValueError("Unexpected end of data")

        tlv = TlvObject(tag, length, value, start_offset)

        # Parse constructed types recursively
        if tag.constructed:
            tlv.children = []
            value_stream = BufferedReader(BytesIO(value))
            while value_stream.tell() < length:
                if child := self.decode_tlv(value_stream, offset, depth + 1):
                    tlv.children.append(child)
                    offset += child.length

        return tlv

    def _read_tag(self, stream: BufferedReader) -> Optional[bytes]:
        first_byte = stream.read(1)
        if not first_byte:
            return None

        tag_bytes = bytearray(first_byte)
        if (first_byte[0] & 0x1F) == 0x1F:
            # Multi-byte tag
            while True:
                b = stream.read(1)
                if not b:
                    raise ValueError("Unexpected end of tag")
                tag_bytes.append(b[0])
                if not (b[0] & 0x80):
                    break

        return bytes(tag_bytes)

    def _read_length(self, stream: BufferedReader) -> Tuple[int, int]:
        first_byte = stream.read(1)[0]
        if not (first_byte & 0x80):
            return first_byte, 1

        length_size = first_byte & 0x7F
        length_bytes = stream.read(length_size)
        if len(length_bytes) != length_size:
            raise ValueError("Unexpected end of length")

        length = 0
        for b in length_bytes:
            length = (length << 8) | b

        return length, length_size + 1

In [None]:
# file = folder / "clarovoz.bin"
# file_buffer = BufferedReader(file.open("rb"))


In [None]:
# from time import sleep
# blocks = []
# ber = BerDecoder()
# while tlv := ber.decode_tlv(file_buffer):
#     if tlv.children is not None:
#         blocks.extend(child.tag.tag_string for child in tlv.children)

In [22]:
len(blocks)

5324

In [23]:
set(blocks)

{'00',
 '30',
 'a0',
 'a1',
 'a2',
 'a3',
 'a4',
 'a5',
 'a7',
 'a9',
 'ab',
 'ae',
 'af',
 'b0',
 'b9'}