In [1]:
from pathlib import Path
from binascii import hexlify, unhexlify, b2a_hex
from io import BufferedReader

folder = Path("data")
file = folder / "timvoz.bin"


In [2]:
BUFFER_SIZE = None

######################
#  global variables  #
######################

# ASN.1 class
UNIVERSAL = 0
APPLICATION = 1
CONTEXT = 2
PRIVATE = 3
className = ["Universal", "Application", "Context-specific", "Private"]

# encoding P/C
PRIMITIVE = 0
CONSTRUCTOR = 1
encodeName = ["Primitive", "Constructor"]

# define the attributes of end-of-contents
EOC = [0, 0, 0, 0]

CLASS_SHIFT = 6
ENCODE_SHIFT = 5
CLASSNUM_MASK = 0x1F

BITS7_MASK = 0x7F
BIT8_SHIFT = 7

HIGH_CLASS_NUM = 0x1F


In [3]:
raw_data = file.read_bytes()
print(f"Read {len(raw_data)} bytes")

Read 8385524 bytes


In [4]:
raw_data[:1]

b'\xa0'

In [5]:
raw_data[0]

160

In [6]:
b2a_hex(raw_data[:1])

b'a0'

In [7]:
int(b2a_hex(raw_data[:1]), 16)

160

In [8]:
current_tag = raw_data[:1].hex()
current_tag

'a0'

In [9]:
hex_string = b2a_hex(raw_data).decode('utf-8')
hex_string[:2]

'a0'

In [10]:
current_pos = 0
bytes_read = 1

In [11]:
data = raw_data[current_pos:]

In [12]:
start = data[0]
start

160

In [13]:
start >> CLASS_SHIFT

2

In [14]:
tag_class = (start >> CLASS_SHIFT) & 0x03
tag_class

2

In [15]:
constructed = bool((start >> ENCODE_SHIFT) & 0x01)
constructed


True

In [16]:
tag_num = start & CLASSNUM_MASK
tag_num

0

In [17]:
tag_num == HIGH_CLASS_NUM

False

In [18]:
def get_tag_id(data):
    position = 0
    bytes_read = 1

    start = data[position]
    tag_class = start >> CLASS_SHIFT
    constructed = bool((start >> ENCODE_SHIFT) % 2)
    tag_number = start & CLASSNUM_MASK

    if tag_number == HIGH_CLASS_NUM:
        tag_number = 0
        position += 1
        while True:
            bytes_read = bytes_read + 1
            byte = data[position] & BITS7_MASK
            tag_number = tag_number * 128 + byte
            if data[position] >> BIT8_SHIFT == 0:
                break
            position += 1

    return (tag_class, constructed, tag_number, bytes_read)


In [19]:
get_tag_id(raw_data)

(2, True, 0, 1)

In [20]:
current_pos += 1
data = raw_data[current_pos:]
first_byte = data[0]


In [21]:
first_byte

129

In [22]:
first_byte >> BIT8_SHIFT

1

In [23]:
length_size = first_byte & 0x7F
length_size

1

In [25]:
length_bytes = data[current_pos+1:current_pos+1+length_size]
len(length_bytes)

1

In [27]:
length = 0
for b in length_bytes:
    length = (length << 8) | b
length

162

In [28]:
def getLength(data):
    bytesConsumed = 1
    first_byte = data[0]

    # definite short form
    if first_byte >> BIT8_SHIFT == 0:
        return (first_byte, bytesConsumed)

    length = 0
    length_size = first_byte & BITS7_MASK

    # indefinite form
    if length_size == 0:
        return (length_size, bytesConsumed)

    # definite long form
    for ptr in range(1, length_size + 1):
        length = length * 256 + data[ptr]

    bytesConsumed += length_size

    return (length, bytesConsumed)


In [29]:
getLength(data)

(191, 2)

In [None]:
from dataclasses import dataclass
from enum import Enum, auto
from typing import Optional, Tuple
from io import BufferedReader, BytesIO


class BerClass(Enum):
    UNIVERSAL = 0
    APPLICATION = 1
    CONTEXT = 2
    PRIVATE = 3


class BerTag:
    def __init__(self, tag_bytes: bytes):
        first_byte = tag_bytes[0]
        self.tag_string = hexlify(tag_bytes[:1]).decode('utf-8') #This doesn't belong to the original ber encoding, it's specific to this implementation
        self.tag_class = BerClass((first_byte >> 6) & 0x03)
        self.constructed = bool((first_byte >> 5) & 0x01)
        self.tag_number = first_byte & 0x1F

        if self.tag_number == 0x1F:
            # Handle multi-byte tag
            self.tag_number = 0
            for b in tag_bytes[1:]:
                self.tag_number = (self.tag_number << 7) | (b & 0x7F)


@dataclass
class TlvObject:
    """Tag-Length-Value object for BER encoding"""

    tag: BerTag
    length: int
    value: bytes
    offset: int
    children: list["TlvObject"] = None


class BerDecoder:
    """Basic Encoding Rules decoder"""

    def __init__(self):
        self.max_depth = None  # Prevent stack overflow

    def decode_tlv(
        self, stream: BufferedReader, offset: int = 0, depth: int = 0
    ) -> Optional[TlvObject]:
        if self.max_depth is not None and depth > self.max_depth:
            raise ValueError("Maximum decoding depth exceeded")

        start_offset = offset
        tag_bytes = self._read_tag(stream)
        if not tag_bytes:
            return None

        tag = BerTag(tag_bytes)
        length, length_size = self._read_length(stream)

        # Update offset after tag and length
        offset += len(tag_bytes) + length_size

        # Read value
        value = stream.read(length)
        if len(value) != length:
            raise ValueError("Unexpected end of data")

        tlv = TlvObject(tag, length, value, start_offset)

        # Parse constructed types recursively
        if tag.constructed:
            tlv.children = []
            value_stream = BufferedReader(BytesIO(value))
            while value_stream.tell() < length:
                if child := self.decode_tlv(value_stream, offset, depth + 1):
                    tlv.children.append(child)
                    offset += child.length

        return tlv

    def _read_tag(self, stream: BufferedReader) -> Optional[bytes]:
        first_byte = stream.read(1)
        if not first_byte:
            return None

        tag_bytes = bytearray(first_byte)
        if (first_byte[0] & 0x1F) == 0x1F:
            # Multi-byte tag
            while True:
                b = stream.read(1)
                if not b:
                    raise ValueError("Unexpected end of tag")
                tag_bytes.append(b[0])
                if not (b[0] & 0x80):
                    break

        return bytes(tag_bytes)

    def _read_length(self, stream: BufferedReader) -> Tuple[int, int]:
        first_byte = stream.read(1)[0]
        if not (first_byte & 0x80):
            return first_byte, 1

        length_size = first_byte & 0x7F
        length_bytes = stream.read(length_size)
        if len(length_bytes) != length_size:
            raise ValueError("Unexpected end of length")

        length = 0
        for b in length_bytes:
            length = (length << 8) | b

        return length, length_size + 1


In [55]:
file_buffer = BufferedReader(file.open('rb'))

In [56]:
ber = BerDecoder()

In [57]:
tlv = ber.decode_tlv(file_buffer)

In [58]:
tlv.tag.tag_number

0

In [60]:
int('a0', 16)

160

In [59]:
(
    tlv.tag.tag_string,
    tlv.tag.tag_class.name,
    tlv.tag.constructed,
    tlv.tag.tag_number,
    tlv.length,
    tlv.offset,
    tlv.value.hex(),
)

('a0',
 'CONTEXT',
 True,
 0,
 191,
 0,
 'a281bc9901018c04000000008903180c0e930f5a43544130395020425230303330388d03000000820316085a910200018a030b000f8b030b000f9f3002253f850741409388117108870d11550a230100003489181187f0940711550044010190860801230100000000f09f31052da020253f8407413429005221f0960734315457594d499f2c01309e01009f29037c3bac81037c3be283010180030002049801009001009b0202178e030000008f010088010295075a464e3341454f9a0136')

In [46]:
(
    tlv.children[0].tag.tag_class.name,
    tlv.children[0].tag.constructed,
    tlv.children[0].tag.tag_number,
    tlv.children[0].length,
    tlv.children[0].offset,
    tlv.children[0].value.hex(),
)

('CONTEXT',
 True,
 2,
 188,
 3,
 '9901018c04000000008903180c0e930f5a43544130395020425230303330388d03000000820316085a910200018a030b000f8b030b000f9f3002253f850741409388117108870d11550a230100003489181187f0940711550044010190860801230100000000f09f31052da020253f8407413429005221f0960734315457594d499f2c01309e01009f29037c3bac81037c3be283010180030002049801009001009b0202178e030000008f010088010295075a464e3341454f9a0136')

In [165]:
tlv.children[0].children

[TlvObject(tag=<__main__.BerTag object at 0x0000021E8A8D49B0>, length=1, value=b'\x01', offset=6, children=None),
 TlvObject(tag=<__main__.BerTag object at 0x0000021E8A8D4FE0>, length=4, value=b'\x00\x00\x00\x00', offset=7, children=None),
 TlvObject(tag=<__main__.BerTag object at 0x0000021E8A8D5AC0>, length=3, value=b'\x18\x0c\x0e', offset=11, children=None),
 TlvObject(tag=<__main__.BerTag object at 0x0000021E8A8D5A00>, length=15, value=b'ZCTA09P BR00308', offset=14, children=None),
 TlvObject(tag=<__main__.BerTag object at 0x0000021E8A8D45F0>, length=3, value=b'\x00\x00\x00', offset=29, children=None),
 TlvObject(tag=<__main__.BerTag object at 0x0000021E8A8D7A70>, length=3, value=b'\x16\x08Z', offset=32, children=None),
 TlvObject(tag=<__main__.BerTag object at 0x0000021E8A8D5610>, length=2, value=b'\x00\x01', offset=35, children=None),
 TlvObject(tag=<__main__.BerTag object at 0x0000021E8A8D4B00>, length=3, value=b'\x0b\x00\x0f', offset=37, children=None),
 TlvObject(tag=<__main__.

In [49]:
int("59", 16)

89

In [47]:
for c in tlv.children[0].children:
    print(c.tag.tag_class.name, c.tag.constructed, c.tag.tag_number, c.length, c.value.hex())

CONTEXT False 25 1 01
CONTEXT False 12 4 00000000
CONTEXT False 9 3 180c0e
CONTEXT False 19 15 5a4354413039502042523030333038
CONTEXT False 13 3 000000
CONTEXT False 2 3 16085a
CONTEXT False 17 2 0001
CONTEXT False 10 3 0b000f
CONTEXT False 11 3 0b000f
CONTEXT False 48 2 253f
CONTEXT False 5 7 41409388117108
CONTEXT False 7 13 11550a230100003489181187f0
CONTEXT False 20 7 11550044010190
CONTEXT False 6 8 01230100000000f0
CONTEXT False 49 5 2da020253f
CONTEXT False 4 7 413429005221f0
CONTEXT False 22 7 34315457594d49
CONTEXT False 44 1 30
CONTEXT False 30 1 00
CONTEXT False 41 3 7c3bac
CONTEXT False 1 3 7c3be2
CONTEXT False 3 1 01
CONTEXT False 0 3 000204
CONTEXT False 24 1 00
CONTEXT False 16 1 00
CONTEXT False 27 2 0217
CONTEXT False 14 3 000000
CONTEXT False 15 1 00
CONTEXT False 8 1 02
CONTEXT False 21 7 5a464e3341454f
CONTEXT False 26 1 36


In [109]:
c.length, c.offset

(3, 11)

In [110]:
c.value, c.value.hex()

(b'\x18\x0c\x0e', '180c0e')