OpenCyphal · pavel-kirienko · Oct 10, 2020 · Oct 10, 2020 · Oct 10, 2020 · Oct 10, 2020
diff --git a/pyuavcan/transport/serial/__init__.py b/pyuavcan/transport/serial/__init__.py
@@ -64,32 +64,29 @@
 Total header size: 32 bytes (256 bits).
 
 The header is prepended before the frame payload; the resulting structure is
-encoded into its serialized form using the following packet format (influenced by HDLC, SLIP, POPCOP):
+encoded into its serialized form using the following packet format:
 
 +-------------------------+--------------+---------------+--------------------------------+-------------------------+
-| Frame delimiter **0x9E**|Escaped header|Escaped payload| Escaped CRC-32C of the payload | Frame delimiter **0x9E**|
+| Frame delimiter **0x00**|Escaped header|Escaped payload| Escaped CRC-32C of the payload | Frame delimiter **0x00**|
 +=========================+==============+===============+================================+=========================+
 | 1 byte                  | 32..64 bytes | >=0 bytes     | 4..8 bytes                     | 1 byte                  |
 +-------------------------+--------------+---------------+--------------------------------+-------------------------+
-| Single-byte frame       | The following bytes are      | Four bytes long, little-endian | Same frame delimiter as |
-| delimiter **0x9E**.     | escaped: **0x9E** (frame     | byte order; bytes 0x9E (frame  | at the start.           |
-| Begins a new frame and  | delimiter); **0x8E**         | delimiter) and 0x8E (escape    | Terminates the current  |
-| possibly terminates the | (escape character). An       | character) are escaped like in | frame and possibly      |
-| previous frame.         | escaped byte is bitwise      | the header/payload. The CRC is | begins the next frame.  |
-|                         | inverted and prepended with  | computed over the unescaped    |                         |
-|                         | the escape character 0x8E.   | (i.e., original form) payload, |                         |
-|                         | For example: byte 0x9E is    | not including the header       |                         |
-|                         | transformed into 0x8E        | (because the header has a      |                         |
-|                         | followed by 0x71.            | dedicated CRC).                |                         |
+| Single-byte frame       |                              | Four bytes long, little-endian | Same frame delimiter as |
+| delimiter **0x00**.     |                              | byte order; The CRC is         | at the start.           |
+| Begins a new frame and  |                              | computed over the unescaped    | Terminates the current  |
+| possibly terminates the |                              | (i.e., original form) payload, | frame and possibly      |
+| previous frame.         |                              | not including the header       | begins the next frame.  |
+|                         |                              | (because the header has a      |                         |
+|                         |                              | dedicated CRC).                |                         |
+|                         +------------------------------+--------------------------------+                         |
+|                         | This part is escaped using COBS alorithm by Chesire and Baker |                         |
+|                         | http://www.stuartcheshire.org/papers/COBSforToN.pdf           |                         |
 +-------------------------+------------------------------+--------------------------------+-------------------------+
 
 There are no magic bytes in this format because the strong CRC and the data type hash field render the
-format sufficiently recognizable. The worst case overhead exceeds 100% if every byte of the payload and the CRC
-is either 0x9E or 0x8E. Despite the overhead, this format is still considered superior to the alternatives
-since it is robust and guarantees a constant recovery time. Consistent-overhead byte stuffing (COBS) is sometimes
-employed for similar tasks, but it should be understood that while it offers a substantially lower overhead,
-it undermines the synchronization recovery properties of the protocol. There is a somewhat relevant discussion
-at https://github.com/vedderb/bldc/issues/79.
+format sufficiently recognizable. The worst case overhead 1 byte in every 254 bytes of the payload and the CRC
+There is a somewhat relevant discussion
+at https://forum.uavcan.org/t/uavcan-serial-issues-with-dma-friendliness-and-bandwidth-overhead/846.
 
 The format can share the same serial medium with ASCII text exchanges such as command-line interfaces or
 real-time logging. The special byte values employed by the format do not belong to the ASCII character set.

diff --git a/pyuavcan/transport/serial/_frame.py b/pyuavcan/transport/serial/_frame.py
@@ -7,9 +7,10 @@
 from __future__ import annotations
 import typing
 import struct
-import itertools
+import math
 import dataclasses
 import pyuavcan
+from cobs import cobs
-from cobs import cobs
+from cobs import cobs    # type: ignore
-from cobs import cobs
+from cobs import cobs    # type: ignore
 
 _VERSION = 0
 
@@ -34,8 +35,7 @@ class SerialFrame(pyuavcan.transport.commons.high_overhead_transport.Frame):
 
     NODE_ID_RANGE = range(NODE_ID_MASK + 1)
 
-    FRAME_DELIMITER_BYTE = 0x9E
-    ESCAPE_PREFIX_BYTE   = 0x8E
+    FRAME_DELIMITER_BYTE = 0x00
 
     NUM_OVERHEAD_BYTES_EXCEPT_DELIMITERS_AND_ESCAPING = _HEADER_SIZE + _CRC_SIZE_BYTES
 
@@ -105,23 +105,37 @@ def compile_into(self, out_buffer: bytearray) -> memoryview:
 
         payload_crc_bytes = pyuavcan.transport.commons.crc.CRC32C.new(self.payload).value_as_bytes
 
-        escapees = self.FRAME_DELIMITER_BYTE, self.ESCAPE_PREFIX_BYTE
         out_buffer[0] = self.FRAME_DELIMITER_BYTE
         next_byte_index = 1
-        for nb in itertools.chain(header, self.payload, payload_crc_bytes):
-            if nb in escapees:
-                out_buffer[next_byte_index] = self.ESCAPE_PREFIX_BYTE
-                next_byte_index += 1
-                nb ^= 0xFF
-            out_buffer[next_byte_index] = nb
-            next_byte_index += 1
+
+        packet_bytes = header + self.payload + payload_crc_bytes
+        encoded_image = cobs.encode(packet_bytes)
+        # place in the buffer and update next_byte_index:
+        out_buffer[next_byte_index:next_byte_index+len(encoded_image)] = encoded_image
+        next_byte_index += len(encoded_image)
 
         out_buffer[next_byte_index] = self.FRAME_DELIMITER_BYTE
         next_byte_index += 1
 
         assert (next_byte_index - 2) >= (len(header) + len(self.payload) + len(payload_crc_bytes))
         return memoryview(out_buffer)[:next_byte_index]
 
+    @staticmethod
+    def calc_cobs_size(payload_size_bytes: int) -> int:
+        return int(math.ceil(payload_size_bytes * 255.0 / 254.0))
-        return int(math.ceil(payload_size_bytes * 255.0 / 254.0))
+        return (payload_size_bytes * 255 + 253) // 254
-        return int(math.ceil(payload_size_bytes * 255.0 / 254.0))
+        return (payload_size_bytes * 255 + 253) // 254
+
+    @staticmethod
+    def parse_from_cobs_image(header_payload_crc_image: memoryview,
+                              timestamp: pyuavcan.transport.Timestamp) -> typing.Optional[SerialFrame]:
+        """
+        :returns: Frame or None if the image is invalid.
+        """
+        try:
+            unescaped_image = cobs.decode(bytearray(header_payload_crc_image))
+        except cobs.DecodeError:
+            return None
+        return SerialFrame.parse_from_unescaped_image(memoryview(unescaped_image), timestamp)
+
     @staticmethod
     def parse_from_unescaped_image(header_payload_crc_image: memoryview,
                                    timestamp: pyuavcan.transport.Timestamp) -> typing.Optional[SerialFrame]:
@@ -184,43 +198,39 @@ def _unittest_frame_compile_message() -> None:
     f = SerialFrame(timestamp=Timestamp.now(),
                     priority=Priority.HIGH,
                     source_node_id=SerialFrame.FRAME_DELIMITER_BYTE,
-                    destination_node_id=SerialFrame.ESCAPE_PREFIX_BYTE,
+                    destination_node_id=SerialFrame.FRAME_DELIMITER_BYTE,
                     data_specifier=MessageDataSpecifier(12345),
                     data_type_hash=0xdead_beef_bad_c0ffe,
                     transfer_id=1234567890123456789,
                     index=1234567,
                     end_of_transfer=True,
-                    payload=memoryview(b'abcd\x9Eef\x8E'))
+                    payload=memoryview(b'abcd\x00ef\x00'))
 
     buffer = bytearray(0 for _ in range(1000))
     mv = f.compile_into(buffer)
 
     assert mv[0] == SerialFrame.FRAME_DELIMITER_BYTE
     assert mv[-1] == SerialFrame.FRAME_DELIMITER_BYTE
-    segment = bytes(mv[1:-1])
-    assert SerialFrame.FRAME_DELIMITER_BYTE not in segment
+
+    segment_cobs = bytes(mv[1:-1])
+    assert SerialFrame.FRAME_DELIMITER_BYTE not in segment_cobs
+
+    segment = cobs.decode(segment_cobs)
 
     # Header validation
     assert segment[0] == _VERSION
     assert segment[1] == int(Priority.HIGH)
-    assert segment[2] == SerialFrame.ESCAPE_PREFIX_BYTE
-    assert (segment[3], segment[4]) == (SerialFrame.FRAME_DELIMITER_BYTE ^ 0xFF, 0)
-    assert segment[5] == SerialFrame.ESCAPE_PREFIX_BYTE
-    assert (segment[6], segment[7]) == (SerialFrame.ESCAPE_PREFIX_BYTE ^ 0xFF, 0)
-    assert segment[8:10] == 12345 .to_bytes(2, 'little')
-    assert segment[10:18] == 0xdead_beef_bad_c0ffe .to_bytes(8, 'little')
-    assert segment[18:26] == 1234567890123456789 .to_bytes(8, 'little')
-    assert segment[26:30] == (1234567 + 0x8000_0000).to_bytes(4, 'little')
+    assert (segment[2], segment[3]) == (SerialFrame.FRAME_DELIMITER_BYTE, 0)
+    assert (segment[4], segment[5]) == (SerialFrame.FRAME_DELIMITER_BYTE, 0)
+    assert segment[6:8] == 12345 .to_bytes(2, 'little')
+    assert segment[8:16] == 0xdead_beef_bad_c0ffe .to_bytes(8, 'little')
+    assert segment[16:24] == 1234567890123456789 .to_bytes(8, 'little')
+    assert segment[24:28] == (1234567 + 0x8000_0000).to_bytes(4, 'little')
     # Header CRC here
 
     # Payload validation
-    assert segment[34:38] == b'abcd'
-    assert segment[38] == SerialFrame.ESCAPE_PREFIX_BYTE
-    assert segment[39] == 0x9E ^ 0xFF
-    assert segment[40:42] == b'ef'
-    assert segment[42] == SerialFrame.ESCAPE_PREFIX_BYTE
-    assert segment[43] == 0x8E ^ 0xFF
-    assert segment[44:] == pyuavcan.transport.commons.crc.CRC32C.new(f.payload).value_as_bytes
+    assert segment[32:40] == b'abcd\x00ef\x00'
+    assert segment[40:] == pyuavcan.transport.commons.crc.CRC32C.new(f.payload).value_as_bytes
 
 
 def _unittest_frame_compile_service() -> None:
@@ -241,23 +251,24 @@ def _unittest_frame_compile_service() -> None:
     mv = f.compile_into(buffer)
 
     assert mv[0] == mv[-1] == SerialFrame.FRAME_DELIMITER_BYTE
-    segment = bytes(mv[1:-1])
-    assert SerialFrame.FRAME_DELIMITER_BYTE not in segment
+    segment_cobs = bytes(mv[1:-1])
+    assert SerialFrame.FRAME_DELIMITER_BYTE not in segment_cobs
+
+    segment = cobs.decode(segment_cobs)
 
     # Header validation
     assert segment[0] == _VERSION
     assert segment[1] == int(Priority.FAST)
-    assert segment[2] == SerialFrame.ESCAPE_PREFIX_BYTE
-    assert (segment[3], segment[4]) == (SerialFrame.FRAME_DELIMITER_BYTE ^ 0xFF, 0)
-    assert (segment[5], segment[6]) == (0xFF, 0xFF)
-    assert segment[7:9] == ((1 << 15) | (1 << 14) | 123) .to_bytes(2, 'little')
-    assert segment[9:17] == 0xdead_beef_bad_c0ffe .to_bytes(8, 'little')
-    assert segment[17:25] == 1234567890123456789 .to_bytes(8, 'little')
-    assert segment[25:29] == 1234567 .to_bytes(4, 'little')
+    assert (segment[2], segment[3]) == (SerialFrame.FRAME_DELIMITER_BYTE, 0)
+    assert (segment[4], segment[5]) == (0xFF, 0xFF)
+    assert segment[6:8] == ((1 << 15) | (1 << 14) | 123).to_bytes(2, 'little')
+    assert segment[8:16] == 0xdead_beef_bad_c0ffe .to_bytes(8, 'little')
+    assert segment[16:24] == 1234567890123456789 .to_bytes(8, 'little')
+    assert segment[24:28] == 1234567 .to_bytes(4, 'little')
     # Header CRC here
 
     # CRC validation
-    assert segment[33:] == pyuavcan.transport.commons.crc.CRC32C.new(f.payload).value_as_bytes
+    assert segment[32:] == pyuavcan.transport.commons.crc.CRC32C.new(f.payload).value_as_bytes
 
 
 def _unittest_frame_parse() -> None:

diff --git a/pyuavcan/transport/serial/_stream_parser.py b/pyuavcan/transport/serial/_stream_parser.py
@@ -33,7 +33,7 @@ def __init__(self,
         :param max_payload_size_bytes: Frames containing more that this many bytes of payload (after escaping and
             not including the header and CRC) will be considered invalid.
         """
-        max_payload_size_bytes = int(max_payload_size_bytes)
+        max_payload_size_bytes = SerialFrame.calc_cobs_size(max_payload_size_bytes)
         if not (callable(callback) and max_payload_size_bytes > 0):
             raise ValueError('Invalid parameters')
 
@@ -64,13 +64,6 @@ def _process_byte(self, b: int, timestamp: pyuavcan.transport.Timestamp) -> None
             return
 
         # Unescaping is done only if we're inside a frame currently.
-        if self._is_inside_frame():
-            if b == SerialFrame.ESCAPE_PREFIX_BYTE:
-                self._unescape_next = True
-                return
-            if self._unescape_next:
-                self._unescape_next = False
-                b ^= 0xFF
 
         # Appending to the buffer always, regardless of whether we're in a frame or not.
         # We may find out that the data does not belong to the protocol only much later; can't look ahead.
@@ -85,7 +78,7 @@ def _finalize(self, known_invalid: bool) -> None:
             parsed: typing.Optional[SerialFrame] = None
             if (not known_invalid) and len(mv) <= self._max_frame_size_bytes:
                 assert self._current_frame_timestamp is not None
-                parsed = SerialFrame.parse_from_unescaped_image(mv, self._current_frame_timestamp)
+                parsed = SerialFrame.parse_from_cobs_image(mv, self._current_frame_timestamp)
             if parsed:
                 self._callback(parsed)
             elif mv:
@@ -122,15 +115,12 @@ def proc(b: typing.Union[bytes, memoryview]) -> typing.Sequence[typing.Union[Ser
     assert [memoryview(b'abcdef')] == proc(b'abcdef')
     assert [] == proc(b'')
 
-    # The frame is well-delimited, but the content is invalid. Notice the unescaping in action.
-    assert [] == proc(b'\x9E\x8E\x61')
-    assert [memoryview(b'\x9E\x8E')] == proc(b'\x8E\x71\x9E')
 
     # Valid frame.
     f1 = SerialFrame(timestamp=ts,
                      priority=Priority.HIGH,
                      source_node_id=SerialFrame.FRAME_DELIMITER_BYTE,
-                     destination_node_id=SerialFrame.ESCAPE_PREFIX_BYTE,
+                     destination_node_id=SerialFrame.FRAME_DELIMITER_BYTE,
                      data_specifier=MessageDataSpecifier(12345),
                      data_type_hash=0xdead_beef_bad_c0ffe,
                      transfer_id=1234567890123456789,
@@ -146,14 +136,14 @@ def proc(b: typing.Union[bytes, memoryview]) -> typing.Sequence[typing.Union[Ser
     f2 = SerialFrame(timestamp=ts,
                      priority=Priority.HIGH,
                      source_node_id=SerialFrame.FRAME_DELIMITER_BYTE,
-                     destination_node_id=SerialFrame.ESCAPE_PREFIX_BYTE,
+                     destination_node_id=SerialFrame.FRAME_DELIMITER_BYTE,
                      data_specifier=MessageDataSpecifier(12345),
                      data_type_hash=0xdead_beef_bad_c0ffe,
                      transfer_id=1234567890123456789,
                      index=1234567,
                      end_of_transfer=True,
                      payload=f1.compile_into(bytearray(1000)))
-    assert len(f2.payload) == 46 + 2  # The extra two are escapes.
+    assert len(f2.payload) == 43  # Cobs escaping
     result = proc(f2.compile_into(bytearray(1000)))
     assert len(result) == 1
     assert isinstance(result[0], memoryview)

diff --git a/setup.cfg b/setup.cfg
@@ -43,6 +43,7 @@ transport_can_pythoncan =
 
 transport_serial =
     pyserial ~= 3.4
+    cobs     ~= 1.1.4
 
 # Command-line tool. This is not a transport.
 # Per ruamel.yaml docs: "For production systems you should pin the version being used with ``ruamel.yaml<=0.15``"