Skip to content

Commit

Permalink
Fix unaligned access issues in host-network byte I/O on ARM
Browse files Browse the repository at this point in the history
Per report in #216 and https://bugs.debian.org/872408, the current
implementation may raise SIGBUS on ARM platforms that do not support
unaligned memory access.

Fix this by using a stack variable and memcpy, which gives the compiler
more opportunity to generate correct code.  While at it, implement
hton* and ntoh* in terms of byteswap intrinsics with an open-coded
fallback.  This drops the dependency on the platform's implementation,
which might not necessarily be consistently fast.

Smoke-tested locally on cross-compiled armv5tel-softfloat-linux-gnueabi
and armv6j-hardfloat-linux-gnueabi with QEMU TCG.

Fixes: #216.
  • Loading branch information
elprans committed Oct 31, 2017
1 parent 4a3713f commit c04576d
Show file tree
Hide file tree
Showing 5 changed files with 218 additions and 84 deletions.
6 changes: 3 additions & 3 deletions asyncpg/protocol/codecs/base.pyx
Expand Up @@ -181,15 +181,15 @@ cdef class Codec:
FastReadBuffer buf):
cdef:
object result
uint32_t elem_count
uint32_t i
ssize_t elem_count
ssize_t i
int32_t elem_len
uint32_t elem_typ
uint32_t received_elem_typ
Codec elem_codec
FastReadBuffer elem_buf = FastReadBuffer.new()

elem_count = <uint32_t>hton.unpack_int32(buf.read(4))
elem_count = <ssize_t><uint32_t>hton.unpack_int32(buf.read(4))
result = record.ApgRecord_New(self.element_names, elem_count)
for i in range(elem_count):
elem_typ = self.element_type_oids[i]
Expand Down
6 changes: 3 additions & 3 deletions asyncpg/protocol/codecs/record.pyx
Expand Up @@ -17,14 +17,14 @@ cdef inline record_encode_frame(ConnectionSettings settings, WriteBuffer buf,
cdef anonymous_record_decode(ConnectionSettings settings, FastReadBuffer buf):
cdef:
tuple result
uint32_t elem_count
ssize_t elem_count
ssize_t i
int32_t elem_len
uint32_t elem_typ
uint32_t i
Codec elem_codec
FastReadBuffer elem_buf = FastReadBuffer.new()

elem_count = <uint32_t>hton.unpack_int32(buf.read(4))
elem_count = <ssize_t><uint32_t>hton.unpack_int32(buf.read(4))
result = cpython.PyTuple_New(elem_count)

for i in range(elem_count):
Expand Down
199 changes: 199 additions & 0 deletions asyncpg/protocol/hton.h
@@ -0,0 +1,199 @@
#include <stdint.h>

#if defined(__linux__) || defined(__CYGWIN__)
#include <endian.h>
#elif defined(__NetBSD__) || defined(__FreeBSD__) || defined(__OpenBSD__)
#include <sys/endian.h>
#elif defined(__DragonFly__)
#include <sys/endian.h>
#elif defined(__APPLE__)
#include <libkern/OSByteOrder.h>
#define __BYTE_ORDER BYTE_ORDER
#define __BIG_ENDIAN BIG_ENDIAN
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#elif defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
/* Assume Windows is always LE. There seems to be no reliable way
to detect endianness there */
#define __LITTLE_ENDIAN 1234
#define __BIG_ENDIAN 4321
#define __BYTE_ORDER __LITTLE_ENDIAN
#else
#error Cannot determine platform byte order.
#endif


#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__)

#define apg_bswap16(x) __builtin_bswap16(x)
#define apg_bswap32(x) __builtin_bswap32(x)
#define apg_bswap64(x) __builtin_bswap64(x)

#elif defined(_MSC_VER)

#define apg_bswap16(x) _byteswap_ushort(x)
#define apg_bswap32(x) _byteswap_ulong(x)
#define apg_bswap64(x) _byteswap_uint64(x)

#else

static inline uint16_t
apg_bswap16(uint16_t)
{
return ((x << 8) & 0xff00) | (x >> 8) & 0x00ff));
}

static inline uint32_t
apg_bswap32(uint32_t x)
{
return (
((x << 24) & 0xff000000) | ((x << 8) & 0x00ff0000) |
((x >> 8) & 0x0000ff00) | ((x >> 24) & 0x000000ff)
);
}

static inline uint64_t
apg_bswap64(uint64_t x)
{
return (
((x << 56) & 0xff00000000000000ULL) |
((x << 40) & 0x00ff000000000000ULL) |
((x << 24) & 0x0000ff0000000000ULL) |
((x << 8) & 0x000000ff00000000ULL) |
((x >> 8) & 0x00000000ff000000ULL) |
((x >> 24) & 0x0000000000ff0000ULL) |
((x >> 40) & 0x000000000000ff00ULL) |
((x >> 56) & 0x00000000000000ffULL);
);
}

#endif

#if __BYTE_ORDER == __BIG_ENDIAN

#define apg_hton16(x) (x)
#define apg_hton32(x) (x)
#define apg_hton64(x) (x)

#define apg_ntoh16(x) (x)
#define apg_ntoh32(x) (x)
#define apg_ntoh64(x) (x)

#elif __BYTE_ORDER == __LITTLE_ENDIAN

#define apg_hton16(x) apg_bswap16(x)
#define apg_hton32(x) apg_bswap32(x)
#define apg_hton64(x) apg_bswap64(x)

#define apg_ntoh16(x) apg_bswap16(x)
#define apg_ntoh32(x) apg_bswap32(x)
#define apg_ntoh64(x) apg_bswap64(x)

#else

#error Unsupported byte order.

#endif


static inline void
pack_int16(char *buf, int16_t x)
{
uint16_t nx = apg_hton16((uint16_t)x);
/* NOTE: the memcpy below is _important_ to support systems
which disallow unaligned access. On systems, which do
allow unaligned access it will be optimized away by the
compiler
*/
memcpy(buf, &nx, sizeof(uint16_t));
}


static inline void
pack_int32(char *buf, int64_t x)
{
uint32_t nx = apg_hton32((uint32_t)x);
memcpy(buf, &nx, sizeof(uint32_t));
}


static inline void
pack_int64(char *buf, int64_t x)
{
uint64_t nx = apg_hton64((uint64_t)x);
memcpy(buf, &nx, sizeof(uint64_t));
}


static inline int16_t
unpack_int16(const char *buf)
{
uint16_t nx;
memcpy((char *)&nx, buf, sizeof(uint16_t));
return (int16_t)apg_ntoh16(nx);
}


static inline int32_t
unpack_int32(const char *buf)
{
uint32_t nx;
memcpy((char *)&nx, buf, sizeof(uint32_t));
return (int32_t)apg_ntoh32(nx);
}


static inline int64_t
unpack_int64(const char *buf)
{
uint64_t nx;
memcpy((char *)&nx, buf, sizeof(uint64_t));
return (int64_t)apg_ntoh64(nx);
}


union _apg_floatconv {
uint32_t i;
float f;
};


union _apg_doubleconv {
uint64_t i;
double f;
};


static inline void
pack_float(char *buf, float f)
{
union _apg_floatconv v;
v.f = f;
pack_int32(buf, (int32_t)v.i);
}


static inline void
pack_double(char *buf, double f)
{
union _apg_doubleconv v;
v.f = f;
pack_int64(buf, (int64_t)v.i);
}


static inline float
unpack_float(const char *buf)
{
union _apg_floatconv v;
v.i = (uint32_t)unpack_int32(buf);
return v.f;
}


static inline double
unpack_double(const char *buf)
{
union _apg_doubleconv v;
v.i = (uint64_t)unpack_int64(buf);
return v.f;
}
85 changes: 11 additions & 74 deletions asyncpg/protocol/hton.pxd
Expand Up @@ -8,77 +8,14 @@
from libc.stdint cimport int16_t, int32_t, uint16_t, uint32_t, int64_t, uint64_t


IF UNAME_SYSNAME == "Windows":
cdef extern from "winsock2.h":
uint32_t htonl(uint32_t hostlong)
uint16_t htons(uint16_t hostshort)
uint32_t ntohl(uint32_t netlong)
uint16_t ntohs(uint16_t netshort)
ELSE:
cdef extern from "arpa/inet.h":
uint32_t htonl(uint32_t hostlong)
uint16_t htons(uint16_t hostshort)
uint32_t ntohl(uint32_t netlong)
uint16_t ntohs(uint16_t netshort)


cdef inline void pack_int16(char* buf, int16_t x):
(<uint16_t*>buf)[0] = htons(<uint16_t>x)


cdef inline int16_t unpack_int16(const char* buf):
return <int16_t>ntohs((<uint16_t*>buf)[0])


cdef inline void pack_int32(char* buf, int32_t x):
(<uint32_t*>buf)[0] = htonl(<uint32_t>x)


cdef inline int32_t unpack_int32(const char* buf):
return <int32_t>ntohl((<uint32_t*>buf)[0])


cdef inline void pack_int64(char* buf, int64_t x):
(<uint32_t*>buf)[0] = htonl(<uint32_t>(<uint64_t>(x) >> 32))
(<uint32_t*>&buf[4])[0] = htonl(<uint32_t>(x))


cdef inline int64_t unpack_int64(const char* buf):
cdef int64_t hh = unpack_int32(buf)
cdef uint32_t hl = <uint32_t>unpack_int32(&buf[4])

return (hh << 32) | hl


cdef union _floatconv:
uint32_t i
float f


cdef inline int32_t pack_float(char* buf, float f):
cdef _floatconv v
v.f = f
pack_int32(buf, <int32_t>v.i)


cdef inline float unpack_float(const char* buf):
cdef _floatconv v
v.i = <uint32_t>unpack_int32(buf)
return v.f


cdef union _doubleconv:
uint64_t i
double f


cdef inline int64_t pack_double(char* buf, double f):
cdef _doubleconv v
v.f = f
pack_int64(buf, <int64_t>v.i)


cdef inline double unpack_double(const char* buf):
cdef _doubleconv v
v.i = <uint64_t>unpack_int64(buf)
return v.f
cdef extern from "hton.h":
cdef void pack_int16(char *buf, int16_t x);
cdef void pack_int32(char *buf, int32_t x);
cdef void pack_int64(char *buf, int64_t x);
cdef void pack_float(char *buf, float f);
cdef void pack_double(char *buf, double f);
cdef int16_t unpack_int16(const char *buf);
cdef int32_t unpack_int32(const char *buf);
cdef int64_t unpack_int64(const char *buf);
cdef float unpack_float(const char *buf);
cdef double unpack_double(const char *buf);
6 changes: 2 additions & 4 deletions setup.py
Expand Up @@ -21,10 +21,8 @@
CFLAGS = ['-O2']
LDFLAGS = []

if platform.uname().system == 'Windows':
LDFLAGS.append('ws2_32.lib')
else:
CFLAGS.extend(['-Wall', '-Wsign-compare', '-Wconversion'])
if platform.uname().system != 'Windows':
CFLAGS.extend(['-fsigned-char', '-Wall', '-Wsign-compare', '-Wconversion'])


class build_ext(_build_ext.build_ext):
Expand Down

0 comments on commit c04576d

Please sign in to comment.