Skip to content

Commit c04576d

Browse files
committed
Fix unaligned access issues in host-network byte I/O on ARM
Per report in MagicStack#216 and https://bugs.debian.org/872408, the current implementation may raise SIGBUS on ARM platforms that do not support unaligned memory access. Fix this by using a stack variable and memcpy, which gives the compiler more opportunity to generate correct code. While at it, implement hton* and ntoh* in terms of byteswap intrinsics with an open-coded fallback. This drops the dependency on the platform's implementation, which might not necessarily be consistently fast. Smoke-tested locally on cross-compiled armv5tel-softfloat-linux-gnueabi and armv6j-hardfloat-linux-gnueabi with QEMU TCG. Fixes: MagicStack#216.
1 parent 4a3713f commit c04576d

File tree

5 files changed

+218
-84
lines changed

5 files changed

+218
-84
lines changed

asyncpg/protocol/codecs/base.pyx

+3-3
Original file line numberDiff line numberDiff line change
@@ -181,15 +181,15 @@ cdef class Codec:
181181
FastReadBuffer buf):
182182
cdef:
183183
object result
184-
uint32_t elem_count
185-
uint32_t i
184+
ssize_t elem_count
185+
ssize_t i
186186
int32_t elem_len
187187
uint32_t elem_typ
188188
uint32_t received_elem_typ
189189
Codec elem_codec
190190
FastReadBuffer elem_buf = FastReadBuffer.new()
191191

192-
elem_count = <uint32_t>hton.unpack_int32(buf.read(4))
192+
elem_count = <ssize_t><uint32_t>hton.unpack_int32(buf.read(4))
193193
result = record.ApgRecord_New(self.element_names, elem_count)
194194
for i in range(elem_count):
195195
elem_typ = self.element_type_oids[i]

asyncpg/protocol/codecs/record.pyx

+3-3
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@ cdef inline record_encode_frame(ConnectionSettings settings, WriteBuffer buf,
1717
cdef anonymous_record_decode(ConnectionSettings settings, FastReadBuffer buf):
1818
cdef:
1919
tuple result
20-
uint32_t elem_count
20+
ssize_t elem_count
21+
ssize_t i
2122
int32_t elem_len
2223
uint32_t elem_typ
23-
uint32_t i
2424
Codec elem_codec
2525
FastReadBuffer elem_buf = FastReadBuffer.new()
2626

27-
elem_count = <uint32_t>hton.unpack_int32(buf.read(4))
27+
elem_count = <ssize_t><uint32_t>hton.unpack_int32(buf.read(4))
2828
result = cpython.PyTuple_New(elem_count)
2929

3030
for i in range(elem_count):

asyncpg/protocol/hton.h

+199
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
#include <stdint.h>
2+
3+
#if defined(__linux__) || defined(__CYGWIN__)
4+
#include <endian.h>
5+
#elif defined(__NetBSD__) || defined(__FreeBSD__) || defined(__OpenBSD__)
6+
#include <sys/endian.h>
7+
#elif defined(__DragonFly__)
8+
#include <sys/endian.h>
9+
#elif defined(__APPLE__)
10+
#include <libkern/OSByteOrder.h>
11+
#define __BYTE_ORDER BYTE_ORDER
12+
#define __BIG_ENDIAN BIG_ENDIAN
13+
#define __LITTLE_ENDIAN LITTLE_ENDIAN
14+
#elif defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
15+
/* Assume Windows is always LE. There seems to be no reliable way
16+
to detect endianness there */
17+
#define __LITTLE_ENDIAN 1234
18+
#define __BIG_ENDIAN 4321
19+
#define __BYTE_ORDER __LITTLE_ENDIAN
20+
#else
21+
#error Cannot determine platform byte order.
22+
#endif
23+
24+
25+
#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__)
26+
27+
#define apg_bswap16(x) __builtin_bswap16(x)
28+
#define apg_bswap32(x) __builtin_bswap32(x)
29+
#define apg_bswap64(x) __builtin_bswap64(x)
30+
31+
#elif defined(_MSC_VER)
32+
33+
#define apg_bswap16(x) _byteswap_ushort(x)
34+
#define apg_bswap32(x) _byteswap_ulong(x)
35+
#define apg_bswap64(x) _byteswap_uint64(x)
36+
37+
#else
38+
39+
static inline uint16_t
40+
apg_bswap16(uint16_t)
41+
{
42+
return ((x << 8) & 0xff00) | (x >> 8) & 0x00ff));
43+
}
44+
45+
static inline uint32_t
46+
apg_bswap32(uint32_t x)
47+
{
48+
return (
49+
((x << 24) & 0xff000000) | ((x << 8) & 0x00ff0000) |
50+
((x >> 8) & 0x0000ff00) | ((x >> 24) & 0x000000ff)
51+
);
52+
}
53+
54+
static inline uint64_t
55+
apg_bswap64(uint64_t x)
56+
{
57+
return (
58+
((x << 56) & 0xff00000000000000ULL) |
59+
((x << 40) & 0x00ff000000000000ULL) |
60+
((x << 24) & 0x0000ff0000000000ULL) |
61+
((x << 8) & 0x000000ff00000000ULL) |
62+
((x >> 8) & 0x00000000ff000000ULL) |
63+
((x >> 24) & 0x0000000000ff0000ULL) |
64+
((x >> 40) & 0x000000000000ff00ULL) |
65+
((x >> 56) & 0x00000000000000ffULL);
66+
);
67+
}
68+
69+
#endif
70+
71+
#if __BYTE_ORDER == __BIG_ENDIAN
72+
73+
#define apg_hton16(x) (x)
74+
#define apg_hton32(x) (x)
75+
#define apg_hton64(x) (x)
76+
77+
#define apg_ntoh16(x) (x)
78+
#define apg_ntoh32(x) (x)
79+
#define apg_ntoh64(x) (x)
80+
81+
#elif __BYTE_ORDER == __LITTLE_ENDIAN
82+
83+
#define apg_hton16(x) apg_bswap16(x)
84+
#define apg_hton32(x) apg_bswap32(x)
85+
#define apg_hton64(x) apg_bswap64(x)
86+
87+
#define apg_ntoh16(x) apg_bswap16(x)
88+
#define apg_ntoh32(x) apg_bswap32(x)
89+
#define apg_ntoh64(x) apg_bswap64(x)
90+
91+
#else
92+
93+
#error Unsupported byte order.
94+
95+
#endif
96+
97+
98+
static inline void
99+
pack_int16(char *buf, int16_t x)
100+
{
101+
uint16_t nx = apg_hton16((uint16_t)x);
102+
/* NOTE: the memcpy below is _important_ to support systems
103+
which disallow unaligned access. On systems, which do
104+
allow unaligned access it will be optimized away by the
105+
compiler
106+
*/
107+
memcpy(buf, &nx, sizeof(uint16_t));
108+
}
109+
110+
111+
static inline void
112+
pack_int32(char *buf, int64_t x)
113+
{
114+
uint32_t nx = apg_hton32((uint32_t)x);
115+
memcpy(buf, &nx, sizeof(uint32_t));
116+
}
117+
118+
119+
static inline void
120+
pack_int64(char *buf, int64_t x)
121+
{
122+
uint64_t nx = apg_hton64((uint64_t)x);
123+
memcpy(buf, &nx, sizeof(uint64_t));
124+
}
125+
126+
127+
static inline int16_t
128+
unpack_int16(const char *buf)
129+
{
130+
uint16_t nx;
131+
memcpy((char *)&nx, buf, sizeof(uint16_t));
132+
return (int16_t)apg_ntoh16(nx);
133+
}
134+
135+
136+
static inline int32_t
137+
unpack_int32(const char *buf)
138+
{
139+
uint32_t nx;
140+
memcpy((char *)&nx, buf, sizeof(uint32_t));
141+
return (int32_t)apg_ntoh32(nx);
142+
}
143+
144+
145+
static inline int64_t
146+
unpack_int64(const char *buf)
147+
{
148+
uint64_t nx;
149+
memcpy((char *)&nx, buf, sizeof(uint64_t));
150+
return (int64_t)apg_ntoh64(nx);
151+
}
152+
153+
154+
union _apg_floatconv {
155+
uint32_t i;
156+
float f;
157+
};
158+
159+
160+
union _apg_doubleconv {
161+
uint64_t i;
162+
double f;
163+
};
164+
165+
166+
static inline void
167+
pack_float(char *buf, float f)
168+
{
169+
union _apg_floatconv v;
170+
v.f = f;
171+
pack_int32(buf, (int32_t)v.i);
172+
}
173+
174+
175+
static inline void
176+
pack_double(char *buf, double f)
177+
{
178+
union _apg_doubleconv v;
179+
v.f = f;
180+
pack_int64(buf, (int64_t)v.i);
181+
}
182+
183+
184+
static inline float
185+
unpack_float(const char *buf)
186+
{
187+
union _apg_floatconv v;
188+
v.i = (uint32_t)unpack_int32(buf);
189+
return v.f;
190+
}
191+
192+
193+
static inline double
194+
unpack_double(const char *buf)
195+
{
196+
union _apg_doubleconv v;
197+
v.i = (uint64_t)unpack_int64(buf);
198+
return v.f;
199+
}

asyncpg/protocol/hton.pxd

+11-74
Original file line numberDiff line numberDiff line change
@@ -8,77 +8,14 @@
88
from libc.stdint cimport int16_t, int32_t, uint16_t, uint32_t, int64_t, uint64_t
99

1010

11-
IF UNAME_SYSNAME == "Windows":
12-
cdef extern from "winsock2.h":
13-
uint32_t htonl(uint32_t hostlong)
14-
uint16_t htons(uint16_t hostshort)
15-
uint32_t ntohl(uint32_t netlong)
16-
uint16_t ntohs(uint16_t netshort)
17-
ELSE:
18-
cdef extern from "arpa/inet.h":
19-
uint32_t htonl(uint32_t hostlong)
20-
uint16_t htons(uint16_t hostshort)
21-
uint32_t ntohl(uint32_t netlong)
22-
uint16_t ntohs(uint16_t netshort)
23-
24-
25-
cdef inline void pack_int16(char* buf, int16_t x):
26-
(<uint16_t*>buf)[0] = htons(<uint16_t>x)
27-
28-
29-
cdef inline int16_t unpack_int16(const char* buf):
30-
return <int16_t>ntohs((<uint16_t*>buf)[0])
31-
32-
33-
cdef inline void pack_int32(char* buf, int32_t x):
34-
(<uint32_t*>buf)[0] = htonl(<uint32_t>x)
35-
36-
37-
cdef inline int32_t unpack_int32(const char* buf):
38-
return <int32_t>ntohl((<uint32_t*>buf)[0])
39-
40-
41-
cdef inline void pack_int64(char* buf, int64_t x):
42-
(<uint32_t*>buf)[0] = htonl(<uint32_t>(<uint64_t>(x) >> 32))
43-
(<uint32_t*>&buf[4])[0] = htonl(<uint32_t>(x))
44-
45-
46-
cdef inline int64_t unpack_int64(const char* buf):
47-
cdef int64_t hh = unpack_int32(buf)
48-
cdef uint32_t hl = <uint32_t>unpack_int32(&buf[4])
49-
50-
return (hh << 32) | hl
51-
52-
53-
cdef union _floatconv:
54-
uint32_t i
55-
float f
56-
57-
58-
cdef inline int32_t pack_float(char* buf, float f):
59-
cdef _floatconv v
60-
v.f = f
61-
pack_int32(buf, <int32_t>v.i)
62-
63-
64-
cdef inline float unpack_float(const char* buf):
65-
cdef _floatconv v
66-
v.i = <uint32_t>unpack_int32(buf)
67-
return v.f
68-
69-
70-
cdef union _doubleconv:
71-
uint64_t i
72-
double f
73-
74-
75-
cdef inline int64_t pack_double(char* buf, double f):
76-
cdef _doubleconv v
77-
v.f = f
78-
pack_int64(buf, <int64_t>v.i)
79-
80-
81-
cdef inline double unpack_double(const char* buf):
82-
cdef _doubleconv v
83-
v.i = <uint64_t>unpack_int64(buf)
84-
return v.f
11+
cdef extern from "hton.h":
12+
cdef void pack_int16(char *buf, int16_t x);
13+
cdef void pack_int32(char *buf, int32_t x);
14+
cdef void pack_int64(char *buf, int64_t x);
15+
cdef void pack_float(char *buf, float f);
16+
cdef void pack_double(char *buf, double f);
17+
cdef int16_t unpack_int16(const char *buf);
18+
cdef int32_t unpack_int32(const char *buf);
19+
cdef int64_t unpack_int64(const char *buf);
20+
cdef float unpack_float(const char *buf);
21+
cdef double unpack_double(const char *buf);

setup.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,8 @@
2121
CFLAGS = ['-O2']
2222
LDFLAGS = []
2323

24-
if platform.uname().system == 'Windows':
25-
LDFLAGS.append('ws2_32.lib')
26-
else:
27-
CFLAGS.extend(['-Wall', '-Wsign-compare', '-Wconversion'])
24+
if platform.uname().system != 'Windows':
25+
CFLAGS.extend(['-fsigned-char', '-Wall', '-Wsign-compare', '-Wconversion'])
2826

2927

3028
class build_ext(_build_ext.build_ext):

0 commit comments

Comments
 (0)