# --- parser/extractors.py ---

_BOOL_SPELLINGS = {"bool", "_Bool"}

# Matches ``_Bool`` only as a whole token, so identifiers that merely contain
# it (``my_Bool``, ``_Bool_t``) are left untouched.
_BOOL_TOKEN_RE = re.compile(r"\b_Bool\b")


def _c_spelling(ty) -> str:
    """Return the declared C spelling, with ``_Bool`` normalised to ``bool``.

    Two bool representations arise depending on which postgres_int_defs.h is
    in play:

    - PostgreSQL headers: ``typedef char bool`` -> spelling already ``bool``
    - Stub header: ``#define bool _Bool`` -> spelling is ``_Bool``

    Compound types preserve the inner spelling, so ``bool *`` arrives as
    ``_Bool *``; the substitution is therefore applied per token rather than
    by comparing the whole string.

    Args:
        ty: Object exposing the declared type spelling as ``ty.spelling``.

    Returns:
        ``ty.spelling`` with every standalone ``_Bool`` token replaced by
        ``bool``; any other spelling is returned unchanged.
    """
    return _BOOL_TOKEN_RE.sub("bool", ty.spelling)


# --- parser/parser.py ---

# Minimal stand-in for system headers so libclang does not fall back to
# treating undeclared identifiers as int. stdbool.h is the load-bearing one:
# without it every `bool`-returning function is parsed with result_type
# TypeKind.INT.
#
# The postgres integer typedefs are the same hazard: without a real
# pg_config.h, postgres/c.h never typedefs int64, so int64 (and every
# type built on it: TimestampTz, Timestamp, TimeADT, DateADT, ...)
# collapses to implicit int and timestamp parameters are emitted 32-bit.
# These mirror MobilityDB's postgres/c.h (LP64 branch), timestamp_def.h
# and date.h exactly.
# Prepended verbatim to the generated translation unit by build_entry_point().
# NOTE(review): the `long`-based typedefs (size_t, int64, uint64) are 64-bit
# only on LP64 targets; on LLP64 targets `long` is 32 bits -- confirm the
# parser is only ever run against LP64 targets.
_SYSTEM_HEADER_STUBS = """
#ifndef bool
#define bool _Bool
#endif
#ifndef true
#define true 1
#endif
#ifndef false
#define false 0
#endif
typedef unsigned long size_t;
typedef signed char int8;
typedef signed short int16;
typedef signed int int32;
typedef long int int64;
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;
typedef unsigned long int uint64;
typedef float float4;
typedef double float8;
typedef int64 Timestamp;
typedef int64 TimestampTz;
typedef int64 TimeADT;
typedef int64 TimeOffset;
typedef int32 DateADT;
"""


def build_entry_point(headers_dir: Path) -> str:
    """Build the source text of a translation unit including every header.

    The result starts with ``_SYSTEM_HEADER_STUBS`` and is followed by one
    ``#include "<absolute path>"`` line per ``*.h`` file found recursively
    under ``headers_dir``, in sorted path order.

    Args:
        headers_dir: Directory searched recursively for ``*.h`` files.

    Returns:
        The stub block and the include lines joined with newlines. When no
        header is found, the stub block alone is returned.
    """
    # Forward slashes keep the header-name portable: a backslash between the
    # quotes of an #include is undefined behaviour in C, and as_posix() is a
    # no-op on POSIX paths.
    include_lines = (
        f'#include "{header.resolve().as_posix()}"'
        for header in sorted(headers_dir.glob("**/*.h"))
    )
    return "\n".join([_SYSTEM_HEADER_STUBS, *include_lines])