From 1942a590b4ede949a12d4e8fcd76966a3dc4a105 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 12:17:37 +0100 Subject: [PATCH 01/30] replace == POINTZ and in {} with compatible_with(s, Point[ZM]) --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 65ffce2..1179cbc 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1218,11 +1218,11 @@ def write_to_byte_stream( n = Point._write_x_y_to_byte_stream(b_io, x, y, i) # Write a single Z value - if s.shapeType == POINTZ: + if compatible_with(s, PointZ): n += PointZ._write_single_point_z_to_byte_stream(b_io, s, i) # Write a single M value - if s.shapeType in {POINTM, POINTZ}: + if compatible_with(s, PointM): n += PointM._write_single_point_m_to_byte_stream(b_io, s, i) return n From a460c1ca9a2cb735bc690fae5a2eed7f29116b1e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 12:24:08 +0100 Subject: [PATCH 02/30] REmove sets of two enum members with identity checked against both --- src/shapefile.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 1179cbc..0694b0e 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2538,7 +2538,7 @@ def __record( # parse each value record = [] for (__name, typ, __size, decimal), value in zip(fieldTuples, recordContents): - if typ in {FieldType.N, FieldType.F}: + if typ is FieldType.N or typ is FieldType.F: # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. 
value = value.split(b"\0")[0] value = value.replace(b"*", b"") # QGIS NULL is all '*' chars @@ -3046,7 +3046,7 @@ def __shapefileHeader( else: f.write(pack("<4d", 0, 0, 0, 0)) # Elevation - if self.shapeType in {POINTZ} | _HasZ._shapeTypes: + if self.shapeType in PointZ._shapeTypes | _HasZ._shapeTypes: # Z values are present in Z type zbox = self.zbox() if zbox is None: @@ -3056,7 +3056,7 @@ def __shapefileHeader( # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s zbox = ZBox(0, 0) # Measure - if self.shapeType in {POINTM, POINTZ} | _HasM._shapeTypes: + if self.shapeType in PointM._shapeTypes | _HasM._shapeTypes: # M values are present in M or Z type mbox = self.mbox() if mbox is None: @@ -3155,7 +3155,7 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: # Shape Type if self.shapeType is None and s.shapeType != NULL: self.shapeType = s.shapeType - if s.shapeType not in {NULL, self.shapeType}: + if s.shapeType != NULL and s.shapeType != self.shapeType: raise ShapefileException( f"The shape's type ({s.shapeType}) must match " f"the type of the shapefile ({self.shapeType})." 
@@ -3166,11 +3166,11 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: new_bbox = self.__bbox(s) if s.shapeType != NULL else None new_mbox = ( self.__mbox(s) - if s.shapeType in {POINTM, POINTZ} | _HasM._shapeTypes + if s.shapeType in _PointM._shapeTypes | _HasM._shapeTypes else None ) new_zbox = ( - self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None + self.__zbox(s) if s.shapeType in _PointZ._shapeTypes | _HasZ._shapeTypes else None ) # Create an in-memory binary buffer to avoid @@ -3266,7 +3266,7 @@ def record( def _dbf_missing_placeholder( value: RecordValue, field_type: FieldType, size: int ) -> str: - if field_type in {FieldType.N, FieldType.F}: + if field_type is FieldType.N or field_type is FieldType.F: return "*" * size # QGIS NULL if field_type is FieldType.D: return "0" * 8 # QGIS NULL for date type @@ -3354,7 +3354,7 @@ def __dbfRecord(self, record: list[RecordValue]) -> None: if value in MISSING: str_val = self._dbf_missing_placeholder(value, type_, size) - elif type_ in {FieldType.N, FieldType.F}: + elif type_ is FieldType.N or type_ is FieldType.F: str_val = self._try_coerce_to_numeric_str(value, size, decimal) elif type_ is FieldType.D: str_val = self._try_coerce_to_date_str(value) From d0a0d5607b5ddb21cec47228b7427dc0ce5c3f9b Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 12:41:57 +0100 Subject: [PATCH 03/30] Correct variable names and reformat --- src/shapefile.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 0694b0e..2d3a440 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3166,11 +3166,13 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: new_bbox = self.__bbox(s) if s.shapeType != NULL else None new_mbox = ( self.__mbox(s) - if s.shapeType in _PointM._shapeTypes | _HasM._shapeTypes + if s.shapeType in PointM._shapeTypes | _HasM._shapeTypes else None ) new_zbox = ( - 
self.__zbox(s) if s.shapeType in _PointZ._shapeTypes | _HasZ._shapeTypes else None + self.__zbox(s) + if s.shapeType in PointZ._shapeTypes | _HasZ._shapeTypes + else None ) # Create an in-memory binary buffer to avoid From 9be9b0245b3456062407c300b329bcf398643097 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 12:44:03 +0100 Subject: [PATCH 04/30] Reinstate a 2-tuple --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 2d3a440..7119d39 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3155,7 +3155,7 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: # Shape Type if self.shapeType is None and s.shapeType != NULL: self.shapeType = s.shapeType - if s.shapeType != NULL and s.shapeType != self.shapeType: + if s.shapeType not in (NULL, self.shapeType): raise ShapefileException( f"The shape's type ({s.shapeType}) must match " f"the type of the shapefile ({self.shapeType})." From 6bc458057a81eeb558ab41482e05c172b3275bc3 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 14:25:28 +0100 Subject: [PATCH 05/30] Enum free since 1st August 2025 --- src/shapefile.py | 63 ++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 7119d39..53360a7 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -12,7 +12,6 @@ import array import doctest -import enum import io import logging import os @@ -26,7 +25,9 @@ IO, Any, Container, + Final, Generic, + Hashable, Iterable, Iterator, Literal, @@ -38,6 +39,7 @@ TypedDict, TypeVar, Union, + cast, overload, ) from urllib.error import HTTPError @@ -171,22 +173,34 @@ def read(self, size: int = -1): ... 
BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO, WriteSeekableBinStream] +FieldTypeT = Literal["C", "D", "F", "L", "M", "N"] + # https://en.wikipedia.org/wiki/.dbf#Database_records -class FieldType(enum.Enum): - # Use an ascii-encoded byte of the name, to save a decoding step. - C = "Character" # (str) - D = "Date" - F = "Floating point" - L = "Logical" # (bool) - M = "Memo" # Legacy. (10 digit str, starting block in an .dbt file) - N = "Numeric" # (int) +class FieldType: + """A bare bones 'enum', as the enum library noticeably slows performance.""" + + # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] + + C: Final = "C" # Character" # (str) + D: Final = "D" # "Date" + F: Final = "F" # "Floating point" + L: Final = "L" # "Logical" # (bool) + M: Final = "M" # "Memo" # Legacy. (10 digit str, starting block in an .dbt file) + N: Final = "N" # "Numeric" # (int) + __members__ = {"C", "D", "F", "L", "M", "N"} # set(__slots__) - {"__members__"} + + def raise_if_invalid(field_type: Hashable): + if field_type not in FieldType.__members__: + raise ShapefileException( + f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " + ) # Use functional syntax to have an attribute named type, a Python keyword class Field(NamedTuple): name: str - field_type: FieldType + field_type: FieldTypeT size: int decimal: int @@ -194,18 +208,12 @@ class Field(NamedTuple): def from_unchecked( cls, name: str, - field_type: Union[str, FieldType] = FieldType.C, + field_type: FieldTypeT = "C", size: int = 50, decimal: int = 0, ) -> Self: - if isinstance(field_type, str): - if field_type.upper() in FieldType.__members__: - field_type = FieldType[field_type.upper()] - else: - raise ShapefileException( - "type must be C,D,F,L,M,N, or a FieldType enum member. " - f"Got: {field_type=}. 
" - ) + field_type = cast(FieldTypeT, field_type.upper()) + FieldType.raise_if_invalid(field_type) if field_type is FieldType.D: size = 8 @@ -221,7 +229,7 @@ def from_unchecked( ) def __repr__(self) -> str: - return f'Field(name="{self.name}", field_type=FieldType.{self.field_type.name}, size={self.size}, decimal={self.decimal})' + return f'Field(name="{self.name}", field_type=FieldType.{self.field_type}, size={self.size}, decimal={self.decimal})' RecordValueNotDate = Union[bool, int, float, str, date] @@ -1977,7 +1985,7 @@ def __seek_0_on_file_obj_wrap_or_open_from_name( if hasattr(file_, "read"): # Copy if required try: - file_.seek(0) # type: ignore + file_.seek(0) return file_ except (NameError, io.UnsupportedOperation): return io.BytesIO(file_.read()) @@ -2418,7 +2426,8 @@ def __dbfHeader(self) -> None: name = encoded_name.decode(self.encoding, self.encodingErrors) name = name.lstrip() - field_type = FieldType[encoded_type_char.decode("ascii").upper()] + field_type = cast(FieldTypeT, encoded_type_char.decode("ascii").upper()) + FieldType.raise_if_invalid(field_type) self.fields.append(Field(name, field_type, size, decimal)) terminator = dbf.read(1) @@ -2632,7 +2641,9 @@ def records(self, fields: Optional[list[str]] = None) -> list[_Record]: f = self.__getFileObj(self.dbf) f.seek(self.__dbfHdrLength) fieldTuples, recLookup, recStruct = self.__recordFields(fields) - for i in range(self.numRecords): # type: ignore + # self.__dbfHeader() sets self.numRecords, so it's fine to cast it to int + # (to tell mypy it's not None). 
+ for i in range(cast(int, self.numRecords)): r = self.__record( oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct ) @@ -3111,7 +3122,7 @@ def __dbfHeader(self) -> None: encoded_name = field.name.encode(self.encoding, self.encodingErrors) encoded_name = encoded_name.replace(b" ", b"_") encoded_name = encoded_name[:10].ljust(11).replace(b" ", b"\x00") - encodedFieldType = field.field_type.name.encode("ascii") + encodedFieldType = field.field_type.encode("ascii") fld = pack( "<11sc4xBB14x", encoded_name, @@ -3266,7 +3277,7 @@ def record( @staticmethod def _dbf_missing_placeholder( - value: RecordValue, field_type: FieldType, size: int + value: RecordValue, field_type: FieldTypeT, size: int ) -> str: if field_type is FieldType.N or field_type is FieldType.F: return "*" * size # QGIS NULL @@ -3539,7 +3550,7 @@ def field( # Types of args should match *Field self, name: str, - field_type: Union[str, FieldType] = FieldType.C, + field_type: FieldTypeT = "C", size: int = 50, decimal: int = 0, ) -> None: From 17a89c4fae943382327eec219bb1e753d7ff0497 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 15:00:10 +0100 Subject: [PATCH 06/30] Use equality instead of identity tests with single character strings. --- src/shapefile.py | 34 ++++++++++++++++++++-------------- test_shapefile.py | 2 +- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 53360a7..c11da9f 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -19,6 +19,7 @@ import tempfile import time import zipfile +from collections.abc import Hashable from datetime import date from struct import Struct, calcsize, error, pack, unpack from typing import ( @@ -27,7 +28,6 @@ Container, Final, Generic, - Hashable, Iterable, Iterator, Literal, @@ -180,7 +180,7 @@ def read(self, size: int = -1): ... 
class FieldType: """A bare bones 'enum', as the enum library noticeably slows performance.""" - # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] + # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__", "raise_if_invalid", "is_numeric"] C: Final = "C" # Character" # (str) D: Final = "D" # "Date" @@ -190,12 +190,17 @@ class FieldType: N: Final = "N" # "Numeric" # (int) __members__ = {"C", "D", "F", "L", "M", "N"} # set(__slots__) - {"__members__"} + @staticmethod def raise_if_invalid(field_type: Hashable): if field_type not in FieldType.__members__: raise ShapefileException( f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " ) + @classmethod + def is_numeric(cls, member: FieldTypeT): + return member in (cls.F, cls.N) + # Use functional syntax to have an attribute named type, a Python keyword class Field(NamedTuple): @@ -215,10 +220,10 @@ def from_unchecked( field_type = cast(FieldTypeT, field_type.upper()) FieldType.raise_if_invalid(field_type) - if field_type is FieldType.D: + if field_type == FieldType.D: size = 8 decimal = 0 - elif field_type is FieldType.L: + elif field_type == FieldType.L: size = 1 decimal = 0 @@ -2426,7 +2431,8 @@ def __dbfHeader(self) -> None: name = encoded_name.decode(self.encoding, self.encodingErrors) name = name.lstrip() - field_type = cast(FieldTypeT, encoded_type_char.decode("ascii").upper()) + decoded_type_char = encoded_type_char.upper().decode("ascii") + field_type: FieldTypeT = getattr(FieldType, decoded_type_char) FieldType.raise_if_invalid(field_type) self.fields.append(Field(name, field_type, size, decimal)) @@ -2547,7 +2553,7 @@ def __record( # parse each value record = [] for (__name, typ, __size, decimal), value in zip(fieldTuples, recordContents): - if typ is FieldType.N or typ is FieldType.F: + if FieldType.is_numeric(typ): # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. 
value = value.split(b"\0")[0] value = value.replace(b"*", b"") # QGIS NULL is all '*' chars @@ -2573,7 +2579,7 @@ def __record( except ValueError: # not parseable as int, set to None value = None - elif typ is FieldType.D: + elif typ == FieldType.D: # date: 8 bytes - date stored as a string in the format YYYYMMDD. if ( not value.replace(b"\x00", b"") @@ -2591,7 +2597,7 @@ def __record( except (TypeError, ValueError): # if invalid date, just return as unicode string so user can decimalde value = str(value.strip()) - elif typ is FieldType.L: + elif typ == FieldType.L: # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. if value == b" ": value = None # space means missing or not yet set @@ -3279,11 +3285,11 @@ def record( def _dbf_missing_placeholder( value: RecordValue, field_type: FieldTypeT, size: int ) -> str: - if field_type is FieldType.N or field_type is FieldType.F: + if FieldType.is_numeric(field_type): return "*" * size # QGIS NULL - if field_type is FieldType.D: + if field_type == FieldType.D: return "0" * 8 # QGIS NULL for date type - if field_type is FieldType.L: + if field_type == FieldType.L: return " " return str(value)[:size].ljust(size) @@ -3367,11 +3373,11 @@ def __dbfRecord(self, record: list[RecordValue]) -> None: if value in MISSING: str_val = self._dbf_missing_placeholder(value, type_, size) - elif type_ is FieldType.N or type_ is FieldType.F: + elif FieldType.is_numeric(type_): str_val = self._try_coerce_to_numeric_str(value, size, decimal) - elif type_ is FieldType.D: + elif type_ == FieldType.D: str_val = self._try_coerce_to_date_str(value) - elif type_ is FieldType.L: + elif type_ == FieldType.L: str_val = self._try_coerce_to_logical_str(value) else: if isinstance(value, bytes): diff --git a/test_shapefile.py b/test_shapefile.py index a2ffbff..2a10d3e 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -695,7 +695,7 @@ def test_reader_fields(): field = fields[0] assert isinstance(field[0], str) # field name - assert 
field[1].name in ["C", "N", "F", "L", "D", "M"] # field type + assert field[1] in ["C", "N", "F", "L", "D", "M"] # field type assert isinstance(field[2], int) # field length assert isinstance(field[3], int) # decimal length From 4846dd8daf5b86114444b1f8d46eabd7fd57594f Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 15:36:27 +0100 Subject: [PATCH 07/30] Use a single FieldType mapping --- src/shapefile.py | 50 +++++++++++++++++++++++++++++++---------------- test_shapefile.py | 2 +- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 53360a7..f4bef2e 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -27,7 +27,6 @@ Container, Final, Generic, - Hashable, Iterable, Iterator, Literal, @@ -182,19 +181,34 @@ class FieldType: # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] - C: Final = "C" # Character" # (str) + C: Final = "C" # "Character" # (str) D: Final = "D" # "Date" F: Final = "F" # "Floating point" L: Final = "L" # "Logical" # (bool) M: Final = "M" # "Memo" # Legacy. (10 digit str, starting block in an .dbt file) N: Final = "N" # "Numeric" # (int) - __members__ = {"C", "D", "F", "L", "M", "N"} # set(__slots__) - {"__members__"} - - def raise_if_invalid(field_type: Hashable): - if field_type not in FieldType.__members__: - raise ShapefileException( - f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " - ) + __members__: set[FieldTypeT] = { + "C", + "D", + "F", + "L", + "M", + "N", + } # set(__slots__) - {"__members__"} + + # def raise_if_invalid(field_type: Hashable): + # if field_type not in FieldType.__members__: + # raise ShapefileException( + # f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. 
" + # ) + + +FIELD_TYPE_ALIASES: dict[Union[str, bytes], FieldTypeT] = {} +for c in FieldType.__members__: + FIELD_TYPE_ALIASES[c.upper()] = c + FIELD_TYPE_ALIASES[c.lower()] = c + FIELD_TYPE_ALIASES[c.encode("ascii").lower()] = c + FIELD_TYPE_ALIASES[c.encode("ascii").upper()] = c # Use functional syntax to have an attribute named type, a Python keyword @@ -208,24 +222,27 @@ class Field(NamedTuple): def from_unchecked( cls, name: str, - field_type: FieldTypeT = "C", + field_type: Union[str, bytes, FieldTypeT] = "C", size: int = 50, decimal: int = 0, ) -> Self: - field_type = cast(FieldTypeT, field_type.upper()) - FieldType.raise_if_invalid(field_type) + if field_type not in FIELD_TYPE_ALIASES: + raise ShapefileException( + f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " + ) + type_ = FIELD_TYPE_ALIASES[field_type] - if field_type is FieldType.D: + if type_ is FieldType.D: size = 8 decimal = 0 - elif field_type is FieldType.L: + elif type_ is FieldType.L: size = 1 decimal = 0 # A doctest in README.md previously passed in a string ('40') for size, # so explictly convert name to str, and size and decimal to ints. 
return cls( - name=str(name), field_type=field_type, size=int(size), decimal=int(decimal) + name=str(name), field_type=type_, size=int(size), decimal=int(decimal) ) def __repr__(self) -> str: @@ -2426,8 +2443,7 @@ def __dbfHeader(self) -> None: name = encoded_name.decode(self.encoding, self.encodingErrors) name = name.lstrip() - field_type = cast(FieldTypeT, encoded_type_char.decode("ascii").upper()) - FieldType.raise_if_invalid(field_type) + field_type = FIELD_TYPE_ALIASES[encoded_type_char] self.fields.append(Field(name, field_type, size, decimal)) terminator = dbf.read(1) diff --git a/test_shapefile.py b/test_shapefile.py index a2ffbff..2a10d3e 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -695,7 +695,7 @@ def test_reader_fields(): field = fields[0] assert isinstance(field[0], str) # field name - assert field[1].name in ["C", "N", "F", "L", "D", "M"] # field type + assert field[1] in ["C", "N", "F", "L", "D", "M"] # field type assert isinstance(field[2], int) # field length assert isinstance(field[3], int) # decimal length From 9dd687c68a5e0f93414f2bbc6c5fce5cc015c5ae Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 16:32:55 +0100 Subject: [PATCH 08/30] Ditch BBox, ZBox and MBox namedtuples --- src/shapefile.py | 79 ++++++++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index f4bef2e..4c3aa80 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -123,22 +123,25 @@ PointT = Union[Point2D, PointMT, PointZT] PointsT = list[PointT] +BBox = tuple[float, float, float, float] +MBox = tuple[float, float] +ZBox = tuple[float, float] -class BBox(NamedTuple): - xmin: float - ymin: float - xmax: float - ymax: float +# class BBox(NamedTuple): +# xmin: float +# ymin: float +# xmax: float +# ymax: float -class MBox(NamedTuple): - mmin: Optional[float] - mmax: Optional[float] +# class MBox(NamedTuple): +# 
mmin: Optional[float] +# mmax: Optional[float] -class ZBox(NamedTuple): - zmin: float - zmax: float +# class ZBox(NamedTuple): +# zmin: float +# zmax: float class WriteableBinStream(Protocol): @@ -415,8 +418,9 @@ def rewind(coords: Reversible[PointT]) -> PointsT: def ring_bbox(coords: PointsT) -> BBox: """Calculates and returns the bounding box of a ring.""" xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values - bbox = BBox(xmin=min(xs), ymin=min(ys), xmax=max(xs), ymax=max(ys)) - return bbox + # bbox = BBox(xmin=min(xs), ymin=min(ys), xmax=max(xs), ymax=max(ys)) + # return bbox + return min(xs), min(ys), max(xs), max(ys) def bbox_overlap(bbox1: BBox, bbox2: BBox) -> bool: @@ -998,7 +1002,7 @@ class _CanHaveBBox(Shape): bbox: Optional[BBox] = None def _get_set_bbox_from_byte_stream(self, b_io: ReadableBinStream) -> BBox: - self.bbox: BBox = BBox(*_Array[float]("d", unpack("<4d", b_io.read(32)))) + self.bbox: BBox = tuple(*_Array[float]("d", unpack("<4d", b_io.read(32)))) return self.bbox @staticmethod @@ -1218,7 +1222,7 @@ def from_byte_stream( if bbox is not None: # create bounding box for Point by duplicating coordinates # skip shape if no overlap with bounding box - if not bbox_overlap(bbox, BBox(x, y, x, y)): + if not bbox_overlap(bbox, (x, y, x, y)): return None shape.points = [(x, y)] @@ -2233,18 +2237,21 @@ def __shpHeader(self) -> None: shp.seek(32) self.shapeType = unpack("= NODATA else None for m_bound in unpack("<2d", shp.read(16)) ] - self.mbox = MBox(mmin=m_bounds[0], mmax=m_bounds[1]) + # self.mbox = MBox(mmin=m_bounds[0], mmax=m_bounds[1]) + self.mbox = (m_bounds[0], m_bounds[1]) def __shape( self, oid: Optional[int] = None, bbox: Optional[BBox] = None @@ -2953,10 +2960,10 @@ def __bbox(self, s: Shape) -> BBox: y: list[float] = [] if self._bbox: - x.append(self._bbox.xmin) - y.append(self._bbox.ymin) - x.append(self._bbox.xmax) - y.append(self._bbox.ymax) + x.append(self._bbox[0]) + y.append(self._bbox.[1]) + x.append(self._bbox.[2]) 
+ y.append(self._bbox.[3]) if len(s.points) > 0: px, py = list(zip(*s.points))[:2] @@ -2970,7 +2977,8 @@ def __bbox(self, s: Shape) -> BBox: "Cannot create bbox. Expected a valid shape with at least one point. " f"Got a shape of type '{s.shapeType}' and 0 points." ) - self._bbox = BBox(xmin=min(x), ymin=min(y), xmax=max(x), ymax=max(y)) + # self._bbox = BBox(xmin=min(x), ymin=min(y), xmax=max(x), ymax=max(y)) + self._bbox = (min(x), min(y), max(x), max(y)) return self._bbox def __zbox(self, s) -> ZBox: @@ -2988,7 +2996,8 @@ def __zbox(self, s) -> ZBox: # Original self._zbox bounds (if any) are the first two entries. # Set zbox for the first, and all later times - self._zbox = ZBox(zmin=min(z), zmax=max(z)) + # self._zbox = ZBox(zmin=min(z), zmax=max(z)) + self._zbox = (min(z), max(z)) return self._zbox def __mbox(self, s) -> MBox: @@ -3012,7 +3021,8 @@ def __mbox(self, s) -> MBox: # Original self._mbox bounds (if any) are the first two entries. # Set mbox for the first, and all later times - self._mbox = MBox(mmin=min(m), mmax=max(m)) + # self._mbox = MBox(mmin=min(m), mmax=max(m)) + self._mbox = (min(m), max(m)) return self._mbox @property @@ -3064,7 +3074,8 @@ def __shapefileHeader( # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions. # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles. 
- bbox = BBox(0, 0, 0, 0) + # bbox = BBox(0, 0, 0, 0) + bbox = (0, 0, 0, 0) f.write(pack("<4d", *bbox)) except error: raise ShapefileException( @@ -3078,20 +3089,24 @@ def __shapefileHeader( zbox = self.zbox() if zbox is None: # means we have empty shapefile/only null geoms (see commentary on bbox above) - zbox = ZBox(0, 0) + # zbox = ZBox(0, 0) + zbox = (0, 0) else: # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s - zbox = ZBox(0, 0) + # zbox = ZBox(0, 0) + zbox = (0, 0) # Measure if self.shapeType in PointM._shapeTypes | _HasM._shapeTypes: # M values are present in M or Z type mbox = self.mbox() if mbox is None: # means we have empty shapefile/only null geoms (see commentary on bbox above) - mbox = MBox(0, 0) + # mbox = MBox(0, 0) + mbox = (0, 0) else: # As per the ESRI shapefile spec, the mbox for non-M type shapefiles are set to 0s - mbox = MBox(0, 0) + # mbox = MBox(0, 0) + mbox = (0, 0) # Try writing try: f.write(pack("<4d", zbox[0], zbox[1], mbox[0], mbox[1])) From dc4511a145f788c4b9a0322e428c7326eeaab38e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 16:34:54 +0100 Subject: [PATCH 09/30] Remove errant dot --- src/shapefile.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 4c3aa80..54f7c21 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2961,9 +2961,9 @@ def __bbox(self, s: Shape) -> BBox: if self._bbox: x.append(self._bbox[0]) - y.append(self._bbox.[1]) - x.append(self._bbox.[2]) - y.append(self._bbox.[3]) + y.append(self._bbox[1]) + x.append(self._bbox[2]) + y.append(self._bbox[3]) if len(s.points) > 0: px, py = list(zip(*s.points))[:2] From 60265b609f3a30f6cc7071823adfdc4797820c7a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 16:37:13 +0100 Subject: [PATCH 10/30] Remove errant * --- src/shapefile.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 54f7c21..83b54e0 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1002,7 +1002,7 @@ class _CanHaveBBox(Shape): bbox: Optional[BBox] = None def _get_set_bbox_from_byte_stream(self, b_io: ReadableBinStream) -> BBox: - self.bbox: BBox = tuple(*_Array[float]("d", unpack("<4d", b_io.read(32)))) + self.bbox: BBox = tuple(_Array[float]("d", unpack("<4d", b_io.read(32)))) return self.bbox @staticmethod From 3e286003fbfae77fd13d9a3187d61972cc3f3537 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:02:30 +0100 Subject: [PATCH 11/30] Fix type checking. Lose unneccessary conversions to _Array --- src/shapefile.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 83b54e0..1ddd492 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -124,7 +124,7 @@ PointsT = list[PointT] BBox = tuple[float, float, float, float] -MBox = tuple[float, float] +MBox = tuple[Optional[float], Optional[float]] ZBox = tuple[float, float] # class BBox(NamedTuple): @@ -1002,7 +1002,7 @@ class _CanHaveBBox(Shape): bbox: Optional[BBox] = None def _get_set_bbox_from_byte_stream(self, b_io: ReadableBinStream) -> BBox: - self.bbox: BBox = tuple(_Array[float]("d", unpack("<4d", b_io.read(32)))) + self.bbox: BBox = unpack("<4d", b_io.read(32)) return self.bbox @staticmethod @@ -1192,7 +1192,7 @@ def _set_single_point_m_from_byte_stream( @staticmethod def _x_y_from_byte_stream(b_io: ReadableBinStream): # Unpack _Array too - x, y = _Array[float]("d", unpack("<2d", b_io.read(16))) + x, y = unpack("<2d", b_io.read(16)) # Convert to tuple return x, y @@ -1298,7 +1298,7 @@ def _set_ms_from_byte_stream( # Measure values less than -10e38 are nodata values according to the spec if next_shape - b_io.tell() >= nPoints * 8: self.m = [] - for m in _Array[float]("d", 
unpack(f"<{nPoints}d", b_io.read(nPoints * 8))): + for m in unpack(f"<{nPoints}d", b_io.read(nPoints * 8)): if m > NODATA: self.m.append(m) else: @@ -2237,11 +2237,13 @@ def __shpHeader(self) -> None: shp.seek(32) self.shapeType = unpack(" None: for m_bound in unpack("<2d", shp.read(16)) ] # self.mbox = MBox(mmin=m_bounds[0], mmax=m_bounds[1]) - self.mbox = (m_bounds[0], m_bounds[1]) + self.mbox: MBox = (m_bounds[0], m_bounds[1]) def __shape( self, oid: Optional[int] = None, bbox: Optional[BBox] = None From 10703eef413cf4358e3b0b44db545eb77502397d Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:18:27 +0100 Subject: [PATCH 12/30] Get rid of the Faux-enum Don't rely on FIELD_TYPE_ALIASES --- src/shapefile.py | 103 ++++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 51 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 1ddd492..e66313f 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -175,43 +175,43 @@ def read(self, size: int = -1): ... BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO, WriteSeekableBinStream] +# https://en.wikipedia.org/wiki/.dbf#Database_records FieldTypeT = Literal["C", "D", "F", "L", "M", "N"] -# https://en.wikipedia.org/wiki/.dbf#Database_records -class FieldType: - """A bare bones 'enum', as the enum library noticeably slows performance.""" - - # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] - - C: Final = "C" # "Character" # (str) - D: Final = "D" # "Date" - F: Final = "F" # "Floating point" - L: Final = "L" # "Logical" # (bool) - M: Final = "M" # "Memo" # Legacy. 
(10 digit str, starting block in an .dbt file) - N: Final = "N" # "Numeric" # (int) - __members__: set[FieldTypeT] = { - "C", - "D", - "F", - "L", - "M", - "N", - } # set(__slots__) - {"__members__"} - - # def raise_if_invalid(field_type: Hashable): - # if field_type not in FieldType.__members__: - # raise ShapefileException( - # f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " - # ) - - -FIELD_TYPE_ALIASES: dict[Union[str, bytes], FieldTypeT] = {} -for c in FieldType.__members__: - FIELD_TYPE_ALIASES[c.upper()] = c - FIELD_TYPE_ALIASES[c.lower()] = c - FIELD_TYPE_ALIASES[c.encode("ascii").lower()] = c - FIELD_TYPE_ALIASES[c.encode("ascii").upper()] = c +# class FieldType: +# """A bare bones 'enum', as the enum library noticeably slows performance.""" + +# # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] + +# C: Final = "C" # "Character" # (str) +# D: Final = "D" # "Date" +# F: Final = "F" # "Floating point" +# L: Final = "L" # "Logical" # (bool) +# M: Final = "M" # "Memo" # Legacy. (10 digit str, starting block in an .dbt file) +# N: Final = "N" # "Numeric" # (int) +# __members__: set[FieldTypeT] = { +# "C", +# "D", +# "F", +# "L", +# "M", +# "N", +# } # set(__slots__) - {"__members__"} + +# # def raise_if_invalid(field_type: Hashable): +# # if field_type not in FieldType.__members__: +# # raise ShapefileException( +# # f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. 
" +# # ) + +FIELD_TYPE_ALIASES = dict.fromkeys("CDFLMN") +# FIELD_TYPE_ALIASES: dict[Union[str, bytes], FieldTypeT] = {} +# for c in FieldType.__members__: +# FIELD_TYPE_ALIASES[c.upper()] = c +# FIELD_TYPE_ALIASES[c.lower()] = c +# FIELD_TYPE_ALIASES[c.encode("ascii").lower()] = c +# FIELD_TYPE_ALIASES[c.encode("ascii").upper()] = c # Use functional syntax to have an attribute named type, a Python keyword @@ -231,25 +231,25 @@ def from_unchecked( ) -> Self: if field_type not in FIELD_TYPE_ALIASES: raise ShapefileException( - f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " + f"field_type must be in {FIELD_TYPE_ALIASES}. Got: {field_type=}. " ) - type_ = FIELD_TYPE_ALIASES[field_type] + # type_ = FIELD_TYPE_ALIASES[field_type] - if type_ is FieldType.D: + if field_type == "D": size = 8 decimal = 0 - elif type_ is FieldType.L: + elif field_type == "L": size = 1 decimal = 0 # A doctest in README.md previously passed in a string ('40') for size, # so explictly convert name to str, and size and decimal to ints. 
return cls( - name=str(name), field_type=type_, size=int(size), decimal=int(decimal) + name=str(name), field_type=field_type, size=int(size), decimal=int(decimal) ) def __repr__(self) -> str: - return f'Field(name="{self.name}", field_type=FieldType.{self.field_type}, size={self.size}, decimal={self.decimal})' + return f'Field(name="{self.name}", field_type="{self.field_type}", size={self.size}, decimal={self.decimal})' RecordValueNotDate = Union[bool, int, float, str, date] @@ -2452,7 +2452,8 @@ def __dbfHeader(self) -> None: name = encoded_name.decode(self.encoding, self.encodingErrors) name = name.lstrip() - field_type = FIELD_TYPE_ALIASES[encoded_type_char] + # field_type = FIELD_TYPE_ALIASES[encoded_type_char] + field_type = encoded_type_char.decode("ascii") self.fields.append(Field(name, field_type, size, decimal)) terminator = dbf.read(1) @@ -2462,7 +2463,7 @@ def __dbfHeader(self) -> None: ) # insert deletion field at start - self.fields.insert(0, Field("DeletionFlag", FieldType.C, 1, 0)) + self.fields.insert(0, Field("DeletionFlag", "C", 1, 0)) # store all field positions for easy lookups # note: fieldLookup gives the index position of a field inside Reader.fields @@ -2572,7 +2573,7 @@ def __record( # parse each value record = [] for (__name, typ, __size, decimal), value in zip(fieldTuples, recordContents): - if typ is FieldType.N or typ is FieldType.F: + if typ in ("N", "F"): #typ is FieldType.F: # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. value = value.split(b"\0")[0] value = value.replace(b"*", b"") # QGIS NULL is all '*' chars @@ -2598,7 +2599,7 @@ def __record( except ValueError: # not parseable as int, set to None value = None - elif typ is FieldType.D: + elif typ == "D": # date: 8 bytes - date stored as a string in the format YYYYMMDD. 
if ( not value.replace(b"\x00", b"") @@ -2616,7 +2617,7 @@ def __record( except (TypeError, ValueError): # if invalid date, just return as unicode string so user can decimalde value = str(value.strip()) - elif typ is FieldType.L: + elif typ == "L": # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. if value == b" ": value = None # space means missing or not yet set @@ -3312,11 +3313,11 @@ def record( def _dbf_missing_placeholder( value: RecordValue, field_type: FieldTypeT, size: int ) -> str: - if field_type is FieldType.N or field_type is FieldType.F: + if field_type in ("N", "F"): #field_type is FieldType.F: return "*" * size # QGIS NULL - if field_type is FieldType.D: + if field_type == "D": return "0" * 8 # QGIS NULL for date type - if field_type is FieldType.L: + if field_type == "L": return " " return str(value)[:size].ljust(size) @@ -3400,11 +3401,11 @@ def __dbfRecord(self, record: list[RecordValue]) -> None: if value in MISSING: str_val = self._dbf_missing_placeholder(value, type_, size) - elif type_ is FieldType.N or type_ is FieldType.F: + elif type_ in ("N", "F"): #type_ is FieldType.F: str_val = self._try_coerce_to_numeric_str(value, size, decimal) - elif type_ is FieldType.D: + elif type_ == "D": str_val = self._try_coerce_to_date_str(value) - elif type_ is FieldType.L: + elif type_ == "L": str_val = self._try_coerce_to_logical_str(value) else: if isinstance(value, bytes): From 5a77db3d553ab6b941bae0a33cbb6945e2446d51 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:26:14 +0100 Subject: [PATCH 13/30] Revert "Get rid of the Faux-enum" This reverts commit 10703eef413cf4358e3b0b44db545eb77502397d. 
--- src/shapefile.py | 103 +++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 52 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index e66313f..1ddd492 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -175,43 +175,43 @@ def read(self, size: int = -1): ... BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO, WriteSeekableBinStream] -# https://en.wikipedia.org/wiki/.dbf#Database_records FieldTypeT = Literal["C", "D", "F", "L", "M", "N"] -# class FieldType: -# """A bare bones 'enum', as the enum library noticeably slows performance.""" - -# # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] - -# C: Final = "C" # "Character" # (str) -# D: Final = "D" # "Date" -# F: Final = "F" # "Floating point" -# L: Final = "L" # "Logical" # (bool) -# M: Final = "M" # "Memo" # Legacy. (10 digit str, starting block in an .dbt file) -# N: Final = "N" # "Numeric" # (int) -# __members__: set[FieldTypeT] = { -# "C", -# "D", -# "F", -# "L", -# "M", -# "N", -# } # set(__slots__) - {"__members__"} - -# # def raise_if_invalid(field_type: Hashable): -# # if field_type not in FieldType.__members__: -# # raise ShapefileException( -# # f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. 
" -# # ) - -FIELD_TYPE_ALIASES = dict.fromkeys("CDFLMN") -# FIELD_TYPE_ALIASES: dict[Union[str, bytes], FieldTypeT] = {} -# for c in FieldType.__members__: -# FIELD_TYPE_ALIASES[c.upper()] = c -# FIELD_TYPE_ALIASES[c.lower()] = c -# FIELD_TYPE_ALIASES[c.encode("ascii").lower()] = c -# FIELD_TYPE_ALIASES[c.encode("ascii").upper()] = c +# https://en.wikipedia.org/wiki/.dbf#Database_records +class FieldType: + """A bare bones 'enum', as the enum library noticeably slows performance.""" + + # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] + + C: Final = "C" # "Character" # (str) + D: Final = "D" # "Date" + F: Final = "F" # "Floating point" + L: Final = "L" # "Logical" # (bool) + M: Final = "M" # "Memo" # Legacy. (10 digit str, starting block in an .dbt file) + N: Final = "N" # "Numeric" # (int) + __members__: set[FieldTypeT] = { + "C", + "D", + "F", + "L", + "M", + "N", + } # set(__slots__) - {"__members__"} + + # def raise_if_invalid(field_type: Hashable): + # if field_type not in FieldType.__members__: + # raise ShapefileException( + # f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " + # ) + + +FIELD_TYPE_ALIASES: dict[Union[str, bytes], FieldTypeT] = {} +for c in FieldType.__members__: + FIELD_TYPE_ALIASES[c.upper()] = c + FIELD_TYPE_ALIASES[c.lower()] = c + FIELD_TYPE_ALIASES[c.encode("ascii").lower()] = c + FIELD_TYPE_ALIASES[c.encode("ascii").upper()] = c # Use functional syntax to have an attribute named type, a Python keyword @@ -231,25 +231,25 @@ def from_unchecked( ) -> Self: if field_type not in FIELD_TYPE_ALIASES: raise ShapefileException( - f"field_type must be in {FIELD_TYPE_ALIASES}. Got: {field_type=}. " + f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. 
" ) - # type_ = FIELD_TYPE_ALIASES[field_type] + type_ = FIELD_TYPE_ALIASES[field_type] - if field_type == "D": + if type_ is FieldType.D: size = 8 decimal = 0 - elif field_type == "L": + elif type_ is FieldType.L: size = 1 decimal = 0 # A doctest in README.md previously passed in a string ('40') for size, # so explictly convert name to str, and size and decimal to ints. return cls( - name=str(name), field_type=field_type, size=int(size), decimal=int(decimal) + name=str(name), field_type=type_, size=int(size), decimal=int(decimal) ) def __repr__(self) -> str: - return f'Field(name="{self.name}", field_type="{self.field_type}", size={self.size}, decimal={self.decimal})' + return f'Field(name="{self.name}", field_type=FieldType.{self.field_type}, size={self.size}, decimal={self.decimal})' RecordValueNotDate = Union[bool, int, float, str, date] @@ -2452,8 +2452,7 @@ def __dbfHeader(self) -> None: name = encoded_name.decode(self.encoding, self.encodingErrors) name = name.lstrip() - # field_type = FIELD_TYPE_ALIASES[encoded_type_char] - field_type = encoded_type_char.decode("ascii") + field_type = FIELD_TYPE_ALIASES[encoded_type_char] self.fields.append(Field(name, field_type, size, decimal)) terminator = dbf.read(1) @@ -2463,7 +2462,7 @@ def __dbfHeader(self) -> None: ) # insert deletion field at start - self.fields.insert(0, Field("DeletionFlag", "C", 1, 0)) + self.fields.insert(0, Field("DeletionFlag", FieldType.C, 1, 0)) # store all field positions for easy lookups # note: fieldLookup gives the index position of a field inside Reader.fields @@ -2573,7 +2572,7 @@ def __record( # parse each value record = [] for (__name, typ, __size, decimal), value in zip(fieldTuples, recordContents): - if typ in ("N", "F"): #typ is FieldType.F: + if typ is FieldType.N or typ is FieldType.F: # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. 
value = value.split(b"\0")[0] value = value.replace(b"*", b"") # QGIS NULL is all '*' chars @@ -2599,7 +2598,7 @@ def __record( except ValueError: # not parseable as int, set to None value = None - elif typ == "D": + elif typ is FieldType.D: # date: 8 bytes - date stored as a string in the format YYYYMMDD. if ( not value.replace(b"\x00", b"") @@ -2617,7 +2616,7 @@ def __record( except (TypeError, ValueError): # if invalid date, just return as unicode string so user can decimalde value = str(value.strip()) - elif typ == "L": + elif typ is FieldType.L: # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. if value == b" ": value = None # space means missing or not yet set @@ -3313,11 +3312,11 @@ def record( def _dbf_missing_placeholder( value: RecordValue, field_type: FieldTypeT, size: int ) -> str: - if field_type in ("N", "F"): #field_type is FieldType.F: + if field_type is FieldType.N or field_type is FieldType.F: return "*" * size # QGIS NULL - if field_type == "D": + if field_type is FieldType.D: return "0" * 8 # QGIS NULL for date type - if field_type == "L": + if field_type is FieldType.L: return " " return str(value)[:size].ljust(size) @@ -3401,11 +3400,11 @@ def __dbfRecord(self, record: list[RecordValue]) -> None: if value in MISSING: str_val = self._dbf_missing_placeholder(value, type_, size) - elif type_ in ("N", "F"): #type_ is FieldType.F: + elif type_ is FieldType.N or type_ is FieldType.F: str_val = self._try_coerce_to_numeric_str(value, size, decimal) - elif type_ == "D": + elif type_ is FieldType.D: str_val = self._try_coerce_to_date_str(value) - elif type_ == "L": + elif type_ is FieldType.L: str_val = self._try_coerce_to_logical_str(value) else: if isinstance(value, bytes): From 8de848db0bcfae16316b7000d22892739670e23c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:33:35 +0100 Subject: [PATCH 14/30] Remove old namedtuple names from doctests --- README.md | 6 +++--- 
1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index cd4758c..66951be 100644 --- a/README.md +++ b/README.md @@ -430,7 +430,7 @@ and the bounding box area the shapefile covers: >>> len(sf) 663 >>> sf.bbox - BBox(xmin=-122.515048, ymin=37.652916, xmax=-122.327622, ymax=37.863433) + (-122.515048, 37.652916, -122.327622, 37.863433) Finally, if you would prefer to work with the entire shapefile in a different format, you can convert all of it to a GeoJSON dictionary, although you may lose @@ -1388,7 +1388,7 @@ Shapefiles containing M-values can be examined in several ways: >>> r = shapefile.Reader('shapefiles/test/linem') >>> r.mbox # the lower and upper bound of M-values in the shapefile - MBox(mmin=0.0, mmax=3.0) + (0.0, 3.0) >>> r.shape(0).m # flat list of M-values [0.0, None, 3.0, None, 0.0, None, None] @@ -1421,7 +1421,7 @@ To examine a Z-type shapefile you can do: >>> r = shapefile.Reader('shapefiles/test/linez') >>> r.zbox # the lower and upper bound of Z-values in the shapefile - ZBox(zmin=0.0, zmax=22.0) + (0.0, 22.0) >>> r.shape(0).z # flat list of Z-values [18.0, 20.0, 22.0, 0.0, 0.0, 0.0, 0.0, 15.0, 13.0, 14.0] From 960a7b28fa048285d169110a3713214caa829d3b Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:41:58 +0100 Subject: [PATCH 15/30] Restore original numerical coercion code --- src/shapefile.py | 46 +++++++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 1ddd492..90edcec 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3333,27 +3333,47 @@ def _try_coerce_to_numeric_str(value, size, decimal): # numeric or float: number stored as a string, # right justified, and padded with blanks # to the width of the field. + if not decimal: # force to int try: # first try to force directly to int. 
# forcing a large int to float and back to int # will lose information and result in wrong nr. - int_val = int(value) + value = int(value) except ValueError: # forcing directly to int failed, so was probably a float. - int_val = int(float(value)) - except TypeError: - raise ShapefileException(f"Could not form int from: {value}") - # length capped to the field size - return format(int_val, "d")[:size].rjust(size) - - try: - f_val = float(value) - except ValueError: - raise ShapefileException(f"Could not form float from: {value}") - # length capped to the field size - return format(f_val, f".{decimal}f")[:size].rjust(size) + value = int(float(value)) + return = format(value, "d")[:size].rjust( + size + ) # caps the size if exceeds the field size + else: + value = float(value) + return format(value, f".{deci}f")[:size].rjust( + size + ) # caps the size if exceeds the field size + + # if not decimal: + # # force to int + # try: + # # first try to force directly to int. + # # forcing a large int to float and back to int + # # will lose information and result in wrong nr. + # int_val = int(value) + # except ValueError: + # # forcing directly to int failed, so was probably a float. 
+ # int_val = int(float(value)) + # except TypeError: + # raise ShapefileException(f"Could not form int from: {value}") + # # length capped to the field size + # return format(int_val, "d")[:size].rjust(size) + + # try: + # f_val = float(value) + # except ValueError: + # raise ShapefileException(f"Could not form float from: {value}") + # # length capped to the field size + # return format(f_val, f".{decimal}f")[:size].rjust(size) @staticmethod def _try_coerce_to_date_str(value: RecordValue) -> str: From 730da43e40156aa6e51c16ec91deb9de4af3866c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:43:07 +0100 Subject: [PATCH 16/30] Fix return statement --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 90edcec..fe98001 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3344,7 +3344,7 @@ def _try_coerce_to_numeric_str(value, size, decimal): except ValueError: # forcing directly to int failed, so was probably a float. 
value = int(float(value)) - return = format(value, "d")[:size].rjust( + return format(value, "d")[:size].rjust( size ) # caps the size if exceeds the field size else: From d55f76caef6e6cbabbcff59565ad8f7e02b19a39 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:43:48 +0100 Subject: [PATCH 17/30] Correct arg name --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index fe98001..685362c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3349,7 +3349,7 @@ def _try_coerce_to_numeric_str(value, size, decimal): ) # caps the size if exceeds the field size else: value = float(value) - return format(value, f".{deci}f")[:size].rjust( + return format(value, f".{decimal}f")[:size].rjust( size ) # caps the size if exceeds the field size From 3b057cd49a1819d637de8193a8c35431581c25a2 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:54:44 +0100 Subject: [PATCH 18/30] Add in entire old __dbfRecord method --- src/shapefile.py | 91 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 685362c..9d1019f 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3398,7 +3398,11 @@ def _try_coerce_to_logical_str(value: RecordValue) -> str: return "F" return " " # unknown is set to space - def __dbfRecord(self, record: list[RecordValue]) -> None: + + + + + def __newdbfRecord(self, record: list[RecordValue]) -> None: """Writes the dbf records.""" f = self.__getFileObj(self.dbf) if self.recNum == 0: @@ -3445,6 +3449,91 @@ def __dbfRecord(self, record: list[RecordValue]) -> None: ) f.write(encoded_val) + + + def __dbfRecord(self, record): + """Writes the dbf records.""" + f = self.__getFileObj(self.dbf) + if self.recNum == 0: + # first records, so all fields should be set + # 
allowing us to write the dbf header + # cannot change the fields after this point + self.__dbfHeader() + # first byte of the record is deletion flag, always disabled + f.write(b" ") + # begin + self.recNum += 1 + fields = ( + field for field in self.fields if field[0] != "DeletionFlag" + ) # ignore deletionflag field in case it was specified + for (fieldName, fieldType, size, deci), value in zip(fields, record): + # write + fieldType = fieldType.upper() + size = int(size) + if fieldType in ("N", "F"): + # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. + if value in MISSING: + value = b"*" * size # QGIS NULL + elif not deci: + # force to int + try: + # first try to force directly to int. + # forcing a large int to float and back to int + # will lose information and result in wrong nr. + value = int(value) + except ValueError: + # forcing directly to int failed, so was probably a float. + value = int(float(value)) + value = format(value, "d")[:size].rjust( + size + ) # caps the size if exceeds the field size + else: + value = float(value) + value = format(value, f".{deci}f")[:size].rjust( + size + ) # caps the size if exceeds the field size + elif fieldType == "D": + # date: 8 bytes - date stored as a string in the format YYYYMMDD. + if isinstance(value, date): + value = f"{value.year:04d}{value.month:02d}{value.day:02d}" + elif isinstance(value, list) and len(value) == 3: + value = f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" + elif value in MISSING: + value = b"0" * 8 # QGIS NULL for date type + elif is_string(value) and len(value) == 8: + pass # value is already a date string + else: + raise ShapefileException( + "Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value." + ) + elif fieldType == "L": + # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
+ if value in MISSING: + value = b" " # missing is set to space + elif value in [True, 1]: + value = b"T" + elif value in [False, 0]: + value = b"F" + else: + value = b" " # unknown is set to space + else: + # anything else is forced to string, truncated to the length of the field + # value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) + value = str(value).encode(self.encoding, self.encodingErrors)[:size].ljust(size) + if not isinstance(value, bytes): + # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) + # value = b( + # value, "ascii", self.encodingErrors + # ) # should be default ascii encoding + value = value.encode('ascii', self.encodingErrors) + if len(value) != size: + raise ShapefileException( + "Shapefile Writer unable to pack incorrect sized value" + f" (size {len(value)}) into field '{fieldName}' (size {size})." + ) + f.write(value) + + def balance(self) -> None: """Adds corresponding empty attributes or null geometry records depending on which type of record was created to make sure all three files From 38a3f41d1f02c3f3f8daae83d90bf640c90d9e9c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 19:22:01 +0100 Subject: [PATCH 19/30] Refactor coercer into method. 
Remove call to deleted helper function --- src/shapefile.py | 90 +++++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 35 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 9d1019f..536e8fb 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3347,11 +3347,10 @@ def _try_coerce_to_numeric_str(value, size, decimal): return format(value, "d")[:size].rjust( size ) # caps the size if exceeds the field size - else: - value = float(value) - return format(value, f".{decimal}f")[:size].rjust( - size - ) # caps the size if exceeds the field size + value = float(value) + return format(value, f".{decimal}f")[:size].rjust( + size + ) # caps the size if exceeds the field size # if not decimal: # # force to int @@ -3398,10 +3397,6 @@ def _try_coerce_to_logical_str(value: RecordValue) -> str: return "F" return " " # unknown is set to space - - - - def __newdbfRecord(self, record: list[RecordValue]) -> None: """Writes the dbf records.""" f = self.__getFileObj(self.dbf) @@ -3449,7 +3444,28 @@ def __newdbfRecord(self, record: list[RecordValue]) -> None: ) f.write(encoded_val) - + def _original_coerce_to_numeric_str(self, value, size, deci): + # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. + if value in MISSING: + return b"*" * size # QGIS NULL + if not deci: + # force to int + try: + # first try to force directly to int. + # forcing a large int to float and back to int + # will lose information and result in wrong nr. + value = int(value) + except ValueError: + # forcing directly to int failed, so was probably a float. 
+ value = int(float(value)) + return format(value, "d")[:size].rjust( + size + ) # caps the size if exceeds the field size + + value = float(value) + return format(value, f".{deci}f")[:size].rjust( + size + ) # caps the size if exceeds the field size def __dbfRecord(self, record): """Writes the dbf records.""" @@ -3471,27 +3487,28 @@ def __dbfRecord(self, record): fieldType = fieldType.upper() size = int(size) if fieldType in ("N", "F"): - # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. - if value in MISSING: - value = b"*" * size # QGIS NULL - elif not deci: - # force to int - try: - # first try to force directly to int. - # forcing a large int to float and back to int - # will lose information and result in wrong nr. - value = int(value) - except ValueError: - # forcing directly to int failed, so was probably a float. - value = int(float(value)) - value = format(value, "d")[:size].rjust( - size - ) # caps the size if exceeds the field size - else: - value = float(value) - value = format(value, f".{deci}f")[:size].rjust( - size - ) # caps the size if exceeds the field size + value = self._original_coerce_to_numeric_str(value, size, deci) + # # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. + # if value in MISSING: + # value = b"*" * size # QGIS NULL + # elif not deci: + # # force to int + # try: + # # first try to force directly to int. + # # forcing a large int to float and back to int + # # will lose information and result in wrong nr. + # value = int(value) + # except ValueError: + # # forcing directly to int failed, so was probably a float. 
+ # value = int(float(value)) + # value = format(value, "d")[:size].rjust( + # size + # ) # caps the size if exceeds the field size + # else: + # value = float(value) + # value = format(value, f".{deci}f")[:size].rjust( + # size + # ) # caps the size if exceeds the field size elif fieldType == "D": # date: 8 bytes - date stored as a string in the format YYYYMMDD. if isinstance(value, date): @@ -3500,7 +3517,7 @@ def __dbfRecord(self, record): value = f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" elif value in MISSING: value = b"0" * 8 # QGIS NULL for date type - elif is_string(value) and len(value) == 8: + elif isinstance(value, str) and len(value) == 8: pass # value is already a date string else: raise ShapefileException( @@ -3519,13 +3536,17 @@ def __dbfRecord(self, record): else: # anything else is forced to string, truncated to the length of the field # value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) - value = str(value).encode(self.encoding, self.encodingErrors)[:size].ljust(size) + value = ( + str(value) + .encode(self.encoding, self.encodingErrors)[:size] + .ljust(size) + ) if not isinstance(value, bytes): # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) # value = b( # value, "ascii", self.encodingErrors # ) # should be default ascii encoding - value = value.encode('ascii', self.encodingErrors) + value = value.encode("ascii", self.encodingErrors) if len(value) != size: raise ShapefileException( "Shapefile Writer unable to pack incorrect sized value" @@ -3533,7 +3554,6 @@ def __dbfRecord(self, record): ) f.write(value) - def balance(self) -> None: """Adds corresponding empty attributes or null geometry records depending on which type of record was created to make sure all three files From 141f7583586e28c35dc83db11724ab54788c237a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 20:20:07 +0100 Subject: [PATCH 
20/30] Only coerce if not already int or float. Check isinstance( ,int/float) first. --- src/shapefile.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 536e8fb..e9f13ec 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3450,19 +3450,20 @@ def _original_coerce_to_numeric_str(self, value, size, deci): return b"*" * size # QGIS NULL if not deci: # force to int - try: - # first try to force directly to int. - # forcing a large int to float and back to int - # will lose information and result in wrong nr. - value = int(value) - except ValueError: - # forcing directly to int failed, so was probably a float. - value = int(float(value)) + if not isinstance(value, int): + try: + # first try to force directly to int. + # forcing a large int to float and back to int + # will lose information and result in wrong nr. + value = int(value) + except ValueError: + # forcing directly to int failed, so was probably a float. 
+ value = int(float(value)) return format(value, "d")[:size].rjust( size ) # caps the size if exceeds the field size - - value = float(value) + if not isinstance(value, float): + value = float(value) return format(value, f".{deci}f")[:size].rjust( size ) # caps the size if exceeds the field size From af9f09d857804c6db350335bb8fdf57ac9b43c83 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 20:57:07 +0100 Subject: [PATCH 21/30] Make existing dbf record code statically typable --- src/shapefile.py | 125 ++++++++++++++++++++--------------------------- 1 file changed, 54 insertions(+), 71 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index e9f13ec..1207e84 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -229,11 +229,12 @@ def from_unchecked( size: int = 50, decimal: int = 0, ) -> Self: - if field_type not in FIELD_TYPE_ALIASES: + try: + type_ = FIELD_TYPE_ALIASES[field_type] + except KeyError: raise ShapefileException( f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " ) - type_ = FIELD_TYPE_ALIASES[field_type] if type_ is FieldType.D: size = 8 @@ -3444,30 +3445,6 @@ def __newdbfRecord(self, record: list[RecordValue]) -> None: ) f.write(encoded_val) - def _original_coerce_to_numeric_str(self, value, size, deci): - # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. - if value in MISSING: - return b"*" * size # QGIS NULL - if not deci: - # force to int - if not isinstance(value, int): - try: - # first try to force directly to int. - # forcing a large int to float and back to int - # will lose information and result in wrong nr. - value = int(value) - except ValueError: - # forcing directly to int failed, so was probably a float. 
- value = int(float(value)) - return format(value, "d")[:size].rjust( - size - ) # caps the size if exceeds the field size - if not isinstance(value, float): - value = float(value) - return format(value, f".{deci}f")[:size].rjust( - size - ) # caps the size if exceeds the field size - def __dbfRecord(self, record): """Writes the dbf records.""" f = self.__getFileObj(self.dbf) @@ -3485,39 +3462,40 @@ def __dbfRecord(self, record): ) # ignore deletionflag field in case it was specified for (fieldName, fieldType, size, deci), value in zip(fields, record): # write - fieldType = fieldType.upper() - size = int(size) + # fieldName, fieldType, size and deci were already checked + # when their Field instance was created and added to self.fields + str_val: Optional[str] = None + if fieldType in ("N", "F"): - value = self._original_coerce_to_numeric_str(value, size, deci) - # # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. - # if value in MISSING: - # value = b"*" * size # QGIS NULL - # elif not deci: - # # force to int - # try: - # # first try to force directly to int. - # # forcing a large int to float and back to int - # # will lose information and result in wrong nr. - # value = int(value) - # except ValueError: - # # forcing directly to int failed, so was probably a float. - # value = int(float(value)) - # value = format(value, "d")[:size].rjust( - # size - # ) # caps the size if exceeds the field size - # else: - # value = float(value) - # value = format(value, f".{deci}f")[:size].rjust( - # size - # ) # caps the size if exceeds the field size + # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. + if value in MISSING: + str_val = "*" * size # QGIS NULL + elif not deci: + # force to int + try: + # first try to force directly to int. + # forcing a large int to float and back to int + # will lose information and result in wrong nr. 
+ num_val = int(value) + except ValueError: + # forcing directly to int failed, so was probably a float. + num_val = int(float(value)) + str_val = format(num_val, "d")[:size].rjust( + size + ) # caps the size if exceeds the field size + else: + f_val = float(value) + str_val = format(f_val, f".{deci}f")[:size].rjust( + size + ) # caps the size if exceeds the field size elif fieldType == "D": # date: 8 bytes - date stored as a string in the format YYYYMMDD. if isinstance(value, date): - value = f"{value.year:04d}{value.month:02d}{value.day:02d}" + str_val = f"{value.year:04d}{value.month:02d}{value.day:02d}" elif isinstance(value, list) and len(value) == 3: - value = f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" + str_val = f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" elif value in MISSING: - value = b"0" * 8 # QGIS NULL for date type + str_val = "0" * 8 # QGIS NULL for date type elif isinstance(value, str) and len(value) == 8: pass # value is already a date string else: @@ -3527,33 +3505,38 @@ def __dbfRecord(self, record): elif fieldType == "L": # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
if value in MISSING: - value = b" " # missing is set to space + str_val = " " # missing is set to space elif value in [True, 1]: - value = b"T" + str_val = "T" elif value in [False, 0]: - value = b"F" + str_val = "F" else: - value = b" " # unknown is set to space - else: - # anything else is forced to string, truncated to the length of the field - # value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) - value = ( + str_val = " " # unknown is set to space + + if str_val is None: + # Types C and M, and anything else, value is forced to string, + # encoded by the codec specified to the Writer (utf-8 by default), + # then the resulting bytes are padded and truncated to the length + # of the field + encoded = ( str(value) .encode(self.encoding, self.encodingErrors)[:size] .ljust(size) ) - if not isinstance(value, bytes): - # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) - # value = b( - # value, "ascii", self.encodingErrors - # ) # should be default ascii encoding - value = value.encode("ascii", self.encodingErrors) - if len(value) != size: + else: + # str_val was given a not-None string value + # under the checks for fieldTypes "N", "F", "D", or "L" above + # Numeric, logical, and date numeric types are ascii already, but + # for Shapefile or dbf spec reasons + # "should be default ascii encoding" + encoded = str_val.encode("ascii", self.encodingErrors) + + if len(encoded) != size: raise ShapefileException( - "Shapefile Writer unable to pack incorrect sized value" - f" (size {len(value)}) into field '{fieldName}' (size {size})." + f"Shapefile Writer unable to pack incorrect sized {value=}" + f" (encoded as {len(encoded)}B) into field '{fieldName}' ({size}B)." 
) - f.write(value) + f.write(encoded) def balance(self) -> None: """Adds corresponding empty attributes or null geometry records depending From 68fa45f62a18c6923d67cb8a336d08feed42f755 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 21:00:50 +0100 Subject: [PATCH 22/30] Delete unused methods --- src/shapefile.py | 134 ----------------------------------------------- 1 file changed, 134 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 1207e84..b97e22c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3309,141 +3309,7 @@ def record( record = ["" for _ in range(fieldCount)] self.__dbfRecord(record) - @staticmethod - def _dbf_missing_placeholder( - value: RecordValue, field_type: FieldTypeT, size: int - ) -> str: - if field_type is FieldType.N or field_type is FieldType.F: - return "*" * size # QGIS NULL - if field_type is FieldType.D: - return "0" * 8 # QGIS NULL for date type - if field_type is FieldType.L: - return " " - return str(value)[:size].ljust(size) - - @overload - @staticmethod - def _try_coerce_to_numeric_str(value: date, size: int, decimal: int) -> Never: ... - @overload - @staticmethod - def _try_coerce_to_numeric_str( - value: RecordValueNotDate, size: int, decimal: int - ) -> str: ... - @staticmethod - def _try_coerce_to_numeric_str(value, size, decimal): - # numeric or float: number stored as a string, - # right justified, and padded with blanks - # to the width of the field. - - if not decimal: - # force to int - try: - # first try to force directly to int. - # forcing a large int to float and back to int - # will lose information and result in wrong nr. - value = int(value) - except ValueError: - # forcing directly to int failed, so was probably a float. 
- value = int(float(value)) - return format(value, "d")[:size].rjust( - size - ) # caps the size if exceeds the field size - value = float(value) - return format(value, f".{decimal}f")[:size].rjust( - size - ) # caps the size if exceeds the field size - - # if not decimal: - # # force to int - # try: - # # first try to force directly to int. - # # forcing a large int to float and back to int - # # will lose information and result in wrong nr. - # int_val = int(value) - # except ValueError: - # # forcing directly to int failed, so was probably a float. - # int_val = int(float(value)) - # except TypeError: - # raise ShapefileException(f"Could not form int from: {value}") - # # length capped to the field size - # return format(int_val, "d")[:size].rjust(size) - - # try: - # f_val = float(value) - # except ValueError: - # raise ShapefileException(f"Could not form float from: {value}") - # # length capped to the field size - # return format(f_val, f".{decimal}f")[:size].rjust(size) - - @staticmethod - def _try_coerce_to_date_str(value: RecordValue) -> str: - # date: 8 bytes - date stored as a string in the format YYYYMMDD. - if isinstance(value, date): - return f"{value.year:04d}{value.month:02d}{value.day:02d}" - if isinstance(value, (list, tuple)) and len(value) == 3: - return f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" - if isinstance(value, str) and len(value) == 8: - return value # value is already a date string - - raise ShapefileException( - "Date values must be either a datetime.date object, a list/tuple, a YYYYMMDD string, or a missing value." - ) - - @staticmethod - def _try_coerce_to_logical_str(value: RecordValue) -> str: - # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
- if value == 1: # True == 1 - return "T" - if value == 0: # False == 0 - return "F" - return " " # unknown is set to space - - def __newdbfRecord(self, record: list[RecordValue]) -> None: - """Writes the dbf records.""" - f = self.__getFileObj(self.dbf) - if self.recNum == 0: - # first records, so all fields should be set - # allowing us to write the dbf header - # cannot change the fields after this point - self.__dbfHeader() - # first byte of the record is deletion flag, always disabled - f.write(b" ") - # begin - self.recNum += 1 - fields = ( - field for field in self.fields if field[0] != "DeletionFlag" - ) # ignore deletionflag field in case it was specified - for (fieldName, type_, size, decimal), value in zip(fields, record): - # write - size = int(size) - str_val: str - - if value in MISSING: - str_val = self._dbf_missing_placeholder(value, type_, size) - elif type_ is FieldType.N or type_ is FieldType.F: - str_val = self._try_coerce_to_numeric_str(value, size, decimal) - elif type_ is FieldType.D: - str_val = self._try_coerce_to_date_str(value) - elif type_ is FieldType.L: - str_val = self._try_coerce_to_logical_str(value) - else: - if isinstance(value, bytes): - str_val = value.decode(self.encoding, self.encodingErrors) - else: - # anything else is forced to string. - str_val = str(value) - - # Truncate or right pad to the length of the field - encoded_val = str_val.encode(self.encoding, self.encodingErrors)[ - :size - ].ljust(size) - if len(encoded_val) != size: - raise ShapefileException( - f"Shapefile Writer unable to pack incorrect sized {value=!r} " - f"(size {len(encoded_val)}) into field '{fieldName}' (size {size})." 
- ) - f.write(encoded_val) def __dbfRecord(self, record): """Writes the dbf records.""" From 17431a244f82761290c8ef1f513207ab16d59c9b Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 21:02:06 +0100 Subject: [PATCH 23/30] Reformat --- src/shapefile.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index b97e22c..ca761a4 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3309,8 +3309,6 @@ def record( record = ["" for _ in range(fieldCount)] self.__dbfRecord(record) - - def __dbfRecord(self, record): """Writes the dbf records.""" f = self.__getFileObj(self.dbf) From c5ba356a67b0ecd5ebd51466dab1311214310e81 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 21:19:53 +0100 Subject: [PATCH 24/30] Copy in changes from Use-identity-not-equality that PR/Merge conflicts messed up --- src/shapefile.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index ea1f8c1..ca761a4 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -19,7 +19,6 @@ import tempfile import time import zipfile -from collections.abc import Hashable from datetime import date from struct import Struct, calcsize, error, pack, unpack from typing import ( @@ -183,7 +182,7 @@ def read(self, size: int = -1): ... 
class FieldType: """A bare bones 'enum', as the enum library noticeably slows performance.""" - # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__", "raise_if_invalid", "is_numeric"] + # __slots__ = ["C", "D", "F", "L", "M", "N", "__members__"] C: Final = "C" # "Character" # (str) D: Final = "D" # "Date" @@ -200,6 +199,11 @@ class FieldType: "N", } # set(__slots__) - {"__members__"} + # def raise_if_invalid(field_type: Hashable): + # if field_type not in FieldType.__members__: + # raise ShapefileException( + # f"field_type must be in {{FieldType.__members__}}. Got: {field_type=}. " + # ) FIELD_TYPE_ALIASES: dict[Union[str, bytes], FieldTypeT] = {} @@ -210,7 +214,7 @@ class FieldType: FIELD_TYPE_ALIASES[c.encode("ascii").upper()] = c - +# Use functional syntax to have an attribute named type, a Python keyword class Field(NamedTuple): name: str field_type: FieldTypeT @@ -2569,7 +2573,7 @@ def __record( # parse each value record = [] for (__name, typ, __size, decimal), value in zip(fieldTuples, recordContents): - if FieldType.is_numeric(typ): + if typ is FieldType.N or typ is FieldType.F: # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. value = value.split(b"\0")[0] value = value.replace(b"*", b"") # QGIS NULL is all '*' chars @@ -2595,7 +2599,7 @@ def __record( except ValueError: # not parseable as int, set to None value = None - elif typ == FieldType.D: + elif typ is FieldType.D: # date: 8 bytes - date stored as a string in the format YYYYMMDD. if ( not value.replace(b"\x00", b"") @@ -2613,7 +2617,7 @@ def __record( except (TypeError, ValueError): # if invalid date, just return as unicode string so user can decimalde value = str(value.strip()) - elif typ == FieldType.L: + elif typ is FieldType.L: # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
if value == b" ": value = None # space means missing or not yet set @@ -3326,8 +3330,6 @@ def __dbfRecord(self, record): # when their Field instance was created and added to self.fields str_val: Optional[str] = None - - if fieldType in ("N", "F"): # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. if value in MISSING: From 5c1ca4504d3cceb69416688612f5724cc953a5ff Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 22:07:56 +0100 Subject: [PATCH 25/30] REstore original (correct) __zbox and __mbox implementations. --- src/shapefile.py | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index ca761a4..b1cb504 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1162,7 +1162,7 @@ def _get_nparts_from_byte_stream(b_io: ReadableBinStream) -> int: return unpack(" int: + def _write_nparts_to_byte_stream(b_io: WriteableBinStream, s: _CanHaveParts) -> int: return b_io.write(pack(" BBox: self._bbox = (min(x), min(y), max(x), max(y)) return self._bbox - def __zbox(self, s) -> ZBox: + def __zbox(self, s: Union[_HasZ, PointZ]) -> ZBox: z: list[float] = [] - if self._zbox: - z.extend(self._zbox) - for p in s.points: try: z.append(p[2]) @@ -2996,18 +2993,19 @@ def __zbox(self, s) -> ZBox: # point did not have z value # setting it to 0 is probably ok, since it means all are on the same elevation z.append(0) + zbox = (min(z), max(z)) + # update global + if self._zbox: + # compare with existing + self._zbox = (min(zbox[0], self._zbox[0]), max(zbox[1], self._zbox[1])) + else: + # first time zbox is being set + self._zbox = zbox + return zbox - # Original self._zbox bounds (if any) are the first two entries. 
- # Set zbox for the first, and all later times - # self._zbox = ZBox(zmin=min(z), zmax=max(z)) - self._zbox = (min(z), max(z)) - return self._zbox - - def __mbox(self, s) -> MBox: - mpos = 3 if s.shapeType in _HasZ._shapeTypes else 2 + def __mbox(self, s: Union[_HasM, PointM]) -> MBox: + mpos = 3 if s.shapeType in _HasZ._shapeTypes | PointZ.shapeTypes else 2 m: list[float] = [] - if self._mbox: - m.extend(m_bound for m_bound in self._mbox if m_bound is not None) for p in s.points: try: @@ -3021,12 +3019,16 @@ def __mbox(self, s) -> MBox: if not m: # only if none of the shapes had m values, should mbox be set to missing m values m.append(NODATA) + mbox = (min(m), max(m)) + # update global + if self._mbox: + # compare with existing + self._mbox = (min(mbox[0], self._mbox[0]), max(mbox[1], self._mbox[1])) + else: + # first time mbox is being set + self._mbox = mbox + return mbox - # Original self._mbox bounds (if any) are the first two entries. - # Set mbox for the first, and all later times - # self._mbox = MBox(mmin=min(m), mmax=max(m)) - self._mbox = (min(m), max(m)) - return self._mbox @property def shapeTypeName(self) -> str: From b7f4f7dabbfbf95a244e6e2d6234d02814ab1ab2 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 23:11:06 +0100 Subject: [PATCH 26/30] Restore old __?box method implementations, and type them. 
--- src/shapefile.py | 123 ++++++++++++++++++++++++----------------------- 1 file changed, 63 insertions(+), 60 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index b1cb504..5049653 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -124,7 +124,7 @@ PointsT = list[PointT] BBox = tuple[float, float, float, float] -MBox = tuple[Optional[float], Optional[float]] +MBox = tuple[float, float] ZBox = tuple[float, float] # class BBox(NamedTuple): @@ -727,8 +727,8 @@ def __init__( # Preserve previous behaviour for anyone who set self.shapeType = None if not isinstance(shapeType, _NoShapeTypeSentinel): self.shapeType = shapeType - self.points = points or [] - self.parts = parts or [] + self.points: PointsT = points or [] + self.parts: Sequence[int] = parts or [] if partTypes: self.partTypes = partTypes @@ -2254,7 +2254,7 @@ def __shpHeader(self) -> None: for m_bound in unpack("<2d", shp.read(16)) ] # self.mbox = MBox(mmin=m_bounds[0], mmax=m_bounds[1]) - self.mbox: MBox = (m_bounds[0], m_bounds[1]) + self.mbox: tuple[Optional[float], Optional[float]] = (m_bounds[0], m_bounds[1]) def __shape( self, oid: Optional[int] = None, bbox: Optional[BBox] = None @@ -2959,41 +2959,47 @@ def __shpFileLength(self) -> int: return size def __bbox(self, s: Shape) -> BBox: - x: list[float] = [] - y: list[float] = [] + xs: list[float] = [] + ys: list[float] = [] - if self._bbox: - x.append(self._bbox[0]) - y.append(self._bbox[1]) - x.append(self._bbox[2]) - y.append(self._bbox[3]) - - if len(s.points) > 0: - px, py = list(zip(*s.points))[:2] - x.extend(px) - y.extend(py) - else: + if not s.points: # this should not happen. # any shape that is not null should have at least one point, and only those should be sent here. # could also mean that earlier code failed to add points to a non-null shape. - raise ValueError( + raise ShapefileException( "Cannot create bbox. Expected a valid shape with at least one point. " - f"Got a shape of type '{s.shapeType}' and 0 points." 
+ f"Got a shape of type {s.shapeType=} and 0 points." ) - # self._bbox = BBox(xmin=min(x), ymin=min(y), xmax=max(x), ymax=max(y)) - self._bbox = (min(x), min(y), max(x), max(y)) - return self._bbox + + for point in s.points: + xs.append(point[0]) + ys.append(point[1]) + + shape_bbox = (min(xs), min(ys), max(xs), max(ys)) + # update global + if self._bbox: + # compare with existing + self._bbox = ( + min(shape_bbox[0], self._bbox[0]), + min(shape_bbox[1], self._bbox[1]), + max(shape_bbox[2], self._bbox[2]), + max(shape_bbox[3], self._bbox[3]), + ) + else: + # first time bbox is being set + self._bbox = shape_bbox + return shape_bbox def __zbox(self, s: Union[_HasZ, PointZ]) -> ZBox: - z: list[float] = [] - for p in s.points: - try: - z.append(p[2]) - except IndexError: - # point did not have z value - # setting it to 0 is probably ok, since it means all are on the same elevation - z.append(0) - zbox = (min(z), max(z)) + shape_zs: list[float] = [] + if s.z: + shape_zs.extend(s.z) + else: + for p in s.points: + # On a ShapeZ type, Z is at index 2 and M (if present) at index 3, so the point can be a 3-tuple or 4-tuple. 
+ z = p[2] if len(p) >= 3 and p[2] is not None else 0 + shape_zs.append(z) + zbox = (min(shape_zs), max(shape_zs)) # update global if self._zbox: # compare with existing @@ -3004,22 +3010,20 @@ def __zbox(self, s: Union[_HasZ, PointZ]) -> ZBox: return zbox def __mbox(self, s: Union[_HasM, PointM]) -> MBox: - mpos = 3 if s.shapeType in _HasZ._shapeTypes | PointZ.shapeTypes else 2 - m: list[float] = [] + mpos = 3 if s.shapeType in _HasZ._shapeTypes | PointZ._shapeTypes else 2 + shape_ms: list[float] = [] + if s.m: + shape_ms.extend(m for m in s.m if m is not None) + else: + for p in s.points: + m = p[mpos] if len(p) >= mpos + 1 else None + if m is not None: + shape_ms.append(m) - for p in s.points: - try: - if p[mpos] is not None: - # mbox should only be calculated on valid m values - m.append(p[mpos]) - except IndexError: - # point did not have m value so is missing - # mbox should only be calculated on valid m values - pass - if not m: + if not shape_ms: # only if none of the shapes had m values, should mbox be set to missing m values - m.append(NODATA) - mbox = (min(m), max(m)) + shape_ms.append(NODATA) + mbox = (min(shape_ms), max(shape_ms)) # update global if self._mbox: # compare with existing @@ -3029,7 +3033,6 @@ def __mbox(self, s: Union[_HasM, PointM]) -> MBox: self._mbox = mbox return mbox - @property def shapeTypeName(self) -> str: return SHAPETYPE_LOOKUP[self.shapeType or 0] @@ -3210,17 +3213,17 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: # For both single point and multiple-points non-null shapes, # update bbox, mbox and zbox of the whole shapefile - new_bbox = self.__bbox(s) if s.shapeType != NULL else None - new_mbox = ( - self.__mbox(s) - if s.shapeType in PointM._shapeTypes | _HasM._shapeTypes - else None - ) - new_zbox = ( - self.__zbox(s) - if s.shapeType in PointZ._shapeTypes | _HasZ._shapeTypes - else None - ) + shape_bbox = self.__bbox(s) if s.shapeType != NULL else None + + if s.shapeType in PointM._shapeTypes | _HasM._shapeTypes: + 
shape_mbox = self.__mbox(cast(Union[_HasM, PointM], s)) + else: + shape_mbox = None + + if s.shapeType in PointZ._shapeTypes | _HasZ._shapeTypes: + shape_zbox = self.__zbox(cast(Union[_HasZ, PointZ], s)) + else: + shape_zbox = None # Create an in-memory binary buffer to avoid # unnecessary seeks to files on disk @@ -3243,9 +3246,9 @@ def __shpRecord(self, s: Shape) -> tuple[int, int]: b_io=b_io, s=s, i=self.shpNum, - bbox=new_bbox, - mbox=new_mbox, - zbox=new_zbox, + bbox=shape_bbox, + mbox=shape_mbox, + zbox=shape_zbox, ) # Finalize record length as 16-bit words From 9deff14472bddfca4d992a83afd0ed48bcf31357 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 23:18:20 +0100 Subject: [PATCH 27/30] Initialise .m and .z on multi-point shapes not read from .shp files --- src/shapefile.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/shapefile.py b/src/shapefile.py index 5049653..a1f5c81 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1291,6 +1291,10 @@ class _HasM(_CanHaveBBox): ) m: Sequence[Optional[float]] + def __init__(self, *args, **kwargs): + self.z = [] + super().__init__(*args, **kwargs) + def _set_ms_from_byte_stream( self, b_io: ReadSeekableBinStream, nPoints: int, next_shape: int ): @@ -1360,6 +1364,10 @@ class _HasZ(_CanHaveBBox): ) z: Sequence[float] + def __init__(self, *args, **kwargs): + self.z = [] + super().__init__(*args, **kwargs) + def _set_zs_from_byte_stream(self, b_io: ReadableBinStream, nPoints: int): __zmin, __zmax = unpack("<2d", b_io.read(16)) # pylint: disable=unused-private-member self.z = _Array[float]("d", unpack(f"<{nPoints}d", b_io.read(nPoints * 8))) From 5dfa9fe675537fb376f62d6b307f5588210ab0ee Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 23:30:38 +0100 Subject: [PATCH 28/30] Specify .m on PolylineM etc. 
--- src/shapefile.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index a1f5c81..150d0ce 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1327,7 +1327,7 @@ def _write_ms_to_byte_stream( f"Failed to write measure extremes for record {i}. Expected floats" ) try: - if hasattr(s, "m"): + if getattr(s, "m", False): # if m values are stored in attribute ms = [m if m is not None else NODATA for m in s.m] @@ -1335,12 +1335,9 @@ def _write_ms_to_byte_stream( # if m values are stored as 3rd/4th dimension # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) mpos = 3 if s.shapeType in _HasZ._shapeTypes else 2 - ms = [] - for p in s.points: - if len(p) > mpos and p[mpos] is not None: - ms.append(p[mpos]) - else: - ms.append(NODATA) + ms = [p[mpos] if len(p) > mpos and p[mpos] is not None else NODATA + for p in s.points + ] num_bytes_written += b_io.write(pack(f"<{len(ms)}d", *ms)) @@ -1388,7 +1385,7 @@ def _write_zs_to_byte_stream( f"Failed to write elevation extremes for record {i}. Expected floats." ) try: - if hasattr(s, "z"): + if getattr(s, "z", False): # if z values are stored in attribute zs = s.z else: @@ -1444,16 +1441,13 @@ def _write_single_point_m_to_byte_stream( # Write a single M value # Note: missing m values are autoset to NODATA. - if hasattr(s, "m"): + if hasattr(s, "m", False): # if m values are stored in attribute try: # if not s.m or s.m[0] is None: # s.m = (NODATA,) # m = s.m[0] - if s.m and s.m[0] is not None: - m = s.m[0] - else: - m = NODATA + m = s.m[0] if s.m and s.m[0] is not None else NODATA except error: raise ShapefileException( f"Failed to write measure value for record {i}. Expected floats." 
From b456d681b2e0d5bb87bf0d57fb2bfbcfa837b496 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 23:41:20 +0100 Subject: [PATCH 29/30] Initialise .m on _HasM instances, not .z --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 150d0ce..287ab60 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1292,7 +1292,7 @@ class _HasM(_CanHaveBBox): m: Sequence[Optional[float]] def __init__(self, *args, **kwargs): - self.z = [] + self.m = [] super().__init__(*args, **kwargs) def _set_ms_from_byte_stream( From 6d17738c43c29888e387f39b3257da08f489c793 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 1 Aug 2025 23:56:05 +0100 Subject: [PATCH 30/30] Fix type checking --- src/shapefile.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 287ab60..2c741d0 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1329,15 +1329,18 @@ def _write_ms_to_byte_stream( try: if getattr(s, "m", False): # if m values are stored in attribute - ms = [m if m is not None else NODATA for m in s.m] + ms = [m if m is not None else NODATA for m in cast(_HasM, s).m] else: # if m values are stored as 3rd/4th dimension # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) mpos = 3 if s.shapeType in _HasZ._shapeTypes else 2 - ms = [p[mpos] if len(p) > mpos and p[mpos] is not None else NODATA - for p in s.points - ] + ms = [ + cast(float, p[mpos]) + if len(p) > mpos and p[mpos] is not None + else NODATA + for p in s.points + ] num_bytes_written += b_io.write(pack(f"<{len(ms)}d", *ms)) @@ -1387,10 +1390,10 @@ def _write_zs_to_byte_stream( try: if getattr(s, "z", False): # if z values are stored in attribute - zs = s.z + zs = cast(_HasZ, s).z else: # if z values are stored as 3rd dimension - zs = [p[2] 
if len(p) > 2 else 0 for p in s.points] + zs = [cast(float, p[2]) if len(p) > 2 else 0 for p in s.points] num_bytes_written += b_io.write(pack(f"<{len(zs)}d", *zs)) except error: @@ -1441,13 +1444,14 @@ def _write_single_point_m_to_byte_stream( # Write a single M value # Note: missing m values are autoset to NODATA. - if hasattr(s, "m", False): + if getattr(s, "m", False): # if m values are stored in attribute try: # if not s.m or s.m[0] is None: # s.m = (NODATA,) # m = s.m[0] - m = s.m[0] if s.m and s.m[0] is not None else NODATA + s = cast(_HasM, s) + m = s.m[0] if s.m and s.m[0] is not None else NODATA except error: raise ShapefileException( f"Failed to write measure value for record {i}. Expected floats." @@ -1464,7 +1468,7 @@ def _write_single_point_m_to_byte_stream( # s.points[0][mpos] = NODATA m = NODATA else: - m = s.points[0][mpos] + m = cast(float, s.points[0][mpos]) except error: raise ShapefileException(