Skip to content

Commit

Permalink
cleanup: make Plane generic
Browse files (browse the repository at this point in the history)
  • Loading branch information
0xabu committed Sep 4, 2021
1 parent 624fc92 commit 5f82eb4
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 22 deletions.
23 changes: 13 additions & 10 deletions pdfminer/layout.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,6 +4,7 @@
Sequence, Set, Tuple, TypeVar, Union, cast)

from .utils import INF
from .utils import LTComponentT
from .utils import Matrix
from .utils import Plane
from .utils import Point
Expand Down Expand Up @@ -435,7 +436,8 @@ def analyze(self, laparams: LAParams) -> None:
LTContainer.add(self, LTAnno('\n'))
return

def find_neighbors(self, plane: Plane, ratio: float) -> List["LTTextLine"]:
def find_neighbors(self, plane: Plane[LTComponentT], ratio: float
) -> List["LTTextLine"]:
raise NotImplementedError


Expand All @@ -456,7 +458,8 @@ def add(self, obj: LTComponent) -> None: # type: ignore[override]
super().add(obj)
return

def find_neighbors(self, plane: Plane, ratio: float) -> List[LTTextLine]:
def find_neighbors(self, plane: Plane[LTComponentT], ratio: float
) -> List[LTTextLine]:
"""
Finds neighboring LTTextLineHorizontals in the plane.
Expand Down Expand Up @@ -518,7 +521,8 @@ def add(self, obj: LTComponent) -> None: # type: ignore[override]
super().add(obj)
return

def find_neighbors(self, plane: Plane, ratio: float) -> List[LTTextLine]:
def find_neighbors(self, plane: Plane[LTComponentT], ratio: float
) -> List[LTTextLine]:
"""
Finds neighboring LTTextLineVerticals in the plane.
Expand Down Expand Up @@ -724,7 +728,7 @@ def group_objects(self, laparams: LAParams, objs: Iterable[LTComponent]
def group_textlines(self, laparams: LAParams, lines: Iterable[LTTextLine]
) -> Iterator[LTTextBox]:
"""Group neighboring lines to textboxes"""
plane = Plane(self.bbox)
plane: Plane[LTTextLine] = Plane(self.bbox)
plane.extend(lines)
boxes: Dict[Any, LTTextBox] = {}
for line in lines:
Expand Down Expand Up @@ -773,6 +777,9 @@ def group_textboxes(self, laparams: LAParams, boxes: Sequence[LTTextBox]
:return: a list that has only one element, the final top level group.
"""

ElementT = Union[LTTextBox, LTTextGroup]
plane: Plane[ElementT] = Plane(self.bbox)

def dist(obj1: LTComponent, obj2: LTComponent) -> float:
"""A distance function between two TextBoxes.
Expand All @@ -792,7 +799,7 @@ def dist(obj1: LTComponent, obj2: LTComponent) -> float:
return (x1 - x0) * (y1 - y0) \
- obj1.width*obj1.height - obj2.width*obj2.height

def isany(obj1: LTComponent, obj2: LTComponent) -> Set[LTComponent]:
def isany(obj1: ElementT, obj2: ElementT) -> Set[ElementT]:
"""Check if there's any other object between obj1 and obj2."""
x0 = min(obj1.x0, obj2.x0)
y0 = min(obj1.y0, obj2.y0)
Expand All @@ -801,8 +808,7 @@ def isany(obj1: LTComponent, obj2: LTComponent) -> Set[LTComponent]:
objs = set(plane.find((x0, y0, x1, y1)))
return objs.difference((obj1, obj2))

dists: List[Tuple[bool, float, int, int, Union[LTTextBox, LTTextGroup],
Union[LTTextBox, LTTextGroup]]] = []
dists: List[Tuple[bool, float, int, int, ElementT, ElementT]] = []
for i in range(len(boxes)):
box1 = boxes[i]
for j in range(i+1, len(boxes)):
Expand All @@ -811,7 +817,6 @@ def isany(obj1: LTComponent, obj2: LTComponent) -> Set[LTComponent]:
box1, box2))
heapq.heapify(dists)

plane = Plane(self.bbox)
plane.extend(boxes)
done = set()
while len(dists) > 0:
Expand All @@ -831,8 +836,6 @@ def isany(obj1: LTComponent, obj2: LTComponent) -> Set[LTComponent]:
done.update([id1, id2])

for other in plane:
# This cast could be avoided by making Plane generic
other = cast(Union[LTTextBox, LTTextGroup], other)
heapq.heappush(dists, (False, dist(group, other),
id(group), id(other), group, other))
plane.add(group)
Expand Down
27 changes: 15 additions & 12 deletions pdfminer/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,8 +4,8 @@
import io
import pathlib
import struct
from typing import (Any, BinaryIO, Callable, Dict, Iterable, Iterator, List,
Optional, Set, TextIO, Tuple, TypeVar, Union,
from typing import (Any, BinaryIO, Callable, Dict, Generic, Iterable, Iterator,
List, Optional, Set, TextIO, Tuple, TypeVar, Union,
TYPE_CHECKING, cast)
from typing_extensions import Literal
from html import escape
Expand Down Expand Up @@ -426,7 +426,10 @@ def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point:
return max(0, iw), max(0, ih)


class Plane:
LTComponentT = TypeVar('LTComponentT', bound='LTComponent')


class Plane(Generic[LTComponentT]):
"""A set-like data structure for objects placed on a plane.
Can efficiently find objects in a certain rectangular area.
Expand All @@ -435,16 +438,16 @@ class Plane:
"""

def __init__(self, bbox: Rect, gridsize: int = 50):
self._seq: List["LTComponent"] = [] # preserve the object order.
self._objs: Set["LTComponent"] = set()
self._grid: Dict[Point, List["LTComponent"]] = {}
self._seq: List[LTComponentT] = [] # preserve the object order.
self._objs: Set[LTComponentT] = set()
self._grid: Dict[Point, List[LTComponentT]] = {}
self.gridsize = gridsize
(self.x0, self.y0, self.x1, self.y1) = bbox

def __repr__(self) -> str:
return '<Plane objs=%r>' % list(self)

def __iter__(self) -> Iterator["LTComponent"]:
def __iter__(self) -> Iterator[LTComponentT]:
return (obj for obj in self._seq if obj in self._objs)

def __len__(self) -> int:
Expand All @@ -465,23 +468,23 @@ def _getrange(self, bbox: Rect) -> Iterator[Point]:
for grid_x in drange(x0, x1, self.gridsize):
yield (grid_x, grid_y)

def extend(self, objs: Iterable["LTComponent"]) -> None:
def extend(self, objs: Iterable[LTComponentT]) -> None:
for obj in objs:
self.add(obj)

def add(self, obj: "LTComponent") -> None:
def add(self, obj: LTComponentT) -> None:
"""place an object."""
for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
if k not in self._grid:
r: List["LTComponent"] = []
r: List[LTComponentT] = []
self._grid[k] = r
else:
r = self._grid[k]
r.append(obj)
self._seq.append(obj)
self._objs.add(obj)

def remove(self, obj: "LTComponent") -> None:
def remove(self, obj: LTComponentT) -> None:
"""displace an object."""
for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
try:
Expand All @@ -490,7 +493,7 @@ def remove(self, obj: "LTComponent") -> None:
pass
self._objs.remove(obj)

def find(self, bbox: Rect) -> Iterator["LTComponent"]:
def find(self, bbox: Rect) -> Iterator[LTComponentT]:
"""finds objects that are in a certain area."""
(x0, y0, x1, y1) = bbox
done = set()
Expand Down

0 comments on commit 5f82eb4

Please sign in to comment.