Add a --scale option to predict.py and pass it to Predictor.predict() as
scaling_ratio, which multiplies the predicted bounding polygon before it is
normalized. A non-positive --scale is rejected through parser.error(), which
prints the usage text plus the message and exits with status 2.

diff --git a/predict.py b/predict.py
index bf64120..6436f1a 100644
--- a/predict.py
+++ b/predict.py
@@ -21,6 +21,12 @@
         required=True,
         help='the path to the model weight'
     )
+    parser.add_argument(
+        '--scale',
+        type=float,
+        default=1.0,
+        help='adjust the scaling ratio. default to 1.0.'
+    )
     parser.add_argument(
         '--save-annotated',
         type=str,
@@ -33,6 +39,9 @@
     )
     args = parser.parse_args()
 
+    if args.scale <= 0.0:
+        parser.error('scale must be greater than 0.0')
+
     if args.save_annotated is not None:
         save_annotated = Path(args.save_annotated)
         if not save_annotated.is_dir():
@@ -59,7 +68,7 @@
 
     streamer = ImageStreamer(args.source)
     for i, image in enumerate(streamer):
-        prediction = predictor.predict(image)
+        prediction = predictor.predict(image, scaling_ratio=args.scale)
 
         print(f'Prediction #{i}')
         print(' bounds', prediction.bounds.tolist())
diff --git a/wpodnet/backend.py b/wpodnet/backend.py
index 1c61e69..6eabce5 100644
--- a/wpodnet/backend.py
+++ b/wpodnet/backend.py
@@ -81,7 +81,7 @@ def _inference(self, image: torch.Tensor) -> Tuple[np.ndarray, np.ndarray]:
     def _get_max_anchor(self, probs: np.ndarray) -> Tuple[int, int]:
         return np.unravel_index(probs.argmax(), probs.shape)
 
-    def _get_bounds(self, affines: np.ndarray, anchor_y: int, anchor_x: int) -> np.ndarray:
+    def _get_bounds(self, affines: np.ndarray, anchor_y: int, anchor_x: int, scaling_ratio: float = 1.0) -> np.ndarray:
         # Compute theta
         theta = affines[:, anchor_y, anchor_x]
         theta = theta.reshape((2, 3))
@@ -89,7 +89,7 @@ def _get_bounds(self, affines: np.ndarray, anchor_y: int, anchor_x: int) -> np.n
         theta[1, 1] = max(theta[1, 1], 0.0)
 
         # Convert theta into the bounding polygon
-        bounds = np.matmul(theta, self._q) * self._scaling_const
+        bounds = np.matmul(theta, self._q) * self._scaling_const * scaling_ratio
 
         # Normalize the bounds
         _, grid_h, grid_w = affines.shape
@@ -98,7 +98,7 @@ def _get_bounds(self, affines: np.ndarray, anchor_y: int, anchor_x: int) -> np.n
 
         return np.transpose(bounds)
 
-    def predict(self, image: Image.Image) -> Prediction:
+    def predict(self, image: Image.Image, scaling_ratio: float = 1.0) -> Prediction:
         orig_h, orig_w = image.height, image.width
 
         # Resize the image to fixed ratio
@@ -115,7 +115,7 @@ def predict(self, image: Image.Image) -> Prediction:
         # Get the theta with maximum probability
         max_prob = np.amax(probs)
         anchor_y, anchor_x = self._get_max_anchor(probs)
-        bounds = self._get_bounds(affines, anchor_y, anchor_x)
+        bounds = self._get_bounds(affines, anchor_y, anchor_x, scaling_ratio)
         bounds[:, 0] *= orig_w
         bounds[:, 1] *= orig_h
