refactor(//py)!: Kwargs updates and support for shifting internal apis
BREAKING CHANGE: This commit changes the APIs from a dictionary of
arguments to a set of kwargs. You can port forward using:

```py
trtorch.compile(mod, **spec)
```
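For example, a minimal migration sketch (assuming a scripted module `mod`; the spec dictionary below is a hypothetical placeholder):

```py
import torch
import trtorch

# Hypothetical spec dictionary written against the old dictionary-based API.
spec = {
    "inputs": [trtorch.Input((1, 3, 224, 224))],
    "enabled_precisions": {torch.half},
}

# Old API: trt_mod = trtorch.compile(mod, spec)
# New API: unpack the same dictionary as keyword arguments.
trt_mod = trtorch.compile(mod, **spec)  # mod is a scripted torch.nn.Module (placeholder)
```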

Also, in preparation for partial compilation being enabled by default, settings related to torch fallback have been moved to the top level.

Instead of:

```py
"torch_fallback": {
  "enabled": True,
  "min_block_size" " 3,
  "forced_fallback_ops" : ["aten::add"],
  "forced_fallback_mods" : ["MySubModule"]
}
```

there are now new top-level settings:

```py
require_full_compilation=False,
min_block_size=3,
torch_executed_ops=["aten::add"],
torch_executed_modules=["MySubModule"]
```

Signed-off-by: Naren Dasan <naren@narendasan.com>
Signed-off-by: Naren Dasan <narens@nvidia.com>
narendasan committed Oct 19, 2021
1 parent 748ecf3 commit 2a0d1c8
Showing 8 changed files with 140 additions and 109 deletions.
2 changes: 1 addition & 1 deletion docsrc/requirements.txt
@@ -2,5 +2,5 @@ sphinx==3.1.2
 breathe==4.19.2
 exhale
 sphinx_rtd_theme==0.4.3
-sphinx-material==0.0.30
+sphinx-material==0.0.35
 nbsphinx==0.8.6
5 changes: 5 additions & 0 deletions py/trtorch/Device.py
@@ -105,6 +105,11 @@ def _from_torch_device(cls, torch_dev: torch.device):
         gpu_id = torch_dev.index
         return cls(gpu_id=gpu_id)
 
+    @classmethod
+    def _current_device(cls):
+        dev = trtorch._C._get_current_device()
+        return cls(gpu_id=dev.gpu_id)
+
     @staticmethod
     def _parse_device_str(s):
         s = s.lower()
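The new `_current_device` helper wraps the internal `trtorch._C._get_current_device()` binding and backs the default value of the `device` kwarg introduced below. A minimal usage sketch (internal, underscore-prefixed API, subject to change):

```py
import trtorch

# Wrap the currently active GPU in a trtorch.Device via the internal binding.
dev = trtorch.Device._current_device()
print(dev.gpu_id)  # e.g. 0 on a single-GPU machine
```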
124 changes: 71 additions & 53 deletions py/trtorch/_compile_spec.py
@@ -4,6 +4,7 @@
 from trtorch import _types
 from trtorch.Input import Input
 from trtorch.Device import Device
+from trtorch._types import EngineCapability
 
 import warnings

@@ -246,63 +247,80 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec:
return info


def TensorRTCompileSpec(compile_spec: Dict[str, Any]) -> torch.classes.tensorrt.CompileSpec:
"""
Utility to create a formatted spec dictionary for using the PyTorch TensorRT backend
Args:
compile_spec (dict): Compilation settings including operating precision, target device, etc.
One key is required which is ``inputs``, describing the input sizes or ranges for inputs
to the graph as well as expected types and formats for those inputs. All other keys are optional;
the dictionary contains an entry for each method to be compiled.
Note: Partial compilation of TorchScript modules is not supported through the PyTorch TensorRT backend.
If you need this feature, use trtorch.compile to compile your module. Usage of the resulting module is
as if you were using the TensorRT integration.
.. code-block:: py
CompileSpec = {
"forward" : trtorch.TensorRTCompileSpec({
"inputs": [
trtorch.Input((1, 3, 224, 224)), # Static input shape for input #1
trtorch.Input(
    min_shape=(1, 3, 224, 224),
    opt_shape=(1, 3, 512, 512),
    max_shape=(1, 3, 1024, 1024),
    dtype=torch.int32,
    format=torch.channels_last
) # Dynamic input shape for input #2
],
"device": {
"device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA)
"gpu_id": 0, # Target gpu id to run engine (Use Xavier as gpu id for DLA)
"dla_core": 0, # (DLA only) Target dla core id to run engine
"allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU
},
"enabled_precisions": {torch.half}, # Operating precision set to FP16
"sparse_weights": Enable sparsity for convolution and fully connected layers.
"disable_tf32": False, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas
"refit": False, # enable refit
"debug": False, # enable debuggable engine
"strict_types": False, # kernels should strictly run in operating precision
"capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels
"num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels
"num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels
"workspace_size": 0, # Maximum size of workspace given to TensorRT
"max_batch_size": 0, # Maximum batch size (must be >= 1 to be set, 0 means not set)
"truncate_long_and_double": False, # Truncate long and double into int and float
})
}
def TensorRTCompileSpec(inputs=[],
device=Device._current_device(),
disable_tf32=False,
sparse_weights=False,
enabled_precisions=set(),
refit=False,
debug=False,
strict_types=False,
capability=EngineCapability.default,
num_min_timing_iters=2,
num_avg_timing_iters=1,
workspace_size=0,
max_batch_size=0,
truncate_long_and_double=False,
calibrator=None) -> torch.classes.tensorrt.CompileSpec:
"""Utility to create a formated spec dictionary for using the PyTorch TensorRT backend
Keyword Args:
inputs (List[Union(trtorch.Input, torch.Tensor)]): **Required** List of specifications of input shape, dtype and memory layout for inputs to the module. Input sizes can be specified as torch sizes, tuples or lists. dtypes can be specified using
torch datatypes or trtorch datatypes and you can use either torch devices or the trtorch device type enum
to select device type. ::
inputs=[
trtorch.Input((1, 3, 224, 224)), # Static NCHW input shape for input #1
trtorch.Input(
min_shape=(1, 224, 224, 3),
opt_shape=(1, 512, 512, 3),
max_shape=(1, 1024, 1024, 3),
dtype=torch.int32,
format=torch.channels_last
), # Dynamic input shape for input #2
torch.randn((1, 3, 224, 224)) # Use an example tensor and let trtorch infer settings
]
device (Union(trtorch.Device, torch.device, dict)): Target device for TensorRT engines to run on ::
device=trtorch.Device("dla:1", allow_gpu_fallback=True)
disable_tf32 (bool): Force FP32 layers to use traditional FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulating the sum using 23-bit mantissas
sparse_weights (bool): Enable sparsity for convolution and fully connected layers.
enabled_precisions (Set(Union(torch.dtype, trtorch.dtype))): The set of datatypes that TensorRT can use when selecting kernels
refit (bool): Enable refitting
debug (bool): Enable debuggable engine
strict_types (bool): Kernels should strictly run in a particular operating precision. The enabled_precisions set should contain only one type.
capability (trtorch.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels
num_min_timing_iters (int): Number of minimization timing iterations used to select kernels
num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels
workspace_size (int): Maximum size of workspace given to TensorRT
max_batch_size (int): Maximum batch size (must be >= 1 to be set, 0 means not set)
truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32
calibrator (Union(trtorch._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration
Returns:
torch.classes.tensorrt.CompileSpec: List of methods and formatted spec objects to be provided to ``torch._C._jit_to_tensorrt``
"""

compile_spec = {
"inputs": inputs,
"device": device,
"disable_tf32": disable_tf32, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas
"sparse_weights": sparse_weights, #Enable sparsity for convolution and fully connected layers.
"enabled_precisions": enabled_precisions, # Enabling FP16 kernels
"refit": refit, # enable refit
"debug": debug, # enable debuggable engine
"strict_types": strict_types, # kernels should strictly run in operating precision
"capability": capability, # Restrict kernel selection to safe gpu kernels or safe dla kernels
"num_min_timing_iters": num_min_timing_iters, # Number of minimization timing iterations used to select kernels
"num_avg_timing_iters": num_avg_timing_iters, # Number of averaging timing iterations used to select kernels
"workspace_size": workspace_size, # Maximum size of workspace given to TensorRT
"max_batch_size": max_batch_size, # Maximum batch size (must be >= 1 to be set, 0 means not set)
"calibrator": calibrator,
"truncate_long_and_double": truncate_long_and_double
}

parsed_spec = _parse_compile_spec(compile_spec)

backend_spec = torch.classes.tensorrt.CompileSpec()
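To mirror the old nested-dictionary example above, a hedged sketch of calling the new kwargs-based API (the method name and shapes are illustrative):

```py
import torch
import trtorch

# Each TorchScript method name maps to a spec built with the new kwargs API;
# this plays the role of the old nested-dictionary CompileSpec.
spec = {
    "forward": trtorch.TensorRTCompileSpec(
        inputs=[trtorch.Input((1, 3, 224, 224))],
        enabled_precisions={torch.half},
    )
}
```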
