refactor(//py)!: Kwargs updates and support for shifting internal apis
BREAKING CHANGE: This commit changes the APIs from a dictionary of
arguments to a set of kwargs. You can port forward using:

```py
trtorch.compile(mod, **spec)
```
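For example, a minimal migration sketch (assuming a scripted module `mod`; the spec dictionary below is a hypothetical placeholder):

```py
import torch
import trtorch

# Hypothetical spec dictionary written against the old dictionary-based API.
spec = {
    "inputs": [trtorch.Input((1, 3, 224, 224))],
    "enabled_precisions": {torch.half},
}

# Old API: trt_mod = trtorch.compile(mod, spec)
# New API: unpack the same dictionary as keyword arguments.
trt_mod = trtorch.compile(mod, **spec)  # mod is a scripted torch.nn.Module (placeholder)
```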

Also, in preparation for partial compilation being enabled by default, settings related to torch fallback have been moved to the top level.

Instead of:

```py
"torch_fallback": {
  "enabled": True,
  "min_block_size" " 3,
  "forced_fallback_ops" : ["aten::add"],
  "forced_fallback_mods" : ["MySubModule"]
}
```

there are now new top-level settings:

```py
require_full_compilation=False,
min_block_size=3,
torch_executed_ops=["aten::add"],
torch_executed_modules=["MySubModule"]
```

Signed-off-by: Naren Dasan <naren@narendasan.com>
Signed-off-by: Naren Dasan <narens@nvidia.com>
narendasan committed Oct 19, 2021
1 parent 748ecf3 commit 2a0d1c8
Showing 8 changed files with 140 additions and 109 deletions.
2 changes: 1 addition & 1 deletion docsrc/requirements.txt
@@ -2,5 +2,5 @@ sphinx==3.1.2
 breathe==4.19.2
 exhale
 sphinx_rtd_theme==0.4.3
-sphinx-material==0.0.30
+sphinx-material==0.0.35
 nbsphinx==0.8.6
5 changes: 5 additions & 0 deletions py/trtorch/Device.py
@@ -105,6 +105,11 @@ def _from_torch_device(cls, torch_dev: torch.device):
         gpu_id = torch_dev.index
         return cls(gpu_id=gpu_id)
 
+    @classmethod
+    def _current_device(cls):
+        dev = trtorch._C._get_current_device()
+        return cls(gpu_id=dev.gpu_id)
+
     @staticmethod
     def _parse_device_str(s):
         s = s.lower()
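The new `_current_device` helper wraps the internal `trtorch._C._get_current_device()` binding and backs the default value of the `device` kwarg introduced below. A minimal usage sketch (internal, underscore-prefixed API, subject to change):

```py
import trtorch

# Wrap the currently active GPU in a trtorch.Device via the internal binding.
dev = trtorch.Device._current_device()
print(dev.gpu_id)  # e.g. 0 on a single-GPU machine
```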
124 changes: 71 additions & 53 deletions py/trtorch/_compile_spec.py
@@ -4,6 +4,7 @@
 from trtorch import _types
 from trtorch.Input import Input
 from trtorch.Device import Device
+from trtorch._types import EngineCapability
 
 import warnings

@@ -246,63 +247,80 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec:
return info


def TensorRTCompileSpec(compile_spec: Dict[str, Any]) -> torch.classes.tensorrt.CompileSpec:
"""
Utility to create a formatted spec dictionary for using the PyTorch TensorRT backend
Args:
compile_spec (dict): Compilation settings including operating precision, target device, etc.
One key is required which is ``inputs``, describing the input sizes or ranges for inputs
to the graph as well as expected types and formats for those inputs. All other keys are optional;
the dictionary contains an entry for each method to be compiled.
Note: Partial compilation of TorchScript modules is not supported through the PyTorch TensorRT backend.
If you need this feature, use trtorch.compile to compile your module. Usage of the resulting module is
as if you were using the TensorRT integration.
.. code-block:: py
CompileSpec = {
"forward" : trtorch.TensorRTCompileSpec({
"inputs": [
trtorch.Input((1, 3, 224, 224)), # Static input shape for input #1
trtorch.Input(
    min_shape=(1, 3, 224, 224),
    opt_shape=(1, 3, 512, 512),
    max_shape=(1, 3, 1024, 1024),
    dtype=torch.int32,
    format=torch.channels_last
) # Dynamic input shape for input #2
],
"device": {
"device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA)
"gpu_id": 0, # Target gpu id to run engine (Use Xavier as gpu id for DLA)
"dla_core": 0, # (DLA only) Target dla core id to run engine
"allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU
},
"enabled_precisions": {torch.half}, # Operating precision set to FP16
"sparse_weights": Enable sparsity for convolution and fully connected layers.
"disable_tf32": False, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas
"refit": False, # enable refit
"debug": False, # enable debuggable engine
"strict_types": False, # kernels should strictly run in operating precision
"capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels
"num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels
"num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels
"workspace_size": 0, # Maximum size of workspace given to TensorRT
"max_batch_size": 0, # Maximum batch size (must be >= 1 to be set, 0 means not set)
"truncate_long_and_double": False, # Truncate long and double into int and float
})
}
def TensorRTCompileSpec(inputs=[],
device=Device._current_device(),
disable_tf32=False,
sparse_weights=False,
enabled_precisions=set(),
refit=False,
debug=False,
strict_types=False,
capability=EngineCapability.default,
num_min_timing_iters=2,
num_avg_timing_iters=1,
workspace_size=0,
max_batch_size=0,
truncate_long_and_double=False,
calibrator=None) -> torch.classes.tensorrt.CompileSpec:
"""Utility to create a formated spec dictionary for using the PyTorch TensorRT backend
Keyword Args:
inputs (List[Union(trtorch.Input, torch.Tensor)]): **Required** List of specifications of input shape, dtype and memory layout for inputs to the module. Input sizes can be specified as torch sizes, tuples or lists. dtypes can be specified using
torch datatypes or trtorch datatypes and you can use either torch devices or the trtorch device type enum
to select device type. ::
inputs=[
trtorch.Input((1, 3, 224, 224)), # Static NCHW input shape for input #1
trtorch.Input(
min_shape=(1, 224, 224, 3),
opt_shape=(1, 512, 512, 3),
max_shape=(1, 1024, 1024, 3),
dtype=torch.int32,
format=torch.channels_last
), # Dynamic input shape for input #2
torch.randn((1, 3, 224, 224)) # Use an example tensor and let trtorch infer settings
]
device (Union(trtorch.Device, torch.device, dict)): Target device for TensorRT engines to run on ::
device=trtorch.Device("dla:1", allow_gpu_fallback=True)
disable_tf32 (bool): Force FP32 layers to use traditional FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulating the sum using 23-bit mantissas
sparse_weights (bool): Enable sparsity for convolution and fully connected layers.
enabled_precisions (Set(Union(torch.dtype, trtorch.dtype))): The set of datatypes that TensorRT can use when selecting kernels
refit (bool): Enable refitting
debug (bool): Enable debuggable engine
strict_types (bool): Kernels should strictly run in a particular operating precision. The enabled_precisions set should contain only one type.
capability (trtorch.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels
num_min_timing_iters (int): Number of minimization timing iterations used to select kernels
num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels
workspace_size (int): Maximum size of workspace given to TensorRT
max_batch_size (int): Maximum batch size (must be >= 1 to be set, 0 means not set)
truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32
calibrator (Union(trtorch._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration
Returns:
torch.classes.tensorrt.CompileSpec: List of methods and formatted spec objects to be provided to ``torch._C._jit_to_tensorrt``
"""

compile_spec = {
"inputs": inputs,
"device": device,
"disable_tf32": disable_tf32, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas
"sparse_weights": sparse_weights, #Enable sparsity for convolution and fully connected layers.
"enabled_precisions": enabled_precisions, # Enabling FP16 kernels
"refit": refit, # enable refit
"debug": debug, # enable debuggable engine
"strict_types": strict_types, # kernels should strictly run in operating precision
"capability": capability, # Restrict kernel selection to safe gpu kernels or safe dla kernels
"num_min_timing_iters": num_min_timing_iters, # Number of minimization timing iterations used to select kernels
"num_avg_timing_iters": num_avg_timing_iters, # Number of averaging timing iterations used to select kernels
"workspace_size": workspace_size, # Maximum size of workspace given to TensorRT
"max_batch_size": max_batch_size, # Maximum batch size (must be >= 1 to be set, 0 means not set)
"calibrator": calibrator,
"truncate_long_and_double": truncate_long_and_double
}

parsed_spec = _parse_compile_spec(compile_spec)

backend_spec = torch.classes.tensorrt.CompileSpec()
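To mirror the old nested-dictionary example above, a hedged sketch of calling the new kwargs-based API (the method name and shapes are illustrative):

```py
import torch
import trtorch

# Each TorchScript method name maps to a spec built with the new kwargs API;
# this plays the role of the old nested-dictionary CompileSpec.
spec = {
    "forward": trtorch.TensorRTCompileSpec(
        inputs=[trtorch.Input((1, 3, 224, 224))],
        enabled_precisions={torch.half},
    )
}
```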
