| @@ -0,0 +1,68 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter("torch.nn.Conv2d.forward", enabled=trt_version() < '7.0') | ||
| def convert_Conv2d(ctx): | ||
| module = ctx.method_args[0] | ||
| input = ctx.method_args[1] | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
|
|
||
| kernel_size = module.kernel_size | ||
| if not isinstance(kernel_size, tuple): | ||
| kernel_size = (kernel_size,) * 2 | ||
|
|
||
| stride = module.stride | ||
| if not isinstance(stride, tuple): | ||
| stride = (stride,) * 2 | ||
|
|
||
| padding = module.padding | ||
| if not isinstance(padding, tuple): | ||
| padding = (padding,) * 2 | ||
|
|
||
| dilation = module.dilation | ||
| if not isinstance(dilation, tuple): | ||
| dilation = (dilation,) * 2 | ||
|
|
||
| kernel = module.weight.detach().cpu().numpy() | ||
|
|
||
| bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) | ||
| if module.bias is not None: | ||
| bias = module.bias.detach().cpu().numpy() | ||
|
|
||
| layer = ctx.network.add_convolution( | ||
| input=input_trt, | ||
| num_output_maps=module.out_channels, | ||
| kernel_shape=kernel_size, | ||
| kernel=kernel, | ||
| bias=bias, | ||
| ) | ||
| layer.stride = stride | ||
| layer.padding = padding | ||
| layer.dilation = dilation | ||
|
|
||
| if module.groups is not None: | ||
| layer.num_groups = module.groups | ||
|
|
||
| output._trt = layer.get_output(0) | ||
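A minimal usage sketch (not part of this diff, illustrative only) showing how a converter like the one above is exercised; it assumes the standard torch2trt entry point and a TensorRT version for which this converter is enabled:

import torch
from torch2trt import torch2trt

# module matching test_Conv2d_kernel3 below
model = torch.nn.Conv2d(10, 5, kernel_size=3, stride=2, padding=1).cuda().eval()
x = torch.randn(1, 10, 224, 224).cuda()
model_trt = torch2trt(model, [x])  # convert_Conv2d runs while the forward pass is traced
print(torch.max(torch.abs(model(x) - model_trt(x))))  # should be small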
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)], enabled=trt_version() < '7.0') | ||
| def test_Conv2d_basic(): | ||
| return torch.nn.Conv2d(10, 5, kernel_size=1, stride=1, padding=0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)], enabled=trt_version() < '7.0') | ||
| def test_Conv2d_stride2(): | ||
| return torch.nn.Conv2d(10, 5, kernel_size=1, stride=2, padding=0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)], enabled=trt_version() < '7.0') | ||
| def test_Conv2d_kernel3(): | ||
| return torch.nn.Conv2d(10, 5, kernel_size=3, stride=2, padding=1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)], enabled=trt_version() < '7.0') | ||
| def test_Conv2d_dilation2(): | ||
| return torch.nn.Conv2d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2) |
| @@ -0,0 +1,79 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.ConvTranspose2d.forward', enabled=trt_version() >= '7.0') | ||
| @tensorrt_converter('torch.nn.ConvTranspose3d.forward', enabled=trt_version() >= '7.0') | ||
| def convert_ConvTranspose2d_trt7(ctx): | ||
| module = ctx.method_args[0] | ||
| input = ctx.method_args[1] | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
|
|
||
| input_dim = input.dim() - 2 | ||
|
|
||
| kernel_size = module.kernel_size | ||
| if not isinstance(kernel_size, tuple): | ||
| kernel_size = (kernel_size, ) * input_dim | ||
|
|
||
| stride = module.stride | ||
| if not isinstance(stride, tuple): | ||
| stride = (stride, ) * input_dim | ||
|
|
||
| padding = module.padding | ||
| if not isinstance(padding, tuple): | ||
| padding = (padding, ) * input_dim | ||
|
|
||
| assert module.dilation == 1 or all([d == 1 for d in module.dilation]), \ | ||
| "Transposed convolution dilation is not supported in TensorRT" | ||
|
|
||
| kernel = module.weight.detach().cpu().numpy() | ||
|
|
||
| bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) | ||
| if module.bias is not None: | ||
| bias = module.bias.detach().cpu().numpy() | ||
|
|
||
| layer = ctx.network.add_deconvolution_nd( | ||
| input=input_trt, | ||
| num_output_maps=module.out_channels, | ||
| kernel_shape=kernel_size, | ||
| kernel=kernel, | ||
| bias=bias) | ||
| layer.stride_nd = stride | ||
| layer.padding_nd = padding | ||
|
|
||
| if module.groups is not None: | ||
| layer.num_groups = module.groups | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 7, 7)], enabled=trt_version() >= '7.0') | ||
| def test_ConvTranspose2d_basic_trt7(): | ||
| return torch.nn.ConvTranspose2d(10, 5, kernel_size=1, stride=1, padding=0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 8, 8)], enabled=trt_version() >= '7.0') | ||
| def test_ConvTranspose2d_stride2_trt7(): | ||
| return torch.nn.ConvTranspose2d(10, 5, kernel_size=1, stride=2, padding=0) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 9, 9)], enabled=trt_version() >= '7.0') | ||
| def test_ConvTranspose2d_kernel3_trt7(): | ||
| return torch.nn.ConvTranspose2d(10, 5, kernel_size=3, stride=2, padding=1) | ||
|
|
||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 7, 7, 7)], enabled=trt_version() >= '7.0') | ||
| def test_ConvTranspose3d_basic_trt7(): | ||
| return torch.nn.ConvTranspose3d(10, 5, kernel_size=1, stride=1, padding=0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 7, 7, 7)], enabled=trt_version() >= '7.0') | ||
| def test_ConvTranspose3d_stride2_trt7(): | ||
| return torch.nn.ConvTranspose3d(10, 5, kernel_size=1, stride=2, padding=0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 6, 6, 6)], enabled=trt_version() >= '7.0') | ||
| def test_ConvTranspose3d_kernel3_trt7(): | ||
| return torch.nn.ConvTranspose3d(10, 5, kernel_size=3, stride=2, padding=1) | ||
|
|
| @@ -0,0 +1,68 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
| @tensorrt_converter("torch.nn.ConvTranspose2d.forward", enabled=trt_version() < '7.0') | ||
| def convert_ConvTranspose2d(ctx): | ||
| module = ctx.method_args[0] | ||
| input = ctx.method_args[1] | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
|
|
||
| kernel_size = module.kernel_size | ||
| if not isinstance(kernel_size, tuple): | ||
| kernel_size = (kernel_size,) * 2 | ||
|
|
||
| stride = module.stride | ||
| if not isinstance(stride, tuple): | ||
| stride = (stride,) * 2 | ||
|
|
||
| padding = module.padding | ||
| if not isinstance(padding, tuple): | ||
| padding = (padding,) * 2 | ||
|
|
||
| kernel = module.weight.detach().cpu().numpy() | ||
|
|
||
| bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) | ||
| if module.bias is not None: | ||
| bias = module.bias.detach().cpu().numpy() | ||
|
|
||
| layer = ctx.network.add_deconvolution( | ||
| input=input_trt, | ||
| num_output_maps=module.out_channels, | ||
| kernel_shape=kernel_size, | ||
| kernel=kernel, | ||
| bias=bias, | ||
| ) | ||
| layer.stride = stride | ||
|
|
||
# If output_padding of the original PyTorch layer is non-zero, pre_padding and
# post_padding must be set separately; otherwise the PyTorch and TensorRT output
# dimensions may differ.
| output_padding = module.output_padding | ||
| if output_padding[0] + output_padding[1] > 0: | ||
| layer.pre_padding = padding | ||
| layer.post_padding = trt.tensorrt.DimsHW(padding[0] - output_padding[0], padding[1] - output_padding[1]) | ||
| else: | ||
| layer.padding = padding | ||
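# Worked example of the padding arithmetic above (illustrative): for
# ConvTranspose2d(kernel_size=3, stride=2, padding=1, output_padding=1) on a 7x7 input,
# PyTorch produces (7 - 1) * 2 - 2 * 1 + 3 + 1 = 14. With pre_padding = 1 and
# post_padding = 1 - 1 = 0, TensorRT's deconvolution gives (7 - 1) * 2 + 3 - 1 - 0 = 14,
# so the two frameworks agree on the output size.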
|
|
||
| if module.groups is not None: | ||
| layer.num_groups = module.groups | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device("cuda"), [(1,3,224,224)], enabled=trt_version() < '7.0') | ||
| def test_square_kernel_equal_stride_mode(): | ||
| return torch.nn.ConvTranspose2d(3,3,3,stride=2) | ||
|
|
||
| @add_module_test(torch.float32, torch.device("cuda"), [(1,3,224,224)], enabled=trt_version() < '7.0') | ||
| def test_square_kernel_equal_stride_mode_unequal_op_size(): | ||
| return torch.nn.ConvTranspose2d(3,6,3,stride=2) | ||
|
|
||
| @add_module_test(torch.float32, torch.device("cuda"), [(1,3,224,224)], enabled=trt_version() < '7.0') | ||
| def test_unequal_stride_mode(): | ||
| return torch.nn.ConvTranspose2d(3,3,3, stride=(2,1), padding=(4,2)) | ||
|
|
||
| @add_module_test(torch.float32, torch.device("cuda"), [(1,3,112,112)], enabled=trt_version() < '7.0') | ||
| @add_module_test(torch.float32, torch.device("cuda"), [(1,3,7,7)], enabled=trt_version() < '7.0') | ||
| def test_kernelsize_4(): | ||
| return torch.nn.ConvTranspose2d(3,3,4, stride=2, padding=1) | ||
|
|
| @@ -0,0 +1,45 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.Linear.forward') | ||
| def convert_Linear(ctx): | ||
| module = ctx.method_args[0] | ||
| input = ctx.method_args[1] | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
|
|
||
| # reshape to ...xNx1x1 | ||
| layer = ctx.network.add_shuffle(input_trt) | ||
| layer.reshape_dims = tuple(input_trt.shape) + (1, 1) | ||
|
|
||
| bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) | ||
| if module.bias is not None: | ||
| bias = module.bias.detach().cpu().numpy() | ||
|
|
||
| # add fully connected | ||
| layer = ctx.network.add_fully_connected( | ||
| input=layer.get_output(0), | ||
| num_outputs=module.out_features, | ||
| kernel=module.weight.detach().cpu().numpy(), | ||
| bias=bias) | ||
|
|
||
| # reshape back to N | ||
| layer = ctx.network.add_shuffle(layer.get_output(0)) | ||
| layer.reshape_dims = tuple(output.shape[1:]) | ||
|
|
||
| output._trt = layer.get_output(0) | ||
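A standalone sanity check of the reshape -> fully-connected -> reshape idea above (illustrative sketch, not part of the converter): treating the Linear weight as a 1x1 kernel over a ...x1x1 view of the input reproduces nn.Linear exactly.

import torch
lin = torch.nn.Linear(10, 5)
x = torch.randn(2, 10)
y_fc = torch.nn.functional.conv2d(x.view(2, 10, 1, 1), lin.weight.view(5, 10, 1, 1), lin.bias)
print(torch.allclose(lin(x), y_fc.view(2, 5), atol=1e-6))  # True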
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 10)]) | ||
| def test_Linear_basic(): | ||
| return torch.nn.Linear(10, 5) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 10)]) | ||
| def test_Linear_no_bias(): | ||
| return torch.nn.Linear(10, 5, bias=False) |
| @@ -0,0 +1,12 @@ | ||
| from torch2trt.torch2trt import * | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.LogSoftmax.forward') | ||
| def convert_LogSoftmax(ctx): | ||
| input = ctx.method_args[1] | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
| layer = ctx.network.add_softmax(input=input_trt) | ||
| layer = ctx.network.add_unary(input=layer.get_output(0), | ||
| op=trt.UnaryOperation.LOG) | ||
| output._trt = layer.get_output(0) |
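The converter above relies on the identity log_softmax(x) == log(softmax(x)), which is exactly what the SOFTMAX layer followed by a LOG unary computes. A quick standalone check (illustrative, not part of the converter):

import torch
x = torch.randn(1, 3, 4)
a = torch.nn.functional.log_softmax(x, dim=-1)
b = torch.log(torch.nn.functional.softmax(x, dim=-1))
print(torch.allclose(a, b, atol=1e-6))  # True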
| @@ -0,0 +1,61 @@ | ||
# dummy converters throw warnings when an otherwise-unsupported method is encountered
| import tensorrt as trt | ||
| from .dummy_converters import * | ||
|
|
||
| # supported converters will override dummy converters | ||
|
|
||
| from .AdaptiveAvgPool2d import * | ||
| from .BatchNorm1d import * | ||
| from .BatchNorm2d import * | ||
| from .Conv import * | ||
| from .Conv1d import * | ||
| from .Conv2d import * | ||
| from .ConvTranspose import * | ||
| from .ConvTranspose2d import * | ||
| from .Linear import * | ||
| from .LogSoftmax import * | ||
| from .activation import * | ||
| from .adaptive_avg_pool2d import * | ||
| from .adaptive_max_pool2d import * | ||
| from .add import * | ||
| from .avg_pool import * | ||
| from .batch_norm import * | ||
| from .cat import * | ||
| from .chunk import * | ||
| from .clamp import * | ||
| from .compare import * | ||
| from .div import * | ||
| from .expand import * | ||
| from .floordiv import * | ||
| from .getitem import * | ||
| from .identity import * | ||
| from .instance_norm import * | ||
| from .interpolate import * | ||
| from .group_norm import * | ||
| from .max import * | ||
| from .max_pool2d import * | ||
| from .mean import * | ||
| from .min import * | ||
| from .mod import * | ||
| from .mul import * | ||
| from .normalize import * | ||
| from .ne import * | ||
| from .narrow import * | ||
| from .pad import * | ||
| from .permute import * | ||
| from .pow import * | ||
| from .prelu import * | ||
| from .prod import * | ||
| from .relu import * | ||
| from .relu6 import * | ||
| from .sigmoid import * | ||
| from .softmax import * | ||
| from .split import * | ||
| from .stack import * | ||
| from .sub import * | ||
| from .sum import * | ||
| from .tanh import * | ||
| from .tensor import * | ||
| from .transpose import * | ||
| from .unary import * | ||
| from .view import * |
| @@ -0,0 +1,121 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
| from .unary import UnaryModule | ||
|
|
||
|
|
||
| # | RELU : Rectified Linear activation (impl in relu.py) | ||
| # | SIGMOID : Sigmoid activation (impl in sigmoid.py) | ||
| # | TANH : Hyperbolic Tangent activation (impl in tanh.py) | ||
|
|
||
|
|
||
| # | LEAKY_RELU : Leaky Relu activation: f(x) = x if x >= 0, f(x) = alpha * x if x < 0 | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.functional.leaky_relu') | ||
| @tensorrt_converter('torch.nn.functional.leaky_relu_') | ||
| def convert_leaky_relu(ctx): | ||
| input = get_arg(ctx, 'input', pos=0, default=None) | ||
| negative_slope = get_arg(ctx, 'negative_slope', pos=1, default=0.01) | ||
| output = ctx.method_return | ||
|
|
||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| layer = ctx.network.add_activation(input_trt, trt.ActivationType.LEAKY_RELU) | ||
| layer.alpha = negative_slope | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) | ||
| def test_leaky_relu(): | ||
| return UnaryModule(lambda x: torch.nn.functional.leaky_relu(x)) | ||
|
|
||
|
|
||
| # | ELU : Elu activation: f(x) = x if x >= 0, f(x) = alpha * (exp(x) - 1) if x < 0 | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.functional.elu') | ||
| @tensorrt_converter('torch.nn.functional.elu_') | ||
| def convert_elu(ctx): | ||
| input = get_arg(ctx, 'input', pos=0, default=None) | ||
| alpha = get_arg(ctx, 'alpha', pos=1, default=1.0) | ||
| output = ctx.method_return | ||
|
|
||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| layer = ctx.network.add_activation(input_trt, trt.ActivationType.ELU) | ||
| layer.alpha = alpha | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) | ||
| def test_elu(): | ||
| return UnaryModule(lambda x: torch.nn.functional.elu(x)) | ||
|
|
||
|
|
||
| # | SELU : Selu activation: f(x) = beta * x if x > 0, f(x) = beta * (alpha * exp(x) - alpha) if x <= 0 | ||
|
|
||
| @tensorrt_converter('torch.selu') | ||
| @tensorrt_converter('torch.selu_') | ||
| @tensorrt_converter('torch.nn.functional.selu') | ||
| @tensorrt_converter('torch.nn.functional.selu_') | ||
| def convert_selu(ctx): | ||
| input = get_arg(ctx, 'input', pos=0, default=None) | ||
| alpha = get_arg(ctx, 'alpha', pos=1, default=1.0) | ||
| output = ctx.method_return | ||
|
|
||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| layer = ctx.network.add_activation(input_trt, trt.ActivationType.SELU) | ||
| layer.alpha = 1.6732632423543772848170429916717 | ||
| layer.beta = 1.0507009873554804934193349852946 | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) | ||
| def test_selu(): | ||
| return UnaryModule(lambda x: torch.nn.functional.selu(x)) | ||
|
|
||
|
|
||
# | SOFTSIGN : Softsign activation: f(x) = x / (1 + |x|)
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.functional.softsign') | ||
| def convert_softsign(ctx): | ||
| input = get_arg(ctx, 'input', pos=0, default=None) | ||
| output = ctx.method_return | ||
|
|
||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| layer = ctx.network.add_activation(input_trt, trt.ActivationType.SOFTSIGN) | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) | ||
| def test_softsign(): | ||
| return UnaryModule(lambda x: torch.nn.functional.softsign(x)) | ||
|
|
||
|
|
||
| # | SOFTPLUS : Softplus activation: f(x) = alpha * log(exp(beta * x) + 1) | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.functional.softplus') | ||
| def convert_softplus(ctx): | ||
| input = get_arg(ctx, 'input', pos=0, default=None) | ||
| output = ctx.method_return | ||
|
|
||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| layer = ctx.network.add_activation(input_trt, trt.ActivationType.SOFTPLUS) | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) | ||
| def test_softplus(): | ||
| return UnaryModule(lambda x: torch.nn.functional.softplus(x)) | ||
|
|
||
|
|
||
| # | CLIP : Clip activation: f(x) = max(alpha, min(beta, x)) (impl in clamp.py) | ||
|
|
||
# | HARD_SIGMOID : Hard sigmoid activation: f(x) = max(0, min(1, alpha * x + beta)) (not sure whether PyTorch has this)
# | SCALED_TANH : Scaled Tanh activation: f(x) = alpha * tanh(beta * x) (not sure whether PyTorch has this)
# | THRESHOLDED_RELU : Thresholded Relu activation: f(x) = x if x > alpha, f(x) = 0 if x <= alpha (not sure whether PyTorch has this)
| @@ -0,0 +1,8 @@ | ||
| from torch2trt.torch2trt import * | ||
| from .AdaptiveAvgPool2d import * | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.functional.adaptive_avg_pool2d') | ||
| def convert_adaptive_avg_pool2d(ctx): | ||
| ctx.method_args = (torch.nn.AdaptiveAvgPool2d(ctx.method_args[1]), ctx.method_args[0]) | ||
| convert_AdaptiveAvgPool2d(ctx) |
| @@ -0,0 +1,36 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.functional.adaptive_max_pool2d') | ||
| def convert_adaptive_max_pool2d(ctx): | ||
| input = ctx.method_args[0] | ||
| output = ctx.method_return | ||
|
|
||
| output_size = ctx.method_args[1] | ||
| if isinstance(output_size, int): | ||
| output_size = (output_size, ) * 2 | ||
|
|
||
| stride = (input._trt.shape[-2] // output_size[-2], input._trt.shape[-1] // output_size[-1]) | ||
|
|
||
| kernel_size = stride | ||
| layer = ctx.network.add_pooling( | ||
| input=input._trt, type=trt.PoolingType.MAX, window_size=kernel_size) | ||
| layer.stride = stride | ||
|
|
||
| output._trt = layer.get_output(0) | ||
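# Worked example of the arithmetic above (illustrative): for a 224x224 input and
# output_size=(2, 2), stride = kernel_size = (112, 112), i.e. the input is tiled into
# four equal windows. This reproduces AdaptiveMaxPool2d only when the input size is
# divisible by the output size.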
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_adaptive_max_pool2d_1x1(): | ||
| return torch.nn.AdaptiveMaxPool2d((1, 1)) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_adaptive_max_pool2d_2x2(): | ||
| return torch.nn.AdaptiveMaxPool2d((2, 2)) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_adaptive_max_pool2d_3x3(): | ||
| return torch.nn.AdaptiveMaxPool2d((3, 3)) |
| @@ -0,0 +1,109 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.add') | ||
| @tensorrt_converter('torch.Tensor.__iadd__') | ||
| @tensorrt_converter('torch.Tensor.__add__') | ||
| @tensorrt_converter('torch.Tensor.__radd__') | ||
| def convert_add(ctx): | ||
| input_a = ctx.method_args[0] | ||
| input_b = ctx.method_args[1] | ||
| output = ctx.method_return | ||
| input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) | ||
| input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) | ||
| layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUM) | ||
| output._trt = layer.get_output(0) | ||
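# Note (illustrative): broadcasting to len(output.shape) - 1 dimensions aligns both
# operands in TensorRT's implicit-batch layout (the batch dimension is excluded), which
# is what lets e.g. a registered (3, 10, 10) buffer be added to a (N, 3, 10, 10) input
# in the constant tests below.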
|
|
||
|
|
||
| class Add(torch.nn.Module): | ||
| def __init__(self): | ||
| super(Add, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return x + y | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) | ||
| def test_add_basic(): | ||
| return Add() | ||
|
|
||
|
|
||
| class IAdd(torch.nn.Module): | ||
| def __init__(self): | ||
| super(IAdd, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| x += y | ||
| return x | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) | ||
| def test_add_iadd(): | ||
| return IAdd() | ||
|
|
||
|
|
||
| class TorchAdd(torch.nn.Module): | ||
| def __init__(self): | ||
| super(TorchAdd, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return torch.add(x, y) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) | ||
| def test_add_torchadd(): | ||
| return TorchAdd() | ||
|
|
||
|
|
||
| class RAddInt(torch.nn.Module): | ||
| def __init__(self): | ||
| super(RAddInt, self).__init__() | ||
|
|
||
| def forward(self, x): | ||
| return 1 + x | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_add_radd_int(): | ||
| return RAddInt() | ||
|
|
||
|
|
||
| class RAddFloat(torch.nn.Module): | ||
| def __init__(self): | ||
| super(RAddFloat, self).__init__() | ||
|
|
||
| def forward(self, x): | ||
| return 1.0 + x | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_add_radd_float(): | ||
| return RAddFloat() | ||
|
|
||
|
|
||
| class AddConstantNoBatch(torch.nn.Module): | ||
| def __init__(self): | ||
| super(AddConstantNoBatch, self).__init__() | ||
| self.register_buffer('y', torch.ones((3, 10, 10))) | ||
|
|
||
| def forward(self, x): | ||
| return x + self.y | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) | ||
| def test_add_constant_nobatch(): | ||
| return AddConstantNoBatch() | ||
|
|
||
|
|
||
| class AddConstantBatch(torch.nn.Module): | ||
| def __init__(self): | ||
| super(AddConstantBatch, self).__init__() | ||
| self.register_buffer('y', torch.ones((1, 3, 10, 10))) | ||
|
|
||
| def forward(self, x): | ||
| return x + self.y | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) | ||
| def test_add_constant_batch(): | ||
| return AddConstantBatch() |
| @@ -0,0 +1,111 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter("torch.nn.functional.avg_pool2d", enabled=trt_version() < '7.0') | ||
| def convert_avg_pool2d(ctx): | ||
| # parse args | ||
| input = get_arg(ctx, "input", pos=0, default=None) | ||
| kernel_size = get_arg(ctx, "kernel_size", pos=1, default=None) | ||
| stride = get_arg(ctx, "stride", pos=2, default=None) | ||
| padding = get_arg(ctx, "padding", pos=3, default=0) | ||
| ceil_mode = get_arg(ctx, "ceil_mode", pos=4, default=False) | ||
| count_include_pad = get_arg(ctx, "count_include_pad", pos=5, default=True) | ||
|
|
||
| # get input trt tensor (or create constant if it doesn't exist) | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
|
|
||
| output = ctx.method_return | ||
|
|
||
| # get kernel size | ||
| if not isinstance(kernel_size, tuple): | ||
| kernel_size = (kernel_size,) * 2 | ||
|
|
||
| # get stride | ||
| if not isinstance(stride, tuple): | ||
| stride = (stride,) * 2 | ||
|
|
||
| # get padding | ||
| if not isinstance(padding, tuple): | ||
| padding = (padding,) * 2 | ||
|
|
||
| layer = ctx.network.add_pooling( | ||
| input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size | ||
| ) | ||
|
|
||
| layer.stride = stride | ||
| layer.padding = padding | ||
| layer.average_count_excludes_padding = not count_include_pad | ||
|
|
||
| if ceil_mode: | ||
| layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.functional.avg_pool2d', enabled=trt_version() >= '7.0') | ||
| @tensorrt_converter('torch.nn.functional.avg_pool3d', enabled=trt_version() >= '7.0') | ||
| def convert_avg_pool_trt7(ctx): | ||
| # parse args | ||
| input = get_arg(ctx, 'input', pos=0, default=None) | ||
| kernel_size = get_arg(ctx, 'kernel_size', pos=1, default=None) | ||
| stride = get_arg(ctx, 'stride', pos=2, default=None) | ||
| padding = get_arg(ctx, 'padding', pos=3, default=0) | ||
| ceil_mode = get_arg(ctx, 'ceil_mode', pos=4, default=False) | ||
| count_include_pad = get_arg(ctx, 'count_include_pad', pos=5, default=True) | ||
|
|
||
| # get input trt tensor (or create constant if it doesn't exist) | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
|
|
||
| input_dim = input.dim() - 2 | ||
|
|
||
| # get kernel size | ||
| if not isinstance(kernel_size, tuple): | ||
| kernel_size = (kernel_size, ) * input_dim | ||
|
|
||
| # get stride | ||
| if not isinstance(stride, tuple): | ||
| stride = (stride, ) * input_dim | ||
|
|
||
| # get padding | ||
| if not isinstance(padding, tuple): | ||
| padding = (padding, ) * input_dim | ||
|
|
||
| layer = ctx.network.add_pooling_nd( | ||
| input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size) | ||
|
|
||
| layer.stride_nd = stride | ||
| layer.padding_nd = padding | ||
| layer.average_count_excludes_padding = not count_include_pad | ||
|
|
||
| if ceil_mode: | ||
| layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP | ||
|
|
||
| output._trt = layer.get_output(0) | ||
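# Worked example of ceil_mode (illustrative): for a length-6 dimension with
# kernel_size=3, stride=2, padding=1, floor mode yields (6 + 2 - 3) // 2 + 1 = 3 output
# positions while ceil mode yields 4; PaddingMode.EXPLICIT_ROUND_UP makes TensorRT round
# the output size up in the same way.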
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 4, 6)]) | ||
| @add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 5, 7)]) | ||
| def test_avg_pool2d_without_ceil_mode(): | ||
| return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 4, 6)]) | ||
| @add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 5, 7)]) | ||
| def test_avg_pool2d_with_ceil_mode(): | ||
| return torch.nn.AvgPool2d( | ||
| kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False | ||
| ) # TRT does not support ceil_mode=True && count_include_pad=True | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 4, 6)], enabled=trt_version() >= '7.0') | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 5, 7)], enabled=trt_version() >= '7.0') | ||
| def test_avg_pool3d_without_ceil_mode_trt7(): | ||
| return torch.nn.AvgPool3d(kernel_size=3, stride=2, padding=1, ceil_mode=False) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 4, 6)], enabled=trt_version() >= '7.0') | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 5, 7)], enabled=trt_version() >= '7.0') | ||
| def test_avg_pool3d_with_ceil_mode_trt7(): | ||
| return torch.nn.AvgPool3d(kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False) # TRT does not support ceil_mode=True && count_include_pad=True |
| @@ -0,0 +1,40 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
| @tensorrt_converter('torch.nn.functional.batch_norm', enabled=trt_version() >= '7.0') | ||
| def convert_batch_norm_trt7(ctx): | ||
|
|
||
| input = get_arg(ctx, 'input', pos=0, default=None) | ||
| running_mean = get_arg(ctx, 'running_mean', pos=1, default=None) | ||
| running_var = get_arg(ctx, 'running_var', pos=2, default=None) | ||
|
|
||
| weight = get_arg(ctx, 'weight', pos=3, default=None) | ||
| bias = get_arg(ctx, 'bias', pos=4, default=None) | ||
| eps = get_arg(ctx, 'eps', pos=7, default=10e-6) | ||
|
|
||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
|
|
||
| scale = weight.detach().cpu().numpy() / np.sqrt(running_var.detach().cpu().numpy() + eps) | ||
| bias = bias.detach().cpu().numpy() - running_mean.detach().cpu().numpy() * scale | ||
| power = np.ones_like(scale) | ||
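# Derivation of the folding above (illustrative): in eval mode BatchNorm computes
#   y = (x - running_mean) / sqrt(running_var + eps) * weight + bias
# which rearranges to y = x * scale + shift with
#   scale = weight / sqrt(running_var + eps)
#   shift = bias - running_mean * scale   (stored back into `bias` above)
# so a single SCALE layer with power = 1 reproduces it exactly.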
|
|
||
| layer = ctx.network.add_scale_nd(input_trt, trt.ScaleMode.CHANNEL, bias, scale, power, 0) | ||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)], enabled=trt_version() >= '7.0') | ||
| def test_batch_norm_2d_trt7(): | ||
| return torch.nn.BatchNorm2d(10) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)], enabled=trt_version() >= '7.0') | ||
| def test_batch_norm_3d_2_trt7(): | ||
| return torch.nn.BatchNorm3d(10) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 32, 2, 36, 47)], enabled=trt_version() >= '7.0') | ||
| def test_batch_norm_3d_trt7(): | ||
| return torch.nn.BatchNorm3d(32) | ||
|
|
| @@ -0,0 +1,28 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.cat') | ||
| def convert_cat(ctx): | ||
| inputs = get_arg(ctx, 'input', pos=0, default=None) | ||
| dim = get_arg(ctx, 'dim', pos=1, default=0) | ||
|
|
||
| output = ctx.method_return | ||
| trt_inputs = add_missing_trt_tensors(ctx.network, inputs) | ||
| trt_inputs = broadcast_trt_tensors(ctx.network, trt_inputs, len(output.shape) - 1) | ||
|
|
||
| layer = ctx.network.add_concatenation(inputs=trt_inputs) | ||
| layer.axis = dim - 1 | ||
| output._trt = layer.get_output(0) | ||
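# Note (illustrative): `dim` is given in PyTorch coordinates, which include the batch
# dimension; TensorRT axes here exclude it, hence `axis = dim - 1`. As written, negative
# `dim` values would not map correctly.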
|
|
||
| class Cat(torch.nn.Module): | ||
| def __init__(self, dim): | ||
| super(Cat, self).__init__() | ||
| self.dim = dim | ||
|
|
||
| def forward(self, *x): | ||
| return torch.cat(x, dim=self.dim) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 4, 4), (1, 3, 4), (1, 17, 4)]) | ||
| def test_Cat_basic(): | ||
| return Cat(1) |
| @@ -0,0 +1,60 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
| from .split import convert_split | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.chunk') | ||
| @tensorrt_converter('torch.Tensor.chunk') | ||
| def convert_chunk(ctx): | ||
| convert_split(ctx) | ||
|
|
||
|
|
||
| class TorchChunk(torch.nn.Module): | ||
|
|
||
| def __init__(self, *args, **kwargs): | ||
| super(TorchChunk, self).__init__() | ||
| self.args = args | ||
| self.kwargs = kwargs | ||
|
|
||
| def forward(self, x): | ||
| return torch.chunk(x, *self.args, **self.kwargs) | ||
|
|
||
|
|
||
| class TensorChunk(torch.nn.Module): | ||
|
|
||
| def __init__(self, *args, **kwargs): | ||
| super(TensorChunk, self).__init__() | ||
| self.args = args | ||
| self.kwargs = kwargs | ||
|
|
||
| def forward(self, x): | ||
| return x.chunk(*self.args, **self.kwargs) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_torch_chunk_1_1(): | ||
| return TorchChunk(1, 1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_torch_chunk_2_1(): | ||
| return TorchChunk(2, 1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_torch_chunk_3_1(): | ||
| return TorchChunk(3, 1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_torch_chunk_3_2(): | ||
| return TorchChunk(3, 2) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_tensor_chunk_3_2(): | ||
| return TensorChunk(3, 2) |
| @@ -0,0 +1,191 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| def __add_clamp(network, trt_input, val, op): | ||
|
|
||
# create a TensorRT constant filled with the clamp value
| val_shape = (1, ) * len(trt_input.shape) # broadcast all dimensions | ||
| val_tensor = val * torch.ones(val_shape, dtype=torch_dtype_from_trt(trt_input.dtype)).cpu().numpy() | ||
| val_trt = network.add_constant(val_shape, val_tensor) | ||
| layer = network.add_elementwise(trt_input, val_trt.get_output(0), op) | ||
|
|
||
| return layer | ||
|
|
||
|
|
||
| # CLAMP_MIN | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.clamp_min') | ||
| @tensorrt_converter('torch.Tensor.clamp_min') | ||
| def convert_clamp_min(ctx): | ||
| input = ctx.method_args[0] | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| val = ctx.method_args[1] | ||
| output = ctx.method_return | ||
|
|
||
| layer = __add_clamp(ctx.network, input_trt, val, trt.ElementWiseOperation.MAX) | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| class TorchClampMin(torch.nn.Module): | ||
| def forward(self, x): | ||
| return torch.clamp_min(x, -0.1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_torch_clamp_min(): | ||
| return TorchClampMin() | ||
|
|
||
|
|
||
| class TensorClampMin(torch.nn.Module): | ||
| def forward(self, x): | ||
| return x.clamp_min(-0.1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_tensor_clamp_min(): | ||
| return TensorClampMin() | ||
|
|
||
|
|
||
| # CLAMP_MAX | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.clamp_max') | ||
| @tensorrt_converter('torch.Tensor.clamp_max') | ||
| def convert_clamp_max(ctx): | ||
| input = ctx.method_args[0] | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| val = ctx.method_args[1] | ||
| output = ctx.method_return | ||
|
|
||
| layer = __add_clamp(ctx.network, input_trt, val, trt.ElementWiseOperation.MIN) | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| class TorchClampMax(torch.nn.Module): | ||
| def forward(self, x): | ||
| return torch.clamp_max(x, 0.1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_torch_clamp_max(): | ||
| return TorchClampMax() | ||
|
|
||
|
|
||
| class TensorClampMax(torch.nn.Module): | ||
| def forward(self, x): | ||
| return x.clamp_max(0.1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_tensor_clamp_max(): | ||
| return TensorClampMax() | ||
|
|
||
|
|
||
| # CLAMP | ||
|
|
||
| @tensorrt_converter('torch.clamp') | ||
| @tensorrt_converter('torch.Tensor.clamp') | ||
| def convert_clamp(ctx): | ||
| input = ctx.method_args[0] | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
| if "min" in ctx.method_kwargs and "max" in ctx.method_kwargs: | ||
| min_val = ctx.method_kwargs["min"] | ||
| max_val = ctx.method_kwargs["max"] | ||
| layer = __add_clamp(ctx.network, input_trt, min_val, trt.ElementWiseOperation.MAX) | ||
| layer = __add_clamp(ctx.network, layer.get_output(0), max_val, trt.ElementWiseOperation.MIN) | ||
| elif "min" in ctx.method_kwargs: | ||
| min_val = ctx.method_kwargs["min"] | ||
| layer = __add_clamp(ctx.network, input_trt, min_val, trt.ElementWiseOperation.MAX) | ||
| elif "max" in ctx.method_kwargs: | ||
| max_val = ctx.method_kwargs["max"] | ||
| layer = __add_clamp(ctx.network, input_trt, max_val, trt.ElementWiseOperation.MIN) | ||
| else: | ||
| min_val = ctx.method_args[1] | ||
| max_val = ctx.method_args[2] | ||
| layer = __add_clamp(ctx.network, input_trt, min_val, trt.ElementWiseOperation.MAX) | ||
| layer = __add_clamp(ctx.network, layer.get_output(0), max_val, trt.ElementWiseOperation.MIN) | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| class TorchClamp(torch.nn.Module): | ||
| def forward(self, x): | ||
| return torch.clamp(x, -0.1, 0.1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_torch_clamp(): | ||
| return TorchClamp() | ||
|
|
||
|
|
||
| class TensorClamp(torch.nn.Module): | ||
| def forward(self, x): | ||
| return x.clamp(-0.1, 0.1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_tensor_clamp(): | ||
| return TensorClamp() | ||
|
|
||
|
|
||
| class TorchClampOptionMax(torch.nn.Module): | ||
| def forward(self, x): | ||
| return torch.clamp(x, max=0.1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_torch_clamp_option_max(): | ||
| return TorchClampOptionMax() | ||
|
|
||
| class TorchClampOptionMin(torch.nn.Module): | ||
| def forward(self, x): | ||
| return torch.clamp(x, min=-0.1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_torch_clamp_option_min(): | ||
| return TorchClampOptionMin() | ||
|
|
||
|
|
||
| class TorchClampOptionMaxMin(torch.nn.Module): | ||
| def forward(self, x): | ||
| return torch.clamp(x, min=-0.1, max=0.1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_torch_clamp_option_max_min(): | ||
| return TorchClampOptionMaxMin() | ||
|
|
||
|
|
||
| class TensorClampOptionMax(torch.nn.Module): | ||
| def forward(self, x): | ||
| return x.clamp(max=0.1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_tensor_clamp_option_max(): | ||
| return TensorClampOptionMax() | ||
|
|
||
| class TensorClampOptionMin(torch.nn.Module): | ||
| def forward(self, x): | ||
| return x.clamp(min=-0.1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_tensor_clamp_option_min(): | ||
| return TensorClampOptionMin() | ||
|
|
||
|
|
||
| class TensorClampOptionMaxMin(torch.nn.Module): | ||
| def forward(self, x): | ||
| return x.clamp(min=-0.1, max=0.1) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_tensor_clamp_option_max_min(): | ||
| return TensorClampOptionMaxMin() |
| @@ -0,0 +1,60 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
| def convert_elementwise(ctx, op): | ||
| input_a = ctx.method_args[0] | ||
| input_b = ctx.method_args[1] | ||
| output = ctx.method_return | ||
| input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) | ||
| input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) | ||
| layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, op) | ||
| output._trt = layer.get_output(0) | ||
|
|
||
| @tensorrt_converter('torch.gt', enabled=trt_version() >= '7.0') | ||
| @tensorrt_converter('torch.Tensor.__gt__', enabled=trt_version() >= '7.0') | ||
| def convert_gt(ctx): | ||
| return convert_elementwise(ctx, trt.ElementWiseOperation.GREATER) | ||
|
|
||
| @tensorrt_converter('torch.lt', enabled=trt_version() >= '7.0') | ||
| @tensorrt_converter('torch.Tensor.__lt__', enabled=trt_version() >= '7.0') | ||
def convert_lt(ctx):
| return convert_elementwise(ctx, trt.ElementWiseOperation.LESS) | ||
|
|
||
| @tensorrt_converter('torch.eq', enabled=trt_version() >= '7.0') | ||
| @tensorrt_converter('torch.Tensor.__eq__', enabled=trt_version() >= '7.0') | ||
def convert_eq(ctx):
| return convert_elementwise(ctx, trt.ElementWiseOperation.EQUAL) | ||
|
|
||
| class GT(torch.nn.Module): | ||
| def __init__(self): | ||
| super(GT, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return x > y | ||
|
|
||
| class LT(torch.nn.Module): | ||
| def __init__(self): | ||
| super(LT, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return x < y | ||
|
|
||
| class EQ(torch.nn.Module): | ||
| def __init__(self): | ||
| super(EQ, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return x == y | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 6, 6), (1, 3, 6, 6)], enabled=trt_version() >= '7.0') | ||
| def test_gt_basic(): | ||
| return GT() | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 6, 6), (1, 3, 6, 6)], enabled=trt_version() >= '7.0') | ||
def test_lt_basic():
| return LT() | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 6, 6), (1, 3, 6, 6)], enabled=trt_version() >= '7.0') | ||
def test_eq_basic():
| return EQ() |
| @@ -0,0 +1,123 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.div') | ||
| @tensorrt_converter('torch.Tensor.__div__') # py2 | ||
| @tensorrt_converter('torch.Tensor.__idiv__') # py2 | ||
| @tensorrt_converter('torch.Tensor.__truediv__') # py3 | ||
| @tensorrt_converter('torch.Tensor.__itruediv__') # py3 | ||
| def convert_div(ctx): | ||
| input_a = ctx.method_args[0] | ||
| input_b = ctx.method_args[1] | ||
| output = ctx.method_return | ||
| input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) | ||
| input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) | ||
| layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.DIV) | ||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.Tensor.__rdiv__') # py2 | ||
| @tensorrt_converter('torch.Tensor.__rtruediv__') # py3 | ||
| def convert_rdiv(ctx): | ||
| input_a = ctx.method_args[1] # inputs switched for rdiv | ||
| input_b = ctx.method_args[0] | ||
| output = ctx.method_return | ||
| input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) | ||
| input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) | ||
| layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.DIV) | ||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| class Div(torch.nn.Module): | ||
| def __init__(self): | ||
| super(Div, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return x / y | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) | ||
| def test_div_basic(): | ||
| return Div() | ||
|
|
||
|
|
||
| class IDiv(torch.nn.Module): | ||
| def __init__(self): | ||
| super(IDiv, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| x /= y | ||
| return x | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) | ||
| def test_div_idiv(): | ||
| return IDiv() | ||
|
|
||
|
|
||
| class TorchDiv(torch.nn.Module): | ||
| def __init__(self): | ||
| super(TorchDiv, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return torch.div(x, y) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) | ||
| def test_div_torchdiv(): | ||
| return TorchDiv() | ||
|
|
||
|
|
||
| class RDivInt(torch.nn.Module): | ||
| def __init__(self): | ||
| super(RDivInt, self).__init__() | ||
|
|
||
| def forward(self, x): | ||
| return 100 / x | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_rdiv_int(): | ||
| return RDivInt() | ||
|
|
||
|
|
||
| class RDivFloat(torch.nn.Module): | ||
| def __init__(self): | ||
| super(RDivFloat, self).__init__() | ||
|
|
||
| def forward(self, x): | ||
| return 100.0 / x | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_rdiv_float(): | ||
| return RDivFloat() | ||
|
|
||
|
|
||
| class DivConstantNoBatch(torch.nn.Module): | ||
| def __init__(self): | ||
| super(DivConstantNoBatch, self).__init__() | ||
| self.register_buffer('y', torch.ones((3, 10, 10))) | ||
|
|
||
| def forward(self, x): | ||
| return x / self.y | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) | ||
| def test_div_constant_nobatch(): | ||
| return DivConstantNoBatch() | ||
|
|
||
|
|
||
| class DivConstantBatch(torch.nn.Module): | ||
| def __init__(self): | ||
| super(DivConstantBatch, self).__init__() | ||
| self.register_buffer('y', torch.ones((1, 3, 10, 10))) | ||
|
|
||
| def forward(self, x): | ||
| return x / self.y | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) | ||
| def test_div_constant_batch(): | ||
| return DivConstantBatch() |
| @@ -0,0 +1,37 @@ | ||
| from torch2trt.torch2trt import * | ||
|
|
||
|
|
||
| def is_private(method): | ||
| method = method.split('.')[-1] # remove prefix | ||
return method[0] == '_' and method[1] != '_'
|
|
||
| def is_function_type(method): | ||
| fntype = eval(method + '.__class__.__name__') | ||
| return fntype == 'function' or fntype == 'builtin_function_or_method' or fntype == 'method_descriptor' | ||
|
|
||
| def get_methods(namespace): | ||
| methods = [] | ||
| for method in dir(eval(namespace)): | ||
| full_method = namespace + '.' + method | ||
| if not is_private(full_method) and is_function_type(full_method): | ||
| methods.append(full_method) | ||
| return methods | ||
|
|
||
|
|
||
| TORCH_METHODS = [] | ||
| TORCH_METHODS += get_methods('torch') | ||
| TORCH_METHODS += get_methods('torch.Tensor') | ||
| TORCH_METHODS += get_methods('torch.nn.functional') | ||
|
|
||
|
|
||
| for method in TORCH_METHODS: | ||
|
|
||
| @tensorrt_converter(method, is_real=False) | ||
| def warn_method(ctx): | ||
| print('Warning: Encountered known unsupported method %s' % ctx.method_str) | ||
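# Note (illustrative): the decorator registers the wrapped function when it is applied,
# so rebinding the name `warn_method` on every iteration still leaves one converter
# registered per entry in TORCH_METHODS; the body reads ctx.method_str rather than the
# loop variable, so no late-binding issue arises.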
|
|
||
|
|
||
| @tensorrt_converter('torch.Tensor.dim', is_real=False) | ||
| @tensorrt_converter('torch.Tensor.size', is_real=False) | ||
| def dont_warn(ctx): | ||
| pass |
| @@ -0,0 +1,43 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.Tensor.expand') | ||
| def convert_expand(ctx): | ||
| input = ctx.method_args[0] | ||
| sizes = ctx.method_args[1:] | ||
| output = ctx.method_return | ||
|
|
||
| inshape = tuple(input.shape)[1:] # exclude batch | ||
| shape = tuple(output.shape)[1:] | ||
| ndim = len(shape) | ||
| start = tuple([0]*ndim) | ||
| stride = tuple([int(i == o) for i, o in zip(inshape, shape)]) # stride == 1 if dimensions match, 0 otherwise | ||
|
|
||
| layer = ctx.network.add_slice(input._trt, start, shape, stride) | ||
|
|
||
| output._trt = layer.get_output(0) | ||
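# Worked example of the stride trick above (illustrative): expanding a (1, 1, 1, 3)
# tensor to (1, 3, 3, 3) gives inshape = (1, 1, 3) and shape = (3, 3, 3) with the batch
# excluded, so stride = (0, 0, 1): broadcast dimensions re-read the same element
# (stride 0) while matching dimensions step normally (stride 1).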
|
|
||
|
|
||
| class ExpandModule(torch.nn.Module): | ||
| def __init__(self, *sizes): | ||
| super(ExpandModule, self).__init__() | ||
| self.sizes = sizes | ||
|
|
||
| def forward(self, x): | ||
| return x.expand(*self.sizes) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,1,3,3)]) | ||
| def test_tensor_expand_singledim(): | ||
| return ExpandModule(1, 3, 3, 3) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,1,1,3)]) | ||
| def test_tensor_expand_multidim(): | ||
| return ExpandModule(1, 3, 3, 3) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,1,1,3)]) | ||
| def test_tensor_expand_inferdim(): | ||
| return ExpandModule(1, 3, -1, -1) |
| @@ -0,0 +1,81 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.Tensor.__floordiv__') | ||
| @tensorrt_converter('torch.Tensor.__ifloordiv__') | ||
| @tensorrt_converter('torch.floor_divide') | ||
| def convert_floordiv(ctx): | ||
| input_a = ctx.method_args[0] | ||
| input_b = ctx.method_args[1] | ||
| output = ctx.method_return | ||
| input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) | ||
| input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) | ||
# We cannot use ElementWiseOperation.FLOOR_DIV directly because Torch truncates
# negative results toward 0, while TensorRT's FLOOR_DIV rounds toward -Inf.
# Emulate the truncating behaviour with:
#   sign = ab / |ab|
#   floordiv result: sign * (|a| // |b|)
| ab_layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.PROD) | ||
| abs_ab_layer = ctx.network.add_unary(ab_layer.get_output(0), trt.UnaryOperation.ABS) | ||
| sign_layer = ctx.network.add_elementwise(ab_layer.get_output(0), abs_ab_layer.get_output(0), | ||
| trt.ElementWiseOperation.DIV) | ||
| abs_a_layer = ctx.network.add_unary(input_a_trt, trt.UnaryOperation.ABS) | ||
| abs_b_layer = ctx.network.add_unary(input_b_trt, trt.UnaryOperation.ABS) | ||
| abs_floor_layer = ctx.network.add_elementwise(abs_a_layer.get_output(0), abs_b_layer.get_output(0), | ||
| trt.ElementWiseOperation.FLOOR_DIV) | ||
| out_layer = ctx.network.add_elementwise(sign_layer.get_output(0), abs_floor_layer.get_output(0), | ||
| trt.ElementWiseOperation.PROD) | ||
| output._trt = out_layer.get_output(0) | ||
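# Worked example of the emulation above (illustrative): for a = -7, b = 2, the
# truncating behaviour described above gives -3, whereas FLOOR_DIV alone would give
# floor(-3.5) = -4. With the decomposition: sign = (a * b) / |a * b| = -1 and
# |a| // |b| = 7 // 2 = 3, so the result is -1 * 3 = -3, matching truncation.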
|
|
||
|
|
||
| class FloorDiv(torch.nn.Module): | ||
| def __init__(self): | ||
| super(FloorDiv, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return x // y | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)]) | ||
| def test_floordiv_op(): | ||
| return FloorDiv() | ||
|
|
||
|
|
||
| class FloorDivAssign (torch.nn.Module): | ||
| def __init__(self): | ||
| super(FloorDivAssign, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| x //= y | ||
| return x | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)]) | ||
| def test_floordiv_op_assign(): | ||
| return FloorDivAssign() | ||
|
|
||
|
|
||
| class FloorDivConst(torch.nn.Module): | ||
| def __init__(self): | ||
| super(FloorDivConst, self).__init__() | ||
|
|
||
| def forward(self, x): | ||
| return x // 2. | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20)]) | ||
| def test_floordiv_op_const(): | ||
| return FloorDivConst() | ||
|
|
||
|
|
||
| class TorchFloorDiv(torch.nn.Module): | ||
| def __init__(self): | ||
| super(TorchFloorDiv, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return torch.floor_divide(x, y) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)]) | ||
| def test_floordiv_func(): | ||
| return TorchFloorDiv() |
| @@ -0,0 +1,155 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| def slice_to_trt(dim_size, dim_slice): | ||
|
|
||
| start = 0 if dim_slice.start is None else dim_slice.start | ||
| stop = dim_size if dim_slice.stop is None else dim_slice.stop | ||
| stride = 1 if dim_slice.step is None else dim_slice.step | ||
|
|
||
| size = (stop - start - 1) // stride + 1 | ||
|
|
||
| return start, size, stride | ||
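# Worked example (illustrative): for x[:, ::2] on a dimension of size 5,
# start = 0, stop = 5, stride = 2 and size = (5 - 0 - 1) // 2 + 1 = 3,
# i.e. elements 0, 2 and 4 are selected.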
|
|
||
|
|
||
| def num_slice_types(slices): | ||
| num_slice = 0 | ||
| for s in slices: | ||
| if isinstance(s, slice) or isinstance(s, int): | ||
| num_slice += 1 | ||
| return num_slice | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.Tensor.__getitem__') | ||
| def convert_tensor_getitem(ctx): | ||
| input = ctx.method_args[0] | ||
| slices = ctx.method_args[1] | ||
| output = ctx.method_return | ||
|
|
||
| input_trt = input._trt | ||
|
|
||
| # Step 1 - Replace ellipsis with expanded slices | ||
|
|
||
| num_ellipsis = len(input.shape) - num_slice_types(slices) | ||
|
|
||
| new_slices = [] | ||
| for s in slices: | ||
|
|
||
| if s == Ellipsis: | ||
| while num_ellipsis > 0: | ||
| new_slices.append(slice(None, None, None)) | ||
| num_ellipsis -= 1 | ||
| elif isinstance(s, slice): | ||
| new_slices.append(s) | ||
| elif s is None: | ||
| new_slices.append(None) | ||
| elif isinstance(s, int): | ||
| new_slices.append(s) | ||
|
|
||
| # fill missing slices at end | ||
| while num_slice_types(new_slices) < len(input.shape): | ||
| new_slices.append(slice(None, None, None)) | ||
|
|
||
| # Step 2 - Remove batch from slices (TRT from this point) | ||
|
|
||
| slices = tuple(new_slices[1:]) # remove batch | ||
|
|
||
|
|
||
| # Step 3 - Add slice layer (will currently ignore 'None' slices) | ||
|
|
||
| starts = [] | ||
| sizes = [] | ||
| strides = [] | ||
|
|
||
| input_dim = 0 | ||
| for s in slices: | ||
|
|
||
| if input_dim >= len(input_trt.shape): | ||
| break | ||
|
|
||
| input_size = int(input_trt.shape[input_dim]) | ||
|
|
||
| if isinstance(s, slice): | ||
| start, size, stride = slice_to_trt(input_size, s) | ||
| starts.append(start) | ||
| sizes.append(size) | ||
| strides.append(stride) | ||
| input_dim += 1 | ||
|
|
||
| elif isinstance(s, int): | ||
| starts.append(s) | ||
| sizes.append(1) | ||
| strides.append(1) | ||
| input_dim += 1 | ||
|
|
||
| output_trt = ctx.network.add_slice(input_trt, starts, sizes, strides).get_output(0) | ||
|
|
||
| # Step 4 - Add shuffle layer to insert dimensions for 'None' slices and remove dimensions for 'int' slices | ||
|
|
||
| num_non_slice = len([s for s in slices if not isinstance(s, slice)]) | ||
| if num_non_slice > 0: | ||
| layer = ctx.network.add_shuffle(output_trt) | ||
| layer.reshape_dims = tuple(output.shape[1:]) # exclude batch | ||
| output_trt = layer.get_output(0) | ||
|
|
||
| output._trt = output_trt | ||
|
|
||
|
|
||
| class LambdaModule(torch.nn.Module): | ||
| def __init__(self, fn): | ||
| super(LambdaModule, self).__init__() | ||
| self.fn = fn | ||
|
|
||
| def forward(self, x): | ||
| return self.fn(x) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) | ||
| def test_tensor_getitem_1d_int(): | ||
| return LambdaModule(lambda x: x[:, 0]) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) | ||
| def test_tensor_getitem_2d_int(): | ||
| return LambdaModule(lambda x: x[:, 0]) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) | ||
| def test_tensor_getitem_2d_strided(): | ||
| return LambdaModule(lambda x: x[:, ::2]) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) | ||
| def test_tensor_getitem_2d_strided_offset(): | ||
| return LambdaModule(lambda x: x[:, 1::2]) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) | ||
| def test_tensor_getitem_2d_strided_range(): | ||
| return LambdaModule(lambda x: x[:, 1:3:2]) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) | ||
| def test_tensor_getitem_2d_insert_dim(): | ||
| return LambdaModule(lambda x: x[:, None]) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) | ||
| def test_tensor_getitem_2d_insert_dim_ellipsis(): | ||
| return LambdaModule(lambda x: x[:, None, ...]) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) | ||
| def test_tensor_getitem_2d_append_dim(): | ||
| return LambdaModule(lambda x: x[:, ..., None]) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) | ||
| def test_tensor_getitem_2d_append_2dim(): | ||
| return LambdaModule(lambda x: x[:, ..., None, None]) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) | ||
| def test_tensor_getitem_2d_weird_combo(): | ||
| return LambdaModule(lambda x: x[:, 0:3:4, None, None, 1, ...]) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| import torch.nn as nn | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
| def has_group_norm_plugin(): | ||
| try: | ||
| from torch2trt.plugins import GroupNormPlugin | ||
| return True | ||
| except: | ||
| return False | ||
|
|
||
|
|
||
| def get_group_norm_plugin(num_groups, weight, bias, eps): | ||
| from torch2trt.plugins import GroupNormPlugin | ||
| PLUGIN_NAME = 'group_norm' | ||
| registry = trt.get_plugin_registry() | ||
| creator = [c for c in registry.plugin_creator_list if c.name == PLUGIN_NAME and c.plugin_namespace == 'torch2trt'][0] | ||
| torch2trt_plugin = GroupNormPlugin(num_groups=num_groups, weight=weight, bias=bias, eps=eps) | ||
| return creator.deserialize_plugin(PLUGIN_NAME, torch2trt_plugin.serializeToString()) | ||
|
|
||
| @tensorrt_converter('torch.nn.GroupNorm.forward', enabled=has_group_norm_plugin()) | ||
| def convert_group_norm_trt(ctx): | ||
| module = ctx.method_args[0] | ||
| input = ctx.method_args[1] | ||
| num_groups = module.num_groups | ||
| weight = module.weight | ||
| bias = module.bias | ||
| eps = module.eps | ||
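| # add_missing_trt_tensors returns a list of ITensors, which is exactly what add_plugin_v2 expects below | ||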
| input_trt = add_missing_trt_tensors(ctx.network, [input]) | ||
| output = ctx.method_return | ||
| plugin = get_group_norm_plugin(num_groups, weight, bias, eps) | ||
|
|
||
| layer = ctx.network.add_plugin_v2(input_trt, plugin) | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=has_group_norm_plugin()) | ||
| def test_group_norm_trt_g2_fp32(): | ||
| return torch.nn.GroupNorm(2, 10) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=has_group_norm_plugin()) | ||
| def test_group_norm_trt_g2_eps_fp32(): | ||
| return torch.nn.GroupNorm(2, 10, eps=1e-4) | ||
|
|
||
|
|
||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| from torch2trt.torch2trt import * | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.Tensor.contiguous') | ||
| @tensorrt_converter('torch.nn.functional.dropout') | ||
| @tensorrt_converter('torch.nn.functional.dropout2d') | ||
| @tensorrt_converter('torch.nn.functional.dropout3d') | ||
| def convert_functional_identity(ctx): | ||
| input = ctx.method_args[0] | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
| output._trt = input_trt | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.Dropout.forward') | ||
| @tensorrt_converter('torch.nn.Dropout2d.forward') | ||
| @tensorrt_converter('torch.nn.Dropout3d.forward') | ||
| def convert_identity(ctx): | ||
| input = ctx.method_args[1] | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
| output._trt = input_trt |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,150 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| def _add_scale_1d2d3d(network, x_trt, mode, offset, scale, power): | ||
| ndim = len(x_trt.shape) | ||
|
|
||
| y_trt = x_trt | ||
|
|
||
| # reshape to 3 dims so the CHANNEL-mode scale below applies per channel | ||
| if ndim != 3: | ||
| layer = network.add_shuffle(y_trt) | ||
| layer.reshape_dims = (x_trt.shape[0], x_trt.shape[1], -1) # keep dims 0-1, flatten/pad the rest into one | ||
| y_trt = layer.get_output(0) | ||
|
|
||
| y_trt = network.add_scale(y_trt, mode, offset, scale, power).get_output(0) | ||
|
|
||
| # reshape the scaled result back to the original dimensions | ||
| if ndim != 3: | ||
| layer = network.add_shuffle(y_trt) | ||
| layer.reshape_dims = tuple(x_trt.shape) | ||
| y_trt = layer.get_output(0) | ||
|
|
||
| return y_trt | ||
|
|
||
| @tensorrt_converter('torch.instance_norm') | ||
| @tensorrt_converter('torch.nn.functional.instance_norm') | ||
| def convert_instance_norm(ctx): | ||
| input = get_arg(ctx, 'input', pos=0, default=None) | ||
| running_mean = get_arg(ctx, 'running_mean', pos=1, default=None) | ||
| running_var = get_arg(ctx, 'running_var', pos=2, default=None) | ||
| weight = get_arg(ctx, 'weight', pos=3, default=None) | ||
| bias = get_arg(ctx, 'bias', pos=4, default=None) | ||
| use_input_stats = get_arg(ctx, 'use_input_stats', pos=5, default=True) | ||
| momentum = get_arg(ctx, 'momentum', pos=6, default=0.1) | ||
| eps = get_arg(ctx, 'eps', pos=7, default=1e-05) | ||
| output = ctx.method_return | ||
|
|
||
|
|
||
| # CASE 1 - USING RUNNING STATISTICS | ||
| if not use_input_stats: | ||
|
|
||
| # equivalent to batch norm | ||
| scale = 1.0 / np.sqrt(running_var.detach().cpu().numpy() + eps) | ||
| offset = -running_mean.detach().cpu().numpy() * scale | ||
| power = np.ones_like(scale) | ||
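| # the scale layer computes (x * scale + offset) ** power per channel, which realises (x - running_mean) / sqrt(running_var + eps) | ||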
|
|
||
| if weight is not None: | ||
| # fold the affine parameters in: y = (x - mean) / std * weight + bias | ||
| w = weight.detach().cpu().numpy() | ||
| b = bias.detach().cpu().numpy() | ||
| offset = offset * w + b | ||
| scale = scale * w | ||
|
|
||
| result_trt = _add_scale_1d2d3d(ctx.network, input._trt, trt.ScaleMode.CHANNEL, offset, scale, power) | ||
|
|
||
| output._trt = result_trt | ||
|
|
||
| # CASE 2 - USING INPUT STATS | ||
| else: | ||
|
|
||
| eps_np = np.array([eps], dtype=np.float32) | ||
| keep_dims = True | ||
| reduce_axes = torch_dim_to_trt_axes(tuple(range(2, len(input.shape)))) | ||
|
|
||
| # compute mean over spatial | ||
| mean_trt = ctx.network.add_reduce(input._trt, trt.ReduceOperation.AVG, reduce_axes, keep_dims).get_output(0) | ||
|
|
||
| # compute variance over the spatial dims (eps is folded into the sqrt scale layer below to save a layer) | ||
| delta_trt = ctx.network.add_elementwise(input._trt, mean_trt, trt.ElementWiseOperation.SUB).get_output(0) | ||
| var_trt = ctx.network.add_scale(delta_trt, trt.ScaleMode.UNIFORM, np.zeros_like(eps_np), np.ones_like(eps_np), 2 * np.ones_like(eps_np)).get_output(0) | ||
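| # the scale layer above squares delta elementwise, (delta * 1 + 0) ** 2, and the reduce below averages it over the spatial axes | ||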
| var_trt = ctx.network.add_reduce(var_trt, trt.ReduceOperation.AVG, reduce_axes, keep_dims).get_output(0) | ||
|
|
||
| # compute sqrt(var + eps) | ||
| var_trt = ctx.network.add_scale(var_trt, trt.ScaleMode.UNIFORM, eps_np, np.ones_like(eps_np), 0.5 * np.ones_like(eps_np)).get_output(0) | ||
|
|
||
| # compute final result | ||
| result_trt = ctx.network.add_elementwise(delta_trt, var_trt, trt.ElementWiseOperation.DIV).get_output(0) | ||
|
|
||
| # compute affine (if applicable) | ||
| if weight is not None: | ||
|
|
||
| weight_np = weight.detach().cpu().numpy() | ||
| bias_np = bias.detach().cpu().numpy() | ||
|
|
||
| result_trt = _add_scale_1d2d3d(ctx.network, result_trt, trt.ScaleMode.CHANNEL, bias_np, weight_np, np.ones_like(bias_np)) | ||
|
|
||
| output._trt = result_trt | ||
|
|
||
|
|
||
| # STATIC | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)]) | ||
| def test_instance_norm_1d_static(): | ||
| return torch.nn.InstanceNorm1d(10, track_running_stats=True) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)]) | ||
| def test_instance_norm_2d_static(): | ||
| return torch.nn.InstanceNorm2d(10, track_running_stats=True) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)]) | ||
| def test_instance_norm_3d_static(): | ||
| return torch.nn.InstanceNorm3d(10, track_running_stats=True) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)]) | ||
| def test_instance_norm_1d_static_affine(): | ||
| return torch.nn.InstanceNorm1d(10, affine=True, track_running_stats=True) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)]) | ||
| def test_instance_norm_2d_static_affine(): | ||
| return torch.nn.InstanceNorm2d(10, affine=True, track_running_stats=True) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)]) | ||
| def test_instance_norm_3d_static_affine(): | ||
| return torch.nn.InstanceNorm3d(10, affine=True, track_running_stats=True) | ||
|
|
||
| # DYNAMIC | ||
|
|
||
| # @TODO(jwelsh): 1D dynamic test failing | ||
| # @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)]) | ||
| # def test_instance_norm_1d_dynamic(): | ||
| # return torch.nn.InstanceNorm1d(10, track_running_stats=False) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)]) | ||
| def test_instance_norm_2d_dynamic(): | ||
| return torch.nn.InstanceNorm2d(10, track_running_stats=False) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)]) | ||
| def test_instance_norm_3d_dynamic(): | ||
| return torch.nn.InstanceNorm3d(10, track_running_stats=False) | ||
|
|
||
|
|
||
| # @TODO(jwelsh): 1D dynamic test failing | ||
| # @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)]) | ||
| # def test_instance_norm_1d_dynamic_affine(): | ||
| # return torch.nn.InstanceNorm1d(10, affine=True, track_running_stats=False) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)]) | ||
| def test_instance_norm_2d_dynamic_affine(): | ||
| return torch.nn.InstanceNorm2d(10, affine=True, track_running_stats=False) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)]) | ||
| def test_instance_norm_3d_dynamic_affine(): | ||
| return torch.nn.InstanceNorm3d(10, affine=True, track_running_stats=False) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,179 @@ | ||
| import torch.nn.functional as F | ||
| import torch.nn as nn | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
| import collections | ||
|
|
||
|
|
||
| def has_interpolate_plugin(): | ||
| try: | ||
| from torch2trt.plugins import InterpolatePlugin | ||
| return True | ||
| except: | ||
| return False | ||
|
|
||
| def get_interpolate_plugin(size, mode, align_corners): | ||
| from torch2trt.plugins import InterpolatePlugin | ||
| PLUGIN_NAME = 'interpolate' | ||
| registry = trt.get_plugin_registry() | ||
| creator = [c for c in registry.plugin_creator_list if c.name == PLUGIN_NAME and c.plugin_namespace == 'torch2trt'][0] | ||
| torch2trt_plugin = InterpolatePlugin(size=size, mode=mode, align_corners=align_corners) | ||
| return creator.deserialize_plugin(PLUGIN_NAME, torch2trt_plugin.serializeToString()) | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.functional.interpolate', enabled=trt_version() < '7.1' and has_interpolate_plugin()) | ||
| def convert_interpolate_plugin(ctx): | ||
| input = ctx.method_args[0] | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
|
|
||
| try: | ||
| mode = get_arg(ctx, 'mode', pos=3, default='nearest') | ||
| except KeyError: | ||
| mode = 'nearest' | ||
|
|
||
| try: | ||
| align_corners = get_arg(ctx, 'align_corners', pos=4, default=None) | ||
| except KeyError: | ||
| align_corners = False | ||
|
|
||
| # currently only works for NCHW | ||
| size = list(output.shape[2:]) | ||
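| # the plugin takes an absolute output size, so it is read from the already-computed PyTorch output rather than from scale_factor | ||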
|
|
||
| plugin = get_interpolate_plugin(size=size, mode=mode, align_corners=align_corners) | ||
|
|
||
|
|
||
| layer = ctx.network.add_plugin_v2([input_trt], plugin) | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.functional.interpolate', enabled=trt_version() >= '7.1') | ||
| @tensorrt_converter('torch.nn.functional.upsample', enabled=trt_version() >= '7.1') | ||
| def convert_interpolate_trt7(ctx): | ||
| # parse args | ||
| input = get_arg(ctx, 'input', pos=0, default=None) | ||
| size = get_arg(ctx, 'size', pos=1, default=None) | ||
| scale_factor = get_arg(ctx, 'scale_factor', pos=2, default=None) | ||
| mode = get_arg(ctx, 'mode', pos=3, default='nearest') | ||
| align_corners = get_arg(ctx, 'align_corners', pos=4, default=None) | ||
|
|
||
| input_dim = input.dim() - 2 | ||
|
|
||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
| layer = ctx.network.add_resize(input=input_trt) | ||
|
|
||
| shape = size | ||
| if shape is not None: | ||
| if isinstance(shape, collections.Sequence): | ||
| shape = [input.size(1)] + list(shape) | ||
| else: | ||
| shape = [input.size(1)] + [shape] * input_dim | ||
|
|
||
| layer.shape = shape | ||
|
|
||
| scales = scale_factor | ||
| if scales is not None: | ||
| if not isinstance(scales, collections.Sequence): | ||
| scales = [scales] * input_dim | ||
| layer.scales = [1] + list(scales) | ||
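| # the leading 1 leaves the channel dimension unscaled; the batch dimension is not part of the TRT tensor here | ||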
|
|
||
| resize_mode = mode | ||
| if resize_mode.lower() in ["linear", "bilinear", "trilinear"]: | ||
| layer.resize_mode = trt.ResizeMode.LINEAR | ||
| else: | ||
| layer.resize_mode = trt.ResizeMode.NEAREST | ||
|
|
||
| if align_corners is not None: | ||
| layer.align_corners = align_corners | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| class Interpolate(torch.nn.Module): | ||
| def __init__(self, size, mode, align_corners): | ||
| super(Interpolate, self).__init__() | ||
| self.size = size | ||
| self.mode = mode | ||
| self.align_corners = align_corners | ||
|
|
||
| def forward(self, x): | ||
| return F.interpolate(x, self.size, mode=self.mode, align_corners=self.align_corners) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and has_interpolate_plugin()) | ||
| def test_interpolate_nearest(): | ||
| return Interpolate((224, 224), 'nearest', None) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and has_interpolate_plugin()) | ||
| def test_interpolate_bilinear(): | ||
| return Interpolate((224, 224), 'bilinear', False) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and has_interpolate_plugin()) | ||
| def test_interpolate_bicubic(): | ||
| return Interpolate((224, 224), 'bicubic', False) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and has_interpolate_plugin()) | ||
| def test_interpolate_area(): | ||
| return Interpolate((56, 56), 'area', None) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and has_interpolate_plugin()) | ||
| def test_upsample_scale_factor2(): | ||
| return nn.Upsample(scale_factor=2, mode='bilinear',align_corners=False) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,2,12,12)], enabled=trt_version() >= '7.1') | ||
| def test_nearest_mode(): | ||
| return torch.nn.Upsample(scale_factor=2, mode="nearest") | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,4,12,12)], enabled=trt_version() >= '7.1') | ||
| def test_bilinear_mode(): | ||
| return torch.nn.Upsample(scale_factor=3, mode="bilinear",align_corners=False) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,3,12,12)], enabled=trt_version() >= '7.1') | ||
| def test_align_corner(): | ||
| return torch.nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,5,13,13)], enabled=trt_version() >= '7.1') | ||
| def test_bilinear_mode_odd_input_shape(): | ||
| return torch.nn.Upsample(scale_factor=2,mode="bilinear",align_corners=False) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,4,12,12)], enabled=trt_version() >= '7.1') | ||
| def test_size_parameter(): | ||
| return torch.nn.Upsample(size=3,mode="nearest") | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,3,13,13)], enabled=trt_version() >= '7.1') | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,3,1,1)], enabled=trt_version() >= '7.1') | ||
| def test_size_parameter_odd_input(): | ||
| return torch.nn.Upsample(size=[6,3],mode="nearest") | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,4,6,6,6)], enabled=trt_version() >= '7.1') | ||
| def test_nearest_mode_3d(): | ||
| return torch.nn.Upsample(scale_factor=2, mode="nearest") | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,3,5,5,5)], enabled=trt_version() >= '7.1') | ||
| def test_bilinear_mode_3d(): | ||
| return torch.nn.Upsample(scale_factor=3, mode="trilinear",align_corners=False) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,4,8,8,8)], enabled=trt_version() >= '7.1') | ||
| def test_align_corner_3d(): | ||
| return torch.nn.Upsample(scale_factor=4, mode="trilinear", align_corners=True) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,6,7,7,7)], enabled=trt_version() >= '7.1') | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,3,2,4,4)], enabled=trt_version() >= '7.1') | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,3,1,1,1)], enabled=trt_version() >= '7.1') | ||
| def test_bilinear_mode_odd_input_shape_3d(): | ||
| return torch.nn.Upsample(scale_factor=2, mode="trilinear",align_corners=False) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,1,12,12,12)], enabled=trt_version() >= '7.1') | ||
| def test_size_parameter_3d(): | ||
| return torch.nn.Upsample(size=3,mode="trilinear", align_corners=True) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,3,7,9,5)], enabled=trt_version() >= '7.1') | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,4,3,5,1)], enabled=trt_version() >= '7.1') | ||
| def test_size_parameter_odd_input_3d(): | ||
| return torch.nn.Upsample(size=[11,14,17],mode="trilinear", align_corners=False) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
| from .unary import UnaryModule | ||
|
|
||
|
|
||
| def __convert_max_elementwise(ctx): | ||
| input_a = ctx.method_args[0] | ||
| input_b = ctx.method_args[1] | ||
| output = ctx.method_return | ||
| input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) | ||
| input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) | ||
| layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.MAX) | ||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| def __convert_max_reduce(ctx): | ||
| input = ctx.method_args[0] | ||
| dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, len(input.shape)))) | ||
| keepdim = get_arg(ctx, 'keepdim', pos=2, default=False) | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output_val = ctx.method_return[0] | ||
| output_idx = ctx.method_return[1] | ||
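| # torch.max(x, dim) returns (values, indices); the TRT reduce layer only produces values, so the indices output is left unconverted | ||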
| layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.MAX, torch_dim_to_trt_axes(dim), keepdim) | ||
| output_val._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.max') | ||
| @tensorrt_converter('torch.Tensor.max') | ||
| def convert_max(ctx): | ||
| if len(ctx.method_args) > 1 and isinstance(ctx.method_args[1], torch.Tensor): | ||
| __convert_max_elementwise(ctx) | ||
| else: | ||
| __convert_max_reduce(ctx) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| def test_max_reduce_dim1(): | ||
| return UnaryModule(lambda x: torch.max(x, 1)[0]) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| def test_max_reduce_dim22(): | ||
| return UnaryModule(lambda x: torch.max(x, 2)[0]) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| def test_max_reduce_dim1_keepdim(): | ||
| return UnaryModule(lambda x: torch.max(x, 1, keepdim=True)[0]) | ||
|
|
||
|
|
||
| class MaxElementwise(torch.nn.Module): | ||
| def forward(self, x, y): | ||
| return torch.max(x, y) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3), (1, 3, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3), (1,)]) # broadcast | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3), (1, 3, 3)]) # broadcast | ||
| def test_max_elementwise(): | ||
| return MaxElementwise() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.functional.max_pool2d') | ||
| def convert_max_pool2d(ctx): | ||
| # parse args | ||
| input = get_arg(ctx, 'input', pos=0, default=None) | ||
| kernel_size = get_arg(ctx, 'kernel_size', pos=1, default=None) | ||
| stride = get_arg(ctx, 'stride', pos=2, default=None) | ||
| padding = get_arg(ctx, 'padding', pos=3, default=0) | ||
| dilation = get_arg(ctx, 'dilation', pos=4, default=1) | ||
| ceil_mode = get_arg(ctx, 'ceil_mode', pos=5, default=False) | ||
|
|
||
| # get input trt tensor (or create constant if it doesn't exist) | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
|
|
||
| output = ctx.method_return | ||
|
|
||
| # get kernel size | ||
| if not isinstance(kernel_size, tuple): | ||
| kernel_size = (kernel_size, ) * 2 | ||
|
|
||
| # get stride | ||
| if not isinstance(stride, tuple): | ||
| stride = (stride, ) * 2 | ||
|
|
||
| # get padding | ||
| if not isinstance(padding, tuple): | ||
| padding = (padding, ) * 2 | ||
|
|
||
| layer = ctx.network.add_pooling( | ||
| input=input_trt, type=trt.PoolingType.MAX, window_size=kernel_size) | ||
|
|
||
| layer.stride = stride | ||
| layer.padding = padding | ||
|
|
||
| if ceil_mode: | ||
| layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)]) | ||
| def test_MaxPool2d_without_ceil_mode(): | ||
| return torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)]) | ||
| def test_MaxPool2d_with_ceil_mode(): | ||
| return torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.mean') | ||
| @tensorrt_converter('torch.Tensor.mean') | ||
| def convert_mean(ctx): | ||
| input = ctx.method_args[0] | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
|
|
||
| # get dims from args or kwargs; default to reducing over all non-batch dimensions | ||
| if 'dim' in ctx.method_kwargs: | ||
| dim = ctx.method_kwargs['dim'] | ||
| elif len(ctx.method_args) >= 2: | ||
| dim = ctx.method_args[1] | ||
| else: | ||
| dim = tuple(range(1, len(input.shape))) | ||
|
|
||
| # convert list to tuple | ||
| if isinstance(dim, list): | ||
| dim = tuple(dim) | ||
|
|
||
| if not isinstance(dim, tuple): | ||
| dim = (dim, ) | ||
|
|
||
| # create axes bitmask for reduce layer | ||
| axes = 0 | ||
| for d in dim: | ||
| axes |= 1 << (d - 1) # -1 to remove batch dimension | ||
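| # e.g. dim=(1, 2) on an NCHW input sets bits 0 and 1, i.e. the C and H axes of the batchless TRT tensor | ||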
|
|
||
| # get whether to keep dimensions | ||
| if 'keepdim' in ctx.method_kwargs: | ||
| keep_dims = ctx.method_kwargs['keepdim'] | ||
| elif len(ctx.method_args) == 3: | ||
| keep_dims = ctx.method_args[2] | ||
| else: | ||
| keep_dims = False | ||
|
|
||
| layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.AVG, axes, keep_dims) | ||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| class Mean(torch.nn.Module): | ||
| def __init__(self, dim, keepdim): | ||
| super(Mean, self).__init__() | ||
| self.dim = dim | ||
| self.keepdim = keepdim | ||
| def forward(self, x): | ||
| return x.mean(self.dim, self.keepdim) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_mean_channel(): | ||
| return Mean(1, False) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_mean_tuple(): | ||
| return Mean((1, 2), False) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_mean_keepdim(): | ||
| return Mean(1, True) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
| from .unary import UnaryModule | ||
|
|
||
|
|
||
| def __convert_min_elementwise(ctx): | ||
| input_a = ctx.method_args[0] | ||
| input_b = ctx.method_args[1] | ||
| output = ctx.method_return | ||
| input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) | ||
| input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) | ||
| layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.MIN) | ||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| def __convert_min_reduce(ctx): | ||
| input = ctx.method_args[0] | ||
| dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1,len(input.shape)))) | ||
| keepdim = get_arg(ctx, 'keepdim', pos=2, default=False) | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output_val = ctx.method_return[0] | ||
| output_idx = ctx.method_return[1] | ||
| layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.MIN, torch_dim_to_trt_axes(dim), keepdim) | ||
| output_val._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.min') | ||
| @tensorrt_converter('torch.Tensor.min') | ||
| def convert_min(ctx): | ||
| if len(ctx.method_args) > 1 and isinstance(ctx.method_args[1], torch.Tensor): | ||
| __convert_min_elementwise(ctx) | ||
| else: | ||
| __convert_min_reduce(ctx) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| def test_min_reduce_dim1(): | ||
| return UnaryModule(lambda x: torch.min(x, 1)[0]) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| def test_min_reduce_dim22(): | ||
| return UnaryModule(lambda x: torch.min(x, 2)[0]) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| def test_min_reduce_dim1_keepdim(): | ||
| return UnaryModule(lambda x: torch.min(x, 1, keepdim=True)[0]) | ||
|
|
||
|
|
||
| class MinElementwise(torch.nn.Module): | ||
| def forward(self, x, y): | ||
| return torch.min(x, y) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3), (1, 3, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3), (1,)]) # broadcast | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3), (1, 3, 3)]) # broadcast | ||
| def test_min_elementwise(): | ||
| return MinElementwise() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,99 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.fmod') | ||
| def convert_mod(ctx): | ||
| input_a = ctx.method_args[0] | ||
| input_b = ctx.method_args[1] | ||
| output = ctx.method_return | ||
| input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) | ||
| input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) | ||
| # we cannot use ElementWiseOperation.FLOOR_DIV directly, because torch.fmod truncates negative results toward 0 | ||
| # while TensorRT's FLOOR_DIV rounds toward -Inf | ||
| # sign = ab / |ab| | ||
| # truncated division: sign * (|a| // |b|) | ||
| ab_layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.PROD) | ||
| abs_ab_layer = ctx.network.add_unary(ab_layer.get_output(0), trt.UnaryOperation.ABS) | ||
| sign_layer = ctx.network.add_elementwise(ab_layer.get_output(0), abs_ab_layer.get_output(0), | ||
| trt.ElementWiseOperation.DIV) | ||
| abs_a_layer = ctx.network.add_unary(input_a_trt, trt.UnaryOperation.ABS) | ||
| abs_b_layer = ctx.network.add_unary(input_b_trt, trt.UnaryOperation.ABS) | ||
| abs_floor_layer = ctx.network.add_elementwise(abs_a_layer.get_output(0), abs_b_layer.get_output(0), | ||
| trt.ElementWiseOperation.FLOOR_DIV) | ||
| # fmod(a, b) = a - trunc(a / b) * b | ||
| floordiv_layer = ctx.network.add_elementwise(sign_layer.get_output(0), abs_floor_layer.get_output(0), | ||
| trt.ElementWiseOperation.PROD) | ||
| prod_layer = ctx.network.add_elementwise(floordiv_layer.get_output(0), input_b_trt, trt.ElementWiseOperation.PROD) | ||
| sub_layer = ctx.network.add_elementwise(input_a_trt, prod_layer.get_output(0), trt.ElementWiseOperation.SUB) | ||
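| # e.g. fmod(-7, 3): sign = -1, |a| // |b| = 2, so the result is -7 - (-2 * 3) = -1, matching torch.fmod's truncation toward 0 | ||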
| output._trt = sub_layer.get_output(0) | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.Tensor.__mod__') | ||
| # the % operator needs its own converter because Torch rounds this result toward -Inf (floor semantics), unlike torch.fmod | ||
| # a bug is filed: https://github.com/pytorch/pytorch/issues/52425 | ||
| # but for now we have to convert the model to match that behaviour exactly | ||
| def convert_mod_operator(ctx): | ||
| input_a = ctx.method_args[0] | ||
| input_b = ctx.method_args[1] | ||
| output = ctx.method_return | ||
| input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) | ||
| input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) | ||
| # a % b = a - (a//b) * b | ||
| floordiv_layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.FLOOR_DIV) | ||
| prod_layer = ctx.network.add_elementwise(floordiv_layer.get_output(0), input_b_trt, trt.ElementWiseOperation.PROD) | ||
| mod_layer = ctx.network.add_elementwise(input_a_trt, prod_layer.get_output(0), trt.ElementWiseOperation.SUB) | ||
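| # e.g. -7 % 3: FLOOR_DIV gives -3 (rounding toward -Inf), so the result is -7 - (-3 * 3) = 2, matching Python's floor semantics | ||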
| output._trt = mod_layer.get_output(0) | ||
|
|
||
|
|
||
| class Mod(torch.nn.Module): | ||
| def __init__(self): | ||
| super(Mod, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return x % y | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)]) | ||
| def test_mod_op(): | ||
| return Mod() | ||
|
|
||
|
|
||
| class ModAssign(torch.nn.Module): | ||
| def __init__(self): | ||
| super(ModAssign, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| x %= y | ||
| return x | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)]) | ||
| def test_mod_op_assign(): | ||
| return ModAssign() | ||
|
|
||
|
|
||
| class ModConst(torch.nn.Module): | ||
| def __init__(self): | ||
| super(ModConst, self).__init__() | ||
|
|
||
| def forward(self, x): | ||
| return x % 2. | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20)]) | ||
| def test_mod_op_const(): | ||
| return ModConst() | ||
|
|
||
|
|
||
| class TorchMod(torch.nn.Module): | ||
| def __init__(self): | ||
| super(TorchMod, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return torch.fmod(x, y) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 40, 20)]) | ||
| def test_mod_func(): | ||
| return TorchMod() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,108 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.mul') | ||
| @tensorrt_converter('torch.Tensor.__imul__') | ||
| @tensorrt_converter('torch.Tensor.__mul__') | ||
| @tensorrt_converter('torch.Tensor.__rmul__') | ||
| def convert_mul(ctx): | ||
| input_a = ctx.method_args[0] | ||
| input_b = ctx.method_args[1] | ||
| output = ctx.method_return | ||
| input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) | ||
| input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) | ||
| layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.PROD) | ||
| output._trt = layer.get_output(0) | ||
|
|
||
| class Mul(torch.nn.Module): | ||
| def __init__(self): | ||
| super(Mul, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return x * y | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) | ||
| def test_mul_basic(): | ||
| return Mul() | ||
|
|
||
|
|
||
| class IMul(torch.nn.Module): | ||
| def __init__(self): | ||
| super(IMul, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| x *= y | ||
| return x | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) | ||
| def test_mul_imul(): | ||
| return IMul() | ||
|
|
||
|
|
||
| class TorchMul(torch.nn.Module): | ||
| def __init__(self): | ||
| super(TorchMul, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return torch.mul(x, y) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) | ||
| def test_mul_torchmul(): | ||
| return TorchMul() | ||
|
|
||
|
|
||
| class RMulInt(torch.nn.Module): | ||
| def __init__(self): | ||
| super(RMulInt, self).__init__() | ||
|
|
||
| def forward(self, x): | ||
| return 10 * x | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_rmul_int(): | ||
| return RMulInt() | ||
|
|
||
|
|
||
| class RMulFloat(torch.nn.Module): | ||
| def __init__(self): | ||
| super(RMulFloat, self).__init__() | ||
|
|
||
| def forward(self, x): | ||
| return 10.0 * x | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_rmul_float(): | ||
| return RMulFloat() | ||
|
|
||
|
|
||
| class MulConstantNoBatch(torch.nn.Module): | ||
| def __init__(self): | ||
| super(MulConstantNoBatch, self).__init__() | ||
| self.register_buffer('y', torch.ones((3, 10, 10))) | ||
|
|
||
| def forward(self, x): | ||
| return x * self.y | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) | ||
| def test_mul_constant_nobatch(): | ||
| return MulConstantNoBatch() | ||
|
|
||
|
|
||
| class MulConstantBatch(torch.nn.Module): | ||
| def __init__(self): | ||
| super(MulConstantBatch, self).__init__() | ||
| self.register_buffer('y', torch.ones((1, 3, 10, 10))) | ||
|
|
||
| def forward(self, x): | ||
| return x * self.y | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) | ||
| def test_mul_constant_batch(): | ||
| return MulConstantBatch() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| import tensorrt as trt | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
| @tensorrt_converter('torch.Tensor.narrow') | ||
| @tensorrt_converter('torch.narrow') | ||
| def convert_narrow(ctx): | ||
| inputs = get_arg(ctx, 'input', pos=0, default=None) | ||
| output = ctx.method_return | ||
| shape = list(inputs.shape) | ||
| start = [0] * len(shape) | ||
| stride = [1] * len(shape) | ||
| # normalize a negative dim, then read start/length from args or kwargs | ||
| dim = get_arg(ctx, 'dim', pos=1, default=0) | ||
| if dim < 0: | ||
| dim = len(shape) + dim | ||
| start[dim] = get_arg(ctx, 'start', pos=2, default=None) | ||
| shape[dim] = get_arg(ctx, 'length', pos=3, default=None) | ||
| # the slice layer works on batchless dims, so drop the batch dimension | ||
| input_trt = trt_(ctx.network, inputs) | ||
| layer = ctx.network.add_slice(input=input_trt, start=start[1:], shape=shape[1:], stride=stride[1:]) | ||
| output._trt = layer.get_output(0) | ||
|
|
||
| class Narrow(torch.nn.Module): | ||
| def __init__(self, dim, start, length): | ||
| super(Narrow, self).__init__() | ||
| self.dim = dim | ||
| self.start = start | ||
| self.length = length | ||
|
|
||
| def forward(self, x): | ||
| return torch.narrow(x,self.dim,self.start,self.length) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,3,224,224)]) | ||
| def test_narrow1(): | ||
| return Narrow(1,0,2) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1,3,224,224)]) | ||
| def test_narrow2(): | ||
| return Narrow(2,0,50) | ||
|
|
||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.ne') | ||
| @tensorrt_converter('torch.Tensor.__ne__') | ||
| def convert_ne(ctx): | ||
| input_a = ctx.method_args[0] | ||
| input_b = ctx.method_args[1] | ||
| output = ctx.method_return | ||
| input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) | ||
| input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) | ||
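| # there is no not-equal elementwise op to use directly, so compute EQUAL and invert it with a unary NOT | ||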
| layer_1 = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.EQUAL) | ||
| layer_2 = ctx.network.add_unary(layer_1.get_output(0), trt.UnaryOperation.NOT) | ||
| output._trt = layer_2.get_output(0) | ||
|
|
||
|
|
||
| class NotEqual(torch.nn.Module): | ||
| def __init__(self): | ||
| super(NotEqual, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return x != y | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)]) | ||
| def test_ne_op(): | ||
| return NotEqual() | ||
|
|
||
|
|
||
| class NotEqualConst(torch.nn.Module): | ||
| def __init__(self): | ||
| super(NotEqualConst, self).__init__() | ||
|
|
||
| def forward(self, x): | ||
| return x != 13.62 | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20)]) | ||
| def test_ne_op_const(): | ||
| return NotEqualConst() | ||
|
|
||
|
|
||
| class TorchNotEqual(torch.nn.Module): | ||
| def __init__(self): | ||
| super(TorchNotEqual, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return torch.ne(x, y) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)]) | ||
| def test_ne_torch(): | ||
| return TorchNotEqual() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,67 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.functional.normalize') | ||
| def convert_normalize(ctx): | ||
| # get args | ||
| input = get_arg(ctx, 'input', pos=0, default=None) | ||
| p = get_arg(ctx, 'p', pos=1, default=2) | ||
| dim = get_arg(ctx, 'dim', pos=2, default=1) | ||
| eps = get_arg(ctx, 'eps', pos=3, default=1e-12) | ||
|
|
||
| # input_trt = input._trt | ||
| output = ctx.method_return | ||
|
|
||
| # add broadcastable scalar constants to network | ||
| input_trt, eps_trt, p_trt, p_inv_trt = add_missing_trt_tensors(ctx.network, [input, eps, p, 1.0 / p]) | ||
| input_trt, eps_trt, p_trt, p_inv_trt = broadcast_trt_tensors(ctx.network, [input_trt, eps_trt, p_trt, p_inv_trt], len(input_trt.shape)) | ||
|
|
||
| # compute norm = sum(abs(x)**p, dim=dim)**(1./p) | ||
| norm = ctx.network.add_unary(input_trt, trt.UnaryOperation.ABS).get_output(0) | ||
| norm = ctx.network.add_elementwise(norm, p_trt, trt.ElementWiseOperation.POW).get_output(0) | ||
| norm = ctx.network.add_reduce(norm, trt.ReduceOperation.SUM, torch_dim_to_trt_axes(dim), keep_dims=True).get_output(0) | ||
| norm = ctx.network.add_elementwise(norm, p_inv_trt, trt.ElementWiseOperation.POW).get_output(0) | ||
|
|
||
| # clamp norm = max(norm, eps) | ||
| norm = ctx.network.add_elementwise(norm, eps_trt, trt.ElementWiseOperation.MAX).get_output(0) | ||
|
|
||
| # divide input by norm | ||
| output._trt = ctx.network.add_elementwise(input_trt, norm, trt.ElementWiseOperation.DIV).get_output(0) | ||
|
|
||
|
|
||
| class Normalize(torch.nn.Module): | ||
| def __init__(self, *args, **kwargs): | ||
| super(Normalize, self).__init__() | ||
| self.args = args | ||
| self.kwargs = kwargs | ||
|
|
||
| def forward(self, x): | ||
| return torch.nn.functional.normalize(x, *self.args, **self.kwargs) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_normalize_basic(): | ||
| return Normalize() | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_normalize_l1_basic(): | ||
| return Normalize(p=1.0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_normalize_l1p5_basic(): | ||
| return Normalize(p=1.5) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) | ||
| def test_normalize_l2_height(): | ||
| return Normalize(p=2.0, dim=2) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.functional.pad') | ||
| def convert_pad(ctx): | ||
| input = ctx.method_args[0] | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
|
|
||
| pad = ctx.method_args[1] | ||
| pre_padding = (pad[2], pad[0]) | ||
| post_padding = (pad[3], pad[1]) | ||
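| # F.pad orders 2D padding as (left, right, top, bottom); TRT's add_padding wants (top, left) pre-padding and (bottom, right) post-padding | ||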
|
|
||
| # mode / value are ignored since not supported by TensorRT | ||
|
|
||
| layer = ctx.network.add_padding(input_trt, pre_padding, post_padding) | ||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| class Pad(torch.nn.Module): | ||
|
|
||
| def __init__(self, pad): | ||
| super(Pad, self).__init__() | ||
| self.pad = pad | ||
|
|
||
| def forward(self, x): | ||
| return torch.nn.functional.pad(x, self.pad) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_pad_basic(): | ||
| return Pad((1, 2, 3, 4)) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.Tensor.permute') | ||
| def convert_permute(ctx): | ||
| input = ctx.method_args[0] | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
|
|
||
| # parse the permutation; TRT dims exclude the batch dim, so each index is shifted by -1 below | ||
| if isinstance(ctx.method_args[1], int): | ||
| permutation = tuple(ctx.method_args[1:]) # handle permute(a, b, c) | ||
| else: | ||
| permutation = tuple(ctx.method_args[1]) # handle permute([a, b, c]) | ||
|
|
||
| assert(permutation[0] == 0) # cannot move batch dim | ||
|
|
||
| trt_permutation = tuple([p - 1 for p in permutation])[1:] | ||
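| # e.g. permute(0, 3, 1, 2) on NCHW becomes the TRT permutation (2, 0, 1) over the batchless dims | ||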
|
|
||
| layer = ctx.network.add_shuffle(input_trt) | ||
| layer.second_transpose = tuple(trt_permutation) | ||
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| class Permute(torch.nn.Module): | ||
| def __init__(self, *args): | ||
| super(Permute, self).__init__() | ||
| self.args = args | ||
| def forward(self, x): | ||
| return x.permute(*self.args).contiguous() | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)]) | ||
| def test_permute_2d_0123(): | ||
| return Permute(0, 1, 2, 3) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)]) | ||
| def test_permute_2d_0312(): | ||
| return Permute(0, 3, 1, 2) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)]) | ||
| def test_permute_3d_01234(): | ||
| return Permute(0, 1, 2, 3, 4) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)]) | ||
| def test_permute_3d_04132(): | ||
| return Permute(0, 4, 1, 3, 2) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)]) | ||
| def test_permute_list(): | ||
| return Permute([0, 4, 1, 3, 2]) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)]) | ||
| def test_permute_tuple(): | ||
| return Permute((0, 4, 1, 3, 2)) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,92 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.pow') | ||
| @tensorrt_converter('torch.Tensor.__ipow__') | ||
| @tensorrt_converter('torch.Tensor.__pow__') | ||
| def convert_pow(ctx): | ||
| input_a = ctx.method_args[0] | ||
| input_b = ctx.method_args[1] | ||
| output = ctx.method_return | ||
| input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) | ||
| input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) | ||
| layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.POW) | ||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.Tensor.__rpow__') | ||
| def convert_pow(ctx): | ||
| input_a = ctx.method_args[1] | ||
| input_b = ctx.method_args[0] # flipped for rpow | ||
| output = ctx.method_return | ||
| input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) | ||
| input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) | ||
| layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.POW) | ||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| class Pow(torch.nn.Module): | ||
| def __init__(self): | ||
| super(Pow, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return x ** y | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) | ||
| def test_pow_basic(): | ||
| return Pow() | ||
|
|
||
|
|
||
| # __ipow__ not yet impl in torch | ||
| # class IPow(torch.nn.Module): | ||
| # def __init__(self): | ||
| # super(IPow, self).__init__() | ||
|
|
||
| # def forward(self, x, y): | ||
| # x **= y | ||
| # return x | ||
|
|
||
|
|
||
| # @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) | ||
| # def test_pow_ipow(): | ||
| # return IPow() | ||
|
|
||
|
|
||
| class TorchPow(torch.nn.Module): | ||
| def __init__(self): | ||
| super(TorchPow, self).__init__() | ||
|
|
||
| def forward(self, x, y): | ||
| return torch.pow(x, y) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) | ||
| def test_torch_pow(): | ||
| return TorchPow() | ||
|
|
||
|
|
||
| class RpowInt(torch.nn.Module): | ||
| def __init__(self): | ||
| super(RpowInt, self).__init__() | ||
|
|
||
| def forward(self, x): | ||
| return 2 ** x | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_rpow_int(): | ||
| return RpowInt() | ||
|
|
||
|
|
||
| class RpowFloat(torch.nn.Module): | ||
| def __init__(self): | ||
| super(RpowFloat, self).__init__() | ||
|
|
||
| def forward(self, x): | ||
| return 2.0 ** x | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) | ||
| def test_rpow_float(): | ||
| return RpowFloat() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,47 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.functional.prelu') | ||
| def convert_prelu(ctx): | ||
| input = get_arg(ctx, 'input', pos=0, default=None) | ||
| weight = get_arg(ctx, 'weight', pos=1, default=None) | ||
| output = ctx.method_return | ||
|
|
||
| weight_shape = [1] * (len(input.shape) - 1) | ||
| weight_shape[0] = weight.numel() | ||
|
|
||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
|
|
||
|
|
||
| # y = prelu(x) = relu(x) - alpha * relu(-x) | ||
| weight_trt = ctx.network.add_constant(weight_shape, -weight.detach().view(weight_shape).cpu().numpy()).get_output(0) # detach so considered leaf | ||
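| # the constant holds -alpha reshaped for per-channel broadcasting, so b below ends up as -alpha * relu(-x) | ||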
|
|
||
| # x >= 0 | ||
| a = ctx.network.add_activation(input_trt, trt.ActivationType.RELU).get_output(0) | ||
|
|
||
| # x <= 0 | ||
| b = ctx.network.add_unary(input_trt, trt.UnaryOperation.NEG).get_output(0) | ||
| b = ctx.network.add_activation(b, trt.ActivationType.RELU).get_output(0) | ||
| b = ctx.network.add_elementwise(b, weight_trt, trt.ElementWiseOperation.PROD).get_output(0) | ||
|
|
||
| # y = a + b | ||
| y = ctx.network.add_elementwise(a, b, trt.ElementWiseOperation.SUM) | ||
|
|
||
| output._trt = y.get_output(0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3, 3)]) | ||
| def test_prelu_scalar(): | ||
| return torch.nn.PReLU() | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3, 3)]) | ||
| def test_prelu_vector(): | ||
| m = torch.nn.PReLU(5) | ||
| m.weight = torch.nn.Parameter(torch.randn(5)) # randn so each channel different | ||
| return m |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
| from .unary import UnaryModule | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.prod') | ||
| @tensorrt_converter('torch.Tensor.prod') | ||
| def convert_prod(ctx): | ||
| input = ctx.method_args[0] | ||
| dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, len(input.shape)))) | ||
| keepdim = get_arg(ctx, 'keepdim', pos=2, default=False) | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
| layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.PROD, torch_dim_to_trt_axes(dim), keepdim) | ||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| def test_prod_reduce_all(): | ||
| return UnaryModule(lambda x: torch.prod(x)) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| def test_prod_reduce_dim1(): | ||
| return UnaryModule(lambda x: torch.prod(x, 1)) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| def test_prod_reduce_dim22(): | ||
| return UnaryModule(lambda x: torch.prod(x, 2)) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) | ||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) | ||
| def test_prod_reduce_dim1_keepdim(): | ||
| return UnaryModule(lambda x: torch.prod(x, 1, keepdim=True)) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.relu') | ||
| @tensorrt_converter('torch.relu_') | ||
| @tensorrt_converter('torch.nn.functional.relu') | ||
| @tensorrt_converter('torch.nn.functional.relu_') | ||
| @tensorrt_converter('torch.Tensor.relu') | ||
| def convert_functional_relu(ctx): | ||
| ctx.method_args = (torch.nn.ReLU(),) + ctx.method_args | ||
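| # reuse the module converter: prepend a ReLU instance so ctx.method_args lines up with ReLU.forward(self, input) | ||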
| convert_relu(ctx) | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.ReLU.forward') | ||
| def convert_relu(ctx): | ||
| input = ctx.method_args[1] | ||
| input_trt = add_missing_trt_tensors(ctx.network, [input])[0] | ||
| output = ctx.method_return | ||
| layer = ctx.network.add_activation( | ||
| input=input_trt, type=trt.ActivationType.RELU) | ||
| output._trt = layer.get_output(0) | ||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)]) | ||
| def test_relu_basic(): | ||
| return torch.nn.ReLU() | ||
|
|
||
|
|
||
| class FunctionalRelu(torch.nn.Module): | ||
| def forward(self, x): | ||
| return torch.nn.functional.relu(x) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)]) | ||
| def test_functional_relu_basic(): | ||
| return FunctionalRelu() | ||
|
|
||
|
|
||
| class TensorRelu(torch.nn.Module): | ||
| def __init__(self): | ||
| super(TensorRelu, self).__init__() | ||
|
|
||
| def forward(self, x): | ||
| return x.relu() | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20)]) | ||
| def test_tensor_relu(): | ||
| return TensorRelu() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| from torch2trt.torch2trt import * | ||
| from torch2trt.module_test import add_module_test | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.functional.relu6') | ||
| def convert_functional_relu6(ctx): | ||
| ctx.method_args = (torch.nn.ReLU6(),) + ctx.method_args | ||
| convert_relu6(ctx) | ||
|
|
||
|
|
||
| @tensorrt_converter('torch.nn.ReLU6.forward') | ||
| def convert_relu6(ctx): | ||
| input = ctx.method_args[1] | ||
| output = ctx.method_return | ||
|
|
||
| input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input, 6]) | ||
| input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) | ||
|
|
||
| layer = ctx.network.add_activation( | ||
| input=input_a_trt, type=trt.ActivationType.RELU) | ||
| layer = ctx.network.add_elementwise( | ||
| layer.get_output(0), input_b_trt, trt.ElementWiseOperation.MIN) | ||
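| # relu6(x) = min(relu(x), 6); the constant 6 was added to the network as a broadcastable tensor above | ||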
|
|
||
| output._trt = layer.get_output(0) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)]) | ||
| def test_relu6_basic(): | ||
| return torch.nn.ReLU6() | ||
|
|
||
|
|
||
| class FunctionalRelu6(torch.nn.Module): | ||
| def forward(self, x): | ||
| return torch.nn.functional.relu6(x) | ||
|
|
||
|
|
||
| @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)]) | ||
| def test_functional_relu6_basic(): | ||
| return FunctionalRelu6() | ||
|
|