68 changes: 68 additions & 0 deletions torch2trt/converters/Conv2d.py
@@ -0,0 +1,68 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter("torch.nn.Conv2d.forward", enabled=trt_version() < '7.0')
def convert_Conv2d(ctx):
module = ctx.method_args[0]
input = ctx.method_args[1]
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return

kernel_size = module.kernel_size
if not isinstance(kernel_size, tuple):
kernel_size = (kernel_size,) * 2

stride = module.stride
if not isinstance(stride, tuple):
stride = (stride,) * 2

padding = module.padding
if not isinstance(padding, tuple):
padding = (padding,) * 2

dilation = module.dilation
if not isinstance(dilation, tuple):
dilation = (dilation,) * 2

kernel = module.weight.detach().cpu().numpy()

bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype))
if module.bias is not None:
bias = module.bias.detach().cpu().numpy()

layer = ctx.network.add_convolution(
input=input_trt,
num_output_maps=module.out_channels,
kernel_shape=kernel_size,
kernel=kernel,
bias=bias,
)
layer.stride = stride
layer.padding = padding
layer.dilation = dilation

if module.groups is not None:
layer.num_groups = module.groups

output._trt = layer.get_output(0)
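
# For reference, a sketch of the shape math this converter relies on (standard Conv2d formula):
#   H_out = floor((H_in + 2*padding - dilation*(kernel_size - 1) - 1) / stride) + 1
# and likewise for W_out. A minimal usage sketch, assuming the usual torch2trt entry point:
#   from torch2trt import torch2trt
#   model = torch.nn.Conv2d(10, 5, kernel_size=3, padding=1).cuda().eval()
#   model_trt = torch2trt(model, [torch.randn(1, 10, 224, 224).cuda()])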


@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)], enabled=trt_version() < '7.0')
def test_Conv2d_basic():
return torch.nn.Conv2d(10, 5, kernel_size=1, stride=1, padding=0)


@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)], enabled=trt_version() < '7.0')
def test_Conv2d_stride2():
return torch.nn.Conv2d(10, 5, kernel_size=1, stride=2, padding=0)


@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)], enabled=trt_version() < '7.0')
def test_Conv2d_kernel3():
return torch.nn.Conv2d(10, 5, kernel_size=3, stride=2, padding=1)


@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)], enabled=trt_version() < '7.0')
def test_Conv2d_dilation2():
return torch.nn.Conv2d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2)
79 changes: 79 additions & 0 deletions torch2trt/converters/ConvTranspose.py
@@ -0,0 +1,79 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.nn.ConvTranspose2d.forward', enabled=trt_version() >= '7.0')
@tensorrt_converter('torch.nn.ConvTranspose3d.forward', enabled=trt_version() >= '7.0')
def convert_ConvTranspose2d_trt7(ctx):
module = ctx.method_args[0]
input = ctx.method_args[1]
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return

input_dim = input.dim() - 2

kernel_size = module.kernel_size
if not isinstance(kernel_size, tuple):
kernel_size = (kernel_size, ) * input_dim

stride = module.stride
if not isinstance(stride, tuple):
stride = (stride, ) * input_dim

padding = module.padding
if not isinstance(padding, tuple):
padding = (padding, ) * input_dim

assert module.dilation == 1 or all([d == 1 for d in module.dilation]), \
"Transposed convolution dilation is not supported in TensorRT"

kernel = module.weight.detach().cpu().numpy()

bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype))
if module.bias is not None:
bias = module.bias.detach().cpu().numpy()

layer = ctx.network.add_deconvolution_nd(
input=input_trt,
num_output_maps=module.out_channels,
kernel_shape=kernel_size,
kernel=kernel,
bias=bias)
layer.stride_nd = stride
layer.padding_nd = padding

if module.groups is not None:
layer.num_groups = module.groups

output._trt = layer.get_output(0)
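
# For reference (a sketch, with dilation == 1 as enforced by the assert above), PyTorch computes
#   D_out = (D_in - 1) * stride - 2 * padding + kernel_size + output_padding
# per spatial dim; note that module.output_padding is not referenced by this converter.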


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 7, 7)], enabled=trt_version() >= '7.0')
def test_ConvTranspose2d_basic_trt7():
return torch.nn.ConvTranspose2d(10, 5, kernel_size=1, stride=1, padding=0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 8, 8)], enabled=trt_version() >= '7.0')
def test_ConvTranspose2d_stride2_trt7():
return torch.nn.ConvTranspose2d(10, 5, kernel_size=1, stride=2, padding=0)

@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 9, 9)], enabled=trt_version() >= '7.0')
def test_ConvTranspose2d_kernel3_trt7():
return torch.nn.ConvTranspose2d(10, 5, kernel_size=3, stride=2, padding=1)



@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 7, 7, 7)], enabled=trt_version() >= '7.0')
def test_ConvTranspose3d_basic_trt7():
return torch.nn.ConvTranspose3d(10, 5, kernel_size=1, stride=1, padding=0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 7, 7, 7)], enabled=trt_version() >= '7.0')
def test_ConvTranspose3d_stride2_trt7():
return torch.nn.ConvTranspose3d(10, 5, kernel_size=1, stride=2, padding=0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 6, 6, 6)], enabled=trt_version() >= '7.0')
def test_ConvTranspose3d_kernel3_trt7():
return torch.nn.ConvTranspose3d(10, 5, kernel_size=3, stride=2, padding=1)

68 changes: 68 additions & 0 deletions torch2trt/converters/ConvTranspose2d.py
@@ -0,0 +1,68 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test

@tensorrt_converter("torch.nn.ConvTranspose2d.forward", enabled=trt_version() < '7.0')
def convert_ConvTranspose2d(ctx):
module = ctx.method_args[0]
input = ctx.method_args[1]
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return

kernel_size = module.kernel_size
if not isinstance(kernel_size, tuple):
kernel_size = (kernel_size,) * 2

stride = module.stride
if not isinstance(stride, tuple):
stride = (stride,) * 2

padding = module.padding
if not isinstance(padding, tuple):
padding = (padding,) * 2

kernel = module.weight.detach().cpu().numpy()

bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype))
if module.bias is not None:
bias = module.bias.detach().cpu().numpy()

layer = ctx.network.add_deconvolution(
input=input_trt,
num_output_maps=module.out_channels,
kernel_shape=kernel_size,
kernel=kernel,
bias=bias,
)
layer.stride = stride

    # If output_padding of the original PyTorch layer is nonzero, pre_padding and post_padding
    # must be set separately; otherwise the PyTorch and TensorRT output dimensions may differ.
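    # Worked example (a sketch): for H_in=5, kernel=3, stride=2, padding=1, output_padding=1,
    # PyTorch gives H_out = (5-1)*2 - 2*1 + 3 + 1 = 10. TensorRT's deconvolution output is
    # roughly (H_in-1)*stride + kernel - pre_padding - post_padding, so pre_padding=1 and
    # post_padding=1-1=0 also give 10, whereas symmetric padding alone would give 9.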
output_padding = module.output_padding
if output_padding[0] + output_padding[1] > 0:
layer.pre_padding = padding
layer.post_padding = trt.tensorrt.DimsHW(padding[0] - output_padding[0], padding[1] - output_padding[1])
else:
layer.padding = padding

if module.groups is not None:
layer.num_groups = module.groups

output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device("cuda"), [(1,3,224,224)], enabled=trt_version() < '7.0')
def test_square_kernel_equal_stride_mode():
return torch.nn.ConvTranspose2d(3,3,3,stride=2)

@add_module_test(torch.float32, torch.device("cuda"), [(1,3,224,224)], enabled=trt_version() < '7.0')
def test_square_kernel_equal_stride_mode_unequal_op_size():
return torch.nn.ConvTranspose2d(3,6,3,stride=2)

@add_module_test(torch.float32, torch.device("cuda"), [(1,3,224,224)], enabled=trt_version() < '7.0')
def test_unequal_stride_mode():
return torch.nn.ConvTranspose2d(3,3,3, stride=(2,1), padding=(4,2))

@add_module_test(torch.float32, torch.device("cuda"), [(1,3,112,112)], enabled=trt_version() < '7.0')
@add_module_test(torch.float32, torch.device("cuda"), [(1,3,7,7)], enabled=trt_version() < '7.0')
def test_kernelsize_4():
return torch.nn.ConvTranspose2d(3,3,4, stride=2, padding=1)

45 changes: 45 additions & 0 deletions torch2trt/converters/Linear.py
@@ -0,0 +1,45 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.nn.Linear.forward')
def convert_Linear(ctx):
module = ctx.method_args[0]
input = ctx.method_args[1]
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return

# reshape to ...xNx1x1
layer = ctx.network.add_shuffle(input_trt)
layer.reshape_dims = tuple(input_trt.shape) + (1, 1)

bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype))
if module.bias is not None:
bias = module.bias.detach().cpu().numpy()

# add fully connected
layer = ctx.network.add_fully_connected(
input=layer.get_output(0),
num_outputs=module.out_features,
kernel=module.weight.detach().cpu().numpy(),
bias=bias)

# reshape back to N
layer = ctx.network.add_shuffle(layer.get_output(0))
layer.reshape_dims = tuple(output.shape[1:])

output._trt = layer.get_output(0)
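
# Shape trace for torch.nn.Linear(10, 5) on an (N, 10) input (a sketch; the TRT tensor
# excludes the implicit batch dim): (10,) -> shuffle -> (10, 1, 1) -> fully connected ->
# (5, 1, 1) -> shuffle -> (5,)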


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 10)])
def test_Linear_basic():
return torch.nn.Linear(10, 5)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 10)])
def test_Linear_no_bias():
return torch.nn.Linear(10, 5, bias=False)
12 changes: 12 additions & 0 deletions torch2trt/converters/LogSoftmax.py
@@ -0,0 +1,12 @@
from torch2trt.torch2trt import *


@tensorrt_converter('torch.nn.LogSoftmax.forward')
def convert_LogSoftmax(ctx):
input = ctx.method_args[1]
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return
layer = ctx.network.add_softmax(input=input_trt)
layer = ctx.network.add_unary(input=layer.get_output(0),
op=trt.UnaryOperation.LOG)
output._trt = layer.get_output(0)
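
# Note: log_softmax(x) == log(softmax(x)), so the conversion chains a softmax layer with a
# unary LOG; mathematically this matches PyTorch's (numerically fused) implementation.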
61 changes: 61 additions & 0 deletions torch2trt/converters/__init__.py
@@ -0,0 +1,61 @@
# dummy converters print a warning when an unsupported method is encountered
import tensorrt as trt
from .dummy_converters import *

# supported converters will override dummy converters

from .AdaptiveAvgPool2d import *
from .BatchNorm1d import *
from .BatchNorm2d import *
from .Conv import *
from .Conv1d import *
from .Conv2d import *
from .ConvTranspose import *
from .ConvTranspose2d import *
from .Linear import *
from .LogSoftmax import *
from .activation import *
from .adaptive_avg_pool2d import *
from .adaptive_max_pool2d import *
from .add import *
from .avg_pool import *
from .batch_norm import *
from .cat import *
from .chunk import *
from .clamp import *
from .compare import *
from .div import *
from .expand import *
from .floordiv import *
from .getitem import *
from .identity import *
from .instance_norm import *
from .interpolate import *
from .group_norm import *
from .max import *
from .max_pool2d import *
from .mean import *
from .min import *
from .mod import *
from .mul import *
from .normalize import *
from .ne import *
from .narrow import *
from .pad import *
from .permute import *
from .pow import *
from .prelu import *
from .prod import *
from .relu import *
from .relu6 import *
from .sigmoid import *
from .softmax import *
from .split import *
from .stack import *
from .sub import *
from .sum import *
from .tanh import *
from .tensor import *
from .transpose import *
from .unary import *
from .view import *
121 changes: 121 additions & 0 deletions torch2trt/converters/activation.py
@@ -0,0 +1,121 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test
from .unary import UnaryModule


# | RELU : Rectified Linear activation (impl in relu.py)
# | SIGMOID : Sigmoid activation (impl in sigmoid.py)
# | TANH : Hyperbolic Tangent activation (impl in tanh.py)


# | LEAKY_RELU : Leaky Relu activation: f(x) = x if x >= 0, f(x) = alpha * x if x < 0


@tensorrt_converter('torch.nn.functional.leaky_relu')
@tensorrt_converter('torch.nn.functional.leaky_relu_')
def convert_leaky_relu(ctx):
input = get_arg(ctx, 'input', pos=0, default=None)
negative_slope = get_arg(ctx, 'negative_slope', pos=1, default=0.01)
output = ctx.method_return

input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
layer = ctx.network.add_activation(input_trt, trt.ActivationType.LEAKY_RELU)
layer.alpha = negative_slope

output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
def test_leaky_relu():
return UnaryModule(lambda x: torch.nn.functional.leaky_relu(x))


# | ELU : Elu activation: f(x) = x if x >= 0, f(x) = alpha * (exp(x) - 1) if x < 0


@tensorrt_converter('torch.nn.functional.elu')
@tensorrt_converter('torch.nn.functional.elu_')
def convert_elu(ctx):
input = get_arg(ctx, 'input', pos=0, default=None)
alpha = get_arg(ctx, 'alpha', pos=1, default=1.0)
output = ctx.method_return

input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
layer = ctx.network.add_activation(input_trt, trt.ActivationType.ELU)
layer.alpha = alpha

output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
def test_elu():
return UnaryModule(lambda x: torch.nn.functional.elu(x))


# | SELU : Selu activation: f(x) = beta * x if x > 0, f(x) = beta * (alpha * exp(x) - alpha) if x <= 0

@tensorrt_converter('torch.selu')
@tensorrt_converter('torch.selu_')
@tensorrt_converter('torch.nn.functional.selu')
@tensorrt_converter('torch.nn.functional.selu_')
def convert_selu(ctx):
input = get_arg(ctx, 'input', pos=0, default=None)
alpha = get_arg(ctx, 'alpha', pos=1, default=1.0)
output = ctx.method_return

input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
layer = ctx.network.add_activation(input_trt, trt.ActivationType.SELU)
layer.alpha = 1.6732632423543772848170429916717
layer.beta = 1.0507009873554804934193349852946
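    # These are the standard SELU constants (alpha ~= 1.6733, scale ~= 1.0507) that
    # torch.nn.functional.selu hard-codes; the alpha argument parsed above is therefore unused.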

output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
def test_selu():
return UnaryModule(lambda x: torch.nn.functional.selu(x))


# |  SOFTSIGN : Softsign activation: f(x) = x / (1 + |x|)


@tensorrt_converter('torch.nn.functional.softsign')
def convert_softsign(ctx):
input = get_arg(ctx, 'input', pos=0, default=None)
output = ctx.method_return

input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
layer = ctx.network.add_activation(input_trt, trt.ActivationType.SOFTSIGN)

output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
def test_softsign():
return UnaryModule(lambda x: torch.nn.functional.softsign(x))


# | SOFTPLUS : Softplus activation: f(x) = alpha * log(exp(beta * x) + 1)


@tensorrt_converter('torch.nn.functional.softplus')
def convert_softplus(ctx):
input = get_arg(ctx, 'input', pos=0, default=None)
output = ctx.method_return

input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
layer = ctx.network.add_activation(input_trt, trt.ActivationType.SOFTPLUS)

output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
def test_softplus():
return UnaryModule(lambda x: torch.nn.functional.softplus(x))


# | CLIP : Clip activation: f(x) = max(alpha, min(beta, x)) (impl in clamp.py)

# |  HARD_SIGMOID : Hard sigmoid activation: f(x) = max(0, min(1, alpha * x + beta))  (unclear whether PyTorch has a direct equivalent)
# |  SCALED_TANH : Scaled Tanh activation: f(x) = alpha * tanh(beta * x)  (unclear whether PyTorch has a direct equivalent)
# |  THRESHOLDED_RELU : Thresholded Relu activation: f(x) = x if x > alpha, f(x) = 0 if x <= alpha  (unclear whether PyTorch has a direct equivalent)
8 changes: 8 additions & 0 deletions torch2trt/converters/adaptive_avg_pool2d.py
@@ -0,0 +1,8 @@
from torch2trt.torch2trt import *
from .AdaptiveAvgPool2d import *


@tensorrt_converter('torch.nn.functional.adaptive_avg_pool2d')
def convert_adaptive_avg_pool2d(ctx):
ctx.method_args = (torch.nn.AdaptiveAvgPool2d(ctx.method_args[1]), ctx.method_args[0])
convert_AdaptiveAvgPool2d(ctx)
36 changes: 36 additions & 0 deletions torch2trt/converters/adaptive_max_pool2d.py
@@ -0,0 +1,36 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.nn.functional.adaptive_max_pool2d')
def convert_adaptive_max_pool2d(ctx):
input = ctx.method_args[0]
output = ctx.method_return

output_size = ctx.method_args[1]
if isinstance(output_size, int):
output_size = (output_size, ) * 2

stride = (input._trt.shape[-2] // output_size[-2], input._trt.shape[-1] // output_size[-1])

kernel_size = stride
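    # Note: this emulates adaptive pooling with a fixed window (kernel == stride ==
    # input_size // output_size), which matches PyTorch exactly only when the input size
    # is divisible by the output size; otherwise the pooling windows can differ slightly.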
layer = ctx.network.add_pooling(
input=input._trt, type=trt.PoolingType.MAX, window_size=kernel_size)
layer.stride = stride

output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_adaptive_max_pool2d_1x1():
return torch.nn.AdaptiveMaxPool2d((1, 1))


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_adaptive_max_pool2d_2x2():
return torch.nn.AdaptiveMaxPool2d((2, 2))


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_adaptive_max_pool2d_3x3():
return torch.nn.AdaptiveMaxPool2d((3, 3))
109 changes: 109 additions & 0 deletions torch2trt/converters/add.py
@@ -0,0 +1,109 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.add')
@tensorrt_converter('torch.Tensor.__iadd__')
@tensorrt_converter('torch.Tensor.__add__')
@tensorrt_converter('torch.Tensor.__radd__')
def convert_add(ctx):
input_a = ctx.method_args[0]
input_b = ctx.method_args[1]
output = ctx.method_return
input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUM)
output._trt = layer.get_output(0)
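
# Note: inputs are broadcast to len(output.shape) - 1 dims because the batch dimension is
# implicit in these TensorRT tensors and is therefore excluded from the rank.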


class Add(torch.nn.Module):
def __init__(self):
super(Add, self).__init__()

def forward(self, x, y):
return x + y

@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_add_basic():
return Add()


class IAdd(torch.nn.Module):
def __init__(self):
super(IAdd, self).__init__()

def forward(self, x, y):
x += y
return x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_add_iadd():
return IAdd()


class TorchAdd(torch.nn.Module):
def __init__(self):
super(TorchAdd, self).__init__()

def forward(self, x, y):
return torch.add(x, y)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_add_torchadd():
return TorchAdd()


class RAddInt(torch.nn.Module):
def __init__(self):
super(RAddInt, self).__init__()

def forward(self, x):
return 1 + x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_add_radd_int():
return RAddInt()


class RAddFloat(torch.nn.Module):
def __init__(self):
super(RAddFloat, self).__init__()

def forward(self, x):
return 1.0 + x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_add_radd_float():
return RAddFloat()


class AddConstantNoBatch(torch.nn.Module):
def __init__(self):
super(AddConstantNoBatch, self).__init__()
self.register_buffer('y', torch.ones((3, 10, 10)))

def forward(self, x):
return x + self.y


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)])
def test_add_constant_nobatch():
return AddConstantNoBatch()


class AddConstantBatch(torch.nn.Module):
def __init__(self):
super(AddConstantBatch, self).__init__()
self.register_buffer('y', torch.ones((1, 3, 10, 10)))

def forward(self, x):
return x + self.y


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)])
def test_add_constant_batch():
return AddConstantBatch()
111 changes: 111 additions & 0 deletions torch2trt/converters/avg_pool.py
@@ -0,0 +1,111 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter("torch.nn.functional.avg_pool2d", enabled=trt_version() < '7.0')
def convert_avg_pool2d(ctx):
# parse args
input = get_arg(ctx, "input", pos=0, default=None)
kernel_size = get_arg(ctx, "kernel_size", pos=1, default=None)
stride = get_arg(ctx, "stride", pos=2, default=None)
padding = get_arg(ctx, "padding", pos=3, default=0)
ceil_mode = get_arg(ctx, "ceil_mode", pos=4, default=False)
count_include_pad = get_arg(ctx, "count_include_pad", pos=5, default=True)

# get input trt tensor (or create constant if it doesn't exist)
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]

output = ctx.method_return

# get kernel size
if not isinstance(kernel_size, tuple):
kernel_size = (kernel_size,) * 2

# get stride
if not isinstance(stride, tuple):
stride = (stride,) * 2

# get padding
if not isinstance(padding, tuple):
padding = (padding,) * 2

layer = ctx.network.add_pooling(
input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size
)

layer.stride = stride
layer.padding = padding
layer.average_count_excludes_padding = not count_include_pad

if ceil_mode:
layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP

output._trt = layer.get_output(0)


@tensorrt_converter('torch.nn.functional.avg_pool2d', enabled=trt_version() >= '7.0')
@tensorrt_converter('torch.nn.functional.avg_pool3d', enabled=trt_version() >= '7.0')
def convert_avg_pool_trt7(ctx):
# parse args
input = get_arg(ctx, 'input', pos=0, default=None)
kernel_size = get_arg(ctx, 'kernel_size', pos=1, default=None)
stride = get_arg(ctx, 'stride', pos=2, default=None)
padding = get_arg(ctx, 'padding', pos=3, default=0)
ceil_mode = get_arg(ctx, 'ceil_mode', pos=4, default=False)
count_include_pad = get_arg(ctx, 'count_include_pad', pos=5, default=True)

# get input trt tensor (or create constant if it doesn't exist)
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return

input_dim = input.dim() - 2

# get kernel size
if not isinstance(kernel_size, tuple):
kernel_size = (kernel_size, ) * input_dim

# get stride
if not isinstance(stride, tuple):
stride = (stride, ) * input_dim

# get padding
if not isinstance(padding, tuple):
padding = (padding, ) * input_dim

layer = ctx.network.add_pooling_nd(
input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size)

layer.stride_nd = stride
layer.padding_nd = padding
layer.average_count_excludes_padding = not count_include_pad

if ceil_mode:
layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP

output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 4, 6)])
@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 5, 7)])
def test_avg_pool2d_without_ceil_mode():
return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False)


@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 4, 6)])
@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 5, 7)])
def test_avg_pool2d_with_ceil_mode():
return torch.nn.AvgPool2d(
kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False
) # TRT does not support ceil_mode=True && count_include_pad=True


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 4, 6)], enabled=trt_version() >= '7.0')
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 5, 7)], enabled=trt_version() >= '7.0')
def test_avg_pool3d_without_ceil_mode_trt7():
return torch.nn.AvgPool3d(kernel_size=3, stride=2, padding=1, ceil_mode=False)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 4, 6)], enabled=trt_version() >= '7.0')
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 5, 7)], enabled=trt_version() >= '7.0')
def test_avg_pool3d_with_ceil_mode_trt7():
return torch.nn.AvgPool3d(kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False) # TRT does not support ceil_mode=True && count_include_pad=True
40 changes: 40 additions & 0 deletions torch2trt/converters/batch_norm.py
@@ -0,0 +1,40 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test

@tensorrt_converter('torch.nn.functional.batch_norm', enabled=trt_version() >= '7.0')
def convert_batch_norm_trt7(ctx):

input = get_arg(ctx, 'input', pos=0, default=None)
running_mean = get_arg(ctx, 'running_mean', pos=1, default=None)
running_var = get_arg(ctx, 'running_var', pos=2, default=None)

weight = get_arg(ctx, 'weight', pos=3, default=None)
bias = get_arg(ctx, 'bias', pos=4, default=None)
eps = get_arg(ctx, 'eps', pos=7, default=10e-6)

input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return

scale = weight.detach().cpu().numpy() / np.sqrt(running_var.detach().cpu().numpy() + eps)
bias = bias.detach().cpu().numpy() - running_mean.detach().cpu().numpy() * scale
power = np.ones_like(scale)
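    # The lines above fold batch norm into a single scale layer (a sketch of the algebra):
    #   y = weight * (x - mean) / sqrt(var + eps) + bias
    #     = scale * x + shift,  with scale = weight / sqrt(var + eps) and shift = bias - mean * scale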

layer = ctx.network.add_scale_nd(input_trt, trt.ScaleMode.CHANNEL, bias, scale, power, 0)
output._trt = layer.get_output(0)



@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)], enabled=trt_version() >= '7.0')
def test_batch_norm_2d_trt7():
return torch.nn.BatchNorm2d(10)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)], enabled=trt_version() >= '7.0')
def test_batch_norm_3d_2_trt7():
return torch.nn.BatchNorm3d(10)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 32, 2, 36, 47)], enabled=trt_version() >= '7.0')
def test_batch_norm_3d_trt7():
return torch.nn.BatchNorm3d(32)

28 changes: 28 additions & 0 deletions torch2trt/converters/cat.py
@@ -0,0 +1,28 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.cat')
def convert_cat(ctx):
inputs = get_arg(ctx, 'input', pos=0, default=None)
dim = get_arg(ctx, 'dim', pos=1, default=0)

output = ctx.method_return
trt_inputs = add_missing_trt_tensors(ctx.network, inputs)
trt_inputs = broadcast_trt_tensors(ctx.network, trt_inputs, len(output.shape) - 1)

layer = ctx.network.add_concatenation(inputs=trt_inputs)
layer.axis = dim - 1
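    # dim - 1 because the TRT tensor omits the implicit batch dimension; this assumes dim is a
    # non-negative index referring to a non-batch axis.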
output._trt = layer.get_output(0)

class Cat(torch.nn.Module):
def __init__(self, dim):
super(Cat, self).__init__()
self.dim = dim

def forward(self, *x):
return torch.cat(x, dim=self.dim)

@add_module_test(torch.float32, torch.device('cuda'), [(1, 4, 4), (1, 3, 4), (1, 17, 4)])
def test_Cat_basic():
return Cat(1)
60 changes: 60 additions & 0 deletions torch2trt/converters/chunk.py
@@ -0,0 +1,60 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test
from .split import convert_split


@tensorrt_converter('torch.chunk')
@tensorrt_converter('torch.Tensor.chunk')
def convert_chunk(ctx):
convert_split(ctx)


class TorchChunk(torch.nn.Module):

def __init__(self, *args, **kwargs):
super(TorchChunk, self).__init__()
self.args = args
self.kwargs = kwargs

def forward(self, x):
return torch.chunk(x, *self.args, **self.kwargs)


class TensorChunk(torch.nn.Module):

def __init__(self, *args, **kwargs):
super(TensorChunk, self).__init__()
self.args = args
self.kwargs = kwargs

def forward(self, x):
return x.chunk(*self.args, **self.kwargs)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_torch_chunk_1_1():
return TorchChunk(1, 1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_torch_chunk_2_1():
return TorchChunk(2, 1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_torch_chunk_3_1():
return TorchChunk(3, 1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_torch_chunk_3_2():
return TorchChunk(3, 2)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_tensor_chunk_3_2():
return TensorChunk(3, 2)
191 changes: 191 additions & 0 deletions torch2trt/converters/clamp.py
@@ -0,0 +1,191 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


def __add_clamp(network, trt_input, val, op):

    # create a TensorRT constant for the clamp value
val_shape = (1, ) * len(trt_input.shape) # broadcast all dimensions
val_tensor = val * torch.ones(val_shape, dtype=torch_dtype_from_trt(trt_input.dtype)).cpu().numpy()
val_trt = network.add_constant(val_shape, val_tensor)
layer = network.add_elementwise(trt_input, val_trt.get_output(0), op)

return layer


# CLAMP_MIN


@tensorrt_converter('torch.clamp_min')
@tensorrt_converter('torch.Tensor.clamp_min')
def convert_clamp_min(ctx):
input = ctx.method_args[0]
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
val = ctx.method_args[1]
output = ctx.method_return

layer = __add_clamp(ctx.network, input_trt, val, trt.ElementWiseOperation.MAX)

output._trt = layer.get_output(0)


class TorchClampMin(torch.nn.Module):
def forward(self, x):
return torch.clamp_min(x, -0.1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_torch_clamp_min():
return TorchClampMin()


class TensorClampMin(torch.nn.Module):
def forward(self, x):
return x.clamp_min(-0.1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_tensor_clamp_min():
return TensorClampMin()


# CLAMP_MAX


@tensorrt_converter('torch.clamp_max')
@tensorrt_converter('torch.Tensor.clamp_max')
def convert_clamp_max(ctx):
input = ctx.method_args[0]
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
val = ctx.method_args[1]
output = ctx.method_return

layer = __add_clamp(ctx.network, input_trt, val, trt.ElementWiseOperation.MIN)

output._trt = layer.get_output(0)


class TorchClampMax(torch.nn.Module):
def forward(self, x):
return torch.clamp_max(x, 0.1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_torch_clamp_max():
return TorchClampMax()


class TensorClampMax(torch.nn.Module):
def forward(self, x):
return x.clamp_max(0.1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_tensor_clamp_max():
return TensorClampMax()


# CLAMP

@tensorrt_converter('torch.clamp')
@tensorrt_converter('torch.Tensor.clamp')
def convert_clamp(ctx):
input = ctx.method_args[0]
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return
if "min" in ctx.method_kwargs and "max" in ctx.method_kwargs:
min_val = ctx.method_kwargs["min"]
max_val = ctx.method_kwargs["max"]
layer = __add_clamp(ctx.network, input_trt, min_val, trt.ElementWiseOperation.MAX)
layer = __add_clamp(ctx.network, layer.get_output(0), max_val, trt.ElementWiseOperation.MIN)
elif "min" in ctx.method_kwargs:
min_val = ctx.method_kwargs["min"]
layer = __add_clamp(ctx.network, input_trt, min_val, trt.ElementWiseOperation.MAX)
elif "max" in ctx.method_kwargs:
max_val = ctx.method_kwargs["max"]
layer = __add_clamp(ctx.network, input_trt, max_val, trt.ElementWiseOperation.MIN)
else:
min_val = ctx.method_args[1]
max_val = ctx.method_args[2]
layer = __add_clamp(ctx.network, input_trt, min_val, trt.ElementWiseOperation.MAX)
layer = __add_clamp(ctx.network, layer.get_output(0), max_val, trt.ElementWiseOperation.MIN)

output._trt = layer.get_output(0)


class TorchClamp(torch.nn.Module):
def forward(self, x):
return torch.clamp(x, -0.1, 0.1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_torch_clamp():
return TorchClamp()


class TensorClamp(torch.nn.Module):
def forward(self, x):
return x.clamp(-0.1, 0.1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_tensor_clamp():
return TensorClamp()


class TorchClampOptionMax(torch.nn.Module):
def forward(self, x):
return torch.clamp(x, max=0.1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_torch_clamp_option_max():
return TorchClampOptionMax()

class TorchClampOptionMin(torch.nn.Module):
def forward(self, x):
return torch.clamp(x, min=-0.1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_torch_clamp_option_min():
return TorchClampOptionMin()


class TorchClampOptionMaxMin(torch.nn.Module):
def forward(self, x):
return torch.clamp(x, min=-0.1, max=0.1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_torch_clamp_option_max_min():
return TorchClampOptionMaxMin()


class TensorClampOptionMax(torch.nn.Module):
def forward(self, x):
return x.clamp(max=0.1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_tensor_clamp_option_max():
return TensorClampOptionMax()

class TensorClampOptionMin(torch.nn.Module):
def forward(self, x):
return x.clamp(min=-0.1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_tensor_clamp_option_min():
return TensorClampOptionMin()


class TensorClampOptionMaxMin(torch.nn.Module):
def forward(self, x):
return x.clamp(min=-0.1, max=0.1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_tensor_clamp_option_max_min():
return TensorClampOptionMaxMin()
60 changes: 60 additions & 0 deletions torch2trt/converters/compare.py
@@ -0,0 +1,60 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test

def convert_elementwise(ctx, op):
input_a = ctx.method_args[0]
input_b = ctx.method_args[1]
output = ctx.method_return
input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, op)
output._trt = layer.get_output(0)

@tensorrt_converter('torch.gt', enabled=trt_version() >= '7.0')
@tensorrt_converter('torch.Tensor.__gt__', enabled=trt_version() >= '7.0')
def convert_gt(ctx):
return convert_elementwise(ctx, trt.ElementWiseOperation.GREATER)

@tensorrt_converter('torch.lt', enabled=trt_version() >= '7.0')
@tensorrt_converter('torch.Tensor.__lt__', enabled=trt_version() >= '7.0')
def convert_lt(ctx):
return convert_elementwise(ctx, trt.ElementWiseOperation.LESS)

@tensorrt_converter('torch.eq', enabled=trt_version() >= '7.0')
@tensorrt_converter('torch.Tensor.__eq__', enabled=trt_version() >= '7.0')
def convert_eq(ctx):
return convert_elementwise(ctx, trt.ElementWiseOperation.EQUAL)

class GT(torch.nn.Module):
def __init__(self):
super(GT, self).__init__()

def forward(self, x, y):
return x > y

class LT(torch.nn.Module):
def __init__(self):
super(LT, self).__init__()

def forward(self, x, y):
return x < y

class EQ(torch.nn.Module):
def __init__(self):
super(EQ, self).__init__()

def forward(self, x, y):
return x == y


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 6, 6), (1, 3, 6, 6)], enabled=trt_version() >= '7.0')
def test_gt_basic():
return GT()

@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 6, 6), (1, 3, 6, 6)], enabled=trt_version() >= '7.0')
def test_lt_basic():
return LT()

@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 6, 6), (1, 3, 6, 6)], enabled=trt_version() >= '7.0')
def test_eq_basic():
return EQ()
123 changes: 123 additions & 0 deletions torch2trt/converters/div.py
@@ -0,0 +1,123 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.div')
@tensorrt_converter('torch.Tensor.__div__') # py2
@tensorrt_converter('torch.Tensor.__idiv__') # py2
@tensorrt_converter('torch.Tensor.__truediv__') # py3
@tensorrt_converter('torch.Tensor.__itruediv__') # py3
def convert_div(ctx):
input_a = ctx.method_args[0]
input_b = ctx.method_args[1]
output = ctx.method_return
input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.DIV)
output._trt = layer.get_output(0)


@tensorrt_converter('torch.Tensor.__rdiv__') # py2
@tensorrt_converter('torch.Tensor.__rtruediv__') # py3
def convert_rdiv(ctx):
input_a = ctx.method_args[1] # inputs switched for rdiv
input_b = ctx.method_args[0]
output = ctx.method_return
input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.DIV)
output._trt = layer.get_output(0)


class Div(torch.nn.Module):
def __init__(self):
super(Div, self).__init__()

def forward(self, x, y):
return x / y


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_div_basic():
return Div()


class IDiv(torch.nn.Module):
def __init__(self):
super(IDiv, self).__init__()

def forward(self, x, y):
x /= y
return x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_div_idiv():
return IDiv()


class TorchDiv(torch.nn.Module):
def __init__(self):
super(TorchDiv, self).__init__()

def forward(self, x, y):
return torch.div(x, y)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_div_torchdiv():
return TorchDiv()


class RDivInt(torch.nn.Module):
def __init__(self):
super(RDivInt, self).__init__()

def forward(self, x):
return 100 / x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_rdiv_int():
return RDivInt()


class RDivFloat(torch.nn.Module):
def __init__(self):
super(RDivFloat, self).__init__()

def forward(self, x):
return 100.0 / x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_rdiv_float():
return RDivFloat()


class DivConstantNoBatch(torch.nn.Module):
def __init__(self):
super(DivConstantNoBatch, self).__init__()
self.register_buffer('y', torch.ones((3, 10, 10)))

def forward(self, x):
return x / self.y


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)])
def test_div_constant_nobatch():
return DivConstantNoBatch()


class DivConstantBatch(torch.nn.Module):
def __init__(self):
super(DivConstantBatch, self).__init__()
self.register_buffer('y', torch.ones((1, 3, 10, 10)))

def forward(self, x):
return x / self.y


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)])
def test_div_constant_batch():
return DivConstantBatch()
37 changes: 37 additions & 0 deletions torch2trt/converters/dummy_converters.py
@@ -0,0 +1,37 @@
from torch2trt.torch2trt import *


def is_private(method):
method = method.split('.')[-1] # remove prefix
    return method[0] == '_' and method[1] != '_'

def is_function_type(method):
fntype = eval(method + '.__class__.__name__')
return fntype == 'function' or fntype == 'builtin_function_or_method' or fntype == 'method_descriptor'

def get_methods(namespace):
methods = []
for method in dir(eval(namespace)):
full_method = namespace + '.' + method
if not is_private(full_method) and is_function_type(full_method):
methods.append(full_method)
return methods


TORCH_METHODS = []
TORCH_METHODS += get_methods('torch')
TORCH_METHODS += get_methods('torch.Tensor')
TORCH_METHODS += get_methods('torch.nn.functional')


for method in TORCH_METHODS:

@tensorrt_converter(method, is_real=False)
def warn_method(ctx):
print('Warning: Encountered known unsupported method %s' % ctx.method_str)


@tensorrt_converter('torch.Tensor.dim', is_real=False)
@tensorrt_converter('torch.Tensor.size', is_real=False)
def dont_warn(ctx):
pass
43 changes: 43 additions & 0 deletions torch2trt/converters/expand.py
@@ -0,0 +1,43 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.Tensor.expand')
def convert_expand(ctx):
input = ctx.method_args[0]
sizes = ctx.method_args[1:]
output = ctx.method_return

inshape = tuple(input.shape)[1:] # exclude batch
shape = tuple(output.shape)[1:]
ndim = len(shape)
start = tuple([0]*ndim)
stride = tuple([int(i == o) for i, o in zip(inshape, shape)]) # stride == 1 if dimensions match, 0 otherwise

layer = ctx.network.add_slice(input._trt, start, shape, stride)

output._trt = layer.get_output(0)


class ExpandModule(torch.nn.Module):
def __init__(self, *sizes):
super(ExpandModule, self).__init__()
self.sizes = sizes

def forward(self, x):
return x.expand(*self.sizes)


@add_module_test(torch.float32, torch.device('cuda'), [(1,1,3,3)])
def test_tensor_expand_singledim():
return ExpandModule(1, 3, 3, 3)


@add_module_test(torch.float32, torch.device('cuda'), [(1,1,1,3)])
def test_tensor_expand_multidim():
return ExpandModule(1, 3, 3, 3)


@add_module_test(torch.float32, torch.device('cuda'), [(1,1,1,3)])
def test_tensor_expand_inferdim():
return ExpandModule(1, 3, -1, -1)
81 changes: 81 additions & 0 deletions torch2trt/converters/floordiv.py
@@ -0,0 +1,81 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.Tensor.__floordiv__')
@tensorrt_converter('torch.Tensor.__ifloordiv__')
@tensorrt_converter('torch.floor_divide')
def convert_floordiv(ctx):
input_a = ctx.method_args[0]
input_b = ctx.method_args[1]
output = ctx.method_return
input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
    # We cannot use ElementWiseOperation.FLOOR_DIV directly because Torch truncates negative
    # results toward 0, while TensorRT's FLOOR_DIV rounds toward -Inf.
# sign = ab / |ab|
# floordiv result: sign * (|a| // |b|)
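    # e.g. a = -7, b = 2 (a sketch): truncation gives -3 while a plain FLOOR_DIV would give -4;
    # here sign = (-14) / 14 = -1 and |a| // |b| = 3, so the result is -1 * 3 = -3 as expected.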
ab_layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.PROD)
abs_ab_layer = ctx.network.add_unary(ab_layer.get_output(0), trt.UnaryOperation.ABS)
sign_layer = ctx.network.add_elementwise(ab_layer.get_output(0), abs_ab_layer.get_output(0),
trt.ElementWiseOperation.DIV)
abs_a_layer = ctx.network.add_unary(input_a_trt, trt.UnaryOperation.ABS)
abs_b_layer = ctx.network.add_unary(input_b_trt, trt.UnaryOperation.ABS)
abs_floor_layer = ctx.network.add_elementwise(abs_a_layer.get_output(0), abs_b_layer.get_output(0),
trt.ElementWiseOperation.FLOOR_DIV)
out_layer = ctx.network.add_elementwise(sign_layer.get_output(0), abs_floor_layer.get_output(0),
trt.ElementWiseOperation.PROD)
output._trt = out_layer.get_output(0)


class FloorDiv(torch.nn.Module):
def __init__(self):
super(FloorDiv, self).__init__()

def forward(self, x, y):
return x // y


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)])
def test_floordiv_op():
return FloorDiv()


class FloorDivAssign(torch.nn.Module):
def __init__(self):
super(FloorDivAssign, self).__init__()

def forward(self, x, y):
x //= y
return x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)])
def test_floordiv_op_assign():
return FloorDivAssign()


class FloorDivConst(torch.nn.Module):
def __init__(self):
super(FloorDivConst, self).__init__()

def forward(self, x):
return x // 2.


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20)])
def test_floordiv_op_const():
return FloorDivConst()


class TorchFloorDiv(torch.nn.Module):
def __init__(self):
super(TorchFloorDiv, self).__init__()

def forward(self, x, y):
return torch.floor_divide(x, y)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)])
def test_floordiv_func():
return TorchFloorDiv()
155 changes: 155 additions & 0 deletions torch2trt/converters/getitem.py
@@ -0,0 +1,155 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


def slice_to_trt(dim_size, dim_slice):

start = 0 if dim_slice.start is None else dim_slice.start
stop = dim_size if dim_slice.stop is None else dim_slice.stop
stride = 1 if dim_slice.step is None else dim_slice.step

size = (stop - start - 1) // stride + 1

return start, size, stride
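
# e.g. slice_to_trt(5, slice(1, None, 2)) gives start=1, stride=2 and
# size = (5 - 1 - 1) // 2 + 1 = 2, i.e. the two elements at indices 1 and 3.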


def num_slice_types(slices):
num_slice = 0
for s in slices:
if isinstance(s, slice) or isinstance(s, int):
num_slice += 1
return num_slice


@tensorrt_converter('torch.Tensor.__getitem__')
def convert_tensor_getitem(ctx):
input = ctx.method_args[0]
slices = ctx.method_args[1]
output = ctx.method_return

input_trt = input._trt

# Step 1 - Replace ellipsis with expanded slices

num_ellipsis = len(input.shape) - num_slice_types(slices)

new_slices = []
for s in slices:

if s == Ellipsis:
while num_ellipsis > 0:
new_slices.append(slice(None, None, None))
num_ellipsis -= 1
elif isinstance(s, slice):
new_slices.append(s)
elif s is None:
new_slices.append(None)
elif isinstance(s, int):
new_slices.append(s)

# fill missing slices at end
while num_slice_types(new_slices) < len(input.shape):
new_slices.append(slice(None, None, None))

# Step 2 - Remove batch from slices (TRT from this point)

slices = tuple(new_slices[1:]) # remove batch


# Step 3 - Add slice layer (will currently ignore 'None' slices)

starts = []
sizes = []
strides = []

input_dim = 0
for s in slices:

if input_dim >= len(input_trt.shape):
break

input_size = int(input_trt.shape[input_dim])

if isinstance(s, slice):
start, size, stride = slice_to_trt(input_size, s)
starts.append(start)
sizes.append(size)
strides.append(stride)
input_dim += 1

elif isinstance(s, int):
starts.append(s)
sizes.append(1)
strides.append(1)
input_dim += 1

output_trt = ctx.network.add_slice(input_trt, starts, sizes, strides).get_output(0)

# Step 4 - Add shuffle layer to insert dimensions for 'None' slices and remove dimensions for 'int' slices

num_non_slice = len([s for s in slices if not isinstance(s, slice)])
if num_non_slice > 0:
layer = ctx.network.add_shuffle(output_trt)
layer.reshape_dims = tuple(output.shape[1:]) # exclude batch
output_trt = layer.get_output(0)

output._trt = output_trt


class LambdaModule(torch.nn.Module):
def __init__(self, fn):
super(LambdaModule, self).__init__()
self.fn = fn

def forward(self, x):
return self.fn(x)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
def test_tensor_getitem_1d_int():
return LambdaModule(lambda x: x[:, 0])


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)])
def test_tensor_getitem_2d_int():
return LambdaModule(lambda x: x[:, 0])


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)])
def test_tensor_getitem_2d_strided():
return LambdaModule(lambda x: x[:, ::2])


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)])
def test_tensor_getitem_2d_strided_offset():
return LambdaModule(lambda x: x[:, 1::2])


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)])
def test_tensor_getitem_2d_strided_range():
return LambdaModule(lambda x: x[:, 1:3:2])


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)])
def test_tensor_getitem_2d_insert_dim():
return LambdaModule(lambda x: x[:, None])


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)])
def test_tensor_getitem_2d_insert_dim_ellipsis():
return LambdaModule(lambda x: x[:, None, ...])


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)])
def test_tensor_getitem_2d_append_dim():
return LambdaModule(lambda x: x[:, ..., None])


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)])
def test_tensor_getitem_2d_append_2dim():
return LambdaModule(lambda x: x[:, ..., None, None])


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)])
def test_tensor_getitem_2d_weird_combo():
return LambdaModule(lambda x: x[:, 0:3:4, None, None, 1, ...])
48 changes: 48 additions & 0 deletions torch2trt/converters/group_norm.py
@@ -0,0 +1,48 @@
import torch.nn as nn
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test

def has_group_norm_plugin():
try:
from torch2trt.plugins import GroupNormPlugin
return True
except:
return False


def get_group_norm_plugin(num_groups, weight, bias, eps):
from torch2trt.plugins import GroupNormPlugin
PLUGIN_NAME = 'group_norm'
registry = trt.get_plugin_registry()
creator = [c for c in registry.plugin_creator_list if c.name == PLUGIN_NAME and c.plugin_namespace == 'torch2trt'][0]
torch2trt_plugin = GroupNormPlugin(num_groups=num_groups, weight=weight, bias=bias, eps=eps)
return creator.deserialize_plugin(PLUGIN_NAME, torch2trt_plugin.serializeToString())

@tensorrt_converter('torch.nn.GroupNorm.forward', enabled=has_group_norm_plugin())
def convert_group_norm_trt(ctx):
module = ctx.method_args[0]
input = ctx.method_args[1]
num_groups = module.num_groups
weight = module.weight
bias = module.bias
eps = module.eps
input_trt = add_missing_trt_tensors(ctx.network, [input])
output = ctx.method_return
plugin = get_group_norm_plugin(num_groups, weight, bias, eps)

layer = ctx.network.add_plugin_v2(input_trt, plugin)

output._trt = layer.get_output(0)



@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=has_group_norm_plugin())
def test_group_norm_trt_g2_fp32():
return torch.nn.GroupNorm(2, 10)

@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=has_group_norm_plugin())
def test_group_norm_trt_g2_eps_fp32():
return torch.nn.GroupNorm(2, 10, eps=1e-4)



22 changes: 22 additions & 0 deletions torch2trt/converters/identity.py
@@ -0,0 +1,22 @@
from torch2trt.torch2trt import *


@tensorrt_converter('torch.Tensor.contiguous')
@tensorrt_converter('torch.nn.functional.dropout')
@tensorrt_converter('torch.nn.functional.dropout2d')
@tensorrt_converter('torch.nn.functional.dropout3d')
def convert_functional_identity(ctx):
input = ctx.method_args[0]
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return
output._trt = input_trt


@tensorrt_converter('torch.nn.Dropout.forward')
@tensorrt_converter('torch.nn.Dropout2d.forward')
@tensorrt_converter('torch.nn.Dropout3d.forward')
def convert_identity(ctx):
input = ctx.method_args[1]
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return
output._trt = input_trt
150 changes: 150 additions & 0 deletions torch2trt/converters/instance_norm.py
@@ -0,0 +1,150 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


def _add_scale_1d2d3d(network, x_trt, mode, offset, scale, power):
ndim = len(x_trt.shape)

y_trt = x_trt

    # reshape to exactly 3 (batchless) dims: keep the first two dims and flatten
    # (or pad) the rest, so add_scale sees a CHW-like tensor
    if ndim != 3:
        layer = network.add_shuffle(y_trt)
        layer.reshape_dims = (x_trt.shape[0], x_trt.shape[1], -1)
y_trt = layer.get_output(0)

y_trt = network.add_scale(y_trt, mode, offset, scale, power).get_output(0)

    # reshape back to the original dimensions
    if ndim != 3:
        layer = network.add_shuffle(y_trt)
layer.reshape_dims = tuple(x_trt.shape)
y_trt = layer.get_output(0)

return y_trt

@tensorrt_converter('torch.instance_norm')
@tensorrt_converter('torch.nn.functional.instance_norm')
def convert_instance_norm(ctx):
input = get_arg(ctx, 'input', pos=0, default=None)
running_mean = get_arg(ctx, 'running_mean', pos=1, default=None)
running_var = get_arg(ctx, 'running_var', pos=2, default=None)
weight = get_arg(ctx, 'weight', pos=3, default=None)
bias = get_arg(ctx, 'bias', pos=4, default=None)
use_input_stats = get_arg(ctx, 'use_input_stats', pos=5, default=True)
momentum = get_arg(ctx, 'momentum', pos=6, default=0.1)
eps = get_arg(ctx, 'eps', pos=7, default=1e-05)
output = ctx.method_return


# CASE 1 - USING RUNNING STATISTICS
if not use_input_stats:

# equivalent to batch norm
scale = 1.0 / np.sqrt(running_var.detach().cpu().numpy() + eps)
offset = -running_mean.detach().cpu().numpy() * scale
power = np.ones_like(scale)

if weight is not None:
scale *= weight.detach().cpu().numpy()
offset += bias.detach().cpu().numpy()

result_trt = _add_scale_1d2d3d(ctx.network, input._trt, trt.ScaleMode.CHANNEL, offset, scale, power)

output._trt = result_trt

# CASE 2 - USING INPUT STATS
else:

eps_np = np.array([eps], dtype=np.float32)
keep_dims = True
reduce_axes = torch_dim_to_trt_axes(tuple(range(2, len(input.shape))))

# compute mean over spatial
mean_trt = ctx.network.add_reduce(input._trt, trt.ReduceOperation.AVG, reduce_axes, keep_dims).get_output(0)

# compute variance over spatial (include eps, to reduce layer count)
delta_trt = ctx.network.add_elementwise(input._trt, mean_trt, trt.ElementWiseOperation.SUB).get_output(0)
var_trt = ctx.network.add_scale(delta_trt, trt.ScaleMode.UNIFORM, np.zeros_like(eps_np), np.ones_like(eps_np), 2 * np.ones_like(eps_np)).get_output(0)
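        # the scale layer above computes (1 * delta + 0) ** 2, i.e. an elementwise square,
        # which the following AVG reduce turns into the (biased) variance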
var_trt = ctx.network.add_reduce(var_trt, trt.ReduceOperation.AVG, reduce_axes, keep_dims).get_output(0)

# compute sqrt(var + eps)
var_trt = ctx.network.add_scale(var_trt, trt.ScaleMode.UNIFORM, eps_np, np.ones_like(eps_np), 0.5 * np.ones_like(eps_np)).get_output(0)

# compute final result
result_trt = ctx.network.add_elementwise(delta_trt, var_trt, trt.ElementWiseOperation.DIV).get_output(0)

# compute affine (if applicable)
if weight is not None:

weight_np = weight.detach().cpu().numpy()
bias_np = bias.detach().cpu().numpy()

result_trt = _add_scale_1d2d3d(ctx.network, result_trt, trt.ScaleMode.CHANNEL, bias_np, weight_np, np.ones_like(bias_np))

output._trt = result_trt


# STATIC

@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)])
def test_instance_norm_1d_static():
return torch.nn.InstanceNorm1d(10, track_running_stats=True)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)])
def test_instance_norm_2d_static():
return torch.nn.InstanceNorm2d(10, track_running_stats=True)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)])
def test_instance_norm_3d_static():
return torch.nn.InstanceNorm3d(10, track_running_stats=True)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)])
def test_instance_norm_1d_static_affine():
return torch.nn.InstanceNorm1d(10, affine=True, track_running_stats=True)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)])
def test_instance_norm_2d_static_affine():
return torch.nn.InstanceNorm2d(10, affine=True, track_running_stats=True)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)])
def test_instance_norm_3d_static_affine():
return torch.nn.InstanceNorm3d(10, affine=True, track_running_stats=True)

# DYNAMIC

# @TODO(jwelsh): 1D dynamic test failing
# @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)])
# def test_instance_norm_1d_dynamic():
# return torch.nn.InstanceNorm1d(10, track_running_stats=False)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)])
def test_instance_norm_2d_dynamic():
return torch.nn.InstanceNorm2d(10, track_running_stats=False)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)])
def test_instance_norm_3d_dynamic():
return torch.nn.InstanceNorm3d(10, track_running_stats=False)


# @TODO(jwelsh): 1D dynamic test failing
# @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)])
# def test_instance_norm_1d_dynamic_affine():
# return torch.nn.InstanceNorm1d(10, affine=True, track_running_stats=False)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)])
def test_instance_norm_2d_dynamic_affine():
return torch.nn.InstanceNorm2d(10, affine=True, track_running_stats=False)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)])
def test_instance_norm_3d_dynamic_affine():
return torch.nn.InstanceNorm3d(10, affine=True, track_running_stats=False)
179 changes: 179 additions & 0 deletions torch2trt/converters/interpolate.py
@@ -0,0 +1,179 @@
import torch.nn.functional as F
import torch.nn as nn
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test
import collections.abc


def has_interpolate_plugin():
try:
from torch2trt.plugins import InterpolatePlugin
return True
    except ImportError:
return False

def get_interpolate_plugin(size, mode, align_corners):
from torch2trt.plugins import InterpolatePlugin
PLUGIN_NAME = 'interpolate'
registry = trt.get_plugin_registry()
creator = [c for c in registry.plugin_creator_list if c.name == PLUGIN_NAME and c.plugin_namespace == 'torch2trt'][0]
torch2trt_plugin = InterpolatePlugin(size=size, mode=mode, align_corners=align_corners)
return creator.deserialize_plugin(PLUGIN_NAME, torch2trt_plugin.serializeToString())


@tensorrt_converter('torch.nn.functional.interpolate', enabled=trt_version() < '7.1' and has_interpolate_plugin())
def convert_interpolate_plugin(ctx):
input = ctx.method_args[0]
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return

    mode = get_arg(ctx, 'mode', pos=3, default='nearest')
    align_corners = get_arg(ctx, 'align_corners', pos=4, default=None)

# currently only works for NCHW
size = list(output.shape[2:])

plugin = get_interpolate_plugin(size=size, mode=mode, align_corners=align_corners)


layer = ctx.network.add_plugin_v2([input_trt], plugin)

output._trt = layer.get_output(0)


@tensorrt_converter('torch.nn.functional.interpolate', enabled=trt_version() >= '7.1')
@tensorrt_converter('torch.nn.functional.upsample', enabled=trt_version() >= '7.1')
def convert_interpolate_trt7(ctx):
    # parse args
    input = get_arg(ctx, 'input', pos=0, default=None)
    size = get_arg(ctx, 'size', pos=1, default=None)
    scale_factor = get_arg(ctx, 'scale_factor', pos=2, default=None)
    mode = get_arg(ctx, 'mode', pos=3, default='nearest')
    align_corners = get_arg(ctx, 'align_corners', pos=4, default=None)

input_dim = input.dim() - 2

input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return
layer = ctx.network.add_resize(input=input_trt)

    shape = size
    if shape is not None:
        if isinstance(shape, collections.abc.Sequence):
            shape = [input.size(1)] + list(shape)
        else:
            shape = [input.size(1)] + [shape] * input_dim

        layer.shape = shape

    scales = scale_factor
    if scales is not None:
        if not isinstance(scales, collections.abc.Sequence):
            scales = [scales] * input_dim
        layer.scales = [1] + list(scales)

    resize_mode = mode
    if resize_mode.lower() in ["linear", "bilinear", "trilinear"]:
        layer.resize_mode = trt.ResizeMode.LINEAR
    else:
        layer.resize_mode = trt.ResizeMode.NEAREST

    if align_corners is not None:
        layer.align_corners = align_corners

output._trt = layer.get_output(0)
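
# Hedged worked example (illustrative only, assumed NCHW shapes): for a (1, 10, 112, 112)
# input, F.interpolate(x, size=(224, 224)) sets layer.shape = [10, 224, 224] (batch dim
# excluded), while F.interpolate(x, scale_factor=2) sets layer.scales = [1, 2, 2].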


class Interpolate(torch.nn.Module):
def __init__(self, size, mode, align_corners):
super(Interpolate, self).__init__()
self.size = size
self.mode = mode
self.align_corners = align_corners

def forward(self, x):
return F.interpolate(x, self.size, mode=self.mode, align_corners=self.align_corners)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and has_interpolate_plugin())
def test_interpolate_nearest():
return Interpolate((224, 224), 'nearest', None)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and has_interpolate_plugin())
def test_interpolate_bilinear():
return Interpolate((224, 224), 'bilinear', False)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and has_interpolate_plugin())
def test_interpolate_bicubic():
return Interpolate((224, 224), 'bicubic', False)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and has_interpolate_plugin())
def test_interpolate_area():
return Interpolate((56, 56), 'area', None)

@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and has_interpolate_plugin())
def test_upsample_scale_factor2():
return nn.Upsample(scale_factor=2, mode='bilinear',align_corners=False)

@add_module_test(torch.float32, torch.device('cuda'), [(1,2,12,12)], enabled=trt_version() >= '7.1')
def test_nearest_mode():
return torch.nn.Upsample(scale_factor=2, mode="nearest")

@add_module_test(torch.float32, torch.device('cuda'), [(1,4,12,12)], enabled=trt_version() >= '7.1')
def test_bilinear_mode():
return torch.nn.Upsample(scale_factor=3, mode="bilinear",align_corners=False)

@add_module_test(torch.float32, torch.device('cuda'), [(1,3,12,12)], enabled=trt_version() >= '7.1')
def test_align_corner():
return torch.nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True)

@add_module_test(torch.float32, torch.device('cuda'), [(1,5,13,13)], enabled=trt_version() >= '7.1')
def test_bilinear_mode_odd_input_shape():
return torch.nn.Upsample(scale_factor=2,mode="bilinear",align_corners=False)

@add_module_test(torch.float32, torch.device('cuda'), [(1,4,12,12)], enabled=trt_version() >= '7.1')
def test_size_parameter():
return torch.nn.Upsample(size=3,mode="nearest")

@add_module_test(torch.float32, torch.device('cuda'), [(1,3,13,13)], enabled=trt_version() >= '7.1')
@add_module_test(torch.float32, torch.device('cuda'), [(1,3,1,1)], enabled=trt_version() >= '7.1')
def test_size_parameter_odd_input():
return torch.nn.Upsample(size=[6,3],mode="nearest")


@add_module_test(torch.float32, torch.device('cuda'), [(1,4,6,6,6)], enabled=trt_version() >= '7.1')
def test_nearest_mode_3d():
return torch.nn.Upsample(scale_factor=2, mode="nearest")

@add_module_test(torch.float32, torch.device('cuda'), [(1,3,5,5,5)], enabled=trt_version() >= '7.1')
def test_bilinear_mode_3d():
return torch.nn.Upsample(scale_factor=3, mode="trilinear",align_corners=False)

@add_module_test(torch.float32, torch.device('cuda'), [(1,4,8,8,8)], enabled=trt_version() >= '7.1')
def test_align_corner_3d():
return torch.nn.Upsample(scale_factor=4, mode="trilinear", align_corners=True)

@add_module_test(torch.float32, torch.device('cuda'), [(1,6,7,7,7)], enabled=trt_version() >= '7.1')
@add_module_test(torch.float32, torch.device('cuda'), [(1,3,2,4,4)], enabled=trt_version() >= '7.1')
@add_module_test(torch.float32, torch.device('cuda'), [(1,3,1,1,1)], enabled=trt_version() >= '7.1')
def test_bilinear_mode_odd_input_shape_3d():
return torch.nn.Upsample(scale_factor=2, mode="trilinear",align_corners=False)

@add_module_test(torch.float32, torch.device('cuda'), [(1,1,12,12,12)], enabled=trt_version() >= '7.1')
def test_size_parameter_3d():
return torch.nn.Upsample(size=3,mode="trilinear", align_corners=True)

@add_module_test(torch.float32, torch.device('cuda'), [(1,3,7,9,5)], enabled=trt_version() >= '7.1')
@add_module_test(torch.float32, torch.device('cuda'), [(1,4,3,5,1)], enabled=trt_version() >= '7.1')
def test_size_parameter_odd_input_3d():
return torch.nn.Upsample(size=[11,14,17],mode="trilinear", align_corners=False)
62 changes: 62 additions & 0 deletions torch2trt/converters/max.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test
from .unary import UnaryModule


def __convert_max_elementwise(ctx):
input_a = ctx.method_args[0]
input_b = ctx.method_args[1]
output = ctx.method_return
input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.MAX)
output._trt = layer.get_output(0)


def __convert_max_reduce(ctx):
    input = ctx.method_args[0]
    dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, len(input.shape))))
    keepdim = get_arg(ctx, 'keepdim', pos=2, default=False)
    input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
    # torch.max(x, dim) returns (values, indices) and only the values tensor is converted;
    # torch.max(x) with no dim returns a single tensor.
    outputs = ctx.method_return
    output_val = outputs if isinstance(outputs, torch.Tensor) else outputs[0]
    layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.MAX, torch_dim_to_trt_axes(dim), keepdim)
    output_val._trt = layer.get_output(0)


@tensorrt_converter('torch.max')
@tensorrt_converter('torch.Tensor.max')
def convert_max(ctx):
if len(ctx.method_args) > 1 and isinstance(ctx.method_args[1], torch.Tensor):
__convert_max_elementwise(ctx)
else:
__convert_max_reduce(ctx)
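
# Hedged note (illustrative only): the dispatch above means torch.max(x, y) with a second
# tensor becomes an elementwise MAX, while torch.max(x, 1) or torch.max(x) becomes a
# reduce over the given (or all non-batch) dimensions.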


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_max_reduce_dim1():
return UnaryModule(lambda x: torch.max(x, 1)[0])


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_max_reduce_dim2():
return UnaryModule(lambda x: torch.max(x, 2)[0])


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_max_reduce_dim1_keepdim():
return UnaryModule(lambda x: torch.max(x, 1, keepdim=True)[0])


class MaxElementwise(torch.nn.Module):
def forward(self, x, y):
return torch.max(x, y)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3), (1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3), (1,)]) # broadcast
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3), (1, 3, 3)]) # broadcast
def test_max_elementwise():
return MaxElementwise()
53 changes: 53 additions & 0 deletions torch2trt/converters/max_pool2d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.nn.functional.max_pool2d')
def convert_max_pool2d(ctx):
# parse args
input = get_arg(ctx, 'input', pos=0, default=None)
kernel_size = get_arg(ctx, 'kernel_size', pos=1, default=None)
stride = get_arg(ctx, 'stride', pos=2, default=None)
padding = get_arg(ctx, 'padding', pos=3, default=0)
dilation = get_arg(ctx, 'dilation', pos=4, default=1)
ceil_mode = get_arg(ctx, 'ceil_mode', pos=5, default=False)

# get input trt tensor (or create constant if it doesn't exist)
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]

output = ctx.method_return

# get kernel size
if not isinstance(kernel_size, tuple):
kernel_size = (kernel_size, ) * 2

    # get stride (torch.nn.functional.max_pool2d defaults stride to kernel_size)
    if stride is None:
        stride = kernel_size
    if not isinstance(stride, tuple):
        stride = (stride, ) * 2

# get padding
if not isinstance(padding, tuple):
padding = (padding, ) * 2

layer = ctx.network.add_pooling(
input=input_trt, type=trt.PoolingType.MAX, window_size=kernel_size)

layer.stride = stride
layer.padding = padding

if ceil_mode:
layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP

output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)])
def test_MaxPool2d_without_ceil_mode():
return torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)])
def test_MaxPool2d_with_ceil_mode():
return torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True)
68 changes: 68 additions & 0 deletions torch2trt/converters/mean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.mean')
@tensorrt_converter('torch.Tensor.mean')
def convert_mean(ctx):
input = ctx.method_args[0]
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return

    # get dims from args or kwargs; default to reducing over all non-batch dims
    if 'dim' in ctx.method_kwargs:
        dim = ctx.method_kwargs['dim']
    elif len(ctx.method_args) >= 2:
        dim = ctx.method_args[1]
    else:
        dim = tuple(range(1, len(input.shape)))

# convert list to tuple
if isinstance(dim, list):
dim = tuple(dim)

if not isinstance(dim, tuple):
dim = (dim, )

# create axes bitmask for reduce layer
axes = 0
for d in dim:
axes |= 1 << (d - 1) # -1 to remove batch dimension

# get whether to keep dimensions
if 'keepdim' in ctx.method_kwargs:
keep_dims = ctx.method_kwargs['keepdim']
elif len(ctx.method_args) == 3:
keep_dims = ctx.method_args[2]
else:
keep_dims = False

layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.AVG, axes, keep_dims)
output._trt = layer.get_output(0)
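
# Hedged worked example (illustrative only): TensorRT reduce axes are a bitmask over the
# non-batch dimensions, so torch dim 1 maps to bit 1 << 0 and dim 2 to bit 1 << 1;
# reducing an NCHW tensor over dims (1, 2) therefore uses axes == 0b011 == 3.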


class Mean(torch.nn.Module):
def __init__(self, dim, keepdim):
super(Mean, self).__init__()
self.dim = dim
self.keepdim = keepdim
def forward(self, x):
return x.mean(self.dim, self.keepdim)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_mean_channel():
return Mean(1, False)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_mean_tuple():
return Mean((1, 2), False)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_mean_keepdim():
return Mean(1, True)
62 changes: 62 additions & 0 deletions torch2trt/converters/min.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test
from .unary import UnaryModule


def __convert_min_elementwise(ctx):
input_a = ctx.method_args[0]
input_b = ctx.method_args[1]
output = ctx.method_return
input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.MIN)
output._trt = layer.get_output(0)


def __convert_min_reduce(ctx):
    input = ctx.method_args[0]
    dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, len(input.shape))))
    keepdim = get_arg(ctx, 'keepdim', pos=2, default=False)
    input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
    # torch.min(x, dim) returns (values, indices) and only the values tensor is converted;
    # torch.min(x) with no dim returns a single tensor.
    outputs = ctx.method_return
    output_val = outputs if isinstance(outputs, torch.Tensor) else outputs[0]
    layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.MIN, torch_dim_to_trt_axes(dim), keepdim)
    output_val._trt = layer.get_output(0)


@tensorrt_converter('torch.min')
@tensorrt_converter('torch.Tensor.min')
def convert_min(ctx):
if len(ctx.method_args) > 1 and isinstance(ctx.method_args[1], torch.Tensor):
__convert_min_elementwise(ctx)
else:
__convert_min_reduce(ctx)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_min_reduce_dim1():
return UnaryModule(lambda x: torch.min(x, 1)[0])


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_min_reduce_dim2():
return UnaryModule(lambda x: torch.min(x, 2)[0])


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_min_reduce_dim1_keepdim():
return UnaryModule(lambda x: torch.min(x, 1, keepdim=True)[0])


class MinElementwise(torch.nn.Module):
def forward(self, x, y):
return torch.min(x, y)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3), (1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3), (1,)]) # broadcast
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3), (1, 3, 3)]) # broadcast
def test_min_elementwise():
return MinElementwise()
99 changes: 99 additions & 0 deletions torch2trt/converters/mod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.fmod')
def convert_mod(ctx):
input_a = ctx.method_args[0]
input_b = ctx.method_args[1]
output = ctx.method_return
input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
    # We cannot use ElementWiseOperation.FLOOR_DIV directly because torch.fmod truncates
    # negative results toward 0, while TensorRT's FLOOR_DIV rounds toward -Inf.
    # sign = ab / |ab|
    # trunc(a / b) = sign * (|a| // |b|)
ab_layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.PROD)
abs_ab_layer = ctx.network.add_unary(ab_layer.get_output(0), trt.UnaryOperation.ABS)
sign_layer = ctx.network.add_elementwise(ab_layer.get_output(0), abs_ab_layer.get_output(0),
trt.ElementWiseOperation.DIV)
abs_a_layer = ctx.network.add_unary(input_a_trt, trt.UnaryOperation.ABS)
abs_b_layer = ctx.network.add_unary(input_b_trt, trt.UnaryOperation.ABS)
abs_floor_layer = ctx.network.add_elementwise(abs_a_layer.get_output(0), abs_b_layer.get_output(0),
trt.ElementWiseOperation.FLOOR_DIV)
    # fmod(a, b) = a - trunc(a / b) * b
floordiv_layer = ctx.network.add_elementwise(sign_layer.get_output(0), abs_floor_layer.get_output(0),
trt.ElementWiseOperation.PROD)
prod_layer = ctx.network.add_elementwise(floordiv_layer.get_output(0), input_b_trt, trt.ElementWiseOperation.PROD)
sub_layer = ctx.network.add_elementwise(input_a_trt, prod_layer.get_output(0), trt.ElementWiseOperation.SUB)
output._trt = sub_layer.get_output(0)
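
# Hedged reference sketch (illustrative only, hypothetical helper): the layer graph above
# follows this eager-mode identity for truncated modulo, built from ops TensorRT exposes.
# As in the converter, it assumes a * b is nonzero so the sign term is well defined.
def _fmod_reference(a, b):
    sign = (a * b) / (a * b).abs()            # +1 or -1, the sign of a / b
    trunc_div = sign * (a.abs() // b.abs())   # trunc(a / b)
    return a - trunc_div * b                  # == torch.fmod(a, b)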


# We need a separate converter for the % operator because Torch rounds toward -Inf
# (floor semantics) for this operator, unlike torch.fmod.
# Bug filed: https://github.com/pytorch/pytorch/issues/52425
# Until it is resolved, the converter matches Torch's behavior exactly.
@tensorrt_converter('torch.Tensor.__mod__')
def convert_floormod(ctx):
input_a = ctx.method_args[0]
input_b = ctx.method_args[1]
output = ctx.method_return
input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
# a % b = a - (a//b) * b
floordiv_layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.FLOOR_DIV)
prod_layer = ctx.network.add_elementwise(floordiv_layer.get_output(0), input_b_trt, trt.ElementWiseOperation.PROD)
mod_layer = ctx.network.add_elementwise(input_a_trt, prod_layer.get_output(0), trt.ElementWiseOperation.SUB)
output._trt = mod_layer.get_output(0)
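
# Hedged reference sketch (illustrative only, hypothetical helper): Python's % uses floor
# division (the result takes the sign of the divisor), which matches TensorRT's FLOOR_DIV.
def _floormod_reference(a, b):
    return a - (a // b) * b   # == a % b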


class Mod(torch.nn.Module):
def __init__(self):
super(Mod, self).__init__()

def forward(self, x, y):
return x % y


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)])
def test_mod_op():
return Mod()


class ModAssign(torch.nn.Module):
def __init__(self):
super(ModAssign, self).__init__()

def forward(self, x, y):
x %= y
return x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)])
def test_mod_op_assign():
return ModAssign()


class ModConst(torch.nn.Module):
def __init__(self):
super(ModConst, self).__init__()

def forward(self, x):
return x % 2.


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20)])
def test_mod_op_const():
return ModConst()


class TorchMod(torch.nn.Module):
def __init__(self):
super(TorchMod, self).__init__()

def forward(self, x, y):
return torch.fmod(x, y)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 40, 20)])
def test_mod_func():
return TorchMod()
108 changes: 108 additions & 0 deletions torch2trt/converters/mul.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.mul')
@tensorrt_converter('torch.Tensor.__imul__')
@tensorrt_converter('torch.Tensor.__mul__')
@tensorrt_converter('torch.Tensor.__rmul__')
def convert_mul(ctx):
input_a = ctx.method_args[0]
input_b = ctx.method_args[1]
output = ctx.method_return
input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.PROD)
output._trt = layer.get_output(0)

class Mul(torch.nn.Module):
def __init__(self):
super(Mul, self).__init__()

def forward(self, x, y):
return x * y

@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_mul_basic():
return Mul()


class IMul(torch.nn.Module):
def __init__(self):
super(IMul, self).__init__()

def forward(self, x, y):
x *= y
return x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_mul_imul():
return IMul()


class TorchMul(torch.nn.Module):
def __init__(self):
super(TorchMul, self).__init__()

def forward(self, x, y):
return torch.mul(x, y)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_mul_torchmul():
return TorchMul()


class RMulInt(torch.nn.Module):
def __init__(self):
super(RMulInt, self).__init__()

def forward(self, x):
return 10 * x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_rmul_int():
return RMulInt()


class RMulFloat(torch.nn.Module):
def __init__(self):
super(RMulFloat, self).__init__()

def forward(self, x):
return 10.0 * x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_rmul_float():
return RMulFloat()


class MulConstantNoBatch(torch.nn.Module):
def __init__(self):
super(MulConstantNoBatch, self).__init__()
self.register_buffer('y', torch.ones((3, 10, 10)))

def forward(self, x):
return x * self.y


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)])
def test_mul_constant_nobatch():
return MulConstantNoBatch()


class MulConstantBatch(torch.nn.Module):
def __init__(self):
super(MulConstantBatch, self).__init__()
self.register_buffer('y', torch.ones((1, 3, 10, 10)))

def forward(self, x):
return x * self.y


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)])
def test_mul_constant_batch():
return MulConstantBatch()
40 changes: 40 additions & 0 deletions torch2trt/converters/narrow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import tensorrt as trt
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test

@tensorrt_converter('torch.Tensor.narrow')
@tensorrt_converter('torch.narrow')
def convert_narrow(ctx):
    inputs = get_arg(ctx, 'input', pos=0, default=None)
    dim = get_arg(ctx, 'dim', pos=1, default=0)
    start_idx = get_arg(ctx, 'start', pos=2, default=None)
    length = get_arg(ctx, 'length', pos=3, default=None)
    output = ctx.method_return

    shape = list(inputs.shape)
    if dim < 0:
        dim = len(shape) + dim
    start = [0] * len(shape)
    stride = [1] * len(shape)
    start[dim] = start_idx
    shape[dim] = length

    # the slice layer does not include the batch dimension
    input_trt = add_missing_trt_tensors(ctx.network, [inputs])[0]
    layer = ctx.network.add_slice(input=input_trt, start=start[1:], shape=shape[1:], stride=stride[1:])
    output._trt = layer.get_output(0)
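
# Hedged worked example (illustrative only): torch.narrow(x, 1, 0, 2) on a (1, 3, 224, 224)
# input keeps channels [0, 2); with the batch dimension dropped, the slice layer above is
# configured with start = [0, 0, 0], shape = [2, 224, 224], stride = [1, 1, 1].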

class Narrow(torch.nn.Module):
def __init__(self, dim, start, length):
super(Narrow, self).__init__()
self.dim = dim
self.start = start
self.length = length

def forward(self, x):
return torch.narrow(x,self.dim,self.start,self.length)

@add_module_test(torch.float32, torch.device('cuda'), [(1,3,224,224)])
def test_narrow1():
return Narrow(1,0,2)

@add_module_test(torch.float32, torch.device('cuda'), [(1,3,224,224)])
def test_narrow2():
return Narrow(2,0,50)


54 changes: 54 additions & 0 deletions torch2trt/converters/ne.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.ne')
@tensorrt_converter('torch.Tensor.__ne__')
def convert_ne(ctx):
input_a = ctx.method_args[0]
input_b = ctx.method_args[1]
output = ctx.method_return
input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
layer_1 = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.EQUAL)
layer_2 = ctx.network.add_unary(layer_1.get_output(0), trt.UnaryOperation.NOT)
output._trt = layer_2.get_output(0)


class NotEqual(torch.nn.Module):
def __init__(self):
super(NotEqual, self).__init__()

def forward(self, x, y):
return x != y


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)])
def test_ne_op():
return NotEqual()


class NotEqualConst(torch.nn.Module):
def __init__(self):
super(NotEqualConst, self).__init__()

def forward(self, x):
return x != 13.62


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20)])
def test_ne_op_const():
return NotEqualConst()


class TorchNotEqual(torch.nn.Module):
def __init__(self):
super(TorchNotEqual, self).__init__()

def forward(self, x, y):
return torch.ne(x, y)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)])
def test_ne_torch():
return TorchNotEqual()
67 changes: 67 additions & 0 deletions torch2trt/converters/normalize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.nn.functional.normalize')
def convert_normalize(ctx):
# get args
input = get_arg(ctx, 'input', pos=0, default=None)
p = get_arg(ctx, 'p', pos=1, default=2)
dim = get_arg(ctx, 'dim', pos=2, default=1)
eps = get_arg(ctx, 'eps', pos=3, default=1e-12)

output = ctx.method_return

# add broadcastable scalar constants to network
input_trt, eps_trt, p_trt, p_inv_trt = add_missing_trt_tensors(ctx.network, [input, eps, p, 1.0 / p])
input_trt, eps_trt, p_trt, p_inv_trt = broadcast_trt_tensors(ctx.network, [input_trt, eps_trt, p_trt, p_inv_trt], len(input_trt.shape))

# compute norm = sum(abs(x)**p, dim=dim)**(1./p)
norm = ctx.network.add_unary(input_trt, trt.UnaryOperation.ABS).get_output(0)
norm = ctx.network.add_elementwise(norm, p_trt, trt.ElementWiseOperation.POW).get_output(0)
norm = ctx.network.add_reduce(norm, trt.ReduceOperation.SUM, torch_dim_to_trt_axes(dim), keep_dims=True).get_output(0)
norm = ctx.network.add_elementwise(norm, p_inv_trt, trt.ElementWiseOperation.POW).get_output(0)

# clamp norm = max(norm, eps)
norm = ctx.network.add_elementwise(norm, eps_trt, trt.ElementWiseOperation.MAX).get_output(0)

# divide input by norm
output._trt = ctx.network.add_elementwise(input_trt, norm, trt.ElementWiseOperation.DIV).get_output(0)
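
# Hedged reference sketch (illustrative only, hypothetical helper): the layer sequence
# above follows the same steps as this eager-mode Lp normalization.
def _normalize_reference(x, p=2.0, dim=1, eps=1e-12):
    norm = x.abs().pow(p).sum(dim=dim, keepdim=True).pow(1.0 / p)
    return x / norm.clamp(min=eps)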


class Normalize(torch.nn.Module):
def __init__(self, *args, **kwargs):
super(Normalize, self).__init__()
self.args = args
self.kwargs = kwargs

def forward(self, x):
return torch.nn.functional.normalize(x, *self.args, **self.kwargs)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_normalize_basic():
return Normalize()


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_normalize_l1_basic():
return Normalize(p=1.0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_normalize_l1p5_basic():
return Normalize(p=1.5)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_normalize_l2_height():
return Normalize(p=2.0, dim=2)
33 changes: 33 additions & 0 deletions torch2trt/converters/pad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.nn.functional.pad')
def convert_pad(ctx):
input = ctx.method_args[0]
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return

pad = ctx.method_args[1]
pre_padding = (pad[2], pad[0])
post_padding = (pad[3], pad[1])

# mode / value are ignored since not supported by TensorRT

layer = ctx.network.add_padding(input_trt, pre_padding, post_padding)
output._trt = layer.get_output(0)


class Pad(torch.nn.Module):

def __init__(self, pad):
super(Pad, self).__init__()
self.pad = pad

def forward(self, x):
return torch.nn.functional.pad(x, self.pad)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_pad_basic():
return Pad((1, 2, 3, 4))
58 changes: 58 additions & 0 deletions torch2trt/converters/permute.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.Tensor.permute')
def convert_permute(ctx):
input = ctx.method_args[0]
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return

# permutation -1 because TRT does not include batch dim
if isinstance(ctx.method_args[1], int):
permutation = tuple(ctx.method_args[1:]) # handle permute(a, b, c)
else:
permutation = tuple(ctx.method_args[1]) # handle permute([a, b, c])

assert(permutation[0] == 0) # cannot move batch dim

trt_permutation = tuple([p - 1 for p in permutation])[1:]

layer = ctx.network.add_shuffle(input_trt)
layer.second_transpose = tuple(trt_permutation)

output._trt = layer.get_output(0)
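
# Hedged worked example (illustrative only): x.permute(0, 3, 1, 2) keeps the batch dim in
# place, so after dropping it the shuffle layer above uses second_transpose = (2, 0, 1).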


class Permute(torch.nn.Module):
def __init__(self, *args):
super(Permute, self).__init__()
self.args = args
def forward(self, x):
return x.permute(*self.args).contiguous()


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)])
def test_permute_2d_0123():
return Permute(0, 1, 2, 3)

@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)])
def test_permute_2d_0312():
return Permute(0, 3, 1, 2)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)])
def test_permute_3d_01234():
return Permute(0, 1, 2, 3, 4)

@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)])
def test_permute_3d_04132():
return Permute(0, 4, 1, 3, 2)

@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)])
def test_permute_list():
return Permute([0, 4, 1, 3, 2])

@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)])
def test_permute_tuple():
return Permute((0, 4, 1, 3, 2))
92 changes: 92 additions & 0 deletions torch2trt/converters/pow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.pow')
@tensorrt_converter('torch.Tensor.__ipow__')
@tensorrt_converter('torch.Tensor.__pow__')
def convert_pow(ctx):
input_a = ctx.method_args[0]
input_b = ctx.method_args[1]
output = ctx.method_return
input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.POW)
output._trt = layer.get_output(0)


@tensorrt_converter('torch.Tensor.__rpow__')
def convert_rpow(ctx):
input_a = ctx.method_args[1]
input_b = ctx.method_args[0] # flipped for rpow
output = ctx.method_return
input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.POW)
output._trt = layer.get_output(0)


class Pow(torch.nn.Module):
def __init__(self):
super(Pow, self).__init__()

def forward(self, x, y):
return x ** y

@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_pow_basic():
return Pow()


# __ipow__ not yet impl in torch
# class IPow(torch.nn.Module):
# def __init__(self):
# super(IPow, self).__init__()

# def forward(self, x, y):
# x **= y
# return x


# @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
# def test_pow_ipow():
# return IPow()


class TorchPow(torch.nn.Module):
def __init__(self):
super(TorchPow, self).__init__()

def forward(self, x, y):
return torch.pow(x, y)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_torch_pow():
return TorchPow()


class RpowInt(torch.nn.Module):
def __init__(self):
super(RpowInt, self).__init__()

def forward(self, x):
return 2 ** x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_rpow_int():
return RpowInt()


class RpowFloat(torch.nn.Module):
def __init__(self):
super(RpowFloat, self).__init__()

def forward(self, x):
return 2.0 ** x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_rpow_float():
return RpowFloat()
47 changes: 47 additions & 0 deletions torch2trt/converters/prelu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.nn.functional.prelu')
def convert_prelu(ctx):
input = get_arg(ctx, 'input', pos=0, default=None)
weight = get_arg(ctx, 'weight', pos=1, default=None)
output = ctx.method_return

weight_shape = [1] * (len(input.shape) - 1)
weight_shape[0] = weight.numel()

input_trt = add_missing_trt_tensors(ctx.network, [input])[0]


# y = prelu(x) = relu(x) - alpha * relu(-x)
weight_trt = ctx.network.add_constant(weight_shape, -weight.detach().view(weight_shape).cpu().numpy()).get_output(0) # detach so considered leaf

    # a = relu(x), which covers the x >= 0 branch
    a = ctx.network.add_activation(input_trt, trt.ActivationType.RELU).get_output(0)

    # b = -alpha * relu(-x), which covers the x < 0 branch (weight_trt holds -alpha)
    b = ctx.network.add_unary(input_trt, trt.UnaryOperation.NEG).get_output(0)
    b = ctx.network.add_activation(b, trt.ActivationType.RELU).get_output(0)
    b = ctx.network.add_elementwise(b, weight_trt, trt.ElementWiseOperation.PROD).get_output(0)

    # y = a + b = relu(x) - alpha * relu(-x)
y = ctx.network.add_elementwise(a, b, trt.ElementWiseOperation.SUM)

output._trt = y.get_output(0)
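
# Hedged reference sketch (illustrative only, hypothetical helper): the layer graph above
# mirrors this eager-mode identity, which expresses PReLU without a dedicated plugin.
def _prelu_reference(x, weight):
    w = weight.view([weight.numel()] + [1] * (x.dim() - 2))  # broadcast alpha per channel
    return torch.relu(x) - w * torch.relu(-x)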


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3, 3)])
def test_prelu_scalar():
return torch.nn.PReLU()


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3, 3)])
def test_prelu_vector():
m = torch.nn.PReLU(5)
m.weight = torch.nn.Parameter(torch.randn(5)) # randn so each channel different
return m
38 changes: 38 additions & 0 deletions torch2trt/converters/prod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test
from .unary import UnaryModule


@tensorrt_converter('torch.prod')
@tensorrt_converter('torch.Tensor.prod')
def convert_prod(ctx):
input = ctx.method_args[0]
dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, len(input.shape))))
keepdim = get_arg(ctx, 'keepdim', pos=2, default=False)
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return
layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.PROD, torch_dim_to_trt_axes(dim), keepdim)
output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_prod_reduce_all():
return UnaryModule(lambda x: torch.prod(x))


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_prod_reduce_dim1():
return UnaryModule(lambda x: torch.prod(x, 1))


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_prod_reduce_dim2():
return UnaryModule(lambda x: torch.prod(x, 2))


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_prod_reduce_dim1_keepdim():
return UnaryModule(lambda x: torch.prod(x, 1, keepdim=True))
49 changes: 49 additions & 0 deletions torch2trt/converters/relu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.relu')
@tensorrt_converter('torch.relu_')
@tensorrt_converter('torch.nn.functional.relu')
@tensorrt_converter('torch.nn.functional.relu_')
@tensorrt_converter('torch.Tensor.relu')
def convert_functional_relu(ctx):
ctx.method_args = (torch.nn.ReLU(),) + ctx.method_args
convert_relu(ctx)


@tensorrt_converter('torch.nn.ReLU.forward')
def convert_relu(ctx):
input = ctx.method_args[1]
input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
output = ctx.method_return
layer = ctx.network.add_activation(
input=input_trt, type=trt.ActivationType.RELU)
output._trt = layer.get_output(0)

@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)])
def test_relu_basic():
return torch.nn.ReLU()


class FunctionalRelu(torch.nn.Module):
def forward(self, x):
return torch.nn.functional.relu(x)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)])
def test_functional_relu_basic():
return FunctionalRelu()


class TensorRelu(torch.nn.Module):
def __init__(self):
super(TensorRelu, self).__init__()

def forward(self, x):
return x.relu()


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20)])
def test_tensor_relu():
return TensorRelu()
40 changes: 40 additions & 0 deletions torch2trt/converters/relu6.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.nn.functional.relu6')
def convert_functional_relu6(ctx):
ctx.method_args = (torch.nn.ReLU6(),) + ctx.method_args
convert_relu6(ctx)


@tensorrt_converter('torch.nn.ReLU6.forward')
def convert_relu6(ctx):
input = ctx.method_args[1]
output = ctx.method_return

input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input, 6])
input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)

layer = ctx.network.add_activation(
input=input_a_trt, type=trt.ActivationType.RELU)
layer = ctx.network.add_elementwise(
layer.get_output(0), input_b_trt, trt.ElementWiseOperation.MIN)

output._trt = layer.get_output(0)
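
# Hedged note (illustrative only): ReLU6 is expressed above as min(relu(x), 6), i.e.
# torch.clamp(torch.relu(x), max=6.0) in eager mode, built from a RELU activation
# followed by an elementwise MIN against the constant 6.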


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)])
def test_relu6_basic():
return torch.nn.ReLU6()


class FunctionalRelu6(torch.nn.Module):
def forward(self, x):
return torch.nn.functional.relu6(x)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)])
def test_functional_relu6_basic():
return FunctionalRelu6()
