In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

class PyramidPoolingModule(nn.Module):
    def __init__(self, in_channels, pool_sizes):
        super(PyramidPoolingModule, self).__init__()
        self.pools = [nn.AdaptiveAvgPool2d(output_size=size) for size in pool_sizes]
        self.conv_blocks = nn.ModuleList([nn.Conv2d(in_channels, 512, 1) for _ in pool_sizes])
        self.batch_norms = nn.ModuleList([nn.BatchNorm2d(512) for _ in pool_sizes])

    def forward(self, x):
        features = [x]
        for pool, conv, bn in zip(self.pools, self.conv_blocks, self.batch_norms):
            pooled = pool(x)
            convolved = conv(pooled)
            upsampled = F.interpolate(convolved, size=x.shape[2:], mode='bilinear', align_corners=False)
            features.append(bn(upsampled))
        return torch.cat(features, dim=1)

class CascadeFeatureFusion(nn.Module):
    def __init__(self, low_channels, high_channels, out_channels, num_classes):
        super(CascadeFeatureFusion, self).__init__()
        self.conv_low = nn.Conv2d(low_channels, out_channels, 3, padding=2, dilation=2)
        self.conv_high = nn.Conv2d(high_channels, out_channels, 1)
        self.conv_low_bn = nn.BatchNorm2d(out_channels)
        self.conv_high_bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.class_conv = nn.Conv2d(out_channels, num_classes, 1)

    def forward(self, low_res_input, high_res_input):
        low_res = self.relu(self.conv_low_bn(self.conv_low(low_res_input)))
        high_res = self.relu(self.conv_high_bn(self.conv_high(high_res_input)))
        high_res = F.interpolate(high_res, size=low_res.shape[2:], mode='bilinear', align_corners=False)
        result = low_res + high_res
        class_output = self.class_conv(result)
        return result, class_output

class ICNet(nn.Module):
    def __init__(self, num_classes):
        super(ICNet, self).__init__()
        self.backbone = models.resnet50(pretrained=True)
        self.backbone = nn.Sequential(*list(self.backbone.children())[:-2])
        self.ppm = PyramidPoolingModule(2048, [1, 2, 3, 6])
        self.cff1 = CascadeFeatureFusion(1024, 4096, 256, num_classes)
        self.cff2 = CascadeFeatureFusion(512, 256, 128, num_classes)

    def forward(self, x):
        layer1 = self.backbone[4](self.backbone[3](self.backbone[2](self.backbone[1](self.backbone[0](x)))))
        layer2 = self.backbone[5](layer1)
        layer3 = self.backbone[6](layer2)
        layer4 = self.backbone[7](layer3)
        ppm_output = self.ppm(layer4)
        cff1_output, class_output1 = self.cff1(layer3, ppm_output)
        cff2_output, class_output2 = self.cff2(layer2, cff1_output)
        final_output = F.interpolate(class_output2, scale_factor=4, mode='bilinear', align_corners=False)
        return final_output

# Initialize model and load weights
model = ICNet(num_classes=19)
model.load_state_dict(torch.load('trained_icnet_model_final.pth'))




<All keys matched successfully>

In [2]:
import torch
import torch2trt
from torchvision import models

# Ensure your model is on the GPU
model = ICNet(num_classes=19).cuda().eval()  # Add .cuda() here
model.load_state_dict(torch.load('/home/maith/Desktop/cityscapes/trained_icnet_model_final.pth'))

# Create dummy input data as a tensor on the GPU
x = torch.randn(1, 3, 256, 512).cuda()  # Input tensor is already on GPU

# Convert to TensorRT using torch2trt
model_trt = torch2trt.torch2trt(model, [x])

# Optionally, save the converted model
torch.save(model_trt.state_dict(), 'model_trt.pth')

print("Model has been successfully converted to TensorRT.")

Model has been successfully converted to TensorRT.


In [5]:
import torch
import torch2trt
from torchvision import models

# Ensure your model is on the GPU
model = ICNet(num_classes=19).cuda().eval()
model.load_state_dict(torch.load('/home/maith/Desktop/cityscapes/trained_icnet_model_final.pth'))

# Save the PyTorch model state dictionary
torch.save(model.state_dict(), 'model_state_dict.pth')

print("Original PyTorch model state dictionary has been saved.")

Original PyTorch model state dictionary has been saved.


In [6]:
import torch
import torch2trt
from torchvision import models

# Ensure your model architecture is defined
model = ICNet(num_classes=19).cuda().eval()

# Load the saved state dictionary
model.load_state_dict(torch.load('model_state_dict.pth'))

# Create dummy input data as a tensor on the GPU
x = torch.randn(1, 3, 256, 512).cuda()  # Adjust input size as necessary

# Convert to TensorRT using torch2trt
model_trt = torch2trt.torch2trt(model, [x])

# Perform a forward pass with the same dummy input or new test data
output = model_trt(x)
print("Output Shape:", output.shape)
print("Sample Output:", output[0, :5])  # Print part of the output to check

[07/16/2024-03:14:32] [TRT] [W] Using default stream in enqueueV3() may lead to performance issues due to additional calls to cudaStreamSynchronize() by TensorRT to ensure correct synchronization. Please use non-default stream instead.
Output Shape: torch.Size([1, 19, 128, 256])
Sample Output: tensor([[[-7.1987, -7.1987, -7.2587,  ..., -6.8243, -6.8004, -6.8004],
         [-7.1987, -7.1987, -7.2587,  ..., -6.8243, -6.8004, -6.8004],
         [-7.3054, -7.3054, -7.4263,  ..., -6.8824, -6.8196, -6.8196],
         ...,
         [-8.0553, -8.0553, -8.0198,  ..., -7.6188, -7.6388, -7.6388],
         [-8.0771, -8.0771, -7.9855,  ..., -7.6263, -7.6839, -7.6839],
         [-8.0771, -8.0771, -7.9855,  ..., -7.6263, -7.6839, -7.6839]],

        [[-9.2225, -9.2225, -9.2125,  ..., -9.2795, -9.4608, -9.4608],
         [-9.2225, -9.2225, -9.2125,  ..., -9.2795, -9.4608, -9.4608],
         [-9.2586, -9.2586, -9.2564,  ..., -9.2289, -9.4335, -9.4335],
         ...,
         [-7.6792, -7.6792, -7.6215,

In [7]:
import torch
import torch2trt
from torchvision import models

# Ensure your model architecture is defined and moved to GPU
model = ICNet(num_classes=19).cuda().eval()

# Load the saved state dictionary
model.load_state_dict(torch.load('model_state_dict.pth'))

# Create dummy input data as a tensor on the GPU
x = torch.randn(1, 3, 256, 512).cuda()  # Adjust input size as necessary

# Convert to TensorRT using torch2trt
model_trt = torch2trt.torch2trt(model, [x])

# Create a non-default CUDA stream
stream = torch.cuda.Stream()

# Perform inference using the non-default stream
with torch.cuda.stream(stream):
    output = model_trt(x)

# Ensure the stream is synchronized before accessing the results
stream.synchronize()

print("Output Shape:", output.shape)
print("Sample Output:", output[0, :5])  # Print part of the output to check

Output Shape: torch.Size([1, 19, 128, 256])
Sample Output: tensor([[[ -6.6982,  -6.6982,  -6.7221,  ...,  -7.2160,  -7.2406,  -7.2406],
         [ -6.6982,  -6.6982,  -6.7221,  ...,  -7.2160,  -7.2406,  -7.2406],
         [ -6.7570,  -6.7570,  -6.8429,  ...,  -7.2853,  -7.2654,  -7.2654],
         ...,
         [ -7.2554,  -7.2554,  -7.3055,  ...,  -7.8772,  -7.8347,  -7.8347],
         [ -7.2161,  -7.2161,  -7.2289,  ...,  -7.8565,  -7.8380,  -7.8380],
         [ -7.2161,  -7.2161,  -7.2289,  ...,  -7.8565,  -7.8380,  -7.8380]],

        [[ -9.1625,  -9.1625,  -9.0636,  ..., -10.2583, -10.4156, -10.4156],
         [ -9.1625,  -9.1625,  -9.0636,  ..., -10.2583, -10.4156, -10.4156],
         [ -9.1241,  -9.1241,  -9.0631,  ..., -10.1369, -10.3068, -10.3068],
         ...,
         [ -7.0837,  -7.0837,  -7.1369,  ...,  -4.9820,  -5.0064,  -5.0064],
         [ -7.0995,  -7.0995,  -7.1609,  ...,  -4.9968,  -4.9958,  -4.9958],
         [ -7.0995,  -7.0995,  -7.1609,  ...,  -4.9968,  -4.9958

In [8]:
import time

# Create a dummy input tensor
x = torch.randn(1, 3, 256, 512).cuda()

# Timing the PyTorch model
start_time = time.time()
output = model(x)
end_time = time.time()
pytorch_inference_time = end_time - start_time
print(f"PyTorch Inference Time: {pytorch_inference_time:.6f} seconds")

# Timing the TensorRT model
start_time = time.time()
output_trt = model_trt(x)
end_time = time.time()
tensorrt_inference_time = end_time - start_time
print(f"TensorRT Inference Time: {tensorrt_inference_time:.6f} seconds")

PyTorch Inference Time: 0.002998 seconds
[07/16/2024-03:16:17] [TRT] [W] Using default stream in enqueueV3() may lead to performance issues due to additional calls to cudaStreamSynchronize() by TensorRT to ensure correct synchronization. Please use non-default stream instead.
TensorRT Inference Time: 0.000863 seconds
