### UNet Inference kernel


In [1]:
import pdb
import os
import cv2
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset
from albumentations import (Normalize, Compose)
from albumentations.pytorch import ToTensor
import torch.utils.data as data
import tensorrt as trt
import time

In [4]:
class TestDataset(Dataset):
    '''Dataset for test prediction'''
    def __init__(self, root, df, mean, std):
        self.root = root
        #df['ImageId'] = df['ImageId_ClassId'].apply(lambda x: x.split('_')[0])
        self.fnames = df['ImageId'].unique().tolist()
        self.num_samples = len(self.fnames)
        self.transform = Compose(
            [
                Normalize(mean=mean, std=std, p=1),
                ToTensor(),
            ]
        )

    def __getitem__(self, idx):
        fname = self.fnames[idx]
        path = os.path.join(self.root, fname)
        image = cv2.imread(path)
        images = self.transform(image=image)["image"]
        return fname, images

    def __len__(self):
        return self.num_samples

In [8]:
# initialize test dataloader
sample_submission_path = './severstal-steel-defect-detection/sample_submission.csv'
test_data_folder = "./severstal-steel-defect-detection/test_images"
best_threshold = 0.5
num_workers = 2
batch_size = 4
print('best_threshold', best_threshold)
min_size = 3500
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
df = pd.read_csv(sample_submission_path)
testset = DataLoader(
    TestDataset(test_data_folder, df, mean, std),
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True
)

best_threshold 0.5


### Load PyT Model

In [9]:
# Initialize mode and load trained weights
ckpt_path = "./models/modelmodel.pth"
device = torch.device("cuda")
state = torch.load(ckpt_path)
model_pyt = torch.load(ckpt_path)
model_pyt.eval()



Unet(
  (encoder): SENetEncoder(
    (layer0): Sequential(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): ReLU(inplace=True)
      (pool): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    )
    (layer1): Sequential(
      (0): SEResNeXtBottleneck(
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

### Inference on PyT model

In [31]:
# start prediction
import time
start = time.time()
predictions = []
for i, batch in enumerate(tqdm(testset)):
    fnames, images = batch
    batch_preds = torch.sigmoid(model_pyt(images.to(device)))
    batch_preds = batch_preds.detach().cpu().numpy()
end = time.time()
inference_pyt=end-start
print("Time for predictions: ",inference_pyt,'s')

100%|██████████| 1377/1377 [02:28<00:00,  9.30it/s]

Time for predictions:  148.0280213356018 s





### Load TRT model

In [14]:
import common2
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)
engine_path = "my_engine.trt"
model = common2.load_engine(trt_runtime,engine_path)
device = torch.device("cuda")

context = model.create_execution_context()

inputs, outputs, bindings, stream = common2.allocate_buffers(model)

### Loading Images

In [28]:
from os import listdir
from os.path import isfile, join
import numpy
import cv2

mypath='./severstal-steel-defect-detection/test_images/'
batch_size=1377
onlyfiles = [ f for f in listdir(mypath) if isfile(join(mypath,f)) ]
image = numpy.empty(batch_size, dtype=object)
for n in range(0, batch_size):
    image[n] = cv2.imread( join(mypath,onlyfiles[n]) )

### Inference on TRT model

In [32]:
start = time.time()
for n in range(0, batch_size):
    inputs[0].host = image[n]
    trt_outputs = common2.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
end = time.time()
inference_trt = end-start
print("Inference time: ",inference_trt, "s")

Inference time:  3.0406620502471924 s


### Total SpeedUp

In [36]:
print("Speedup =",inference_pyt/inference_trt,"x")

Speedup = 48.6828259403467 x


### Refrences:

Few kernels from which I've borrowed some code:

* https://www.kaggle.com/amanooo/defect-detection-starter-u-net
* https://www.kaggle.com/go1dfish/clear-mask-visualization-and-simple-eda