### UNet Inference kernel

This is the inference kernel for my [UNet starter kernel](https://www.kaggle.com/rishabhiitbhu/unet-starter-kernel-pytorch-lb-0-888).
Don't forget to add the `model.pth` file generated by the starter kernel as a dataset, so that it can be used to predict on the test set.

In [1]:
# Install the `pretrainedmodels` package from its local source directory under ../input.
!pip install ../input/pretrainedmodels/pretrainedmodels-0.7.4/pretrainedmodels-0.7.4/

Processing /kaggle/input/pretrainedmodels/pretrainedmodels-0.7.4/pretrainedmodels-0.7.4
Building wheels for collected packages: pretrainedmodels
  Building wheel for pretrainedmodels (setup.py) ... [?25l- \ done
[?25h  Created wheel for pretrainedmodels: filename=pretrainedmodels-0.7.4-cp36-none-any.whl size=60963 sha256=be2d5a91efacf4b379a116ea4e733c6ba22ef89ef00fbce9117ecabedea045df
  Stored in directory: /tmp/.cache/pip/wheels/52/1a/2a/9e4582032d4e47d36ff06371d5579b3a6622985bdf37ee4b20
Successfully built pretrainedmodels
Installing collected packages: pretrainedmodels
Successfully installed pretrainedmodels-0.7.4


In [2]:
# Extend the import search path with the directory below so that modules
# stored there can be imported (presumably the `model` module used later).
# For background on this setup, see the starter kernel linked above.
import sys

package_path = '../input/unetmodelscript/'
sys.path.append(package_path)

In [3]:
import pdb
import os
import cv2
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset
from albumentations import (HorizontalFlip, ShiftScaleRotate, Normalize, Resize, Compose, GaussNoise)
from albumentations.pytorch import ToTensor
import torch.utils.data as data
from model import Unet

In [4]:
# Source: https://www.kaggle.com/paulorzp/rle-functions-run-lenght-encode-decode
def mask2rle(img):
    '''Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the encoding as a space-separated string of
    "start length" pairs, where starts are 1-based positions in the
    transposed-then-flattened array.
    '''
    flat = img.T.flatten()
    # Pad with a zero on each side so runs touching either end still
    # produce both a start and an end transition.
    padded = np.concatenate(([0], flat, [0]))
    changed = padded[1:] != padded[:-1]
    runs = np.where(changed)[0] + 1
    # Odd slots hold run ends; subtract the matching starts to get lengths.
    runs[1::2] -= runs[::2]
    return ' '.join(map(str, runs))

In [5]:
class TestDataset(Dataset):
    '''Dataset for test prediction.

    Args:
        root: directory that contains the test images.
        df: DataFrame with an `ImageId_ClassId` column; the part before the
            first underscore is taken as the image file name. The frame is
            only read, never modified.
        mean, std: per-channel statistics passed to `Normalize`.
        TTA: when truthy, every image is horizontally flipped before
            normalisation (test-time augmentation).

    Each item is a `(fname, image)` pair, where `image` is the output of the
    transform pipeline.
    '''
    def __init__(self, root, df, mean, std, TTA=False):
        self.root = root
        # Derive the image ids into a local Series instead of writing a new
        # column into the caller's DataFrame.
        image_ids = df['ImageId_ClassId'].apply(lambda x: x.split('_')[0])
        self.fnames = image_ids.unique().tolist()
        self.num_samples = len(self.fnames)

        transforms = [
            Normalize(mean=mean, std=std, p=1),
            ToTensor(),
        ]
        if TTA:
            # p=1 is required: with the default probability the flip would be
            # applied to only a random subset of images, while consumers of
            # the TTA stream un-flip every prediction.
            transforms.insert(0, HorizontalFlip(p=1))
        self.transform = Compose(transforms)

    def __getitem__(self, idx):
        fname = self.fnames[idx]
        path = os.path.join(self.root, fname)
        # NOTE(review): cv2.imread yields BGR channel order and no conversion
        # is done here - confirm this matches how the model was trained.
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            raise FileNotFoundError(f"Could not read image: {path}")
        images = self.transform(image=image)["image"]
        return fname, images

    def __len__(self):
        return self.num_samples

In [6]:
def post_process(probability, threshold, min_size):
    '''Binarise a predicted probability map and drop small components.

    Args:
        probability: 2-D array of per-pixel scores for one class.
        threshold: pixels with a score strictly above this value are kept.
        min_size: connected components whose pixel count is not greater
            than this value are discarded.

    Returns:
        (predictions, num): a float32 mask with the same shape as the input
        (1 inside kept components, 0 elsewhere) and the number of kept
        components.
    '''
    mask = cv2.threshold(probability, threshold, 1, cv2.THRESH_BINARY)[1]
    num_component, component = cv2.connectedComponents(mask.astype(np.uint8))
    # Size the output from the label map rather than a fixed (256, 1600),
    # so the function works for any input resolution.
    predictions = np.zeros(component.shape, np.float32)
    num = 0
    # Label 0 is the background, so start from 1.
    for c in range(1, num_component):
        p = (component == c)
        if p.sum() > min_size:
            predictions[p] = 1
            num += 1
    return predictions, num

In [7]:
!ls ../input/infer_test/

ls: cannot access '../input/infer_test/': No such file or directory


In [8]:
# Competition inputs: the folder of test images and the sample submission CSV.
test_data_folder = '../input/severstal-steel-defect-detection/test_images'
sample_submission_path = '../input/severstal-steel-defect-detection/sample_submission.csv'

In [9]:
# initialize test dataloader
best_threshold = 0.5
num_workers = 2
batch_size = 1
print('best_threshold', best_threshold)
min_size = 3500
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
df = pd.read_csv(sample_submission_path)
testset = DataLoader(
    TestDataset(test_data_folder, df, mean, std),
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True
)

testset_TTA = DataLoader(
    TestDataset(test_data_folder,df,mean,std,TTA=True),
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True
)

best_threshold 0.5


In [10]:
# Show the entries of the ../input/infer-test directory (the per-fold model checkpoints).
import os
os.listdir('../input/infer-test')

['model_fold_2.pth',
 'model_fold_1.pth',
 'model_fold_4.pth',
 'model_fold_3.pth',
 'model_fold_0.pth']

In [11]:
# # Initialize model and load trained weights
# predictions = []
# model_name = "resnet18"
# models = []
# for i in range(4):
#     #ckpt_path = "../input/uresnet/u-resnet18_fold_{}.pth".format(i)
#     ckpt_path = "../input/ures184fold/u-resnet18_fold_{}.pth".format(i)
#     device = torch.device("cuda")
#     model = Unet(encoder_name="resnet18",classes=4,activation=None, encoder_weights=None)
#     model.to(device)
#     model.eval()
#     state = torch.load(ckpt_path,map_location=lambda storage,loc:storage)
#     #model.load_state_dict(state["state_dict"])
#     from collections import OrderedDict
#     new_state_dict = OrderedDict()

#     for k, v in state['state_dict'].items():
#         if k in model.state_dict():
#             new_state_dict[k]=v

#     model.load_state_dict(new_state_dict)
#     models.append(model)

In [12]:
# Initialize one U-Net per checkpoint file and load the trained fold weights.
from collections import OrderedDict

model_name = "resnet18"
ckpt_dir = '../input/infer-test'
# Created once for all models; falls back to the CPU when no GPU is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

models = []
# os.listdir returns entries in arbitrary order; sort for a reproducible run.
for ckpt_name in sorted(os.listdir(ckpt_dir)):
    if not ckpt_name.endswith('.pth'):
        # Ignore anything in the folder that is not a checkpoint file.
        continue
    ckpt_path = os.path.join(ckpt_dir, ckpt_name)
    print(ckpt_path)

    model = Unet(encoder_name=model_name, classes=4, activation=None, encoder_weights=None)
    # Load tensors onto the CPU first; the model is moved to `device` afterwards.
    state = torch.load(ckpt_path, map_location=lambda storage, loc: storage)

    # Keep only the entries the model actually defines. The key set is
    # computed once instead of rebuilding the state dict for every key.
    expected_keys = model.state_dict().keys()
    new_state_dict = OrderedDict(
        (k, v) for k, v in state['state_dict'].items() if k in expected_keys
    )
    model.load_state_dict(new_state_dict)

    model.to(device)
    model.eval()
    models.append(model)

../input/infer-test/model_fold_2.pth
../input/infer-test/model_fold_1.pth
../input/infer-test/model_fold_4.pth
../input/infer-test/model_fold_3.pth
../input/infer-test/model_fold_0.pth


In [13]:
# Run the ensemble over the test set and write the submission file.
predictions = []
# Inference only: disable autograd so no computation graph is kept in memory.
with torch.no_grad():
    for fnames, images in tqdm(testset):
        images = images.to(device)
        # Horizontal-flip TTA is done on the tensor itself (last axis = width),
        # which makes the flip deterministic for every image.
        images_TTA = images.flip(-1)

        batch_preds = 0
        for model in models:
            pred = torch.sigmoid(model(images))
            # Flip the TTA prediction back so it aligns with the original image.
            pred_TTA = torch.sigmoid(model(images_TTA)).flip(-1)
            # Each model contributes the mean of its two views exactly once,
            # so the ensemble value stays a probability in [0, 1].
            # NOTE(review): the plain prediction used to be added a second
            # time here, which inflated the scores; `best_threshold` may
            # need re-tuning now that the average is normalised.
            batch_preds = batch_preds + (pred + pred_TTA) / 2
        batch_preds = (batch_preds / len(models)).cpu().numpy()

        for fname, preds in zip(fnames, batch_preds):
            # One row per (image, class); class ids are 1-based in the submission.
            for cls, pred in enumerate(preds):
                pred, num = post_process(pred, best_threshold, min_size)
                rle = mask2rle(pred)
                name = fname + f"_{cls+1}"
                predictions.append([name, rle])

# save predictions to submission.csv
df = pd.DataFrame(predictions, columns=['ImageId_ClassId', 'EncodedPixels'])
df.to_csv("submission.csv", index=False)

1801it [05:44,  5.23it/s]


In [14]:
# Preview the first 50 rows of `df`.
df.head(50)

Unnamed: 0,ImageId_ClassId,EncodedPixels
0,004f40c73.jpg_1,
1,004f40c73.jpg_2,
2,004f40c73.jpg_3,
3,004f40c73.jpg_4,
4,006f39c41.jpg_1,
5,006f39c41.jpg_2,
6,006f39c41.jpg_3,
7,006f39c41.jpg_4,
8,00b7fb703.jpg_1,
9,00b7fb703.jpg_2,
