In [1]:
import os
import cv2
import pdb
import time
import warnings
import random
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import StratifiedKFold
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset, sampler
from matplotlib import pyplot as plt
from albumentations import (HorizontalFlip, ShiftScaleRotate, Normalize, Resize, Compose, GaussNoise)
from albumentations.torch import ToTensor
warnings.filterwarnings("ignore")

In [2]:
import segmentation_models_pytorch as smp

In [3]:
def run_length_decode(rle, height=1024, width=1024, fill_value=1):
    """Decode a relative run-length-encoded string into a 2-D mask.

    The string is a whitespace-separated list of ``offset length`` pairs.
    Each offset is counted from the end of the previous run (the first one
    from pixel 0) over the column-major flattening of the mask, i.e. the
    inverse of ``run_length_encode``.

    Args:
        rle: Encoded string. An empty/blank string or the ``'-1'`` sentinel
            (used in this notebook for "no mask") decodes to an all-zero mask.
        height: Number of rows of the decoded mask.
        width: Number of columns of the decoded mask.
        fill_value: Value written into the pixels covered by a run.

    Returns:
        ``np.float32`` array of shape ``(height, width)``.

    Raises:
        ValueError: If a token is not an integer or the number of tokens is
            odd (the pairs cannot be formed).
    """
    flat_mask = np.zeros(height * width, np.float32)
    # split() without an argument tolerates leading/trailing/repeated blanks.
    tokens = rle.split()
    # '-1' marks "no mask"; an empty string carries no runs either.
    if tokens and tokens != ['-1']:
        runs = np.array([int(tok) for tok in tokens]).reshape(-1, 2)
        cursor = 0  # end of the previous run in the flattened mask
        for offset, length in runs:
            begin = cursor + offset
            cursor = begin + length
            flat_mask[begin:cursor] = fill_value
    # Runs are laid out column by column, hence reshape to (width, height)
    # followed by a transpose.
    return flat_mask.reshape(width, height).T

def run_length_encode(component):
    """Encode a binary 2-D mask as a relative run-length string.

    The mask is flattened column by column. Every run of foreground pixels
    becomes an ``offset length`` pair, where the offset is the distance from
    the end of the previous run (for the first run: from pixel 0).

    Runs that touch the very first or very last pixel of the flattened mask
    are handled by zero-padding both ends before looking for edges; without
    the padding such runs have no rising/falling edge and would be lost.

    Args:
        component: 2-D array-like mask. Assumed to be binary (background 0,
            a single foreground value); edges are detected with ``>``.

    Returns:
        Space-separated string of integers, or ``''`` when the mask has no
        foreground pixel.
    """
    flat = np.asarray(component).T.flatten()
    padded = np.concatenate(([0], flat, [0]))
    # Index j of these comparisons maps to pixel j of ``flat``:
    # ``start`` is the first pixel of a run, ``end`` is one past its last.
    start = np.where(padded[1:] > padded[:-1])[0]
    end = np.where(padded[:-1] > padded[1:])[0]
    if start.size == 0:
        return ''
    length = end - start
    # Offset of each run relative to the end of the one before it.
    previous_end = np.concatenate(([0], end[:-1]))
    gaps = start - previous_end
    pairs = np.column_stack((gaps, length)).ravel()
    return ' '.join(str(int(value)) for value in pairs)

In [4]:
# Directory holding the test images; passed to TestDataset as its root folder.
test_data_folder = "test2_png"
# CSV loaded into `df` in the inference cell; its ImageId column lists the
# images to predict.
sample_submission_path = "stage_2_sample_submission.csv"

In [5]:
class TestDataset(Dataset):
    """Dataset yielding preprocessed test images, one per ``ImageId``.

    Each item is read from ``<root>/<ImageId>.png`` with ``cv2.imread``
    (default flags), normalized with ``mean``/``std``, resized to
    ``size x size`` and converted by ``ToTensor``.

    Args:
        root: Directory containing the ``.png`` files.
        df: Table with an ``ImageId`` column; its order defines item order.
        size: Side length passed to ``Resize`` for both height and width.
        mean: Per-channel mean passed to ``Normalize``.
        std: Per-channel standard deviation passed to ``Normalize``.
        tta: Accepted for backward compatibility; not used by this class.
    """

    def __init__(self, root, df, size, mean, std, tta=4):
        self.root = root
        self.size = size
        self.fnames = list(df["ImageId"])
        self.num_samples = len(self.fnames)
        # Normalization is applied before resizing, as in the original
        # pipeline; the order is kept so predictions stay unchanged.
        self.transform = Compose(
            [
                Normalize(mean=mean, std=std, p=1),
                Resize(size, size),
                ToTensor(),
            ]
        )

    def __getitem__(self, idx):
        """Return the transformed image for position ``idx``.

        Raises:
            FileNotFoundError: If the image file does not exist.
            OSError: If the file exists but ``cv2.imread`` cannot decode it.
        """
        fname = self.fnames[idx]
        path = os.path.join(self.root, fname + ".png")
        # cv2.imread reports failures by returning None instead of raising,
        # which would otherwise surface as an opaque error in the transform.
        if not os.path.isfile(path):
            raise FileNotFoundError("Test image not found: {}".format(path))
        image = cv2.imread(path)
        if image is None:
            raise OSError("cv2.imread could not decode image: {}".format(path))
        return self.transform(image=image)["image"]

    def __len__(self):
        """Number of images listed in the source table."""
        return self.num_samples

def post_process(probability, threshold, min_size):
    """Binarize a probability map and drop small connected components.

    The map is thresholded with ``cv2.threshold`` (``THRESH_BINARY``), split
    into connected components with ``cv2.connectedComponents`` and only the
    components with strictly more than ``min_size`` pixels are kept.

    Args:
        probability: 2-D probability map. Any size is accepted; the result
            has the same shape (previously the output was fixed to
            1024 x 1024, which failed for other input sizes).
        threshold: Pixels strictly above this value count as foreground.
        min_size: Components with at most this many pixels are discarded.

    Returns:
        Tuple ``(predictions, num)``: a ``np.float32`` mask of kept pixels
        (values 0/1) and the number of components kept.
    """
    mask = cv2.threshold(probability, threshold, 1, cv2.THRESH_BINARY)[1]
    num_component, component = cv2.connectedComponents(mask.astype(np.uint8))
    predictions = np.zeros(probability.shape, np.float32)
    num = 0
    # Label 0 is the background, so real components start at 1.
    for label in range(1, num_component):
        region = component == label
        if region.sum() > min_size:
            predictions[region] = 1
            num += 1
    return predictions, num

In [6]:
# ---- Inference configuration ------------------------------------------------
size = 768                      # side length of the network input
mean = (0.485, 0.456, 0.406)    # per-channel statistics handed to Normalize
std = (0.229, 0.224, 0.225)     # (the usual ImageNet values)
num_workers = 8
batch_size = 1
best_threshold = 0.6            # probability cut-off used by post_process
min_size = 3500                 # components up to this many pixels are dropped
mask_size = 1024                # side length of the submitted masks
# Fall back to the CPU so the cell still runs on a machine without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# ---- Data ---------------------------------------------------------------------
df = pd.read_csv(sample_submission_path)
testset = DataLoader(
    TestDataset(test_data_folder, df, size, mean, std),
    batch_size=batch_size,
    shuffle=False,  # predictions must stay aligned with the rows of df
    num_workers=num_workers,
    pin_memory=device.type == "cuda",
)


# ---- Models -------------------------------------------------------------------
def _load_fpn(encoder_name, checkpoint_path):
    """Build an FPN with the given encoder and load a trained checkpoint.

    The checkpoint is expected to be a dict with a ``"state_dict"`` entry.
    ``encoder_weights=None`` skips the ImageNet download: every weight is
    overwritten by the (strictly loaded) checkpoint anyway.
    """
    model = smp.FPN(encoder_name, encoder_weights=None, activation=None)
    state = torch.load(checkpoint_path, map_location="cpu")
    model.load_state_dict(state["state_dict"])
    return model.to(device).eval()


model1 = _load_fpn("resnet34", './stage2_models/resnet34_fpn.pth')
model2 = _load_fpn("se_resnet50", './stage2_models/se_resnet_fpn.pth')

# ---- Prediction ---------------------------------------------------------------
encoded_pixels = []
# no_grad: inference only, so no autograd graph should be built or kept.
with torch.no_grad():
    for batch in tqdm(testset):
        batch = batch.to(device)  # moved once, shared by both models
        # Ensemble: average of the two models' sigmoid probabilities.
        probabilities = (torch.sigmoid(model1(batch)) + torch.sigmoid(model2(batch))) / 2.0
        probabilities = probabilities[:, 0, :, :].cpu().numpy()
        for probability in probabilities:
            # Bring the map to submission resolution before post-processing.
            if probability.shape != (mask_size, mask_size):
                probability = cv2.resize(
                    probability, dsize=(mask_size, mask_size), interpolation=cv2.INTER_LINEAR
                )
            predict, num_predict = post_process(probability, best_threshold, min_size)
            if num_predict == 0:
                # '-1' is the submission marker for "no mask".
                encoded_pixels.append('-1')
            else:
                encoded_pixels.append(run_length_encode(predict))

# ---- Submission ---------------------------------------------------------------
df['EncodedPixels'] = encoded_pixels
df.to_csv('submission.csv', columns=['ImageId', 'EncodedPixels'], index=False)

HBox(children=(IntProgress(value=0, max=3205), HTML(value='')))




In [7]:
# Preview the first rows of the submission frame, including the EncodedPixels
# column filled in by the inference cell.
df.head()

Unnamed: 0,ImageId,EncodedPixels
0,ID_0011fe81e,-1
1,ID_9ca06e9b9,-1
2,ID_6f00d6ce6,-1
3,ID_9258110b0,-1
4,ID_fa01c9546,298289 8 1007 19 1000 25 38 20 937 31 27 33 92...
