## Train on Image Pairs

We have spent a lot of time trying to create and discover image pairs within our dataset. Now it's time to see whether or not we can actually extract any training signal from these pairs.

In [1]:
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm

from PIL import Image

import fastai
from fastai.vision import open_image, pil2tensor
from fastai.vision import get_image_files, get_transforms, unet_learner, imagenet_stats
from fastai.vision import models, SegmentationItemList, ResizeMethod, DatasetType

from pathlib import Path
from sklearn.model_selection import train_test_split, StratifiedKFold
from src.utils import convert_mask_to_rle, convert_masks_to_rle
from src.utils import override_open_mask, get_training_image_size, multiclass_dice, BCEDiceLoss

In [2]:
# Root data directory and the competition CSV files stored inside it
DATA = Path('data')
TRAIN = DATA.joinpath("train.csv")
TEST = DATA.joinpath("sample_submission.csv")

In [3]:
def _add_label_columns(frame):
    """Split `Image_Label` (e.g. "002be4f.jpg_Fish") into `label` and `im_id` columns, in place."""
    pieces = frame['Image_Label'].apply(lambda value: value.split('_'))
    frame['label'] = pieces.apply(lambda parts: parts[1])
    frame['im_id'] = pieces.apply(lambda parts: parts[0])


train = pd.read_csv(TRAIN)
test = pd.read_csv(TEST)

_add_label_columns(train)
_add_label_columns(test)

# Keep every 4th row so each image appears once.
# NOTE(review): assumes every image contributes four consecutive rows (one per class) — confirm.
unique_images = train.iloc[::4]
unique_test_images = test.iloc[::4]

# Start from an empty submission; predictions are written into this column later on
test['EncodedPixels'] = ''

In [4]:
# Load image paths
# NOTE(review): the pair dictionaries loaded further down are used as positional indices into
# `train_images` / `test_images`, so these lists presumably have to come back in the same order
# as when the pairs were generated — confirm that get_image_files returns a stable ordering.
TRAIN_FOLDER = DATA/'train_images_350x525'
TEST_FOLDER = DATA/'test_images_350x525'
train_images = get_image_files(TRAIN_FOLDER)
test_images = get_image_files(TEST_FOLDER)

In [5]:
def _load_pair_lookup(filename):
    """Load an object stored with `np.save` from the data folder and unwrap it with `[()]`."""
    # allow_pickle=True unpickles the file contents, so only point this at trusted files
    stored = np.load(DATA/filename, allow_pickle=True)
    return stored[()]


train_train_pairs = _load_pair_lookup('train_train_pairs.npy')
train_test_pairs = _load_pair_lookup('train_test_pairs.npy')
test_test_pairs = _load_pair_lookup('test_test_pairs.npy')

In [6]:
# Report how many entries each pair lookup holds
for description, lookup in (
    ("train to train pairs:", train_train_pairs),
    ("train to test pairs:", train_test_pairs),
    ("test to test pairs:", test_test_pairs),
):
    print(description, len(lookup))

train to train pairs: 1770
train to test pairs: 1202
test to test pairs: 798


In [7]:
# Translate the index-based pair lookups into file-name-based ones.
# Keys and values of the loaded dictionaries are positions in `train_images` / `test_images`.
trainID_trainID_pairs = {
    train_images[first_idx].name: train_images[second_idx].name
    for first_idx, second_idx in train_train_pairs.items()
}

trainID_testID_pairs = {
    train_images[train_idx].name: test_images[test_idx].name
    for train_idx, test_idx in train_test_pairs.items()
}

## Baseline Model

The first thing we want to do is generate a baseline for performance so we can gauge whether or not we're improving things. We'll train a model against the entire training set, but only test it against the `1202` images found within `train_test_pairs`.

In the future we're hoping that using pair information will help boost our score on these test images.

In [8]:
#Ensure we open our 4D masks properly
override_open_mask()

size = (350, 525)
training_image_size = get_training_image_size(size)     #UNet requires that inputs are multiples of 32
#If we want to train on smaller images, we can add their suffix here
SUFFIX = f"_{size[0]}x{size[1]}"        #eg. _350x525
batch_size = 8
codes = np.array(['Fish', 'Flower', 'Gravel', 'Sugar'])


def get_y_fn(x):
    """Given the path string of a training image, return the path of its mask (.png) file."""
    file_name = x.split('/')[-1]
    mask_name = file_name.replace('.jpg', '.png')
    return DATA/("train_images_annots" + SUFFIX)/mask_name

In [9]:
# Create test set consisting of images for which we have a pair in train
paired_test_ids = list(trainID_testID_pairs.values())
has_train_pair = unique_test_images['im_id'].isin(paired_test_ids)
paired_test_images = unique_test_images.loc[has_train_pair].reset_index()

len(paired_test_images)

1202

In [10]:
train_image_dir = DATA/('train_images' + str(SUFFIX))
test_image_dir = DATA/('test_images' + str(SUFFIX))

# Every training image is used for training: split_none() means there is no validation split
train_items = SegmentationItemList.from_df(unique_images, train_image_dir, cols='im_id')
src = train_items.split_none().label_from_func(get_y_fn, classes=codes)

# Only test images that have a paired training image are predicted
test_src = SegmentationItemList.from_df(paired_test_images, test_image_dir, cols='im_id')

transforms = get_transforms()
augmented = src.transform(
    transforms,
    tfm_y=True,
    size=training_image_size,
    resize_method=ResizeMethod.PAD,
    padding_mode="zeros",
)
data = (augmented
        .add_test(test_src, tfm_y=False)
        .databunch(bs=batch_size)
        .normalize(imagenet_stats))

learn = unet_learner(
    data,
    models.xresnet18,
    pretrained=False,
    metrics=[multiclass_dice],
    loss_func=BCEDiceLoss(),
    model_dir=DATA,
)

In [11]:
# Train
# The data was built with split_none(), so there is no validation set to report on
# (valid_loss shows #na# in the recorded output).
# NOTE(review): the recorded output lists only epochs 0-9 although 60 epochs are requested here;
# it presumably comes from a shorter or interrupted run — re-run before comparing scores.
learn.fit_one_cycle(60, 1e-3)

epoch,train_loss,valid_loss,multiclass_dice,time
0,0.949255,#na#,06:15,
1,0.896205,#na#,06:12,
2,0.909518,#na#,06:40,
3,0.883361,#na#,06:10,
4,0.913122,#na#,05:56,
5,0.884484,#na#,05:56,
6,0.883727,#na#,05:56,
7,0.861732,#na#,05:56,
8,0.846374,#na#,05:56,
9,0.845968,#na#,05:56,


In [12]:
# Get test predictions
test_preds, _ = learn.get_preds(DatasetType.Test)
# Probabilities always lie in [0, 1], so any value outside that range means we received raw logits.
# Checking only `max() > 1` would miss logits that happen to peak below 1, so the minimum is tested too.
if test_preds.max() > 1 or test_preds.min() < 0:
    # If we use custom loss functions, we have to apply the activation ourselves
    print("TEST: It looks like these are logits. Max:", test_preds.max())
    test_preds = torch.sigmoid(test_preds)

test_preds = test_preds.numpy()
# Cut the padded predictions back down to the 350x525 image size.
# NOTE(review): this keeps the top-left corner; if ResizeMethod.PAD pads symmetrically the
# crop would be shifted relative to the image — confirm.
test_preds = test_preds[:, :, :350, :525]

TEST: It looks like these are logits. Max: tensor(17.5079)


In [13]:
#Convert masks to RLE
threshold = 0.5
min_size = 10000
# Prediction channels 0..3 are written to the rows with these label suffixes, in this order
label_suffixes = ("_Fish", "_Flower", "_Gravel", "_Sugar")

for i, row in tqdm(paired_test_images.iterrows()):
    saved_pred = test_preds[i]

    for channel, label_suffix in enumerate(label_suffixes):
        encoded = convert_mask_to_rle(saved_pred[channel], threshold, min_size)

        # Save in dataframe
        test.loc[test['Image_Label'] == row['im_id'] + label_suffix, 'EncodedPixels'] = encoded

# The helper columns are not part of the submission format
submission = test.drop(columns=['label', 'im_id'])
submission.to_csv("submissions/pair_test_baseline.csv", index=False)

1202it [00:24, 49.46it/s]


## Train with Labels

In [11]:
# Clear submission: blank out the RLE column so predictions written by the
# previous model do not carry over into the next submission file
test['EncodedPixels'] = ''

In [12]:
# Keep only the training images that appear as a key in the train-to-train pair lookup
train_ids_with_pair = list(trainID_trainID_pairs.keys())
is_paired = unique_images['im_id'].isin(train_ids_with_pair)
paired_train_images = unique_images.loc[is_paired].reset_index()

In [13]:
# Look up the partner image of every paired training image, row by row
pair_mask = list(map(trainID_trainID_pairs.__getitem__, paired_train_images['im_id']))
paired_train_images['pair_im_id'] = pair_mask

In [14]:
# Inspect the paired training rows, including the `pair_im_id` column
paired_train_images

Unnamed: 0,index,Image_Label,EncodedPixels,label,im_id,pair_im_id
0,4,002be4f.jpg_Fish,233813 878 235213 878 236613 878 238010 881 23...,Fish,002be4f.jpg,2688104.jpg
1,8,0031ae9.jpg_Fish,3510 690 4910 690 6310 690 7710 690 9110 690 1...,Fish,0031ae9.jpg,da0d544.jpg
2,12,0035239.jpg_Fish,,Fish,0035239.jpg,61aa8dc.jpg
3,36,008a5ff.jpg_Fish,1038475 213 1039875 213 1041275 213 1042675 21...,Fish,008a5ff.jpg,269a0ef.jpg
4,48,009e2f3.jpg_Fish,65812 93 65906 4 65911 10 67212 96 67309 5 673...,Fish,009e2f3.jpg,dd2de1a.jpg
...,...,...,...,...,...,...
1765,22144,ffbf254.jpg_Fish,,Fish,ffbf254.jpg,bc3f6f9.jpg
1766,22148,ffc31af.jpg_Fish,,Fish,ffc31af.jpg,4f3407b.jpg
1767,22152,ffca427.jpg_Fish,613784 292 614079 2 615184 294 615480 1 616584...,Fish,ffca427.jpg,9901417.jpg
1768,22168,ffd11b6.jpg_Fish,,Fish,ffd11b6.jpg,523a7eb.jpg


In [15]:
# Create a mapping from test images to corresponding train images
# by swapping the keys and values of the train-to-test lookup
testID_trainID_pairs = {
    test_id: train_id for train_id, test_id in trainID_testID_pairs.items()
}
inv_map = testID_trainID_pairs

In [33]:
def custom_open(self, fn):
    """
    Opens an image and its corresponding pair's mask.

    The image is converted with `self.convert_mode`, passed through `self.after_open`
    when that is set, and scaled by 1/255. The pair's mask is read as RGBA (4 channels).
    Both are concatenated along the first (channel) dimension and returned as a fastai Image.
    Images for which no pair is found get an all-zero 4-channel mask instead.
    """
    picture = Image.open(fn).convert(self.convert_mode)
    if self.after_open:
        picture = self.after_open(picture)

    img = pil2tensor(picture, np.float32)
    img.div_(255)

    # The file name is the key used by both pair lookups
    im_id = fn.split('/')[-1]

    # We have two different lookups:
    # One maps pairs from train to train
    # One maps pairs from test to train
    # Training images without a train-to-train pair fall through with no pair at all;
    # every test image is expected to be present in the test-to-train lookup.
    if 'train_images' in fn and im_id in trainID_trainID_pairs:
        pair_id = trainID_trainID_pairs[im_id]
    elif 'test_images' in fn:
        pair_id = testID_trainID_pairs[im_id]
    else:
        pair_id = None

    if pair_id is None:
        # No pair available: stand in with an empty mask of the same height and width
        mask = torch.zeros((4, img.shape[1], img.shape[2]))
    else:
        mask_path = DATA/'train_images_annots_350x525'/pair_id.replace('.jpg', '.png')
        # NOTE(review): unlike the image, the mask is not divided by 255 — confirm the
        # stored mask values are already on the intended scale.
        mask = pil2tensor(Image.open(mask_path).convert('RGBA'), np.float32)

    stacked = torch.cat([img, mask], dim=0)
    return fastai.vision.Image(stacked)

SegmentationItemList.open = custom_open

In [34]:
train_image_dir = DATA/('train_images' + str(SUFFIX))
test_image_dir = DATA/('test_images' + str(SUFFIX))

# All training images are used; there is no validation split
train_items = SegmentationItemList.from_df(unique_images, train_image_dir, cols='im_id')
src = train_items.split_none().label_from_func(get_y_fn, classes=codes)

test_src = SegmentationItemList.from_df(paired_test_images, test_image_dir, cols='im_id')

transforms = get_transforms()
augmented = src.transform(
    transforms,
    tfm_y=True,
    size=training_image_size,
    resize_method=ResizeMethod.PAD,
    padding_mode="zeros",
)
# No imagenet normalisation is applied to this databunch
data = augmented.add_test(test_src, tfm_y=False).databunch(bs=batch_size)

def custom_resnet(pretrained=False, progress=True, in_channels=7, **kwargs):
    """
    Create custom ResNet (xresnet18) whose first convolution accepts `in_channels`-channel inputs.

    Args:
        pretrained: forwarded to `models.xresnet18`.
        progress: accepted for signature compatibility; not used here.
        in_channels: number of input channels of the replacement first convolution.
            Defaults to 7, matching the image-plus-mask tensors built for this experiment.
        **kwargs: forwarded to `models.xresnet18`.

    Returns:
        The model with `model[0][0]` replaced by a new, Kaiming-initialised Conv2d.
    """
    model = models.xresnet18(pretrained, **kwargs)
    print("Before", model[0][0].weight.std())
    # Swap the first convolution for one with the requested number of input channels;
    # the new layer starts from a fresh Kaiming-normal initialisation.
    model[0][0] = torch.nn.Conv2d(in_channels, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    torch.nn.init.kaiming_normal_(model[0][0].weight)
    print("After", model[0][0].weight.std())

    return model

# Build the U-Net learner on top of the custom 7-channel encoder, without pretrained weights
learn = unet_learner(data, custom_resnet, pretrained=False, metrics=[multiclass_dice], loss_func=BCEDiceLoss(), model_dir=DATA)

Before tensor(0.2763, grad_fn=<StdBackward0>)
After tensor(0.1782, grad_fn=<StdBackward0>)


In [35]:
# Train
# The data was built with split_none(), so there is no validation set to report on
# (valid_loss shows #na# in the recorded output).
# NOTE(review): the recorded output lists only epochs 0-9 although 60 epochs are requested here;
# it presumably comes from a shorter or interrupted run — re-run before comparing scores.
learn.fit_one_cycle(60, 1e-3)

epoch,train_loss,valid_loss,multiclass_dice,time
0,0.964705,#na#,07:35,
1,0.932413,#na#,07:34,
2,0.913778,#na#,08:05,
3,0.918253,#na#,08:19,
4,0.90777,#na#,08:04,
5,0.929762,#na#,08:01,
6,0.89898,#na#,07:47,
7,0.883485,#na#,07:08,
8,0.874966,#na#,07:08,
9,0.849967,#na#,07:29,


In [36]:
# Get test predictions
test_preds, _ = learn.get_preds(DatasetType.Test)
# Probabilities always lie in [0, 1], so any value outside that range means we received raw logits.
# Checking only `max() > 1` would miss logits that happen to peak below 1, so the minimum is tested too.
if test_preds.max() > 1 or test_preds.min() < 0:
    # If we use custom loss functions, we have to apply the activation ourselves
    print("TEST: It looks like these are logits. Max:", test_preds.max())
    test_preds = torch.sigmoid(test_preds)

test_preds = test_preds.numpy()
# Cut the padded predictions back down to the 350x525 image size.
# NOTE(review): this keeps the top-left corner; if ResizeMethod.PAD pads symmetrically the
# crop would be shifted relative to the image — confirm.
test_preds = test_preds[:, :, :350, :525]

TEST: It looks like these are logits. Max: tensor(12.2118)


In [37]:
#Convert masks to RLE
threshold = 0.5
min_size = 10000
# Prediction channels 0..3 are written to the rows with these label suffixes, in this order
label_suffixes = ("_Fish", "_Flower", "_Gravel", "_Sugar")

for i, row in tqdm(paired_test_images.iterrows()):
    saved_pred = test_preds[i]

    for channel, label_suffix in enumerate(label_suffixes):
        encoded = convert_mask_to_rle(saved_pred[channel], threshold, min_size)

        # Save in dataframe
        test.loc[test['Image_Label'] == row['im_id'] + label_suffix, 'EncodedPixels'] = encoded

# The helper columns are not part of the submission format
submission = test.drop(columns=['label', 'im_id'])
submission.to_csv("submissions/trained_with_pair_labels.csv", index=False)

1202it [00:25, 47.19it/s]


## Train with Labels and Paired Image

One other approach that may work would be to train with both the corresponding label and paired image.

In [None]:
# Clear submission: blank out the RLE column so predictions written by the
# previous model do not carry over into the next submission file
test['EncodedPixels'] = ''

In [None]:
# Keep only the training images that appear as a key in the train-to-train pair lookup
train_ids_with_pair = list(trainID_trainID_pairs.keys())
is_paired = unique_images['im_id'].isin(train_ids_with_pair)
paired_train_images = unique_images.loc[is_paired].reset_index()

In [None]:
# Look up the partner image of every paired training image, row by row
pair_mask = list(map(trainID_trainID_pairs.__getitem__, paired_train_images['im_id']))
paired_train_images['pair_im_id'] = pair_mask

In [None]:
# Create a mapping from test images to corresponding train images
# by swapping the keys and values of the train-to-test lookup
testID_trainID_pairs = {
    test_id: train_id for train_id, test_id in trainID_testID_pairs.items()
}
inv_map = testID_trainID_pairs

In [None]:
def custom_open(self, fn):
    """
    Opens an image, its corresponding pair's mask and the paired image itself.

    Both images are converted with `self.convert_mode`, passed through `self.after_open`
    when that is set, and scaled by 1/255. The pair's mask is read as RGBA (4 channels).
    The three tensors are concatenated along the first (channel) dimension in the order
    (image, pair mask, paired image) and returned as a fastai Image.

    Raises:
        KeyError: if the image has no entry in either pair lookup.
    """

    img = Image.open(fn).convert(self.convert_mode)
    if self.after_open: 
        img = self.after_open(img)
        
    img = pil2tensor(img, np.float32)
    img.div_(255)
    
    # Open mask for image pair
    im_id = fn.split('/')[-1]
    
    # HACK: We have two different lookups:
    # One maps pairs from train to train
    # One maps pairs from test to train
    # We're assuming that if we can't find the pair in the train-to-train lookup, it must exist in the other one
    if im_id in trainID_trainID_pairs:
        pair_id = trainID_trainID_pairs[im_id]
    else:
        pair_id = testID_trainID_pairs[im_id]
        
    mask_id = pair_id.replace('.jpg', '.png')

    mask_path = DATA/'train_images_annots_350x525'/mask_id
    mask = Image.open(mask_path).convert('RGBA')
    # NOTE(review): unlike the images, the mask is not divided by 255 — confirm the
    # stored mask values are already on the intended scale.
    mask = pil2tensor(mask, np.float32)
    
    # Open the paired image (always a training image) and preprocess it like the primary image.
    # `pair_id` is the file name found above.
    img2 = Image.open(DATA/'train_images_350x525'/pair_id).convert(self.convert_mode)
    if self.after_open: 
        img2 = self.after_open(img2)
        
    img2 = pil2tensor(img2, np.float32)
    img2.div_(255)
    
    x = torch.cat([img, mask, img2], dim=0)

    hybrid_image = fastai.vision.Image(x)

    return hybrid_image

SegmentationItemList.open = custom_open

In [None]:
train_image_dir = DATA/('train_images' + str(SUFFIX))
test_image_dir = DATA/('test_images' + str(SUFFIX))

# Only training images that have a pair are used here; there is no validation split
train_items = SegmentationItemList.from_df(paired_train_images, train_image_dir, cols='im_id')
src = train_items.split_none().label_from_func(get_y_fn, classes=codes)

test_src = SegmentationItemList.from_df(paired_test_images, test_image_dir, cols='im_id')

transforms = get_transforms()
augmented = src.transform(
    transforms,
    tfm_y=True,
    size=training_image_size,
    resize_method=ResizeMethod.PAD,
    padding_mode="zeros",
)
# No imagenet normalisation is applied to this databunch
data = augmented.add_test(test_src, tfm_y=False).databunch(bs=batch_size)

def custom_resnet(pretrained=False, progress=True, in_channels=10, **kwargs):
    """
    Create custom ResNet (xresnet18) whose first convolution accepts `in_channels`-channel inputs.

    Args:
        pretrained: forwarded to `models.xresnet18`.
        progress: accepted for signature compatibility; not used here.
        in_channels: number of input channels of the replacement first convolution.
            Defaults to 10, matching the image + mask + paired-image tensors built for this experiment.
        **kwargs: forwarded to `models.xresnet18`.

    Returns:
        The model with `model[0][0]` replaced by a new, Kaiming-initialised Conv2d.
    """
    model = models.xresnet18(pretrained, **kwargs)
    print("Before", model[0][0].weight.std())
    # Swap the first convolution for one with the requested number of input channels;
    # the new layer starts from a fresh Kaiming-normal initialisation.
    model[0][0] = torch.nn.Conv2d(in_channels, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    torch.nn.init.kaiming_normal_(model[0][0].weight)
    print("After", model[0][0].weight.std())

    return model

# Build the U-Net learner on top of the custom 10-channel encoder, without pretrained weights
learn = unet_learner(data, custom_resnet, pretrained=False, metrics=[multiclass_dice], loss_func=BCEDiceLoss(), model_dir=DATA)

In [None]:
# Train
# First a short cycle at a single learning rate
learn.fit_one_cycle(10, 1e-3)
# NOTE(review): the learner was created with pretrained=False; confirm whether any layers
# are frozen at this point, otherwise this unfreeze() has no effect.
learn.unfreeze()
# Then a longer cycle with a learning-rate range passed as a slice
learn.fit_one_cycle(60, slice(1e-6, 1e-3))