### Download Data

In [None]:
# Get all the dataset files
!wget -q \
    https://api.datameka.com:8080/media/uploads/competition/a7eeb262-9c95-477e-913f-4b41c2ce1d90/label.csv \
    https://api.datameka.com:8080/media/uploads/competition/a7eeb262-9c95-477e-913f-4b41c2ce1d90/visibles.zip \
    https://api.datameka.com:8080/media/uploads/competition/a7eeb262-9c95-477e-913f-4b41c2ce1d90/masks.zip \
    https://api.datameka.com:8080/media/uploads/competition/a7eeb262-9c95-477e-913f-4b41c2ce1d90/infrareds.zip

# unzip them
!unzip ./visibles.zip -d ./
!unzip ./masks.zip -d ./
!unzip ./infrareds.zip -d ./

!rm visibles.zip masks.zip infrareds.zip

In [2]:
VISIBLE_FOLDER  = "./visibles/"
INFRARED_FOLDER = "./infrareds/"
MASK_FOLDER     = "./masks/"
LABEL_FILE      = "./label.csv"

In [3]:
!pip -q install segmentation-models-pytorch

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/106.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m102.4/106.7 kB[0m [31m3.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/58.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━

In [4]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m80.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, transformers
Successfully installed tokenizers-0.13.3 transformers-4.31.0


# Data Preparations

In [5]:
import random, os, cv2

import numpy as np
import pandas as pd

from glob import glob

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import segmentation_models_pytorch as smp
import albumentations as A

from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle

import warnings
warnings.filterwarnings("ignore")

In [6]:
def cv_equalize_hist(img):
  img = cv2.normalize(img, None, 255, 0, cv2.NORM_MINMAX, cv2.CV_8U)

  img_yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)
  img_yuv[:,:,0] = cv2.equalizeHist(img_yuv[:,:,0])
  img_output = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR)
  return img_output

def equalize_clahe(img, cl, gs):
  img = cv2.normalize(img, None, 255, 0, cv2.NORM_MINMAX, cv2.CV_8UC1)
  hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
  h, s, v = hsv_img[:,:,0],  hsv_img[:,:,1], hsv_img[:,:,2]

  clahe = cv2.createCLAHE(clipLimit = cl, tileGridSize = gs)
  v = clahe.apply(v)
  hsv_img = np.dstack((h,s,v))
  rgb = cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR)

  return rgb

In [7]:
def Augment(mode):

    if mode == "train":
        return A.Compose([A.RandomContrast( p=0.2),
                          A.RandomGamma(p=0.2),
                        #   A.RandomBrightness(p=0.2),
                          A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), # default imagenet mean & std.
                          A.HorizontalFlip(p=0.5),
                          A.VerticalFlip(p=0.5)],
                         additional_targets={'image2': 'image'}) # this is to augment both the normal and infrared sattellite images.

    else: # valid test
        return A.Compose([A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))],
                         additional_targets={'image2': 'image'})

class FOREST(Dataset):
    def __init__(self,
                 visible_folder,
                 infrared_folder,
                 mask_folder,
                 label_file,
                 mode = "train" # train | valid | test
                ):

        _label_df = pd.read_csv(label_file)
        self.label_df        = _label_df[_label_df["mode"] == mode]
        self.mode            = mode
        self.visible_folder  = visible_folder
        self.infrared_folder = infrared_folder
        self.mask_folder     = mask_folder
        self.augment         = Augment(mode)
        self.mask_dict       = {"plantation"             : 1,
                                "grassland shrubland"    : 2,
                                "smallholder agriculture": 3,
                                "other"                  : 4}

    def __len__(self):
        return len(self.label_df)

    def __getitem__(self, index):

        case_id, deforestation_type, lat, long, year, _ = self.label_df.iloc[index].to_list()

        # load image and mask
        visible  = cv2.imread(self.visible_folder  + str(case_id) + "/composite.png")
        infrared = cv2.imread(self.infrared_folder + str(case_id) + "/composite.png")
        mask     = cv2.imread(self.mask_folder     + str(case_id) + ".png", 0) if (self.mode != "test") else np.zeros(visible.shape[:2]) # dummy mask for test-set case.

        # convert the foreground region in the mask to the corressponding label integer
        label = self.mask_dict[deforestation_type]
        mask[mask == 1.] = label

        #augment mask and image
        visible, infrared, mask = self.augment(image  = visible,
                                               image2 = infrared,
                                               mask   = mask).values()

        # concat visible and infared and a single 5-channel image
        image = np.concatenate((equalize_clahe(visible, 2.0, (32, 32)), infrared), axis = -1)

        return torch.tensor(image), torch.tensor(mask), label, case_id

In [8]:
def show_image(image,
               mask   = None,
               labels = ["no deforestation",
                         "plantation",
                         "grassland shrubland",
                         "smallholder agriculture",
                         "other"],
               colors = np.array([(0.,0.,0.),
                                  (0.667,0.,0.),
                                  (0.,0.667,0.677),
                                  (0.,0.,0.667),
                                  (0.667, 0.667, 0.667)])):

    # copy to prevent from modifying the input image and mask
    image = np.copy(image)
    mask  = np.copy(mask) if mask is not None else mask

    # normalize to [0-1]
    image = (image - image.min())/(image.max() - image.min())
    # add good-looking color
    mask  = colors[mask] if mask is not None else mask

    plt.imshow(image, cmap='bone')
    if mask is not None:
        plt.imshow(mask, alpha=0.6)
        handles = [Rectangle((0,0),1,1, color=color) for color in colors]
        plt.legend(handles, labels)
    plt.axis('off')

    return None

In [9]:
from transformers import SegformerForSemanticSegmentation

pretrained_model_name = "nvidia/segformer-b5-finetuned-ade-640-640"
# pretrained_model = "nvidia/segformer-b5-finetuned-cityscapes-1024-1024"
model = SegformerForSemanticSegmentation.from_pretrained(
    pretrained_model_name,
    num_labels=5,
    num_channels=6,
    ignore_mismatched_sizes=True
).cuda()

Downloading (…)lve/main/config.json:   0%|          | 0.00/6.89k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/339M [00:00<?, ?B/s]

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b5-finetuned-ade-640-640 and are newly initialized because the shapes did not match:
- segformer.encoder.patch_embeddings.0.proj.weight: found shape torch.Size([64, 3, 7, 7]) in the checkpoint and torch.Size([64, 6, 7, 7]) in the model instantiated
- decode_head.classifier.weight: found shape torch.Size([150, 768, 1, 1]) in the checkpoint and torch.Size([5, 768, 1, 1]) in the model instantiated
- decode_head.classifier.bias: found shape torch.Size([150]) in the checkpoint and torch.Size([5]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
def dice_loss(logits, true, eps=1e-7):
    """Computes the Sørensen–Dice loss.
    Note that PyTorch optimizers minimize a loss. In this
    case, we would like to maximize the dice loss so we
    return the negated dice loss.
    Args:
        true: a tensor of shape [B, H, W].
        logits: a tensor of shape [B, C, H, W]. Corresponds to
            the raw output or logits of the model.
        eps: added to the denominator for numerical stability.
    Returns:
        dice_loss: the Sørensen–Dice loss.
    """
    logits = logits.logits
    true = true.unsqueeze(1)

    # scale the logits to the size of the label
    logits = nn.functional.interpolate(
        logits,
        size=true.shape[-2:],
        mode="bilinear",
        align_corners=False,
    )

    num_classes = logits.shape[1]
    if num_classes == 1:
        true_1_hot = torch.eye(num_classes + 1)[true.squeeze(1)]
        true_1_hot = true_1_hot.permute(0, 3, 1, 2).float()
        true_1_hot_f = true_1_hot[:, 0:1, :, :]
        true_1_hot_s = true_1_hot[:, 1:2, :, :]
        true_1_hot = torch.cat([true_1_hot_s, true_1_hot_f], dim=1)
        pos_prob = torch.sigmoid(logits)
        neg_prob = 1 - pos_prob
        probas = torch.cat([pos_prob, neg_prob], dim=1)
    else:
        true_1_hot = torch.eye(num_classes, device="cuda")[true.squeeze(1)]
        true_1_hot = true_1_hot.permute(0, 3, 1, 2).float()
        probas = F.softmax(logits.float(), dim=1)
    true_1_hot = true_1_hot.type(logits.type())
    dims = (0,) + tuple(range(2, true.ndimension()))
    intersection = torch.sum(probas * true_1_hot, dims)
    cardinality = torch.sum(probas + true_1_hot, dims)
    dice_loss = (2. * intersection / (cardinality + eps)).mean()
    return (1 - dice_loss)

# hard dice score for vadiation set evaluation
def hard_dice(pred, mask, label):

    #pick the channel that coressponds to the true label
    pred = (torch.argmax(pred, dim = 1) == label).long().view(-1)
    mask = mask.view(-1)

    # compute hard dice score for the foreground region
    score = (torch.sum(pred * mask)*2)/ (torch.sum(pred) + torch.sum(mask))

    return np.array(score)

In [11]:
loss_fn = dice_loss
optimizer = optim.Adam(model.parameters(), lr=0.00006)

In [12]:
from tqdm import tqdm

def train(trainloader, validloader, model,
          n_epoch = 10):

    pbar = tqdm(range(n_epoch))
    for epoch in pbar:
        model.train()
        train_loss = train_epoch(trainloader, model)

        with torch.no_grad():
            valid_dice = evaluate_epoch(validloader, model)

        pbar.set_postfix_str(f"Loss: {round(train_loss, 4)} valid_dice: {round(valid_dice, 4)}")

    return model

def train_epoch(trainloader, model):

    losses = []
    for (inputs, targets, *_) in trainloader:
        # forward pass
        outputs = model(inputs.permute(0,-1,1,2).cuda()) # channel first
        targets = targets.long().cuda()

        # calculate loss
        loss = loss_fn(outputs, targets)

        # backward pass and update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.append(loss.item())

    return np.mean(losses)

def evaluate_epoch(validloader, model):

    scores = []
    for (inputs, targets, label, _) in validloader:
        outputs = model(inputs.permute(0,-1,1,2).cuda()).logits
        outputs = nn.functional.interpolate(
            outputs,
            size=[320, 320],
            mode="bilinear",
            align_corners=False,
        ).detach().cpu() #channel first
        targets = targets.long()

        #calculate dice
        score = hard_dice(outputs, targets, label)

        scores.append(score)

    return np.mean(scores)

In [13]:
train_dataset = FOREST(VISIBLE_FOLDER, INFRARED_FOLDER, MASK_FOLDER, LABEL_FILE,
                       mode = "train")
valid_dataset = FOREST(VISIBLE_FOLDER, INFRARED_FOLDER, MASK_FOLDER, LABEL_FILE,
                       mode = "valid")

train_loader = DataLoader(train_dataset,
                          batch_size  = 12,
                          num_workers = 16,
                          shuffle     = True,
                          pin_memory  = True)

valid_loader = DataLoader(train_dataset,
                          batch_size  = 1,
                          num_workers = 4,
                          shuffle     = False,
                          pin_memory  = False)

model = train(train_loader, valid_loader, model,
              n_epoch = 20)

100%|██████████| 20/20 [2:41:19<00:00, 483.96s/it, Loss: 0.5792 valid_dice: 0.3953999876976013]


In [None]:
# lets define mask to RLE conversion
def rle_encode(mask_image):
    pixels = mask_image.flatten()
    pixels[0] = 0
    pixels[-1] = 0
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 2
    runs[1::2] = runs[1::2] - runs[:-1:2]

    # to string format
    runs = ' '.join(str(x) for x in runs)

    return runs

def predict(model, loader):

    test_results = []
    for (inputs, _, label, image_id) in loader:

        # forward pass
        pred = model(inputs.permute(0,-1,1,2).cuda()).logits # channel first
        pred = nn.functional.interpolate(
            pred,
            size=[320, 320],
            mode="bilinear",
            align_corners=False,
        )
        # move back to cpu
        pred     = pred.detach().cpu()
        image_id = str(image_id[0].item())

        #pick the channel that coressponds to the true label
        pred = (torch.argmax(pred, dim = 1) == label).squeeze(0).long().numpy()

        #convert to rle
        pred_rle = rle_encode(pred)

        test_results.append({"image_id" : image_id,
                             "pred_rle" : pred_rle})

    return test_results

In [None]:
test_dataset = FOREST(VISIBLE_FOLDER, INFRARED_FOLDER, MASK_FOLDER, LABEL_FILE,
                      mode = "test")

test_loader = DataLoader(test_dataset,
                         batch_size  = 1,
                         num_workers = 14,
                         shuffle     = False,
                         pin_memory  = False)


test_results = predict(model, test_loader)

df_submission = pd.DataFrame.from_dict(test_results)

df_submission.to_csv("my_submission.csv", index = False)

In [None]:
from google.colab import files
files.download("my_submission.csv")

In [None]:
model.save_pretrained("./segformer")

In [None]:
!zip -r segformer.zip ./segformer

  adding: segformer/ (stored 0%)
  adding: segformer/pytorch_model.bin (deflated 7%)
  adding: segformer/config.json (deflated 57%)
