In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Echo the full path of every file found anywhere under the Kaggle input directory.
for folder, _subdirs, files_here in os.walk('/kaggle/input'):
    full_paths = [os.path.join(folder, name) for name in files_here]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/carvana-image-masking-challenge/train_masks.zip
/kaggle/input/carvana-image-masking-challenge/29bb3ece3180_11.jpg
/kaggle/input/carvana-image-masking-challenge/train_masks.csv.zip
/kaggle/input/carvana-image-masking-challenge/train.zip
/kaggle/input/carvana-image-masking-challenge/metadata.csv.zip
/kaggle/input/carvana-image-masking-challenge/sample_submission.csv.zip
/kaggle/input/carvana-image-masking-challenge/test.zip
/kaggle/input/carvana-image-masking-challenge/test_hq.zip
/kaggle/input/carvana-image-masking-challenge/train_hq.zip


In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# importing the zipfile module 
from zipfile import ZipFile 

# Unpack the training photos into the writable working directory.
# (A later cell lists /kaggle/working/train_images/train, i.e. the archive
# carries its own top-level "train" folder.)
with ZipFile("/kaggle/input/carvana-image-masking-challenge/train.zip", mode='r') as train_archive:
    train_archive.extractall("/kaggle/working/train_images/")


In [4]:
# Unpack the ground-truth masks next to the images
# (a later cell lists them from /kaggle/working/train_masks).
with ZipFile("/kaggle/input/carvana-image-masking-challenge/train_masks.zip", mode='r') as mask_archive:
    mask_archive.extractall("/kaggle/working/")

In [5]:
# Alphabetical listing of the extracted training image file names.
train_imgs = os.listdir("/kaggle/working/train_images/train")
train_imgs.sort()

In [6]:
# Alphabetical listing of the extracted mask file names.
train_masks = os.listdir("/kaggle/working/train_masks")
train_masks.sort()

In [7]:
train_imgs[:16], train_masks[:16]

(['00087a6bd4dc_01.jpg',
  '00087a6bd4dc_02.jpg',
  '00087a6bd4dc_03.jpg',
  '00087a6bd4dc_04.jpg',
  '00087a6bd4dc_05.jpg',
  '00087a6bd4dc_06.jpg',
  '00087a6bd4dc_07.jpg',
  '00087a6bd4dc_08.jpg',
  '00087a6bd4dc_09.jpg',
  '00087a6bd4dc_10.jpg',
  '00087a6bd4dc_11.jpg',
  '00087a6bd4dc_12.jpg',
  '00087a6bd4dc_13.jpg',
  '00087a6bd4dc_14.jpg',
  '00087a6bd4dc_15.jpg',
  '00087a6bd4dc_16.jpg'],
 ['00087a6bd4dc_01_mask.gif',
  '00087a6bd4dc_02_mask.gif',
  '00087a6bd4dc_03_mask.gif',
  '00087a6bd4dc_04_mask.gif',
  '00087a6bd4dc_05_mask.gif',
  '00087a6bd4dc_06_mask.gif',
  '00087a6bd4dc_07_mask.gif',
  '00087a6bd4dc_08_mask.gif',
  '00087a6bd4dc_09_mask.gif',
  '00087a6bd4dc_10_mask.gif',
  '00087a6bd4dc_11_mask.gif',
  '00087a6bd4dc_12_mask.gif',
  '00087a6bd4dc_13_mask.gif',
  '00087a6bd4dc_14_mask.gif',
  '00087a6bd4dc_15_mask.gif',
  '00087a6bd4dc_16_mask.gif'])

In [8]:
# The training set has 5088 images (16 viewing angles per car); hold out the first 80 images (5 cars x 16 angles) as the validation set.

In [9]:
# exist_ok=True keeps this cell re-runnable: without it, running the cell a
# second time in the same session raises FileExistsError.
os.makedirs("/kaggle/working/val_images", exist_ok=True)
os.makedirs("/kaggle/working/val_masks", exist_ok=True)

In [10]:
import shutil

In [11]:
# Number of (image, mask) pairs moved out of the training folders to form the
# validation set: 5 cars x 16 viewing angles.
VAL_COUNT = 80

for image, mask in zip(train_imgs[:VAL_COUNT], train_masks[:VAL_COUNT]):
    # The two listings are paired purely by sort position, so confirm that the
    # names really belong together before touching any file. A missing or
    # extra file in either folder would otherwise silently pair the wrong
    # mask with an image.
    expected_mask = image.replace('.jpg', '_mask.gif')
    if mask != expected_mask:
        raise ValueError(
            f"Image/mask mismatch: {image} is paired with {mask}, expected {expected_mask}"
        )

    img_path = os.path.join("/kaggle/working/train_images/train/", image)
    mask_path = os.path.join("/kaggle/working/train_masks/", mask)

    new_img_path = os.path.join("/kaggle/working/val_images/", image)
    new_mask_path = os.path.join("/kaggle/working/val_masks/", mask)

    shutil.move(img_path, new_img_path)
    shutil.move(mask_path, new_mask_path)

In [12]:
len(os.listdir("/kaggle/working/train_images/train")), len(os.listdir("/kaggle/working/train_masks"))

(5008, 5008)

In [13]:
len(os.listdir("/kaggle/working/val_images")), len(os.listdir("/kaggle/working/val_masks"))

(80, 80)

### Model

In [14]:
import torch
import torch.nn as nn
import torchvision.transforms.functional as TF


class DoubleConv(nn.Module):
    """Two stacked 3x3 convolutions, each followed by BatchNorm2d and ReLU.

    Both convolutions use stride 1 and padding 1, so the spatial size of the
    input is kept; only the channel count changes from ``in_channels`` to
    ``out_channels``. The convolutions are created without a bias term (each
    is immediately followed by a BatchNorm2d layer).
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = []
        # First stage maps in_channels -> out_channels, second keeps out_channels.
        for stage_in in (in_channels, out_channels):
            stages += [
                nn.Conv2d(stage_in, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both conv/BN/ReLU stages to ``x`` and return the result."""
        return self.conv(x)


class UNET(nn.Module):
    """U-Net style encoder/decoder built from DoubleConv blocks.

    Args:
        in_channels: number of channels of the input image.
        out_channels: number of channels produced by the final 1x1 convolution.
        out_channels_list: channel width of each encoder level, top to bottom.
            Defaults to ``[64, 128, 256, 512]``. The bottleneck uses twice the
            last width, and the decoder mirrors the list in reverse.

    ``forward`` returns the raw output of the final convolution; no activation
    is applied inside the model.
    """

    def __init__(self, in_channels=3, out_channels=1, out_channels_list=None):
        super().__init__()
        widths = [64, 128, 256, 512] if out_channels_list is None else out_channels_list

        self.downs = nn.ModuleList()  # encoder DoubleConv blocks
        self.ups = nn.ModuleList()    # decoder: alternating up-conv / DoubleConv
        self.pool = nn.MaxPool2d(2, 2)

        # Encoder: each level consumes the previous level's output width.
        level_inputs = [in_channels] + list(widths[:-1])
        for level_in, level_out in zip(level_inputs, widths):
            self.downs.append(DoubleConv(level_in, level_out))

        # Bottleneck (lowest level) doubles the deepest encoder width.
        self.bottleneck = DoubleConv(widths[-1], widths[-1] * 2)

        # Decoder: for each level, a 2x2 transposed conv halves the channels and
        # doubles the resolution; the following DoubleConv sees twice the level
        # width because the skip connection is concatenated on the channel axis.
        for width in reversed(widths):
            self.ups.extend([
                nn.ConvTranspose2d(width * 2, width, kernel_size=2, stride=2),
                DoubleConv(width * 2, width),
            ])

        # Final 1x1 convolution reduces to the requested number of output channels.
        self.final_conv = nn.Conv2d(widths[0], out_channels, kernel_size=1)

    def forward(self, x):
        """Run the encoder, bottleneck and decoder on ``x`` and return the output map."""
        encoder_outputs = []

        # Encoder: keep each level's output for its skip connection, then pool.
        for encoder_block in self.downs:
            x = encoder_block(x)
            encoder_outputs.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)

        # Decoder walks the stored encoder outputs from deepest to shallowest;
        # self.ups holds (up-conv, DoubleConv) pairs at indices (2k, 2k + 1).
        for level, skip in enumerate(reversed(encoder_outputs)):
            upsample = self.ups[2 * level]
            refine = self.ups[2 * level + 1]

            x = upsample(x)
            if x.shape != skip.shape:
                # Pooling floors odd sizes, so the upsampled map can be smaller
                # than the skip tensor; resize height and width to match it.
                x = TF.resize(x, size=skip.shape[2:])

            x = refine(torch.cat([skip, x], dim=1))  # concatenate along channels

        return self.final_conv(x)


### Dataset

In [15]:
import os
from PIL import Image
from torch.utils.data import Dataset
import numpy as np


class CarvanaDataset(Dataset):
    """Image/mask pairs of the Carvana dataset read from two directories.

    Args:
        image_dir: directory containing the ``.jpg`` input images.
        mask_dir: directory containing the masks; the mask of ``<name>.jpg`` is
            looked up as ``<name>_mask.gif``.
        transform: optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``"image"`` and ``"mask"`` entries.

    Each item is an ``(image, mask)`` tuple. Without a transform, ``image`` is an
    RGB numpy array and ``mask`` is a float32 numpy array in which pixel value
    255 has been replaced by 1.0.
    """

    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes a given
        # index refer to the same sample on every run and every machine.
        self.images = sorted(os.listdir(image_dir))

    def __len__(self):
        """Number of files found in ``image_dir``."""
        return len(self.images)

    def __getitem__(self, item):
        """Load and return the ``(image, mask)`` pair at position ``item``."""
        image_name = self.images[item]
        img_path = os.path.join(self.image_dir, image_name)
        mask_path = os.path.join(self.mask_dir, image_name.replace('.jpg', '_mask.gif'))

        # Context managers close the underlying files as soon as the pixel data
        # has been copied into numpy arrays.
        with Image.open(img_path) as img_file:
            image = np.array(img_file.convert("RGB"))
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file.convert("L"), dtype=np.float32)

        # Bright (255) pixels become the positive label 1.0, matching the
        # sigmoid / BCE-with-logits formulation used for training.
        mask[mask == 255.0] = 1.0

        if self.transform is not None:
            augmentations = self.transform(image=image, mask=mask)
            image = augmentations["image"]
            mask = augmentations["mask"]

        return image, mask

### Utils

In [16]:
# exist_ok=True so that re-running this cell does not raise FileExistsError.
os.makedirs("/kaggle/working/saved_images/", exist_ok=True)

In [18]:
import torchvision
from torch.utils.data import DataLoader


def save_checkpoint(state, epoch, filename="/kaggle/working/checkpoint.pth"):
    """Serialize ``state`` with ``torch.save`` to a per-epoch checkpoint file.

    The epoch number is inserted before the extension of ``filename``, so the
    default produces ``/kaggle/working/checkpoint<epoch>.pth``. A custom
    ``filename`` is honoured the same way (``run/ckpt.pth`` with epoch 3 is
    written to ``run/ckpt3.pth``); it used to be ignored.

    Args:
        state: object to save (here a dict of model/optimizer state dicts).
        epoch: value appended to the file stem to keep one file per epoch.
        filename: base path of the checkpoint file.
    """
    print("=> Saving checkpoint")
    stem, extension = os.path.splitext(filename)
    torch.save(state, f"{stem}{epoch}{extension}")


def load_checkpoint(checkpoint, model):
    """Copy the weights stored under ``checkpoint["state_dict"]`` into ``model`` in place."""
    print("=> Loading checkpoint")
    saved_weights = checkpoint["state_dict"]
    model.load_state_dict(saved_weights)


def get_data_loaders(train_dir, train_maskdir, val_dir, val_maskdir, batch_size, train_transform, val_transform, num_workers=4, pin_memory=True):
    """Build the training and validation DataLoaders over CarvanaDataset.

    Both loaders share ``batch_size``, ``num_workers`` and ``pin_memory``; only
    the training loader shuffles its samples.

    Returns:
        ``(train_loader, val_loader)``
    """
    shared_options = dict(batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory)

    train_ds = CarvanaDataset(image_dir=train_dir, mask_dir=train_maskdir, transform=train_transform)
    train_loader = DataLoader(train_ds, shuffle=True, **shared_options)

    val_ds = CarvanaDataset(image_dir=val_dir, mask_dir=val_maskdir, transform=val_transform)
    val_loader = DataLoader(val_ds, shuffle=False, **shared_options)

    return train_loader, val_loader


def check_accuracy(loader, model, device="cuda"):
    """Print the pixel accuracy and mean per-batch Dice score of ``model`` on ``loader``.

    Model outputs are passed through a sigmoid and thresholded at 0.5 before
    being compared with the targets. The model is switched to eval mode for the
    evaluation and put back into train mode before returning.

    Args:
        loader: iterable of ``(x, y)`` batches that also supports ``len()``;
            ``y`` gets a channel dimension added at axis 1 before comparison.
        model: network to evaluate.
        device: device the batches are moved to.
    """
    num_correct = 0
    num_pixels = 0
    dice_score = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device).unsqueeze(1)
            preds = torch.sigmoid(model(x))
            preds = (preds > 0.5).float()
            num_correct += (preds == y).sum()
            num_pixels += torch.numel(preds)
            # Dice = 2 * |intersection| / (|prediction| + |target|); the small
            # epsilon avoids 0/0 when both maps are empty.
            dice_score += (2 * (preds * y).sum()) / ((preds + y).sum() + 1e-8)

    model.train()

    if num_pixels == 0:
        # Nothing was evaluated (empty loader); the ratios below would be 0 / 0.
        print("No samples to evaluate; skipping accuracy and Dice score")
        return

    print(f"Got {num_correct}/{num_pixels} with acc {num_correct / num_pixels * 100:.2f}")
    print(f"Dice score: {dice_score / len(loader)}")


def save_predictions_as_imgs(loader, model, folder="/kaggle/working/saved_images/", device="cuda"):
    """Write thresholded predictions and their targets as PNG files into ``folder``.

    For batch number ``idx`` the prediction is saved as ``pred_<idx>.png`` and
    the target as ``<idx>.png``. Both paths are built with ``os.path.join`` so
    they land inside ``folder`` whether or not it ends with a path separator.
    The model is put into eval mode for the pass and back into train mode
    afterwards.

    Args:
        loader: iterable of ``(x, y)`` batches.
        model: network used to produce the predictions.
        folder: existing directory the images are written to.
        device: device the input batches are moved to.
    """
    model.eval()  # set model to evaluation mode
    for idx, (x, y) in enumerate(loader):
        x = x.to(device=device)
        with torch.no_grad():
            preds = torch.sigmoid(model(x))
            preds = (preds > 0.5).float()
        torchvision.utils.save_image(preds, os.path.join(folder, f"pred_{idx}.png"))
        # y gets a channel dimension added so it is saved like the predictions.
        torchvision.utils.save_image(y.unsqueeze(1), os.path.join(folder, f"{idx}.png"))

    model.train()

### Training


In [20]:
# Imports
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim


# Hyperparameters and paths used by the training cell below
LEARNING_RATE = 1e-4  # learning rate passed to the Adam optimizer
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # use the GPU when one is available
BATCH_SIZE = 16  # batch size for both the training and the validation loader
NUM_EPOCHS = 5  # number of passes over the training set
NUM_WORKERS = 2  # DataLoader worker processes
IMAGE_HEIGHT = 320  # resize target height (1280 originally)
IMAGE_WIDTH = 480  # resize target width (1918 originally)
PIN_MEMORY = True  # forwarded to DataLoader(pin_memory=...)
LOAD_MODEL = False  # when True, try to restore weights from a checkpoint before training
TRAIN_IMG_DIR = "/kaggle/working/train_images/train/"
TRAIN_MASK_DIR = "/kaggle/working/train_masks/"
VAL_IMG_DIR = "/kaggle/working/val_images"
VAL_MASK_DIR = "/kaggle/working/val_masks"


def train_loop(loader, model, optimizer, loss_fn, scaler):
    """Run one pass over ``loader``, updating ``model`` with mixed-precision training.

    Args:
        loader: iterable of ``(X, y)`` batches; ``y`` is cast to float and gets a
            channel dimension added at axis 1.
        model: network being trained.
        optimizer: optimizer stepped once per batch through ``scaler``.
        loss_fn: callable taking ``(predictions, targets)`` and returning the loss.
        scaler: gradient scaler handling the scaled backward pass and the step.
    """
    progress = tqdm(loader)

    for inputs, targets in progress:
        inputs = inputs.to(device=DEVICE)
        # add the channel dimension expected by the loss
        targets = targets.float().unsqueeze(1).to(device=DEVICE)

        # forward pass under autocast (mixed precision)
        with torch.cuda.amp.autocast():
            logits = model(inputs)
            batch_loss = loss_fn(logits, targets)

        # backward pass: clear old gradients, then scaled backward / step / update
        optimizer.zero_grad()
        scaler.scale(batch_loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # show the latest batch loss on the progress bar
        progress.set_postfix(loss=batch_loss.item())


#####################
### Main Function ###
#####################

# Training pipeline: resize, random geometric augmentation, scale pixels to
# [0, 1] (mean 0, std 1, max value 255), then convert to a tensor.
train_steps = [
    A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
    A.Rotate(limit=30, p=1.0),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.1),
    A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
    ToTensorV2(),
]
train_transform = A.Compose(train_steps)

# Validation pipeline: same resize and scaling, no random augmentation.
val_steps = [
    A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
    A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
    ToTensorV2(),
]
val_transforms = A.Compose(val_steps)

# Network, loss and optimizer. BCEWithLogitsLoss takes the raw model outputs
# (the sigmoid is applied inside the loss).
model = UNET(in_channels=3, out_channels=1)
model = model.to(DEVICE)
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Data loaders for the training and the held-out validation folders.
train_loader, val_loader = get_data_loaders(
    train_dir=TRAIN_IMG_DIR,
    train_maskdir=TRAIN_MASK_DIR,
    val_dir=VAL_IMG_DIR,
    val_maskdir=VAL_MASK_DIR,
    batch_size=BATCH_SIZE,
    train_transform=train_transform,
    val_transform=val_transforms,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)

if LOAD_MODEL:
    # NOTE: save_checkpoint writes one file per epoch (checkpoint<epoch>.pth);
    # point this path at the file you actually want to resume from.
    checkpoint_path = "/kaggle/working/checkpoint.pth"
    try:
        # map_location lets a checkpoint saved on GPU be restored on a CPU-only session.
        load_checkpoint(torch.load(checkpoint_path, map_location=DEVICE), model)
    except (FileNotFoundError, EOFError) as err:
        # A missing file raises FileNotFoundError (EOFError only covers a
        # truncated file). Report it instead of silently training from scratch.
        print(f"=> Could not load {checkpoint_path} ({err!r}); starting from scratch")

# check_accuracy(val_loader, model, device=DEVICE)  # check accuracy after loading a checkpoint
scaler = torch.cuda.amp.GradScaler()

for epoch in range(NUM_EPOCHS):
    # one pass over the training data
    train_loop(train_loader, model, optimizer, loss_fn, scaler)

    # snapshot model and optimizer state for this epoch
    checkpoint = dict(
        state_dict=model.state_dict(),
        optimizer=optimizer.state_dict(),
    )
    save_checkpoint(checkpoint, epoch)

    # report pixel accuracy and Dice score on the validation set
    check_accuracy(val_loader, model, device=DEVICE)

    # dump predicted and ground-truth masks for visual inspection
    save_predictions_as_imgs(val_loader, model, folder="/kaggle/working/saved_images/", device=DEVICE)

100%|██████████| 313/313 [04:46<00:00,  1.09it/s, loss=0.147]


=> Saving checkpoint
Got 12098644/12288000 with acc 98.46
Dice score: 0.9642143249511719


100%|██████████| 313/313 [04:46<00:00,  1.09it/s, loss=0.0874]


=> Saving checkpoint
Got 12224047/12288000 with acc 99.48
Dice score: 0.9874655604362488


100%|██████████| 313/313 [04:46<00:00,  1.09it/s, loss=0.058] 


=> Saving checkpoint
Got 12215935/12288000 with acc 99.41
Dice score: 0.9859868288040161


100%|██████████| 313/313 [04:46<00:00,  1.09it/s, loss=0.0402]


=> Saving checkpoint
Got 12242185/12288000 with acc 99.63
Dice score: 0.9910537600517273


100%|██████████| 313/313 [04:46<00:00,  1.09it/s, loss=0.0302]


=> Saving checkpoint
Got 12174267/12288000 with acc 99.07
Dice score: 0.9781190752983093
