In [1]:
# imports
import torch.nn as nn
import torchvision.transforms.functional as TF

In [32]:
# Define model class

class Residual(nn.Module):
    def __init__(self, in_c, out_c):
        super(Residual, self).__init__()
        self.in_c = in_c
        self.out_c = out_c
        self.conv = nn.Sequential(
            nn.Conv2d(in_c, out_c, 1, 1, 0, bias=False),
            nn.BatchNorm2d(out_c),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_c, out_c, 3, 1, 1, bias=False),
            nn.BatchNorm2d(out_c),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_c, in_c, 3, 1, 1, bias=False),
            #
            # nn.Conv2d(in_c, out_c, 3, 1, 1, bias=False),
            # nn.BatchNorm2d(out_c),
            # nn.ReLU(inplace=True),
            # nn.Conv2d(out_c, out_c, 3, 1, 1, bias=False),
            # nn.BatchNorm2d(out_c),
            # nn.ReLU(inplace=True),
        )
    
    def forward(self, x):
        tmp = self.conv(x)
        x += tmp
        return nn.Conv2d(self.in_c, self.out_c, 1, 1, 0, bias=False, device='cuda:0')(x)
    
class Unet(nn.Module):
    def __init__ (
        self,
        in_c=3,
        out_c=1,
        features=None
    ):
        super(Unet, self).__init__()
        if features is None:
            features = [
                64,
                128,
                256,
                512
            ]
        self.up = nn.ModuleList()
        self.down = nn.ModuleList()
        self.pool = nn.MaxPool2d(2,2)
        for feature in features:
            self.down.append(Residual(in_c, feature))
            in_c = feature
        
        for feature in reversed(features):
            self.up.append(
                nn.ConvTranspose2d(
                    feature * 2, feature, kernel_size=2, stride=2
                )
            )
            self.up.append(Residual(feature * 2, feature))
        
        self.bottleneck = Residual(features[-1], features[-1] * 2)
        self.final_conv = nn.Conv2d(features[0], out_c, kernel_size=1)
        
    def forward(self, x):
        skip_connections = []

        for down in self.down:
            x = down(x)
            skip_connections.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)
        skip_connections = skip_connections[::-1]

        for idx in range(0, len(self.up), 2):
            x = self.up[idx](x)
            skip_connection = skip_connections[idx//2]

            if x.shape != skip_connection.shape:
                x = TF.resize(x, size=skip_connection.shape[2:])

            concat_skip = torch.cat((skip_connection, x), dim=1)
            x = self.up[idx+1](concat_skip)

        return self.final_conv(x)

In [33]:
# Define Dataset class
import os
from PIL import Image
from torch.utils.data import Dataset
import numpy as np

class RoadDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.images = os.listdir(image_dir)
        
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, index):
        img_path = os.path.join(self.image_dir, self.images[index])
        mask_path = os.path.join(self.mask_dir, self.images[index])
        image = np.array(Image.open(img_path).convert("RGB"))
        mask = np.array(Image.open(mask_path).convert("L"), dtype=np.float32)
        mask[mask == 255.0] = 1.0
        if self.transform is not None:
            aug = self.transform(image=image, mask=mask)
            image = aug['image']
            mask = aug['mask']
        return image, mask

In [34]:
# Helper functions
import torchvision
from torch.utils.data import DataLoader

def save_checkpoint(state, filename):
    print(f"===> Saving checkpoint {filename}")
    torch.save(state, filename)

def get_loaders(
    train_dir,
    train_mask_dir,
    val_dir,
    val_mask_dir,
    batch_size,
    train_transform,
    val_transform,
    # num_workers=4,
    pin_memory=True,
):
    train_ds = RoadDataset(
        image_dir=train_dir,
        mask_dir=train_mask_dir,
        transform=train_transform
    )

    
    train_loader = DataLoader(
        train_ds, 
        batch_size=batch_size, 
        # num_workers=num_workers,
        pin_memory=pin_memory, 
        shuffle=True
    )
    
    val_ds = RoadDataset(
        image_dir=val_dir,
        mask_dir=val_mask_dir,
        transform=val_transform,
    )
    
    val_loader = DataLoader(
        val_ds,
        batch_size=batch_size,
        # num_workers=num_workers,
        pin_memory=pin_memory,
        shuffle=False,
    )
    
    return train_loader, val_loader


class DiceLoss(nn.Module):
    def __init__(self):
        super(DiceLoss, self).__init__()

    def forward(self, input, target):
        smooth = 1.
        iflat = input.contiguous().view(-1)
        tflat = target.contiguous().view(-1)
        intersection = (iflat * tflat).sum()

        return 1 - ((2. * intersection + smooth) /
                  (iflat.sum() + tflat.sum() + smooth))


def check_accuracy(loader, model, device="cuda"):
    num_correct = 0
    num_pixels = 0
    dice_score = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device).unsqueeze(1)
            preds = torch.sigmoid(model(x))
            preds = (preds > 0.5).float()
            num_correct += (preds == y).sum()
            num_pixels += torch.numel(preds)
            dice_score += (2 * (preds * y).sum()) / (
                (preds + y).sum() + 1e-8
            )

    print(
        f"Got {num_correct}/{num_pixels} with acc {num_correct/num_pixels*100:.2f}"
    )
    print(f"Dice score: {dice_score/len(loader)}")
    model.train()
    
def save_preds_as_imgs(
    loader, model, folder="saved_images/", device="cuda"
):
    model.eval()
    for idx, (x, y) in enumerate(loader):
        x = x.to(device=device)
        with torch.no_grad():
            preds = torch.sigmoid(model(x))
            preds = (preds > 0.5).float()
        torchvision.utils.save_image(
            preds, f"{folder}/pred_{idx}.png"
        )
        torchvision.utils.save_image(y.unsqueeze(1), f"{folder}{idx}.png")

    model.train()


In [37]:
import torch

import albumentations as albu
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim


# Hyperparameters etc.
LEARNING_RATE = 1e-5
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 4
NUM_EPOCHS = 10
NUM_WORKERS = 4
IMAGE_HEIGHT = 256  # 1280 originally
IMAGE_WIDTH = 256  # 1918 originally
PIN_MEMORY = True
TRAIN_IMG_DIR = "input/tiff/train/"
TRAIN_MASK_DIR = "input/tiff/train_labels/"
VAL_IMG_DIR = "input/tiff/val/"
VAL_MASK_DIR = "input/tiff/val_labels/"

def train_fn(loader, model, optimizer, loss_fn, scaler):
    loop = tqdm(loader)

    for batch_idx, (data, targets) in enumerate(loop):
        data = data.to(device=DEVICE)
        targets = targets.float().to(device=DEVICE)

        # forward
        with torch.cuda.amp.autocast():
            predictions = model(data)
            loss = loss_fn(predictions, targets)

        # backward

        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # update tqdm loop
        loop.set_postfix(loss=loss.item())


def main():
    train_transform = albu.Compose(
        [
            albu.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
            albu.Rotate(limit=35, p=1.0),
            albu.HorizontalFlip(p=0.5),
            albu.VerticalFlip(p=0.1),
            albu.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),

            ToTensorV2()
        ],
    )

    val_transforms = albu.Compose(
        [
            albu.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
            albu.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
            ToTensorV2()
        ],
    )

    model = Unet(in_c=3, out_c=1).to(DEVICE)
    loss_fn = DiceLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    train_loader, val_loader = get_loaders(
        TRAIN_IMG_DIR,
        TRAIN_MASK_DIR,
        VAL_IMG_DIR,
        VAL_MASK_DIR,
        BATCH_SIZE,
        train_transform,
        val_transforms,
        # NUM_WORKERS,
        PIN_MEMORY,
    )



    # check_accuracy(val_loader, model, device=DEVICE)
    scaler = torch.cuda.amp.GradScaler()

    for epoch in range(NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler)

        # save model
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer":optimizer.state_dict(),
        }
        save_checkpoint(checkpoint, f"checkpoints/{epoch}_.pth")

        # check accuracy
        check_accuracy(val_loader, model, device=DEVICE)

        # print some examples to a folder
        save_preds_as_imgs(
            val_loader, model, folder="saved_images/", device=DEVICE
        )


In [38]:
main()

  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  0%|          | 0/277 [00:00<?, ?it/s]


RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.HalfTensor [4, 128, 256, 256]], which is output 0 of AddBackward0, is at version 1; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!