In [2]:
# Importing libraries
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader
import torchvision
import torchvision.transforms.functional as tf
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import os
from PIL import Image

In [4]:
# Hyperparameters and run configuration shared by the cells below.
LEARNING_RATE = 0.01  # learning rate handed to the Adam optimizer in main()
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # use the GPU when PyTorch can see one
BATCH_SIZE = 16  # batch size used for both the training and validation loaders
NUM_EPOCHS = 3  # number of passes over the training loader
NUM_WORKERS = 0  # forwarded to DataLoader(num_workers=...)
IMAGE_HEIGHT = 120  # resize target height (1280 originally)
IMAGE_WIDTH = 200  # resize target width (1918 originally)
PIN_MEMORY = True  # forwarded to DataLoader(pin_memory=...)
LOAD_MODEL = False  # NOTE(review): not read by any cell visible in this notebook

In [5]:
# Dataset directories (relative paths); each split has an image folder and a mask folder.
TRAIN_IMG_DIR = "trainf/images"  # training images
TRAIN_MASK_DIR = "trainf/masks"  # masks paired with the training images
VAL_IMG_DIR = "valf/images"  # validation images
VAL_MASK_DIR = "valf/masks"  # masks paired with the validation images

In [6]:
class DoubleConv(nn.Module):
    """Two consecutive (3x3 conv -> batch norm -> ReLU) stages.

    The first stage maps ``in_channels`` to ``out_channels``; the second keeps
    ``out_channels``. Both convolutions use stride 1 and padding 1 and have no
    bias term.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        stages = []
        stage_in = in_channels
        for _ in range(2):
            stages.extend(
                [
                    nn.Conv2d(stage_in, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(inplace=True),
                ]
            )
            # Every stage after the first consumes the previous stage's output.
            stage_in = out_channels

        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both conv/norm/activation stages to ``x``."""
        return self.conv(x)

In [7]:
class UNET(nn.Module):
    """U-shaped encoder/decoder network assembled from ``DoubleConv`` blocks.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by the final 1x1 convolution.
        features: Channel widths of the encoder stages, from first to last.
            The decoder walks the same widths in reverse, and the bottleneck
            uses twice the last width.

    ``forward`` returns the output of the final 1x1 convolution with no
    activation applied, at the same spatial size as the input.
    """

    # A tuple default avoids sharing one mutable list between all instances.
    def __init__(self, in_channels=3, out_channels=1, features=(64, 128, 256, 512)):
        super(UNET, self).__init__()

        self.ups = nn.ModuleList()
        self.downs = nn.ModuleList()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder: one DoubleConv per width; each consumes the previous width.
        for feature in features:
            self.downs.append(DoubleConv(in_channels, feature))
            in_channels = feature

        # Decoder: `ups` alternates [transposed conv, DoubleConv] per stage.
        # The DoubleConv takes feature*2 channels because the skip connection
        # is concatenated with the upsampled tensor before it.
        for feature in features[::-1]:
            self.ups.append(
                nn.ConvTranspose2d(feature * 2, feature, kernel_size=2, stride=2)
            )
            self.ups.append(DoubleConv(feature * 2, feature))

        self.bottleneck = DoubleConv(features[-1], features[-1] * 2)
        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

    def forward(self, x):
        """Run the encoder, bottleneck and decoder and return the final conv output."""
        skip_connections = []

        # Encoder: keep each stage's pre-pooling output for the decoder.
        for down in self.downs:
            x = down(x)
            skip_connections.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)
        # The decoder consumes the skips deepest-first.
        skip_connections = skip_connections[::-1]

        # Step by 2: even entries upsample, odd entries are the DoubleConv.
        for idx in range(0, len(self.ups), 2):
            x = self.ups[idx](x)
            skip_connection = skip_connections[idx // 2]

            # Pooling floors odd sizes, so the upsampled map can be smaller
            # than its skip connection; only the spatial dims need to match
            # for the channel-wise concatenation below.
            if x.shape[2:] != skip_connection.shape[2:]:
                x = tf.resize(x, size=skip_connection.shape[2:])

            concat_skip = torch.cat((skip_connection, x), dim=1)
            x = self.ups[idx + 1](concat_skip)

        return self.final_conv(x)

In [8]:
def test():
    """Smoke-test UNET: the prediction must have exactly the input's shape.

    Two input sizes are checked. 160 halves cleanly through every pooling
    stage, while 161 does not, so the second case also exercises the branch
    of ``UNET.forward`` that resizes the upsampled tensor to its skip
    connection.
    """
    model = UNET(in_channels=1, out_channels=1)

    # Only shapes are checked, so gradients are not needed.
    with torch.no_grad():
        for height, width in ((160, 160), (161, 161)):
            x = torch.randn((3, 1, height, width))
            preds = model(x)
            assert preds.shape == x.shape, (
                f"expected {tuple(x.shape)}, got {tuple(preds.shape)}"
            )


test()

In [9]:
class CarvanaDataset(Dataset):
    """Dataset of (image, mask) pairs read from two directories.

    Every entry of ``image_dir`` is treated as one sample. Its mask is looked
    up in ``mask_dir`` under the same file name with ``".jpg"`` replaced by
    ``"_mask.gif"``.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the matching masks.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``"image"`` and ``"mask"`` entries.
    """

    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping stable across runs and machines.
        self.images = sorted(os.listdir(image_dir))

    def __len__(self):
        """Return the number of entries found in ``image_dir``."""
        return len(self.images)

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(image, mask)``.

        Without a transform, ``image`` is the RGB image as a NumPy array and
        ``mask`` is a float32 NumPy array in which the value 255 has been
        replaced by 1. With a transform, both are whatever the transform
        returns under ``"image"`` and ``"mask"``.
        """
        image_name = self.images[index]
        img_path = os.path.join(self.image_dir, image_name)
        mask_path = os.path.join(self.mask_dir, image_name.replace(".jpg", "_mask.gif"))

        # Context managers close the underlying files as soon as the pixel
        # data has been copied into NumPy arrays.
        with Image.open(img_path) as img_file:
            image = np.array(img_file.convert("RGB"))
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file.convert("L"), dtype=np.float32)

        # Map the 255 foreground value to 1.0 so the mask can serve as a binary target.
        mask[mask == 255.0] = 1.0

        if self.transform is not None:
            augmentations = self.transform(image=image, mask=mask)
            image = augmentations["image"]
            mask = augmentations["mask"]

        return image, mask

In [10]:
def get_loaders(train_dir, train_maskdir, val_dir, val_maskdir, batch_size, train_transform,
                val_transform, num_workers=0, pin_memory=True):
    """Build the training and validation ``DataLoader`` objects.

    Both loaders wrap a ``CarvanaDataset`` and share ``batch_size``,
    ``num_workers`` and ``pin_memory``. Only the training loader shuffles.

    Returns:
        A ``(train_loader, val_loader)`` tuple.
    """

    def build_loader(image_dir, mask_dir, transform, shuffle):
        # One dataset + loader pair; only the inputs and shuffle flag vary.
        dataset = CarvanaDataset(image_dir=image_dir, mask_dir=mask_dir, transform=transform)
        return DataLoader(
            dataset,
            batch_size=batch_size,
            num_workers=num_workers,
            pin_memory=pin_memory,
            shuffle=shuffle,
        )

    train_loader = build_loader(train_dir, train_maskdir, train_transform, shuffle=True)
    val_loader = build_loader(val_dir, val_maskdir, val_transform, shuffle=False)

    return train_loader, val_loader

In [11]:
def check_accuracy(loader, model, device="cuda"):
    """Print and return the pixel accuracy of ``model`` over ``loader``.

    Each batch's model output is passed through a sigmoid and thresholded at
    0.5; the resulting 0/1 predictions are compared element-wise with the
    targets. The model is put in eval mode for the pass and switched back to
    train mode before returning.

    Args:
        loader: Iterable yielding ``(x, y)`` batches. ``y`` gets a new axis
            inserted at dim 1 before the comparison, so it is expected to
            arrive without a channel axis.
        model: Module to evaluate.
        device: Device the batches are moved to.

    Returns:
        The accuracy in percent as a float, or ``0.0`` when the loader
        yields no elements.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device).unsqueeze(1)
            preds = (torch.sigmoid(model(x)) > 0.5).float()
            # .item() keeps the running count a plain Python int.
            correct += (preds == y).sum().item()
            total += preds.numel()

    # An empty loader would otherwise divide by zero.
    accuracy = correct / total * 100 if total else 0.0
    print(f"Got {correct}/{total} with accuracy of {accuracy:.2f}")
    model.train()
    return accuracy

In [12]:
def save_pred_img(loader, model, folder="saved_images/", device="cuda"):
    """Write predicted and ground-truth masks for every batch to ``folder``.

    For batch number ``idx`` two image files are written inside ``folder``:
    ``pred_mask_{idx}.png`` with the thresholded predictions and
    ``{idx}.png`` with the targets. The model is put in eval mode while
    predicting and switched back to train mode afterwards.

    Args:
        loader: Iterable yielding ``(x, y)`` batches.
        model: Module used to produce the predictions.
        folder: Output directory, with or without a trailing separator. It is
            created if it does not exist.
        device: Device the inputs are moved to.
    """
    # save_image does not create directories, so make sure the target exists.
    os.makedirs(folder, exist_ok=True)

    model.eval()
    for idx, (x, y) in enumerate(loader):
        x = x.to(device=device)
        with torch.no_grad():
            preds = (torch.sigmoid(model(x)) > 0.5).float()
        # os.path.join puts both files inside `folder` whether or not the
        # caller passed a trailing slash.
        torchvision.utils.save_image(preds, os.path.join(folder, f"pred_mask_{idx}.png"))
        torchvision.utils.save_image(y.unsqueeze(1), os.path.join(folder, f"{idx}.png"))

    model.train()

In [18]:
def train(loader, model, optimizer, loss_fn):
    """Run one pass over ``loader``, taking an optimizer step after every batch.

    Batches are moved to ``DEVICE``; targets are cast to float and given an
    extra axis at dim 1 before being passed to ``loss_fn``. The current batch
    loss is shown on the tqdm progress bar.
    """
    progress = tqdm(loader)
    for inputs, labels in progress:
        inputs = inputs.to(device=DEVICE)
        labels = labels.float().unsqueeze(1).to(device=DEVICE)

        # Forward pass and loss for this batch.
        outputs = model(inputs)
        batch_loss = loss_fn(outputs, labels)

        # Clear old gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Show the latest batch loss next to the progress bar.
        progress.set_postfix(loss=batch_loss.item())

In [19]:
def main():
    """Train a UNET on the configured directories and report validation accuracy.

    Steps: build the augmentation pipelines, create the model, loss and
    optimizer, build the data loaders, print a baseline validation accuracy,
    then for each of ``NUM_EPOCHS`` epochs train, re-check accuracy and save
    the predicted validation masks under ``saved_images``.
    """
    target_size = dict(height=IMAGE_HEIGHT, width=IMAGE_WIDTH)

    # Training pipeline: resize, geometric augmentation, normalise, to tensor.
    train_aug = A.Compose(
        [
            A.Resize(**target_size),
            A.Rotate(limit=35, p=1.0),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.1),
            A.Normalize(max_pixel_value=255.0),
            ToTensorV2(),
        ],
        additional_targets={'mask': 'mask'},
    )

    # Validation pipeline: same preprocessing without the geometric augmentation.
    val_aug = A.Compose(
        [
            A.Resize(**target_size),
            A.Normalize(max_pixel_value=255.0),
            ToTensorV2(),
        ],
        additional_targets={'mask': 'mask'},
    )

    model = UNET(in_channels=3, out_channels=1).to(DEVICE)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    train_loader, val_loader = get_loaders(
        train_dir=TRAIN_IMG_DIR,
        train_maskdir=TRAIN_MASK_DIR,
        val_dir=VAL_IMG_DIR,
        val_maskdir=VAL_MASK_DIR,
        batch_size=BATCH_SIZE,
        train_transform=train_aug,
        val_transform=val_aug,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
    )

    # Baseline accuracy of the untrained model.
    check_accuracy(val_loader, model, device=DEVICE)

    for _ in range(NUM_EPOCHS):
        train(train_loader, model, optimizer, criterion)

        # Validation accuracy after this epoch.
        check_accuracy(val_loader, model, device=DEVICE)

        # Write this epoch's predicted masks to disk.
        save_pred_img(val_loader, model, folder="saved_images", device=DEVICE)

In [20]:
# Run the full pipeline defined in main(); the log that follows is its recorded output
# (one accuracy line before training, then one progress bar and accuracy line per epoch).
main()

Got 14496097/19200000 with accuracy of 75.50


100%|████████████████████████████████████████████████████████████████████| 200/200 [02:26<00:00,  1.37it/s, loss=0.371]


Got 15335176/19200000 with accuracy of 79.87


100%|████████████████████████████████████████████████████████████████████| 200/200 [02:33<00:00,  1.30it/s, loss=0.451]


Got 16039060/19200000 with accuracy of 83.54


100%|████████████████████████████████████████████████████████████████████| 200/200 [02:34<00:00,  1.30it/s, loss=0.395]


Got 16650832/19200000 with accuracy of 86.72


In [None]:
# Ask PyTorch to release GPU memory it has cached but is no longer using.
torch.cuda.empty_cache()