In [1]:
from zipfile import ZipFile

# Locations of the competition archives on the Kaggle input volume.
train_path = "/kaggle/input/carvana-image-masking-challenge/train.zip"
train_masks_path = "/kaggle/input/carvana-image-masking-challenge/train_masks.zip"

# Unpack the images first, then the masks, into the current working directory.
for archive_path in (train_path, train_masks_path):
    with ZipFile(archive_path, 'r') as archive:
        archive.extractall(path='./')
        print("Done")

Done
Done


In [14]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

import albumentations as A
from albumentations.pytorch import ToTensorV2

import os
import numpy as np
from PIL import Image
from tqdm import tqdm
from sklearn import metrics
import matplotlib.pyplot as plt

In [15]:
class doubleConv(nn.Module):
    """Two back-to-back ``Conv2d -> BatchNorm2d -> ReLU`` stages.

    Both convolutions use a 3x3 kernel with stride 1 and padding 1, so the
    spatial size of the input is preserved; only the channel count changes
    (``in_channels`` -> ``out_channels`` -> ``out_channels``).

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by each of the two stages.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        stages = []
        stage_inputs = in_channels
        for _ in range(2):
            stages += [
                # The convolution carries no bias; the BatchNorm2d that follows
                # has its own learnable shift.
                nn.Conv2d(stage_inputs, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
            stage_inputs = out_channels

        # Attribute name and layer positions are kept so state_dict keys stay
        # "same_conv.<index>.<param>".
        self.same_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both conv stages to ``x`` and return the result."""
        features = self.same_conv(x)
        return features

In [16]:
class Unet(nn.Module):
    """U-Net style encoder/decoder built from ``doubleConv`` blocks.

    The encoder applies one ``doubleConv`` per entry of ``channels``, each
    followed by a 2x2 max-pool. A bottleneck ``doubleConv`` doubles the last
    channel count. The decoder mirrors the encoder: each level upsamples with
    a stride-2 transposed convolution, concatenates the matching encoder
    output (skip connection) along the channel axis, and applies a
    ``doubleConv``. A final 1x1 convolution maps to ``out_channels``; no
    activation is applied, so the output is raw logits.

    Args:
        in_channels: Channels of the input image tensor.
        out_channels: Channels of the output map.
        channels: Encoder channel widths, shallowest first. Any non-empty
            sequence is accepted; its length sets the network depth.

    Raises:
        ValueError: If ``channels`` is empty.
    """

    # The default is a tuple rather than a list so the shared default object
    # can never be mutated between instances.
    def __init__(self, in_channels=3, out_channels=1, channels=(64, 128, 256, 512)):
        super(Unet, self).__init__()
        channels = list(channels)
        if not channels:
            raise ValueError("channels must contain at least one encoder width")

        self.down_same_convs = nn.ModuleList()
        self.up_same_convs = nn.ModuleList()

        self.down_sample = nn.MaxPool2d(kernel_size=2, stride=2)
        self.up_samples = nn.ModuleList()

        # Encoder: in_channels -> channels[0] -> channels[1] -> ...
        for channel in channels:
            down_same_conv = doubleConv(in_channels, channel)
            self.down_same_convs.append(down_same_conv)
            in_channels = channel

        self.bottleNeck = doubleConv(channels[-1], channels[-1]*2)

        # Decoder: walk the widths deepest-first. Each level halves the
        # channel count while upsampling, then the concatenation with the
        # skip connection brings it back to channel*2 for the doubleConv.
        channels = list(reversed(channels))
        for channel in channels:
            up_sample = nn.ConvTranspose2d(in_channels=channel*2, out_channels=channel, kernel_size=2, stride=2, padding=0)
            up_same_conv = doubleConv(channel*2, channel)

            self.up_samples.append(up_sample)
            self.up_same_convs.append(up_same_conv)

        # After reversal channels[-1] is the shallowest (first) encoder width.
        self.final_same_conv = nn.Conv2d(in_channels=channels[-1], out_channels=out_channels, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Run the network on ``x`` and return the logits.

        The returned tensor has ``out_channels`` channels and the same
        height/width as ``x``.
        """
        skip_connections = []
        for down_same_conv in self.down_same_convs:
            x = down_same_conv(x)
            skip_connections.append(x)
            x = self.down_sample(x)

        x = self.bottleNeck(x)

        # Iterate over however many decoder levels were built instead of a
        # hard-coded 4, so non-default `channels` lengths work.
        decoder_levels = zip(self.up_samples, reversed(skip_connections), self.up_same_convs)
        for up_sample, skip, up_same_conv in decoder_levels:
            x = up_sample(x)
            # Max-pooling floors odd sizes, so the upsampled map can be
            # smaller than the skip connection; resize to match before
            # concatenating. This is a no-op path for sizes divisible by
            # 2**depth.
            if x.shape[2:] != skip.shape[2:]:
                x = nn.functional.interpolate(x, size=skip.shape[2:])
            x = torch.cat((skip, x), dim=1)
            x = up_same_conv(x)

        x = self.final_same_conv(x)
        return x

In [17]:
# Shape sanity check: push one random batch through a freshly built network
# and display the output size.
x = torch.randn(16, 3, 160, 240)
unet = Unet(3, 1)
unet(x).size()

torch.Size([16, 1, 160, 240])

In [18]:
class CarvanaDataset:
    """Map-style dataset yielding ``(image, mask)`` pairs.

    Images are read from the given paths; each mask is looked up in a sibling
    directory whose name is the image directory's name with ``train`` replaced
    by ``train_masks``, under the file name ``<image stem>_mask.gif``.

    Args:
        img_paths: Sequence of image file paths.
        transform: Optional callable invoked as
            ``transform(image=img, mask=mask)`` that returns a mapping with
            ``"image"`` and ``"mask"`` entries (the albumentations calling
            convention).
    """

    def __init__(self, img_paths, transform=None):
        self.img_paths = img_paths
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.img_paths)

    @staticmethod
    def _mask_path_for(img_path):
        """Derive the mask file path that belongs to ``img_path``.

        Only the image's immediate directory name and its file name are
        rewritten, so a "train" substring elsewhere in the path (for example
        a parent directory called ``training_runs``) is left untouched.
        """
        img_dir, img_name = os.path.split(img_path)
        parent_dir, dir_name = os.path.split(img_dir)
        stem, _ = os.path.splitext(img_name)
        mask_dir_name = dir_name.replace("train", "train_masks")
        return os.path.join(parent_dir, mask_dir_name, stem + "_mask.gif")

    def __getitem__(self, index):
        """Load the image/mask pair at ``index``.

        Returns:
            ``(img, mask)``. Without a transform, ``img`` is an RGB
            ``numpy`` array and ``mask`` is a single-channel ``float32``
            array in which the value 255 has been mapped to 1.0. With a
            transform, both are whatever the transform returns.
        """
        img_path = self.img_paths[index]
        mask_path = self._mask_path_for(img_path)

        # Context managers close the underlying file handles as soon as the
        # pixel data has been copied into numpy arrays.
        with Image.open(img_path) as img_file:
            img = np.array(img_file.convert("RGB"))
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file.convert("L"), dtype=np.float32)
        mask[mask == 255.0] = 1.0

        if self.transform is not None:
            augmentations = self.transform(image=img, mask=mask)
            img = augmentations["image"]
            mask = augmentations["mask"]

        return img, mask

In [19]:
# Input resolution fed to the network and DataLoader settings.
IMAGE_HEIGHT = 160
IMAGE_WIDTH = 240
BATCH_SIZE = 16
NUM_WORKERS = 0
PIN_MEMORY = False

# Training pipeline: resize, random geometric augmentation, then scale pixel
# values by 1/255 (mean 0 / std 1 means no centering) and convert to tensors.
train_transform = A.Compose(
    [
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Rotate(limit=35, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ],
)

# Same as the training pipeline but without the random augmentations.
val_transform = A.Compose(
    [
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ],
)

# os.listdir returns entries in arbitrary order and includes anything that
# happens to be in the folder. Keep only the .jpg images (the dataset derives
# mask names from that extension) and sort them so indices are stable across
# runs.
train_img_paths = sorted(
    os.path.join("./train", img_name)
    for img_name in os.listdir("./train")
    if img_name.endswith(".jpg")
)
train_dataset = CarvanaDataset(
    train_img_paths,
    transform=train_transform,
)

train_data_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
    shuffle=True,
)

In [20]:
def save_checkpoint(model, optimizer, filename="model.pth.tar"):
    """Write the model and optimizer state dicts to ``filename``.

    The file holds a dict with the keys ``"state_dict"`` (model) and
    ``"optimizer"`` (optimizer), serialized with ``torch.save``.
    """
    print("=>Saving Checkpoint...")
    torch.save(
        dict(state_dict=model.state_dict(), optimizer=optimizer.state_dict()),
        filename,
    )

def load_checkpoint(checkpoint_path, model, optimizer):
    """Restore ``model`` and ``optimizer`` in place from a checkpoint file.

    The file is expected to contain the ``"state_dict"`` and ``"optimizer"``
    entries. Tensors are mapped onto the module-level ``DEVICE`` while
    loading.
    """
    print("=>Loading Checkpoint...")
    target_device = torch.device(DEVICE)
    saved_state = torch.load(checkpoint_path, map_location=target_device)

    model_state = saved_state["state_dict"]
    model.load_state_dict(model_state)

    optimizer_state = saved_state["optimizer"]
    optimizer.load_state_dict(optimizer_state)

In [21]:
def evaluate(loader, model):
    """Compute pixel accuracy and Dice score of ``model`` over ``loader``.

    The model is switched to eval mode. Logits are passed through a sigmoid
    and thresholded at 0.5 to obtain the predicted mask. Metrics are
    accumulated batch by batch as running totals, so memory use does not grow
    with the size of the dataset.

    Args:
        loader: Iterable of ``(imgs, masks)`` batches that also supports
            ``len()``. ``masks`` gets a channel axis inserted at dim 1 to
            line up with the model output.
        model: Network returning one logit map per image.

    Returns:
        ``(accuracy, dice_score)`` as Python floats, where accuracy is the
        fraction of pixels whose prediction equals the target and
        ``dice = 2 * sum(pred * target) / (sum(pred) + sum(target) + 1e-8)``.

    Raises:
        ValueError: If ``loader`` has no batches.
    """
    num_batches = len(loader)
    if num_batches == 0:
        raise ValueError("cannot evaluate on an empty loader")

    model.eval()
    correct_pixels = 0
    total_pixels = 0
    intersection = 0.0
    pred_plus_target = 0.0

    with torch.no_grad():
        for imgs, masks in tqdm(loader, total=num_batches, leave=True):
            imgs = imgs.to(DEVICE)
            masks = masks.to(DEVICE).unsqueeze(1)

            predictions = (torch.sigmoid(model(imgs)) > 0.5).to(masks.dtype)

            correct_pixels += (predictions == masks).sum().item()
            total_pixels += masks.numel()
            # Sum in float64 so large batches do not lose precision.
            intersection += (predictions * masks).sum(dtype=torch.float64).item()
            pred_plus_target += (
                predictions.sum(dtype=torch.float64) + masks.sum(dtype=torch.float64)
            ).item()

    accuracy = correct_pixels / total_pixels
    dice_score = (2 * intersection) / (pred_plus_target + 1e-8)
    return accuracy, dice_score

def save_predictions_as_imgs(loader, model, epoch, folder="saved_imgs/"):
    """Save target and predicted masks of the first batch as PNG grids.

    Writes ``real_<epoch>.png`` (the target masks) and ``fake_<epoch>.png``
    (sigmoid output thresholded at 0.5) into ``folder``. The folder is created
    if it does not exist, and it may be given with or without a trailing
    slash. If ``loader`` yields nothing, no files are written.

    Args:
        loader: Iterable of ``(imgs, masks)`` batches; only the first batch
            is used.
        model: Network returning one logit map per image. It is switched to
            eval mode.
        epoch: Value embedded in the output file names.
        folder: Output directory.
    """
    model.eval()

    first_batch = next(iter(loader), None)
    if first_batch is None:
        return
    imgs, masks = first_batch

    imgs = imgs.to(device=DEVICE)
    masks = masks.to(device=DEVICE)

    with torch.no_grad():
        outputs = torch.sigmoid(model(imgs))
        outputs = (outputs > 0.5).type(torch.float)

    # save_image does not create missing directories itself.
    if folder:
        os.makedirs(folder, exist_ok=True)

    # os.path.join builds both paths the same way regardless of whether
    # `folder` ends with a separator.
    torchvision.utils.save_image(masks.unsqueeze(1), os.path.join(folder, f"real_{epoch}.png"))
    torchvision.utils.save_image(outputs, os.path.join(folder, f"fake_{epoch}.png"))

In [22]:
def train(train_data_loader, model, criterion, optimizer, scaler, epoch, num_epochs=None):
    """Train ``model`` for one pass over ``train_data_loader``.

    The forward pass and loss run under ``torch.cuda.amp.autocast``; the
    backward pass and optimizer step go through ``scaler`` (a gradient
    scaler).

    Args:
        train_data_loader: Iterable of ``(imgs, masks)`` batches that also
            supports ``len()``. ``masks`` gets a channel axis inserted at
            dim 1 to line up with the model output.
        model: Network to optimize; switched to train mode.
        criterion: Loss called as ``criterion(outputs, masks)``.
        optimizer: Optimizer stepping the model parameters.
        scaler: Object providing ``scale``, ``step`` and ``update``.
        epoch: Index of the current epoch, shown in the progress bar.
        num_epochs: Total number of epochs, shown in the progress bar. When
            omitted, the module-level ``epochs`` variable is used.

    Returns:
        Mean of the per-batch loss values for this epoch.

    Raises:
        ValueError: If ``train_data_loader`` has no batches.
    """
    num_batches = len(train_data_loader)
    if num_batches == 0:
        raise ValueError("cannot train on an empty loader")

    total_epochs = epochs if num_epochs is None else num_epochs

    model.train()
    losses = []

    train_progress_bar = tqdm(enumerate(train_data_loader), total=num_batches, leave=True)
    # The description does not change within an epoch, so set it once.
    train_progress_bar.set_description(f"Epoch [{epoch}/{total_epochs-1}]")

    for batch_idx, (imgs, masks) in train_progress_bar:
        imgs = imgs.to(device=DEVICE)
        masks = masks.unsqueeze(1).to(device=DEVICE)

        with torch.cuda.amp.autocast():
            outputs = model(imgs)
            loss = criterion(outputs, masks)

        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Read the scalar once; each .item() call forces a device sync.
        batch_loss = loss.item()
        train_progress_bar.set_postfix(train_loss=batch_loss)
        losses.append(batch_loss)

    train_progress_bar.close()
    return sum(losses) / len(losses)

In [24]:
# Run configuration.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
CHECKPOINT_FILE = "unet.pth.tar"
PREDICTIONS_DIR = "saved_imgs/"
LOAD_MODEL = False
learning_rate = 3e-4
epochs = 3

model = Unet(in_channels=3, out_channels=1).to(DEVICE)
# The network outputs raw logits, so the sigmoid is folded into the loss.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Gradient scaling only applies to CUDA mixed precision; disable it explicitly
# on CPU instead of relying on the runtime warning/fallback.
scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE.type == "cuda"))

# The preview images are written here every epoch; the directory must exist
# before the first save or a fresh-kernel run fails.
os.makedirs(PREDICTIONS_DIR, exist_ok=True)

if LOAD_MODEL:
    load_checkpoint(CHECKPOINT_FILE, model, optimizer)

losses = []
for epoch in range(epochs):
    loss = train(train_data_loader, model, criterion, optimizer, scaler, epoch)
    # NOTE(review): metrics and preview images are computed on the augmented
    # training loader, not on held-out data, so they do not measure
    # generalization.
    accuracy, dice_score = evaluate(train_data_loader, model)

    save_checkpoint(model, optimizer, filename=CHECKPOINT_FILE)
    save_predictions_as_imgs(train_data_loader, model, epoch, folder=PREDICTIONS_DIR)
    print(f"Epoch: {epoch}, loss: {loss}, accuracy: {accuracy}, dice_score: {dice_score}")
    losses.append(loss)

Epoch [0/2]: 100%|██████████| 318/318 [05:08<00:00,  1.03it/s, train_loss=0.0638]
100%|██████████| 318/318 [03:48<00:00,  1.39it/s]


=>Saving Checkpoint...
Epoch: 0, loss: 0.14234318689636463, accuracy: 0.9879480210790095, dice_score: 0.9716116068849536


Epoch [1/2]: 100%|██████████| 318/318 [05:09<00:00,  1.03it/s, train_loss=0.0313]
100%|██████████| 318/318 [03:49<00:00,  1.39it/s]


=>Saving Checkpoint...
Epoch: 1, loss: 0.045289488175427016, accuracy: 0.9925582610636138, dice_score: 0.9823999350874817


Epoch [2/2]: 100%|██████████| 318/318 [05:09<00:00,  1.03it/s, train_loss=0.0242]
100%|██████████| 318/318 [03:49<00:00,  1.39it/s]


=>Saving Checkpoint...
Epoch: 2, loss: 0.029736392299568502, accuracy: 0.9928924522159984, dice_score: 0.9832002340281316


In [25]:
import shutil

# Bundle the saved prediction images into ./saved_imgs.zip; the archive path
# returned by make_archive is the cell's displayed value.
shutil.make_archive(base_name='./saved_imgs', format='zip', root_dir="saved_imgs")

'/kaggle/working/saved_imgs.zip'