In [None]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import cv2
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch import optim

torch.backends.cudnn.benchmark = True
scaler = torch.cuda.amp.GradScaler()

In [None]:
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using {device}")

Using mps


In [None]:
run_test_cases = False # Set to True if debugging is required
cloud_labels = ["Flower", "Gravel", "Fish", "Sugar"] # All possible labels for the clouds

# Original Image resolutions
in_res_y = 1400
in_res_x = 2100

# New Image resolutions
new_res_y = 512
new_res_x = 768

# data directories
test_dir = "./test_images"
train_dir = "./train_images"

# Training params
num_filters = 24 # Number of filters in first conv layer  
num_train_images = 128
num_test_images = 32

batch_size = 8
num_epochs = 40
lr = 1e-4

In [4]:
image_names = os.listdir(train_dir)

train_images = image_names[:num_train_images]
test_images = image_names[num_train_images:num_train_images+num_test_images]
print(test_images)

df = pd.read_csv('train.csv')
df[['Image', 'Label']] = df['Image_Label'].str.split('_', expand=True)

if run_test_cases:
    print(df[['Image', 'Label', 'EncodedPixels']].head(8))
    print()
    print(df['Image'].unique()[:10])

['af4d36b.jpg', 'ebe596d.jpg', '423c22d.jpg', '61259d9.jpg', 'bacd6fe.jpg', '1230c60.jpg', '4b8a262.jpg', '3614409.jpg', '3361513.jpg', '7473a71.jpg', 'f431307.jpg', '0e42958.jpg', '4e5b275.jpg', 'a1d13fa.jpg', '57559fb.jpg', 'df426c2.jpg', '6e1e7cb.jpg', 'a00ab01.jpg', '5e70a5b.jpg', '6d9de9e.jpg', 'd836ac5.jpg', '19807b1.jpg', '5683db4.jpg', '40dd239.jpg', '87a1831.jpg', 'fa12d07.jpg', '741dcf3.jpg', 'e3b3009.jpg', '2d2bd73.jpg', 'd2eb9fe.jpg', 'de2c9bb.jpg', 'cea2726.jpg']


In [5]:
# Get labels and rle from image name
def get_labels_rle(image_name: str, df) -> list:
    rles = df[df['Image'] == image_name]['EncodedPixels'].to_list()
    labels = df[df['Image'] == image_name]['Label'].to_list()
    return rles, labels

# Debugging
if run_test_cases:

    # Get Files
    train_images = os.listdir(train_dir)[:2]
    print(f"Train images: {train_images}")

    for image in train_images:
        rles, labels = get_labels_rle(f"{image}", df)
        for rle, label in zip(rles, labels):
            print(f"Label: {label} \n rle: {rle} \n")

In [6]:
# Convert rle mask encoding into 2D arrays
def rle_to_array(rle_list: list) -> np.array:

    # Create empty array for
    array = np.zeros(in_res_y * in_res_x)

    # Skip if cloud formation is not on picture
    if not rle_list or pd.isna(rle_list):
        mask = array.reshape((in_res_x, in_res_y), order="F").T
        return mask
    
    rle_array = np.array(list(map(int, rle_list.split())), dtype=int)
    start_pixels = rle_array[::2] - 1 # Offset because pixel 1 is arr position 0
    num_pixels = rle_array[1::2]

    # Create 2D mask
    for start_pixel, num_pixels in zip(start_pixels, num_pixels): # Format is [start_idx_0, num_pixels_0 ...]
        array[start_pixel:start_pixel+num_pixels] = 1.0
    
    # Reshape
    mask = array.reshape((in_res_x, in_res_y), order="A").T # 2D array of [Height, Width]

    return mask

# For debugging
if run_test_cases:

    # Get Files
    train_images = os.listdir(train_dir)[:2]
    print(f"Train images: {train_images}")

    # Plot files
    for image_name in train_images:
        img = cv2.imread(f"{train_dir}/{image_name}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0

        rles, labels = get_labels_rle(image_name, df)

        # Raw Image
        plt.imshow(img)
        plt.show()

        for rle, label in zip(rles, labels):

            # Masked Image
            mask = rle_to_array(rle)
            print(np.unique(mask))
            print(f"titel: {label}")
            plt.imshow(mask, cmap="grey", vmin=0.0, vmax=1.0)
            #plt.imshow(mask[:, :, None].repeat(3, axis=-1)*img, cmap="grey", vmin=0.0, vmax=1.0)
            plt.show()
        print("---------------------------------")

In [7]:
def dice_coef(preds, target, eps=1e-6):
    # [B, 4, H, W]

    preds = torch.sigmoid(preds)
    overlap = (preds * target).sum((1,2,3))

    dice = (2. * overlap + eps) / (preds.sum((1,2,3)) + target.sum((1,2,3)) + eps)

    return dice.mean()

In [None]:
def dice_loss(preds, target, eps=1e-6):
    return 1 - dice_coef(preds, target, eps=eps)

bce_loss = nn.BCEWithLogitsLoss()

In [None]:
def loss_fn(preds, target, eps=1e-6):
    return 0.5 * dice_loss(preds, target, eps) + 0.5 * bce_loss(preds, target)

In [None]:
# Images: [3, H, W]
# Masks : [4, H, W] (4 cloud types)
class ImageDataset(Dataset):
    def __init__(self, data_frame, img_dir, image_names):
        self.img_dir = img_dir
        self.image_names = image_names
        self.data_frame = data_frame

    def __len__(self):
        return len(self.image_names)
    
    def __getitem__(self, idx): 
        image_name = self.image_names[idx]

        # Read in image files
        image = cv2.imread(f"{self.img_dir}/{image_name}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Downsample
        image = cv2.resize(image, (new_res_x, new_res_y), interpolation=cv2.INTER_AREA)
        image = (image / 255.0).astype(np.float32)
        image = torch.from_numpy(image).float()

        rles, _ = get_labels_rle(image_name, self.data_frame)

        # Create mask
        mask = torch.zeros((4, in_res_y, in_res_x), dtype=torch.float32) # [4, H, W]

        for i, rle in enumerate(rles):
            single_mask = rle_to_array(rle)
            single_mask = torch.from_numpy(single_mask).float()
            mask[i, :, :] = single_mask

        # Downsample mask
        mask = mask.unsqueeze(0)
        mask = F.interpolate(mask.float(), size=(new_res_y, new_res_x), mode='nearest').long()
        mask = mask.squeeze(0)

        # [H, W, C] -> [C, H, W]
        image = image.transpose(0, 2).transpose(1, 2) # [C, H, W]

        return image, mask

In [10]:
class ConvolutionBlock(nn.Module):
    def __init__(self, in_channels, out_channels, conv_kernel_size, padding):
        super().__init__()

        self.conv_block = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, conv_kernel_size, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, conv_kernel_size, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU()
            )

    def forward(self, x):
        output = self.conv_block(x)
        return output # [batch, out_channels, H_out, W_out]


In [11]:
class PoolBlock(nn.Module):
    def __init__(self, downsample):
        super().__init__()

        self.pool = nn.MaxPool2d(downsample)

    def forward(self, x):
        output = self.pool(x)
        return output

In [12]:
class UpSampleBlock(nn.Module):
    def __init__(self, channels, upsample):
        super().__init__()

        self.up_sample_block = nn.Sequential(
            nn.Upsample(scale_factor=upsample, mode='bilinear', align_corners=False)
            #nn.Conv2d(channels, channels // upsample, kernel_size=1)
        )

    def forward(self, x):
        upsample_block = self.up_sample_block(x)
        return upsample_block

In [13]:
class Network(nn.Module):
    def __init__(self, filters):
        super().__init__()

        # in_channels, out_channels, conv_kernel_size, padding, pool_kernel_size
        self.encoder_layer_0 = ConvolutionBlock(3, filters, 3, 1)
        self.encoder_layer_1 = ConvolutionBlock(filters, 2*filters, 3, 1)

        self.pool_block_0 = PoolBlock(downsample=2)
        self.pool_block_1 = PoolBlock(downsample=2)

        self.bottle_neck = nn.Conv2d(2*filters, 4*filters, 3, padding=1)

        # in_channels, out_channels, conv_kernel_size, stride
        self.decoder_layer_1 = ConvolutionBlock(4*filters, 2*filters, 3, 1)
        self.decoder_layer_0 = ConvolutionBlock(2*filters, filters, 3, 1)

        self.up_sample_block1 = UpSampleBlock(4 * filters, upsample=2)
        self.up_sample_block0 = UpSampleBlock(2 * filters, upsample=2)

        self.output_layer = nn.Conv2d(filters, 4, 1)

    def forward(self, x): # [Batch, Color, Height, Width]
        
        enc0 = self.encoder_layer_0(x) # [B, num_filters, H, W]
        pool0 = self.pool_block_0(enc0) # [B, num_filters, H / 2, W / 2]

        enc1 = self.encoder_layer_1(pool0) # [B, 2 * num_filters, H / 2, W / 2]
        pool1 = self.pool_block_1(enc1) # [B, 2 * num_filters, H / 4, W / 4]

        bottle_neck = self.bottle_neck(pool1) # [B, 4 * num_filters, H / 4, W / 4]

        up1 = self.up_sample_block1(bottle_neck) # [B, 2 * num_filters, H / 2, W / 2]
        #concat1 = torch.cat([up1, enc1], dim=1) # [B, 4 * num_filters, H / 2, W / 2]
        dec1 = self.decoder_layer_1(up1) # [B, 2 * num_filters, H / 2, W / 2]

        up0 = self.up_sample_block0(dec1) # [B, num_filters, H, W]
        #concat0 = torch.cat([up0, enc0], dim=1) # [B, 2 * num_filters, H, W]
        dec0 = self.decoder_layer_0(up0) # [B, num_filters, H, W]

        logits = self.output_layer(dec0) # [B, 4, H, W]

        return logits

In [None]:
# Datasets and DataLoader
train_dataset = ImageDataset(df, train_dir, train_images)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=8)

test_dataset = ImageDataset(df, train_dir, test_images)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=8)

In [None]:
model = Network(num_filters).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)

In [None]:
# Training loop
for epoch in range(num_epochs):

    # Training
    model.train()
    train_loss = 0.0
    
    for image, mask in train_dataloader:
        image, mask = image.to(device), mask.to(device)

        with torch.cuda.amp.autocast():
            preds = model(image)
            loss = loss_fn(preds, mask)

        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item()
    train_loss /= len(train_dataloader)

    # Validation
    model.eval()
    val_loss = 0.0
    dice = 0.0

    with torch.no_grad():
        for image, mask in test_dataloader:
            image, mask = image.to(device), mask.to(device)

            preds = model(image)
            loss = loss_fn(preds, mask)

            val_loss += loss.item()

            hard_preds = torch.where(preds > 0.5, 1.0, 0.0)
            dice += dice_coef(hard_preds, mask)
        val_loss /= len(test_dataloader)
        dice /= len(test_dataloader)

    # Print metrics
    print(f"Epoch: {epoch}")
    print(f"Train loss: {train_loss:.4f}")
    print(f"Val loss: {val_loss:.4f}")
    print(f"Dice coefficient: {dice:.4f}")
    print()

Epoch: 0
Train loss: 0.7668
Val loss: 0.7789
Dice coefficient: 0.2176

Epoch: 1
Train loss: 0.7537
Val loss: 0.7787
Dice coefficient: 0.2234

Epoch: 2
Train loss: 0.7500
Val loss: 0.7606
Dice coefficient: 0.2284



KeyboardInterrupt: 