<a href="https://colab.research.google.com/github/R12942159/DeepLearning/blob/main/DLCV_hw1_p3_VGG16FCN32s.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive so the dataset paths used by the later
# cells (all under /content/drive/MyDrive/...) can be read.
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# runtime — running this notebook elsewhere needs a different data location.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import torch


# Pick the compute backend once: the GPU when PyTorch can see one, else the CPU.
# Later cells move the model and every batch to this device.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using: {device}")

Using: cpu


In [3]:
import os


# Directory that holds both the training images (*.jpg) and their masks (*.png).
train_paths = '/content/drive/MyDrive/NTU_DLCV/p3_data/train'
# os.listdir() returns entries in arbitrary order, and the dataset pairs
# image i with mask i purely by list position. List the directory once and sort
# so both lists are deterministic and index-aligned.
# NOTE(review): assumes an image and its mask share a filename prefix so that
# sorting lines them up — confirm against the dataset's naming scheme.
_train_files = sorted(os.listdir(train_paths))
img_paths_train = [os.path.join(train_paths, name) for name in _train_files if name.endswith('.jpg')]
mask_paths_train = [os.path.join(train_paths, name) for name in _train_files if name.endswith('.png')]

In [4]:
import os


# Directory that holds both the validation images (*.jpg) and their masks (*.png).
val_paths = '/content/drive/MyDrive/NTU_DLCV/p3_data/validation'
# os.listdir() returns entries in arbitrary order, and the dataset pairs
# image i with mask i purely by list position. List the directory once and sort
# so both lists are deterministic and index-aligned.
# NOTE(review): assumes an image and its mask share a filename prefix so that
# sorting lines them up — confirm against the dataset's naming scheme.
_val_files = sorted(os.listdir(val_paths))
img_paths_val = [os.path.join(val_paths, name) for name in _val_files if name.endswith('.jpg')]
mask_paths_val = [os.path.join(val_paths, name) for name in _val_files if name.endswith('.png')]

#### Data Processing

In [20]:
import torch
import numpy as np
from PIL import Image
from copy import deepcopy
from torchvision.transforms.functional import hflip, vflip


class LandDataset(torch.utils.data.Dataset):
    """Segmentation dataset yielding ``(image, mask)`` tensors.

    Images are read as RGB and scaled to ``[0, 1]``; masks are RGB colour-coded
    label images that are converted to a 2-D map of class indices ``0..6``.

    Args:
        img_paths: Paths of the input images.
        mask_paths: Paths of the label masks; ``mask_paths[i]`` must belong to
            ``img_paths[i]``. Not used (may be ``None``) when ``mode == 'test'``.
        mode: ``'test'`` yields images only; any other value (this notebook
            uses ``'train'`` and ``'val'``) yields ``(image, mask)``.
        augment: When true, every sample is randomly kept as is, flipped
            horizontally, or flipped vertically. Image and mask always receive
            the same flip.
    """

    # Lookup table: index = 4*R + 2*G + B of the binarised mask colour,
    # value = class id.
    #   011 Urban -> 0        110 Agriculture -> 1   101 Rangeland -> 2
    #   010 Forest -> 3       001 Water -> 4         111 Barren -> 5
    #   000 Unknown -> 6
    # Pure red (100) is not a defined colour; it is folded into Unknown so that
    # every label stays inside 0..6 instead of leaking an out-of-range value.
    _COLOR_CODE_TO_CLASS = np.array([6, 4, 3, 0, 6, 2, 1, 5], dtype=np.int64)

    def __init__(self, img_paths, mask_paths, mode, augment=False):
        self.img_paths = img_paths
        self.mask_paths = mask_paths
        self.mode = mode
        self.augment = bool(augment)

        # Masks are only read outside test mode, so only then must they pair up.
        if self.mode != 'test':
            assert len(self.img_paths) == len(self.mask_paths)

    def __len__(self):
        return len(self.img_paths)

    @classmethod
    def _encode_mask(cls, mask_rgb):
        """Convert an (H, W, 3) RGB mask array into an (H, W) int64 class map."""
        # Binarise each channel: values below 127 -> 0, everything else -> 1.
        bits = (np.asarray(mask_rgb) >= 127).astype(np.int64)
        code = 4 * bits[..., 0] + 2 * bits[..., 1] + bits[..., 2]
        return cls._COLOR_CODE_TO_CLASS[code]

    @staticmethod
    def _flip_pair(img, mask, choice):
        """Apply one flip to both tensors; the last two dims are (H, W).

        ``choice``: 0 = unchanged, 1 = horizontal flip, 2 = vertical flip.
        """
        if choice == 1:
            return torch.flip(img, dims=[-1]), torch.flip(mask, dims=[-1])
        if choice == 2:
            return torch.flip(img, dims=[-2]), torch.flip(mask, dims=[-2])
        return img, mask

    def __getitem__(self, idx):
        """Return ``image`` in test mode, otherwise ``(image, mask)``.

        ``image`` is a float32 tensor (C, H, W) in [0, 1]; ``mask`` is an int64
        tensor (H, W) of class indices.
        """
        img = Image.open(self.img_paths[idx]).convert('RGB')
        # 0~255 -> 0~1, then channel last to first: (H, W, C) -> (C, H, W)
        img = torch.tensor(np.array(img) / 255.0, dtype=torch.float).permute(2, 0, 1)

        if self.mode == 'test':
            return img

        mask = Image.open(self.mask_paths[idx]).convert('RGB')
        mask = torch.from_numpy(self._encode_mask(np.array(mask)))

        if self.augment:
            # torch's RNG is used rather than `random`/numpy because PyTorch
            # seeds it separately for every DataLoader worker process.
            choice = int(torch.randint(0, 3, (1,)).item())
            img, mask = self._flip_pair(img, mask, choice)

        return img, mask

In [21]:
# Samples per batch (original note: IMG_SIZE is 512*512).
BATCH_SIZE = 8

# Only the training split is built with augmentation switched on.
train_ds = LandDataset(img_paths=img_paths_train, mask_paths=mask_paths_train, mode='train', augment=True)
val_ds = LandDataset(img_paths=img_paths_val, mask_paths=mask_paths_val, mode='val', augment=False)

# num_workers > 0 loads batches in separate worker processes.
# The training loader reshuffles every epoch; validation keeps a fixed order.
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = torch.utils.data.DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

In [22]:
import torch
from torch import nn
import torchvision
from torchvision import models
from torchvision.models import VGG16_Weights, vgg16


# Build the pretrained VGG16 once and split it into its convolutional layers
# and its fully connected layers. Calling vgg16(...) separately for each half
# would construct the network and load its weights twice.
_vgg = vgg16(weights=VGG16_Weights.DEFAULT)
features, classifier = list(_vgg.features.children()), list(_vgg.classifier.children())

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:07<00:00, 72.6MB/s]


In [24]:
# Widen the padding of the first layer to 100 pixels per side.
# NOTE(review): these tweaks only touch the module-level `features` list; the
# model class defined later builds its own list of layers — confirm this cell
# is still needed.
features[0].padding = (100, 100)

for layer in features:
    layer_type = type(layer).__name__
    if 'MaxPool' in layer_type:
        # Round pooled output sizes up instead of down.
        layer.ceil_mode = True
    elif 'ReLU' in layer_type:
        # Let the activation overwrite its input tensor.
        layer.inplace = True

In [34]:
# https://github.com/zijundeng/pytorch-semantic-segmentation/blob/master/models/fcn32s.py#L17C4-L17C4
from torch import nn
import torchvision
from torchvision import models
from torchvision.models import VGG16_Weights, vgg16


class VGG16FCN32s(nn.Module):
    """FCN-32s semantic-segmentation network on a pretrained VGG16 backbone.

    The VGG16 convolutional stack is kept as is (with the first convolution
    padded by 100), the first two fully connected layers are re-expressed as
    convolutions that reuse the pretrained weights, a zero-initialised 1x1
    convolution produces per-class scores, and a stride-32 transposed
    convolution upsamples them back to the input resolution.

    Args:
        n_classes: Number of channels (classes) in the output score map.
    """

    # Top-left position of the input inside the upsampled score map for this
    # layout (pad 100, ceil-mode pooling, 7x7 fc6, kernel-64/stride-32 deconv):
    # the upsampled map is about H + 38 pixels tall, i.e. 19 extra per side.
    _CROP_OFFSET = 19

    def __init__(self, n_classes=7) -> None:
        super().__init__()

        # Build the pretrained network once and reuse it for both halves.
        vgg = vgg16(weights=VGG16_Weights.DEFAULT)
        features = list(vgg.features.children())
        classifier = list(vgg.classifier.children())

        # Large padding keeps the feature map big enough for the 7x7 fc6
        # convolution and lets the output be cropped back to the input size.
        features[0].padding = (100, 100)
        for layer in features:
            if isinstance(layer, nn.MaxPool2d):
                # Round pooled sizes up so odd-sized maps are not truncated.
                layer.ceil_mode = True
            elif isinstance(layer, nn.ReLU):
                layer.inplace = True

        self.bottleneck = nn.Sequential(*features)

        # fc6 / fc7 of VGG16 rewritten as convolutions with the same weights.
        fc6 = nn.Conv2d(512, 4096, kernel_size=7)
        fc6.weight.data.copy_(classifier[0].weight.data.view(4096, 512, 7, 7))
        fc6.bias.data.copy_(classifier[0].bias.data)
        fc7 = nn.Conv2d(4096, 4096, kernel_size=1)
        fc7.weight.data.copy_(classifier[3].weight.data.view(4096, 4096, 1, 1))
        fc7.bias.data.copy_(classifier[3].bias.data)

        # Per-class scoring layer, started at zero.
        score_fr = nn.Conv2d(4096, n_classes, kernel_size=1)
        score_fr.weight.data.zero_()
        score_fr.bias.data.zero_()

        # No activation after score_fr: its output is the class logits. A ReLU
        # there would clamp them to >= 0 and, with the zero initialisation,
        # pass no gradient at all, so the network could never start learning.
        self.score_fr = nn.Sequential(
            fc6, nn.ReLU(inplace=True), nn.Dropout(),
            fc7, nn.ReLU(inplace=True), nn.Dropout(),
            score_fr,
        )
        self.upscore = nn.ConvTranspose2d(n_classes, n_classes, kernel_size=64, stride=32, bias=False)

    def forward(self, x):
        """Return per-pixel class logits of shape (N, n_classes, H, W) for input (N, 3, H, W)."""
        height, width = x.shape[2], x.shape[3]
        bottleneck = self.bottleneck(x)
        score_fr = self.score_fr(bottleneck)
        upscore = self.upscore(score_fr)
        # Crop the region that lines up with the original image.
        top = left = self._CROP_OFFSET
        return upscore[:, :, top:top + height, left:left + width].contiguous()

In [35]:
def mean_iou_score(pred, labels, n_classes=6, verbose=True):
    '''
    Compute the mean IoU score over the first ``n_classes`` classes (default 6).

    Args:
        pred: Predicted class indices with the same shape as ``labels``, or
            per-class scores with one extra class axis at position 1
            (e.g. ``(N, C, H, W)``), which are reduced with argmax.
        labels: Ground-truth class indices.
        n_classes: Classes ``0 .. n_classes - 1`` are evaluated.
        verbose: Print the per-class and mean IoU, as the original did.

    Both inputs may be numpy arrays or torch tensors (on any device).

    Returns:
        numpy float64: IoU averaged over the classes that occur in ``pred`` or
        ``labels``. A class absent from both has an undefined IoU (0/0) and is
        skipped instead of turning the mean into NaN; 0.0 if no class occurs.
    '''
    def _as_array(x):
        # torch tensors expose detach(); plain arrays and lists do not.
        if hasattr(x, 'detach'):
            x = x.detach().cpu().numpy()
        return np.asarray(x)

    pred, labels = _as_array(pred), _as_array(labels)
    if pred.ndim == labels.ndim + 1:
        # Per-class scores were passed: pick the best class for every pixel.
        pred = pred.argmax(axis=1)

    ious = []
    for i in range(n_classes):
        pred_i = pred == i
        labels_i = labels == i
        tp = np.sum(pred_i & labels_i)
        union = np.sum(pred_i) + np.sum(labels_i) - tp
        if union == 0:
            if verbose:
                print('class #%d : absent' % i)
            continue
        iou = tp / union
        ious.append(iou)
        if verbose:
            print('class #%d : %1.5f'%(i, iou))

    mean_iou = np.float64(np.mean(ious)) if ious else np.float64(0.0)
    if verbose:
        print('\nmean_iou: %f\n' % mean_iou)

    return mean_iou

In [None]:
from tqdm import tqdm


def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch over ``dataloader``.

    Args:
        dataloader: Yields ``(x, y)`` batches of inputs and target class maps.
        model: Network called as ``model(x)``; its output is fed to ``loss_fn``
            and reduced over dim 1 to get the predicted classes.
        loss_fn: Called as ``loss_fn(pred, y)``.
        optimizer: Optimizer that updates the model parameters.

    Returns:
        ``(mean loss per batch, mean of the per-batch mean IoU)``.
    """
    num_batches = len(dataloader) # batches per epoch
    model.train() # to training mode.
    epoch_loss = 0
    epoch_iou = 0
    for x, y in tqdm(dataloader, leave=False):
        # move the batch to the selected device
        x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
        optimizer.zero_grad()
        pred = model(x)
        loss = loss_fn(pred, y)

        loss.backward() # backpropagation to compute gradients
        optimizer.step() # update model params

        epoch_loss += loss.item() # tensor -> python value
        # The IoU helper compares class indices held in numpy arrays, so turn
        # the scores into per-pixel classes and bring both tensors to the CPU,
        # detached from the autograd graph.
        pred_classes = pred.detach().argmax(dim=1).cpu().numpy()
        epoch_iou += float(mean_iou_score(pred_classes, y.cpu().numpy()))

    # return avg loss of epoch, avg mean IoU of epoch
    return epoch_loss/num_batches, epoch_iou/num_batches


def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` without updating it.

    Args:
        dataloader: Yields ``(x, y)`` batches of inputs and target class maps.
        model: Network called as ``model(x)``; its output is fed to ``loss_fn``
            and reduced over dim 1 to get the predicted classes.
        loss_fn: Called as ``loss_fn(pred, y)``.

    Returns:
        ``(mean loss per batch, mean of the per-batch mean IoU)``.
    """
    num_batches = len(dataloader) # batches per epoch

    model.eval() # model to test mode.
    epoch_loss = 0
    epoch_iou = 0
    # No gradient for test data
    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)

            # Compute prediction loss
            pred = model(x)
            loss = loss_fn(pred, y)

            # write to logs
            epoch_loss += loss.item()
            # The IoU helper compares class indices held in numpy arrays, so
            # turn the scores into per-pixel classes and move both to the CPU.
            pred_classes = pred.argmax(dim=1).cpu().numpy()
            epoch_iou += float(mean_iou_score(pred_classes, y.cpu().numpy()))

    return epoch_loss/num_batches, epoch_iou/num_batches

In [None]:
# Upper bound on the number of epochs; early stopping may end training sooner.
EPOCHS = 50
# Per-epoch history of the loss and mean IoU on both splits.
logs = {
    'train_loss': [], 'val_loss': [],
    'train_mean_iou': [], 'val_mean_iou': [],
}

# The constructor already copies the pretrained VGG16 weights into the network.
# VGG16FCN32s defines no `copy_params_from_vgg16` method, so the former extra
# call raised AttributeError and has been removed.
model = VGG16FCN32s()
model = model.to(device)

loss_fn = nn.CrossEntropyLoss() # classification for each pixel
optimizer = torch.optim.Adam(model.parameters())

# Early stopping: stop after `patience` consecutive epochs without a new best
# validation loss.
patience = 5
counter = 0
best_loss = np.inf

for epoch in tqdm(range(EPOCHS)):
    # One pass over the training data, then a pass over the validation data.
    train_loss, train_mean_iou = train(train_loader, model, loss_fn, optimizer)
    val_loss, val_mean_iou = test(val_loader, model, loss_fn)

    # Record this epoch's metrics in the history.
    epoch_metrics = {
        'train_loss': train_loss, 'val_loss': val_loss,
        'train_mean_iou': train_mean_iou, 'val_mean_iou': val_mean_iou,
    }
    for metric_name, metric_value in epoch_metrics.items():
        logs[metric_name].append(metric_value)

    print(f'EPOCH: {epoch:04d} train_loss: {train_loss:.4f} val_loss: {val_loss:.4f}, train_mean_iou: {train_mean_iou:.3f}, val_mean_iou: {val_mean_iou:.3f}')

    # Always keep the latest weights; additionally keep the weights with the
    # lowest validation loss seen so far.
    weights = model.state_dict()
    torch.save(weights, "last.pth")
    if val_loss < best_loss:
        best_loss, counter = val_loss, 0
        torch.save(weights, "best.pth")
    else:
        counter += 1

    # Give up once the validation loss has not improved for `patience` epochs.
    if counter >= patience:
        print("Earlystop!")
        break