In [1]:
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
import glob
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
train_batchsz, val_batchsz = 32, 32
epoch = 10
lr = 1.5e-3
momentum = 0.9
numofclass = 7
hw1train_path = "/data/dlcv/hw1/hw1_data/p2_data/train/"
hw1val_path = "/data/dlcv/hw1/hw1_data/p2_data/validation/"
checkpoint_path = "/data/allen/hw1model/vgg16fcn8.pth"
log_path = "/data/allen/hw1model/vgg16fcn8_log.txt"
cls_color = {
    0:  [0, 255, 255],
    1:  [255, 255, 0],
    2:  [255, 0, 255],
    3:  [0, 255, 0],
    4:  [0, 0, 255],
    5:  [255, 255, 255],
    6: [0, 0, 0],
}
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    torch.cuda.set_device(1)
print('Device used:', device)

Device used: cuda


In [3]:
def RGBToClass(mask):
    bitmask = (np.array(mask) >= 128).astype(int)
    bitmask = 4 * bitmask[0,:,:] + 2 * bitmask[1,:,:] + bitmask[2,:,:]
    classmask = np.empty((mask.shape[1], mask.shape[2]))
    classmask[bitmask == 3] = 0
    classmask[bitmask == 6] = 1
    classmask[bitmask == 5] = 2
    classmask[bitmask == 2] = 3
    classmask[bitmask == 1] = 4
    classmask[bitmask == 7] = 5
    classmask[bitmask == 0] = 6
    return classmask

In [4]:
class Mydataset(Dataset):
    def __init__(self, dirpath, transform=None):
        self.images, self.masks = {}, {}
        self.transform = transform
        filenames = glob.glob(os.path.join(dirpath, "*sat.jpg"))
        for filename in filenames:
            image_fn = os.path.split(filename)[1]
            idx = int(image_fn.split("_")[0])
            image = Image.open(filename)
            if self.transform is not None:
                image = self.transform(image)
            self.images[idx] = image
        filenames = glob.glob(os.path.join(dirpath, "*mask.png"))
        for filename in filenames:
            image_fn = os.path.split(filename)[1]
            idx = int(image_fn.split("_")[0])
            mask = Image.open(filename)
            if self.transform is not None:
                mask = self.transform(mask)
            #convert mask pixel to each class
            self.masks[idx] = RGBToClass(mask)
            # print(mask[:,0,0], self.masks[idx][0,0])
        self.len = len(self.images)

    def __getitem__(self, index):
        return self.images[index], self.masks[index]

    def __len__(self):
        """ Total number of samples in the dataset """
        return self.len

In [5]:
train_tfm = transforms.Compose([
    transforms.PILToTensor()
])
val_tfm = transforms.Compose([
    transforms.PILToTensor()
])
trainset, valset = Mydataset(hw1train_path, transform=train_tfm), Mydataset(hw1val_path, transform=val_tfm)
trainset_loader = DataLoader(trainset, batch_size=train_batchsz, shuffle=True, num_workers=1, pin_memory=True)
valset_loader = DataLoader(valset, batch_size=val_batchsz, shuffle=False, num_workers=1, pin_memory=True)

In [6]:
class Vgg16FCN8(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        vgg16 = torchvision.models.vgg16(weights='VGG16_Weights.DEFAULT')
        # print(vgg16)
        self.block1 = nn.Sequential(*list(vgg16.children())[0][0:5])
        self.block2 = nn.Sequential(*list(vgg16.children())[0][5:10])
        self.block3 = nn.Sequential(*list(vgg16.children())[0][10:17])
        self.score3 = nn.Sequential(
            nn.Conv2d(256, numofclass, kernel_size=1, stride=1, padding=0)
        )
        self.block4 = nn.Sequential(*list(vgg16.children())[0][17:24])
        self.score4 = nn.Sequential(
            nn.Conv2d(512, numofclass, kernel_size=1, stride=1, padding=0)
        )
        self.block5 = nn.Sequential(*list(vgg16.children())[0][24:])
        self.score5 = nn.Sequential(
            nn.Conv2d(512, 4096, kernel_size=7, stride=1, padding=3),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Conv2d(4096, 4096, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Conv2d(4096, numofclass, kernel_size=1, padding=0),
        )
        self.upsample2 = nn.ConvTranspose2d(numofclass, numofclass, kernel_size=2, stride=2)
        self.upsample8 = nn.ConvTranspose2d(numofclass, numofclass, kernel_size=8, stride=8)

    def forward(self, x):
        x1 = self.block1(x)
        x2 = self.block2(x1)
        x3 = self.block3(x2)
        x4 = self.block4(x3)
        x5 = self.block5(x4)
        s3 = self.score3(x3)
        s4 = self.score4(x4)
        s5 = self.score5(x5)
        # print("s3:{} s4:{} s5:{}".format(s3.shape, s4.shape, s5.shape))
        return self.upsample8(self.upsample2(self.upsample2(s5) + s4) + s3)


In [7]:
myvgg16fcn8 = Vgg16FCN8().to(device)
print(myvgg16fcn8)

Vgg16FCN8(
  (block1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): Max

In [8]:
def ComputeIoU(output, label):
    mean_iou = 0.
    label = label.astype(np.uint8)
    outputmasks = output.argmax(axis=1).astype(np.uint8)
    # print("output:{} -> outmasks:{}".format(output[0,:,0,0], outputmasks[0,0,0]))
    for i in range(6):
        tp_fp = np.sum(outputmasks == i)
        tp_fn = np.sum(label == i)
        tp = np.sum((outputmasks == i) * (label == i))
        # print("i {} tp_fp : {} tp_fn : {} tp : {}".format(i, tp_fp, tp_fn, tp))
        if (tp_fp + tp_fn - tp) > 0:
            iou = tp / (tp_fp + tp_fn - tp)
            mean_iou += iou / 6
    return mean_iou  
    
def ClassToRGB(class_img):
    class_img = np.array(class_img)
    m, n = class_img.shape[0], class_img.shape[1]
    rgb = np.empty((m, n, 3), dtype=torch.uint8)
    for i in range(numofclass):
        rgb[class_img[:,:] == i,:] = cls_color[i] 
    return rgb

In [9]:
def save_checkpoint(checkpoint_path, model, optimizer):
    state = {'model_state_dict': model.state_dict(),
             'optimizer_state_dict' : optimizer.state_dict()}
    torch.save(state, checkpoint_path)
    print('model saved to {}'.format(checkpoint_path))

In [10]:
def loadbestiou():
    best_iou = 0.
    if os.path.exists(log_path):
        with open(log_path, "r") as f:
            for line in f.readlines():
                linelist = line.split(" ")
                if linelist[0] == checkpoint_path:
                    best_iou = float(linelist[-1].strip("%"))
                    break
    return best_iou / 100.

In [11]:
def train(model, optimizer):
    lrscheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10,20,25], gamma=0.6)
    criterion = nn.CrossEntropyLoss()
    best_iou = loadbestiou()
    print("best_acc = {:.3%}".format(best_iou))
    for ep in range(epoch):
        myvgg16fcn8.train()
        train_loss, train_iou = 0., 0.
        output_list, label_list= [], []
        for idx, (img, label) in enumerate(trainset_loader):
            img, label = img.to(device, dtype=torch.float32), label.to(device, dtype=torch.long)
            output = myvgg16fcn8(img)
            # print(img.shape, label.shape, output.shape)
            loss = criterion(output, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            output_list.append(output.detach().cpu().numpy())
            label_list.append(label.detach().cpu().numpy())
        lrscheduler.step()
        train_loss /= len(trainset_loader.dataset)
        train_iou = ComputeIoU(np.concatenate(output_list, axis=0), np.concatenate(label_list, axis=0))

        myvgg16fcn8.eval()
        val_loss, val_iou = 0., 0.
        output_list, label_list= [], []
        with torch.no_grad():
            for idx, (img, label) in enumerate(valset_loader):
                img, label = img.to(device, dtype=torch.float32), label.to(device, dtype=torch.long)
                output = myvgg16fcn8(img)
                loss = criterion(output, label)
                val_loss += loss.item()
                output_list.append(output.detach().cpu().numpy())
                label_list.append(label.detach().cpu().numpy())
            val_loss /= len(valset_loader.dataset)
            val_iou = ComputeIoU(np.concatenate(output_list, axis=0), np.concatenate(label_list, axis=0))

        print("Epoch {} train loss = {:.6f}, train iou = {:.6f}, valid loss = {:.6f}, valid iou = {:.6f}".format(ep + 1, train_loss, train_iou, val_loss, val_iou))
        if val_iou > best_iou:
            save_checkpoint(checkpoint_path, myvgg16fcn8, optimizer)
            with open(log_path, "w") as f:    
                f.write("{} : {:.3%}".format(checkpoint_path, val_iou))
            best_iou = val_iou


In [12]:
def save_checkpoint(checkpoint_path, model, optimizer):
    state = {'model_state_dict': model.state_dict(),
             'optimizer_state_dict' : optimizer.state_dict()}
    torch.save(state, checkpoint_path)
    print('model saved to {}'.format(checkpoint_path))

In [13]:
def loadbestiou():
    best_iou = 0.
    if os.path.exists(log_path):
        with open(log_path, "r") as f:
            for line in f.readlines():
                linelist = line.split(" ")
                if linelist[0] == checkpoint_path:
                    best_iou = float(linelist[-1].strip("%"))
                    break
    return best_iou / 100.

## Finetune

In [14]:
myvgg16fcn8 = Vgg16FCN8().to(device)
optimizer = optim.SGD(myvgg16fcn8.parameters(), lr=0.001, momentum=0.9)
checkpoint = torch.load(checkpoint_path)
myvgg16fcn8.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
train(myvgg16fcn8, optimizer)

best_acc = 59.758%
Epoch 1 train loss = 0.014665, train iou = 0.599360, valid loss = 0.014454, valid iou = 0.597384
Epoch 2 train loss = 0.014662, train iou = 0.600841, valid loss = 0.014394, valid iou = 0.597316
Epoch 3 train loss = 0.014632, train iou = 0.599834, valid loss = 0.014422, valid iou = 0.597291
Epoch 4 train loss = 0.014718, train iou = 0.599156, valid loss = 0.014417, valid iou = 0.597541
Epoch 5 train loss = 0.014617, train iou = 0.600553, valid loss = 0.014390, valid iou = 0.598786
model saved to /data/allen/hw1model/vgg16fcn8.pth
Epoch 6 train loss = 0.014683, train iou = 0.600296, valid loss = 0.014435, valid iou = 0.597883
Epoch 7 train loss = 0.014640, train iou = 0.600614, valid loss = 0.014710, valid iou = 0.596193
Epoch 8 train loss = 0.014645, train iou = 0.600889, valid loss = 0.014354, valid iou = 0.597580
Epoch 9 train loss = 0.014618, train iou = 0.600572, valid loss = 0.014444, valid iou = 0.598994
model saved to /data/allen/hw1model/vgg16fcn8.pth
Epoch 10