In [1]:
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
import glob
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
train_batchsz, val_batchsz = 16, 16
epoch = 60
lr = 1.0e-3
numofclass = 7
hw1train_path = "/data/dlcv/hw1/hw1_data/p2_data/train/"
hw1val_path = "/data/dlcv/hw1/hw1_data/p2_data/validation/"
checkpoint_path = "/data/allen/hw1model/deeplabv3.pth"
log_path = "/data/allen/hw1model/deeplabv3_log.txt"
cls_color = {
    0:  [0, 255, 255],
    1:  [255, 255, 0],
    2:  [255, 0, 255],
    3:  [0, 255, 0],
    4:  [0, 0, 255],
    5:  [255, 255, 255],
    6: [0, 0, 0],
}
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    torch.cuda.set_device(1)
print('Device used:', device)

Device used: cuda


In [3]:
def RGBToClass(mask):
    bitmask = (np.array(mask) >= 128).astype(int)
    bitmask = 4 * bitmask[0,:,:] + 2 * bitmask[1,:,:] + bitmask[2,:,:]
    classmask = np.empty((mask.shape[1], mask.shape[2]))
    classmask[bitmask == 3] = 0
    classmask[bitmask == 6] = 1
    classmask[bitmask == 5] = 2
    classmask[bitmask == 2] = 3
    classmask[bitmask == 1] = 4
    classmask[bitmask == 7] = 5
    classmask[bitmask == 0] = 6
    return torch.from_numpy(classmask)

In [4]:
class Mydataset(Dataset):
    def __init__(self, dirpath, transform=False):
        self.images, self.masks = {}, {}
        self.transform = transform
        filenames = glob.glob(os.path.join(dirpath, "*sat.jpg"))
        for filename in filenames:
            image_fn = os.path.split(filename)[1]
            idx = int(image_fn.split("_")[0])
            image = transforms.PILToTensor()(Image.open(filename))
            self.images[idx] = image
        filenames = glob.glob(os.path.join(dirpath, "*mask.png"))
        for filename in filenames:
            image_fn = os.path.split(filename)[1]
            idx = int(image_fn.split("_")[0])
            mask = transforms.PILToTensor()(Image.open(filename))
            #convert mask pixel to each class
            self.masks[idx] = RGBToClass(mask)
            # print(mask[:,0,0], self.masks[idx][0,0])
        self.len = len(self.images)

    def __getitem__(self, index):
        img, mask = self.images[index], self.masks[index]
        if self.transform:
            if np.random.rand() >= 0.5:
                img, mask = torchvision.transforms.functional.hflip(img), torchvision.transforms.functional.hflip(mask)
            if np.random.rand() >= 0.5:
                img, mask = torchvision.transforms.functional.vflip(img), torchvision.transforms.functional.vflip(mask)
            if np.random.rand() >= 0.5:
                img = torchvision.transforms.functional.adjust_sharpness(img, 1.2)
            if np.random.rand() >= 0.5:
                img = torchvision.transforms.ColorJitter(brightness=0.2, contrast=0.2)(img)
        return img, mask

    def __len__(self):
        """ Total number of samples in the dataset """
        return self.len

In [5]:
train_tfm = True
val_tfm = False
trainset, valset = Mydataset(hw1train_path, transform=train_tfm), Mydataset(hw1val_path, transform=val_tfm)
trainset_loader = DataLoader(trainset, batch_size=train_batchsz, shuffle=True, num_workers=1, pin_memory=True)
valset_loader = DataLoader(valset, batch_size=val_batchsz, shuffle=False, num_workers=1, pin_memory=True)

In [6]:
class Deeplabv3(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.deeplabv3 = torchvision.models.segmentation.deeplabv3_resnet50(weight=torchvision.models.segmentation.DeepLabV3_ResNet50_Weights.DEFAULT, num_classes=numofclass)
    def forward(self, x):
        y = self.deeplabv3(x)['out']
        return y


In [7]:
def ComputeIoU(output, label):
    mean_iou = 0.
    label = label.astype(np.uint8)
    outputmasks = output.argmax(axis=1).astype(np.uint8)
    # print("output:{} -> outmasks:{}".format(output[0,:,0,0], outputmasks[0,0,0]))
    for i in range(6):
        tp_fp = np.sum(outputmasks == i)
        tp_fn = np.sum(label == i)
        tp = np.sum((outputmasks == i) * (label == i))
        # print("i {} tp_fp : {} tp_fn : {} tp : {}".format(i, tp_fp, tp_fn, tp))
        if (tp_fp + tp_fn - tp) > 0:
            iou = tp / (tp_fp + tp_fn - tp)
            mean_iou += iou / 6
    return mean_iou
    
def ClassToRGB(class_img):
    class_img = np.array(class_img)
    m, n = class_img.shape
    rgb = np.zeros((m, n, 3), dtype=np.uint8)
    for i in range(numofclass):
        rgb[class_img == i] = cls_color[i]
    return rgb

In [8]:
def save_checkpoint(checkpoint_path, model, optimizer):
    state = {'model_state_dict': model.state_dict(),
             'optimizer_state_dict' : optimizer.state_dict()}
    torch.save(state, checkpoint_path)
    print('model saved to {}'.format(checkpoint_path))

In [9]:
def loadbestiou():
    best_iou = 0.
    if os.path.exists(log_path):
        with open(log_path, "r") as f:
            for line in f.readlines():
                linelist = line.split(" ")
                if linelist[0] == checkpoint_path:
                    best_iou = float(linelist[-1].strip("%"))
                    break
    return best_iou / 100.

In [10]:
def train(model, optimizer, savepoints=[0, 50]):
    # lrscheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5,10,20], gamma=0.8)
    criterion = nn.CrossEntropyLoss()
    best_iou = loadbestiou()
    print("best_acc = {:.3%}".format(best_iou))
    for ep in range(epoch):
        model.train()
        train_loss, train_iou = 0., 0.
        output_list, label_list= [], []
        for idx, (img, label) in enumerate(trainset_loader):
            img, label = img.to(device, dtype=torch.float32), label.to(device, dtype=torch.long)
            output = model(img)
            # print(img.shape, label.shape, output.shape)
            loss = criterion(output, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            output_list.append(output.detach().cpu().numpy())
            label_list.append(label.detach().cpu().numpy())
        # lrscheduler.step()
        train_loss /= len(trainset_loader.dataset)
        train_iou = ComputeIoU(np.concatenate(output_list, axis=0), np.concatenate(label_list, axis=0))

        model.eval()
        val_loss, val_iou = 0., 0.
        output_list, label_list= [], []
        with torch.no_grad():
            for idx, (img, label) in enumerate(valset_loader):
                img, label = img.to(device, dtype=torch.float32), label.to(device, dtype=torch.long)
                output = model(img)
                loss = criterion(output, label)
                val_loss += loss.item()
                output_list.append(output.detach().cpu().numpy())
                label_list.append(label.detach().cpu().numpy())
            val_loss /= len(valset_loader.dataset)
            val_iou = ComputeIoU(np.concatenate(output_list, axis=0), np.concatenate(label_list, axis=0))

        print("Epoch {} train loss = {:.6f}, train iou = {:.6f}, valid loss = {:.6f}, valid iou = {:.6f}".format(ep + 1, train_loss, train_iou, val_loss, val_iou))
        if val_iou > best_iou:
            save_checkpoint(checkpoint_path, model, optimizer)
            with open(log_path, "w") as f:    
                f.write("{} : {:.3%}".format(checkpoint_path, val_iou))
            best_iou = val_iou
        elif ep in savepoints:
            save_checkpoint(os.path.join(os.path.split(checkpoint_path)[0], "deeplabv3_ep{}.pth".format(ep)), model, optimizer)


In [11]:
def save_checkpoint(checkpoint_path, model, optimizer):
    state = {'model_state_dict': model.state_dict(),
             'optimizer_state_dict' : optimizer.state_dict()}
    torch.save(state, checkpoint_path)
    print('model saved to {}'.format(checkpoint_path))

In [12]:
def loadbestiou():
    best_iou = 0.
    if os.path.exists(log_path):
        with open(log_path, "r") as f:
            for line in f.readlines():
                linelist = line.split(" ")
                if linelist[0] == checkpoint_path:
                    best_iou = float(linelist[-1].strip("%"))
                    break
    return best_iou / 100.

In [13]:
mydeeplabv3= Deeplabv3().to(device)
# print(mydeeplabv3)
optimizer = optim.Adam(mydeeplabv3.parameters(), lr=lr)
train(mydeeplabv3, optimizer)

best_acc = 73.889%
Epoch 1 train loss = 0.060838, train iou = 0.309797, valid loss = 0.042331, valid iou = 0.525130
model saved to /data/allen/hw1model/deeplabv3_ep0.pth
Epoch 2 train loss = 0.046895, train iou = 0.441327, valid loss = 0.037905, valid iou = 0.529294
Epoch 3 train loss = 0.042000, train iou = 0.480999, valid loss = 0.033654, valid iou = 0.534270
Epoch 4 train loss = 0.039848, train iou = 0.507008, valid loss = 0.032151, valid iou = 0.556535
Epoch 5 train loss = 0.037284, train iou = 0.522573, valid loss = 0.041305, valid iou = 0.513027
Epoch 6 train loss = 0.036997, train iou = 0.532106, valid loss = 0.034088, valid iou = 0.562186
Epoch 7 train loss = 0.035704, train iou = 0.540393, valid loss = 0.029902, valid iou = 0.590997
Epoch 8 train loss = 0.033666, train iou = 0.563462, valid loss = 0.028817, valid iou = 0.602611
Epoch 9 train loss = 0.033294, train iou = 0.568859, valid loss = 0.033133, valid iou = 0.569331
Epoch 10 train loss = 0.033351, train iou = 0.569040, 

## Finetune

In [15]:
mydeeplabv3 = Deeplabv3().to(device)
optimizer = optim.Adam(mydeeplabv3.parameters())
checkpoint = torch.load(checkpoint_path)
mydeeplabv3.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

In [16]:
train(mydeeplabv3, optimizer) #107 epoch reach best

best_acc = 72.725%
Epoch 1 train loss = 0.014988, train iou = 0.773608, valid loss = 0.020105, valid iou = 0.712339
Epoch 2 train loss = 0.015264, train iou = 0.769799, valid loss = 0.021231, valid iou = 0.694199
Epoch 3 train loss = 0.016179, train iou = 0.760676, valid loss = 0.021226, valid iou = 0.698401
Epoch 4 train loss = 0.016138, train iou = 0.760764, valid loss = 0.019614, valid iou = 0.728295
model saved to /data/allen/hw1model/deeplabv3.pth
Epoch 5 train loss = 0.014461, train iou = 0.782505, valid loss = 0.020520, valid iou = 0.711552
Epoch 6 train loss = 0.014125, train iou = 0.785728, valid loss = 0.019825, valid iou = 0.717462
Epoch 7 train loss = 0.014698, train iou = 0.778758, valid loss = 0.022357, valid iou = 0.697873
Epoch 8 train loss = 0.013708, train iou = 0.791119, valid loss = 0.021656, valid iou = 0.707502
Epoch 9 train loss = 0.014167, train iou = 0.785209, valid loss = 0.018771, valid iou = 0.728171
Epoch 10 train loss = 0.013492, train iou = 0.793661, vali