In [None]:
! git clone https://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@github.com/DLCV-Fall-2021/hw1-SonicBenz0408.git
! bash hw1-SonicBenz0408/get_dataset.sh

# Import packages.
import os
import gc
import glob
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision.models.utils import load_state_dict_from_url
import imageio
from torch.autograd import Variable
import torch.nn.functional as F

# This is for the progress bar.
from tqdm import tqdm

In [None]:
# Mini-batch size used by the training and validation DataLoaders.
batch_size = 4

# Class name -> class index.
# NOTE: the key 'barreb land' is a misspelling of "barren land"; it is kept
# verbatim so that any lookup relying on the existing key keeps working.
voc_cls = {
    'urban': 0,
    'rangeland': 2,
    'forest': 3,
    'unknown': 6,
    'barreb land': 5,
    'Agriculture land': 1,
    'water': 4,
}

# Class index -> RGB colour used when rendering a predicted mask.
cls_color = {
    0: [0, 255, 255],      # cyan
    1: [255, 255, 0],      # yellow
    2: [255, 0, 255],      # purple
    3: [0, 255, 0],        # green
    4: [0, 0, 255],        # blue
    5: [255, 255, 255],    # white
    6: [0, 0, 0],          # black
}

class SSDataset(Dataset):
    """Segmentation dataset yielding ``(image, class-index mask)`` pairs.

    Args:
        fnames: sequence of image file paths. The mask path for an image is
            built by replacing the last 7 characters of the image path with
            ``"mask.png"`` (e.g. ``0000_sat.jpg`` -> ``0000_mask.png``).

    ``__getitem__`` returns:
        img:   the tensor from ``torchvision.io.read_image`` divided by 255.
        masks: ``numpy.ndarray`` of shape ``(H, W)`` and dtype float64 holding
               the class index of every pixel. The dtype is kept as float64
               (what ``np.zeros`` produced before); the training code casts
               the labels to ``long`` itself.
    """

    # Lookup table indexed by the 3-bit colour code 4*R + 2*G + B, where each
    # channel bit is 1 when the channel value is >= 128.
    _CODE_TO_CLASS = np.array([
        6,  # 000 black  -> Unknown
        4,  # 001 blue   -> Water
        3,  # 010 green  -> Forest land
        0,  # 011 cyan   -> Urban land
        0,  # 100 red    -> no explicit mapping; keeps the previous default of 0
        2,  # 101 purple -> Rangeland
        1,  # 110 yellow -> Agriculture land
        5,  # 111 white  -> Barren land
    ], dtype=np.float64)

    def __init__(self, fnames):
        self.fnames = fnames
        self.num_samples = len(self.fnames)

    @staticmethod
    def _rgb_to_class(seg):
        """Convert an ``(H, W, >=3)`` colour mask into an ``(H, W)`` class-index array.

        Works for any spatial size (the previous implementation assumed 512x512).
        """
        bits = (np.asarray(seg) >= 128).astype(int)
        code = 4 * bits[:, :, 0] + 2 * bits[:, :, 1] + bits[:, :, 2]
        return SSDataset._CODE_TO_CLASS[code]

    def __getitem__(self, idx):
        fname = self.fnames[idx]
        # Load the image and scale pixel values by 1/255.
        img = torchvision.io.read_image(fname) / 255
        # Load the colour-coded mask that sits next to the image.
        seg_path = fname[:-7] + "mask.png"
        seg = imageio.imread(seg_path)
        masks = self._rgb_to_class(seg)
        return img, masks

    def __len__(self):
        return self.num_samples

data_path = "hw1_data/p2_data/"

# Sort the listing so the sample order (and therefore the split below) does not
# depend on the order in which the filesystem happens to return the files.
train_fnames = sorted(glob.glob(os.path.join(data_path, "train", '*.jpg')))
if not train_fnames:
    raise FileNotFoundError(f"no training images found under {os.path.join(data_path, 'train')}")

full_set = SSDataset(train_fnames)

# Hold out 200 samples for validation (10% if the dataset has fewer than 2000
# samples) instead of hard-coding [1800, 200], which fails for any other size.
n_val = min(200, len(full_set) // 10)
n_train = len(full_set) - n_val
# A dedicated seeded generator makes the train/validation split reproducible.
split_generator = torch.Generator().manual_seed(7414)
train_set, val_set = torch.utils.data.random_split(full_set, [n_train, n_val], generator=split_generator)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
# Validation metrics are averages over the whole split, so shuffling is unnecessary.
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)


In [None]:
# VGG-16 FCN32 

# reference: https://github.com/wkentaro/pytorch-fcn/blob/main/torchfcn/models/fcn32s.py

class Classifier(nn.Module):
    """VGG-16 based FCN32 network producing 7-class per-pixel scores.

    The five VGG-16 convolution stages each halve the resolution (1/32 after
    ``pool5``). ``fc6``/``fc7`` are 1x1 convolutions, ``score_fr`` maps to the
    7 classes and ``upscore32`` upsamples by 32 with one transposed convolution.

    NOTE: a later cell of this notebook defines another class that is also
    named ``Classifier`` (the FCN8 variant); the definition executed last is
    the one the training/inference cells use.
    """

    def __init__(self):
        super(Classifier, self).__init__()

        # conv1
        self.conv1_1 = nn.Conv2d(3, 64, 3, 1, 1)
        self.relu1_1 = nn.ReLU(inplace=True)
        self.conv1_2 = nn.Conv2d(64, 64, 3, 1, 1)
        self.relu1_2 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(2, stride=2)  # 1/2

        # conv2
        self.conv2_1 = nn.Conv2d(64, 128, 3, 1, 1)
        self.relu2_1 = nn.ReLU(inplace=True)
        self.conv2_2 = nn.Conv2d(128, 128, 3, 1, 1)
        self.relu2_2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(2, stride=2)  # 1/4

        # conv3
        self.conv3_1 = nn.Conv2d(128, 256, 3, 1, 1)
        self.relu3_1 = nn.ReLU(inplace=True)
        self.conv3_2 = nn.Conv2d(256, 256, 3, 1, 1)
        self.relu3_2 = nn.ReLU(inplace=True)
        self.conv3_3 = nn.Conv2d(256, 256, 3, 1, 1)
        self.relu3_3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(2, stride=2)  # 1/8

        # conv4
        self.conv4_1 = nn.Conv2d(256, 512, 3, 1, 1)
        self.relu4_1 = nn.ReLU(inplace=True)
        self.conv4_2 = nn.Conv2d(512, 512, 3, 1, 1)
        self.relu4_2 = nn.ReLU(inplace=True)
        self.conv4_3 = nn.Conv2d(512, 512, 3, 1, 1)
        self.relu4_3 = nn.ReLU(inplace=True)
        self.pool4 = nn.MaxPool2d(2, stride=2)  # 1/16

        # conv5
        self.conv5_1 = nn.Conv2d(512, 512, 3, 1, 1)
        self.relu5_1 = nn.ReLU(inplace=True)
        self.conv5_2 = nn.Conv2d(512, 512, 3, 1, 1)
        self.relu5_2 = nn.ReLU(inplace=True)
        self.conv5_3 = nn.Conv2d(512, 512, 3, 1, 1)
        self.relu5_3 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(2, stride=2)  # 1/32

        # fc6 (1x1 convolution)
        self.fc6 = nn.Conv2d(512, 4096, 1)
        self.relu6 = nn.ReLU(inplace=True)
        self.drop6 = nn.Dropout2d()

        # fc7 (1x1 convolution)
        self.fc7 = nn.Conv2d(4096, 4096, 1)
        self.relu7 = nn.ReLU(inplace=True)
        self.drop7 = nn.Dropout2d()

        # Per-class scores at 1/32 resolution, then x32 upsampling.
        self.score_fr = nn.Conv2d(4096, 7, 1)
        self.upscore32 = nn.ConvTranspose2d(7, 7, 32, 32, bias=False)

    def copy_param_vgg16(self, vgg16):
        """Copy the convolution weights/biases of ``vgg16.features`` into this model.

        ``vgg16.features`` is walked in parallel with this model's conv/relu/pool
        layers; only positions where both sides are ``nn.Conv2d`` are copied.
        ``fc6``, ``fc7`` and the scoring/upsampling layers are not touched.

        Args:
            vgg16: object exposing an iterable ``features`` of layers.

        Raises:
            AssertionError: if a paired conv layer has a different weight or bias shape.
        """
        features = [
            self.conv1_1, self.relu1_1,
            self.conv1_2, self.relu1_2,
            self.pool1,
            self.conv2_1, self.relu2_1,
            self.conv2_2, self.relu2_2,
            self.pool2,
            self.conv3_1, self.relu3_1,
            self.conv3_2, self.relu3_2,
            self.conv3_3, self.relu3_3,
            self.pool3,
            self.conv4_1, self.relu4_1,
            self.conv4_2, self.relu4_2,
            self.conv4_3, self.relu4_3,
            self.pool4,
            self.conv5_1, self.relu5_1,
            self.conv5_2, self.relu5_2,
            self.conv5_3, self.relu5_3,
            self.pool5,
        ]

        for l1, l2 in zip(vgg16.features, features):
            if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d):
                assert l1.weight.size() == l2.weight.size()
                assert l1.bias.size() == l2.bias.size()
                # copy_ writes the values into this model's own tensors. Rebinding
                # `.data` instead would share storage with the source network and
                # would move an already-on-device parameter back to the source's device.
                with torch.no_grad():
                    l2.weight.copy_(l1.weight)
                    l2.bias.copy_(l1.bias)

    def forward(self, x):
        """Return class scores of shape ``(N, 7, 32*(H//32), 32*(W//32))`` for input ``(N, 3, H, W)``."""
        h = x
        h = self.relu1_1(self.conv1_1(h))
        h = self.relu1_2(self.conv1_2(h))
        h = self.pool1(h)

        h = self.relu2_1(self.conv2_1(h))
        h = self.relu2_2(self.conv2_2(h))
        h = self.pool2(h)

        h = self.relu3_1(self.conv3_1(h))
        h = self.relu3_2(self.conv3_2(h))
        h = self.relu3_3(self.conv3_3(h))
        h = self.pool3(h)

        h = self.relu4_1(self.conv4_1(h))
        h = self.relu4_2(self.conv4_2(h))
        h = self.relu4_3(self.conv4_3(h))
        h = self.pool4(h)

        h = self.relu5_1(self.conv5_1(h))
        h = self.relu5_2(self.conv5_2(h))
        h = self.relu5_3(self.conv5_3(h))
        h = self.pool5(h)

        h = self.relu6(self.fc6(h))
        h = self.drop6(h)
        h = self.relu7(self.fc7(h))
        h = self.drop7(h)

        h = self.score_fr(h)
        h = self.upscore32(h)

        return h.float()


def cross_entropy2d(input, target, weight=None, size_average=True):
    """Pixel-wise cross entropy that skips pixels whose target is negative.

    Args:
        input: raw scores of shape (n, c, h, w).
        target: class indices with n*h*w elements, e.g. shape (n, h, w);
            entries < 0 are excluded from the loss.
        weight: optional per-class weights passed on to the NLL loss.
        size_average: when True the summed loss is divided by the number of
            pixels with target >= 0; when False the sum is returned.

    Returns:
        Scalar loss tensor.
    """
    n, c, h, w = input.size()
    # Channels-last log-probabilities: (n, h, w, c).
    log_probs = F.log_softmax(input, dim=1).permute(0, 2, 3, 1)
    # Keep only the rows that belong to valid pixels -> (num_valid, c).
    picked = log_probs[target.view(n, h, w) >= 0]
    keep = target >= 0
    labels = target[keep]
    total = F.nll_loss(picked, labels, weight=weight, reduction='sum')
    if size_average:
        total /= keep.data.sum()
    return total

# fix random seed
def same_seeds(seed):
    """Seed NumPy and PyTorch (CPU and, when available, CUDA) and make cuDNN deterministic."""
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


In [None]:
# VGG-16 FCN8

# reference: https://github.com/wkentaro/pytorch-fcn/blob/main/torchfcn/models/fcn32s.py

class Classifier(nn.Module):
    """VGG-16 based FCN8 network producing 7-class per-pixel scores.

    Scores computed at 1/32 resolution are upsampled x2 and added to scores
    from ``pool4`` (1/16); the sum is upsampled x2 again and added to scores
    from ``pool3`` (1/8); the result is upsampled x8 and cropped to the input
    size.

    NOTE: this definition reuses the name ``Classifier`` of the FCN32 class
    defined in an earlier cell and replaces it once this cell has run.
    """

    def __init__(self):
        super(Classifier, self).__init__()

        # conv1
        self.conv1_1 = nn.Conv2d(3, 64, 3, 1, 1)
        self.relu1_1 = nn.ReLU(inplace=True)
        self.conv1_2 = nn.Conv2d(64, 64, 3, 1, 1)
        self.relu1_2 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(2, stride=2)  # 1/2

        # conv2
        self.conv2_1 = nn.Conv2d(64, 128, 3, 1, 1)
        self.relu2_1 = nn.ReLU(inplace=True)
        self.conv2_2 = nn.Conv2d(128, 128, 3, 1, 1)
        self.relu2_2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(2, stride=2)  # 1/4

        # conv3
        self.conv3_1 = nn.Conv2d(128, 256, 3, 1, 1)
        self.relu3_1 = nn.ReLU(inplace=True)
        self.conv3_2 = nn.Conv2d(256, 256, 3, 1, 1)
        self.relu3_2 = nn.ReLU(inplace=True)
        self.conv3_3 = nn.Conv2d(256, 256, 3, 1, 1)
        self.relu3_3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(2, stride=2)  # 1/8

        # conv4
        self.conv4_1 = nn.Conv2d(256, 512, 3, 1, 1)
        self.relu4_1 = nn.ReLU(inplace=True)
        self.conv4_2 = nn.Conv2d(512, 512, 3, 1, 1)
        self.relu4_2 = nn.ReLU(inplace=True)
        self.conv4_3 = nn.Conv2d(512, 512, 3, 1, 1)
        self.relu4_3 = nn.ReLU(inplace=True)
        self.pool4 = nn.MaxPool2d(2, stride=2)  # 1/16

        # conv5
        self.conv5_1 = nn.Conv2d(512, 512, 3, 1, 1)
        self.relu5_1 = nn.ReLU(inplace=True)
        self.conv5_2 = nn.Conv2d(512, 512, 3, 1, 1)
        self.relu5_2 = nn.ReLU(inplace=True)
        self.conv5_3 = nn.Conv2d(512, 512, 3, 1, 1)
        self.relu5_3 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(2, stride=2)  # 1/32

        # fc6 (1x1 convolution)
        self.fc6 = nn.Conv2d(512, 4096, 1)
        self.relu6 = nn.ReLU(inplace=True)
        self.drop6 = nn.Dropout2d()

        # fc7 (1x1 convolution)
        self.fc7 = nn.Conv2d(4096, 4096, 1)
        self.relu7 = nn.ReLU(inplace=True)
        self.drop7 = nn.Dropout2d()

        # 1x1 scoring layers for the 1/32, 1/8 and 1/16 feature maps.
        self.score_fr = nn.Conv2d(4096, 7, 1)
        self.score_pool3 = nn.Conv2d(256, 7, 1)
        self.score_pool4 = nn.Conv2d(512, 7, 1)

        # Learned upsampling: x2 (1/32 -> 1/16), x8 (1/8 -> full), x2 (1/16 -> 1/8).
        self.upscore2 = nn.ConvTranspose2d(7, 7, 2, 2, bias=False)
        self.upscore8 = nn.ConvTranspose2d(7, 7, 16, 8, bias=False)
        self.up_pool4 = nn.ConvTranspose2d(7, 7, 2, 2, bias=False)

    def copy_param_vgg16(self, vgg16):
        """Copy the convolution weights/biases of ``vgg16.features`` into this model.

        ``vgg16.features`` is walked in parallel with this model's conv/relu/pool
        layers; only positions where both sides are ``nn.Conv2d`` are copied.
        ``fc6``, ``fc7`` and the scoring/upsampling layers are not touched.

        Args:
            vgg16: object exposing an iterable ``features`` of layers.

        Raises:
            AssertionError: if a paired conv layer has a different weight or bias shape.
        """
        features = [
            self.conv1_1, self.relu1_1,
            self.conv1_2, self.relu1_2,
            self.pool1,
            self.conv2_1, self.relu2_1,
            self.conv2_2, self.relu2_2,
            self.pool2,
            self.conv3_1, self.relu3_1,
            self.conv3_2, self.relu3_2,
            self.conv3_3, self.relu3_3,
            self.pool3,
            self.conv4_1, self.relu4_1,
            self.conv4_2, self.relu4_2,
            self.conv4_3, self.relu4_3,
            self.pool4,
            self.conv5_1, self.relu5_1,
            self.conv5_2, self.relu5_2,
            self.conv5_3, self.relu5_3,
            self.pool5,
        ]

        for l1, l2 in zip(vgg16.features, features):
            if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d):
                assert l1.weight.size() == l2.weight.size()
                assert l1.bias.size() == l2.bias.size()
                # copy_ writes the values into this model's own tensors. Rebinding
                # `.data` instead would share storage with the source network and
                # would move an already-on-device parameter back to the source's device.
                with torch.no_grad():
                    l2.weight.copy_(l1.weight)
                    l2.bias.copy_(l1.bias)

    def forward(self, x):
        """Return class scores with 7 channels for an input of shape ``(N, 3, H, W)``.

        For H and W divisible by 32 the output has the same spatial size as ``x``.
        """
        h = x
        h = self.relu1_1(self.conv1_1(h))
        h = self.relu1_2(self.conv1_2(h))
        h = self.pool1(h)

        h = self.relu2_1(self.conv2_1(h))
        h = self.relu2_2(self.conv2_2(h))
        h = self.pool2(h)

        h = self.relu3_1(self.conv3_1(h))
        h = self.relu3_2(self.conv3_2(h))
        h = self.relu3_3(self.conv3_3(h))
        h = self.pool3(h)
        pool3 = h  # 1/8, 256 channels (skip connection)

        h = self.relu4_1(self.conv4_1(h))
        h = self.relu4_2(self.conv4_2(h))
        h = self.relu4_3(self.conv4_3(h))
        h = self.pool4(h)
        pool4 = h  # 1/16, 512 channels (skip connection)

        h = self.relu5_1(self.conv5_1(h))
        h = self.relu5_2(self.conv5_2(h))
        h = self.relu5_3(self.conv5_3(h))
        h = self.pool5(h)

        h = self.relu6(self.fc6(h))
        h = self.drop6(h)
        h = self.relu7(self.fc7(h))
        h = self.drop7(h)

        # Fuse 1/32 scores (upsampled x2) with pool4 scores at 1/16.
        upscore2 = self.upscore2(self.score_fr(h))
        score_pool4c = self.score_pool4(pool4)
        h = upscore2 + score_pool4c

        # Upsample x2 again and fuse with pool3 scores at 1/8.
        upscore_pool4 = self.up_pool4(h)
        score_pool3c = self.score_pool3(pool3)
        h = upscore_pool4 + score_pool3c

        # Kernel 16 / stride 8 yields 8*h + 8 pixels per side; dropping the
        # first 4 and keeping the input extent centres the crop.
        h = self.upscore8(h)
        h = h[:, :, 4:4 + x.size()[2], 4:4 + x.size()[3]].contiguous()
        return h.float()


def cross_entropy2d(input, target, weight=None, size_average=True):
    """Pixel-wise cross entropy that skips pixels whose target is negative.

    Args:
        input: raw scores of shape (n, c, h, w).
        target: class indices with n*h*w elements, e.g. shape (n, h, w);
            entries < 0 are excluded from the loss.
        weight: optional per-class weights passed on to the NLL loss.
        size_average: when True the summed loss is divided by the number of
            pixels with target >= 0; when False the sum is returned.

    Returns:
        Scalar loss tensor.
    """
    n, c, h, w = input.size()
    # Channels-last log-probabilities: (n, h, w, c).
    log_probs = F.log_softmax(input, dim=1).permute(0, 2, 3, 1)
    # Keep only the rows that belong to valid pixels -> (num_valid, c).
    picked = log_probs[target.view(n, h, w) >= 0]
    keep = target >= 0
    labels = target[keep]
    total = F.nll_loss(picked, labels, weight=weight, reduction='sum')
    if size_average:
        total /= keep.data.sum()
    return total

# fix random seed
def same_seeds(seed):
    """Seed NumPy and PyTorch (CPU and, when available, CUDA) and make cuDNN deterministic."""
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


In [None]:
# Run a garbage-collection pass to release objects that are no longer referenced.
gc.collect()

In [None]:
# Drop the previous model, if one exists, so its memory can be reclaimed.
# A plain `del model` raises NameError on a fresh kernel (no model built yet),
# which breaks Restart & Run All.
globals().pop("model", None)
gc.collect()

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"

# Pretrained VGG-16 from torchvision; only its convolution weights are reused.
vgg16 = torchvision.models.vgg16(pretrained=True)
# Seed after building vgg16 so the random initialisation of Classifier is reproducible.
same_seeds(7414)

path = "/content/drive/MyDrive/model"
save_path = os.path.join(path, "FCN8.ckpt")

model = Classifier()
model.copy_param_vgg16(vgg16)
model.to(device)
#model.load_state_dict(torch.load(save_path))
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)

# The source network is no longer needed once its weights have been copied.
del vgg16
gc.collect()

# number of epoch
n_epochs = 70

best_acc = 0.

if not os.path.exists(path):
    os.mkdir(path)


for epoch in range(n_epochs):

    # ---------- train ----------
    model.train()

    train_losses = []
    train_accs = []

    for imgs, labels in tqdm(train_loader):
        # Inputs and labels need no gradient; move each to the device once.
        imgs = imgs.to(device)
        labels = labels.to(device).long()

        logits = model(imgs)
        loss = cross_entropy2d(logits, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        acc = (logits.argmax(dim=1) == labels).float().mean()

        # Store Python floats so no device tensors are kept alive per step.
        train_losses.append(loss.item())
        train_accs.append(acc.item())

    train_loss = sum(train_losses) / len(train_losses)
    train_acc = sum(train_accs) / len(train_accs)

    # print
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- validation ----------
    model.eval()

    valid_losses = []
    valid_accs = []

    with torch.no_grad():
        for imgs, labels in tqdm(val_loader):
            imgs = imgs.to(device)
            labels = labels.to(device).long()

            logits = model(imgs)
            loss = cross_entropy2d(logits, labels)
            acc = (logits.argmax(dim=1) == labels).float().mean()

            valid_losses.append(loss.item())
            valid_accs.append(acc.item())

    valid_loss = sum(valid_losses) / len(valid_losses)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Keep the checkpoint with the best validation pixel accuracy so far.
    if valid_acc > best_acc:
        best_acc = valid_acc
        print(f"\nsave model with acc = {best_acc:.5f}")
        torch.save(model.state_dict(), save_path)

    # print
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

In [None]:
! wget https://www.dropbox.com/s/r3fb651nv0p7jif/FCN8.ckpt?dl=0

In [None]:
# Evaluation uses the images of the "validation" folder, visited in sorted order.
validation_dir = os.path.join(data_path, "validation")
test_fnames = sorted(glob.glob(os.path.join(validation_dir, '*.jpg')))
test_set = SSDataset(test_fnames)
test_loader = DataLoader(
    test_set,
    batch_size=1,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)


In [None]:
pred_path = "./pred/"
# One output path per test image: "<dir>/<id>_sat.jpg" -> "./pred/<id>.png".
# The id is taken from the file's basename (text before the first "_"), so ids
# of any length work; the previous fixed slice [-12:-8] assumed 4 characters.
pred_fnames = [
    os.path.join(pred_path, os.path.basename(fname).split("_")[0] + ".png")
    for fname in test_fnames
]

In [None]:
os.makedirs(pred_path, exist_ok=True)

device = "cuda" if torch.cuda.is_available() else "cpu"
save_path = "/content/FCN8.ckpt"
model = Classifier().to(device)
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
model.load_state_dict(torch.load(save_path, map_location=device))
model.eval()

# (num_classes, 3) uint8 lookup table: row i is the RGB colour of class i.
palette = np.array([cls_color[idx] for idx in range(len(cls_color))], dtype=np.uint8)

# test_loader uses batch_size=1 and shuffle=False, so batches line up with pred_fnames.
with torch.no_grad():
    for (img, _), out_fname in zip(test_loader, pred_fnames):
        logit = model(img.to(device))
        # (H, W) array of predicted class indices for the single image in the batch.
        pred = logit.argmax(dim=1)[0].cpu().numpy()
        # Vectorized colouring; replaces the per-pixel Python loop fixed to 512x512.
        imageio.imsave(out_fname, palette[pred])


In [None]:
# Run the repository's mean_iou_evaluate.py on the validation folder (-g) and the saved predictions in ./pred/ (-p).
! python3 ./hw1-SonicBenz0408/mean_iou_evaluate.py -g ./hw1_data/p2_data/validation/ -p ./pred/