In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import glob
import os
import numpy as np
from PIL import Image
import torchvision.models as models
import copy
from torchvision.utils import save_image
import PIL
import skimage.io
import multiprocessing as mp
import time

In [2]:
def dataType(fn):
    return fn.split('/')[-1].split('_')[0]
    
class npLoader(Dataset):
    def __init__(self, root, transform = None):
        self.transform = transform
        self.root = root
        fnList = sorted(glob.glob(os.path.join(self.root, '*.npy')))
        fnImgList = [fn for fn in fnList if dataType(fn) == 'img']
        fnMaskList = [fn for fn in fnList if dataType(fn) == 'mask']
        self.len = len(fnImgList)
        self.fnList = [(fnImgList[i], fnMaskList[i]) for i in range(self.len)]
    
    def __getitem__(self, index):
        fnImg, fnMask = self.fnList[index]
        return np.load(fnImg), np.load(fnMask)
            
    def __len__(self):
        return self.len

In [3]:
# parameters
train_set = npLoader(root='hw2_data/p2_data/train_npy', transform=transforms.ToTensor())
test_set = npLoader(root='hw2_data/p2_data/validation_npy', transform=transforms.ToTensor())

trainset_loader = DataLoader(train_set, batch_size=16, shuffle=True, num_workers=0)
testset_loader = DataLoader(test_set, batch_size=mp.cpu_count(), shuffle=False, num_workers=0)

criterion = nn.NLLLoss()

In [4]:
# activate cuda
use_cuda = torch.cuda.is_available()
torch.manual_seed(123)
device = torch.device("cuda:1" if use_cuda else "cpu")
#device = torch.device('cpu')
print('Device used:', device)

Device used: cuda:1


In [5]:
class fcn32s(nn.Module):
    def __init__(self, num_classes, pretrained = True):
        super(fcn32s, self).__init__()
        self.vgg = torchvision.models.vgg16(pretrained=True)
        self.vgg.classifier = nn.Sequential(
            nn.Conv2d(512, 4096, kernel_size=(2, 2), stride=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
            
            nn.Conv2d(4096, 4096, kernel_size=(1, 1), stride=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
            
            nn.Conv2d(4096, num_classes, kernel_size=(1, 1), stride=(1, 1)),
            nn.ConvTranspose2d(num_classes, num_classes, 64 , 32 , 0, bias=False),
        )
    def  forward (self, x) :        
        x = self.vgg.features(x)
        x = self.vgg.classifier(x)
        return x
    
class fcn16s(nn.Module):
    def __init__(self, num_classes, pretrained = True):
        super(fcn16s, self).__init__()
        self.vgg = torchvision.models.vgg16(pretrained=True)
        self.to_pool4 = nn.Sequential(*list(self.vgg.features.children())[:24])
        self.to_pool5 = nn.Sequential(*list(self.vgg.features.children())[24:])
        self.vgg.classifier = nn.Sequential(
            nn.Conv2d(512, 4096, kernel_size=(2, 2), stride=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
            
            nn.Conv2d(4096, 4096, kernel_size=(1, 1), stride=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
            
            nn.Conv2d(4096, num_classes, kernel_size=(1, 1), stride=(1, 1)),
            nn.ConvTranspose2d(num_classes, 512, 4 , 2 , 0, bias=False)
            )
        self.upsample16 = nn.ConvTranspose2d(512, num_classes, 16 , 16 , 0, bias=False)
        
    def forward (self, x) :        
        pool4_output = self.to_pool4(x) #pool4 output size torch.Size([64, 512, 16, 16])
        x = self.to_pool5(pool4_output)
        x = self.vgg.classifier(x)    # 2xconv7 output size torch.Size([64, 512, 16, 16])
        x = self.upsample16(x+pool4_output)
        return x

In [6]:
def save_checkpoint(checkpoint_path, model, optimizer):
    state = {'state_dict': model.state_dict(),
             'optimizer' : optimizer.state_dict()}
    torch.save(state, checkpoint_path)
    print('model saved to %s' % checkpoint_path)
    
def load_checkpoint(checkpoint_path, model, optimizer):
    state = torch.load(checkpoint_path)
    model.load_state_dict(state['state_dict'])
    optimizer.load_state_dict(state['optimizer'])
    print('model loaded from %s' % checkpoint_path)

In [7]:
def train_save(model, epoch, log_interval=25, save_interval = 5):
    model.train()
    optimizer = optim.Adam(model.parameters(),lr=0.0002, betas=(0.9, 0.999))
    iteration = 0
    for ep in range(epoch):
        print('')
        for batch_idx, (data, target) in enumerate(trainset_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            output = F.log_softmax(output, dim= 1)
            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            if iteration % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    ep, batch_idx * len(data), len(trainset_loader.dataset),
                    100. * batch_idx / len(trainset_loader), loss.item()))
            iteration += 1
        if ep % save_interval == 0:
            save_checkpoint('p2_fcn32_ep'+str(ep)+'.pth', model, optimizer)
        print('')
        test(model)

In [8]:
def mIoU(labels, pred):
    mean_iou = 0
    for i in range(6):
        tp_fp = np.sum(pred == i)
        tp_fn = np.sum(labels == i)
        tp = np.sum((pred == i) * (labels == i))
        iou = tp / (tp_fp + tp_fn - tp)
        mean_iou += iou / 6
        print('class #%d : %1.5f'%(i, iou))
    print('\nmean_iou: %f\n' % mean_iou)
    return mean_iou


currentBestIoU = 0
def test(model, log_interval=5):
    loss, iteration = 0, 0
    model.eval()
    optimizer = optim.Adam(model.parameters(),lr=0.0002, betas=(0.9, 0.999))
    print('start testing model...')
    with torch.no_grad(): # This will free the GPU memory used for back-prop
        start = time.time()
        predList, maskList = [], []
        for data, target in testset_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            output = F.log_softmax(output, dim= 1)
            loss += criterion(output, target).item()*len(data)
            pred = output.max(1, keepdim=True)[1]
            predList += [singleBatch[0] for singleBatch in pred.cpu().numpy()]
            maskList += [singleBatch for singleBatch in target.cpu().numpy()]
            iteration += 1
    IoU = mIoU(np.array(predList), np.array(maskList))
    loss /= len(testset_loader.dataset)
    print('')
    print('Test set average loss =',loss,'     mIoU =',IoU)
    global currentBestIoU
    if IoU > currentBestIoU:
        print("current best model updated!")
        currentBestIoU = IoU
        save_checkpoint('p2_current_best_fcn32-'+str(round(IoU*100.0,2))+'.pth', model, optimizer)

In [None]:
model = fcn32s(7)
model.to(device)
train_save(model, 25)


model saved to p2_fcn32_ep0.pth

start testing model...
class #0 : 0.65915
class #1 : 0.74986
class #2 : 0.00093
class #3 : 0.62193
class #4 : 0.58514
class #5 : 0.00105

mean_iou: 0.436346


Test set average loss = 0.8112429301562476      mIoU = 0.43634566894260063
current best model updated!
model saved to p2_current_best_fcn32-43.63.pth


start testing model...
class #0 : 0.68570
class #1 : 0.74806
class #2 : 0.00013
class #3 : 0.70642
class #4 : 0.64374
class #5 : 0.00013

mean_iou: 0.464029


Test set average loss = 0.7055190574334289      mIoU = 0.4640291288485409
current best model updated!
model saved to p2_current_best_fcn32-46.4.pth


start testing model...
class #0 : 0.68091
class #1 : 0.76368
class #2 : 0.00212
class #3 : 0.71289
class #4 : 0.68438
class #5 : 0.00075

mean_iou: 0.474121


Test set average loss = 0.5971740062134739      mIoU = 0.47412079135140267
current best model updated!
model saved to p2_current_best_fcn32-47.41.pth


start testing model...
class #0 : 0

In [7]:
print(fcn32s(7))

fcn32s(
  (vgg): VGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): ReLU(inplace=True)
      (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): ReLU(inplace=True)
      (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (13): ReLU(inplace=True)
      (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (15): ReLU(inplace=True)
      (16): Max