In [1]:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import random
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import utils
from fcn_class import *
import torchvision
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import time

In [2]:
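# One-off helper (kept disabled): builds the CSV that CityScapesDataset reads,
# with image paths in column 0 and label paths in column 1.
# NOTE: needs `import glob` if re-enabled; this version only writes test.csv.
#images=sorted(glob.glob("/datasets/cityscapes/leftImg8bit/test/*/*.png"))
#labels = sorted(glob.glob("/datasets/cityscapes/gtFine/test/*/*labelIds.png"))
#df = pd.DataFrame({'images':images, 'labels':labels})
#df.to_csv('test.csv', index= False)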

In [3]:
# Global configuration shared by the dataset, the model setup and the metrics.

# Number of classes: size of the one-hot target and of the per-class IoU list.
n_class = 34

# Per-channel means in BGR order, rescaled from the 0-255 range to 0-1.
means = np.true_divide(np.array([103.939, 116.779, 123.68]), 255.)

# Presumably the full-resolution image height and width (TODO confirm); the
# functions visible in this notebook derive their own local h / w instead.
h = 1024
w = 2048

In [4]:
class CityScapesDataset(Dataset):
    """Dataset of (image, label) pairs listed in a CSV file.

    The CSV is read with pandas; column 0 holds the image path and column 1
    the label-image path for each sample.

    Args:
        csv_file: Path of the CSV file listing the samples.
        phase: Split name. For 'val' and 'test' the random horizontal flip is
            NOT forced on by ``crop``; any other value keeps the augmentation.
        n_class: Number of channels of the one-hot target.
        crop: When truthy, ``new_h`` / ``new_w`` are set to 256 and (outside
            the evaluation phases) ``flip_rate`` is forced to 0.5.
            NOTE(review): ``__getitem__`` never crops; ``new_h`` / ``new_w``
            are stored but unused in this class.
        flip_rate: Probability of a horizontal flip; used as given unless
            overridden by ``crop`` as described above.

    Each item is a tuple ``(img, target, label)``:
        img:    float tensor, channels-first, BGR order, scaled to 0-1 and
                mean-subtracted per channel.
        target: float tensor of shape (n_class, H, W), one-hot over ``label``.
        label:  long tensor of shape (H, W) with the raw label ids.
    """

    # Phases in which data augmentation must not be forced on.
    _EVAL_PHASES = ('val', 'test')

    def __init__(self, csv_file, phase, n_class=n_class, crop=True, flip_rate=0.):
        self.data      = pd.read_csv(csv_file)
        self.means     = means
        self.n_class   = n_class
        self.phase     = phase

        self.flip_rate = flip_rate
        self.crop      = crop
        if self.crop:
            # Only augment training-style phases; validation and test samples
            # should be deterministic.
            if phase not in self._EVAL_PHASES:
                self.flip_rate = 0.5
            self.new_h = 256
            self.new_w = 256

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load, optionally flip, and normalise the sample at row ``idx``."""
        img_name   = self.data.iloc[idx, 0]
        label_name = self.data.iloc[idx, 1]

        # Context managers make sure the underlying files are closed promptly.
        with Image.open(img_name) as image_file:
            img = np.asarray(image_file.convert('RGB'))
        with Image.open(label_name) as label_file:
            label = np.asarray(label_file)

        # Flip image and label together so they stay aligned.
        if random.random() < self.flip_rate:
            img   = np.fliplr(img)
            label = np.fliplr(label)

        # reduce mean
        img = img[:, :, ::-1]  # switch to BGR
        # The division allocates a new float array, so the in-place
        # subtraction below never touches the (read-only) decoded image.
        img = np.transpose(img, (2, 0, 1)) / 255.
        for channel in range(3):
            img[channel] -= self.means[channel]

        # convert to tensor (copy() removes the negative strides left by fliplr)
        img = torch.from_numpy(img.copy()).float()
        label = torch.from_numpy(label.copy()).long()

        # create one-hot encoding; label ids outside [0, n_class) stay all-zero
        height, width = label.shape
        target = torch.zeros(self.n_class, height, width)
        for c in range(self.n_class):
            target[c][label == c] = 1

        return img, target, label

In [5]:
# One dataset per split, each backed by its own CSV listing.
train_dataset = CityScapesDataset('train.csv', 'train')
val_dataset = CityScapesDataset('val.csv', 'val')
test_dataset = CityScapesDataset('test.csv', 'test')

# Batch loaders: all three shuffle and use 4 worker processes; only the
# batch size differs (2 for train / val, 6 for test).
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=6, shuffle=True, num_workers=4)

In [6]:
# Debug check (disabled): print the smallest label id in the first training batch.
#for X,Y,Z in train_loader:
#    print(torch.min(Z))
#    break

In [7]:
def init_weights(m):
    """Initialise a fully connected layer in place; other modules are left alone.

    Intended to be passed to ``Module.apply``. For a module whose type is
    exactly ``torch.nn.Linear`` the weight gets Xavier-uniform values and the
    bias, when the layer has one, is filled with 0.01.

    Args:
        m: Any module; only plain ``Linear`` layers are modified.
    """
    # Exact type match (not isinstance), so subclasses of Linear are skipped.
    if type(m) is torch.nn.Linear:
        # In-place variant; the name without the trailing underscore is deprecated.
        torch.nn.init.xavier_uniform_(m.weight)
        # Linear(bias=False) has bias None - nothing to fill in that case.
        if m.bias is not None:
            torch.nn.init.constant_(m.bias, 0.01)
        
# ---- Training setup ----

# Number of passes over train_loader; also sizes the per-epoch score arrays.
epochs = 500

# Resume from the checkpoint file that train() writes after every epoch.
# torch.load unpickles the whole model object, so only load files you trust.
# To start from a fresh network instead, use the two disabled lines below.
#fcn_model = FCN(n_class=n_class)
#fcn_model.apply(init_weights)
fcn_model = torch.load('best_model')

# Unweighted cross-entropy. The disabled per-class weight vector below has
# 22 entries, which does not match n_class = 34 - adjust before re-enabling.
#weights    = torch.Tensor([10, 50, 50, 40, 30,20, 50, 60, 60, 30, 50,50, 40, 40, 50, 30,40, 50, 40, 50, 50, 0]).cuda()
criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(params=fcn_model.parameters(), lr=5e-3)

In [8]:
# Whether a CUDA device is available; train() and val() move batches accordingly.
use_gpu = torch.cuda.is_available()
#if use_gpu:
   # fcn_model = fcn_model.cuda()

# Validation history written by val(): one row of per-class IoUs and one
# pixel-accuracy value per epoch.
IU_scores = np.zeros(shape=(epochs, n_class))
pixel_scores = np.zeros(shape=epochs)

def train():
    """Train ``fcn_model`` for ``epochs`` epochs, validating after each one.

    For every batch from ``train_loader`` the optimiser takes one step on the
    cross-entropy between the model output and the integer label map. The
    loss is printed every 10 iterations. After each epoch the whole model is
    saved to 'best_model' (unconditionally, not only when it improves) and
    ``val(epoch)`` is run.
    """
    for epoch in range(epochs):
        # val() switches the model to eval mode, and a model restored with
        # torch.load keeps whatever mode it was pickled in. Explicitly enable
        # training behaviour (dropout, batch-norm statistics) for every epoch.
        fcn_model.train()

        ts = time.time()
        # The one-hot target (second element) is not needed by CrossEntropyLoss.
        for step, (X, _one_hot, Y) in enumerate(train_loader):
            optimizer.zero_grad()

            if use_gpu:
                inputs = X.cuda()
                labels = Y.cuda().long()
            else:
                inputs, labels = X, Y.long()

            # The loss is computed in double precision, as before.
            outputs = fcn_model(inputs).double()
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            if step % 10 == 0:
                print("epoch{}, iter{}, loss: {}".format(epoch, step, loss.item()))

        print("Finish epoch {}, time elapsed {}".format(epoch, time.time() - ts))
        torch.save(fcn_model, 'best_model')

        val(epoch)


def val(epoch):
    """Evaluate ``fcn_model`` on ``val_loader`` and record the scores.

    Computes, per validation image, the per-class IoU and the pixel accuracy
    of the arg-max prediction, averages them over the validation set
    (ignoring NaN IoUs), prints a summary and stores the results in
    ``IU_scores[epoch]`` and ``pixel_scores[epoch]``.

    The model is put in eval mode for the duration of the call and returned
    to the mode it was in before, so a surrounding training loop is not
    silently left in eval mode.

    Args:
        epoch: Row index into ``IU_scores`` / ``pixel_scores``; also printed.
    """
    was_training = fcn_model.training
    fcn_model.eval()
    total_ious = []
    pixel_accs = []
    try:
        # No gradients are needed for evaluation; skipping the autograd graph
        # saves memory and time.
        with torch.no_grad():
            for X, _one_hot, Y in val_loader:
                inputs = X.cuda() if use_gpu else X

                output = nn.Softmax(dim=1)(fcn_model(inputs))
                output = output.cpu().numpy()

                N, _, h, w = output.shape

                # Most probable class per pixel: (N, C, h, w) -> (N, h, w).
                pred = output.argmax(axis=1)

                target = Y.cpu().numpy().reshape(N, h, w)
                for p, t in zip(pred, target):
                    total_ious.append(iou(p, t))
                    pixel_accs.append(pixel_acc(p, t))
    finally:
        # Restore the caller's mode even if evaluation fails part-way.
        fcn_model.train(was_training)

    # Calculate average IoU
    total_ious = np.array(total_ious).T  # n_class * val_len
    ious = np.nanmean(total_ious, axis=1)
    pixel_accs = np.array(pixel_accs).mean()
    print("epoch{}, pix_acc: {}, meanIoU: {}, IoUs: {}".format(epoch, pixel_accs, np.nanmean(ious), ious))
    IU_scores[epoch] = ious
   # np.save(os.path.join(score_dir, "meanIU"), IU_scores)
    pixel_scores[epoch] = pixel_accs
  #  np.save(os.path.join(score_dir, "meanPixel"), pixel_scores)

In [9]:
def iou(pred, target, num_classes=None):
    """Return the per-class intersection-over-union of two label maps.

    Args:
        pred: Array of predicted class ids.
        target: Array of ground-truth class ids, same shape as ``pred``.
        num_classes: Number of classes to score (ids ``0 .. num_classes-1``).
            Defaults to the module-level ``n_class`` when omitted.

    Returns:
        A list with one float per class. A class that occurs in neither
        ``pred`` nor ``target`` gets NaN so that it can be skipped with
        ``np.nanmean`` when averaging.
    """
    if num_classes is None:
        num_classes = n_class

    ious = []
    for cls in range(num_classes):
        pred_inds = pred == cls
        target_inds = target == cls
        # Pixels labelled `cls` in both maps.
        intersection = int(pred_inds[target_inds].sum())
        union = int(pred_inds.sum()) + int(target_inds.sum()) - intersection
        if union == 0:
            # Class absent from both prediction and target: undefined IoU.
            ious.append(float('nan'))
        else:
            ious.append(float(intersection) / union)
    return ious


def pixel_acc(pred, target):
    """Return the fraction of positions where ``pred`` equals ``target``.

    The denominator counts the positions where ``target`` equals itself.
    """
    hits = pred == target
    counted = target == target
    return hits.sum() / counted.sum()


# Entry point: when this code runs as __main__, start the training loop.
if __name__ == "__main__":
    #val(0)  # show the accuracy before training
    train()

epoch0, iter0, loss: 0.4463502178779811
epoch0, iter10, loss: 0.6457117483242868
epoch0, iter20, loss: 0.5607968049222563
epoch0, iter30, loss: 0.5167275849805356
epoch0, iter40, loss: 0.4563422496853593
epoch0, iter50, loss: 0.20631767234187015
epoch0, iter60, loss: 0.677944108788044
epoch0, iter70, loss: 0.660041156974156
epoch0, iter80, loss: 0.23157649643485556
epoch0, iter90, loss: 0.5799989332206382
epoch0, iter100, loss: 0.41484752096266614
epoch0, iter110, loss: 0.340049915035017
epoch0, iter120, loss: 0.24488240683953477
epoch0, iter130, loss: 0.5783911667578869
epoch0, iter140, loss: 0.3765303666804925
epoch0, iter150, loss: 0.42706544865266616
epoch0, iter160, loss: 0.30958036189881116
epoch0, iter170, loss: 1.331956568425338
epoch0, iter180, loss: 0.4070241424622282
epoch0, iter190, loss: 0.3864539001069456
epoch0, iter200, loss: 0.6465504740130822
epoch0, iter210, loss: 0.4541193064501633
epoch0, iter220, loss: 0.3928602737734199
epoch0, iter230, loss: 0.5244891888912123
e

KeyboardInterrupt: 