In [1]:
# Mounting your Google Drive is optional; you could also simply copy and
# upload the data to your Colab instance. This manual upload is also easy to do,
# but you will have to figure out how to do it.
from google.colab import drive
# Mount Google Drive under /content/gdrive/ so later cells can use paths below it.
drive.mount('/content/gdrive/')

KeyboardInterrupt: ignored

In [0]:
import os

# Working directory for this assignment inside the mounted Google Drive.
MP4_DIR = "/content/gdrive/My Drive/CS_543_MP4"
# exist_ok=True creates the directory only when it is missing, without a
# separate existence check that could race with another process.
os.makedirs(MP4_DIR, exist_ok=True)
# All relative paths used later (e.g. the dataset folder) resolve from here.
os.chdir(MP4_DIR)

In [0]:
ls

In [0]:
import glob
import os
import numpy as np
import seaborn as sns
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, average_precision_score

from PIL import Image
import torch
from torch import nn
from torch.utils import data
from torchvision.transforms import ToTensor
from torch.autograd import Variable

import copy
import torchvision.models as models

In [0]:
# Report whether PyTorch can see a CUDA GPU; as the last expression of the
# cell, the boolean result is displayed.
torch.cuda.is_available()

In [0]:
# Root folder of the dataset (relative to the current working directory);
# used as the default `data_dir` of SegmentationDataset.
DATASET_PATH = 'data/sbd/'
# When True, the training loop and the evaluation call move tensors to the GPU
# with `.cuda()`.
IS_GPU = True
# Presumably the number of segmentation classes.
# NOTE(review): this constant is not referenced elsewhere in the visible
# notebook; the network's output layer uses 9 channels independently of it,
# so keep the two in sync.
TOTAL_CLASSES = 9

class SegmentationDataset(data.Dataset):
    """
    Dataset of (image, label map) pairs read from `<data_dir>/<split>/`.

    Every `*.jpg` file in the split folder is paired with the `.png` file that
    shares its base name. Class names are read, one per line, from
    `<data_dir>/classes.txt`.

    If data loading is a bottleneck, you may want to optimize this for faster
    training. Possibilities include pre-loading all images and annotations
    into memory before training, so as to limit delays due to disk reads.

    Attributes:
        img_dir:   folder of the selected split.
        classes:   list of class names, in file order.
        n_classes: number of class names.
        split:     the split name passed to the constructor.
        data:      sorted list of sample paths without file extension.
    """
    def __init__(self, split="train", data_dir=DATASET_PATH):
        """
        @param    split       string    one of "train", "val", "test"
        @param    data_dir    string    dataset root containing classes.txt
                                        and one sub-folder per split
        """
        assert(split in ["train", "val", "test"])
        self.img_dir = os.path.join(data_dir, split)
        with open(os.path.join(data_dir, 'classes.txt'), 'r') as f:
            self.classes = [line.rstrip() for line in f]
        self.n_classes = len(self.classes)
        self.split = split
        # glob returns files in arbitrary, filesystem-dependent order; sorting
        # makes sample indices reproducible across runs and machines.
        image_paths = sorted(glob.glob(self.img_dir + '/*.jpg'))
        self.data = [os.path.splitext(path)[0] for path in image_paths]

    def __len__(self):
        """Number of samples (jpg images) found in the split folder."""
        return len(self.data)

    def __getitem__(self, index):
        """
        Load one sample.

        @param    index    int    position in the sorted sample list
        @return   (img, gt): `img` is the float tensor produced by ToTensor()
                  from the RGB image; `gt` is an int64 tensor holding the PNG
                  pixel values with a leading singleton dimension added.
        """
        base = self.data[index]
        # The context managers close the underlying files as soon as the pixel
        # data has been read, instead of leaving that to garbage collection.
        with Image.open(base + '.jpg') as img_file:
            # convert('RGB') guarantees three channels even if a JPEG happens
            # to be stored as grayscale or CMYK.
            img = ToTensor()(img_file.convert('RGB'))
        with Image.open(base + '.png') as gt_file:
            # np.array(..., dtype=int64) makes a writable copy, so the tensor
            # does not alias PIL's read-only buffer.
            gt = torch.from_numpy(np.array(gt_file, dtype=np.int64)).unsqueeze(0)
        return img, gt

In [0]:
# #########
# TODO: design your own network here. The expectation is that you write it from scratch, but it's okay to get some inspiration
# from conference papers. The bottom line is that you must not simply copy code from other repos.
# #########

import torch.nn as nn
import torch.nn.functional as F

def Conv(Cin,Cout):
    """
    Build a convolution block: 3x3 convolution -> batch norm -> ReLU.

    The convolution uses stride 1 and padding 1, so the spatial size of the
    input is preserved and only the channel count changes.

    @param    Cin     int    number of input channels
    @param    Cout    int    number of output channels
    @return   nn.Sequential holding the three layers in that order
    """
    layers = [
        nn.Conv2d(Cin, Cout, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(Cout),
        nn.ReLU(inplace=True),
    ]
    return nn.Sequential(*layers)

    
class ResNet(nn.Module):
    """
    Encoder-decoder segmentation network built on a pretrained ResNet-18.

    Encoder: the first eight children of torchvision's resnet18, split into
    five stages (`down0` .. `down4`) whose outputs are kept as skip
    connections. Their weights are frozen (`requires_grad = False`).
    A separate trainable `preprocess` block provides a full-resolution,
    64-channel skip connection computed directly from the input.

    Decoder: five stages; each one upsamples the current feature map to the
    size of the matching skip connection, concatenates both along the channel
    axis and applies a `Conv` block. A final 1x1 convolution maps the 64
    decoder channels to `n_classes` score maps.

    NOTE(review): freezing only disables gradient updates of the encoder
    weights; its BatchNorm layers still update their running statistics
    whenever the module is in train() mode.
    NOTE(review): `pretrained=True` is the older torchvision API; newer
    releases prefer the `weights=` argument. Kept for compatibility.
    """

    def __init__(self, n_classes=9): # feel free to modify input parameters
        """
        @param    n_classes    int    number of output channels (one score
                                      map per class); defaults to 9, the value
                                      that was previously hard-coded
        """
        super(ResNet, self).__init__()
        resnet18 = models.resnet18(pretrained=True)

        # Freeze the pretrained encoder; only the layers created below train.
        for param in resnet18.parameters():
            param.requires_grad = False

        resnet18 = list(resnet18.children())
        self.preprocess = Conv(3, 64)
        self.down0 = nn.Sequential(*resnet18[:3])
        self.down1 = nn.Sequential(*resnet18[3:5])
        self.down2 = resnet18[5]
        self.down3 = resnet18[6]
        self.down4 = resnet18[7]

        # Input channels = channels of the skip connection + channels of the
        # upsampled decoder feature map.
        self.conv1 = Conv(256 + 512, 512)
        self.conv2 = Conv(128 + 512, 256)
        self.conv3 = Conv(64 + 256, 256)
        self.conv4 = Conv(64 + 256, 128)
        self.conv5 = Conv(64 + 128, 64)
        self.conv6 = nn.Conv2d(64, n_classes, 1)

    def _decode(self, x, skip, conv):
        """Upsample `x` to the spatial size of `skip`, concatenate both on the channel axis and apply `conv`."""
        # Resizing to the skip connection's exact size (instead of a fixed x2
        # factor) gives the same result when sizes halve exactly, and also
        # works when an odd size was rounded up by a stride-2 encoder stage.
        x = F.interpolate(x, size=skip.shape[2:], mode='bilinear', align_corners=True)
        return conv(torch.cat((x, skip), dim=1))

    def forward(self, x):
        """
        @param    x    torch.Tensor    batch of 3-channel images (N, 3, H, W)
        @return   torch.Tensor of raw class scores with `n_classes` channels
                  at the input's spatial size
        """
        x_raw = self.preprocess(x)

        # Encoder: keep every stage output for the skip connections.
        x0 = self.down0(x)
        x1 = self.down1(x0)
        x2 = self.down2(x1)
        x3 = self.down3(x2)
        x4 = self.down4(x3)

        # Decoder: walk back up, fusing with progressively finer features.
        x = self._decode(x4, x3, self.conv1)
        x = self._decode(x, x2, self.conv2)
        x = self._decode(x, x1, self.conv3)
        x = self._decode(x, x0, self.conv4)
        x = self._decode(x, x_raw, self.conv5)

        return self.conv6(x)

In [0]:
def segmentation_eval(gts, preds, classes, plot_file_name):
    """
    Compute per-class AP and IoU, save the confusion-matrix plot and print a
    table with one row per class plus a final row of means.

    @param    gts               numpy.ndarray   ground truth labels, indexed
                                                as [sample, 0, row, col]
    @param    preds             numpy.ndarray   per-class scores, class axis
                                                at position 1
    @param    classes           list of string  class names
    @param    plot_file_name    string          file the plot is saved to
    @return   (aps, ious)       per-class average precisions and IoUs
    """
    ious, counts = compute_confusion_matrix(gts, preds)
    aps = compute_ap(gts, preds)
    plot_results(counts, ious, aps, classes, plot_file_name)

    row = '{:>20s}: AP: {:0.2f}, IoU: {:0.2f}'
    for idx, name in enumerate(classes):
        print(row.format(name, aps[idx], ious[idx]))
    print(row.format('mean', np.mean(aps), np.mean(ious)))
    return aps, ious

def plot_results(counts, ious, aps, classes, file_name):
    """
    Draw the row-normalised confusion matrix as an annotated heatmap, with two
    extra columns holding per-class AP and IoU, and save it to `file_name`.

    @param    counts       numpy.ndarray    confusion matrix (rows: true
                                            labels, columns: predicted labels)
    @param    ious         sequence         per-class IoU values
    @param    aps          sequence         per-class AP values
    @param    classes      list of string   class names; must be a list since
                                            it is concatenated with one
    @param    file_name    string           output path of the figure
    """
    fig, ax = plt.subplots(1,1)

    # Each row is divided by its total; AP and IoU are appended as columns and
    # everything is shown as a percentage.
    row_totals = np.sum(counts, 1, keepdims=True)
    ap_column = np.array(aps).reshape(-1,1)
    iou_column = np.array(ious).reshape(-1,1)
    table = np.concatenate([counts / row_totals, ap_column, iou_column], 1) * 100.
    sns.heatmap(table, annot=True, ax=ax, fmt='3.0f')

    # labels, title and ticks
    ax.set_xlabel('Predicted labels')
    ax.set_ylabel('True labels')
    title = 'Confusion Matrix, mAP: {:5.1f}, mIoU: {:5.1f}'.format(
        np.mean(aps)*100., np.mean(ious)*100.)
    ax.set_title(title)
    ax.xaxis.set_ticklabels(classes + ['AP', 'IoU'], rotation=90)
    ax.yaxis.set_ticklabels(classes, rotation=0)

    fig.savefig(file_name, bbox_inches='tight')

def compute_ap(gts, preds):
    """
    Average precision of every class, treating each class as a binary
    "pixel belongs to class c" problem scored by channel c of `preds`.

    @param    gts      numpy.ndarray    ground truth labels
    @param    preds    numpy.ndarray    per-class scores, class axis at
                                        position 1
    @return   list with one AP value per class
    """
    n_classes = preds.shape[1]
    # calc_pr returns a 3-tuple whose first element is the AP; the two curves
    # are not needed here.
    return [calc_pr(gts == c, preds[:, c:c+1, :, :])[0] for c in range(n_classes)]

def calc_pr(gt, out, wt=None):
    """
    Precision/recall curve and average precision of a binary problem.

    Samples are ranked by decreasing score; precision and recall are then
    computed cumulatively along that ranking.

    @param    gt     numpy.ndarray    binary ground truth (any shape)
    @param    out    numpy.ndarray    scores, same number of elements as gt
    @param    wt     unused
    @return   (ap, rec, prec) -- note the order: recall comes before precision
    """
    labels = gt.astype(np.float64).reshape(-1)
    scores = out.astype(np.float64).reshape(-1)

    # Rank samples from highest to lowest score.
    order = np.argsort(scores, axis=0)[::-1]
    ranked_labels = labels[order]

    positives_so_far = np.cumsum(ranked_labels)
    # A column of ones, written as in the reference implementation, so the
    # cumulative sum is the rank 1..N of each sample.
    samples_so_far = np.cumsum(ranked_labels - ranked_labels + 1)

    prec = positives_so_far / samples_so_far
    rec = positives_so_far / np.sum(ranked_labels)
    return voc_ap(rec, prec), rec, prec

def voc_ap(rec, prec):
    """
    Area under the precision/recall curve after replacing precision with its
    monotonically non-increasing envelope.

    @param    rec     numpy.ndarray    recall values along the ranking
    @param    prec    numpy.ndarray    precision values along the ranking
    @return   average precision (numpy scalar)
    """
    zero = np.zeros((1,1))
    one = np.ones((1,1))
    # Pad the curve: recall runs from 0 to 1, precision is 0 at both ends.
    mrec = np.vstack((zero, rec.reshape((-1,1)), one))
    mpre = np.vstack((zero, prec.reshape((-1,1)), zero))

    # Envelope: each precision becomes the maximum of itself and everything
    # to its right (running maximum over the reversed array).
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # Sum rectangle areas only where recall actually changes.
    steps = np.where(mrec[1:] != mrec[:-1])[0] + 1
    widths = mrec[steps] - mrec[steps - 1]
    return np.sum(widths * mpre[steps])

def compute_confusion_matrix(gts, preds):
    """
    Confusion matrix and per-class IoU of the arg-max predictions.

    @param    gts      numpy.ndarray    ground truth labels, indexed as
                                        [sample, 0, row, col]
    @param    preds    numpy.ndarray    per-class scores, class axis at
                                        position 1
    @return   (ious, conf): `ious` holds intersection / union per class,
              `conf` is the square confusion matrix with one row and one
              column per channel of `preds` (rows: true, columns: predicted)
    """
    n_classes = preds.shape[1]
    preds_cls = np.argmax(preds, 1)
    gts = gts[:,0,:,:]
    # Pin the label set to 0..n_classes-1. Without it, scikit-learn only keeps
    # labels that occur in the data, so a class absent from both ground truth
    # and predictions would shrink the matrix and shift every later class.
    conf = confusion_matrix(gts.ravel(), preds_cls.ravel(),
                            labels=np.arange(n_classes))
    inter = np.diag(conf)
    union = np.sum(conf, 0) + np.sum(conf, 1) - inter
    # Avoid 0/0 for classes that never occur; their IoU becomes 0.
    union = np.maximum(union, 1)
    return inter / union, conf

In [0]:
# Colab has GPUs, you will have to move tensors and models to GPU.
# NOTE(review): the device is fixed to the first CUDA GPU; there is no CPU
# fallback in this cell.
device = torch.device("cuda:0")

In [0]:
#############
#TODO: initialize your model 
# Build the segmentation network and move it to `device`.
model = ResNet().to(device) 

In [0]:
# This is a trivial semantic segmentor. For each pixel location it computes the
# distribution of the class label in the training set and uses that as the
# prediction. Quite unsurprisingly, it doesn't perform very well. We provide
# this code so that you can understand the data formats for the benchmarking
# functions.
def simple_train():
    """
    Fit the trivial baseline: for every pixel location, the fraction of
    training images in which that pixel carries each class label.

    @return   numpy.ndarray of shape (n_classes, 224, 288) with per-pixel
              class frequencies
    """
    train_dataset = SegmentationDataset(split='train')
    train_dataloader = data.DataLoader(train_dataset, batch_size=1,
                                       shuffle=True, num_workers=4,
                                       drop_last=True)
    n_classes = train_dataset.n_classes
    # Shaped (n_classes, 1, 1) so that comparing against an (H, W) label map
    # yields one boolean mask per class in a single broadcast.
    class_ids = np.arange(n_classes).reshape(-1, 1, 1)
    counts = np.zeros((n_classes, 224, 288))
    n_images = 0
    for _, gt in tqdm(train_dataloader):
        label_map = gt.cpu().numpy()[0, 0, :, :]
        counts += label_map == class_ids
        n_images += 1

    return counts / n_images

def simple_predict(split, model):
    """
    "Predict" with the trivial baseline: every image of the split receives the
    same per-pixel class distribution `model`.

    @param    split    string           dataset split to load
    @param    model    numpy.ndarray    output of simple_train()
    @return   (gts, preds, classes): stacked ground truth label maps, one copy
              of `model` per image, and the list of class names
    """
    dataset = SegmentationDataset(split=split, data_dir=DATASET_PATH)
    dataloader = data.DataLoader(dataset, batch_size=1, shuffle=False,
                                 num_workers=0, drop_last=False)
    # Keep the single label map of each batch.
    gts = [gt.cpu().numpy()[0, :, :, :] for _, gt in tqdm(dataloader)]
    preds = [model for _ in gts]
    return np.array(gts), np.array(preds), list(dataset.classes)
  


In [0]:
def eval(model, data_loader, is_gpu):
    """
    Run `model` over every sample of `data_loader` and collect the ground
    truth label maps and the raw network outputs.

    The model is switched to evaluation mode for the duration of the call (so
    BatchNorm uses its running statistics) and its previous mode is restored
    afterwards. No gradients are recorded.

    NOTE: this function shadows Python's built-in `eval` within the notebook.

    @param    model          torch.nn.Module    network to evaluate
    @param    data_loader    DataLoader         yields (img, gt) batches; its
                                                `.dataset.classes` provides
                                                the class names
    @param    is_gpu         bool               move images to the GPU first
    @return   (gts, preds, classes): stacked label maps, stacked per-class
              score maps, and the list of class names of the evaluated dataset
    """
    was_training = model.training
    model.eval()
    gts, preds = [], []
    try:
        with torch.no_grad():
            # Iterate the loader that was passed in, not a module-level one.
            for img, gt in tqdm(data_loader):
                if is_gpu:
                    img = img.cuda()

                outputs = model(img)
                # extend() keeps every sample of the batch, so batch sizes
                # larger than 1 are handled as well.
                gts.extend(gt.numpy())
                preds.extend(outputs.cpu().numpy())
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    gts = np.array(gts)
    preds = np.array(preds)
    return gts, preds, list(data_loader.dataset.classes)


In [0]:
# Load Data
# Training split: shuffled, one image per batch, loaded by 4 worker processes.
train_dataset = SegmentationDataset(split='train')
train_dataloader = data.DataLoader(train_dataset, batch_size=1, 
                                    shuffle=True, num_workers=4, 
                                    drop_last=True)

# Validation split: training() passes `dataloader` to eval() after every epoch.
dataset = SegmentationDataset(split='val', data_dir=DATASET_PATH)
dataloader = data.DataLoader(dataset, batch_size=1, shuffle=False, 
                              num_workers=0, drop_last=False)

# Test split: passed to eval() once, after training has finished.
test_dataset = SegmentationDataset(split='test', data_dir=DATASET_PATH)
test_loader = data.DataLoader(test_dataset, batch_size=1, shuffle=False, 
                              num_workers=0, drop_last=False)


In [0]:
# 3. Define a Loss function and optimizer
import torch.optim as optim
# Cross-entropy loss; training() applies it to the network output and the
# label map after squeezing the label's channel dimension.
criterion = nn.CrossEntropyLoss()

# Tune the learning rate.
# See whether the momentum is useful or not
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=0.005)
# Turn interactive plotting off and create the figure that training() closes
# after saving its curves.
plt.ioff()
fig = plt.figure()
# Module-level lists to which training() appends one value per epoch.
mAP_over_epochs = []
mIoU_over_epochs = []


In [0]:
########################################################################
# TODO: Implement your training cycles, make sure you evaluate on validation 
# dataset and compute evaluation metrics every so often. 
# You may also want to save models that perform well.
# Number of passes over the training set performed by training(); also the
# length of the x-axis of the metric plots.
EPOCHS = 40

def training():
    """
    Train the module-level `model` for EPOCHS epochs, evaluate it on the
    validation loader after every epoch, plot the validation metrics and
    return the model loaded with its best weights.

    "Best" is the epoch with the highest validation mean IoU; the weights the
    model had before training are used if no epoch is run.

    Reads module-level state: model, optimizer, criterion, train_dataloader,
    dataloader (validation), IS_GPU, EPOCHS and fig. Resets and then fills the
    module-level lists mAP_over_epochs and mIoU_over_epochs. Writes the plot
    to "plotq2.png".

    @return   the module-level `model`, with the best weights loaded
    """
    # Start from an empty history: the plots below expect exactly one value
    # per epoch, which would not hold if a previous call had left entries.
    del mAP_over_epochs[:]
    del mIoU_over_epochs[:]

    best_miou = float('-inf')
    # Defined up front so load_state_dict() below always has weights to load.
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in tqdm(range(EPOCHS), total=EPOCHS):
        # Make sure the model is in training mode at the start of each epoch,
        # regardless of what the evaluation step did.
        model.train()
        running_loss = 0.0
        # `batch` rather than `data`, which would shadow torch.utils.data.
        for batch in train_dataloader:
            inputs, labels = batch

            if IS_GPU:
                inputs = inputs.cuda()
                labels = labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            # Labels arrive as (N, 1, H, W); the loss expects (N, H, W).
            loss = criterion(outputs, labels.squeeze(1))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Normalizing the loss by the total number of train batches
        # (guarded so an empty loader does not divide by zero).
        running_loss /= max(len(train_dataloader), 1)
        print('Training Epoch [%d] loss: %.3f' %
              (epoch + 1, running_loss))

        gts, preds, _ = eval(model, dataloader, IS_GPU)

        ious, counts = compute_confusion_matrix(gts, preds)
        aps = compute_ap(gts, preds)
        mean_ap = np.mean(aps)
        mean_iou = np.mean(ious)
        print('Test result on Validation images:')
        print('{:>0s}: AP: {:0.2f}, IoU: {:0.2f}'.format('mean', mean_ap, mean_iou))

        # Select the checkpoint on the validation metric: training loss keeps
        # falling even when the model starts to overfit.
        if mean_iou > best_miou:
            print("saving best model \n")
            best_miou = mean_iou
            best_model_wts = copy.deepcopy(model.state_dict())

        mAP_over_epochs.append(mean_ap)
        mIoU_over_epochs.append(mean_iou)
    # -----------------------------

    # Plot validation mAP and mIoU over epochs.
    # -------------
    # Draw on the figure numbered like the module-level `fig`; if that figure
    # was closed by an earlier call, a new one with the same number is made.
    plt.figure(fig.number)
    plt.subplot(2, 1, 1)
    plt.ylabel('mAP')
    plt.plot(np.arange(EPOCHS), mAP_over_epochs, 'k-')
    plt.title('mAP and mIoU')
    plt.xticks(np.arange(EPOCHS, dtype=int))
    plt.grid(True)

    plt.subplot(2, 1, 2)
    plt.plot(np.arange(EPOCHS), mIoU_over_epochs, 'b-')
    plt.ylabel('mIoU')
    plt.xlabel('Epochs')
    plt.xticks(np.arange(EPOCHS, dtype=int))
    plt.grid(True)
    plt.savefig("plotq2.png")
    plt.close(fig.number)
    print('Finished Training')
    # -------------

    model.load_state_dict(best_model_wts)

    return model


In [0]:
# ########################################################################
# # TODO: Evaluate your result, and report Mean average precision on test dataset 
# # using provided helper function. Here we show how we can train and evaluate the 
# # simple model that we provided on the validation set. You will want to report
# # performance on the validation set for the variants you tried, and the 
# # performance of the final model on the test set.

# Train the network, then pass the returned model and `test_loader` to eval()
# and report per-class AP / IoU together with the confusion-matrix plot.
best_model = training() 
gts, preds, classes  = eval(best_model, test_loader, IS_GPU)
# NOTE(review): the plot file name says "simple-val" although `test_loader`
# and the trained network are used here -- confirm the name is intended.
aps, ious = segmentation_eval(gts, preds, classes, 'cs543-simple-val.pdf')