In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch import Tensor
from torch.optim import lr_scheduler
from torch.autograd import Variable

import torchvision
from torchvision import datasets, models, transforms, utils
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor

import time
import os
import numpy as np

import sklearn.metrics

from vocparseclslabels import PascalVOC

from typing import Callable, Optional

from PIL import Image

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [2]:
# Dataset class for getting image tensors and labels of training and validation data
class dataset_voc(Dataset):
    def __init__(self, root_dir, trvaltest, transform=None):
        self._labels = []
        self._imgfilenames = []
        self._root_dir = root_dir
        self._transform = transform
        
        trvaltest = ["train", "val", "test"][trvaltest] # going from integer to string identifier
        
        pVOC = PascalVOC(root_dir) # object for getting the correct file names
        im_dict = {}
        for i, im_class in enumerate(pVOC.list_image_sets()):
            files = pVOC.imgs_from_category_as_list(im_class, trvaltest) # all files with class im_class in tr/val/test data
            for file in files:
                if file in im_dict:
                    im_dict[file][i] = 1
                else:
                    im_dict[file] = np.zeros(20)
                    im_dict[file][i] = 1

        for key in im_dict:
            self._labels.append(im_dict[key])
            self._imgfilenames.append(key)

    def __len__(self):
        return len(self._imgfilenames)

    def __getitem__(self, idx):
        image = Variable(self._im2tensor(self._imgfilenames[idx]))
        label = Variable(torch.tensor(self._labels[idx]).float())
        sample = {"image": image, "label": label, "filename": self._imgfilenames[idx]}

        return sample
    
    def _im2tensor(self, im_name):
        img_path = self._root_dir + "JPEGImages/" + im_name +".jpg"
        image = Image.open(img_path)
        if self._transform:
            image = self._transform(image)
        else:
            transforms.ToTensor()(image)
        return image

In [None]:
# Custom loss function which is essentially a weighted binary cross entropy loss
class BCE_custom(nn.modules.loss._Loss):
    def __init__(self, reduction: str = "mean") -> None:
        super(BCE_custom, self).__init__()
        self._reduction = reduction
        self._BCE = nn.BCELoss(reduction = 'none') # Binary cross entropy for calculating cross entropy of every class prediction

    def forward(self, input_: Tensor, target: Tensor) -> Tensor:
        BCE_vec = self._BCE(input_, target)
        trues = torch.mean(BCE_vec[target.bool()]) # average loss where there should be a label
        falses = torch.mean(BCE_vec[~target.bool()]) # average loss where there should not be a label
        return trues + falses # this way guessing 0 for all is only 50% correct

In [3]:
def evaluate_meanavgprecision(model, dataloader, loss_func, device, numcl):
    model.eval() # Makes the forward pass more efficient for evaluation

    curcount = 0
    accuracy = 0
    idx_start = 0
    
    num_items = len(dataloader.dataset)
    pred_arr = torch.from_numpy(np.zeros(shape=(num_items, numcl))).to(device) # prediction scores for each class. each row is a list of scores. one score per image
    label_arr = torch.from_numpy(np.zeros(shape=(num_items, numcl))).to(device) # labels scores for each class. each row is a list of labels. one label per image
    fnames = []  # filenames as they come out of the dataloader
    losses = []
    
    with torch.no_grad(): # We need no gradients for evaluation, so this is faster
        for batch_idx, data in enumerate(dataloader):
            if (batch_idx % 100 == 0) and (batch_idx >= 100):
                print(f"at val batchindex: {batch_idx}")

            # Extracting data
            fname = data["filename"]
            inputs = data["image"].to(device)
            labels = data["label"].to(device)
            # Calculating what we need
            pred = model(inputs.to(device))
            loss = loss_func(pred, labels)
            # Storing values
            batch_size = len(fname)
            idx_end = idx_start + batch_size
            label_arr[idx_start:idx_end] = labels
            pred_arr[idx_start:idx_end] = pred
            idx_start += batch_size

            fnames += fname # concatenating lists
            losses.append(loss.item())
            
    avgprecs = np.zeros(numcl)  # average precision for each class
    for c in range(numcl):
        avgprecs[c] = sklearn.metrics.average_precision_score(y_true = label_arr[:, c].to("cpu"), y_score = pred_arr[:, c].to("cpu"))
        
    return avgprecs, np.mean(losses), label_arr, pred_arr, fnames

In [8]:
def traineval2_model_nocv(dataloader_train, dataloader_test,  model, loss_func, optimizer, scheduler, num_epochs, device, numcl):
    best_measure = 0
    best_epoch = -1

    trainlosses = []
    testlosses = []
    testperfs = []

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        print("----------")

        avgloss = train_epoch(model, dataloader_train, loss_func, device, optimizer)
        trainlosses.append(avgloss)

        if scheduler is not None:
            scheduler.step()

        perfmeasure, testloss, concat_labels, concat_pred, fnames = evaluate_meanavgprecision(
            model, dataloader_test, loss_func, device, numcl)
        testlosses.append(testloss)
        testperfs.append(perfmeasure)

        print(f"Epoch: {epoch + 1} \n Classwise perfmeasure: {perfmeasure}")

        avgperfmeasure = np.mean(perfmeasure)
        print(f"Avgperfmeasure: {avgperfmeasure}, train {avgloss}, test {testloss}")

        if avgperfmeasure > best_measure:
            bestweights = model.state_dict()
            best_measure = avgperfmeasure
            best_epoch = epoch

    return best_epoch, best_measure, bestweights, trainlosses, testlosses, testperfs

In [9]:
def train_epoch(model, trainloader, loss_func, device, optimizer):
    model.train()

    losses = []
    for batch_idx, data in enumerate(trainloader):
        inputs = data["image"].to(device)
        labels = data["label"].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward + backward + optimize
        outputs = model(inputs)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()
        
        
        losses.append(loss.item())

    return np.mean(losses)

In [10]:
# --------------------- SETUP BLOCK --------------------------
torch.manual_seed(1)
config = dict()

# True #TODO change this to True for training on the cluster, eh
config["use_gpu"] = True
config["lr"] = 0.005
config["batchsize_train"] = 16
config["batchsize_val"] = 64
config["maxnumepochs"] = 35

config["scheduler_stepsize"] = 10
config["scheduler_factor"] = 0.3

# kind of a dataset property
config["numcl"] = 20

# data augmentations
data_transforms = {
    "train": transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    "val": transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# datasets
root_dir = "data/VOCdevkit/VOC2012/"
image_datasets = {}
image_datasets["train"] = dataset_voc(root_dir = root_dir, trvaltest=0, transform=data_transforms["train"])
image_datasets["val"] = dataset_voc(root_dir = root_dir, trvaltest=1, transform=data_transforms["val"])

# dataloaders
dataloaders = {}
dataloaders["train"] = DataLoader(image_datasets["train"], num_workers=0, batch_size = config["batchsize_train"])
dataloaders["val"] = DataLoader(image_datasets["val"], num_workers=0, batch_size = config["batchsize_val"])

# device
if True == config["use_gpu"]:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# model
model = models.resnet18(pretrained=True) # pretrained resnet18
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Sequential( # rewriting last layer to have 20 sigmoid outputs
    nn.Linear(512, 20),
    nn.Sigmoid()
)
model = model.to(device)

#loss_func = BCE_custom()
loss_func = nn.BCELoss()

#optimizer = optim.Adam(params = model.fc.parameters(), lr = config["lr"])
optimizer = optim.SGD(params = model.fc.parameters(), lr = config["lr"])

# Decay LR by a factor of 0.3 every X epochs
lr_sc = lr_scheduler.StepLR(optimizer, step_size = config["scheduler_stepsize"], gamma = config["scheduler_factor"])

In [11]:
best_epoch, best_measure, bestweights, trainlosses, testlosses, testperfs = traineval2_model_nocv(
        dataloaders["train"], dataloaders["val"], model, loss_func, optimizer, lr_sc, num_epochs=config["maxnumepochs"], device=device, numcl=config["numcl"])

Epoch 1/35
----------
Epoch: 1 
 Classwise perfmeasure: [0.06817351 0.16913744 0.06006747 0.05396996 0.05705205 0.02850473
 0.09460633 0.06232385 0.06664164 0.04120203 0.0350299  0.09094914
 0.04432153 0.0445771  0.45258757 0.03659378 0.01801119 0.04029494
 0.04313908 0.04999609]
Avgperfmeasure: 0.07785896670335432, train 0.27723027447332216, test 0.2554746129355588
Epoch 2/35
----------


KeyboardInterrupt: 