In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import models, transforms
from tqdm import tqdm
from collections import OrderedDict
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

PyTorch Version:  1.5.0
Torchvision Version:  0.6.0


In [2]:
#Paths
from pathlib import Path
base_folder = Path('.')
data_folder = base_folder/'til2020'
train_imgs_folder = data_folder/'train'/'train'
train_annotations = data_folder/'train.json'
val_imgs_folder = data_folder/'val'/'val'
val_annotations = data_folder/'val.json'

train_pickle = data_folder/'train.p'/'train.p'
val_pickle = data_folder/'val.p'/'val.p'

save_model_folder = base_folder/'ckpts'
load_model_folder = data_folder

In [3]:
class Config():
    input_shape = (3,224,224) #hardcoded into pretrained, cannot change
    categories = ['tops', 'trousers', 'outerwear', 'dresses', 'skirts']
    batch_size = 32
    device = 'cuda'
    epochs = 10
    wt_decay = 5e-4
    dims_list = [(7,7),(14,14)] #hardcoded into structure, cannot change
    aspect_ratios = [(1,1), (1,2), (2,1)]
    intermediate_layer = 6 #layer3 output res of pretrained (22nd repeating unit) is 14x14 for some reason almost all resnet variants have 14 at this layer

In [4]:
'''
close adaptation of the one in the example, just different backbone
padding=(1,1) enables padding in both dimensions like 'same'
padding=0 disables padding like 'valid'
purpose of kernel_regularizer is done by the optimizer in pyTorch
'''
class ObjectCNNModel(nn.Module):
    def __init__(self,conf):
        super(ObjectCNNModel,self).__init__()
        self.intermediate_layer = conf.intermediate_layer
        self.dims_list = conf.dims_list
        self.aspect_ratios = conf.aspect_ratios
        self.categories = conf.categories

        #remove last layer, gain access to intermediates
        #pretrained_model = torchvision.models.resnext101_32x8d(pretrained=True)
        pretrained_model = torchvision.models.resnext50_32x4d(pretrained=True)
        self.pretrained = nn.ModuleList(list(pretrained_model.children())[:-1])

        from torch.nn import Conv2d,BatchNorm2d,LeakyReLU,ConvTranspose2d
        def add_activation(layers):
            return [l for x in layers for l in (
                x,
                BatchNorm2d(x.out_channels),
                LeakyReLU(0.01),
            )] #[l1,bn,relu,l2,bn,relu,l3,bn,relu...]

        self.upsampler = nn.Sequential(*add_activation([
            Conv2d(2048,512,1,padding=(1,1)),
            Conv2d(512,1024,3,padding=(1,1)),
            Conv2d(1024,512,1,padding=(1,1)),
            Conv2d(512,1024,3,padding=(1,1)),
            Conv2d(1024,512,1,padding=(1,1)),
        ]))
        self.dim_7x7_layer = nn.Sequential(*add_activation([Conv2d(512,2048,3,padding=(1,1))]))
        self.transposer = nn.Sequential(*add_activation([
            Conv2d(512,256,1,padding=(1,1)),
            ConvTranspose2d(256,512,4,stride=(2,2),padding=(3,3))
        ]))
        self.dim_14x14_layer = nn.Sequential(*add_activation([
            Conv2d(1024+512,256,1,padding=0), #torch.cat((N,512,14,14),(N,1024,14,14))
            Conv2d(256,512,3,padding=0),
            Conv2d(512,256,1,padding=0),
            Conv2d(256,512,3,padding=(1,1)),
            Conv2d(512,256,1,padding=(1,1)),
            Conv2d(256,512,3,padding=(1,1)),
        ]))
        self.predictors_7x7 = nn.ModuleList()
        self.predictors_14x14 = nn.ModuleList()
        for aspect in self.aspect_ratios:
            #no activation here
            self.predictors_7x7.append(nn.ModuleList([
                Conv2d(2048,1,1), #objectness
                Conv2d(2048,len(self.categories),1), #class
                Conv2d(2048,4,1) #bbox
            ]))
            self.predictors_14x14.append(nn.ModuleList([
                Conv2d(512,1,1), #objectness
                Conv2d(512,len(self.categories),1), #class
                Conv2d(512,4,1) #bbox
            ]))
    
    def forward(self,x):
        intermediate_output = torch.zeros(x.shape[0],1024,14,14)
        for i,layer in enumerate(self.pretrained):
            x = layer(x)
            if i == self.intermediate_layer: intermediate_output = x
        backbone_output = x

        upsample = self.upsampler(backbone_output)
        tens_7x7 = torch.add(self.dim_7x7_layer(upsample),backbone_output)
        x = torch.cat((self.transposer(upsample),intermediate_output),dim=1)
        tens_14x14 = self.dim_14x14_layer(x)

        pred_7x7 = []
        for predictor in self.predictors_7x7:
            pred_7x7.append(torch.cat((
                predictor[0](tens_7x7),
                predictor[1](tens_7x7),
                predictor[2](tens_7x7),
            ),dim=1))
        pred_7x7 = torch.stack(pred_7x7,dim=4) #[32, 10, 7, 7, 3]
        pred_7x7 = pred_7x7.permute(0,2,3,4,1)

        pred_14x14 = []
        for predictor in self.predictors_14x14:
            pred_14x14.append(torch.cat((
                predictor[0](tens_14x14),
                predictor[1](tens_14x14),
                predictor[2](tens_14x14),
            ),dim=1))
        pred_14x14 = torch.stack(pred_14x14,dim=4)
        pred_14x14 = pred_14x14.permute(0,2,3,4,1)
        #final result: (N,H,W,Aspect Ratio,Losses), Losses[10] = [objectness,categories*5,bbox*4]
        return {'7x7':pred_7x7,'14x14':pred_14x14}

#ypred: (N,H,W,Aspect Ratio,Losses), Losses[10] = [objectness,categories*5,bbox*4]
#ytrue: ( batch, i, j, aspect_ratios, 1+4+numclasses+2). For a batch,i,j, we get #aspect_ratios vectors of length 9 (two more for objectness and cat/loc indicators)
def combined_loss(ytrue, ypred):
    obj_loss_weight = 1.0
    cat_loss_weight = 1.0
    loc_loss_weight = 1.0

    end_cat = len(Config.categories) + 1

    objloss_indicators = ytrue[:,:,:,:,-2:-1]
    catlocloss_indicators = ytrue[:,:,:,:,-1:] #masks of some sort

    ytrue_obj, ypred_obj = ytrue[:,:,:,:,:1], ypred[:,:,:,:,:1]
    ytrue_obj = torch.where( objloss_indicators != 0, ytrue_obj, torch.zeros_like(ytrue_obj) )
    ypred_obj = torch.where( objloss_indicators != 0, ypred_obj, torch.zeros_like(ypred_obj) )
    objectness_loss = nn.BCEWithLogitsLoss()(ypred_obj,ytrue_obj)

    ytrue_cat, ypred_cat = ytrue[:,:,:,:,1:end_cat], ypred[:,:,:,:,1:end_cat]
    ytrue_cat = torch.where( catlocloss_indicators != 0, ytrue_cat, torch.zeros_like(ytrue_cat) )
    ypred_cat = torch.where( catlocloss_indicators != 0, ypred_cat, torch.zeros_like(ypred_cat) )
    categorical_loss = nn.CrossEntropyLoss()(ypred_cat.permute(0,4,1,2,3),ytrue_cat.permute(0,4,1,2,3).max(dim=1)[1]) #torch only supports NCHW... not NHWC...
    #It is useful when training a classification problem with C classes. If provided, the optional argument weight should be a 1D Tensor assigning weight to each of the classes. This is particularly useful when you have an unbalanced training set.

    ytrue_loc, ypred_loc = ytrue[:,:,:,:,-6:-2], ypred[:,:,:,:,-4:]
    ytrue_loc = torch.where( catlocloss_indicators != 0, ytrue_loc, torch.zeros_like(ytrue_loc) )
    ypred_loc = torch.where( catlocloss_indicators != 0, ypred_loc, torch.zeros_like(ypred_loc) )
    localisation_loss = nn.SmoothL1Loss()(ypred_loc,ytrue_loc)

    return obj_loss_weight*objectness_loss + cat_loss_weight*categorical_loss + loc_loss_weight*localisation_loss

In [5]:
model = ObjectCNNModel(Config)

from torchsummaryX import summary
df = summary(backbone,torch.zeros((1,)+(Config.input_shape)))
print(df.loc[df.index.str.contains(pat = 'layer3')])
list(backbone.children())[:][6]
from torchsummaryX import summary
df = summary(model,torch.zeros((16,)+(Config.input_shape)))

In [6]:
from loader import TILSequence
from torch.utils.data import DataLoader
from sampling import iou,modified_yolo_posneg_sampling
from augment import augmenter,aug_colorbalance,aug_contrast,aug_brightness,aug_sharpness,aug_horizontal_flip,aug_crop,aug_translate
from encoder import encode_label

aug_default = augmenter(
    [aug_colorbalance,aug_contrast,aug_brightness,aug_sharpness,aug_horizontal_flip,aug_translate],
    [0.2,0.2,0.2,0.2,0.5,0.2]
)
label_encoder = lambda y: encode_label(y, Config.dims_list, Config.aspect_ratios, iou, modified_yolo_posneg_sampling, Config.categories)
preproc_fn = lambda x: x / 255.
loader = DataLoader(TILSequence(train_imgs_folder,train_annotations,Config.batch_size,aug_default,(224,224),label_encoder,preproc_fn),batch_size=None)
val_loader = DataLoader(TILSequence(val_imgs_folder,val_annotations,Config.batch_size,aug_default,(224,224),label_encoder,preproc_fn),batch_size=None)#batching done by custom dataset

In [7]:
model.train()
model.to(Config.device)
optimizer = optim.Adam(model.parameters(), lr=0.0001)

for i in range(Config.epochs):
    epoch_loss = 0.0
    running_loss = 0.0
    ave_loss = 0.0
    tqdm.write(f'EPOCH {i}')
    runner = tqdm(loader)
    for n,batch in enumerate(runner):
        x = batch['inputs'].permute(0,3,1,2).type(torch.float).to(Config.device) #convert (N,H,W,C) to (N,C,H,W)
        y7 = batch['7x7'].to(Config.device)
        y14 = batch['14x14'].to(Config.device)

        optimizer.zero_grad()
        output = model(x)
        loss = combined_loss(output['7x7'],y7) + combined_loss(output['14x14'],y14)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        running_loss += loss.item()
        
        if(n%30==0):
            ave_loss = running_loss/30
            running_loss = 0.0
        runner.set_description(f'Ave Loss: {ave_loss}|Loss: {loss.item()}')
    
    tqdm.write(f'EPOCH LOSS: {epoch_loss/len(loader)}, VALIDATING...')

    val_loss = 0.0
    with torch.set_grad_enabled(False):
        for batch in val_loader:
            x = batch['inputs'].permute(0,3,1,2).type(torch.float).to(Config.device)
            y7 = batch['7x7'].to(Config.device)
            y14 = batch['14x14'].to(Config.device)

            output = model(x)
            loss = combined_loss(output['7x7'],y7) + combined_loss(output['14x14'],y14)
            val_loss += loss.item()*Config.batch_size
    tqdm.write(f'VAL LOSS: {val_loss/len(val_loader)}')


    


0%|          | 0/258 [00:04<?, ?it/s]


RuntimeError: Expected object of scalar type Long but got scalar type Float for argument #2 'target' in call to _thnn_nll_loss2d_forward

In [None]:
a = test[0]

In [None]:
type(a['inputs'])

In [None]:
a = next(loader)