In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell executes, so edits to local .py files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import torch, os, numpy as np
import torch.optim as optim
import torch.nn as nn
import cv2 
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from tensorboardX import SummaryWriter
import math

# Restrict the process to GPU index 1 unless the caller has already chosen the
# visible devices (e.g. via the shell or a job scheduler).
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "1")
# Select the device all tensors/modules are moved to. Falling back to the CPU
# keeps the notebook runnable on machines where no CUDA device is visible,
# where torch.cuda.current_device() would raise instead.
Device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if Device.type == "cuda":
    # Release cached blocks left over from earlier runs in the same kernel.
    torch.cuda.empty_cache()

from loader_weakly_image import VOC2012_Aug_Image_caffe

In [3]:
##############################################################
# Experiment configuration.
# exper_name is embedded in the checkpoint file name written by the training cells.
exper_name = 'ImageBCE_trainvalset'
batch_size = 10
# NOTE(review): EPOCH is reassigned to 100 at the top of each training cell,
# so this value is not the one actually used for training.
EPOCH = 30
# NOTE(review): steps, gamma and weight_decay are not referenced by any cell
# visible in this notebook (no LR scheduler is created and the optimizer is
# built without weight decay) - confirm whether they were meant to be used.
steps = [15,30,45]
gamma = 0.1
#lr = 1e-2
weight_decay = 5e-4
##############################################################
# Image-set list directory handed to the project-local dataset class.
root= './VOCdevkit/VOC2012/ImageSets/Main/'
trainset = VOC2012_Aug_Image_caffe(root = root, mode='train', dataaug=True,
                                   size=(256, 256), scales=(0.75, 1., 1.25, 1.5, 1.75, 2.))

# NOTE(review): the validation split is built with dataaug=True and loaded with
# shuffle=True below, same as the training split - confirm this is intended.
valset = VOC2012_Aug_Image_caffe(root = root, mode='val', dataaug=True,
                                   size=(256, 256), scales=(0.75, 1., 1.25, 1.5, 1.75, 2.))


# Report the number of samples in each split.
print(len(trainset))
print(len(valset))
train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(valset, batch_size=batch_size, shuffle=True, num_workers=4)

5717
5823


In [6]:
class VGG(nn.Module):
    """VGG-style multi-label classifier: conv features, 3-layer MLP head, sigmoid.

    Args:
        features: module that turns the input batch into a feature map. Its
            flattened output must have 8*8*512 values per sample, because that
            is the input width of the first linear layer.
        num_classes: number of sigmoid outputs produced per sample.
    """

    def __init__(self, features, num_classes=20):
        super().__init__()
        self.features = features
        head_layers = [
            nn.Linear(8 * 8 * 512, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)
        # Initialise features + classifier; the sigmoid added afterwards has
        # no parameters, so it is unaffected by the ordering.
        self._initialize_weights()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return per-class probabilities of shape (batch, num_classes)."""
        feature_map = self.features(x)
        flattened = feature_map.view(feature_map.size(0), -1)
        logits = self.classifier(flattened)
        return self.sigmoid(logits)

    def _initialize_weights(self):
        """Re-initialise every Conv2d, BatchNorm2d and Linear sub-module.

        Conv weights are drawn from N(0, sqrt(2 / (kh * kw * out_channels))),
        linear weights from N(0, 0.01); batch-norm scales are set to 1 and
        all biases to 0.
        """
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                kernel_h, kernel_w = layer.kernel_size
                fan_out = kernel_h * kernel_w * layer.out_channels
                nn.init.normal_(layer.weight, 0, math.sqrt(2. / fan_out))
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)
            elif isinstance(layer, nn.BatchNorm2d):
                nn.init.ones_(layer.weight)
                nn.init.zeros_(layer.bias)
            elif isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.zeros_(layer.bias)


def make_layers(cfg, batch_norm=False):
    """Build the convolutional feature extractor described by ``cfg``.

    Each entry of ``cfg`` is either:
      * ``'M'`` - append a 2x2, stride-2 max-pool;
      * ``'A'`` - a placeholder that adds no layer;
      * any other value - taken as an output-channel count and appended as a
        3x3 conv (padding 1), optionally followed by BatchNorm2d, then an
        in-place ReLU.

    The first convolution expects 3 input channels.

    Returns:
        nn.Sequential containing the layers in order.
    """
    modules = []
    channels = 3
    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        if spec == 'A':
            # Marker entry: contributes nothing to the network.
            continue
        modules.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(spec))
        modules.append(nn.ReLU(inplace=True))
        channels = spec
    return nn.Sequential(*modules)


# Layer layouts understood by make_layers: an integer is the output-channel
# count of a 3x3 convolution, 'M' is a 2x2 max-pool and 'A' is a marker that
# make_layers skips. Every layout contains five pooling stages (one per row).
cfg = {
    # 8 convolutions - the layout used by vgg11_bn.
    'A': [
        64, 'M',
        128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    # 10 convolutions.
    'B': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    # 13 convolutions; the 'A' markers precede the last two stages.
    'D': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        'A', 512, 512, 512, 'M',
        'A', 512, 512, 512, 'M',
    ],
    # 16 convolutions.
    'E': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 256, 'M',
        512, 512, 512, 512, 'M',
        512, 512, 512, 512, 'M',
    ],
}


def vgg11_bn(pretrained=False, **kwargs):
    """Build the 11-layer VGG classifier with batch normalisation.

    Args:
        pretrained: when True, initialise the convolutional ``features`` from
            the torchvision ImageNet VGG-11-BN checkpoint. The classifier head
            is left at its random initialisation because its shapes
            (8*8*512 inputs, ``num_classes`` outputs) do not match the
            ImageNet head.
        **kwargs: forwarded to ``VGG`` (e.g. ``num_classes``).

    Returns:
        The constructed ``VGG`` model.
    """
    model = VGG(make_layers(cfg['A'], batch_norm=True), **kwargs)
    if pretrained:
        # Imported locally: the notebook never imported model_zoo nor defined
        # model_urls, so the previous code raised NameError here (and asked
        # for the 'vgg16_bn' weights, which do not fit this architecture).
        from torch.utils import model_zoo

        checkpoint = model_zoo.load_url(
            'https://download.pytorch.org/models/vgg11_bn-6002323d.pth')
        prefix = 'features.'
        features_state = {
            key[len(prefix):]: value
            for key, value in checkpoint.items()
            if key.startswith(prefix)
        }
        model.features.load_state_dict(features_state)
    return model

In [24]:
# Build the BatchNorm VGG-11 classifier with freshly initialised weights
# (pretrained defaults to False) and move its parameters to the chosen device.
# As the last expression of the cell, the call also displays the model layout.
model = vgg11_bn()
model.to(Device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke

In [25]:
# Optimise every model parameter with plain SGD at lr=0.1.
# NOTE(review): no momentum is set and the weight_decay value from the config
# cell is not passed here - confirm this is intended.
params = model.parameters()
optimizer = torch.optim.SGD(params, lr= 0.1)
#optimizer =torch.optim.Adagrad(params, lr=0.01, lr_decay=0)
# Binary cross-entropy averaged over all elements; it is applied to the
# sigmoid outputs returned by VGG.forward.
criterion = nn.BCELoss(reduction='mean')

In [26]:
def get_accuracy(pred, target):
    """Exact-match accuracy for multi-label predictions.

    A sample counts as correct only when every one of its class predictions
    equals the target. The class count and batch size are taken from the
    tensors themselves, so partial batches and label sets other than 20
    classes are scored correctly.

    Args:
        pred: (batch, num_classes) tensor of hard predictions.
        target: tensor of the same shape holding the ground-truth labels.

    Returns:
        0-dim float tensor: fraction of samples whose full label vector
        matches (0 for an empty batch).
    """
    exact_match = (pred == target).all(dim=1).float()
    if exact_match.numel() == 0:
        # mean() of an empty tensor is NaN; report 0 instead.
        return exact_match.sum()
    return exact_match.mean()

def get_prediction(output):
    """Turn per-class probabilities into hard labels by rounding each element.

    Rounding follows ``torch.round``, so a value of exactly 0.5 becomes 0.
    """
    hard_labels = output.round()
    return hard_labels

In [27]:
# First training stage: EPOCH passes over train_loader with the optimizer and
# criterion defined in the preceding cell.
EPOCH = 100
# Make sure the checkpoint folder exists before training starts, so a missing
# directory cannot make torch.save fail after the whole run has finished.
os.makedirs('./weights', exist_ok=True)
writer = SummaryWriter()
test_acc = []
for ep in range(1, EPOCH+1):
    # test_count / accuracy are only consumed by the disabled validation pass below.
    test_count = 0
    accuracy = 0
    acc_sum = 0      # running sum of the accuracies of the logged batches
    logged = 0       # number of batches logged so far in this epoch
    model.train()
    for it, (img, target) in enumerate(train_loader):

        img = img.to(Device)
        target = target.to(Device)

        optimizer.zero_grad()
        pred = model(img)
        loss = criterion(pred, target)
        loss.backward()
        optimizer.step()
        batch_loss = loss.item()

        # Every 100th batch: report that batch's loss and the mean exact-match
        # accuracy over the batches logged so far in this epoch.
        if it%100==0:
            pred = get_prediction(pred)
            acc_sum += get_accuracy(pred, target)
            print('EPOCH : {:03d}/{:3d} | Loss : {:.4f}, training_accuracy: {:.4f}'.format(ep, EPOCH, batch_loss, acc_sum/(logged+1)))
            #writer.add_scalars('training_loss', average_loss , it_count)
            logged +=1

    # Disabled validation pass.
    # NOTE(review): if re-enabled, run it under model.eval() and torch.no_grad().
    #for it, (img, target)  in enumerate(val_loader):
    #    img = img.to(Device)
    #    target = target.to(Device)
    #    pred = model(img)
    #    pred = get_prediction(pred)
    #    accuracy += get_accuracy(pred, target)
    #    test_count += 1
    #accuracy /= test_count
    #test_acc.append(accuracy)
    #print('Test accuracy : {:.4f}'.format(accuracy))

    # Checkpoint after the final epoch. The previous check `(ep+1) == EPOCH`
    # fired one epoch early, so the last epoch of training was never saved.
    # The file keeps its original `<EPOCH-1>` suffix so existing paths stay valid.
    if ep == EPOCH:
        torch.save(model.state_dict(), './weights/Baseline_{}_{}.pt'.format(exper_name, ep - 1))

# Flush and release the TensorBoard event file.
writer.close()

EPOCH : 001/100 | Loss : 0.8204, training_accuracy: 0.0000
EPOCH : 001/100 | Loss : 0.2486, training_accuracy: 0.0000
EPOCH : 001/100 | Loss : 0.1979, training_accuracy: 0.0333
EPOCH : 001/100 | Loss : 0.2652, training_accuracy: 0.0250
EPOCH : 001/100 | Loss : 0.1834, training_accuracy: 0.0400
EPOCH : 001/100 | Loss : 0.3444, training_accuracy: 0.0333
EPOCH : 002/100 | Loss : 0.2638, training_accuracy: 0.0000
EPOCH : 002/100 | Loss : 0.2415, training_accuracy: 0.0000
EPOCH : 002/100 | Loss : 0.2565, training_accuracy: 0.0000
EPOCH : 002/100 | Loss : 0.2577, training_accuracy: 0.0000
EPOCH : 002/100 | Loss : 0.2242, training_accuracy: 0.0000
EPOCH : 002/100 | Loss : 0.2043, training_accuracy: 0.0167
EPOCH : 003/100 | Loss : 0.2486, training_accuracy: 0.1000
EPOCH : 003/100 | Loss : 0.2421, training_accuracy: 0.1000
EPOCH : 003/100 | Loss : 0.1912, training_accuracy: 0.1333
EPOCH : 003/100 | Loss : 0.2134, training_accuracy: 0.1250
EPOCH : 003/100 | Loss : 0.2473, training_accuracy: 0.12

EPOCH : 024/100 | Loss : 0.1676, training_accuracy: 0.1500
EPOCH : 024/100 | Loss : 0.1538, training_accuracy: 0.1333
EPOCH : 024/100 | Loss : 0.2032, training_accuracy: 0.1000
EPOCH : 024/100 | Loss : 0.2221, training_accuracy: 0.0800
EPOCH : 024/100 | Loss : 0.2573, training_accuracy: 0.0833
EPOCH : 025/100 | Loss : 0.2276, training_accuracy: 0.1000
EPOCH : 025/100 | Loss : 0.1338, training_accuracy: 0.2500
EPOCH : 025/100 | Loss : 0.1689, training_accuracy: 0.2000
EPOCH : 025/100 | Loss : 0.2639, training_accuracy: 0.2000
EPOCH : 025/100 | Loss : 0.1851, training_accuracy: 0.2200
EPOCH : 025/100 | Loss : 0.1568, training_accuracy: 0.2333
EPOCH : 026/100 | Loss : 0.1895, training_accuracy: 0.1000
EPOCH : 026/100 | Loss : 0.1769, training_accuracy: 0.2500
EPOCH : 026/100 | Loss : 0.2149, training_accuracy: 0.2333
EPOCH : 026/100 | Loss : 0.1622, training_accuracy: 0.2750
EPOCH : 026/100 | Loss : 0.1564, training_accuracy: 0.2600
EPOCH : 026/100 | Loss : 0.1754, training_accuracy: 0.25

EPOCH : 047/100 | Loss : 0.1326, training_accuracy: 0.3000
EPOCH : 047/100 | Loss : 0.1446, training_accuracy: 0.2500
EPOCH : 047/100 | Loss : 0.1218, training_accuracy: 0.2600
EPOCH : 047/100 | Loss : 0.1635, training_accuracy: 0.2500
EPOCH : 048/100 | Loss : 0.0457, training_accuracy: 0.6000
EPOCH : 048/100 | Loss : 0.1361, training_accuracy: 0.4500
EPOCH : 048/100 | Loss : 0.1727, training_accuracy: 0.3667
EPOCH : 048/100 | Loss : 0.1341, training_accuracy: 0.3500
EPOCH : 048/100 | Loss : 0.1029, training_accuracy: 0.4000
EPOCH : 048/100 | Loss : 0.1243, training_accuracy: 0.3833
EPOCH : 049/100 | Loss : 0.0923, training_accuracy: 0.4000
EPOCH : 049/100 | Loss : 0.1019, training_accuracy: 0.3500
EPOCH : 049/100 | Loss : 0.1004, training_accuracy: 0.4000
EPOCH : 049/100 | Loss : 0.1349, training_accuracy: 0.4250
EPOCH : 049/100 | Loss : 0.1126, training_accuracy: 0.4200
EPOCH : 049/100 | Loss : 0.0966, training_accuracy: 0.4000
EPOCH : 050/100 | Loss : 0.0972, training_accuracy: 0.20

EPOCH : 070/100 | Loss : 0.0529, training_accuracy: 0.6750
EPOCH : 070/100 | Loss : 0.0557, training_accuracy: 0.6600
EPOCH : 070/100 | Loss : 0.1152, training_accuracy: 0.6167
EPOCH : 071/100 | Loss : 0.0259, training_accuracy: 0.9000
EPOCH : 071/100 | Loss : 0.0354, training_accuracy: 0.8500
EPOCH : 071/100 | Loss : 0.1132, training_accuracy: 0.7000
EPOCH : 071/100 | Loss : 0.0416, training_accuracy: 0.7000
EPOCH : 071/100 | Loss : 0.0424, training_accuracy: 0.6800
EPOCH : 071/100 | Loss : 0.0619, training_accuracy: 0.6500
EPOCH : 072/100 | Loss : 0.0368, training_accuracy: 0.8000
EPOCH : 072/100 | Loss : 0.0567, training_accuracy: 0.7500
EPOCH : 072/100 | Loss : 0.0593, training_accuracy: 0.6667
EPOCH : 072/100 | Loss : 0.0974, training_accuracy: 0.6250
EPOCH : 072/100 | Loss : 0.0665, training_accuracy: 0.6200
EPOCH : 072/100 | Loss : 0.0687, training_accuracy: 0.6167
EPOCH : 073/100 | Loss : 0.0299, training_accuracy: 0.9000
EPOCH : 073/100 | Loss : 0.0586, training_accuracy: 0.85

EPOCH : 093/100 | Loss : 0.0168, training_accuracy: 0.8200
EPOCH : 093/100 | Loss : 0.0446, training_accuracy: 0.8000
EPOCH : 094/100 | Loss : 0.0540, training_accuracy: 0.6000
EPOCH : 094/100 | Loss : 0.0360, training_accuracy: 0.7000
EPOCH : 094/100 | Loss : 0.0888, training_accuracy: 0.6667
EPOCH : 094/100 | Loss : 0.0681, training_accuracy: 0.6750
EPOCH : 094/100 | Loss : 0.0393, training_accuracy: 0.7000
EPOCH : 094/100 | Loss : 0.0310, training_accuracy: 0.7000
EPOCH : 095/100 | Loss : 0.0391, training_accuracy: 0.8000
EPOCH : 095/100 | Loss : 0.0516, training_accuracy: 0.7000
EPOCH : 095/100 | Loss : 0.0224, training_accuracy: 0.7667
EPOCH : 095/100 | Loss : 0.0430, training_accuracy: 0.7500
EPOCH : 095/100 | Loss : 0.0399, training_accuracy: 0.7600
EPOCH : 095/100 | Loss : 0.0201, training_accuracy: 0.7833
EPOCH : 096/100 | Loss : 0.0071, training_accuracy: 1.0000
EPOCH : 096/100 | Loss : 0.0101, training_accuracy: 1.0000
EPOCH : 096/100 | Loss : 0.0222, training_accuracy: 0.93

In [28]:
# Re-create the optimizer for the second training stage with a 10x smaller
# learning rate (0.01 instead of 0.1); the model keeps the weights it has
# learned so far.
params = model.parameters()
optimizer = torch.optim.SGD(params, lr= 0.01)
#optimizer =torch.optim.Adagrad(params, lr=0.01, lr_decay=0)
# Binary cross-entropy averaged over all elements; it is applied to the
# sigmoid outputs returned by VGG.forward.
criterion = nn.BCELoss(reduction='mean')

In [None]:
# Second training stage: continue training the same model for EPOCH more
# passes with the lower-learning-rate optimizer created in the preceding cell.
EPOCH = 100
# Make sure the checkpoint folder exists before training starts, so a missing
# directory cannot make torch.save fail after the whole run has finished.
os.makedirs('./weights', exist_ok=True)
writer = SummaryWriter()
test_acc = []
for ep in range(1, EPOCH+1):
    # test_count / accuracy are only consumed by the disabled validation pass below.
    test_count = 0
    accuracy = 0
    acc_sum = 0      # running sum of the accuracies of the logged batches
    logged = 0       # number of batches logged so far in this epoch
    model.train()
    for it, (img, target) in enumerate(train_loader):

        img = img.to(Device)
        target = target.to(Device)

        optimizer.zero_grad()
        pred = model(img)
        loss = criterion(pred, target)
        loss.backward()
        optimizer.step()
        batch_loss = loss.item()

        # Every 100th batch: report that batch's loss and the mean exact-match
        # accuracy over the batches logged so far in this epoch.
        if it%100==0:
            pred = get_prediction(pred)
            acc_sum += get_accuracy(pred, target)
            print('EPOCH : {:03d}/{:3d} | Loss : {:.4f}, training_accuracy: {:.4f}'.format(ep, EPOCH, batch_loss, acc_sum/(logged+1)))
            #writer.add_scalars('training_loss', average_loss , it_count)
            logged +=1

    # Disabled validation pass.
    # NOTE(review): if re-enabled, run it under model.eval() and torch.no_grad().
    #for it, (img, target)  in enumerate(val_loader):
    #    img = img.to(Device)
    #    target = target.to(Device)
    #    pred = model(img)
    #    pred = get_prediction(pred)
    #    accuracy += get_accuracy(pred, target)
    #    test_count += 1
    #accuracy /= test_count
    #test_acc.append(accuracy)
    #print('Test accuracy : {:.4f}'.format(accuracy))

    # Checkpoint after the final epoch. The previous check `(ep+1) == EPOCH`
    # fired one epoch early, so the last epoch of training was never saved.
    # The hard-coded 199 suffix is kept so existing paths stay valid.
    if ep == EPOCH:
        torch.save(model.state_dict(), './weights/Baseline_{}_{}.pt'.format(exper_name, 199))

# Flush and release the TensorBoard event file.
writer.close()

EPOCH : 001/100 | Loss : 0.0170, training_accuracy: 0.9000
EPOCH : 001/100 | Loss : 0.0130, training_accuracy: 0.9000
EPOCH : 001/100 | Loss : 0.0225, training_accuracy: 0.8667
EPOCH : 001/100 | Loss : 0.0052, training_accuracy: 0.9000
EPOCH : 001/100 | Loss : 0.0112, training_accuracy: 0.9000
EPOCH : 001/100 | Loss : 0.0143, training_accuracy: 0.8833
EPOCH : 002/100 | Loss : 0.0379, training_accuracy: 0.9000
EPOCH : 002/100 | Loss : 0.0253, training_accuracy: 0.8500
EPOCH : 002/100 | Loss : 0.0252, training_accuracy: 0.8667
EPOCH : 002/100 | Loss : 0.0217, training_accuracy: 0.8500
EPOCH : 002/100 | Loss : 0.0136, training_accuracy: 0.8400
EPOCH : 002/100 | Loss : 0.0254, training_accuracy: 0.8167
EPOCH : 003/100 | Loss : 0.0090, training_accuracy: 0.9000
EPOCH : 003/100 | Loss : 0.0111, training_accuracy: 0.9500
EPOCH : 003/100 | Loss : 0.0175, training_accuracy: 0.9333
EPOCH : 003/100 | Loss : 0.0259, training_accuracy: 0.9000
