In [2]:
from data import *
from utils.augmentations import SSDAugmentation
from layers.modules import MultiBoxLoss
from ssd import build_ssd
import os
import sys
import time
import torch
from torch.autograd import Variable
import torch.nn as nn

import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.nn.init as init
import torch.utils.data as data
import numpy as np
import argparse

## Declare arguments

In [3]:
# ---- Training configuration: every tunable value for this notebook lives here ----

# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
DATASET_ROOT = "/home/alex/subt_ssd_training/data/subt_artifact"
DATASET_NAME = "extinguisher"          # used for the checkpoint folder and file names
cfg = subt                             # config mapping; presumably provided by `from data import *`
BASE_NET = "./weights/vgg16_reducedfc.pth"  # VGG weights loaded when no SSD checkpoint is given
DATA_DETECTION = SUBTDetection         # dataset class instantiated in the "Create dataset" cell
BATCH_SIZE = 2
PRETRAINED_MODEL = None                # checkpoint file name inside SAVE_FOLDER, or None to start from BASE_NET
PRETRAINED_ITER = 0                    # offset added to the iteration counter in logs and checkpoint names
SAVE_MODEL_ITER = 500                  # a checkpoint is written every SAVE_MODEL_ITER iterations
START_ITER = 0                         # first value of the training-loop iteration counter
NUM_WORKERS = 4                        # DataLoader worker processes
CUDA = True                            # train on the GPU when True
LR = 1e-3                              # initial learning rate for SGD
MOMENTUM = 0.4                         # SGD momentum
WEIGHT_DECAY = 5e-4                    # SGD weight decay
GAMMA = 0.1                            # multiplicative learning-rate decay applied at each cfg['lr_steps'] entry
VISDOM = False                         # not referenced by the cells visible here
SAVE_FOLDER = "./weights/" + DATASET_NAME + "/"

# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
os.makedirs(SAVE_FOLDER, exist_ok=True)

In [4]:
# Choose the default tensor type from the CUDA flag and the hardware actually present.
if CUDA and not torch.cuda.is_available():
    # GPU training was requested on a machine without CUDA: fall back to the CPU
    # here instead of crashing later on the first `.cuda()` call.
    print("CUDA requested but no CUDA device is available; falling back to CPU.")
    CUDA = False

if CUDA:
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    if torch.cuda.is_available():
        print("WTF are u wasting your CUDA device?")
    # Always reset to CPU tensors so re-running this cell after flipping CUDA
    # does not leave a stale CUDA default behind.
    torch.set_default_tensor_type('torch.FloatTensor')

## Define helper functions

In [5]:
def str2bool(v):
    """Interpret a string as a boolean flag.

    Returns True when `v`, compared case-insensitively, is exactly one of
    "yes", "true", "t" or "1"; every other string yields False.
    """
    truthy_words = ("yes", "true", "t", "1")
    lowered = v.lower()
    return lowered in truthy_words

# Initialize model weights & biases
def xavier(param):
    """Fill `param` in place using Xavier (Glorot) uniform initialization.

    Calls `init.xavier_uniform_`, the in-place initializer that replaced the
    deprecated `init.xavier_uniform`, so no deprecation warning is emitted.

    Args:
        param: tensor to initialize; it is modified in place.
    """
    init.xavier_uniform_(param)
def weights_init(m):
    """Initialize a single module; intended for use with `Module.apply`.

    `nn.Conv2d` modules get their weight re-initialized through `xavier` and
    their bias (when one exists) zeroed. Every other module type is left
    untouched.

    Args:
        m: the module visited by `apply`.
    """
    if isinstance(m, nn.Conv2d):
        xavier(m.weight.data)
        # Conv2d(bias=False) stores `bias` as None; only zero a bias that exists.
        if m.bias is not None:
            m.bias.data.zero_()

# Adjust learning rate during training
def adjust_learning_rate(optimizer, gamma, step):
    """Set every parameter group's learning rate to `LR * gamma ** step`.

    `LR` is the module-level initial learning rate. The new rate is written to
    all entries of `optimizer.param_groups` and reported once per call.

    Adapted from the PyTorch ImageNet example:
    https://github.com/pytorch/examples/blob/master/imagenet/main.py

    Args:
        optimizer: object exposing `param_groups`, a list of dicts with an 'lr' key.
        gamma: multiplicative decay factor.
        step: number of decay steps applied so far (the exponent of `gamma`).
    """
    lr = LR * (gamma ** (step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    # Log once per adjustment rather than once per parameter group.
    print("Change learning rate to: ", lr)

## Create dataset

In [6]:
# Training split of the dataset, augmented with SSDAugmentation at the configured input size.
dataset = DATA_DETECTION(
    root=DATASET_ROOT,
    image_sets=['train'],
    transform=SSDAugmentation(cfg['min_dim'], MEANS),
)

# Show the class-name -> index mapping exposed by the dataset's target transform.
classes = dataset.target_transform.class_to_ind
print("Class to index: \n", classes)

# Order the (name, index) pairs by index, keep only the names, and append a trailing 'None' entry.
classes = sorted(classes.items(), key=lambda pair: pair[1])
label = [name for name, _ in classes] + ['None']
print(label)
true_label = ['extinguisher']

Class to index: 
 {'extinguisher': 0}
['extinguisher', 'None']


## Create model

In [7]:
# Declare the SSD network
ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
net = ssd_net
if CUDA:
    # `net` wraps `ssd_net`; the weights below are loaded through `ssd_net` directly.
    net = torch.nn.DataParallel(ssd_net)
    cudnn.benchmark = True

if PRETRAINED_MODEL is not None:  # Resume from a previously saved SSD checkpoint
    print('Resuming training, loading {}...'.format(PRETRAINED_MODEL))
    ssd_net.load_weights(SAVE_FOLDER + PRETRAINED_MODEL)
else:
    print('Initializing weights...')
    vgg_weights = torch.load(BASE_NET)  # load the pretrained VGG weights
    ssd_net.vgg.load_state_dict(vgg_weights)
    # Freshly initialize the remaining SSD sub-networks (weights & biases)
    ssd_net.extras.apply(weights_init)
    ssd_net.loc.apply(weights_init)
    ssd_net.conf.apply(weights_init)

if CUDA:
    # Move the model to the GPU explicitly instead of relying on the default
    # tensor type set by an earlier cell. This must happen before the optimizer
    # is built so that it references the parameters that are actually trained.
    net = net.cuda()

optimizer = optim.SGD(net.parameters(), lr=LR, momentum=MOMENTUM,
                weight_decay=WEIGHT_DECAY)
criterion = MultiBoxLoss(BATCH_SIZE ,cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                False, CUDA)

  self.priors = Variable(self.priorbox.forward(), volatile=True)
  init.constant(self.weight,self.gamma)


Initializing weights...


  


## Training

### Initialize

In [None]:
# Switch the network to training mode.
net.train()

# Running totals of the two loss terms, plus an epoch counter.
loc_loss, conf_loss = 0, 0
epoch = 0

print('Loading the dataset...')
epoch_size = len(dataset) // BATCH_SIZE  # number of full batches per pass over the dataset
print('Training SSD on:', DATASET_NAME)

# Shuffled loader; `detection_collate` is the project's collate function for detection batches.
data_loader = data.DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    collate_fn=detection_collate,
    pin_memory=True,
)
batch_iterator = iter(data_loader)

Loading the dataset...
Training SSD on: extinguisher


### Start training

In [None]:
# Main training loop: one optimizer step per iteration, with periodic logging
# and checkpointing.
step_index = 0
for iteration in range(START_ITER, cfg['max_iter']):
    # Decay the learning rate each time a scheduled step is reached.
    if iteration in cfg['lr_steps']:
        step_index += 1
        adjust_learning_rate(optimizer, GAMMA, step_index)

    # Restart the loader when the current pass over the dataset is exhausted.
    try:
        images, targets = next(batch_iterator)
    except StopIteration:
        batch_iterator = iter(data_loader)
        images, targets = next(batch_iterator)

    # Tensors and Variables are the same type since PyTorch 0.4, and the
    # `volatile` flag no longer has any effect (it only triggers a warning),
    # so the batch is used directly and only moved to the GPU when requested.
    if CUDA:
        images = images.cuda()
        targets = [ann.cuda() for ann in targets]

    # Forward
    t0 = time.time()
    out = net(images)
    # Backprop
    optimizer.zero_grad()
    loss_l, loss_c = criterion(out, targets)
    loss = loss_l + loss_c
    loss.backward()
    optimizer.step()
    t1 = time.time()
    loc_loss += loss_l.item()
    conf_loss += loss_c.item()

    if iteration % 10 == 0:
        # Emit one complete line per report: iteration, loss, then step time.
        print('iter ' + repr(PRETRAINED_ITER + iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ')
        print('timer: %.4f sec.' % (t1 - t0))

    if iteration != 0 and iteration % SAVE_MODEL_ITER == 0:
        print('Saving state, iter:', iteration)
        # Checkpoint names include the PRETRAINED_ITER offset.
        torch.save(ssd_net.state_dict(), SAVE_FOLDER + DATASET_NAME + "_" +
                   repr(PRETRAINED_ITER + iteration) + '.pth')
# Save final model
torch.save(ssd_net.state_dict(),
            SAVE_FOLDER + DATASET_NAME + '.pth')

  app.launch_new_instance()


timer: 7.1010 sec.
iter 0 || Loss: 22.1698 ||timer: 0.1709 sec.
iter 10 || Loss: 8.6346 ||timer: 0.2172 sec.
iter 20 || Loss: 9.9856 ||timer: 0.1749 sec.
iter 30 || Loss: 14.6121 ||timer: 0.2011 sec.
iter 40 || Loss: 6.5524 ||timer: 0.1714 sec.
iter 50 || Loss: 8.0913 ||timer: 0.1926 sec.
iter 60 || Loss: 11.2363 ||timer: 0.1716 sec.
iter 70 || Loss: 6.6154 ||timer: 0.1712 sec.
iter 80 || Loss: 5.1468 ||timer: 0.1711 sec.
iter 90 || Loss: 7.0809 ||timer: 0.1716 sec.
iter 100 || Loss: 5.9744 ||timer: 0.1712 sec.
iter 110 || Loss: 5.5306 ||timer: 0.1815 sec.
iter 120 || Loss: 7.1210 ||timer: 0.2661 sec.
iter 130 || Loss: 5.7871 ||timer: 0.1943 sec.
iter 140 || Loss: 6.9067 ||timer: 0.1783 sec.
iter 150 || Loss: 7.9394 ||timer: 0.1711 sec.
iter 160 || Loss: 6.2403 ||timer: 0.2049 sec.
iter 170 || Loss: 5.1982 ||timer: 0.1713 sec.
iter 180 || Loss: 6.5254 ||timer: 0.1829 sec.
iter 190 || Loss: 5.8798 ||timer: 0.2031 sec.
iter 200 || Loss: 6.0682 ||timer: 0.2117 sec.
iter 210 || Loss: 7.296

iter 3470 || Loss: 2.3747 ||timer: 0.1870 sec.
iter 3480 || Loss: 1.8512 ||timer: 0.1874 sec.
iter 3490 || Loss: 3.0116 ||timer: 0.1875 sec.
iter 3500 || Loss: 2.2305 ||Saving state, iter: 3500
timer: 0.1888 sec.
iter 3510 || Loss: 1.9722 ||timer: 0.1869 sec.
iter 3520 || Loss: 9.0684 ||timer: 0.1879 sec.
iter 3530 || Loss: 3.5611 ||timer: 0.1880 sec.
iter 3540 || Loss: 2.3229 ||timer: 0.1873 sec.
iter 3550 || Loss: 2.1862 ||timer: 0.1878 sec.
iter 3560 || Loss: 1.8860 ||timer: 0.1872 sec.
iter 3570 || Loss: 2.2478 ||timer: 0.1870 sec.
iter 3580 || Loss: 1.0655 ||timer: 0.1871 sec.
iter 3590 || Loss: 4.7687 ||timer: 0.1875 sec.
iter 3600 || Loss: 1.6343 ||timer: 0.1920 sec.
iter 3610 || Loss: 2.4050 ||timer: 0.1870 sec.
iter 3620 || Loss: 4.0056 ||timer: 0.1875 sec.
iter 3630 || Loss: 3.8140 ||timer: 0.1875 sec.
iter 3640 || Loss: 1.4146 ||timer: 0.1873 sec.
iter 3650 || Loss: 2.1448 ||timer: 0.1869 sec.
iter 3660 || Loss: 5.8601 ||timer: 0.1881 sec.
iter 3670 || Loss: 1.0415 ||timer: 

iter 6910 || Loss: 1.8356 ||timer: 0.1873 sec.
iter 6920 || Loss: 1.2633 ||timer: 0.1869 sec.
iter 6930 || Loss: 1.0502 ||timer: 0.1868 sec.
iter 6940 || Loss: 0.9831 ||timer: 0.1871 sec.
iter 6950 || Loss: 1.9249 ||timer: 0.1868 sec.
iter 6960 || Loss: 1.6526 ||timer: 0.1877 sec.
iter 6970 || Loss: 1.6370 ||timer: 0.1866 sec.
iter 6980 || Loss: 2.4310 ||timer: 0.1943 sec.
iter 6990 || Loss: 0.9448 ||timer: 0.1887 sec.
iter 7000 || Loss: 4.3764 ||Saving state, iter: 7000
timer: 0.1881 sec.
iter 7010 || Loss: 2.4588 ||timer: 0.1871 sec.
iter 7020 || Loss: 1.9488 ||timer: 0.1870 sec.
iter 7030 || Loss: 2.0780 ||timer: 0.1880 sec.
iter 7040 || Loss: 1.3431 ||timer: 0.1873 sec.
iter 7050 || Loss: 1.1761 ||timer: 0.1873 sec.
iter 7060 || Loss: 1.4263 ||timer: 0.1870 sec.
iter 7070 || Loss: 0.9773 ||timer: 0.1878 sec.
iter 7080 || Loss: 1.7968 ||timer: 0.1898 sec.
iter 7090 || Loss: 2.6382 ||timer: 0.1871 sec.
iter 7100 || Loss: 1.3854 ||timer: 0.1876 sec.
iter 7110 || Loss: 0.9090 ||timer: 

iter 10340 || Loss: 1.7515 ||timer: 0.1870 sec.
iter 10350 || Loss: 0.5450 ||timer: 0.1872 sec.
iter 10360 || Loss: 0.8801 ||timer: 0.1877 sec.
iter 10370 || Loss: 2.8495 ||timer: 0.1876 sec.
iter 10380 || Loss: 1.2419 ||timer: 0.1917 sec.
iter 10390 || Loss: 1.2020 ||timer: 0.1893 sec.
iter 10400 || Loss: 2.2134 ||timer: 0.1872 sec.
iter 10410 || Loss: 1.4363 ||timer: 0.1875 sec.
iter 10420 || Loss: 1.0858 ||timer: 0.1880 sec.
iter 10430 || Loss: 2.9614 ||timer: 0.1869 sec.
iter 10440 || Loss: 1.6431 ||timer: 0.1871 sec.
iter 10450 || Loss: 1.0065 ||timer: 0.1872 sec.
iter 10460 || Loss: 0.7799 ||timer: 0.1871 sec.
iter 10470 || Loss: 1.4182 ||timer: 0.1876 sec.
iter 10480 || Loss: 1.3763 ||timer: 0.1869 sec.
iter 10490 || Loss: 1.0371 ||timer: 0.1876 sec.
iter 10500 || Loss: 3.0394 ||Saving state, iter: 10500
timer: 0.1872 sec.
iter 10510 || Loss: 1.7076 ||timer: 0.1873 sec.
iter 10520 || Loss: 0.8125 ||timer: 0.1877 sec.
iter 10530 || Loss: 0.5166 ||timer: 0.1878 sec.
iter 10540 || 