In [1]:
%%capture
# Environment bootstrap; %%capture above hides the (long) installer and git output.
# Installs the Neptune client and psutil, which are not pinned to a version here.
!pip install neptune-client psutil
# Fetch the project sources and make the repository the working directory.
# NOTE(review): the later `import dataset`, `object_detection`, `utils` and `eval`
# presumably resolve against this directory - confirm.
!git clone https://github.com/Cho-D-YoungRae/URP_PD.git
%cd URP_PD

In [2]:
import dataset
import object_detection
from utils import *
import eval

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.utils import make_grid
import torchvision.transforms.functional as TF
import torch.nn.functional as F

import os
import json
import numpy as np
import argparse
from tqdm.auto import tqdm
import time
from datetime import datetime

# Prefer the GPU whenever PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using <{device}> device")

loading annotations into memory...
Done (t=0.07s)
creating index...
index created!
Using <cuda> device


In [3]:
# ====== constants ======#
# Class name -> integer id used by the detector; id 0 is the background class.
label_map = dict(background=0, person=1)
# Inverse lookup: integer id -> class name.
rev_label_map = dict(zip(label_map.values(), label_map.keys()))

## setting

In [4]:
from torch.backends import cudnn
import random

# Let cuDNN benchmark convolution algorithms and pick the fastest one.
cudnn.benchmark = True

# ====== Random Seed Initialization ====== #
# Seed the Python, NumPy and PyTorch random number generators with one value.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Stricter reproducibility switches, intentionally left disabled:
# torch.cuda.manual_seed(seed)
# torch.cuda.manual_seed_all(seed)
# torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark = False

# An argument-less parser yields an empty Namespace that is then filled in by
# hand, so every setting can later be exported with vars(args).
parser = argparse.ArgumentParser()
args = parser.parse_args(args=[])
args.baselineID = 32

# ====== Dataset ====== #
args.ch_option = dict(num_ch=1, img_type='lwir', one_ch_option='mean')
args.val_split = 0.1

# ====== Model ====== #
args.base_model = 'VGG16bnBase'
args.n_classes = len(label_map)
args.is_sds = True
# One flag per feature map; only the first two are enabled.
args.usages_seg_feats = [True] * 2 + [False] * 4


# ====== Optimizer & Training ====== #
args.optim = 'Adam'
args.lr = 5e-4
args.twice_b_lr = True
args.weight_decay = 5e-4

args.epochs = 150
args.train_batch_size = 32
args.test_batch_size = 64

# Learning-rate decay milestones at 4/6 and 5/6 of the schedule (epochs 100 and 125).
args.decay_lr_at = [args.epochs // 6 * factor for factor in (4, 5)]
args.decay_lr_to = 0.1

## neptune init

In [5]:
import neptune.new as neptune

# Read the Neptune API token from the environment rather than embedding the
# secret in the notebook. Set NEPTUNE_API_TOKEN before running this cell.
api_token = os.environ.get("NEPTUNE_API_TOKEN")
run = neptune.init(
    project='jodyr/urp',
    source_files=['*.py'],
    api_token=api_token,
    # run='PD-25',
    )

# Record every hyperparameter held on `args` with the run.
run["parameters"] = vars(args)


https://app.neptune.ai/jodyr/urp/e/PD-84
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


## train

In [6]:
from neptune.new.types import File

def _log_first_batch_images(run, images, seg_labels, pred_segs):
    """Upload image grids for one batch (inputs, segmentation targets, predictions) to ``run``."""
    ft_names = ['conv4_3', 'conv7', 'conv8_2', 'conv9_2', 'conv10_2', 'conv11_2']
    for ft_name, pred_seg in zip(ft_names, [p.detach() for p in pred_segs]):
        seg_probs = F.softmax(pred_seg, dim=1)
        seg_grid = make_grid(seg_probs)
        # Only channel 1 of the grid is logged - presumably the 'person' class
        # probability (label id 1); confirm against the model's output layout.
        run[f'train/pred_seg_{ft_name}'].log(File.as_image(TF.to_pil_image(seg_grid[1])))

    run['train/input'].log(File.as_image(TF.to_pil_image(make_grid(images))))
    run['train/seg_gt'].log(File.as_image(TF.to_pil_image(make_grid(seg_labels))))


def train(train_loader, model, criterion, optimizer, run=None):
    """
    One epoch's training.

    :param train_loader: DataLoader for training data; each batch is a tuple
        (images, bboxes, category_ids, is_crowds, seg_labels)
    :param model: model returning (predicted_locs, predicted_scores, pred_segs)
    :param criterion: loss called with the predictions, boxes, category ids,
        segmentation predictions and segmentation labels
    :param optimizer: optimizer
    :param run: optional Neptune run; when truthy, image grids of the first
        batch are logged to it
    :return: the running average loss over the epoch as tracked by AverageMeter
        (its initial ``avg`` value when the loader yields no batches)
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.train()  # training mode enables dropout

    batch_time = AverageMeter()  # forward prop. + back prop. time
    data_time = AverageMeter()  # data loading time
    losses = AverageMeter()  # loss

    num_batches = len(train_loader)
    # Report roughly four times per epoch. The floor of 1 keeps `i % print_freq`
    # valid for loaders with fewer than four batches.
    print_freq = max(1, num_batches // 4)

    start = time.time()

    # Batches
    for i, (images, bboxes, category_ids, is_crowds, seg_labels) in enumerate(train_loader):
        data_time.update(time.time() - start)

        images = images.to(device)
        bboxes = [b.to(device) for b in bboxes]
        category_ids = [c.to(device) for c in category_ids]
        seg_labels = seg_labels.to(device)

        # Forward prop.
        predicted_locs, predicted_scores, pred_segs = model(images)

        # Loss
        loss = criterion(predicted_locs, predicted_scores, bboxes, category_ids,
                         pred_segs, seg_labels)  # scalar

        # Backward prop.
        optimizer.zero_grad()
        loss.backward()

        # Update model
        optimizer.step()

        # Weight the batch loss by the number of images in the batch.
        losses.update(loss.item(), images.size(0))
        batch_time.update(time.time() - start)

        start = time.time()

        # Log example images once per epoch, from the first batch only.
        if i == 0 and run:
            _log_first_batch_images(run, images, seg_labels, pred_segs)

        # Print status
        if i % print_freq == 0:
            print(f'[{i}/{num_batches}]\t'
                  f'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  f'Data Time {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  f'Loss {losses.val:.4f} ({losses.avg:.4f})\t')

    # No explicit `del` of the batch tensors: they are released when the function
    # returns, and deleting them here raised UnboundLocalError for an empty loader.
    train_loss = losses.avg
    return train_loss

## validation (currently disabled: the function below is commented out)

In [7]:
# def validation(val_loader, model, criterion):
#     model.eval()

#     num_batches = len(val_loader)
#     losses = AverageMeter()
#     with torch.no_grad():
#         for i, (images, bboxes, category_ids, _) in enumerate(val_loader):
#             images = images.to(device)
#             bboxes = [b.to(device) for b in bboxes]
#             category_ids = [l.to(device) for l in category_ids]

#             predicted_locs, predicted_scores = model(images)
#             loss = criterion(predicted_locs, predicted_scores, bboxes, category_ids).item()

#             losses.update(loss, images.size(0))

#     val_loss = losses.avg
#     return val_loss

## checkpoint

In [8]:
# Path of the checkpoint for this baseline; replaced by None when no such file
# exists, in which case the next cell starts a fresh run.
checkpoint = os.path.join(
    '/content/drive/MyDrive/2021.summer_URP/PD/checkpoint',
    f'{args.baselineID}.pth.tar',
)
if not os.path.isfile(checkpoint):
    checkpoint = None
print(f"checkpoint: {checkpoint}")

checkpoint: None


In [9]:
if checkpoint is None:
    # Fresh run: build the model and optimizer from the settings on `args`.
    start_epoch = 1
    lr = args.lr
    model = object_detection.SDSSSD300(n_classes=args.n_classes,
                                       base=args.base_model,
                                       ch_option=args.ch_option,
                                       usages_seg_feats=args.usages_seg_feats)
    optimizer_cls = getattr(torch.optim, args.optim)
    if args.twice_b_lr:
        # Put trainable bias parameters in their own group so they can be
        # given twice the base learning rate.
        biases = list()
        not_biases = list()
        for param_name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            if param_name.endswith('.bias'):
                biases.append(param)
            else:
                not_biases.append(param)
        optimizer = optimizer_cls(params=[{'params': biases, 'lr': 2 * lr},
                                          {'params': not_biases}],
                                  lr=lr,
                                  weight_decay=args.weight_decay)
    else:
        optimizer = optimizer_cls(params=model.parameters(),
                                  lr=lr,
                                  weight_decay=args.weight_decay)

else:
    # Resume. The loaded object gets its own name so `checkpoint` keeps holding
    # the file path and this cell can be re-run safely.
    # NOTE(review): torch.load unpickles arbitrary objects - only load checkpoint
    # files from a trusted source.
    # map_location lets a checkpoint saved on a GPU be restored on a CPU-only runtime.
    checkpoint_state = torch.load(checkpoint, map_location=device)
    start_epoch = checkpoint_state['epoch'] + 1
    # Report the epoch stored in the checkpoint, not the epoch training resumes at.
    print('\nLoaded checkpoint from epoch %d.\n' % checkpoint_state['epoch'])
    model = checkpoint_state['model']
    optimizer = checkpoint_state['optimizer']


model = model.to(device)
# The loss is built from the model's own priors and segmentation-feature flags.
criterion = object_detection.SDSMultiBoxLoss(priors_cxcy=model.priors_cxcy,
                                             usages_seg_feats=model.usages_seg_feats).to(device)

Downloading: "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth" to /root/.cache/torch/hub/checkpoints/vgg16_bn-6c64b313.pth


HBox(children=(FloatProgress(value=0.0, max=553507836.0), HTML(value='')))



Loaded base model.





## dataset init

In [10]:
# Use at most four loader workers, but never more than the CPUs this runtime
# reports: asking for more makes PyTorch warn that the DataLoader may run
# slowly or freeze.
workers = min(4, os.cpu_count() or 1)
train_dataset = dataset.KaistPDDataset(ch_option=args.ch_option,
                                       is_sds=args.is_sds)
# sds_collate_fn is the project's batching function for this dataset.
train_loader = DataLoader(train_dataset,
                          batch_size=args.train_batch_size,
                          shuffle=True,
                          collate_fn=dataset.sds_collate_fn,
                          num_workers=workers,
                          pin_memory=True)

  cpuset_checked))


## experiment

In [11]:
# Where the training loop writes its periodic checkpoints: one file per
# baseline id inside the checkpoint folder.
checkpoint_dir = '/content/drive/MyDrive/2021.summer_URP/PD/checkpoint'
checkpoint_path = os.path.join(checkpoint_dir, f'{args.baselineID}.pth.tar')

In [None]:
epochs, decay_lr_at = args.epochs, args.decay_lr_at
save_freq, eval_freq = 5, 10  # checkpoint every 5 epochs, evaluate every 10

# Epochs
for epoch in range(start_epoch, epochs + 1):
    print(f"# ====== Epoch {epoch} ====== # {datetime.now()}")

    # Decay the learning rate when a milestone epoch is reached.
    if epoch in decay_lr_at:
        adjust_learning_rate(optimizer, args.decay_lr_to)

    # Train for one epoch and log the epoch loss to the Neptune run.
    train_loss = train(train_loader, model, criterion, optimizer, run=run)
    run['train/loss'].log(train_loss)

    # Periodic checkpoint.
    if not epoch % save_freq:
        save_checkpoint(epoch, model, optimizer, checkpoint_path)

    # Periodic evaluation.
    if not epoch % eval_freq:
        eval.evaluate(model, ch_option=args.ch_option)



  cpuset_checked))
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


[0/273]	Batch Time 108.457 (108.457)	Data Time 102.677 (102.677)	Loss 15.7007 (15.7007)	
[68/273]	Batch Time 23.834 (8.779)	Data Time 23.266 (8.158)	Loss 4.8131 (6.0035)	
[136/273]	Batch Time 25.924 (8.072)	Data Time 25.366 (7.489)	Loss 4.5512 (5.3851)	
[204/273]	Batch Time 25.964 (7.815)	Data Time 25.399 (7.245)	Loss 4.5349 (5.1547)	
[272/273]	Batch Time 23.483 (7.689)	Data Time 18.878 (7.111)	Loss 4.4515 (4.9932)	
[0/273]	Batch Time 5.046 (5.046)	Data Time 4.399 (4.399)	Loss 4.5486 (4.5486)	
[68/273]	Batch Time 2.411 (1.107)	Data Time 1.778 (0.482)	Loss 4.5992 (4.4062)	
[136/273]	Batch Time 2.208 (1.073)	Data Time 1.566 (0.448)	Loss 4.1661 (4.3554)	
[204/273]	Batch Time 1.756 (1.062)	Data Time 1.150 (0.437)	Loss 4.3470 (4.3256)	
[272/273]	Batch Time 0.468 (1.046)	Data Time 0.000 (0.422)	Loss 4.1152 (4.2819)	
[0/273]	Batch Time 4.742 (4.742)	Data Time 4.097 (4.097)	Loss 4.1657 (4.1657)	
[68/273]	Batch Time 0.974 (1.082)	Data Time 0.354 (0.458)	Loss 4.1662 (4.1089)	
[136/273]	Batch Tim

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

  image_boxes.append(class_decoded_locs[1 - suppress])
  image_scores.append(class_scores[1 - suppress])



Loading and preparing results...
DONE (t=0.12s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.29s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Miss Rate  (MR) @ Reasonable         [ IoU=0.50      | height=[55:10000000000] | visibility=[none+partial_occ] ] = 100.00%
Recall: 0.0


  cpuset_checked))


[0/273]	Batch Time 4.442 (4.442)	Data Time 3.785 (3.785)	Loss 3.0557 (3.0557)	
[68/273]	Batch Time 1.771 (1.097)	Data Time 1.177 (0.472)	Loss 2.7013 (2.9743)	
[136/273]	Batch Time 2.242 (1.073)	Data Time 1.619 (0.448)	Loss 3.1057 (2.9827)	
[204/273]	Batch Time 1.541 (1.072)	Data Time 0.946 (0.446)	Loss 3.0558 (2.9715)	
[272/273]	Batch Time 0.469 (1.050)	Data Time 0.000 (0.426)	Loss 2.7752 (2.9718)	
[0/273]	Batch Time 4.359 (4.359)	Data Time 3.725 (3.725)	Loss 2.8709 (2.8709)	
[68/273]	Batch Time 2.483 (1.094)	Data Time 1.832 (0.466)	Loss 3.0350 (2.9112)	
[136/273]	Batch Time 1.434 (1.076)	Data Time 0.828 (0.448)	Loss 2.8344 (2.9247)	
[204/273]	Batch Time 0.629 (1.060)	Data Time 0.001 (0.432)	Loss 2.7008 (2.9219)	
[272/273]	Batch Time 0.470 (1.048)	Data Time 0.000 (0.422)	Loss 2.8844 (2.9204)	
[0/273]	Batch Time 4.742 (4.742)	Data Time 4.050 (4.050)	Loss 2.7555 (2.7555)	
[68/273]	Batch Time 1.620 (1.091)	Data Time 0.944 (0.466)	Loss 2.7818 (2.9091)	
[136/273]	Batch Time 1.697 (1.073)	Da

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.08s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Miss Rate  (MR) @ Reasonable         [ IoU=0.50      | height=[55:10000000000] | visibility=[none+partial_occ] ] = -100.00%
[Error] cannot evaluate by cocoEval. 


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
[Error] Exception type : IndexError 
[Error] Exception message : list index out of range 
[Error] (Stack trace) File : /content/URP_PD/Evaluation_official.py , Line : 26, Func.Name : evaluate_coco, Message : cocoEval.draw_figure(ax_test, rstFile.replace('json', 'jpg'))
[Error] (Stack trace) File : /content/URP_PD/torchcv/evaluations/eval_MR_multisetup.py , Line : 492, Func.Name : draw_figure, Message : ax.plot( xx[0], yy[0], linewidth=3, label='{:.2f}%, {:s}'.format(mean_s, os.path.basename(filename)) )


[0/273]	Batch Time 4.650 (4.650)	Data Time 4.024 (4.024)	Loss 2.7741 (2.7741)	
[68/273]	Batch Time 1.936 (1.094)	Data Time 1.327 (0.468)	Loss 2.9029 (2.8306)	
[136/273]	Batch Time 0.618 (1.074)	Data Time 0.001 (0.447)	Loss 2.4585 (2.8059)	
[204/273]	Batch Time 0.617 (1.056)	Data Time 0.004 (0.428)	Loss 3.1595 (2.8032)	
[272/273]	Batch Time 0.470 (1.048)	Data Time 0.000 (0.423)	Loss 3.1519 (2.7948)	
[0/273]	Batch Time 4.250 (4.250)	Data Time 3.601 (3.601)	Loss 2.6252 (2.6252)	
[68/273]	Batch Time 1.018 (1.071)	Data Time 0.396 (0.445)	Loss 2.5836 (2.7655)	
[136/273]	Batch Time 1.835 (1.063)	Data Time 1.221 (0.436)	Loss 2.9002 (2.7714)	
