In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import numpy as np
import argparse
import time

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim

import torchvision.transforms as T
from torch.utils.data import sampler
from torch.utils.data import DataLoader



In [None]:
def parse_args(argv=None):
    """Parse the training options.

    Args:
        argv: List of option strings WITHOUT the program name, e.g.
            ``['--lr', '0.01', '--cuda']``. ``None`` makes argparse fall back
            to ``sys.argv[1:]``. Pass ``[]`` from a notebook to get pure
            defaults, since the process arguments there belong to the kernel.

    Returns:
        argparse.Namespace holding one attribute per option defined below.
    """
    parser = argparse.ArgumentParser()

    # Dataset / input pipeline options.
    parser.add_argument('--dataset_dir', default='/home/lzhang/pascal/VOC2012/', type=str, help='training dataset dir')
    parser.add_argument('--dataset_split', default='trainval', type=str, help='training dataset split')
    parser.add_argument('--net', default='vgg16', type=str, help='vgg16, res101')
    parser.add_argument('--min_size', default=600, type=int, help='min image resize')
    parser.add_argument('--max_size', default=1000, type=int, help='max image resize')
    parser.add_argument('--num_workers', default=8, type=int, help='number of worker to load data')

    # Loss options.
    parser.add_argument('--rpn_sigma', default=3, type=int, help='rpn sigma for l1_smooth_loss')
    parser.add_argument('--roi_sigma', default=1, type=int, help='roi sigma for l1_smooth_loss')

    # Optimisation options.
    parser.add_argument('--lr', default=0.001, type=float, help='starting learning rate')
    parser.add_argument('--lr_decay_step', default=5, type=int, help='epoch to do learning rate decay')
    parser.add_argument('--lr_decay_gamma', default=0.1, type=float, help='learning rate decay ratio')
    parser.add_argument('--weight_decay', default=0.0005, type=float, help='weight decay ratio')
    parser.add_argument('--epochs', default=20, type=int, help='number of epochs to train')
    parser.add_argument('--optimizer', default="sgd", type=str, help='training optimizer')
    parser.add_argument('--batch_size', default=1, type=int, help='batch_size')
    parser.add_argument('--cuda', action='store_true', help='whether use CUDA')

    # Visualisation options (the port is kept as a string, as in the original defaults).
    parser.add_argument('--visdom_env', default='faster_rcnn', type=str, help='visdom env')
    parser.add_argument('--visdom_port', default='8097', type=str, help='visdom port')

    # Logging / checkpointing options.
    parser.add_argument('--plot_every', default=100, type=int, help='number of iterations to plot')
    parser.add_argument('--save_ckpt_every', default=10000, type=int, help='number of iterations to save checkpoint.')
    parser.add_argument('--save_dir', default="/home/lzhang/pytorch/models", type=str, help='directory to save models')

    parser.add_argument('--debug', action='store_true', help='if print debug msg')

    # Forward the caller's list; the previous parse_args() call silently ignored
    # `argv` and always parsed the process-wide sys.argv instead.
    return parser.parse_args(argv)


In [1]:
from model.faster_rcnn_vgg16 import FasterRCNNVGG16
from model.feature_extraction_network import FeatureExtractionNetwork
from model.region_proposal_network import RegionProposalNetwork
from model.anchor_generator import AnchorGenerator, AnchorTargetGenerator
from model.proposal_generator import ProposalGenerator
from data.dataset import VOCBboxDataSet

import cv2
from data import data_util
import torch as t
import numpy as np
import xml.etree.ElementTree as ET

%load_ext autoreload
%autoreload 2

# Single sample (image plus its XML annotation) read by train() below.
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
img_file = '/home/lzhang/tmp/000001.jpg'
anno_file = '/home/lzhang/tmp/000001.xml'

def get_bbox(anno):
    """Collect the bounding box of every ``object`` entry in an annotation.

    Args:
        anno: Parsed XML tree (or element) supporting ``findall('object')``.
            Each object must contain a ``bndbox`` child whose ``xmin``,
            ``ymin``, ``xmax`` and ``ymax`` children hold integer text.

    Returns:
        np.ndarray of dtype float32 and shape (N, 4), columns ordered
        (xmin, ymin, xmax, ymax), every coordinate reduced by 1.
        An annotation without objects yields an empty (0, 4) array.
    """
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    bbox = []
    for obj in anno.findall('object'):
        bndbox_anno = obj.find('bndbox')
        bbox.append([int(bndbox_anno.find(tag).text) - 1 for tag in corner_tags])

    # reshape keeps the (N, 4) contract when N == 0, where np.stack would raise
    # "need at least one array to stack".
    return np.array(bbox, dtype=np.float32).reshape(-1, 4)

def train(args):
    """Smoke-test the detection pipeline on one hard-coded sample.

    Reads the module-level ``img_file`` and ``anno_file``, resizes the image,
    generates anchors and anchor targets for it, and finally runs one call of
    ``FasterRCNNVGG16``. Nothing is returned; progress is only printed.

    Args:
        args: Not read by this function.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_file``.
    """
    img = cv2.imread(img_file)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an AttributeError on `img.shape` below.
    if img is None:
        raise FileNotFoundError('cannot read image: {}'.format(img_file))
    print(img.shape)
    img, scale = data_util.resize_img(img)
    print(img.shape)
    height, width = img.shape[0], img.shape[1]
    img_size = (height, width)

    # Disabled experiment: run only the VGG16 feature extractor on the image.
#     vgg16 = FeatureExtractionNetwork()
#     extractor = vgg16.features
#     features = extractor(t.from_numpy(np.expand_dims(img, axis=0).transpose((0, 3, 1, 2))).type(t.float))
#     print(features.shape)

    anno = ET.parse(anno_file)
    bbox = get_bbox(anno)

    anchor_generator = AnchorGenerator()
    anchors = anchor_generator(img_size)

    # The generated targets are discarded; this call only checks that it runs.
    anchor_reg_target_generator = AnchorTargetGenerator()
    anchor_reg_target_generator(img_size, anchors, bbox)

    # NOTE(review): a later cell calls FasterRCNNVGG16 with a single data
    # object; confirm which call signature the current model expects.
    faster_rcnn_vgg16 = FasterRCNNVGG16()
    faster_rcnn_vgg16(img, bbox, scale)

    # Disabled experiment: stand-alone proposal generation.
#     proposal_generator = ProposalGenerator()
#     proposal_generator()

# Run the smoke test; train() never reads its argument, so None is enough.
train(None)

(500, 353, 3)
(850, 600, 3)
torch.Size([1, 18, 53, 37])
torch.Size([1, 36, 53, 37])


In [None]:
from model.utils import bbox2reg, reg2bbox, bbox_transform, bbox_transform_inv
import numpy as np
%load_ext autoreload
%autoreload 2

# One source box and one destination box, each stored as a (1, 4) float array.
src_bbox = np.array([[1.0, 2.0, 3.0, 4.0]])
dst_bbox = np.array([[5.4, 6.24, 12.1, 13.1]])

# Convert the box pair to a regression value and back again from the source box.
# NOTE(review): presumably dst_bbox2 should reproduce dst_bbox if the two
# helpers are inverses of each other; confirm against model.utils.
reg = bbox2reg(src_bbox, dst_bbox)
dst_bbox2 = reg2bbox(src_bbox, reg)
# print(reg)
print(dst_bbox2)

In [None]:
import numpy as np
import time
from model.utils import non_maximum_suppression
%load_ext autoreload
%autoreload 2

# Fixed seed so the synthetic boxes are identical on every run.
np.random.seed(1)
num_rois = 6000
# Min corners are drawn from [50, 145) and max corners from [150, 200), so every
# box has min < max on both axes.
minxy = np.random.randint(low=50, high=145, size=(num_rois, 2))
maxxy = np.random.randint(low=150, high=200, size=(num_rois, 2))

# One score per box in [0.2, 1.0); `order` lists box indices by descending score.
score = 0.2 + 0.8 * np.random.random_sample(size=(num_rois, 1))
order = score.reshape(-1).argsort()[::-1]

# Rows of (min_x, min_y, max_x, max_y) as float32, rearranged by descending score.
# NOTE(review): presumably non_maximum_suppression expects score-sorted boxes; confirm.
boxes_new = np.concatenate([minxy, maxxy], axis=1).astype(np.float32)[order]

keep = non_maximum_suppression(boxes_new, thresh=0.7)
print(len(keep))

In [None]:
import numpy as np
import time
from model.utils import non_maximum_suppression, py_cpu_nms
%load_ext autoreload
%autoreload 2


# Fixed seed so the synthetic boxes are identical on every run.
np.random.seed(1)
num_rois = 6000
# Min corners are drawn from [50, 145) and max corners from [150, 200).
minxy = np.random.randint(low=50, high=145, size=(num_rois, 2))
maxxy = np.random.randint(low=150, high=200, size=(num_rois, 2))
# One score per box in [0.2, 1.0).
score = 0.2 + 0.8 * np.random.random_sample(size=(num_rois, 1))

# Five float32 columns per row: min corner, max corner, score.
boxes_new = np.concatenate([minxy, maxxy, score], axis=1).astype(np.float32)

def nms_test_time(boxes_new, thresholds=(0.7, 0.8, 0.9), repeats=1, nms_fn=None):
    """Time an NMS implementation at several thresholds.

    For every threshold the NMS function is called ``repeats`` times; the
    number of kept entries and the mean wall time per call are printed.

    Args:
        boxes_new: Box array handed unchanged to the NMS function.
        thresholds: Iterable of threshold values to benchmark.
        repeats: Number of timed calls per threshold (must be >= 1).
        nms_fn: Callable invoked as ``nms_fn(boxes_new, thresh=...)``.
            Defaults to ``py_cpu_nms``.

    Returns:
        The result of the last NMS call, or ``None`` if ``thresholds`` is empty.

    Raises:
        ValueError: If ``repeats`` is smaller than 1.
    """
    if repeats < 1:
        raise ValueError('repeats must be >= 1')
    if nms_fn is None:
        nms_fn = py_cpu_nms     # for cpu

    keep = None
    for thresh in thresholds:
        # perf_counter is monotonic and high-resolution, unlike time.time.
        since = time.perf_counter()
        for _ in range(repeats):
            keep = nms_fn(boxes_new, thresh=thresh)
        # Stop the clock before printing so console I/O is not part of the timing.
        elapsed = (time.perf_counter() - since) / repeats
        print(len(keep))
        print("thresh={:.1f}, time wastes:{:.4f}".format(thresh, elapsed))
    return keep

# Benchmark on the random boxes built above; as the last expression of the
# cell, the function's return value also becomes the cell output.
nms_test_time(boxes_new)


In [1]:
from argparse import Namespace
# Hand-written stand-in for parsed command-line options, one option per line.
args = Namespace(
    batch_size=1,
    cuda=False,
    dataset_dir='/home/lzhang/pascal2/VOC2007/',
    dataset_split='train_test',
    training=True,
    debug=False,
    epochs=20,
    lr=0.001,
    lr_decay_gamma=0.1,
    lr_decay_step=5,
    max_size=1000,
    min_size=600,
    net='vgg16',
    num_workers=8,
    optimizer='sgd',
    plot_every=100,
    random_hflip_ratio=0.5,
    return_difficult=False,
    roi_sigma=1,
    rpn_sigma=3,
    save_ckpt_every=10000,
    save_dir='/home/lzhang/pytorch/models',
    use_data_aug=False,
    use_difficult=False,
    visdom_env='faster_rcnn',
    visdom_port='8097',
    weight_decay=0.0005,
)

from data.dataset import VOCBboxDataSet
from model.faster_rcnn_vgg16 import FasterRCNNVGG16

%load_ext autoreload
%autoreload 2

# Build the dataset from the hand-written args and fetch its first sample.
dataset = VOCBboxDataSet(args)
data = dataset[0]
# The sample exposes an `anchors` attribute; print that array's shape.
print(data.anchors.shape)

# Run one call of the detector on that single sample.
# NOTE(review): `res` is not inspected here; the extra shapes in the recorded
# cell output presumably come from prints inside the model. Confirm.
net = FasterRCNNVGG16()
res = net(data)



(17649, 4)
(17649, 4)
(14690, 4)
(12000, 4)
4230
(2000, 4)
torch.Size([2000, 21])
torch.Size([2000, 84])


In [13]:
import numpy as np

a = np.asarray([1, 2, 3, 4, 5, 6]).reshape((2, 3))
# For a 2-D mask np.where returns (row_indices, col_indices), one entry per
# match; here rows == [1, 1] and cols == [1, 2].
b = np.where(a > 4)
# Pair the row and column of the SAME match (the second one, a[1, 2]).
# The previous a[b[1][0], b[1][1]] used two column indices as (row, col) and
# landed on 6 only by coincidence.
print(a[b[0][1], b[1][1]])

6
