Luke.taek committed on Jan 19, 2018
1 parent 18d2a50 · commit 3b9012f
Showing 10 changed files with 1,879 additions and 0 deletions.
@@ -0,0 +1,143 @@
'''Encode object boxes and labels.
Reference:
  https://github.com/kuangliu/pytorch-retinanet/blob/master/encoder.py
'''

import math
import torch

from utils import meshgrid, box_iou, box_nms, change_box_order


class DataEncoder:
    def __init__(self):
        # one entry per feature map used for prediction
        self.anchor_areas = [16*16., 32*32., 48*48., 64*64., 128*128., 256*256.]
        self.aspect_ratios = [1/2., 1/1., 2/1.]
        self.scale_ratios = [1., pow(2, 1/3.), pow(2, 2/3.)]
        self.anchor_wh = self._get_anchor_wh()
    def _get_anchor_wh(self):
        '''Compute anchor width and height for each feature map.
        Returns:
          anchor_wh: (tensor) anchor wh, sized [#fm, #anchors_per_cell, 2].
        '''
        anchor_wh = []
        for s in self.anchor_areas:
            for ar in self.aspect_ratios:  # w/h = ar
                h = math.sqrt(s/ar)
                w = ar * h
                for sr in self.scale_ratios:  # scale
                    anchor_h = h*sr
                    anchor_w = w*sr
                    anchor_wh.append([anchor_w, anchor_h])
        num_fms = len(self.anchor_areas)
        return torch.Tensor(anchor_wh).view(num_fms, -1, 2)
    def _get_anchor_boxes(self, input_size):
        '''Compute anchor boxes for each feature map.
        Args:
          input_size: (tensor) model input size of (w,h).
        Returns:
          boxes: (tensor) anchor boxes of all feature maps concatenated, sized [#anchors, 4],
                 where each level contributes fm_w * fm_h * #anchors_per_cell anchors.
        '''
        num_fms = len(self.anchor_areas)
        downsample_cnt = 3
        # fm_sizes = [(input_size / pow(2., i + downsample_cnt)).ceil() for i in range(num_fms)]  # p3 -> p7 feature map sizes

        fm_sizes = []
        for i in range(num_fms):
            if i >= 4:
                # the deepest maps come from 3x3 convs with no padding, so each level shrinks by 2
                fm_sizes.append((input_size / pow(2., 3 + downsample_cnt)) - (2. * (i - 3)))
            else:
                fm_sizes.append(input_size / pow(2., i + downsample_cnt))

        boxes = []
        for i in range(num_fms):
            fm_size = fm_sizes[i]
            grid_size = input_size / fm_size
            fm_w, fm_h = int(fm_size[0]), int(fm_size[1])
            xy = meshgrid(fm_w, fm_h) + 0.5  # [fm_h*fm_w, 2]
            xy = (xy * grid_size).view(fm_h, fm_w, 1, 2).expand(fm_h, fm_w, 9, 2)
            wh = self.anchor_wh[i].view(1, 1, 9, 2).expand(fm_h, fm_w, 9, 2)
            box = torch.cat([xy, wh], 3)  # [x,y,w,h]
            boxes.append(box.view(-1, 4))
        return torch.cat(boxes, 0)
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.
        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)
        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).
        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors, 4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = labels[max_ids]

        cls_targets[max_ious < 0.5] = 0               # background
        ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious in (0.4, 0.5)
        cls_targets[ignore] = -1                      # mark ignored anchors with -1
        return loc_targets, cls_targets
    def decode(self, loc_preds, cls_preds, input_size):
        '''Decode outputs back to bounding box locations and class labels.
        Args:
          loc_preds: (tensor) predicted locations, sized [#anchors, 4].
          cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
          input_size: (int/tuple) model input size of (w,h).
        Returns:
          boxes: (tensor) decoded box locations, sized [#obj, 4].
          labels: (tensor) class labels for each box, sized [#obj,].
        '''
        CLS_THRESH = 0.5
        NMS_THRESH = 0.5

        input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)

        loc_xy = loc_preds[:, :2]
        loc_wh = loc_preds[:, 2:]

        xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = loc_wh.exp() * anchor_boxes[:, 2:]
        boxes = torch.cat([xy - wh/2, xy + wh/2], 1)  # [#anchors, 4]

        score, labels = cls_preds.sigmoid().max(1)    # [#anchors,]
        ids = score > CLS_THRESH
        ids = ids.nonzero().squeeze()                 # [#obj,]

        if ids.dim() == 0:
            return [], []

        keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
        return boxes[ids][keep], labels[ids][keep]
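
The encode/decode pair above is the standard Faster R-CNN box parameterization applied independently to each anchor. As a quick sanity check it can be exercised on a single anchor/box pair with plain torch ops; the following is a standalone sketch with made-up numbers and does not depend on the repo's utils module:

# Standalone sketch: round-trip one box through the Faster R-CNN box coder
# used by DataEncoder. Boxes are in (cx, cy, w, h) order; the values are invented.
import torch

anchor = torch.Tensor([100., 100., 64., 64.])   # anchor_x, anchor_y, anchor_w, anchor_h
target = torch.Tensor([110., 104., 80., 40.])   # ground-truth box in the same order

# encode: tx = (x - anchor_x) / anchor_w, ty = (y - anchor_y) / anchor_h,
#         tw = log(w / anchor_w),         th = log(h / anchor_h)
loc_xy = (target[:2] - anchor[:2]) / anchor[2:]
loc_wh = torch.log(target[2:] / anchor[2:])
loc = torch.cat([loc_xy, loc_wh], 0)

# decode: invert the transform and recover the original box
xy = loc[:2] * anchor[2:] + anchor[:2]
wh = loc[2:].exp() * anchor[2:]
decoded = torch.cat([xy, wh], 0)

print(loc)      # encoded offsets (tx, ty, tw, th)
print(decoded)  # ~[110., 104., 80., 40.], i.e. the target box is recovered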
@@ -0,0 +1,191 @@
import torch.nn as nn
import torch.nn.functional as F
import torch

from torch.autograd import Variable

import torchvision.models as models


class BottleneckA(nn.Module):
    # Inception-style block with batch norm: 1x1, dilated 3x3, pooled 1x1, and
    # factorized (1x5)+(5x1) branches, each producing 128 channels (512 after concat).
    def __init__(self, input_dims):
        super(BottleneckA, self).__init__()

        self.conv1 = nn.Conv2d(input_dims, 128, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv1_bn = nn.BatchNorm2d(128)
        self.conv2_dilation = nn.Conv2d(input_dims, 128, kernel_size=3, stride=1, padding=2, dilation=2, bias=False)
        self.conv2_bn = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(input_dims, 128, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv3_bn = nn.BatchNorm2d(128)
        self.conv4_1 = nn.Conv2d(input_dims, 128, kernel_size=(1, 5), stride=1, padding=(0, 4), dilation=2, bias=False)
        self.conv4_1_bn = nn.BatchNorm2d(128)
        self.conv4_2 = nn.Conv2d(128, 128, kernel_size=(5, 1), stride=1, padding=(4, 0), dilation=2, bias=False)
        self.conv4_2_bn = nn.BatchNorm2d(128)

    def forward(self, x):
        c1_out = F.relu(self.conv1_bn(self.conv1(x)))
        c2_out = F.relu(self.conv2_bn(self.conv2_dilation(x)))
        c3_out = F.relu(self.conv3_bn(self.conv3(F.max_pool2d(x, kernel_size=3, stride=1, padding=1))))
        c4_out = F.relu(self.conv4_1_bn(self.conv4_1(x)))
        c4_out = F.relu(self.conv4_2_bn(self.conv4_2(c4_out)))

        output = [c1_out, c2_out, c3_out, c4_out]

        return torch.cat(output, dim=1)
class BottleneckB(nn.Module):
    # Same four branches as BottleneckA, but without batch norm, with bias enabled,
    # and with the factorized branch undilated.
    def __init__(self, input_dims):
        super(BottleneckB, self).__init__()

        self.conv1 = nn.Conv2d(input_dims, 128, kernel_size=1, stride=1, padding=0)
        self.conv2_dilation = nn.Conv2d(input_dims, 128, kernel_size=3, stride=1, padding=2, dilation=2)
        self.conv3 = nn.Conv2d(input_dims, 128, kernel_size=1, stride=1, padding=0)
        self.conv4_1 = nn.Conv2d(input_dims, 128, kernel_size=(1, 5), stride=1, padding=(0, 2))
        self.conv4_2 = nn.Conv2d(128, 128, kernel_size=(5, 1), stride=1, padding=(2, 0))

    def forward(self, x):
        c1_out = F.relu(self.conv1(x))
        c2_out = F.relu(self.conv2_dilation(x))
        c3_out = F.relu(self.conv3(F.max_pool2d(x, kernel_size=3, stride=1, padding=1)))
        c4_out = F.relu(self.conv4_1(x))
        c4_out = F.relu(self.conv4_2(c4_out))

        output = [c1_out, c2_out, c3_out, c4_out]

        return torch.cat(output, dim=1)
class Inception(nn.Module):

    def __init__(self, blockA, BlockB):
        super(Inception, self).__init__()

        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.conv3_3_inception = blockA(256)

        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv4_3_inception = blockA(512)

        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)

        self.fc6 = nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=6, dilation=6)
        self.fc7 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1)
        self.fc7_inception = blockA(1024)

        self.conv6_1 = nn.Conv2d(1024, 256, kernel_size=3, stride=2, padding=1)
        self.conv6_1_inception = BlockB(256)
        self.conv7_1 = nn.Conv2d(512, 256, kernel_size=3, stride=2, padding=1)
        self.conv7_1_inception = BlockB(256)
        self.conv8_1 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=0)
        self.conv8_1_inception = BlockB(256)
        self.conv9_1 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=0)
        self.conv9_1_inception = BlockB(256)
        self.conv10_1 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=0)
        self.conv10_1_inception = BlockB(256)
    def forward(self, x):
        out = F.relu(self.conv1_1(x))
        out = F.relu(self.conv1_2(out))
        out = F.max_pool2d(out, kernel_size=2, stride=2)
        out = F.relu(self.conv2_1(out))
        out = F.relu(self.conv2_2(out))
        out = F.max_pool2d(out, kernel_size=2, stride=2)
        out = F.relu(self.conv3_1(out))
        out = F.relu(self.conv3_2(out))
        out = F.relu(self.conv3_3(out))
        conv3_inception = self.conv3_3_inception(out)
        out = F.max_pool2d(out, kernel_size=2, stride=2)
        out = F.relu(self.conv4_1(out))
        out = F.relu(self.conv4_2(out))
        out = F.relu(self.conv4_3(out))
        conv4_inception = self.conv4_3_inception(out)
        out = F.max_pool2d(out, kernel_size=2, stride=2)
        out = F.relu(self.conv5_1(out))
        out = F.relu(self.conv5_2(out))
        out = F.relu(self.conv5_3(out))
        out = F.max_pool2d(out, kernel_size=3, stride=1, padding=1)
        out = F.relu(self.fc6(out))
        out = F.relu(self.fc7(out))
        fc7_inception = self.fc7_inception(out)
        out = F.relu(self.conv6_1(out))
        conv6_inception = self.conv6_1_inception(out)
        out = F.relu(self.conv7_1(conv6_inception))
        conv7_inception = self.conv7_1_inception(out)
        out = F.relu(self.conv8_1(conv7_inception))
        conv8_inception = self.conv8_1_inception(out)
        out = F.relu(self.conv9_1(conv8_inception))
        conv9_inception = self.conv9_1_inception(out)
        out = F.relu(self.conv10_1(conv9_inception))
        conv10_inception = self.conv10_1_inception(out)

        return (conv3_inception, conv4_inception, fc7_inception, conv6_inception,
                conv7_inception, conv8_inception, conv9_inception, conv10_inception)
def load_inception(using_pretrained):
    net = Inception(blockA=BottleneckA, BlockB=BottleneckB)

    if using_pretrained is True:
        # Copy the VGG16 conv weights into the trunk. The indices below are the
        # positions of the conv layers inside vgg16.features (ReLU/pool layers are skipped).
        pre_trained_vgg16 = models.vgg16(pretrained=True)
        pre_trained_feature = pre_trained_vgg16.features

        net.conv1_1.weight = pre_trained_feature[0].weight
        net.conv1_1.bias = pre_trained_feature[0].bias
        net.conv1_2.weight = pre_trained_feature[2].weight
        net.conv1_2.bias = pre_trained_feature[2].bias

        net.conv2_1.weight = pre_trained_feature[5].weight
        net.conv2_1.bias = pre_trained_feature[5].bias
        net.conv2_2.weight = pre_trained_feature[7].weight
        net.conv2_2.bias = pre_trained_feature[7].bias

        net.conv3_1.weight = pre_trained_feature[10].weight
        net.conv3_1.bias = pre_trained_feature[10].bias
        net.conv3_2.weight = pre_trained_feature[12].weight
        net.conv3_2.bias = pre_trained_feature[12].bias
        net.conv3_3.weight = pre_trained_feature[14].weight
        net.conv3_3.bias = pre_trained_feature[14].bias

        net.conv4_1.weight = pre_trained_feature[17].weight
        net.conv4_1.bias = pre_trained_feature[17].bias
        net.conv4_2.weight = pre_trained_feature[19].weight
        net.conv4_2.bias = pre_trained_feature[19].bias
        net.conv4_3.weight = pre_trained_feature[21].weight
        net.conv4_3.bias = pre_trained_feature[21].bias

        net.conv5_1.weight = pre_trained_feature[24].weight
        net.conv5_1.bias = pre_trained_feature[24].bias
        net.conv5_2.weight = pre_trained_feature[26].weight
        net.conv5_2.bias = pre_trained_feature[26].bias
        net.conv5_3.weight = pre_trained_feature[28].weight
        net.conv5_3.bias = pre_trained_feature[28].bias

    return net

def test():
    net = load_inception(using_pretrained=True)

    num_parameters = 0.
    for param in net.parameters():
        sizes = param.size()

        num_layer_param = 1.
        for size in sizes:
            num_layer_param *= size
        num_parameters += num_layer_param

    print(net)
    print("num. of parameters : " + str(num_parameters))

    fms = net(Variable(torch.randn(1, 3, 512, 512)))
    for fm in fms:
        print(fm.size())


# test()
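
To see how this backbone lines up with DataEncoder, the per-level anchor counts for a 512x512 input can be worked out from the same fm_sizes arithmetic used in _get_anchor_boxes; the resulting grid sizes (64, 32, 16, 8, 6, 4) appear to correspond to six of the eight feature maps returned by Inception.forward. The snippet below is a back-of-envelope sketch (the 512 input size is an assumption; 9 anchors per cell comes from the 3 aspect ratios x 3 scale ratios above) and mirrors the arithmetic directly, so it needs neither the network nor the utils module:

# Back-of-envelope check: expected anchor count per prediction level for a 512x512 input,
# reproducing the fm_sizes logic from DataEncoder._get_anchor_boxes.
input_size = 512.
downsample_cnt = 3
num_fms = 6            # len(anchor_areas)
anchors_per_cell = 9   # 3 aspect ratios x 3 scale ratios

total = 0
for i in range(num_fms):
    if i >= 4:
        fm = input_size / 2 ** (3 + downsample_cnt) - 2. * (i - 3)
    else:
        fm = input_size / 2 ** (i + downsample_cnt)
    fm = int(fm)
    total += fm * fm * anchors_per_cell
    print('level %d: %dx%d cells -> %d anchors' % (i, fm, fm, fm * fm * anchors_per_cell))

print('total anchors:', total)  # expected first dimension of loc_preds / cls_preds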