Commit

init commit
Luke.taek committed Jan 19, 2018
1 parent 18d2a50 commit 3b9012f
Showing 10 changed files with 1,879 additions and 0 deletions.
Binary file added: class_label_map.xlsx (binary contents not shown)
540 changes: 540 additions & 0 deletions datagen.py

Large diffs are not rendered by default.

143 changes: 143 additions & 0 deletions encoder.py
@@ -0,0 +1,143 @@
'''Encode object boxes and labels.
Reference:
https://github.com/kuangliu/pytorch-retinanet/blob/master/encoder.py
'''


import math
import torch

from utils import meshgrid, box_iou, box_nms, change_box_order


class DataEncoder:
def __init__(self):
        self.anchor_areas = [16*16., 32*32., 48*48., 64*64., 128*128., 256*256.]  # one anchor area per feature map used for prediction
self.aspect_ratios = [1/2., 1/1., 2/1.]
self.scale_ratios = [1., pow(2,1/3.), pow(2,2/3.)]
self.anchor_wh = self._get_anchor_wh()

def _get_anchor_wh(self):
'''Compute anchor width and height for each feature map.
Returns:
anchor_wh: (tensor) anchor wh, sized [#fm, #anchors_per_cell, 2].
'''
anchor_wh = []
for s in self.anchor_areas:
for ar in self.aspect_ratios: # w/h = ar
h = math.sqrt(s/ar)
w = ar * h
for sr in self.scale_ratios: # scale
anchor_h = h*sr
anchor_w = w*sr
anchor_wh.append([anchor_w, anchor_h])
num_fms = len(self.anchor_areas)
return torch.Tensor(anchor_wh).view(num_fms, -1, 2)

def _get_anchor_boxes(self, input_size):
'''Compute anchor boxes for each feature map.
Args:
input_size: (tensor) model input size of (w,h).
Returns:
boxes: (list) anchor boxes for each feature map. Each of size [#anchors,4],
where #anchors = fmw * fmh * #anchors_per_cell
'''
num_fms = len(self.anchor_areas)
downsample_cnt = 3
# fm_sizes = [(input_size / pow(2., i + downsample_cnt)).ceil() for i in range(num_fms)] # p3 -> p7 feature map sizes

        # per-level feature map sizes; levels 4 and 5 come from 3x3 convs with
        # padding=0 (see inception.py), so each map is 2 pixels smaller than the last
        fm_sizes = []
        for i in range(num_fms):
            if i >= 4:
                fm_sizes.append((input_size / pow(2., 3 + downsample_cnt)) - (2. * (i - 3)))
            else:
                fm_sizes.append(input_size / pow(2., i + downsample_cnt))

boxes = []
for i in range(num_fms):
fm_size = fm_sizes[i]
grid_size = input_size / fm_size
fm_w, fm_h = int(fm_size[0]), int(fm_size[1])
xy = meshgrid(fm_w,fm_h) + 0.5 # [fm_h*fm_w, 2]
xy = (xy*grid_size).view(fm_h,fm_w,1,2).expand(fm_h,fm_w,9,2)
wh = self.anchor_wh[i].view(1,1,9,2).expand(fm_h,fm_w,9,2)
box = torch.cat([xy,wh], 3) # [x,y,w,h]
boxes.append(box.view(-1,4))
return torch.cat(boxes, 0)

def encode(self, boxes, labels, input_size):
'''Encode target bounding boxes and class labels.
        We follow the Faster R-CNN box coder:
tx = (x - anchor_x) / anchor_w
ty = (y - anchor_y) / anchor_h
tw = log(w / anchor_w)
th = log(h / anchor_h)
Args:
boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
labels: (tensor) object class labels, sized [#obj,].
input_size: (int/tuple) model input size of (w,h).
Returns:
loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
cls_targets: (tensor) encoded class labels, sized [#anchors,].
'''
input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
else torch.Tensor(input_size)
anchor_boxes = self._get_anchor_boxes(input_size)
boxes = change_box_order(boxes, 'xyxy2xywh')

ious = box_iou(anchor_boxes, boxes, order='xywh')
max_ious, max_ids = ious.max(1)
boxes = boxes[max_ids]

loc_xy = (boxes[:,:2]-anchor_boxes[:,:2]) / anchor_boxes[:,2:]
loc_wh = torch.log(boxes[:,2:]/anchor_boxes[:,2:])
loc_targets = torch.cat([loc_xy,loc_wh], 1)
cls_targets = labels[max_ids]

        cls_targets[max_ious < 0.5] = 0
        ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore anchors with IoU in (0.4, 0.5)
        cls_targets[ignore] = -1  # mark ignored anchors with -1
return loc_targets, cls_targets

def decode(self, loc_preds, cls_preds, input_size):
        '''Decode outputs back to bounding box locations and class labels.
Args:
loc_preds: (tensor) predicted locations, sized [#anchors, 4].
cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
input_size: (int/tuple) model input size of (w,h).
Returns:
            boxes: (tensor) decoded box locations, sized [#obj,4].
labels: (tensor) class labels for each box, sized [#obj,].
'''
CLS_THRESH = 0.5
NMS_THRESH = 0.5

input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
else torch.Tensor(input_size)
anchor_boxes = self._get_anchor_boxes(input_size)

loc_xy = loc_preds[:,:2]
loc_wh = loc_preds[:,2:]

xy = loc_xy * anchor_boxes[:,2:] + anchor_boxes[:,:2]
wh = loc_wh.exp() * anchor_boxes[:,2:]
boxes = torch.cat([xy-wh/2, xy+wh/2], 1) # [#anchors,4]

score, labels = cls_preds.sigmoid().max(1) # [#anchors,]
ids = score > CLS_THRESH
ids = ids.nonzero().squeeze() # [#obj,]

if ids.dim() == 0:
return [], []

keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
return boxes[ids][keep], labels[ids][keep]
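
Note on the anchor layout above: with six anchor areas, three aspect ratios, and three scale ratios, _get_anchor_wh yields 9 anchor shapes per cell on each of the 6 pyramid levels. A minimal sanity-check sketch, assuming encoder.py and its utils dependency from this commit are importable:

import torch
from encoder import DataEncoder

encoder = DataEncoder()
print(encoder.anchor_wh.size())  # torch.Size([6, 9, 2]): 6 levels x 9 anchors x (w, h)

# anchors over all levels for a 512x512 input, boxes in (x, y, w, h) order
anchors = encoder._get_anchor_boxes(torch.Tensor([512., 512.]))
print(anchors.size())            # [#anchors, 4]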
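
The decode path is the algebraic inverse of the encode path: xy = t_xy * anchor_wh + anchor_xy and wh = exp(t_wh) * anchor_wh undo the Faster R-CNN transform applied in encode. A minimal round-trip sketch with made-up anchor and target values (plain tensors, nothing model-specific assumed):

import torch

anchor = torch.Tensor([[100., 100., 32., 64.]])  # hypothetical anchor, (x, y, w, h)
target = torch.Tensor([[110., 90., 40., 48.]])   # hypothetical target box, (x, y, w, h)

# encode, as in DataEncoder.encode
loc_xy = (target[:, :2] - anchor[:, :2]) / anchor[:, 2:]
loc_wh = torch.log(target[:, 2:] / anchor[:, 2:])
t = torch.cat([loc_xy, loc_wh], 1)

# decode, as in DataEncoder.decode
xy = t[:, :2] * anchor[:, 2:] + anchor[:, :2]
wh = t[:, 2:].exp() * anchor[:, 2:]
recovered = torch.cat([xy, wh], 1)

print((recovered - target).abs().max())  # ~0: decode inverts encode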
191 changes: 191 additions & 0 deletions inception.py
@@ -0,0 +1,191 @@
import torch.nn as nn
import torch.nn.functional as F
import torch

from torch.autograd import Variable

import torchvision.models as models

class BottleneckA(nn.Module):
def __init__(self, input_dims):
super(BottleneckA, self).__init__()

self.conv1 = nn.Conv2d(input_dims, 128, kernel_size=1, stride=1, padding=0, bias=False)
self.conv1_bn = nn.BatchNorm2d(128)
self.conv2_dilation = nn.Conv2d(input_dims, 128, kernel_size=3, stride=1, padding=2, dilation=2, bias=False)
self.conv2_bn = nn.BatchNorm2d(128)
self.conv3 = nn.Conv2d(input_dims, 128, kernel_size=1, stride=1, padding=0, bias=False)
self.conv3_bn = nn.BatchNorm2d(128)
self.conv4_1 = nn.Conv2d(input_dims, 128, kernel_size=(1,5), stride=1, padding=(0, 4), dilation=2, bias=False)
self.conv4_1_bn = nn.BatchNorm2d(128)
self.conv4_2 = nn.Conv2d(128, 128, kernel_size=(5,1), stride=1, padding=(4, 0), dilation=2, bias=False)
self.conv4_2_bn = nn.BatchNorm2d(128)

def forward(self, x):
c1_out = F.relu(self.conv1_bn(self.conv1(x)))
c2_out = F.relu(self.conv2_bn(self.conv2_dilation(x)))
c3_out = F.relu(self.conv3_bn(self.conv3(F.max_pool2d(x, kernel_size=3, stride=1, padding=1))))
c4_out = F.relu(self.conv4_1_bn(self.conv4_1(x)))
c4_out = F.relu(self.conv4_2_bn(self.conv4_2(c4_out)))

output = [c1_out, c2_out, c3_out, c4_out]

return torch.cat(output, dim=1)

class BottleneckB(nn.Module):
def __init__(self, input_dims):
super(BottleneckB, self).__init__()

self.conv1 = nn.Conv2d(input_dims, 128, kernel_size=1, stride=1, padding=0)
self.conv2_dilation = nn.Conv2d(input_dims, 128, kernel_size=3, stride=1, padding=2, dilation=2)
self.conv3 = nn.Conv2d(input_dims, 128, kernel_size=1, stride=1, padding=0)
self.conv4_1 = nn.Conv2d(input_dims, 128, kernel_size=(1,5), stride=1, padding=(0, 2))
self.conv4_2 = nn.Conv2d(128, 128, kernel_size=(5,1), stride=1, padding=(2, 0))

def forward(self, x):
c1_out = F.relu(self.conv1(x))
c2_out = F.relu(self.conv2_dilation(x))
c3_out = F.relu(self.conv3(F.max_pool2d(x, kernel_size=3, stride=1, padding=1)))
c4_out = F.relu(self.conv4_1(x))
c4_out = F.relu(self.conv4_2(c4_out))

output = [c1_out, c2_out, c3_out, c4_out]

return torch.cat(output, dim=1)

class Inception(nn.Module):

    def __init__(self, block_a, block_b):
super(Inception, self).__init__()

self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.conv3_3_inception = block_a(256)

self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv4_3_inception = block_a(512)

self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)

self.fc6 = nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=6, dilation=6)
self.fc7 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1)
        self.fc7_inception = block_a(1024)

        self.conv6_1 = nn.Conv2d(1024, 256, kernel_size=3, stride=2, padding=1)
        self.conv6_1_inception = block_b(256)
        self.conv7_1 = nn.Conv2d(512, 256, kernel_size=3, stride=2, padding=1)
        self.conv7_1_inception = block_b(256)
        self.conv8_1 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=0)
        self.conv8_1_inception = block_b(256)
        self.conv9_1 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=0)
        self.conv9_1_inception = block_b(256)
        self.conv10_1 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=0)
        self.conv10_1_inception = block_b(256)

def forward(self, x):
out = F.relu(self.conv1_1(x))
out = F.relu(self.conv1_2(out))
out = F.max_pool2d(out, kernel_size=2, stride=2)
out = F.relu(self.conv2_1(out))
out = F.relu(self.conv2_2(out))
out = F.max_pool2d(out, kernel_size=2, stride=2)
out = F.relu(self.conv3_1(out))
out = F.relu(self.conv3_2(out))
out = F.relu(self.conv3_3(out))
conv3_inception = self.conv3_3_inception(out)
out = F.max_pool2d(out, kernel_size=2, stride=2)
out = F.relu(self.conv4_1(out))
out = F.relu(self.conv4_2(out))
out = F.relu(self.conv4_3(out))
conv4_inception = self.conv4_3_inception(out)
out = F.max_pool2d(out, kernel_size=2, stride=2)
out = F.relu(self.conv5_1(out))
out = F.relu(self.conv5_2(out))
out = F.relu(self.conv5_3(out))
out = F.max_pool2d(out, kernel_size=3, stride=1, padding=1)
out = F.relu(self.fc6(out))
out = F.relu(self.fc7(out))
fc7_inception = self.fc7_inception(out)
out = F.relu(self.conv6_1(out))
conv6_inception = self.conv6_1_inception(out)
out = F.relu(self.conv7_1(conv6_inception))
conv7_inception = self.conv7_1_inception(out)
out = F.relu(self.conv8_1(conv7_inception))
conv8_inception = self.conv8_1_inception(out)
out = F.relu(self.conv9_1(conv8_inception))
conv9_inception = self.conv9_1_inception(out)
out = F.relu(self.conv10_1(conv9_inception))
conv10_inception = self.conv10_1_inception(out)

return conv3_inception, conv4_inception, fc7_inception, conv6_inception, conv7_inception, conv8_inception, conv9_inception, conv10_inception

def load_inception(using_pretrained):
    net = Inception(block_a=BottleneckA, block_b=BottleneckB)

    if using_pretrained:
        # copy the VGG16 conv weights below; note that these assignments bind the
        # same nn.Parameter objects as torchvision's VGG16 rather than cloning them
        pre_trained_vgg16 = models.vgg16(pretrained=True)
        pre_trained_feature = pre_trained_vgg16.features

net.conv1_1.weight = pre_trained_feature[0].weight
net.conv1_1.bias = pre_trained_feature[0].bias
net.conv1_2.weight = pre_trained_feature[2].weight
net.conv1_2.bias = pre_trained_feature[2].bias

net.conv2_1.weight = pre_trained_feature[5].weight
net.conv2_1.bias = pre_trained_feature[5].bias
net.conv2_2.weight = pre_trained_feature[7].weight
net.conv2_2.bias = pre_trained_feature[7].bias

net.conv3_1.weight = pre_trained_feature[10].weight
net.conv3_1.bias = pre_trained_feature[10].bias
net.conv3_2.weight = pre_trained_feature[12].weight
net.conv3_2.bias = pre_trained_feature[12].bias
net.conv3_3.weight = pre_trained_feature[14].weight
net.conv3_3.bias = pre_trained_feature[14].bias

net.conv4_1.weight = pre_trained_feature[17].weight
net.conv4_1.bias = pre_trained_feature[17].bias
net.conv4_2.weight = pre_trained_feature[19].weight
net.conv4_2.bias = pre_trained_feature[19].bias
net.conv4_3.weight = pre_trained_feature[21].weight
net.conv4_3.bias = pre_trained_feature[21].bias

net.conv5_1.weight = pre_trained_feature[24].weight
net.conv5_1.bias = pre_trained_feature[24].bias
net.conv5_2.weight = pre_trained_feature[26].weight
net.conv5_2.bias = pre_trained_feature[26].bias
net.conv5_3.weight = pre_trained_feature[28].weight
net.conv5_3.bias = pre_trained_feature[28].bias

return net


def test():
net = load_inception(using_pretrained=True)

num_parameters = 0.
for param in net.parameters():
sizes = param.size()

num_layer_param = 1.
for size in sizes:
num_layer_param *= size
num_parameters += num_layer_param

print(net)
print("num. of parameters : " + str(num_parameters))

fms = net(Variable(torch.randn(1,3,512,512)))
for fm in fms:
print(fm.size())

# test()
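
For context, a minimal sketch of how this backbone and the DataEncoder from encoder.py would be wired together for target preparation; the box and label values are hypothetical, and the 512x512 input matches test() above:

import torch
from torch.autograd import Variable
from encoder import DataEncoder
from inception import load_inception

net = load_inception(using_pretrained=False)
fms = net(Variable(torch.randn(1, 3, 512, 512)))  # 8 feature maps, as in test()

encoder = DataEncoder()
boxes = torch.Tensor([[120., 60., 280., 220.]])   # hypothetical (xmin, ymin, xmax, ymax)
labels = torch.LongTensor([3])                    # hypothetical class id
loc_targets, cls_targets = encoder.encode(boxes, labels, input_size=512)
print(loc_targets.size(), cls_targets.size())     # [#anchors, 4], [#anchors]

Note that load_inception assigns the pretrained Parameter objects directly, so the copied layers share storage with torchvision's VGG16; clone the tensors first if independent weights are needed.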