Luke.taek committed on Jan 19, 2018
1 parent 18d2a50 · commit 3b9012f
Showing 10 changed files with 1,879 additions and 0 deletions.
@@ -0,0 +1,143 @@
'''Encode object boxes and labels.
Reference:
  https://github.com/kuangliu/pytorch-retinanet/blob/master/encoder.py
'''

import math
import torch

from utils import meshgrid, box_iou, box_nms, change_box_order


class DataEncoder:
    def __init__(self):
        # one entry per feature map used for prediction
        self.anchor_areas = [16*16., 32*32., 48*48., 64*64., 128*128., 256*256.]
        self.aspect_ratios = [1/2., 1/1., 2/1.]
        self.scale_ratios = [1., pow(2, 1/3.), pow(2, 2/3.)]
        self.anchor_wh = self._get_anchor_wh()
    def _get_anchor_wh(self):
        '''Compute anchor width and height for each feature map.
        Returns:
          anchor_wh: (tensor) anchor wh, sized [#fm, #anchors_per_cell, 2].
        '''
        anchor_wh = []
        for s in self.anchor_areas:
            for ar in self.aspect_ratios:  # w/h = ar
                h = math.sqrt(s/ar)
                w = ar * h
                for sr in self.scale_ratios:  # scale
                    anchor_h = h*sr
                    anchor_w = w*sr
                    anchor_wh.append([anchor_w, anchor_h])
        num_fms = len(self.anchor_areas)
        return torch.Tensor(anchor_wh).view(num_fms, -1, 2)
    def _get_anchor_boxes(self, input_size):
        '''Compute anchor boxes for each feature map.
        Args:
          input_size: (tensor) model input size of (w,h).
        Returns:
          boxes: (tensor) anchor boxes of all feature maps concatenated, sized [#anchors, 4],
                 where each level contributes fm_w * fm_h * #anchors_per_cell anchors.
        '''
        num_fms = len(self.anchor_areas)
        downsample_cnt = 3
        # fm_sizes = [(input_size / pow(2., i + downsample_cnt)).ceil() for i in range(num_fms)]  # p3 -> p7 feature map sizes

        fm_sizes = []
        for i in range(num_fms):
            if i >= 4:
                # the deepest maps come from 3x3 convs with no padding, so each level shrinks by 2
                fm_sizes.append((input_size / pow(2., 3 + downsample_cnt)) - (2. * (i - 3)))
            else:
                fm_sizes.append(input_size / pow(2., i + downsample_cnt))

        boxes = []
        for i in range(num_fms):
            fm_size = fm_sizes[i]
            grid_size = input_size / fm_size
            fm_w, fm_h = int(fm_size[0]), int(fm_size[1])
            xy = meshgrid(fm_w, fm_h) + 0.5  # [fm_h*fm_w, 2]
            xy = (xy * grid_size).view(fm_h, fm_w, 1, 2).expand(fm_h, fm_w, 9, 2)
            wh = self.anchor_wh[i].view(1, 1, 9, 2).expand(fm_h, fm_w, 9, 2)
            box = torch.cat([xy, wh], 3)  # [x,y,w,h]
            boxes.append(box.view(-1, 4))
        return torch.cat(boxes, 0)
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.
        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)
        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).
        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors, 4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = labels[max_ids]

        cls_targets[max_ious < 0.5] = 0               # background
        ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious in (0.4, 0.5)
        cls_targets[ignore] = -1                      # mark ignored anchors with -1
        return loc_targets, cls_targets
    def decode(self, loc_preds, cls_preds, input_size):
        '''Decode outputs back to bounding box locations and class labels.
        Args:
          loc_preds: (tensor) predicted locations, sized [#anchors, 4].
          cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
          input_size: (int/tuple) model input size of (w,h).
        Returns:
          boxes: (tensor) decoded box locations, sized [#obj, 4].
          labels: (tensor) class labels for each box, sized [#obj,].
        '''
        CLS_THRESH = 0.5
        NMS_THRESH = 0.5

        input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)

        loc_xy = loc_preds[:, :2]
        loc_wh = loc_preds[:, 2:]

        xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = loc_wh.exp() * anchor_boxes[:, 2:]
        boxes = torch.cat([xy - wh/2, xy + wh/2], 1)  # [#anchors, 4]

        score, labels = cls_preds.sigmoid().max(1)    # [#anchors,]
        ids = score > CLS_THRESH
        ids = ids.nonzero().squeeze()                 # [#obj,]

        if ids.dim() == 0:
            return [], []

        keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
        return boxes[ids][keep], labels[ids][keep]
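
The encode/decode pair above is the standard Faster R-CNN box parameterization applied independently to each anchor. As a quick sanity check it can be exercised on a single anchor/box pair with plain torch ops; the following is a standalone sketch with made-up numbers and does not depend on the repo's utils module:

# Standalone sketch: round-trip one box through the Faster R-CNN box coder
# used by DataEncoder. Boxes are in (cx, cy, w, h) order; the values are invented.
import torch

anchor = torch.Tensor([100., 100., 64., 64.])   # anchor_x, anchor_y, anchor_w, anchor_h
target = torch.Tensor([110., 104., 80., 40.])   # ground-truth box in the same order

# encode: tx = (x - anchor_x) / anchor_w, ty = (y - anchor_y) / anchor_h,
#         tw = log(w / anchor_w),         th = log(h / anchor_h)
loc_xy = (target[:2] - anchor[:2]) / anchor[2:]
loc_wh = torch.log(target[2:] / anchor[2:])
loc = torch.cat([loc_xy, loc_wh], 0)

# decode: invert the transform and recover the original box
xy = loc[:2] * anchor[2:] + anchor[:2]
wh = loc[2:].exp() * anchor[2:]
decoded = torch.cat([xy, wh], 0)

print(loc)      # encoded offsets (tx, ty, tw, th)
print(decoded)  # ~[110., 104., 80., 40.], i.e. the target box is recovered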
@@ -0,0 +1,191 @@
import torch.nn as nn
import torch.nn.functional as F
import torch

from torch.autograd import Variable

import torchvision.models as models


class BottleneckA(nn.Module):
    # Inception-style block with batch norm: 1x1, dilated 3x3, pooled 1x1, and
    # factorized (1x5)+(5x1) branches, each producing 128 channels (512 after concat).
    def __init__(self, input_dims):
        super(BottleneckA, self).__init__()

        self.conv1 = nn.Conv2d(input_dims, 128, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv1_bn = nn.BatchNorm2d(128)
        self.conv2_dilation = nn.Conv2d(input_dims, 128, kernel_size=3, stride=1, padding=2, dilation=2, bias=False)
        self.conv2_bn = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(input_dims, 128, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv3_bn = nn.BatchNorm2d(128)
        self.conv4_1 = nn.Conv2d(input_dims, 128, kernel_size=(1, 5), stride=1, padding=(0, 4), dilation=2, bias=False)
        self.conv4_1_bn = nn.BatchNorm2d(128)
        self.conv4_2 = nn.Conv2d(128, 128, kernel_size=(5, 1), stride=1, padding=(4, 0), dilation=2, bias=False)
        self.conv4_2_bn = nn.BatchNorm2d(128)

    def forward(self, x):
        c1_out = F.relu(self.conv1_bn(self.conv1(x)))
        c2_out = F.relu(self.conv2_bn(self.conv2_dilation(x)))
        c3_out = F.relu(self.conv3_bn(self.conv3(F.max_pool2d(x, kernel_size=3, stride=1, padding=1))))
        c4_out = F.relu(self.conv4_1_bn(self.conv4_1(x)))
        c4_out = F.relu(self.conv4_2_bn(self.conv4_2(c4_out)))

        output = [c1_out, c2_out, c3_out, c4_out]

        return torch.cat(output, dim=1)
class BottleneckB(nn.Module):
    # Same four branches as BottleneckA, but without batch norm, with bias enabled,
    # and with the factorized branch undilated.
    def __init__(self, input_dims):
        super(BottleneckB, self).__init__()

        self.conv1 = nn.Conv2d(input_dims, 128, kernel_size=1, stride=1, padding=0)
        self.conv2_dilation = nn.Conv2d(input_dims, 128, kernel_size=3, stride=1, padding=2, dilation=2)
        self.conv3 = nn.Conv2d(input_dims, 128, kernel_size=1, stride=1, padding=0)
        self.conv4_1 = nn.Conv2d(input_dims, 128, kernel_size=(1, 5), stride=1, padding=(0, 2))
        self.conv4_2 = nn.Conv2d(128, 128, kernel_size=(5, 1), stride=1, padding=(2, 0))

    def forward(self, x):
        c1_out = F.relu(self.conv1(x))
        c2_out = F.relu(self.conv2_dilation(x))
        c3_out = F.relu(self.conv3(F.max_pool2d(x, kernel_size=3, stride=1, padding=1)))
        c4_out = F.relu(self.conv4_1(x))
        c4_out = F.relu(self.conv4_2(c4_out))

        output = [c1_out, c2_out, c3_out, c4_out]

        return torch.cat(output, dim=1)
class Inception(nn.Module):

    def __init__(self, blockA, BlockB):
        super(Inception, self).__init__()

        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.conv3_3_inception = blockA(256)

        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv4_3_inception = blockA(512)

        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)

        self.fc6 = nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=6, dilation=6)
        self.fc7 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1)
        self.fc7_inception = blockA(1024)

        self.conv6_1 = nn.Conv2d(1024, 256, kernel_size=3, stride=2, padding=1)
        self.conv6_1_inception = BlockB(256)
        self.conv7_1 = nn.Conv2d(512, 256, kernel_size=3, stride=2, padding=1)
        self.conv7_1_inception = BlockB(256)
        self.conv8_1 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=0)
        self.conv8_1_inception = BlockB(256)
        self.conv9_1 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=0)
        self.conv9_1_inception = BlockB(256)
        self.conv10_1 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=0)
        self.conv10_1_inception = BlockB(256)
    def forward(self, x):
        out = F.relu(self.conv1_1(x))
        out = F.relu(self.conv1_2(out))
        out = F.max_pool2d(out, kernel_size=2, stride=2)
        out = F.relu(self.conv2_1(out))
        out = F.relu(self.conv2_2(out))
        out = F.max_pool2d(out, kernel_size=2, stride=2)
        out = F.relu(self.conv3_1(out))
        out = F.relu(self.conv3_2(out))
        out = F.relu(self.conv3_3(out))
        conv3_inception = self.conv3_3_inception(out)
        out = F.max_pool2d(out, kernel_size=2, stride=2)
        out = F.relu(self.conv4_1(out))
        out = F.relu(self.conv4_2(out))
        out = F.relu(self.conv4_3(out))
        conv4_inception = self.conv4_3_inception(out)
        out = F.max_pool2d(out, kernel_size=2, stride=2)
        out = F.relu(self.conv5_1(out))
        out = F.relu(self.conv5_2(out))
        out = F.relu(self.conv5_3(out))
        out = F.max_pool2d(out, kernel_size=3, stride=1, padding=1)
        out = F.relu(self.fc6(out))
        out = F.relu(self.fc7(out))
        fc7_inception = self.fc7_inception(out)
        out = F.relu(self.conv6_1(out))
        conv6_inception = self.conv6_1_inception(out)
        out = F.relu(self.conv7_1(conv6_inception))
        conv7_inception = self.conv7_1_inception(out)
        out = F.relu(self.conv8_1(conv7_inception))
        conv8_inception = self.conv8_1_inception(out)
        out = F.relu(self.conv9_1(conv8_inception))
        conv9_inception = self.conv9_1_inception(out)
        out = F.relu(self.conv10_1(conv9_inception))
        conv10_inception = self.conv10_1_inception(out)

        return (conv3_inception, conv4_inception, fc7_inception, conv6_inception,
                conv7_inception, conv8_inception, conv9_inception, conv10_inception)
def load_inception(using_pretrained):
    net = Inception(blockA=BottleneckA, BlockB=BottleneckB)

    if using_pretrained is True:
        # Copy the VGG16 conv weights into the trunk. The indices below are the
        # positions of the conv layers inside vgg16.features (ReLU/pool layers are skipped).
        pre_trained_vgg16 = models.vgg16(pretrained=True)
        pre_trained_feature = pre_trained_vgg16.features

        net.conv1_1.weight = pre_trained_feature[0].weight
        net.conv1_1.bias = pre_trained_feature[0].bias
        net.conv1_2.weight = pre_trained_feature[2].weight
        net.conv1_2.bias = pre_trained_feature[2].bias

        net.conv2_1.weight = pre_trained_feature[5].weight
        net.conv2_1.bias = pre_trained_feature[5].bias
        net.conv2_2.weight = pre_trained_feature[7].weight
        net.conv2_2.bias = pre_trained_feature[7].bias

        net.conv3_1.weight = pre_trained_feature[10].weight
        net.conv3_1.bias = pre_trained_feature[10].bias
        net.conv3_2.weight = pre_trained_feature[12].weight
        net.conv3_2.bias = pre_trained_feature[12].bias
        net.conv3_3.weight = pre_trained_feature[14].weight
        net.conv3_3.bias = pre_trained_feature[14].bias

        net.conv4_1.weight = pre_trained_feature[17].weight
        net.conv4_1.bias = pre_trained_feature[17].bias
        net.conv4_2.weight = pre_trained_feature[19].weight
        net.conv4_2.bias = pre_trained_feature[19].bias
        net.conv4_3.weight = pre_trained_feature[21].weight
        net.conv4_3.bias = pre_trained_feature[21].bias

        net.conv5_1.weight = pre_trained_feature[24].weight
        net.conv5_1.bias = pre_trained_feature[24].bias
        net.conv5_2.weight = pre_trained_feature[26].weight
        net.conv5_2.bias = pre_trained_feature[26].bias
        net.conv5_3.weight = pre_trained_feature[28].weight
        net.conv5_3.bias = pre_trained_feature[28].bias

    return net

def test():
    net = load_inception(using_pretrained=True)

    num_parameters = 0.
    for param in net.parameters():
        sizes = param.size()

        num_layer_param = 1.
        for size in sizes:
            num_layer_param *= size
        num_parameters += num_layer_param

    print(net)
    print("num. of parameters : " + str(num_parameters))

    fms = net(Variable(torch.randn(1, 3, 512, 512)))
    for fm in fms:
        print(fm.size())


# test()
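
To see how this backbone lines up with DataEncoder, the per-level anchor counts for a 512x512 input can be worked out from the same fm_sizes arithmetic used in _get_anchor_boxes; the resulting grid sizes (64, 32, 16, 8, 6, 4) appear to correspond to six of the eight feature maps returned by Inception.forward. The snippet below is a back-of-envelope sketch (the 512 input size is an assumption; 9 anchors per cell comes from the 3 aspect ratios x 3 scale ratios above) and mirrors the arithmetic directly, so it needs neither the network nor the utils module:

# Back-of-envelope check: expected anchor count per prediction level for a 512x512 input,
# reproducing the fm_sizes logic from DataEncoder._get_anchor_boxes.
input_size = 512.
downsample_cnt = 3
num_fms = 6            # len(anchor_areas)
anchors_per_cell = 9   # 3 aspect ratios x 3 scale ratios

total = 0
for i in range(num_fms):
    if i >= 4:
        fm = input_size / 2 ** (3 + downsample_cnt) - 2. * (i - 3)
    else:
        fm = input_size / 2 ** (i + downsample_cnt)
    fm = int(fm)
    total += fm * fm * anchors_per_cell
    print('level %d: %dx%d cells -> %d anchors' % (i, fm, fm, fm * fm * anchors_per_cell))

print('total anchors:', total)  # expected first dimension of loc_preds / cls_preds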