# tinynet

In [1]:
from mxnet.gluon import nn
from mxnet import contrib, nd, autograd, init, gluon, cpu

def cls_predictor(num_anchors, num_classes):
    """Build the class-prediction layer for one feature map.

    Args:
        num_anchors: number of anchor boxes per spatial position.
        num_classes: number of object classes; one extra output per anchor
            is added on top of these (the ``+ 1`` below).

    Returns:
        A 3x3 ``nn.Conv2D`` with padding 1 and
        ``num_anchors * (num_classes + 1)`` output channels.
    """
    out_channels = num_anchors * (num_classes + 1)
    return nn.Conv2D(out_channels, kernel_size=3, padding=1)

### Bounding-box prediction layer
def bbox_predictor(num_anchors):
    """Build the bounding-box prediction layer for one feature map.

    Args:
        num_anchors: number of anchor boxes per spatial position.

    Returns:
        A 3x3 ``nn.Conv2D`` with padding 1 and ``num_anchors * 4`` output
        channels (four values per anchor).
    """
    values_per_anchor = 4
    out_channels = num_anchors * values_per_anchor
    return nn.Conv2D(out_channels, kernel_size=3, padding=1)

### Flatten a single prediction map
def flatten_pred(pred):
    """Move axis 1 of a 4-D prediction to the end, then flatten it.

    The result is a 2-D array (batch size, height x width x channels), which
    makes the later concatenation along dimension 1 straightforward.

    Args:
        pred: 4-D prediction array; assumed to be laid out as
            (batch, channels, height, width) -- TODO confirm against caller.

    Returns:
        The transposed and flattened prediction.
    """
    channels_last = pred.transpose((0, 2, 3, 1))
    return channels_last.flatten()

### Concatenate predictions from several scales
def concat_preds(preds):
    """Flatten every prediction with ``flatten_pred`` and join them on dim 1.

    Args:
        preds: iterable of prediction arrays, one per scale.

    Returns:
        A single array holding all flattened predictions side by side
        along dimension 1.
    """
    flattened = [flatten_pred(scale_pred) for scale_pred in preds]
    return nd.concat(*flattened, dim=1)

### Block that halves height and width
def down_sample_blk(num_channels):
    """Build a down-sampling block.

    The block is two rounds of (3x3 conv with padding 1 -> batch norm ->
    ReLU) followed by a 2x2 max-pool.

    Args:
        num_channels: output channels of both convolutions (also passed to
            ``BatchNorm`` as ``in_channels``).

    Returns:
        An ``nn.Sequential`` containing the seven layers in order.
    """
    blk = nn.Sequential()
    conv_rounds = 2
    for _ in range(conv_rounds):
        blk.add(nn.Conv2D(num_channels, kernel_size=3, padding=1))
        blk.add(nn.BatchNorm(in_channels=num_channels))
        blk.add(nn.Activation('relu'))
    # The pooling layer is what performs the actual down-sampling.
    blk.add(nn.MaxPool2D(pool_size=2))
    return blk
  
### Base network block
def base_net():
    """Build the base feature extractor.

    A small network to try first: three ``down_sample_blk`` stages with
    16, 32 and 64 channels, chained in an ``nn.Sequential``.
    """
    stage_channels = (16, 32, 64)
    blk = nn.Sequential()
    blk.add(*[down_sample_blk(channels) for channels in stage_channels])
    return blk
### Stages of the full network body
def get_blk(i):
    """Return the feature block for stage ``i`` of the detector.

    Args:
        i: stage index.

    Returns:
        ``base_net()`` for stage 0, a global max-pooling layer for stage 4,
        and a 128-channel ``down_sample_blk`` for every other index.
    """
    if i == 0:
        return base_net()
    if i == 4:
        return nn.GlobalMaxPool2D()
    return down_sample_blk(128)
  
### Forward pass of a single stage
def blk_forward(X, blk, size, ratio, cls_predictor, bbox_predictor):
    """Run one stage and produce its anchors and predictions.

    Args:
        X: input to the stage.
        blk: feature block applied to ``X``.
        size: anchor sizes passed to ``MultiBoxPrior`` as ``sizes``.
        ratio: anchor aspect ratios passed to ``MultiBoxPrior`` as ``ratios``.
        cls_predictor: layer applied to the feature map for class predictions.
        bbox_predictor: layer applied to the feature map for box predictions.

    Returns:
        Tuple ``(feature_map, anchors, cls_preds, bbox_preds)``.
    """
    feature_map = blk(X)
    return (
        feature_map,
        contrib.ndarray.MultiBoxPrior(feature_map, sizes=size, ratios=ratio),
        cls_predictor(feature_map),
        bbox_predictor(feature_map),
    )
  
### Anchor-box parameters
# One pair of anchor sizes per stage (five stages in total).
sizes = [
    [0.2, 0.272],
    [0.37, 0.447],
    [0.54, 0.619],
    [0.71, 0.79],
    [0.88, 0.961],
]
# Every stage uses the same aspect ratios; the five entries alias one list.
ratios = 5 * [[1, 2, 0.5]]
# Anchors per position: (number of sizes) + (number of ratios) - 1.
num_anchors = len(ratios[0]) + len(sizes[0]) - 1
  
### Complete SSD model
class TinySSD(nn.Block):
    """Five-stage single-shot detector built from ``get_blk`` stages.

    Each stage ``i`` owns three children registered as attributes:
    ``blk_i`` (features), ``cls_i`` (class predictor) and ``bbox_i``
    (box predictor).  Anchor sizes, ratios and ``num_anchors`` are read from
    the module-level ``sizes``, ``ratios`` and ``num_anchors``.

    Args:
        num_classes: number of object classes.
        **kwargs: forwarded to ``nn.Block``.
    """

    def __init__(self, num_classes, **kwargs):
        super().__init__(**kwargs)
        self.num_classes = num_classes
        for stage in range(5):
            # Equivalent to the assignment self.blk_<stage> = get_blk(stage).
            setattr(self, 'blk_%d' % stage, get_blk(stage))
            setattr(self, 'cls_%d' % stage,
                    cls_predictor(num_anchors, num_classes))
            setattr(self, 'bbox_%d' % stage, bbox_predictor(num_anchors))

    def forward(self, X):
        """Run all five stages in sequence, feeding each stage's output to the next.

        Returns:
            Tuple ``(anchors, cls_preds, bbox_preds)`` where ``anchors`` is
            the per-stage anchors concatenated along dim 1, ``cls_preds`` is
            the concatenated class predictions reshaped to
            ``(batch, -1, num_classes + 1)``, and ``bbox_preds`` is the list
            of per-stage box predictions, deliberately left un-concatenated.
        """
        anchors, cls_preds, bbox_preds = [], [], []
        for stage in range(5):
            # getattr(self, 'blk_%d' % stage) accesses self.blk_<stage>.
            X, stage_anchors, stage_cls, stage_bbox = blk_forward(
                X,
                getattr(self, 'blk_%d' % stage),
                sizes[stage],
                ratios[stage],
                getattr(self, 'cls_%d' % stage),
                getattr(self, 'bbox_%d' % stage))
            anchors.append(stage_anchors)
            cls_preds.append(stage_cls)
            bbox_preds.append(stage_bbox)

        all_anchors = nd.concat(*anchors, dim=1)
        # A 0 in reshape keeps the batch dimension unchanged.
        class_scores = concat_preds(cls_preds).reshape(
            (0, -1, self.num_classes + 1))
        # bbox_preds stays a list here; concat_preds(bbox_preds) would merge it.
        return (all_anchors, class_scores, bbox_preds)

# Build a 20-class TinySSD and push one all-ones 512x512 RGB image through it.
x1 = nd.ones((1, 3, 512, 512))
net1 = TinySSD(num_classes=20)
net1.initialize(init=init.Xavier(), force_reinit=True)

# Run the forward pass in training mode.
with autograd.train_mode():
    anchors4, cls_preds4, box_preds4 = net1(x1)

In [2]:
anchors4, cls_preds4, box_preds4
# box_preds4 is the list the net returns without concatenation: it starts at
# 1*16*64*64 and the last two dims are halved at each stage, down to 1*1.

(
 [[[-0.0921875  -0.0921875   0.1078125   0.1078125 ]
   [-0.12818751 -0.12818751  0.14381251  0.14381251]
   [-0.13360886 -0.06289818  0.14923386  0.07852318]
   ..., 
   [ 0.01949999  0.01949999  0.98049998  0.98049998]
   [-0.12225395  0.18887302  1.12225389  0.81112695]
   [ 0.18887302 -0.12225395  0.81112695  1.12225389]]]
 <NDArray 1x21764x4 @cpu(0)>, 
 [[[-2.58804274  0.58391654  4.78976059 ..., -0.79863942 -0.91312718
    -3.77740407]
   [-1.29662716  2.64886212 -0.30439419 ...,  1.50902975  0.19306624
     1.85081244]
   [ 0.68410778 -1.3969357   0.2646625  ...,  0.52985203 -1.90493643
     0.72151721]
   ..., 
   [-1.05918646 -0.71968275 -0.71757871 ..., -0.37706617 -0.49509549
     0.92910743]
   [-0.02113053  0.3877995  -1.50527382 ...,  2.97555757 -0.38058442
     0.68766063]
   [ 0.00488999  0.64749956  1.26955509 ..., -1.24421322  1.48371112
     1.52847433]]]
 <NDArray 1x21764x21 @cpu(0)>, [
  [[[[ 1.75551319  1.03484941  2.5248611  ...,  4.8400445  -0.42704183
      -

In [2]:
# NOTE(review): anchors / cls_preds / box_preds are not assigned in any visible
# cell; they presumably came from an earlier run in which the net concatenated
# its box predictions (hence the 1x87056 shape) -- confirm.
anchors, cls_preds, box_preds
#  1x21764x4, 1x21764x21, 1x87056

(
 [[[-0.0921875  -0.0921875   0.1078125   0.1078125 ]
   [-0.12818751 -0.12818751  0.14381251  0.14381251]
   [-0.13360886 -0.06289818  0.14923386  0.07852318]
   ..., 
   [ 0.01949999  0.01949999  0.98049998  0.98049998]
   [-0.12225395  0.18887302  1.12225389  0.81112695]
   [ 0.18887302 -0.12225395  0.81112695  1.12225389]]]
 <NDArray 1x21764x4 @cpu(0)>, 
 [[[-2.58804274  0.58391654  4.78976059 ..., -0.79863942 -0.91312718
    -3.77740407]
   [-1.29662716  2.64886212 -0.30439419 ...,  1.50902975  0.19306624
     1.85081244]
   [ 0.68410778 -1.3969357   0.2646625  ...,  0.52985203 -1.90493643
     0.72151721]
   ..., 
   [-1.05918646 -0.71968275 -0.71757871 ..., -0.37706617 -0.49509549
     0.92910743]
   [-0.02113053  0.3877995  -1.50527382 ...,  2.97555757 -0.38058442
     0.68766063]
   [ 0.00488999  0.64749956  1.26955509 ..., -1.24421322  1.48371112
     1.52847433]]]
 <NDArray 1x21764x21 @cpu(0)>, 
 [[ 1.75551319 -3.89638925 -1.7521292  ...,  1.04855585 -0.1984354
   -2.883903

# 看看数据集

In [3]:
import os
import time

from mxnet.gluon.data import DataLoader
from gluoncv.data import VOCDetection
from gluoncv.data.transforms import presets
from gluoncv.data.batchify import Tuple, Stack, Pad

# Location of the Pascal VOC devkit.  The path used to be hard-coded to one
# machine; set the VOC_ROOT environment variable to point elsewhere.  When the
# variable is unset the original location is used, so behavior is unchanged.
VOC_ROOT = os.environ.get('VOC_ROOT', 'e:\\dataSet\\VOCdevkit')

# Build the VOC 2012 trainval detection dataset and time how long that takes.
st = time.time()
train_dataset = VOCDetection(root=VOC_ROOT, splits=[(2012, 'trainval')])
print('train_dataset', len(train_dataset), time.time() - st, 'sec')  # about 3 seconds

train_dataset 11540 3.086263418197632 sec


In [4]:
width, height = 512, 512  # assume 512x512 inputs
# `anchors` was never assigned in this notebook (it only existed as leftover
# kernel state), so this cell raised NameError on a fresh kernel.  Use the
# anchors returned by the TinySSD forward pass in the first cell instead.
# NOTE(review): the gluoncv anchors printed further down look like pixel-unit
# values while these are normalized boxes -- confirm that
# SSDDefaultTrainTransform accepts this anchor format.
train_transform = presets.ssd.SSDDefaultTrainTransform(width, height, anchors4)
batch_size = 2
num_workers = 1  # keep this small: 8 Python worker processes use a lot of memory

# Each batch is consumed as three parts (image, class targets, box targets),
# so three batchify functions are needed.
batchify_fn = Tuple(Stack(), Stack(), Stack())
train_loader = DataLoader(
    train_dataset.transform(train_transform),
    batch_size,
    shuffle=True,
    batchify_fn=batchify_fn,
    last_batch='rollover',
    num_workers=num_workers)

In [9]:
ctx = [cpu()]
# Walk the loader and split each batch across the contexts; stop once two
# batches have been processed, so the names below hold the second batch.
for i, bch in enumerate(train_loader):
    if i >= 2:
        break
    data, cls_targets, box_targets = [
        gluon.utils.split_and_load(part, ctx_list=ctx, batch_axis=0)
        for part in (bch[0], bch[1], bch[2])
    ]

In [10]:
# anchors, cls_preds, box_preds
#  1x21764x4, 1x21764x21, 1x87056

data, cls_targets, box_targets # the data itself
# 2*3*512*512  2*21764  2*21764*4

([
  [[[[ 0.          0.          0.         ...,  0.          0.          0.        ]
     [ 0.          0.          0.         ...,  0.          0.          0.        ]
     [ 0.          0.          0.         ...,  0.          0.          0.        ]
     ..., 
     [ 0.          0.          0.         ...,  0.          0.          0.        ]
     [ 0.          0.          0.         ...,  0.          0.          0.        ]
     [ 0.          0.          0.         ...,  0.          0.          0.        ]]
  
    [[ 0.          0.          0.         ...,  0.          0.          0.        ]
     [ 0.          0.          0.         ...,  0.          0.          0.        ]
     [ 0.          0.          0.         ...,  0.          0.          0.        ]
     ..., 
     [ 0.          0.          0.         ...,  0.          0.          0.        ]
     [ 0.          0.          0.         ...,  0.          0.          0.        ]
     [ 0.          0.          0.         ..., 

TinySSD 的 box_preds 不应该在网络内部做 concat（保持各尺度的预测列表）。
gcv 网络的问题出在 net 的初始化上，所以这里直接使用 anchors1 的形状。

In [21]:
cls_preds1, box_preds1, anchors1
#1x24656x21, 1x24656x4, 1x24656x4

data1, cls_targets1, box_targets1 # the data itself
# 2*3*512*512  2x24656 2x24656*4

([
  [[[[  2.60282292e-07   2.60282292e-07   2.60282292e-07 ...,
       -2.02350712e+00  -1.99764299e+00  -1.95197105e+00]
     [  0.00000000e+00   0.00000000e+00   0.00000000e+00 ...,
       -2.01677752e+00  -1.99919736e+00  -1.96340930e+00]
     [ -1.30141146e-07  -1.30141146e-07  -1.30141146e-07 ...,
       -2.00237107e+00  -2.00275254e+00  -1.98492002e+00]
     ..., 
     [ -1.30141146e-07  -1.30141146e-07  -1.30141146e-07 ...,
       -1.30141146e-07   1.30141146e-07  -1.30141146e-07]
     [  0.00000000e+00   0.00000000e+00   0.00000000e+00 ...,
        0.00000000e+00   1.30141146e-07  -2.60282292e-07]
     [  1.30141146e-07   1.30141146e-07   1.30141146e-07 ...,
       -1.30141146e-07   1.30141146e-07  -1.30141146e-07]]
  
    [[  1.33046072e-07  -1.33046072e-07   1.33046072e-07 ...,
       -1.94699001e+00  -1.94480884e+00  -1.90257537e+00]
     [  1.33046072e-07  -1.33046072e-07   0.00000000e+00 ...,
       -1.93664443e+00  -1.95038533e+00  -1.91565478e+00]
     [  0.00000000e+00

# gcvnet

In [1]:
from gluoncv import model_zoo
# net1 is the tiny net; `net` is the gluoncv model-zoo SSD.
# NOTE(review): pretrained_base presumably only controls fetching pretrained
# weights for the base network, while `pretrained` would fetch the whole
# model -- unverified, confirm against the gluoncv docs.
net = model_zoo.get_model(
    'ssd_300_vgg16_atrous_voc',
    pretrained_base=False,
)

In [7]:
from mxnet import nd, autograd, init, cpu

In [15]:
# All-ones 256x256 input.  This net does not work once the size is changed to
# 256: the forward pass fails with the MXNetError shown below
# ("kernel size exceed input", 3 vs. 2).
# NOTE(review): presumably a late feature map becomes smaller than a 3x3
# kernel at this resolution -- confirm.
x2 = nd.ones((1, 3, 256, 256))
net.initialize(force_reinit=True)
with autograd.train_mode():
    cls_preds2, box_preds2, anchors2 = net(x2)

MXNetError: [15:36:54] C:\Jenkins\workspace\mxnet-tag\mxnet\src\operator\nn\convolution.cc:196: Check failed: dilated_ksize_y <= AddPad(dshape[2], param_.pad[0]) (3 vs. 2) kernel size exceed input

In [14]:
# Alternative initialize() arguments that were tried:
# init = init.Xavier(), ctx = cpu(), force_reinit=True
# NOTE(review): the output below was presumably produced by an earlier run
# with an input size that does not fail -- confirm.
cls_preds2, box_preds2, anchors2

(
 [[[-0.33801937 -0.90163434 -0.3211315  ..., -0.5391925   0.63985986
     0.88860399]
   [-0.72435957 -1.40273416  1.30824351 ..., -0.53316396 -0.1637423
     0.31391099]
   [ 0.42285636  0.33687997 -0.8783868  ..., -0.39116013  0.10754344
     0.20506662]
   ..., 
   [-0.17229933  4.95779467  0.38081646 ...,  0.49214447  2.56735611
    -5.44771004]
   [-8.56417561 -9.22031689 -6.4280858  ..., -1.56126475 -3.95451164
    -5.36964321]
   [-8.04038239  1.51913095  5.43744898 ...,  1.25755858 -1.25320458
     0.51412845]]]
 <NDArray 1x24656x21 @cpu(0)>, 
 [[[ 0.20902389  0.29775959  0.07494316  0.46653274]
   [ 0.98574859 -0.03227947 -1.12873232  1.12383735]
   [ 0.07752693 -0.83551407  0.68268222  0.21806501]
   ..., 
   [ 6.00470638  0.38287973 -6.16967535  1.69958639]
   [-2.85163474  8.82500648 -5.9018259   0.12565799]
   [-7.64860868  2.96594882 -2.46782136 -1.46683955]]]
 <NDArray 1x24656x4 @cpu(0)>, 
 [[[    4.             4.            30.            30.        ]
   [    4.     

In [14]:
# NOTE(review): cls_preds1 / box_preds1 / anchors1 are not assigned in any
# visible cell; per the "(512)" remark they presumably come from running the
# gluoncv net on a 512x512 input -- confirm.
cls_preds1, box_preds1, anchors1 # (512)
#1x24656x21, 1x24656x4, 1x24656x4

(
 [[[ 0.42900282  0.17689246 -0.08969393 ..., -1.41578996 -0.52821094
    -0.58115882]
   [ 0.15685615 -1.27155054  1.05977714 ...,  0.11746846 -0.08594662
    -0.84995431]
   [-0.50975543  0.3863039  -0.47457641 ...,  0.42524678 -0.44772044
     0.02306525]
   ..., 
   [-2.89480162 -0.70770842 -4.20434189 ..., -4.83708429 -0.90477586
    -1.13431835]
   [-0.62764335 -1.28395724 -2.43016863 ...,  1.41274524 -4.19696569
    -0.32053542]
   [ 2.52635741  4.52784538 -0.11493516 ...,  2.3865602   0.89426917
    -2.4647913 ]]]
 <NDArray 1x24656x21 @cpu(0)>, 
 [[[-0.478672   -0.35698959  0.39089143  0.77271128]
   [ 0.33819631 -0.19513905  0.31616893  0.17680694]
   [-0.21485612  0.11544864 -0.23150179 -1.71511388]
   ..., 
   [-1.83052015  3.97280979  2.33794975  7.24629641]
   [-4.29987335 -3.64611959 -0.03536781  4.01691151]
   [-3.42061424 -4.28603172  2.9243691  -6.27342606]]]
 <NDArray 1x24656x4 @cpu(0)>, 
 [[[    4.             4.            30.            30.        ]
   [    4.    

In [19]:
# Same pipeline as train_loader, but the targets are generated against
# anchors1.
# NOTE(review): anchors1 is not assigned in any visible cell; it presumably
# holds the gluoncv net's anchors from a 512x512 forward pass -- confirm.
train_transform1 = presets.ssd.SSDDefaultTrainTransform(
    width, height, anchors1)
# Each batch is consumed as three parts, so three batchify functions are needed.
batchify_fn = Tuple(Stack(), Stack(), Stack())
train_loader1 = DataLoader(
    dataset=train_dataset.transform(train_transform1),
    batch_size=batch_size,
    shuffle=True,
    batchify_fn=batchify_fn,
    last_batch='rollover',
    num_workers=num_workers,
)

In [20]:
# Walk the loader and split each batch across the contexts; stop once two
# batches have been processed, so the names below hold the second batch.
for i, bch in enumerate(train_loader1):
    if i >= 2:
        break
    data1, cls_targets1, box_targets1 = [
        gluon.utils.split_and_load(part, ctx_list=ctx, batch_axis=0)
        for part in (bch[0], bch[1], bch[2])
    ]