In [75]:
import mxnet as mx
from collections import namedtuple
import cv2
import os, urllib

def save_net(model, prefix):
    model.save_params(prefix+'-0001.params')
    model.symbol.save(prefix+'-symbol.json')
    
def print_net_params(param):
    for m in sorted(param):
        print m, param[m].shape
        
def plot_network(symbol, name='network'):
    graph = mx.viz.plot_network(symbol=symbol)
    graph.format = 'png'
    graph.render('network.gv', view=True)
    
def print_inferred_shape(net, name, nch=3, size=300, node_type='rgb'):
    if node_type == 'rgb':
        ar, ou, au = net.infer_shape(rgb=(1, nch, size, size))
    if node_type == 'spectral':
        ar, ou, au = net.infer_shape(rgb=(1, nch, size, size), tir=(1, 1, size, size))
    print ou


# Batch = namedtuple('Batch', ['data'])
# import matplotlib.pyplot as plt 
# import numpy as np
# %matplotlib inline 

# def download(url,prefix=''):
#     filename = prefix+url.split("/")[-1]
#     if not os.path.exists(filename):
#         urllib.urlretrieve(url, filename)
        
# def get_image(url, show=True):
#     filename = url.split("/")[-1]
#     urllib.urlretrieve(url, filename)
#     img = cv2.imread(filename)
#     if img is None:
#         print('failed to download ' + url)
#     if show:
#         plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
#         plt.axis('off')
#     return filename

# url = 'http://writm.com/wp-content/uploads/2016/08/Cat-hd-wallpapers.jpg'
# img = get_image(url)

# size = 300
# img= cv2.cvtColor(cv2.imread(img), cv2.COLOR_BGR2RGB)

# img = cv2.resize(img, (size, size))
# img = np.swapaxes(img, 0, 2)
# img = np.swapaxes(img, 1, 2) 
# img = img[np.newaxis, :] 
# print img.shape
# batch = img

def multibox_layer(from_layers, num_classes, sizes=[.2, .95], ratios=[1], normalization=-1, num_channels=[], clip=True, interm_layer=0):
    """
    the basic aggregation module for SSD detection. Takes in multiple layers,
    generate multiple object detection targets by customized layers

    Parameters:
    ----------
    from_layers : list of mx.symbol
        generate multibox detection from layers
    num_classes : int
        number of classes excluding background, will automatically handle
        background in this function
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific layers
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
    normalizations : int or list of int
        use normalizations value for all layers or [...] for specific layers,
        -1 indicate no normalizations and scales
    num_channels : list of int
        number of input layer channels, used when normalization is enabled, the
        length of list should equals to number of normalization layers
    clip : bool
        whether to clip out-of-image boxes
    interm_layer : int
        if > 0, will add a intermediate Convolution layer

    Returns:
    ----------
    list of outputs, as [loc_preds, cls_preds, anchor_boxes]
    loc_preds : localization regression prediction
    cls_preds : classification prediction
    anchor_boxes : generated anchor boxes
    """
    assert len(from_layers) > 0, "from_layers must not be empty list"
    assert num_classes > 0,      "num_classes {} must be larger than 0".format(num_classes)

    assert len(ratios) > 0, "aspect ratios must not be empty list"
    if not isinstance(ratios[0], list):
        # provided only one ratio list, broadcast to all from_layers
        ratios = [ratios] * len(from_layers)
    assert len(ratios) == len(from_layers), \
        "ratios and from_layers must have same length"

    assert len(sizes) > 0, "sizes must not be empty list"
    if len(sizes) == 2 and not isinstance(sizes[0], list):
        # provided size range, we need to compute the sizes for each layer
         assert sizes[0] > 0 and sizes[0] < 1
         assert sizes[1] > 0 and sizes[1] < 1 and sizes[1] > sizes[0]
         tmp = np.linspace(sizes[0], sizes[1], num=(len(from_layers)-1))
         min_sizes = [start_offset] + tmp.tolist()
         max_sizes = tmp.tolist() + [tmp[-1]+start_offset]
         sizes = zip(min_sizes, max_sizes)
    assert len(sizes) == len(from_layers), \
        "sizes and from_layers must have same length"

    if not isinstance(normalization, list):
        normalization = [normalization] * len(from_layers)
    assert len(normalization) == len(from_layers)

    assert sum(x > 0 for x in normalization) == len(num_channels), \
        "must provide number of channels for each normalized layer"

    loc_pred_layers = []
    cls_pred_layers = []
    anchor_layers = []
    num_classes += 1 # always use background as label 0

    for k, from_layer in enumerate(from_layers):
        from_name = from_layer.name
        # normalize
        if normalization[k] > 0:
            from_layer = mx.symbol.L2Normalization(data=from_layer, mode="channel", name="{}_norm".format(from_name))
            scale = mx.symbol.Variable(name="{}_scale".format(from_name),shape=(1, num_channels.pop(0), 1, 1))
            from_layer = normalization[k] * mx.symbol.broadcast_mul(lhs=scale, rhs=from_layer)
        if interm_layer > 0:
            from_layer = mx.symbol.Convolution(data=from_layer, kernel=(3,3), stride=(1,1), pad=(1,1), num_filter=interm_layer, name="{}_inter_conv".format(from_name))
            from_layer = mx.symbol.Activation(data=from_layer, act_type="relu", name="{}_inter_relu".format(from_name))

        # estimate number of anchors per location
        # here I follow the original version in caffe
        # TODO: better way to shape the anchors??
        size = sizes[k]
        assert len(size) > 0, "must provide at least one size"
        size_str = "(" + ",".join([str(x) for x in size]) + ")"
        ratio = ratios[k]
        assert len(ratio) > 0, "must provide at least one ratio"
        ratio_str = "(" + ",".join([str(x) for x in ratio]) + ")"
        num_anchors = len(size) -1 + len(ratio)

        # create location prediction layer
        num_loc_pred = num_anchors * 4
        loc_pred = mx.symbol.Convolution(data=from_layer, kernel=(3,3), stride=(1,1), pad=(1,1), num_filter=num_loc_pred, name="{}_loc_pred_conv".format(from_name))
        loc_pred = mx.symbol.transpose(loc_pred, axes=(0,2,3,1))
        loc_pred = mx.symbol.Flatten(data=loc_pred)
        loc_pred_layers.append(loc_pred)

        # create class prediction layer
        num_cls_pred = num_anchors * num_classes
        cls_pred = mx.symbol.Convolution(data=from_layer, kernel=(3,3), stride=(1,1), pad=(1,1), num_filter=num_cls_pred, name="{}_cls_pred_conv".format(from_name))
        cls_pred = mx.symbol.transpose(cls_pred, axes=(0,2,3,1))
        cls_pred = mx.symbol.Flatten(data=cls_pred)
        cls_pred_layers.append(cls_pred)

        # create anchor generation layer
        anchors = mx.contrib.symbol.MultiBoxPrior(from_layer, sizes=size_str, ratios=ratio_str, clip=clip, name="{}_anchors".format(from_name))
        anchors = mx.symbol.Flatten(data=anchors)
        anchor_layers.append(anchors)

    loc_preds = mx.symbol.Concat(*loc_pred_layers, num_args=len(loc_pred_layers), dim=1, name="multibox_loc_pred")
    cls_preds = mx.symbol.Concat(*cls_pred_layers, num_args=len(cls_pred_layers), dim=1)
    cls_preds = mx.symbol.Reshape(data=cls_preds, shape=(0, -1, num_classes))
    cls_preds = mx.symbol.transpose(cls_preds, axes=(0, 2, 1), name="multibox_cls_pred")
    anchor_boxes = mx.symbol.Concat(*anchor_layers, num_args=len(anchor_layers), dim=1)
    anchor_boxes = mx.symbol.Reshape(data=anchor_boxes, shape=(0, -1, 4), name="multibox_anchors")
    return [loc_preds, cls_preds, anchor_boxes]


def bn_act_conv_layer(from_layer, name, num_filter, kernel=(1,1), pad=(0,0), stride=(1,1)):
    bn = mx.symbol.BatchNorm(data=from_layer, name="bn{}".format(name))
    relu = mx.symbol.Activation(data=bn, act_type='relu')
    conv = mx.symbol.Convolution(data=relu, kernel=kernel, pad=pad, stride=stride, num_filter=num_filter, name="conv{}".format(name))
    return conv, relu


def residual_unit(data, num_filter, stride, dim_match, name, bn_mom=0.9, workspace=256):
        bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn1')
        act1 =  mx.symbol.Activation(data=bn1, act_type='relu', name=name + '_relu1')
        conv1 = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(3,3), stride=stride, pad=(1,1), no_bias=True, workspace=workspace, name=name + '_conv1')
        bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn2')
        act2 = mx.symbol.Activation(data=bn2, act_type='relu', name=name + '_relu2')
        conv2 = mx.sym.Convolution(data=act2, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1,1), no_bias=True, workspace=workspace, name=name + '_conv2')
        if dim_match:
            shortcut = data
        else:
            shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True, workspace=workspace, name=name+'_sc')
        return conv2 + shortcut
    
def cf_unit(res_unit_rgb, res_unit_tir, num_filters=64, name='fusion1_unit_1'):
    concat = mx.symbol.Concat(name=name+'_concat', *[res_unit_rgb, res_unit_tir])
    bn1 = mx.sym.BatchNorm(data=concat, fix_gamma=False, momentum=0.9, eps=2e-5, name=name + '_bn1')
    act1 =  mx.symbol.Activation(data=bn1, act_type='relu', name=name + '_relu1')
    conv1 = mx.sym.Convolution(act1, num_filter=num_filters, kernel=(3,3), stride=(1,1), pad=(1,1), workspace=256, name=name+'_conv')
    return conv1

def resnet():
    filter_list = [64, 64, 128, 256, 512]
    num_stages = 4
    units = [2, 2, 2, 2]
    bn_mom=0.9
    workspace=256
    
    rgb = mx.sym.Variable(name='rgb')
    tir = mx.sym.Variable(name='tir')
    label = mx.sym.Variable(name='label')
    
    # rgb head
    rgb = mx.sym.BatchNorm(rgb, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data')
    net_rgb = mx.sym.Convolution(rgb, num_filter=filter_list[0], kernel=(7, 7), stride=(2,2), pad=(3, 3),no_bias=True, name="rgb_conv0", workspace=workspace)
    net_rgb = mx.sym.BatchNorm(net_rgb, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0')
    net_rgb = mx.symbol.Activation(net_rgb, act_type='relu', name='relu0')
    net_rgb = mx.symbol.Pooling(net_rgb, kernel=(3, 3), stride=(2,2), pad=(1,1), pool_type='max')
    
    # tir head
    tir = mx.sym.BatchNorm(tir, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='tir_bn_data')
    net_tir = mx.sym.Convolution(tir, num_filter=filter_list[0], kernel=(7, 7), stride=(2,2), pad=(3, 3), no_bias=True, name="tir_conv0", workspace=workspace)
    net_tir = mx.sym.BatchNorm(net_tir, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='tir_bn0')
    net_tir = mx.symbol.Activation(net_tir, act_type='relu', name='tir_relu0')
    net_tir = mx.symbol.Pooling(net_tir, kernel=(3, 3), stride=(2,2), pad=(1,1), pool_type='max')
    
    # stage 1
    net_rgb = residual_unit(net_rgb, filter_list[1], (1,1), False, name='stage1_unit1', workspace=workspace)
    net_rgb = residual_unit(net_rgb, filter_list[1], (1,1), True, name='stage1_unit2', workspace=workspace)
    net_tir = residual_unit(net_tir, filter_list[1], (1,1), False, name='tir_stage1_unit1', workspace=workspace)
    net_tir = residual_unit(net_tir, filter_list[1], (1,1), True, name='tir_stage1_unit2', workspace=workspace)
    
    # stage 2
    net_rgb = residual_unit(net_rgb, filter_list[2], (2,2), False, name='rgb_stage2_unit1', workspace=workspace)
    net_rgb = residual_unit(net_rgb, filter_list[2], (1,1), True, name='rgb_stage2_unit2', workspace=workspace)
    net_tir = residual_unit(net_tir, filter_list[2], (2,2), False, name='tir_stage2_unit1', workspace=workspace)
    net_tir = residual_unit(net_tir, filter_list[2], (1,1), True, name='tir_stage2_unit2', workspace=workspace)    

    # stage 3    
    net_rgb = residual_unit(net_rgb, filter_list[3], (2,2), False, name='rgb_stage3_unit1', workspace=workspace)
    net_tir = residual_unit(net_tir, filter_list[3], (2,2), False, name='tir_stage3_unit1', workspace=workspace)
    
    # first fusion map
    fusion_3_1 = cf_unit(net_rgb.get_internals()["rgb_stage3_unit1_relu1_output"],
                         net_tir.get_internals()["tir_stage3_unit1_relu1_output"], 
                         num_filters=256, name='fusion3_1')
    
    print_inferred_shape(fusion_3_1, 'stage3_unit1', node_type='spectral')
                            
    net_rgb = residual_unit(net_rgb, filter_list[3], (1,1), True, name='rgb_stage3_unit2', workspace=workspace) 
    net_tir = residual_unit(net_tir, filter_list[3], (1,1), True, name='tir_stage3_unit2', workspace=workspace)

    # stage 4 
    net_rgb = residual_unit(net_rgb, filter_list[4], (2,2), False, name='rgb_stage4_unit1', workspace=workspace)
    net_tir = residual_unit(net_tir, filter_list[4], (2,2), False, name='tir_stage4_unit1', workspace=workspace)
    # second fusion map
    fusion_4_1 = cf_unit(net_rgb.get_internals()["rgb_stage4_unit1_relu1_output"],
                     net_tir.get_internals()["tir_stage4_unit1_relu1_output"], 
                     num_filters=512, name='fusion4_1')
    print_inferred_shape(fusion_4_1, 'stage4_unit1',  node_type='spectral')
    
    net_rgb = residual_unit(net_rgb, filter_list[4], (1,1), True, name='rgb_stage4_unit2', workspace=workspace)
#     net_tir = residual_unit(net_tir, filter_list[4], (1,1), True, name='tir_stage4_unit2', workspace=workspace)
    
    bn1 = mx.sym.BatchNorm(net_rgb, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn1')
    relu1 = mx.symbol.Activation(bn1, act_type='relu', name='relu1')
    
    input_1 = fusion_3_1 #19x19
    input_2 = fusion_4_1 #10x10
    input_3 = relu1
    print_inferred_shape(relu1, 'stage4_unit1',  node_type='rgb')
    
    # ssd extra layers
    conv8_1, relu8_1 = bn_act_conv_layer(input_3, "8_1", 256, kernel=(1, 1), pad=(0, 0), stride=(1, 1))
    print_inferred_shape(conv8_1, 'stage4_unit1', node_type='rgb')
    conv8_2, relu8_2 = bn_act_conv_layer(conv8_1, "8_2", 512, kernel=(3, 3), pad=(1, 1), stride=(2, 2))
    print_inferred_shape(conv8_2, 'stage4_unit1', node_type='rgb')
    conv9_1, relu9_1 = bn_act_conv_layer(conv8_2, "9_1", 128, kernel=(1, 1), pad=(0, 0), stride=(1, 1))
    print_inferred_shape(conv9_1, 'stage4_unit1', node_type='rgb')
    conv9_2, relu9_2 = bn_act_conv_layer(conv9_1, "9_2", 256, kernel=(3, 3), pad=(1, 1), stride=(2, 2))
    print_inferred_shape(conv9_2, 'stage4_unit1', node_type='rgb')
    conv10_1, relu10_1 = bn_act_conv_layer(conv9_2, "10_1", 128, kernel=(1, 1), pad=(0, 0), stride=(1, 1))
    print_inferred_shape(conv10_1, 'stage4_unit1', node_type='rgb')
    conv10_2, relu10_2 = bn_act_conv_layer(conv10_1, "10_2", 256, kernel=(3, 3), pad=(1, 1), stride=(2, 2))
    print_inferred_shape(conv10_2, 'stage4_unit1', node_type='rgb')

    # global Pooling
    pool10 = mx.symbol.Pooling(data=conv10_2, pool_type="avg", global_pool=True, kernel=(1, 1), name='pool10')
    print_inferred_shape(pool10, 'stage4_unit1', node_type='rgb')
    
    # with feature maps: ssd_input
    from_layers = [input_1, input_2, conv8_2, conv9_2, conv10_2, pool10]
    
    return mx.symbol.Group(from_layers)

In [76]:
plot_network(resnet())

[(1L, 256L, 38L, 38L)]
[(1L, 512L, 19L, 19L)]
[(1L, 512L, 10L, 10L)]
[(1L, 256L, 10L, 10L)]
[(1L, 512L, 5L, 5L)]
[(1L, 128L, 5L, 5L)]
[(1L, 256L, 3L, 3L)]
[(1L, 128L, 3L, 3L)]
[(1L, 256L, 2L, 2L)]
[(1L, 256L, 1L, 1L)]


In [13]:
args = dict(zip(model.symbol.list_arguments(), model.get_params()))

for name in args:
    print(name, args[name], args[name])

('data', {'conv5_bias': <NDArray 256 @cpu(0)>, 'conv4_bias': <NDArray 384 @cpu(0)>, 'conv1_bias': <NDArray 96 @cpu(0)>, 'conv3_weight': <NDArray 384x256x3x3 @cpu(0)>, 'conv5_weight': <NDArray 256x192x3x3 @cpu(0)>, 'conv4_weight': <NDArray 384x192x3x3 @cpu(0)>, 'conv2_bias': <NDArray 256 @cpu(0)>, 'conv2_weight': <NDArray 256x48x5x5 @cpu(0)>, 'conv3_bias': <NDArray 384 @cpu(0)>, 'conv1_weight': <NDArray 96x3x11x11 @cpu(0)>}, {'conv5_bias': <NDArray 256 @cpu(0)>, 'conv4_bias': <NDArray 384 @cpu(0)>, 'conv1_bias': <NDArray 96 @cpu(0)>, 'conv3_weight': <NDArray 384x256x3x3 @cpu(0)>, 'conv5_weight': <NDArray 256x192x3x3 @cpu(0)>, 'conv4_weight': <NDArray 384x192x3x3 @cpu(0)>, 'conv2_bias': <NDArray 256 @cpu(0)>, 'conv2_weight': <NDArray 256x48x5x5 @cpu(0)>, 'conv3_bias': <NDArray 384 @cpu(0)>, 'conv1_weight': <NDArray 96x3x11x11 @cpu(0)>})
('conv1_weight', {}, {})


In [187]:
# new network params
spectral_net_params = dict()

# read original resnet params
net_params = mx.initializer.Load('resnet-0000.params')

# read trained ssd-resnet 0.68
net_params_ssd = mx.initializer.Load('ssd_300-0300.params')

# initiliaze weights
for par in net_params.param:
    if par not in ['fc1_weight', 'fc1_bias', 'bn1', 'bn1_beta', 'bn1_gamma', 'bn1_moving_mean', 'bn1_moving_var']:
        spectral_net_params['rgb_' + par] = net_params_ssd.param[par]
        spectral_net_params['tir_' + par] = net_params_ssd.param[par]
    elif par in ['bn1', 'bn1_beta', 'bn1_gamma', 'bn1_moving_mean', 'bn1_moving_var']:
        spectral_net_params[par] = net_params_ssd.param[par]
        
# initiliaze fusion conv
net = resnet()
ex = net.simple_bind(ctx=mx.cpu(), rgb=(1, 3, 300, 300), tir=(1,1,300,300))
args = dict(zip(model.symbol.list_arguments(), ex.arg_arrays))

# for name in args:
#     if name.startswith('fusion'):
#         spectral_net_params[name] = mx.random.uniform(-0.1, 0.1, data.shape)
#         print(name, args[name].shape)
        
# for name in args:
#     data = args[name]
#     if 'weight' in name:
#         data[:] = mx.random.uniform(-0.1, 0.1, data.shape)
#     if 'bias' in name:
#         data[:] = 0
#     spectral_net_params[name] = data


# take all values for red chanell since it close to TIR
spectral_net_params['tir_conv0_weight'] = mx.nd.array(spectral_net_params['tir_conv0_weight'].asnumpy()[:, :1, :, :])
spectral_net_params['tir_bn_data_gamma'] = mx.nd.array(spectral_net_params['tir_bn_data_gamma'].asnumpy()[:1])
spectral_net_params['tir_bn_data_beta'] = mx.nd.array(spectral_net_params['tir_bn_data_beta'].asnumpy()[:1])
spectral_net_params['tir_bn_data_moving_var'] = mx.nd.array(spectral_net_params['tir_bn_data_moving_var'].asnumpy()[:1])
spectral_net_params['tir_bn_data_moving_mean'] = mx.nd.array(spectral_net_params['tir_bn_data_moving_mean'].asnumpy()[:1])

net_params.param = spectral_net_params

In [77]:
model = mx.mod.Module(symbol=resnet(), data_names=['rgb', 'tir'])
model.bind(data_shapes=[('rgb', (1, 3, 300, 300)), ('tir', (1, 1, 300, 300))])
model.init_params(arg_params=net_params.param, allow_missing=True, initializer=mx.initializer.Xavier())


[(1L, 256L, 38L, 38L)]
[(1L, 512L, 19L, 19L)]
[(1L, 512L, 10L, 10L)]
[(1L, 256L, 10L, 10L)]
[(1L, 512L, 5L, 5L)]
[(1L, 128L, 5L, 5L)]
[(1L, 256L, 3L, 3L)]
[(1L, 128L, 3L, 3L)]
[(1L, 256L, 2L, 2L)]
[(1L, 256L, 1L, 1L)]


	rgb
	tir[0m


In [4]:
net_params_spectral = mx.initializer.Load('ssd_300-0028.params')

model = mx.mod.Module(symbol=resnet(), data_names=['rgb', 'tir'])
model.bind(data_shapes=[('rgb', (1, 3, 300, 300)), ('tir', (1, 1, 300, 300))])
model.init_params(net_params_spectral)

	rgb
	tir[0m


In [194]:
save_net(model, 'multispectral_resnet')

## Pre-trained for SSD

In [13]:
net_params = mx.initializer.Load('NETS/caltech_kaist/ssd_300-0806.params')

In [14]:
print_net_params(net_params.param)

bn0_beta (64L,)
bn0_gamma (64L,)
bn0_moving_mean (64L,)
bn0_moving_var (64L,)
bn10_1_beta (256L,)
bn10_1_gamma (256L,)
bn10_1_moving_mean (256L,)
bn10_1_moving_var (256L,)
bn10_2_beta (128L,)
bn10_2_gamma (128L,)
bn10_2_moving_mean (128L,)
bn10_2_moving_var (128L,)
bn1_beta (512L,)
bn1_gamma (512L,)
bn1_moving_mean (512L,)
bn1_moving_var (512L,)
bn8_1_beta (512L,)
bn8_1_gamma (512L,)
bn8_1_moving_mean (512L,)
bn8_1_moving_var (512L,)
bn8_2_beta (256L,)
bn8_2_gamma (256L,)
bn8_2_moving_mean (256L,)
bn8_2_moving_var (256L,)
bn9_1_beta (512L,)
bn9_1_gamma (512L,)
bn9_1_moving_mean (512L,)
bn9_1_moving_var (512L,)
bn9_2_beta (128L,)
bn9_2_gamma (128L,)
bn9_2_moving_mean (128L,)
bn9_2_moving_var (128L,)
bn_data_beta (3L,)
bn_data_gamma (3L,)
bn_data_moving_mean (3L,)
bn_data_moving_var (3L,)
conv0_weight (64L, 3L, 7L, 7L)
conv10_1_bias (128L,)
conv10_1_weight (128L, 256L, 1L, 1L)
conv10_2_bias (256L,)
conv10_2_cls_pred_conv_bias (126L,)
conv10_2_cls_pred_conv_weight (126L, 256L, 3L, 3L)
con

In [15]:
cls_loc_weights = []
for name in net_params.param:
    if 'cls' in name.split('_'):
        cls_loc_weights.append(name)
#         print name
        
    if 'loc' in name.split('_'):
        cls_loc_weights.append(name)
#         print name

for name in cls_loc_weights:
    print name, net_params.param[name].asnumpy().shape
    del net_params.param[name]

conv8_2_loc_pred_conv_weight (24, 512, 3, 3)
conv8_2_cls_pred_conv_weight (126, 512, 3, 3)
conv10_2_loc_pred_conv_bias (24,)
stage3_unit1_relu1_loc_pred_conv_bias (12,)
conv8_2_cls_pred_conv_bias (126,)
conv10_2_cls_pred_conv_bias (126,)
conv9_2_loc_pred_conv_weight (24, 256, 3, 3)
stage4_unit1_relu1_cls_pred_conv_bias (126,)
conv10_2_loc_pred_conv_weight (24, 256, 3, 3)
conv9_2_cls_pred_conv_weight (126, 256, 3, 3)
stage4_unit1_relu1_loc_pred_conv_bias (24,)
pool10_loc_pred_conv_bias (24,)
stage3_unit1_relu1_cls_pred_conv_bias (63,)
stage3_unit1_relu1_cls_pred_conv_weight (63, 128, 3, 3)
conv8_2_loc_pred_conv_bias (24,)
pool10_cls_pred_conv_weight (126, 256, 3, 3)
conv9_2_cls_pred_conv_bias (126,)
stage4_unit1_relu1_loc_pred_conv_weight (24, 256, 3, 3)
conv10_2_cls_pred_conv_weight (126, 256, 3, 3)
pool10_cls_pred_conv_bias (126,)
pool10_loc_pred_conv_weight (24, 256, 3, 3)
stage4_unit1_relu1_cls_pred_conv_weight (126, 256, 3, 3)
stage3_unit1_relu1_loc_pred_conv_weight (12, 128, 3, 3)

In [7]:
model = mx.mod.Module(symbol=resnet())
model.bind(data_shapes=[('data', (1, 3, 300, 300))])
model.init_params(net_params)

	data[0m


In [8]:
save_net(model, 'resnet_caltech')

## TIR Resnet 

In [12]:
import mxnet as mx

def bn_act_conv_layer(from_layer, name, num_filter, kernel=(1,1), pad=(0,0), stride=(1,1)):
    bn = mx.symbol.BatchNorm(data=from_layer, name="bn{}".format(name))
    relu = mx.symbol.Activation(data=bn, act_type='relu')
    conv = mx.symbol.Convolution(data=relu, kernel=kernel, pad=pad, stride=stride, num_filter=num_filter, name="conv{}".format(name))
    return conv, relu


def residual_unit(data, num_filter, stride, dim_match, name, bn_mom=0.9, workspace=256):
    bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn1')
    act1 = mx.symbol.Activation(data=bn1, act_type='relu', name=name + '_relu1')
    conv1 = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(3, 3), stride=stride, pad=(1, 1),
                               no_bias=True, workspace=workspace, name=name + '_conv1')
    bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn2')
    act2 = mx.symbol.Activation(data=bn2, act_type='relu', name=name + '_relu2')
    conv2 = mx.sym.Convolution(data=act2, num_filter=num_filter, kernel=(3, 3), stride=(1, 1), pad=(1, 1), no_bias=True, workspace=workspace, name=name + '_conv2')
    if dim_match:
        shortcut = data
    else:
        shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1, 1), stride=stride, no_bias=True, workspace=workspace, name=name + '_sc')
    return conv2 + shortcut


def resnet():
    filter_list = [64, 64, 128, 256, 512]
    bn_mom = 0.9
    workspace = 256

    data = mx.sym.Variable(name='data')
    data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data')
    net = mx.sym.Convolution(data, num_filter=filter_list[0], kernel=(7, 7), stride=(2, 2), pad=(3, 3), no_bias=True, name="conv0", workspace=workspace)
    net = mx.sym.BatchNorm(net, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0')
    net = mx.symbol.Activation(net, act_type='relu', name='relu0')
    net = mx.symbol.Pooling(net, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type='max')

    # stage 1
    net = residual_unit(net, filter_list[1], (1, 1), False, name='stage1_unit1', workspace=workspace)
    net = residual_unit(net, filter_list[1], (1, 1), True, name='stage1_unit2', workspace=workspace)
    # stage 2
    net = residual_unit(net, filter_list[2], (2, 2), False, name='stage2_unit1', workspace=workspace)
    net = residual_unit(net, filter_list[2], (1, 1), True, name='stage2_unit2', workspace=workspace)
    # stage 3
    net = residual_unit(net, filter_list[3], (2, 2), False, name='stage3_unit1', workspace=workspace)
    net = residual_unit(net, filter_list[3], (1, 1), True, name='stage3_unit2', workspace=workspace)
    # stage 4
    net = residual_unit(net, filter_list[4], (2, 2), False, name='stage4_unit1', workspace=workspace)
    net = residual_unit(net, filter_list[4], (1, 1), True, name='stage4_unit2', workspace=workspace)

    bn1 = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn1')
    relu1 = mx.symbol.Activation(data=bn1, act_type='relu', name='relu1')
    
    input_3 = relu1 # 10x10
    internals = input_3.get_internals()
    input_1 = internals['stage3_unit1_relu1_output']  # 38x38
    input_2 = internals['stage4_unit1_relu1_output']  # 19X19
    
     # ssd extra layers
    conv8_1, elu8_1 = bn_act_conv_layer(input_3, "8_1", 256, kernel=(1, 1), pad=(0, 0), stride=(1, 1))
    conv8_2, elu8_2 = bn_act_conv_layer(conv8_1, "8_2", 512, kernel=(3, 3), pad=(1, 1), stride=(2, 2))
    conv9_1, elu9_1 = bn_act_conv_layer(conv8_2, "9_1", 128, kernel=(1, 1), pad=(0, 0), stride=(1, 1))
    conv9_2, elu9_2 = bn_act_conv_layer(conv9_1, "9_2", 256, kernel=(3, 3), pad=(1, 1), stride=(2, 2))
    conv10_1, elu10_1 = bn_act_conv_layer(conv9_2, "10_1", 128, kernel=(1, 1), pad=(0, 0), stride=(1, 1))
    conv10_2, elu10_2 = bn_act_conv_layer(conv10_1, "10_2", 256, kernel=(3, 3), pad=(1, 1), stride=(2, 2))

    # global Pooling
    pool10 = mx.symbol.Pooling(data=conv10_2, pool_type="avg", global_pool=True, kernel=(1, 1), name='pool10')
    return pool10

In [3]:
# get weights of pretrained model for PASCAL VOC and adapt it to single channel input
# read trained ssd-resnet 0.68
net_params = mx.initializer.Load('ssd_300-0300.params')

# take all values for red chanell, since it closer to TIR
# net_params.param['conv0_weight'] = mx.nd.array(net_params.param['conv0_weight'].asnumpy()[:, :1, :, :])
# net_params.param['bn_data_gamma'] = mx.nd.array(net_params.param['bn_data_gamma'].asnumpy()[:1])
# net_params.param['bn_data_beta'] = mx.nd.array(net_params.param['bn_data_beta'].asnumpy()[:1])
# net_params.param['bn_data_moving_var'] = mx.nd.array(net_params.param['bn_data_moving_var'].asnumpy()[:1])
# net_params.param['bn_data_moving_mean'] = mx.nd.array(net_params.param['bn_data_moving_mean'].asnumpy()[:1])

# specify training layers names

In [7]:
for i in net_params.param:
#     print i
#     print i.split('_')
    if 'loc' in i.split('_') or 'pred' in i.split('_'):
        print i, net_params.param[i].asnumpy().shape

conv8_2_loc_pred_conv_weight (24, 512, 3, 3)
conv8_2_cls_pred_conv_weight (126, 512, 3, 3)
conv10_2_loc_pred_conv_bias (24,)
stage3_unit1_relu1_loc_pred_conv_bias (12,)
conv8_2_cls_pred_conv_bias (126,)
conv10_2_cls_pred_conv_bias (126,)
conv9_2_loc_pred_conv_weight (24, 256, 3, 3)
stage4_unit1_relu1_cls_pred_conv_bias (126,)
conv10_2_loc_pred_conv_weight (24, 256, 3, 3)
conv9_2_cls_pred_conv_weight (126, 256, 3, 3)
stage4_unit1_relu1_loc_pred_conv_bias (24,)
pool10_loc_pred_conv_bias (24,)
stage3_unit1_relu1_cls_pred_conv_bias (63,)
stage3_unit1_relu1_cls_pred_conv_weight (63, 128, 3, 3)
conv8_2_loc_pred_conv_bias (24,)
pool10_cls_pred_conv_weight (126, 256, 3, 3)
conv9_2_cls_pred_conv_bias (126,)
stage4_unit1_relu1_loc_pred_conv_weight (24, 256, 3, 3)
conv10_2_cls_pred_conv_weight (126, 256, 3, 3)
pool10_cls_pred_conv_bias (126,)
pool10_loc_pred_conv_weight (24, 256, 3, 3)
stage4_unit1_relu1_cls_pred_conv_weight (126, 256, 3, 3)
stage3_unit1_relu1_loc_pred_conv_weight (12, 128, 3, 3)

In [27]:
# initialize model with new params to check correctness
model = mx.mod.Module(symbol=resnet())
model.bind(data_shapes=[('data', (1, 3, 300, 300))])
model.init_params(net_params)

# save tir model
save_net(model, 'resnet_rgb')

## RGB_TIR Resnet

In [15]:
net_params = mx.initializer.Load('resnet_tir-0001.params')

In [16]:
net_params.param

{'bn0_beta': <NDArray 64 @cpu(0)>,
 'bn0_gamma': <NDArray 64 @cpu(0)>,
 'bn0_moving_mean': <NDArray 64 @cpu(0)>,
 'bn0_moving_var': <NDArray 64 @cpu(0)>,
 'bn10_1_beta': <NDArray 256 @cpu(0)>,
 'bn10_1_gamma': <NDArray 256 @cpu(0)>,
 'bn10_1_moving_mean': <NDArray 256 @cpu(0)>,
 'bn10_1_moving_var': <NDArray 256 @cpu(0)>,
 'bn10_2_beta': <NDArray 128 @cpu(0)>,
 'bn10_2_gamma': <NDArray 128 @cpu(0)>,
 'bn10_2_moving_mean': <NDArray 128 @cpu(0)>,
 'bn10_2_moving_var': <NDArray 128 @cpu(0)>,
 'bn1_beta': <NDArray 512 @cpu(0)>,
 'bn1_gamma': <NDArray 512 @cpu(0)>,
 'bn1_moving_mean': <NDArray 512 @cpu(0)>,
 'bn1_moving_var': <NDArray 512 @cpu(0)>,
 'bn8_1_beta': <NDArray 512 @cpu(0)>,
 'bn8_1_gamma': <NDArray 512 @cpu(0)>,
 'bn8_1_moving_mean': <NDArray 512 @cpu(0)>,
 'bn8_1_moving_var': <NDArray 512 @cpu(0)>,
 'bn8_2_beta': <NDArray 256 @cpu(0)>,
 'bn8_2_gamma': <NDArray 256 @cpu(0)>,
 'bn8_2_moving_mean': <NDArray 256 @cpu(0)>,
 'bn8_2_moving_var': <NDArray 256 @cpu(0)>,
 'bn9_1_beta': <