*Note: MXNet supplies an official implementation of deformable convolution; this notebook tests this repo against MXNet's implementation.*

In [1]:
import os
import torch
import numpy as np
import mxnet as mx
from torch import nn
from time import time
from pprint import pprint
from torch.autograd import Variable
from mxnet.initializer import Initializer
from deform_conv import DeformConv2D

#### Set up parameters.

In [2]:
# Problem size: batch, input channels, output channels, input height and width.
bs, inC, ouC, H, W = 1, 1, 1, 4, 5
# Kernel extent. NOTE: the PyTorch and MXNet models in this notebook hard-code
# 3x3 kernels, so changing these values alone is not enough to test another size.
kH, kW = 3, 3
padding = 1
# Fixed seed so that a "Restart & Run All" regenerates exactly the same tensors.
SEED = 0

# ---------------------------------------
use_gpu = torch.cuda.is_available()
gpu_device = 0
if use_gpu:
    # NOTE(review): CUDA may already have been probed by `is_available()` above;
    # confirm that setting the variable this late still restricts the visible devices.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_device)
    print("Using gpu{}".format(os.getenv("CUDA_VISIBLE_DEVICES")))
# ---------------------------------------
np.random.seed(SEED)
raw_inputs = np.random.rand(bs, inC, H, W).astype(np.float32)
# Spatial size of a stride-1 convolution output: in + 2 * padding - (kernel - 1).
out_H = H + 2 * padding - (kH - 1)
out_W = W + 2 * padding - (kW - 1)
raw_labels = np.random.rand(bs, ouC, out_H, out_W).astype(np.float32)
# weights for conv offsets: 2 * kH * kW output channels (18 for a 3x3 kernel),
# i.e. the offset layout MXNet's DeformableConvolution expects for one deformable group.
offset_weights = np.random.rand(2 * kH * kW, inC, kH, kW).astype(np.float32)
# weights for deformable convolution.
conv_weights = np.random.rand(ouC, inC, kH, kW).astype(np.float32)

Using gpu0


In [3]:
# Show the randomly generated tensors so the two frameworks' results below can
# be traced back to the exact data they were computed from.
for heading, tensor in (('\ninputs:', raw_inputs),
                        ('\nlabels:', raw_labels),
                        ('\nconv weights:', conv_weights)):
    print(heading)
    pprint(tensor)


inputs:
array([[[[ 0.56075788,  0.56448251,  0.38643569,  0.13775933,  0.92719644],
         [ 0.18066591,  0.24222445,  0.29689947,  0.54874617,  0.95001829],
         [ 0.61031544,  0.84815538,  0.27238497,  0.53376287,  0.93240666],
         [ 0.54890364,  0.60794067,  0.1237376 ,  0.16012843,  0.82202536]]]], dtype=float32)

labels:
array([[[[ 0.68657815,  0.10542295,  0.04489666,  0.64058632,  0.52095002],
         [ 0.73867059,  0.91901845,  0.80943078,  0.2182935 ,  0.02595145],
         [ 0.31954384,  0.80359656,  0.53808153,  0.46827996,  0.90268624],
         [ 0.84400773,  0.5750683 ,  0.55033565,  0.11278367,  0.47512576]]]], dtype=float32)

conv weights:
array([[[[ 0.89284414,  0.98574871,  0.94764489],
         [ 0.69642198,  0.84854221,  0.98900223],
         [ 0.82735974,  0.4257046 ,  0.59915102]]]], dtype=float32)


### Set up the PyTorch and MXNet models

#### Set PyTorch model.

In [4]:
class TestModel(nn.Module):
    """Offset-predicting convolution followed by the deformable convolution under test.

    The layer sizes are read from the notebook-level `inC`, `ouC` and `padding`
    settings when the model is instantiated.
    """

    def __init__(self):
        super(TestModel, self).__init__()
        # 18 offset channels, the same count as the MXNet `offset` convolution in
        # this notebook (presumably 2 per tap of a 3x3 kernel -- confirm against DeformConv2D).
        # `bias` is a boolean flag in nn.Conv2d, so pass False rather than None.
        self.conv_offset = nn.Conv2d(in_channels=inC, out_channels=18, kernel_size=3, padding=padding, bias=False)
        self.deform_conv = DeformConv2D(inc=inC, outc=ouC, padding=padding)

    def forward(self, x):
        """Predict sampling offsets from `x`, then apply the deformable convolution to `x`."""
        offsets = self.conv_offset(x)
        out = self.deform_conv(x, offsets)
        return out

In [5]:
model = TestModel()

# Only move the tensors to the GPU when CUDA is usable, so this cell follows the
# same `use_gpu` guard that is applied to the model instead of failing outright
# on a CPU-only machine.
inputs_tensor = torch.from_numpy(raw_inputs)
labels_tensor = torch.from_numpy(raw_labels)
if use_gpu:
    inputs_tensor = inputs_tensor.cuda()
    labels_tensor = labels_tensor.cuda()
pt_inputs = Variable(inputs_tensor, requires_grad=True)
pt_labels = Variable(labels_tensor, requires_grad=False)

# Plain SGD with lr=0.1, the same learning rate given to the MXNet optimizer.
optimizer = torch.optim.SGD([{'params': model.parameters()}], lr=1e-1)
# NOTE(review): `reduce` is deprecated in newer PyTorch releases in favour of
# `reduction='mean'`; kept as-is for the PyTorch version this notebook targets.
loss_fn = torch.nn.MSELoss(reduce=True)

#### Init weights.

In [6]:
def init_weights(m):
    """Load the shared `conv_weights` array into a Conv2d module and zero its bias.

    Intended for `nn.Module.apply`; modules that are not `torch.nn.Conv2d` are
    left untouched.
    """
    if isinstance(m, torch.nn.Conv2d):
        # `from_numpy` shares memory with the ndarray; clone so that in-place
        # optimizer updates cannot silently modify `conv_weights`.
        m.weight.data = torch.from_numpy(conv_weights).clone()
        if m.bias is not None:
            # Zero in place so the bias keeps its current device and dtype.
            m.bias.data.zero_()

def init_offsets_weights(m):
    """Load the shared `offset_weights` array into a Conv2d module and zero its bias.

    Intended for `nn.Module.apply`; modules that are not `torch.nn.Conv2d` are
    left untouched.
    """
    if isinstance(m, torch.nn.Conv2d):
        # `from_numpy` shares memory with the ndarray; clone so that in-place
        # optimizer updates cannot silently modify `offset_weights`.
        m.weight.data = torch.from_numpy(offset_weights).clone()
        if m.bias is not None:
            # Zero in place so the bias keeps its current device and dtype.
            m.bias.data.zero_()

# `apply` calls the given function on the module and on each of its submodules.
# The deformable convolution gets `conv_weights`, the offset convolution gets
# `offset_weights`.
model.deform_conv.apply(init_weights)
# Last expression of the cell: `apply` returns the module, so its repr is echoed.
model.conv_offset.apply(init_offsets_weights)

Conv2d (1, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

In [7]:
# Run the PyTorch model on the GPU whenever CUDA is available; `cuda()` moves the
# parameters and hands back the same module.
model = model.cuda() if use_gpu else model

#### Set MXNet model.

In [8]:
# Training iterator. The batch size follows `bs` so that MXNet sees the same
# full batch per step as the PyTorch training loop does.
train_iter = mx.io.NDArrayIter(raw_inputs, raw_labels, batch_size=bs, shuffle=True, data_name='data', label_name='label')

# Symbolic graph: offset convolution -> deformable convolution -> mean squared error.
inputs = mx.symbol.Variable('data')
labels = mx.symbol.Variable('label')
# The layer names determine the parameter names ('offset_weight', 'deform_weight')
# that are used when the initial weights are loaded into the module.
offsets = mx.symbol.Convolution(data=inputs, kernel=(3, 3), pad=(padding, padding), num_filter=18, name='offset', no_bias=True)
net = mx.symbol.contrib.DeformableConvolution(data=inputs, offset=offsets, kernel=(3, 3), pad=(padding, padding), num_filter=ouC, name='deform', no_bias=True)
# MakeLoss marks the mean squared error as the quantity to differentiate.
outputs = mx.symbol.MakeLoss(data=mx.symbol.mean((net-labels)**2))

In [9]:
# Wrap the loss symbol in a Module that runs on the GPU.
mod = mx.mod.Module(outputs, data_names=['data'], label_names=['label'], context=mx.gpu())

# Allocate executors for the shapes served by the training iterator.
mod.bind(label_shapes=train_iter.provide_label, data_shapes=train_iter.provide_data)

# Start from exactly the same weights that were loaded into the PyTorch model.
initial_params = {
    'deform_weight': mx.nd.array(conv_weights),
    'offset_weight': mx.nd.array(offset_weights),
}
mod.init_params(initializer=mx.initializer.Load(initial_params))

# SGD with the same learning rate as the PyTorch optimizer.
mod.init_optimizer(optimizer_params=(('learning_rate', 0.1),), optimizer='sgd')

### Inference

#### PyTorch

In [10]:
# Forward pass of the freshly initialised PyTorch model; printed so it can be
# compared by eye with the MXNet result.
output = model(pt_inputs)
pprint(output)

Variable containing:
(0 ,0 ,.,.) = 
  1.1062  2.6770  3.6377  2.7072  1.3894
  2.7560  3.1664  3.4931  0.8367  0.5725
  2.1425  1.1672  2.4347  1.5723  0.0000
  1.5282  0.7295  1.6247  2.0247  1.5443
[torch.cuda.FloatTensor of size 1x1x4x5 (GPU 0)]



#### MXNet

In [11]:
# Imperative MXNet forward pass with the same initial weights as the PyTorch model.
# Dedicated `mx_*` names are used so that the NumPy arrays `conv_weights` /
# `offset_weights` and the loss symbol `outputs` are not overwritten; the weight
# initialisation and module set-up cells therefore stay re-runnable.
mx_inputs = mx.nd.array(raw_inputs, ctx=mx.gpu())
mx_conv_weights = mx.nd.array(conv_weights, ctx=mx.gpu())
mx_offset_weights = mx.nd.array(offset_weights, ctx=mx.gpu())
mx_offset = mx.ndarray.Convolution(data=mx_inputs, weight=mx_offset_weights, kernel=(3, 3), pad=(padding, padding), num_filter=18, name='offset', no_bias=True)
mx_outputs = mx.ndarray.contrib.DeformableConvolution(data=mx_inputs, offset=mx_offset, weight=mx_conv_weights, kernel=(3, 3), pad=(padding, padding), num_filter=ouC, name='deform', no_bias=True)
pprint(mx_outputs)


[[[[ 1.10622048  2.67702818  3.63765717  2.70719433  1.38943076]
   [ 2.75602031  3.16641665  3.49313188  0.83671159  0.57247651]
   [ 2.14249563  1.16722107  2.43471932  1.57227027  0.        ]
   [ 1.52822971  0.72951925  1.62470031  2.02470279  1.54425097]]]]
<NDArray 1x1x4x5 @gpu(0)>


### Train

#### PyTorch

In [12]:
# 100 full-batch SGD steps on the single training sample.
for step in range(100):
    # Clear gradients left over from the previous step before building new ones.
    optimizer.zero_grad()
    output = model(pt_inputs)
    loss = loss_fn(output, pt_labels)
    loss.backward()
    optimizer.step()

In [13]:
# Output of the PyTorch model after training, for comparison with MXNet.
pprint(model(pt_inputs))

Variable containing:
(0 ,0 ,.,.) = 
  0.1410  0.3813  0.3941  0.7022  0.3028
  0.4378  0.7259  0.8587  0.3047 -0.0288
  0.5056  0.3441  0.6582  0.5480  0.0000
  0.4662  0.2311  0.4464  0.5114  0.5383
[torch.cuda.FloatTensor of size 1x1x4x5 (GPU 0)]



#### MXNet

In [14]:
# 100 epochs over the iterator, mirroring the 100 PyTorch steps.
for epoch in range(100):
    train_iter.reset()  # rewind the iterator before every pass
    for batch in train_iter:
        mod.forward(batch, is_train=True)  # forward pass in training mode
        mod.backward()                     # gradients of the MakeLoss output
        mod.update()                       # apply the SGD step

In [15]:
# Imperative forward pass with the weights learned by the MXNet module.
# Dedicated `trained_*` names are used so that the NumPy arrays `conv_weights` /
# `offset_weights` and the loss symbol `outputs` are not overwritten; the earlier
# initialisation cells therefore stay re-runnable.
mx_inputs = mx.nd.array(raw_inputs, ctx=mx.gpu())
mx_labels = mx.nd.array(raw_labels, ctx=mx.gpu())
# Fetch the parameters once instead of calling `get_params()` per weight.
trained_arg_params = mod.get_params()[0]
trained_conv_weights = trained_arg_params['deform_weight'].as_in_context(mx.gpu())
trained_offset_weights = trained_arg_params['offset_weight'].as_in_context(mx.gpu())
trained_offset = mx.ndarray.Convolution(data=mx_inputs, weight=trained_offset_weights, kernel=(3, 3), pad=(padding, padding), num_filter=18, name='offset', no_bias=True)
trained_outputs = mx.ndarray.contrib.DeformableConvolution(data=mx_inputs, offset=trained_offset, weight=trained_conv_weights, kernel=(3, 3), pad=(padding, padding), num_filter=ouC, name='deform', no_bias=True)
pprint(trained_outputs)


[[[[ 0.14098766  0.38126716  0.39405173  0.70216376  0.30278912]
   [ 0.43777964  0.72585207  0.8587358   0.30472821 -0.02884051]
   [ 0.50558764  0.34406984  0.65824842  0.54796678  0.        ]
   [ 0.46616155  0.23111115  0.44640523  0.51136774  0.53833312]]]]
<NDArray 1x1x4x5 @gpu(0)>
