In [1]:
import os

import mxnet as mx
import mxnet.ndarray as nd
from mxnet.gluon.block import HybridBlock
from mxnet.gluon import nn
from mxnet.gluon.model_zoo import model_store
from mxnet import autograd

from viz.layers import Activation
from utils import convert_to_grayscale

import numpy as np
import utils
import cv2

class Conv2D(mx.gluon.HybridBlock):
    """Wrapper around `nn.Conv2D` that can record its output for inspection.

    The class attribute `capture_layer_name` selects which instance records.
    During the forward pass, the instance whose name (its prefix without the
    trailing character) equals that value calls `attach_grad()` on its output
    and stores the output in the class attribute `conv_output`. Every other
    instance simply returns the convolution result.
    """

    # Output stored by the most recent matching forward pass; shared by all
    # instances because it lives on the class.
    conv_output = None
    # Name of the layer whose output should be stored; None selects no layer.
    capture_layer_name = None

    def __init__(self, channels, kernel_size, strides=(1, 1), padding=(0, 0),
                 dilation=(1, 1), groups=1, layout='NCHW',
                 activation=None, use_bias=True, weight_initializer=None,
                 bias_initializer='zeros', in_channels=0, **kwargs):
        """Create the wrapped convolution.

        Every argument except `**kwargs` is forwarded unchanged to
        `nn.Conv2D`; `**kwargs` goes to the `HybridBlock` constructor.
        """
        super(Conv2D, self).__init__(**kwargs)
        conv_settings = dict(
            strides=strides,
            padding=padding,
            dilation=dilation,
            groups=groups,
            layout=layout,
            activation=activation,
            use_bias=use_bias,
            weight_initializer=weight_initializer,
            bias_initializer=bias_initializer,
            in_channels=in_channels,
        )
        self.conv = nn.Conv2D(channels, kernel_size, **conv_settings)

    def hybrid_forward(self, F, x):
        """Apply the convolution and record the result if this layer is selected."""
        conv_result = self.conv(x)
        # The block prefix carries one trailing character after the layer name.
        layer_name = self._prefix[:-1]
        if layer_name != Conv2D.capture_layer_name:
            return conv_result
        conv_result.attach_grad()
        Conv2D.conv_output = conv_result
        return conv_result

In [2]:
def _get_grad(net, image, class_id=None, conv_layer_name=None, image_grad=False):
    """Run one forward/backward pass and return the requested gradient.

    Parameters
    ----------
    net : callable
        Network applied to `image` inside `autograd.record(train_mode=False)`.
    image : NDArray
        Input handed to `net`. When `image_grad` is True, `attach_grad()` is
        called on it, so the caller's array is modified.
    class_id : int, optional
        Index of the output unit to backpropagate from. When None, the index
        of the largest value in the network output is used.
    conv_layer_name : str, optional
        Name of the `Conv2D` wrapper whose output should be captured. Only
        used when `image_grad` is False.
    image_grad : bool, default False
        True returns the gradient with respect to `image`; False returns the
        captured convolution output together with its gradient.

    Returns
    -------
    numpy.ndarray
        `image.grad` as a numpy array when `image_grad` is True.
    tuple of numpy.ndarray
        `(conv_output, conv_output_gradient)` when `image_grad` is False.

    Raises
    ------
    ValueError
        If `image_grad` is False and no `Conv2D` block stored its output
        during the forward pass (no layer matched `conv_layer_name`).
    """
    if image_grad:
        image.attach_grad()
        # No Conv2D wrapper should store its output in this mode.
        Conv2D.capture_layer_name = None
    else:
        # Tell Conv2D which layer's output and gradient need to be recorded.
        Conv2D.capture_layer_name = conv_layer_name
        # Forget any output stored by an earlier call so that a layer name
        # matching nothing is detected below instead of returning stale data.
        Conv2D.conv_output = None

    # Run the network
    with autograd.record(train_mode=False):
        out = net(image)

    # If the caller didn't provide a class id, use the class the network predicted.
    if class_id is None:
        target_class = np.argmax(out.asnumpy())
    else:
        target_class = class_id

    # Build a one-hot head gradient sized from the network output (rather than
    # a fixed 1000) and placed on the output's context, then backpropagate it.
    num_classes = out.shape[-1]
    target_index = mx.nd.array([target_class], ctx=out.context)
    one_hot_target = mx.nd.one_hot(target_index, num_classes)
    out.backward(one_hot_target, train_mode=False)

    if image_grad:
        return image.grad.asnumpy()

    # Return the recorded convolution output and gradient
    conv_out = Conv2D.conv_output
    if conv_out is None:
        raise ValueError(
            "No Conv2D output was captured for layer name %r" % (conv_layer_name,))
    return conv_out.asnumpy(), conv_out.grad.asnumpy()


def get_conv_out_grad(net, image, class_id=None, conv_layer_name=None):
    """Return what `_get_grad` produces with `image_grad=False`.

    All arguments are passed through to `_get_grad` unchanged.
    """
    return _get_grad(
        net,
        image,
        class_id=class_id,
        conv_layer_name=conv_layer_name,
        image_grad=False,
    )

def get_image_grad(net, image, class_id=None):
    """Return what `_get_grad` produces with `image_grad=True`.

    `net`, `image` and `class_id` are passed through to `_get_grad` unchanged;
    no convolution layer name is supplied.
    """
    return _get_grad(net, image, class_id=class_id, image_grad=True)

In [3]:
class AlexNet(HybridBlock):
    """AlexNet-style network built from the capturing `Conv2D` wrapper.

    `features` holds five convolution stages (each followed by a ReLU
    `Activation`, three of them also by max pooling), a flatten step and two
    4096-unit dense stages with ReLU and dropout. `output` is the final dense
    layer with `classes` units.

    Parameters
    ----------
    classes : int, default 1000
        Number of units in the output layer.
    """

    def __init__(self, classes=1000, **kwargs):
        super(AlexNet, self).__init__(**kwargs)
        # (channels, Conv2D keyword arguments, followed by max pooling?)
        conv_stages = (
            (64, dict(kernel_size=11, strides=4, padding=2), True),
            (192, dict(kernel_size=5, padding=2), True),
            (384, dict(kernel_size=3, padding=1), False),
            (256, dict(kernel_size=3, padding=1), False),
            (256, dict(kernel_size=3, padding=1), True),
        )
        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')
            with self.features.name_scope():
                # Blocks are created in exactly the listed order so that the
                # automatically generated layer names stay the same.
                for channels, conv_args, pooled in conv_stages:
                    self.features.add(Conv2D(channels, **conv_args))
                    self.features.add(Activation('relu'))
                    if pooled:
                        self.features.add(nn.MaxPool2D(pool_size=3, strides=2))

                self.features.add(nn.Flatten())

                # Two identical fully connected stages.
                for _ in range(2):
                    self.features.add(nn.Dense(4096))
                    self.features.add(Activation('relu'))
                    self.features.add(nn.Dropout(0.5))

            self.output = nn.Dense(classes)

    def hybrid_forward(self, F, x):
        """Apply the feature stack, then the output layer."""
        return self.output(self.features(x))

# Constructor
def alexnet(pretrained=False, ctx=mx.cpu(),
            root=os.path.join('~', '.mxnet', 'models'), **kwargs):
    r"""Build an `AlexNet` instance, optionally loading pretrained weights.

    Architecture reference: the `"One weird trick..."
    <https://arxiv.org/abs/1404.5997>`_ paper.

    Parameters
    ----------
    pretrained : bool, default False
        When True, parameters are loaded from the file returned by
        ``model_store.get_model_file('alexnet', root=root)``.
    ctx : Context, default CPU
        Context passed to the parameter loader.
    root : str, default '~/.mxnet/models'
        Directory handed to the model store for locating the parameter file.
    **kwargs
        Forwarded to the `AlexNet` constructor.

    Returns
    -------
    AlexNet
        The constructed network.
    """
    model = AlexNet(**kwargs)
    if not pretrained:
        return model
    weights_file = model_store.get_model_file('alexnet', root=root)
    model.load_params(weights_file, ctx=ctx)
    return model

def preprocess(data):
    """Resize, crop, scale, normalize and transpose an image array.

    Steps, in order: resize to 256x256, center-crop to 224x224, cast to
    float32, divide by 255, normalize with fixed per-channel mean/std
    values, and reorder the axes with the permutation (2, 0, 1).

    Parameters
    ----------
    data : NDArray
        Image as accepted by `mx.image.imresize`
        (assumed height x width x channel - TODO confirm against caller).

    Returns
    -------
    NDArray
        The normalized image after the axis permutation.
    """
    resized = mx.image.imresize(data, 256, 256)
    cropped, _ = mx.image.center_crop(resized, (224, 224))
    scaled = cropped.astype(np.float32) / 255

    channel_mean = mx.nd.array([0.485, 0.456, 0.406])
    channel_std = mx.nd.array([0.229, 0.224, 0.225])
    normalized = mx.image.color_normalize(scaled, mean=channel_mean, std=channel_std)

    return mx.nd.transpose(normalized, (2, 0, 1))

In [4]:
# Build the network with pretrained weights.
# NOTE(review): this rebinds the name `alexnet` from the constructor function
# to the model instance, so re-running this cell without first re-running the
# cell that defines `alexnet()` calls the model instance instead of the
# constructor. Later cells rely on the name `alexnet`, so it is kept here.
alexnet = alexnet(pretrained=True)

In [5]:
# Read the encoded image bytes from disk.
with open("img/snake.jpg", 'rb') as fp:
    str_image = fp.read()

# Decode the bytes, run preprocess(), then insert a new leading axis
# (presumably the batch dimension expected by the network).
image = preprocess(mx.img.imdecode(str_image)).expand_dims(axis=0)

In [8]:
# Gradient with respect to the input image. No class_id is given, so the
# class with the largest network output is used as the backprop target.
imagegrad = get_image_grad(alexnet, image)

In [9]:
# Display the image gradient returned above (a numpy array).
imagegrad

array([[[[ -1.16196729e-03,   1.92891457e-03,   1.86992041e-03, ...,
           -4.55597874e-05,  -2.22427989e-04,  -7.47223210e-04],
         [  1.51476590e-03,   2.29049544e-03,   7.74772663e-04, ...,
            1.82397489e-04,  -3.18078150e-04,  -8.76112841e-04],
         [  9.25434520e-04,   2.49735382e-03,   2.41334271e-03, ...,
            9.01308726e-04,  -7.59505609e-04,   7.02283927e-04],
         ..., 
         [ -1.17615913e-03,  -1.70769379e-03,   2.67222669e-04, ...,
            1.45868114e-06,   3.76999989e-04,   1.27749983e-03],
         [  3.22130014e-04,  -1.53261318e-03,   1.28779351e-03, ...,
            8.79723346e-04,   1.82383496e-03,   8.68213596e-04],
         [  1.32486259e-03,   2.21321615e-03,   2.85062939e-04, ...,
            8.73604382e-04,   1.15247536e-03,   2.49702716e-04]],

        [[ -2.05393368e-03,   1.11348683e-03,   2.45549483e-04, ...,
            1.57304297e-04,   3.35914840e-04,   4.46605081e-05],
         [ -6.94551818e-06,  -2.72795267e-04,

In [16]:
# Capture the output of the Conv2D wrapper named 'alexnet0_conv2d4' and the
# gradient that reaches it when backpropagating from the predicted class.
conv_out, conv_out_grad = get_conv_out_grad(alexnet, image, conv_layer_name='alexnet0_conv2d4')

In [17]:
# Display the gradient captured at the selected convolution layer.
conv_out_grad

array([[[[ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.00529759,
           0.        ,  0.        ],
         ..., 
         [ 0.00593547,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ]],

        [[ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         ..., 
         [ 0.        ,  0.      