# Implementation of the gradient class activation map: VGG16 as an example
ref. https://github.com/jacobgil/keras-grad-cam

In [1]:
import cv2
import sys
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
import keras
import keras.backend as K
#from keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
from keras.applications.vgg16 import VGG16, decode_predictions
from keras.preprocessing import image
from keras.layers.core import Lambda
from keras.models import Model

Using TensorFlow backend.


# Data preprocess

In [2]:
from tensorflow.keras.applications.vgg16 import preprocess_input
def load_image(path):
    img_path = path
    img = image.load_img(img_path, target_size=(224,224,3))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    # Normalize images with values ranging from 0 to 1 (or from -1 to +1 or mean subtraction), 
    # depending on the model called 
    x = preprocess_input(x)
    return x

In [3]:
test_path = './test/test.jpg'
preprocessed_input = load_image(test_path)
print(preprocessed_input.shape)

(1, 224, 224, 3)


In [4]:
# test only
#preprocessed_input = preprocessed_input[0,:]
#preprocessed_input.shape

In [5]:
# test only
preprocessed_input[20:30, 20:30, 0]

array([], shape=(0, 10, 3), dtype=float32)

# Model prediction

In [6]:
model = VGG16(weights='imagenet')
predictions = model.predict(preprocessed_input)
top_1 = decode_predictions(predictions)[0][0]
# top_1 structure looks like [[(class, class description, score),...,(a5,b5,c5)]]

predicted_class = np.argmax(predictions)
print('Predicted class:', predicted_class)
print('%s (%s) with probability %.2f' % (top_1[1], top_1[0], top_1[2]))
print(np.shape(predictions))

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5
  6045696/553467096 [..............................] - ETA: 1:26:37

KeyboardInterrupt: 

# Grad-CAM

In [None]:
def target_category_loss(x, category_index, nb_classes):
    return tf.multiply(x, K.one_hot([category_index], nb_classes))

def target_category_loss_output_shape(input_shape):
    return input_shape

def normalize(x):
    # utility function to normalize a tensor by its L2 norm
    #return x / (K.sqrt(K.mean(K.square(x))) + 1e-5)
    return x / (K.sqrt(K.sum(K.square(x))) + 1e-5)

In [None]:
# test only

with tf.Session() as sess:
    x = tf.constant([[1., 2.], [1., 2.]])
    ans = normalize(x)
    print(sess.run(ans))


In [None]:
def grad_cam(input_model, image, category_index, layer_name):
    # cam, heatmap = grad_cam(model, preprocessed_input, predicted_class, "block5_conv3")
    nb_classes = 1000
    target_layer = lambda x: target_category_loss(x, category_index, nb_classes)
    # Add a self-defined layer by using Lambda
    x = Lambda(target_layer, output_shape = target_category_loss_output_shape)(input_model.output)
    model = Model(inputs=input_model.input, outputs=x)
    model.summary()

    loss = K.sum(model.output)
    conv_output =  [l for l in model.layers if l.name is layer_name][0].output
    print('inside',[l for l in model.layers if l.name is layer_name][0])
    print('out',conv_output)
    
    # Compute dy/dA 
    gradient_function = K.function([model.input], [conv_output, normalize(K.gradients(loss, [conv_output])[0])])
    
    output, grads_val = gradient_function([image]) # Output is in numpy format
    output, grads_val = output[0, :], grads_val[0, :, :, :]
    
    weights = np.mean(grads_val, axis = (0, 1))
    cam = np.ones(output.shape[0 : 2], dtype = np.float32)
    # ith feature map, weight w
    for i, w in enumerate(weights):
        cam += w * output[:, :, i] # \sum w*A

    cam = cv2.resize(cam, (224, 224))
    cam = np.maximum(cam, 0)
    heatmap = cam / np.max(cam)

    #Return to BGR [0-255] from the preprocessed image
    image = image[0, :]
    image -= np.min(image)
    image = np.minimum(image, 255)

    cam = cv2.applyColorMap(np.uint8(255*heatmap), cv2.COLORMAP_JET)
    cam = np.float32(cam) + np.float32(image)
    cam = 255 * cam / np.max(cam)
    return np.uint8(cam), heatmap

In [None]:
def register_gradient():
    if "GuidedBackProp" not in ops._gradient_registry._registry:
        @ops.RegisterGradient("GuidedBackProp")
        def _GuidedBackProp(op, grad):
            dtype = op.inputs[0].dtype
            return grad * tf.cast(grad > 0., dtype) * tf.cast(op.inputs[0] > 0., dtype)

In [None]:
def modify_backprop(model, name):
    g = tf.get_default_graph()
    with g.gradient_override_map({'Relu': name}):

        # get layers that have an activation
        layer_dict = [layer for layer in model.layers[1:] if hasattr(layer, 'activation')]
        # replace relu activation
        for layer in layer_dict:
            if layer.activation == keras.activations.relu:
                layer.activation = tf.nn.relu

        # re-instanciate a new model
        new_model = VGG16(weights='imagenet')
    return new_model

In [None]:
def compile_saliency_function(model, activation_layer='block5_conv3'):
    input_img = model.input
    layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
    layer_output = layer_dict[activation_layer].output
    max_output = K.max(layer_output, axis=3)
    saliency = K.gradients(K.sum(max_output), input_img)[0]        # saliency.shape = input_img.shape
    return K.function([input_img, K.learning_phase()], [saliency]) #test: K.learning_phase()=0; train: 1 

In [None]:
def deprocess_image(x):
    '''
    Same normalization as in:
    https://github.com/fchollet/keras/blob/master/examples/conv_filter_visualization.py
    '''
    if np.ndim(x) > 3:
        x = np.squeeze(x)
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)
    x *= 0.1

    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # convert to RGB array
    x *= 255
    if K.common.image_dim_ordering() == 'th':
        x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x

# Main

In [None]:
# Creat grad_cam
gcam, heatmap = grad_cam(model, preprocessed_input, predicted_class, "block5_conv3")
cv2.imwrite("gradcam.jpg", gcam)

# Creat guided_grad_cam
register_gradient()
guided_model = modify_backprop(model, 'GuidedBackProp')
saliency_fn = compile_saliency_function(guided_model)
saliency = saliency_fn([preprocessed_input, 0])      # saliency.shape = (1, input_img.shape)
gradcam = saliency[0] * heatmap[..., np.newaxis]     # heatmap.shape expands to (height, width, 1)
cv2.imwrite("guided_gradcam.jpg", deprocess_image(gradcam))

In [None]:
# test only: broadcasting properties
mA = np.array([[[1.,2.],[3.,4.]]]) # (1,2,2)
print('mA.shape=', mA.shape)
mB = np.array([[1.,2.],[3.,4.]])   # (2,2)
print('mB.shape=', mB.shape)
mC = mA * mB
print('mC.shape=', mC.shape)
print('mC =', mC)
mA2 = np.array([[[[1.,1.1,1.2],[2.,2.1,2.2]],[[3.,3.1,3.2],[4.,4.1,4.2]]]]) # (1,2,2,3)
print('mA2.shape=', mA2.shape)
mB2 = mB[..., np.newaxis]
print('mB2.shape=', mB2.shape)
mC2 = mA2 * mB2
print('mC2.shape=', mC2.shape)
print('mC2 =', mC2)