In [None]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

In [None]:
# import "Skater" related functions
%matplotlib inline
from skater.util.image_ops import load_image, show_image, normalize, add_noise, flip_pixels, image_transformation
from skater.util.image_ops import in_between, greater_than, greater_than_or_equal, equal_to
from skater.core.local_interpretation.dnni.deep_interpreter import DeepInterpreter
from skater.core.visualizer.image_relevance_visualizer import visualize

In [None]:
# Temporarily raise the TensorFlow log threshold to ERROR while the MNIST
# data is read, then put the previous threshold back.
current_level = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)
try:
    mnist = input_data.read_data_sets("/tmp/", one_hot=True)
finally:
    # Restore the verbosity even when reading the data fails; otherwise the
    # rest of the kernel session would silently stay at ERROR level.
    tf.logging.set_verbosity(current_level)

In [None]:
# One session, bound to the default graph, shared by the training,
# evaluation, checkpointing and interpretation cells below.
sess = tf.Session(graph=tf.get_default_graph())

In [None]:
# --- Optimisation settings ---
learning_rate = 0.005
num_steps = 2000
batch_size = 128

# --- Network dimensions ---
n_hidden_1 = 256   # units in the first hidden layer
n_hidden_2 = 256   # units in the second hidden layer
num_input = 784    # one flattened MNIST image (28*28 pixels)
num_classes = 10   # one class per digit, 0-9

# Graph inputs; the leading None dimension leaves the batch size open.
X = tf.placeholder(tf.float32, shape=[None, num_input], name="input")
Y = tf.placeholder(tf.float32, shape=[None, num_classes], name="output")


def _normal_weights(n_in, n_out):
    """Return a variable of shape [n_in, n_out] initialised from a normal
    distribution with mean 0.0 and standard deviation 0.05."""
    return tf.Variable(tf.random_normal([n_in, n_out], mean=0.0, stddev=0.05))


def _zero_bias(size):
    """Return a variable of shape [size] initialised to zeros."""
    return tf.Variable(tf.zeros([size]))


# Weight matrices and bias vectors, keyed by layer. Creation order is
# h1, h2, out, b1, b2, out.
weights = {
    'h1': _normal_weights(num_input, n_hidden_1),
    'h2': _normal_weights(n_hidden_1, n_hidden_2),
    'out': _normal_weights(n_hidden_2, num_classes),
}
biases = {
    'b1': _zero_bias(n_hidden_1),
    'b2': _zero_bias(n_hidden_2),
    'out': _zero_bias(num_classes),
}

In [None]:
def model(x, act=tf.nn.relu):
    """Build the two-hidden-layer fully connected network on top of `x`.

    Reads the module-level `weights` and `biases` dictionaries.

    Args:
        x: input tensor that is matrix-multiplied with `weights['h1']`.
        act: activation applied after each hidden layer (default: ReLU).

    Returns:
        The output-layer tensor, named "absolute_output"; no activation is
        applied to it.
    """
    hidden = x
    for w_key, b_key in (('h1', 'b1'), ('h2', 'b2')):
        pre_activation = tf.add(tf.matmul(hidden, weights[w_key]), biases[b_key])
        hidden = act(pre_activation)
    return tf.add(tf.matmul(hidden, weights['out']), biases['out'], name="absolute_output")

# Wire the network to the input placeholder.
logits = model(X)

In [None]:
# Softmax cross-entropy between the network output and the labels fed
# through Y, averaged into a single scalar loss.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits)
loss_op = tf.reduce_mean(per_example_loss)

# Adam optimizer and the op that applies one update minimizing the loss.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

In [None]:
# A prediction counts as correct when the arg-max of the logits matches the
# arg-max of the label row; accuracy is the mean of those matches.
predicted_digit = tf.argmax(logits, axis=1)
true_digit = tf.argmax(Y, axis=1)
correct_predictions = tf.equal(predicted_digit, true_digit)
accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))

In [None]:
# Give every variable its initial value before the first training step.
init = tf.global_variables_initializer()
sess.run(init)

for step in range(1, num_steps + 1):
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    batch_feed = {X: batch_x, Y: batch_y}

    # One optimization step on the current mini-batch.
    sess.run(train_op, feed_dict=batch_feed)

    # Report only on the first step and then every 100th step.
    if step != 1 and step % 100 != 0:
        continue

    # Loss and accuracy are measured on the same mini-batch just trained on.
    loss, acc = sess.run([loss_op, accuracy], feed_dict=batch_feed)
    print("Step {} Minibatch Loss= {:.4f} Training Accuracy= {:.3f}".format(step, loss, acc))

print("success")

In [None]:
# Evaluate the accuracy op on the full MNIST test split in a single run.
test_x, test_y = mnist.test.images, mnist.test.labels

test_accuracy = sess.run(accuracy, feed_dict={X: test_x, Y: test_y})
print("Test accuracy:", test_accuracy)

In [None]:
import os

saver = tf.train.Saver()

# Checkpoint *prefix*, not a directory: because global_step is passed to
# save(), the files land next to it as 'simple_mnist-model-<step>.*'
# (the interpretation cell reads the resulting '.meta' file).
explain_model = './explanations/models/simple_mnist-model'

# Create only the parent directory. Creating a directory at the prefix path
# itself would leave a stray, empty 'simple_mnist-model/' folder beside the
# checkpoint files.
os.makedirs(os.path.dirname(explain_model), exist_ok=True)

saver.save(sess, explain_model, global_step=num_steps)

In [None]:
# Pick one test example; list-indexing keeps the leading batch dimension of
# size 1 on both the image and its label row.
test_idx = 189
input_x_i = test_x[[test_idx]]
input_y_i = test_y[[test_idx]]
with DeepInterpreter(session=sess) as di:
    # 1. Restore the persisted model (once; importing the meta graph a second
    #    time would only add another copy of it to the default graph).
    saver = tf.train.import_meta_graph('./explanations/models/simple_mnist-model-2000.meta')
    saver.restore(sess, tf.train.latest_checkpoint('./explanations/models/'))

    # 2. Retrieve the input/output placeholders by name and rebuild the
    #    network on top of them inside the interpreter context.
    graph = tf.get_default_graph()
    X = graph.get_tensor_by_name("input:0")
    Y = graph.get_tensor_by_name("output:0")
    target_tensor = model(X)
    y_class = tf.argmax(target_tensor, 1)

    xs = input_x_i
    ys = input_y_i
    print("X shape: {}".format(xs.shape))
    print("Y shape: {}".format(ys.shape))

    # Predictions
    eval_dict = {X: xs, Y: ys}
    predicted_class = sess.run(y_class, feed_dict=eval_dict)
    print("Predicted Class: {}".format(predicted_class))

    # Multiplying by the label row zeroes every output except the labelled
    # class, so each method explains that class only. The dict keys are
    # reused as plot titles by the visualization cell.
    relevance_scores = {
        'elrp': di.explain('elrp', target_tensor * ys, X, xs, use_case='image'),
        'integrated gradient': di.explain('ig', target_tensor * ys, X, xs, use_case='image'),
    }

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
# One row per input image: the original image first, followed by one panel
# per relevance method.
input_x = [input_x_i.reshape(28, 28)]
input_y = input_y_i

n_cols = len(relevance_scores) + 1  # +1 to add a column for the original image
n_rows = len(input_x)
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(6*n_cols, 6*n_rows))

# Row-major flat view of the grid, so panel (row, col) is always at
# row*n_cols + col whether `axes` came back 1-D (single row) or 2-D.
flat_axes = axes.flatten()

# set the properties for text
font = {'family': 'avenir',
        'color':  'white',
        'weight': 'normal',
        'size': 14,
        }

fig.patch.set_facecolor('black')
for index, xi in enumerate(input_x):
    # Use the computed panel rather than axes[index]: with more than one row
    # axes[index] is a whole row of Axes, not a single Axes.
    ax = flat_axes[index*n_cols]
    visualize(xi, cmap='gray', axis=ax,
              alpha_edges=1.0, alpha_bgcolor=1).set_title('Original Image: {}'.format(input_y[index]), fontdict=font)
    for j, r_type in enumerate(relevance_scores):
        axj = flat_axes[index*n_cols+j+1]
        # Remember to reshape the relevance_score matrix as a 2-D array
        # Red: highlights positive relevance
        # Blue: highlights negative relevance
        visualize(relevance_scores[r_type][index].reshape(28, 28), original_input_img=xi, axis=axj,
                  percentile=99,  alpha_edges=1.0,
                  alpha_bgcolor=0.75).set_title('Relevance Type: "{}"'.format(r_type), fontdict=font)