In [1]:
import shap


In [7]:
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K


# Use TensorFlow Backend
import tensorflow as tf
#tf.set_random_seed(42) # For reproducibility

#config = tf.ConfigProto()
#config.gpu_options.visible_device_list = "0"
#config.gpu_options.allow_growth = True
#config.gpu_options.per_process_gpu_memory_fraction = 0.5
#tf.Session(config=config)

# Print out Keras version
print(keras.__version__)

2.3.1


In [9]:
# Including MLflow
import mlflow
import mlflow.keras
import os
print("MLflow Version: %s" % mlflow.__version__)

MLflow Version: 1.10.0


In [11]:
# -----------------------------------------------------------
# Hyperparameters
batch_size = 128
num_classes = 10
epochs = 12


# -----------------------------------------------------------
# Image Datasets

# Height and width of every input image
img_rows, img_cols = 28, 28

# Load the predefined train / test split
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Place the single channel axis where the active Keras backend expects it,
# then reshape both splits to (samples,) + input_shape
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Convert pixels to float32 and scale them by 1/255
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# One-hot encode the class labels (one column per class)
y_train, y_test = (keras.utils.to_categorical(labels, num_classes)
                   for labels in (y_train, y_test))


x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [12]:
# One-hot encoded label row of training sample 25168
y_train[25168]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.], dtype=float32)

In [13]:
from __future__ import print_function

# Pull the single image stored at index 25168 out of the training array
xt_25168 = x_train[25168]

print(xt_25168)

[[[0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]]

 [[0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]]

 [[0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0. 

In [18]:
# As this is a 28 x 28 image, let's print it out this way:
# one comma-separated line of text per pixel row.
# Every row and column is visited (a range(0, 27) bound would stop one short
# and silently drop the last row and the last column).
# Reshaping to (img_rows, img_cols) removes the singleton channel axis, so this
# works for both the channels_first and channels_last layouts.
image_2d = xt_25168.reshape(img_rows, img_cols)
for pixel_row in image_2d:
    # Same "0.000, " cell format as before, including the trailing separator
    print("".join("%.3f, " % pixel for pixel in pixel_row))

0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0

In [20]:
def runCNN(activation, verbose):
    """Build, train and evaluate the CNN with the given hidden-layer activation.

    Reads these notebook-level globals: ``input_shape``, ``num_classes``,
    ``x_train``, ``y_train``, ``x_test``, ``y_test``, ``batch_size`` and
    ``epochs``. Training, evaluation and logging happen inside one MLflow run,
    which records the activation name, the two evaluation values and the
    trained model.

    Args:
        activation: activation name used by both convolution layers and the
            128-unit dense layer (the output layer always uses softmax).
        verbose: forwarded unchanged to ``model.fit``.

    Returns:
        The result of ``model.evaluate(x_test, y_test)``; element 0 is logged
        as "test loss" and element 1 as "test accuracy".
    """
    model = Sequential()

    # Layers in the order they are stacked
    layer_stack = [
        # Two convolution layers
        Conv2D(32, kernel_size=(3, 3),
               activation=activation,
               input_shape=input_shape),
        Conv2D(64, (3, 3), activation=activation),
        # 2 x 2 max pooling
        MaxPooling2D(pool_size=(2, 2)),
        # Dropout (rate 0.25) as regularization against overfitting
        Dropout(0.25),
        Flatten(),
        # Fully connected layer
        Dense(128, activation=activation),
        # Dropout (rate 0.5) as regularization against overfitting
        Dropout(0.5),
        # Softmax output, one unit per class
        Dense(num_classes, activation='softmax'),
    ]
    for layer in layer_stack:
        model.add(layer)

    # Everything below is tracked in a single MLflow run
    with mlflow.start_run():
        # Categorical cross-entropy loss with the Adadelta optimizer
        model.compile(
            loss=keras.losses.categorical_crossentropy,
            optimizer=keras.optimizers.Adadelta(),
            metrics=['accuracy'],
        )

        # Train, using the test split as validation data
        model.fit(
            x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            verbose=verbose,
            validation_data=(x_test, y_test),
        )

        # Evaluate on the test split
        score = model.evaluate(x_test, y_test, verbose=0)

        # Record the parameter, the metrics and the trained model
        mlflow.log_param("activation function", activation)
        mlflow.log_metric("test loss", score[0])
        mlflow.log_metric("test accuracy", score[1])
        mlflow.keras.log_model(model, "model")

    return score

In [22]:
# Train the CNN with sigmoid activations; verbose=0 is forwarded to model.fit
score_sigmoid = runCNN(activation='sigmoid', verbose=0)
sigmoid_loss, sigmoid_accuracy = score_sigmoid[0], score_sigmoid[1]
print('Test loss:', sigmoid_loss)
print('Test accuracy:', sigmoid_accuracy)


  if isinstance(loss, collections.Mapping):


Test loss: 0.10485499833747744
Test accuracy: 0.9700999855995178


In [23]:
# Train the CNN with tanh activations; verbose=0 is forwarded to model.fit
score_tanh = runCNN(activation='tanh', verbose=0)
tanh_loss, tanh_accuracy = score_tanh[0], score_tanh[1]
print('Test loss:', tanh_loss)
print('Test accuracy:', tanh_accuracy)

Test loss: 0.03565137563325989
Test accuracy: 0.989300012588501


In [24]:
# Building up our CNN with ReLU activations.
# This is done at notebook scope (not through runCNN) so that the trained
# `model` remains available to the cells that follow.
model = Sequential()

relu_layers = [
    # Two convolution layers
    Conv2D(32, kernel_size=(3, 3),
           activation='relu',
           input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    # 2 x 2 max pooling
    MaxPooling2D(pool_size=(2, 2)),
    # Dropout (rate 0.25) as regularization against overfitting
    Dropout(0.25),
    Flatten(),
    # Fully connected layer
    Dense(128, activation='relu'),
    # Dropout (rate 0.5) as regularization against overfitting
    Dropout(0.5),
    # Softmax output, one unit per class
    Dense(num_classes, activation='softmax'),
]
for layer in relu_layers:
    model.add(layer)

# Log MLflow
#with mlflow.start_run(experiment_id = mlflow_experiment_id) as run:
with mlflow.start_run() as run:

    # Categorical cross-entropy loss with the Adadelta optimizer
    model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=keras.optimizers.Adadelta(),
        metrics=['accuracy'],
    )

    # Train with per-epoch progress output, using the test split as validation data
    model.fit(
        x_train, y_train,
        batch_size=batch_size,
        epochs=epochs,
        verbose=1,
        validation_data=(x_test, y_test),
    )

    # Evaluate on the test split
    score = model.evaluate(x_test, y_test, verbose=0)

    # Record the parameter, the metrics and the trained model
    mlflow.log_param("activation function", 'relu')
    mlflow.log_metric("test loss", score[0])
    mlflow.log_metric("test accuracy", score[1])
    mlflow.keras.log_model(model, "model")

Train on 60000 samples, validate on 10000 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [25]:
# Report the evaluation result of the ReLU model trained above
relu_loss, relu_accuracy = score[0], score[1]
print('Test loss:', relu_loss)
print('Test accuracy:', relu_accuracy)

Test loss: 0.024468238703090173
Test accuracy: 0.9922000169754028


In [None]:
#import shap
import numpy as np

# Select a set of 100 background examples to take an expectation over.
# A seeded generator is used so the same background sample is drawn on every
# run of this cell (an unseeded draw changes after each kernel restart).
background_rng = np.random.RandomState(42)
background = x_train[background_rng.choice(x_train.shape[0], 100, replace=False)]

# explain predictions of the model on nine test images (x_test[1] .. x_test[9])
e = shap.DeepExplainer(model, background)
# ...or pass tensors directly
# e = shap.DeepExplainer((model.layers[0].input, model.layers[-1].output), background)
shap_values = e.shap_values(x_test[1:10])

In [None]:
# Plot the feature attributions over x_test[1:5], the first four of the
# images that shap_values was computed for
shown_images = x_test[1:5]
shap_plot = shap.image_plot(shap_values, -shown_images)
display(shap_plot)


In [None]:
# Plot the feature attributions over x_test[1:10], the same nine images
# that shap_values was computed for
explained_images = x_test[1:10]
shap_plot = shap.image_plot(shap_values, -explained_images)
display(shap_plot)


In [None]:
# plot the feature attributions
# The existing `shap_values` were computed for x_test[1:10]; drawing them over
# x_test[11:20] would pair each image with another image's attributions.
# Explain exactly the images being drawn instead (kept in a separate variable
# so `shap_values` is not overwritten for the cells above).
images_11_to_19 = x_test[11:20]
shap_values_11_to_19 = e.shap_values(images_11_to_19)
shap_plot = shap.image_plot(shap_values_11_to_19, -images_11_to_19)
display(shap_plot)


# https://databricks.com/wp-content/uploads/2019/10/Introduction-to-Neural-Networks-MLflow-and-SHAP.html