# Analyze the performance of the classification model for 30 tree species based on LeafSnap data subset - heatmaps

### Settings

In [1]:
#!pip install keras-vis

In [2]:
import vis

In [3]:
# Imports
import warnings
warnings.simplefilter('ignore')

from matplotlib import pyplot as plt
from vis.utils import utils
from vis.visualization import visualize_cam
import seaborn as sns
import cv2


import os.path
import pandas as pd
import numpy as np

from keras.models import load_model
from keras import activations

Using TensorFlow backend.


### Load the relevant data 

In [4]:
# Root folder of the LeafSnap 30-species subset. The location is machine specific, so it
# can be overridden with the LEAFSNAP_DATA_DIR environment variable; the default is the
# path this notebook was originally run with.
original_data_path = os.environ.get(
    "LEAFSNAP_DATA_DIR",
    "/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/",
)
# Image listing of the subset. The "enhanced" file is the one read here; the plain listing
# is "leafsnap-dataset-30subset-images.txt" in the same folder.
dataset_info_file_enh = os.path.join(original_data_path, "leafsnap-dataset-30subset-images-enhanced.txt")

# One row per image; later cells read the columns image_path, species, source, filename
# and labels_integer.
img_info = pd.read_csv(dataset_info_file_enh)
img_info.head()

Unnamed: 0,file_id,image_path,species,source,filename,labels_integer
0,55821,dataset/images/lab/Auto_cropped/acer_campestre...,Acer campestre,lab,ny1079-01-1.jpg,0
1,55822,dataset/images/lab/Auto_cropped/acer_campestre...,Acer campestre,lab,ny1079-01-2.jpg,0
2,55823,dataset/images/lab/Auto_cropped/acer_campestre...,Acer campestre,lab,ny1079-01-3.jpg,0
3,55824,dataset/images/lab/Auto_cropped/acer_campestre...,Acer campestre,lab,ny1079-01-4.jpg,0
4,55825,dataset/images/lab/Auto_cropped/acer_campestre...,Acer campestre,lab,ny1079-02-1.jpg,0


### Load the model

In [5]:
# Location of the trained classifier inside the data folder. Both names refer to the
# same file path.
model_fname = os.path.join(original_data_path, 'Models', 'leafnet.h5')
best_model_file = model_fname

# Deserialize the saved Keras model from disk.
print('Loading the best model...')
model = load_model(best_model_file)
print('Best Model loaded!')


Loading the best model...
Best Model loaded!


## Visualize interesting regions

### Gradient weighted class activation maps

We predict the class of one image with our network. From the output, we select the column corresponding to the class that was assigned the highest probability. Next, we get the output of the final convolutional layer (the final convolutional feature map, e.g. of shape (14, 14, 512)). We then compute the gradient of our selected class's output with respect to the feature map (i.e. to what extent does changing the values in the feature map change the output for our selected class?).

From these gradients, we select the maximum gradient per channel of the feature map, i.e. the maximum gradient for each filter, which gives a vector of shape (512,). This works like a global max pooling layer: each filter (each channel), and therefore each feature detected by the corresponding filter, is collapsed into one node ((1, 1, 512) or (512,)). We then multiply the first two dimensions of the feature map by it, i.e. we multiply each of the 14x14 values in each of the 512 channels by the one maximum gradient value of the corresponding channel. From this, we obtain the weighted feature map (14, 14, 512), where each of the 14x14 values inside each channel is weighted by the maximum gradient of this channel.

We then average across the channel axis into a (14, 14) heatmap (so we have one average value for each position in the first two dimensions of the weighted feature map, taken across all channels). The higher a value in this heatmap, the more the output for our selected class depends on it.

Then, we set all negative entries in the heatmap to zero and normalize all values by dividing them by the maximum value inside the heatmap, so that the values now range from 0 to 1. Next, we resize the heatmap to the size of the image we generated the heatmap for (the input image we predicted a class for). Then, we multiply each of the heatmap's entries by 255 to obtain a grayscale image. This is then pseudocolored. Finally, the heatmap is superimposed on the original image.

From this, we can see which parts of the image the model mainly used to come up with a classification, as
we highlight the feature map regions that cause the most change in the output.


In [6]:
def generate_heatmap(img_array, model, layer_output, layer_conv, img):
    '''
    Computes a gradient weighted class activation map for an image by weighing the final convolutional
    feature map with the gradient of the predicted class with respect to the channel and then averaging
    along the channel (filter) axis to visualize which image regions are important for classifying the
    image.

    Input:
    - img_array: normalized rgb array of an image with shape (w,h,c) as expected by the model (assuming
      the amount of training examples is given on the first axis for the model input)
    - model: model to base the class activation map on; it is left unchanged by this function
    - layer_output: name of the model's softmax output layer (str)
    - layer_conv: name of the final convolutional layer (this feature map is used) (str)
    - img: filename (with path) of the image corresponding to img_array, of any size

    Returns:
    - the image read from img (OpenCV array, BGR channel order) with the class activation map
      superimposed on it

    Raises:
    - IOError: if the image file cannot be read
    - ValueError: if layer_output or layer_conv is not the name of a layer of the model
    '''

    # read the image the map is to be superimposed on first, so that an unreadable file fails
    # before any expensive computation (cv2.imread returns None instead of raising)
    background = cv2.imread(img)
    if background is None:
        raise IOError("Could not read image file: {}".format(img))

    # get indices of the relevant layers (first layer carrying the requested name)
    layer_names = [layer.name for layer in model.layers]
    for requested_name in (layer_output, layer_conv):
        if requested_name not in layer_names:
            raise ValueError("Model has no layer named '{}'".format(requested_name))
    output_layer_idx = layer_names.index(layer_output)
    conv_layer_idx = layer_names.index(layer_conv)

    # expand img_array to fit into model: (1,w,h,c)
    x = np.expand_dims(img_array, axis=0)
    # predict the corresponding class (argmax over the prediction of the single input image)
    pred_class = np.argmax(model.predict(x))

    # change softmax layer to linear activation layer to obtain better results; the swap is
    # only needed while the modified copy is built, afterwards the original activation is
    # restored so that the model passed in by the caller really stays untouched
    output_layer = model.layers[output_layer_idx]
    original_activation = output_layer.activation
    output_layer.activation = activations.linear
    try:
        # apply changes (to new model instance)
        model_maps = utils.apply_modifications(model)
    finally:
        output_layer.activation = original_activation

    # generate class activation map
    heatmap = visualize_cam(model=model_maps, layer_idx=output_layer_idx, filter_indices=[pred_class], seed_input=x, penultimate_layer_idx=conv_layer_idx)
    heatmap = np.asarray(heatmap)
    if heatmap.ndim == 2:
        # single-channel map - presumably normalized to [0, 1] as returned by newer keras-vis
        # versions (TODO confirm against the installed version); pseudocolor it, result is BGR
        heatmap = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
    else:
        # colored map - keras-vis 0.4.x returns a jet-colored RGB array (TODO confirm against
        # the installed version); OpenCV images are BGR, so swap the channels before blending,
        # otherwise hot (red) regions come out blue in the saved picture
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_RGB2BGR)

    # resize map to the size of the image (cv2.resize expects (width, height))
    heatmap = cv2.resize(heatmap, (background.shape[1], background.shape[0]))
    # superimpose: 60% image, 40% heatmap
    superimposed_img = cv2.addWeighted(background, 0.6, heatmap, 0.4, 0)

    return superimposed_img

Data frame containing one lab picture per class.

In [7]:
# Keep only the pictures whose source is the lab.
img_info_lab = img_info[img_info["source"] == "lab"]

# Take the first listed picture of every class and order the result by class label.
# drop_duplicates replaces the former row-by-row DataFrame.append loop: append was removed
# in pandas 2.0, and the loop hard-coded the number of classes and left the result
# undefined when label 0 was missing. The original row index is preserved.
example_per_label_lab = (
    img_info_lab
    .drop_duplicates(subset="labels_integer", keep="first")
    .sort_values("labels_integer")
)

Generate heatmap for each lab example (one per class).

In [8]:
output_path = os.path.join(original_data_path, "heatmaps/lab/")
# cv2.imwrite does not create missing folders (it just returns False), so make sure the
# target folder exists before writing.
os.makedirs(output_path, exist_ok=True)

for _, row in example_per_label_lab.iterrows():
    path = os.path.join(original_data_path, row["image_path"])
    img = cv2.imread(path)
    # cv2.imread returns None for a missing or unreadable file instead of raising
    if img is None:
        raise IOError("Could not read image file: {}".format(path))
    # resize to 64x64 and scale the pixel values to [0, 1] before passing the image to the
    # model (presumably the preprocessing the model was trained with - verify)
    img = cv2.resize(img, (64, 64))
    img = img/255
    heatmap = generate_heatmap(img, model, "dense_layer2", "maxpooling2d_layer2", path)
    # output name: <species>_heatmap_<original file name>
    filename = os.path.join(output_path, row["species"] + "_heatmap_" + row["filename"])
    print(filename)
    # cv2.imwrite signals failure through its return value only
    if not cv2.imwrite(filename, heatmap):
        raise IOError("Could not write heatmap image: {}".format(filename))

/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/lab/Acer campestre_heatmap_ny1079-01-1.jpg
/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/lab/Acer platanoides_heatmap_wb1565-01-1.jpg
/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/lab/Aesculus hippocastamon_heatmap_ny1016-01-1.jpg
/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/lab/Betula lenta_heatmap_wb1193-01-1.jpg
/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/lab/Betula nigra_heatmap_wb1037-01-1.jpg
/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/lab/Celtis occidentalis_heatmap_pi0060-01-1.jpg
/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/lab/Diospyros virginiana_heatmap_pi0196-01-1.jpg
/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/lab/Magnolia acuminata_heatmap_wb1220-01-1.jpg
/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatm

Data frame containing one field picture per class.

In [9]:
#print(output_path)

In [10]:
# Keep only the pictures whose source is the field.
img_info_field = img_info[img_info["source"] == "field"]

# Take the first listed picture of every class and order the result by class label.
# drop_duplicates replaces the former row-by-row DataFrame.append loop: append was removed
# in pandas 2.0, and the loop hard-coded the number of classes and left the result
# undefined when label 0 was missing. The original row index is preserved.
example_per_label_field = (
    img_info_field
    .drop_duplicates(subset="labels_integer", keep="first")
    .sort_values("labels_integer")
)

In [None]:
output_path = os.path.join(original_data_path,"heatmaps/field/")
# cv2.imwrite does not create missing folders (it just returns False), so make sure the
# target folder exists before writing.
os.makedirs(output_path, exist_ok=True)

for _, row in example_per_label_field.iterrows():
    path = os.path.join(original_data_path, row["image_path"])
    img = cv2.imread(path)
    # cv2.imread returns None for a missing or unreadable file instead of raising
    if img is None:
        raise IOError("Could not read image file: {}".format(path))
    # resize to 64x64 and scale the pixel values to [0, 1] before passing the image to the
    # model (presumably the preprocessing the model was trained with - verify)
    img = cv2.resize(img, (64, 64))
    img = img/255
    heatmap = generate_heatmap(img, model, "dense_layer2", "maxpooling2d_layer2", path)
    # output name: <species>_heatmap_<original file name>
    filename = os.path.join(output_path, row["species"] + "_heatmap_" + row["filename"])
    print(filename)
    # cv2.imwrite signals failure through its return value only
    if not cv2.imwrite(filename, heatmap):
        raise IOError("Could not write heatmap image: {}".format(filename))

/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/field/Acer campestre_heatmap_13291732971024.jpg
/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/field/Acer platanoides_heatmap_1249060544_0000.jpg
/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/field/Aesculus hippocastamon_heatmap_1248106230_0000.jpg
/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/field/Betula lenta_heatmap_1248106459_0000.jpg
/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/field/Betula nigra_heatmap_1258487222_0000.jpg
/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/field/Celtis occidentalis_heatmap_1248106179_0000.jpg
/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/field/Diospyros virginiana_heatmap_12991999785260.jpg
/home/elena/eStep/XAI/Data/LeafSnap/leafsnap-dataset-30subset/heatmaps/field/Magnolia acuminata_heatmap_13001959993766.jpg
/home/elena/eStep/XAI/