<a href="https://colab.research.google.com/github/IverMartinsen/MastersThesis/blob/main/Notebooks/greenland_halibut_pixel_relevance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### This notebook shows feature relevance attribution methods applied to the Greenland halibut model.
-----------------------------------------------------------------------------------------------------

Clone repository to gain access to modules.

In [None]:
!git clone https://github.com/IverMartinsen/MastersThesis.git

Import modules and load images and variables.

In [None]:
import sys

# Add the Python directory of the cloned repository to the import path so
# that the project's `modules` package can be imported in the next cell
# (assumes the repository was cloned into /content).
sys.path.append('/content/MastersThesis/Python')

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import pandas as pd
from modules.image import normalize
from modules.analysis.guided_backpropagation import build_gb_model_nonseq
from modules.analysis.integrated_gradients import generate_path_inputs, integral_approximation

# Load pretrained model
model = tf.keras.models.load_model('/content/drive/Othercomputers/Min bærbare datamaskin/UiT/Forberedende forsøk/testrun8_exception')

# Load dataframe
df = pd.read_csv('/content/drive/Othercomputers/Min bærbare datamaskin/UiT/Data/Grønlandskveiteotolitter/dataframe.csv')

# Positional indices of the rows that have a recorded length
idx = np.flatnonzero(df['length'].notna().to_numpy())

# Keep exactly those rows in the dataframe. The same indices are used to
# select the images below, so row k of `df` always describes `images[k]`.
# (A plain `df.dropna()` would also drop rows with a NaN in any other column
# and could silently shift the dataframe relative to the image array.)
df = df.iloc[idx]

# Load images and keep the ones matching the retained dataframe rows
images = np.load('/content/drive/MyDrive/images256.npy')[idx]

Define functions for computing integrated gradients.

In [None]:
def compute_gradients(inputs):
    """Return the gradients of the pretrained model's output w.r.t. `inputs`.

    `inputs` is passed to `model` unchanged and is explicitly watched by the
    gradient tape, so it does not need to consist of trainable variables.
    The returned gradients follow the structure of `inputs`.
    """
    with tf.GradientTape() as grad_tape:
        grad_tape.watch(inputs)
        predictions = model(inputs)

    return grad_tape.gradient(predictions, inputs)

@tf.function
def integrated_gradients(baseline,
                         image,
                         sex, 
                         m_steps=50):
    """Compute integrated gradients of the pretrained model for one image.

    Parameters
    ----------
    baseline : reference image marking the start of the interpolation path.
    image : image to explain; must be compatible with `baseline` for
        subtraction (assumed to be a single (height, width, channels) image
        -- TODO confirm against `generate_path_inputs`).
    sex : sex label of the individual; it is repeated for every
        interpolated image on the path.
    m_steps : number of interpolation intervals; `m_steps + 1` images are
        evaluated.

    Returns
    -------
    Tensor holding the attributions summed over axis 2 of the image.
    """
    n_points = m_steps + 1

    # Interpolation coefficients running from the baseline (0) to the image (1).
    alphas = tf.linspace(start=0.0, stop=1.0, num=n_points)

    # Images along the path between baseline and input image.
    path_images = generate_path_inputs(baseline=baseline, image=image, alphas=alphas)

    # Every interpolated image is paired with the same sex label.
    path_sex = tf.constant(sex, shape=(n_points, ))

    # Gradients of the model output with respect to the image input only.
    path_gradients = compute_gradients((path_sex, path_images))[1]

    # Approximate the path integral by averaging the gradients.
    mean_gradients = integral_approximation(gradients=path_gradients)

    # Scale the averaged gradients by the distance from the baseline.
    difference = tf.cast(image - baseline, mean_gradients.dtype)
    attributions = difference * mean_gradients

    return tf.reduce_sum(attributions, axis=2)


Define model for computing feature relevance using guided backpropagation.

In [None]:
# Build `gb_model`: a model with the same inputs as the pretrained `model`
# (sex label, image) in which the Xception base is passed through
# `build_gb_model_nonseq`, and then copy the pretrained weights into it.
# NOTE(review): the layer stack below presumably mirrors the training script
# of `model`; this is required for the weight transfer at the end -- confirm.

# Create layer for mapping the categorical (string) labels to integers
Index = tf.keras.layers.StringLookup()
# Fit the lookup vocabulary on the 'sex' column of the dataframe
Index.adapt(tf.constant(df['sex']))

# Create layer for one-hot-encoding the integer labels
Encoding = tf.keras.layers.CategoryEncoding(num_tokens=Index.vocabulary_size(), output_mode='one_hot')

# Define Xception base model without classification head. Use global average pooling on output.
base_model = tf.keras.applications.xception.Xception(input_shape=model.input_shape[1][1:], include_top=False, pooling='avg')
# Wrap the base model for guided backpropagation.
# NOTE(review): presumably this alters the gradient of the Activation layers
# -- confirm in modules.analysis.guided_backpropagation.
gb_base_model = build_gb_model_nonseq(base_model, tf.keras.layers.Activation)

# Define full model. Note that by setting training=False in the base model
# we always run the base model in inference mode.
img_input = tf.keras.layers.Input(model.input_shape[1][1:])
cat_input = tf.keras.Input(shape=(1,), name='gender', dtype='string')

# One-hot encoded sex label
gender = Encoding(Index(cat_input))

# First we process the images
x = tf.keras.applications.xception.preprocess_input(img_input)
x = tf.keras.layers.RandomTranslation(0, 0.1)(x)
x = tf.keras.layers.RandomRotation(0.1, fill_mode='constant')(x)
x = gb_base_model(x, training=False)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Dense(3, 'relu')(x)
# Then we use the dot product with the one-hot sex vector to pick out the
# gender conditional age prediction
outputs = tf.keras.layers.Dot(axes=1)([x, gender])
# Finally we assemble the model; inputs are ordered (sex label, image)
gb_model = tf.keras.models.Model([cat_input, img_input], outputs)

# Transfer weights from pretrained model to gb model. The weights are assigned
# by position, so both models must hold their weights in the same order.
gb_model.set_weights(model.get_weights())

Define function for obtaining pixel relevance attributions by guided backpropagation.

In [None]:
def compute_gb_gradients(inputs):
    """Return the gradients of the guided backpropagation model w.r.t. `inputs`.

    Works like a plain gradient computation, except that the forward pass
    goes through `gb_model`. `inputs` is watched explicitly by the tape and
    the returned gradients follow the structure of `inputs`.
    """
    with tf.GradientTape() as grad_tape:
        grad_tape.watch(inputs)
        predictions = gb_model(inputs)

    return grad_tape.gradient(predictions, inputs)

Choose the images to use for the analysis and save their indices together with age and filename.

In [None]:
# Positions of the selected images, one for each of the ages 1, 2, ..., 26
idxs = np.array([
    855, 121, 46, 62, 38, 56, 24, 7, 13, 3, 14, 1, 0,
    36, 765, 753, 738, 906, 544, 563, 496, 921, 1242, 969, 916, 970,
])

# Model inputs for the selection: (sex labels, images)
inputs = (tf.convert_to_tensor(df['sex'].iloc[idxs]), tf.constant(images[idxs]))

# Ages predicted by the model, rounded to whole years
raw_predictions = model.predict(inputs)
preds = raw_predictions.round().astype(int).flatten()

# Save references in a csv-file
selected_rows = df.iloc[idxs]
file_selection = pd.DataFrame({
    'filename': selected_rows['filename'],
    'age': selected_rows['age'],
    'predicted_age': preds,
    'index': idxs,
})
file_selection.to_csv('file_selection.csv')

Compute pixel relevance for selection using 5 different methods

In [None]:
def _guided_integrated_gradients(baseline, image, sex, m_steps=50):
    """Integrated gradients computed with the guided backpropagation model.

    Follows the same recipe as `integrated_gradients`, but the gradients
    along the interpolation path come from `compute_gb_gradients` (i.e. from
    `gb_model`) instead of from the plain pretrained model.

    Parameters
    ----------
    baseline : reference image marking the start of the interpolation path.
    image : single image to explain.
    sex : sex label, repeated for every interpolated image.
    m_steps : number of interpolation intervals.

    Returns
    -------
    Tensor holding the attributions summed over axis 2 of the image.
    """
    alphas = tf.linspace(start=0.0, stop=1.0, num=m_steps+1)
    path_images = generate_path_inputs(baseline=baseline, image=image, alphas=alphas)
    path_sex = tf.constant(sex, shape=(m_steps+1, ))
    # Only the gradients with respect to the image input are of interest.
    path_gradients = compute_gb_gradients((path_sex, path_images))[1]
    avg_gradients = integral_approximation(gradients=path_gradients)
    attributions = tf.cast(image - baseline, avg_gradients.dtype) * avg_gradients
    return tf.reduce_sum(attributions, axis=2)


# Create input tuple of relevant data: (sex labels, images)
inputs = (tf.convert_to_tensor(df['sex'].iloc[idxs]), tf.constant(images[idxs]))

# Create baseline image (all zeros) with the shape of a single image
baseline = tf.constant(0, shape=inputs[1].shape[1:], dtype=tf.float32)

# Relevance by gradients, summed over the last image axis
vanilla_gradients = np.sum(compute_gradients(inputs)[1], axis=3)

# Relevance by baseline gradients (input times gradient)
baseline_gradients = np.sum(inputs[1], axis=3) * vanilla_gradients

# Relevance by guided backpropagation
gb_gradients = np.sum(compute_gb_gradients(inputs)[1], axis=3)

# Relevance by integrated gradients
int_gradients = np.zeros(shape=inputs[1].shape[:-1])
for i in range(len(idxs)):
    int_gradients[i] = integrated_gradients(baseline, inputs[1][i], inputs[0][i].numpy())

# Relevance by guided integrated gradients. These must be computed with the
# guided backpropagation model; calling `integrated_gradients` here would
# only reproduce `int_gradients`.
int_gb_gradients = np.zeros(shape=inputs[1].shape[:-1])
for i in range(len(idxs)):
    int_gb_gradients[i] = _guided_integrated_gradients(baseline, inputs[1][i], inputs[0][i].numpy())

Plot selection along with pixel relevance

In [None]:
# Draw images in a 4-by-7 grid. The grid has more panels than there are
# selected images, so the surplus panels are left empty.
fig, axes = plt.subplots(4, 7, facecolor='black')

for i, ax in enumerate(axes.flatten()):
    # Only draw in panels that have a corresponding image. An explicit bound
    # check is used instead of catching IndexError, so that a genuine
    # indexing mistake is not silently hidden.
    if i < len(idxs):
        # Draw integrated gradient image
        ax.imshow(normalize(np.abs(int_gradients[i])), plt.cm.hot)
        # Superimpose original image
        ax.imshow(images[idxs[i]]/255., alpha=0.2)
        # Add text with age and, in parentheses, the predicted age
        ax.text(0, 0, f'{i+1} ({preds[i]})', color = 'white', size=4)
    ax.axis('off')

# Save figure
plt.savefig(
    'selection.jpg', 
    dpi=300, 
    bbox_inches="tight", 
    facecolor='black')

Draw comparison using a small subset of images

In [None]:
# One array of images per figure column: the originals followed by the
# relevance maps of each method
methods = (
    inputs[1]/255.,
    vanilla_gradients,
    baseline_gradients,
    gb_gradients,
    int_gradients,
    int_gb_gradients,
)

# Column titles, in the same order as `methods`
titles = (
    'Original\n\n',
    'Gradients\n\n',
    'Baseline\nGradients\n',
    'Guided\nBackpropagation\n',
    'Integrated\nGradients\n',
    'Integrated\nGuided\nGradients',
)

# Colormap used for all panels
cmap = plt.cm.hot

# Subset of images to compare (each entry is age - 1)
selection = [1, 6, 15, 24]

# One row per selected image, one column per method
fig, axes = plt.subplots(4, 6, facecolor='black')

for row, image_idx in enumerate(selection):
    for col, (method, title) in enumerate(zip(methods, titles)):
        panel = axes[row][col]
        panel.imshow(normalize(np.abs(method[image_idx])), cmap)
        panel.axis('off')
        # Column titles go on the top row only
        if row == 0:
            panel.set_title(title, color='white', size=5)
        # Label each row with the age in the first column
        if col == 0:
            panel.text(1, 1, str(image_idx + 1), color='white', size = 5)

plt.subplots_adjust(wspace=-0.3, hspace=0.1)

# Save figure
plt.savefig(
    'comparison.jpg', 
    dpi=300,
    facecolor='black',
    bbox_inches="tight")