In [None]:
import tensorflow as tf
import seaborn as sns
import numpy as np
import matplotlib

from IPython.display import Image, display
from typing import Union

# render matplotlib figures inline in the notebook output
%matplotlib inline
# default size applied to every figure created in this notebook
matplotlib.rcParams['figure.figsize'] = (20, 8)

In [None]:
# path to the serialized graph (.pb) file that the notebook loads and inspects
model_path = "ResnetV2_50.pb" # download from https://github.com/tensorflow/models/tree/master/research/slim#Pretrained
# output class whose incoming weights are analysed below
# NOTE(review): goldfish is usually index 1 in the 0-based ImageNet ordering (2 when
# 1-based) — confirm that 988 and the "goldfish" label refer to the same class
class_id = 988 # goldfish (classes are 1-indexed)

In [None]:
# Read the serialized model from disk and parse it into a GraphDef message
with tf.io.gfile.GFile(model_path, 'rb') as pb_file:
    graph_def = tf.compat.v1.GraphDef.FromString(pb_file.read())

In [None]:
# Import the GraphDef into a dedicated tf.Graph instead of the process-wide default
# graph. Re-running this cell then rebuilds `graph` from scratch; importing into a
# graph that already holds the model would add a second copy of its nodes, and
# the operations looked up by position below would come from that copy.
graph = tf.Graph()
with graph.as_default():
    # name='' keeps the node names exactly as stored in the file (no import prefix)
    tf.compat.v1.import_graph_def(graph_def, name='')

In [None]:
def get_tensor(tensor_name: str) -> np.ndarray:
    """Evaluate one tensor of the imported model and return its value.

    The notebook-level ``graph`` (built once from ``graph_def``) is reused, so
    the GraphDef is not imported again on every call.

    Args:
        tensor_name: full tensor name including the output index, e.g. the
            ``.name`` of a tensor taken from ``graph``.

    Returns:
        The value produced by ``sess.run`` for that tensor.

    Raises:
        KeyError / ValueError: propagated from ``get_tensor_by_name`` when the
            name does not exist in the graph or is malformed.
    """
    # Bind the session to the graph that was already imported rather than
    # importing graph_def into the default graph again for each lookup.
    with tf.compat.v1.Session(graph=graph) as sess:
        # Get input and output tensors
        input_tensor = graph.get_tensor_by_name("input:0")
        output_tensor = graph.get_tensor_by_name(tensor_name)

        # The feed is random noise; it only matters for tensors that actually
        # depend on the input placeholder.
        input_data = np.random.randn(1, 224, 224, 3)
        return sess.run(output_tensor, feed_dict={input_tensor: input_data})

In [None]:
def get_images(tensor_name: str, feature_id: int) -> Image:
    """Build an IPython ``Image`` that points at the OpenAI Microscope dataset examples.

    ``tensor_name`` is the operation name (its ``/`` separators are escaped for
    the URL) and ``feature_id`` is the channel index inserted into the file name.
    """
    escaped_op = tensor_name.replace('/', '%252F')
    microscope_url = f"https://openaipublic.blob.core.windows.net/microscopeprod/2020-07-25/2020-07-25/resnetv2_50_slim/lucid.dataset_examples/_dataset_examples/dataset%3Dimagenet%26op%3D{escaped_op}%253A0/channel_{feature_id}_40.png"
    return Image(url=microscope_url)

In [None]:
def to_op(tensor: Union[tf.Tensor, tf.Operation]) -> tf.Operation:
    """Return the operation behind a tensor; anything else is passed through unchanged."""
    return tensor.op if isinstance(tensor, tf.Tensor) else tensor

In [None]:
def go_backwards(layer: Union[tf.Operation, str], num_layers: int=1) -> tf.Operation:
    """Walk upstream through the graph, always following the first input.

    Args:
        layer: operation to start from, or the name of an operation that is
            looked up in the notebook-level ``graph``.
        num_layers: number of steps to take (default 1).

    Returns:
        The operation reached after ``num_layers`` steps (the start operation
        itself when ``num_layers`` is 0).

    Raises:
        IndexError: if an operation on the path has no inputs to follow.
    """
    if isinstance(layer, str):
        layer = graph.get_operation_by_name(layer)
    current_op = to_op(layer)
    for _ in range(num_layers):
        if len(current_op.inputs) == 0:
            raise IndexError(
                f"operation {current_op.name!r} has no inputs to step back through"
            )
        first_input = current_op.inputs[0]
        # trace each tensor that is stepped through so the path is visible in the output
        print(first_input)
        current_op = to_op(first_input)
    return current_op


In [None]:
# visualise the final few operations: the last ten entries of the graph's operation list
graph.get_operations()[-10:]

In [None]:
# load the linear layer by traversing from the end:
# take the last operation in the graph's operation list as the output, then follow
# first inputs back five steps
output = graph.get_operations()[-1]
linear_layer = go_backwards(output, 5)
linear_layer

Weirdly the model has 1001 classes - the usual ImageNet classes but with 1-based indexing.  
Also, the final Conv2D is effectively the dense layer.

In [None]:
# list the tensors feeding the linear layer (the one at index 1 is read as its weights in the next cell)
list(linear_layer.inputs)

In [None]:
# evaluate the linear layer's second input, treated here as its weight tensor, and show its shape
weight_matrix = get_tensor(linear_layer.inputs[1].name)
weight_matrix.shape

In [None]:
# weights connecting every input feature to the chosen class
relevant_weights = weight_matrix[0, 0, :, class_id]
# feature indices ordered from the largest weight down to the smallest
ordering = np.argsort(-relevant_weights)
# smallest and largest weight, shown as the cell output
(relevant_weights.min(), relevant_weights.max())

In [None]:
# plot a histogram of the distribution of the weights
# (the trailing semicolon keeps the Axes repr out of the cell output)
sns.histplot(relevant_weights, bins=100);

Most weights are close to zero, but there are a few that are much larger.  
Large negative weights are very rare.

In [None]:
# look at the features related to the largest weights
# (ordering is sorted by descending weight, so its first five entries are the top five features)
important_features = ordering[:5]
important_features

In [None]:
# display the weight values of those top features
relevant_weights[important_features]

In [None]:
# select the main feature to visualise: the one with the largest weight for the class
main_feature = important_features[0]
main_feature

In [None]:
# backtrack to the residual stream: follow first inputs four steps back from the linear layer
residual_stream = go_backwards(linear_layer, 4)
residual_stream

In [None]:
# dataset examples that maximise the last feature activation
# (the picture is addressed by operation name and channel index)
display(get_images(residual_stream.name, main_feature))

In [None]:
# go back in the residual stream by one step
# NOTE: residual_stream is rebound from its own previous value, so re-running this
# cell on its own moves one step further back each time
residual_stream = go_backwards(residual_stream)
residual_stream

In [None]:
# display the dataset examples that maximise the feature activation
# at the operation residual_stream currently points to
display(get_images(residual_stream.name, main_feature))

In [None]:
# go back in the residual stream by one more step
# NOTE: residual_stream is rebound from its own previous value, so re-running this
# cell on its own moves one step further back each time
residual_stream = go_backwards(residual_stream)
residual_stream

In [None]:
# display the dataset examples that maximise the feature activation
# at the operation residual_stream currently points to
display(get_images(residual_stream.name, main_feature))

In [None]:
# find the shortcut: step back twice from the current residual-stream operation,
# then evaluate that operation's second input, treated here as its weights
shortcut = go_backwards(residual_stream, 2)
weights = get_tensor(shortcut.inputs[1].name)
weights.shape

The residual stream changes shape here, so the shortcut is a matrix that maps from the previous shape to the new one.  
These connections seem to matter less than the simple add that happens between most layers.

In [None]:
# plot distribution of weights going into the main feature
# Index by important_features[0] (the feature originally chosen for the class) instead
# of main_feature: this cell rebinds main_feature below, so reading it here would make
# a second run of the cell select the column of the already-updated feature.
feature_weights = weights[0, 0, :, important_features[0]]
sns.histplot(feature_weights, bins=100)
# continue the trace with the upstream feature that has the largest shortcut weight
main_feature = feature_weights.argmax()
main_feature

In [None]:
# display the dataset examples that maximise the feature going backwards in the network
residual_stream = shortcut
for _ in range(6):
    # always step back at least once, then keep stepping until the operation
    # name ends with "add"
    while True:
        residual_stream = go_backwards(residual_stream)
        if residual_stream.name.endswith("add"):
            break
    display(get_images(residual_stream.name, main_feature))

These generally seem to be less relevant to the class, suggesting there is an important change between these two layers.