## Install, Import, Load Model

In [1]:
import torch

from lucent.optvis import render, param, transform, objectives
from lucent.modelzoo import inceptionv1

from ultralytics import YOLO
from utils import NamedModuleWrapper

In [2]:
yolo = YOLO(model='./models/yolov8-traffic.pt')
device = torch.device(device="cuda" if torch.cuda.is_available() else "cpu")

## An Example

With the diversity objective, we have to submit a batch of images for optimization. The diversity objective then tries to maximize the difference in feature representations between the images in the batch.

Specifically, the objective penalizes cosine similarity between the feature representations.

In [None]:
from lucent.modelzoo.util import get_model_layers
get_model_layers(yolo.model.model)

['model',
 'model_0',
 'model_0_conv',
 'model_0_act',
 'model_1',
 'model_1_conv',
 'model_1_act',
 'model_2',
 'model_2_cv1',
 'model_2_cv1_conv',
 'model_2_cv1_act',
 'model_2_cv2',
 'model_2_cv2_conv',
 'model_2_cv2_act',
 'model_2_m',
 'model_2_m_0',
 'model_2_m_0_cv1',
 'model_2_m_0_cv1_conv',
 'model_2_m_0_cv1_act',
 'model_2_m_0_cv2',
 'model_2_m_0_cv2_conv',
 'model_2_m_0_cv2_act',
 'model_2_m_1',
 'model_2_m_1_cv1',
 'model_2_m_1_cv1_conv',
 'model_2_m_1_cv1_act',
 'model_2_m_1_cv2',
 'model_2_m_1_cv2_conv',
 'model_2_m_1_cv2_act',
 'model_3',
 'model_3_conv',
 'model_3_act',
 'model_4',
 'model_4_cv1',
 'model_4_cv1_conv',
 'model_4_cv1_act',
 'model_4_cv2',
 'model_4_cv2_conv',
 'model_4_cv2_act',
 'model_4_m',
 'model_4_m_0',
 'model_4_m_0_cv1',
 'model_4_m_0_cv1_conv',
 'model_4_m_0_cv1_act',
 'model_4_m_0_cv2',
 'model_4_m_0_cv2_conv',
 'model_4_m_0_cv2_act',
 'model_4_m_1',
 'model_4_m_1_cv1',
 'model_4_m_1_cv1_conv',
 'model_4_m_1_cv1_act',
 'model_4_m_1_cv2',
 'model_

In [4]:
model = yolo
model = model.to(device=device).eval()
_ = model(source=torch.randn(1, 3, 640, 640).to(device=device))	# 强制模型触发前向传播
batch_param_f = lambda: param.image(640, batch=8) # type: ignore
obj_f = objectives.channel('model_model_0_conv', 2)
_ = render.render_vis(model=model, objective_f=obj_f, param_f=batch_param_f, show_inline=True)


0: 640x640 (no detections), 7.1ms
Speed: 0.0ms preprocess, 7.1ms inference, 9.0ms postprocess per image at shape (1, 3, 640, 640)
<function hook_model.<locals>.hook at 0x000001B89B15FEC0>


  0%|          | 0/512 [00:00<?, ?it/s]


0: 640x640 (no detections), 8.7ms
1: 640x640 (no detections), 8.7ms
2: 640x640 (no detections), 8.7ms
3: 640x640 (no detections), 8.7ms
4: 640x640 (no detections), 8.7ms
5: 640x640 (no detections), 8.7ms
6: 640x640 (no detections), 8.7ms
7: 640x640 (no detections), 8.7ms
Speed: 0.0ms preprocess, 8.7ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)


  0%|          | 0/512 [00:00<?, ?it/s]


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

## 可视化某个`layer`的激活值对不同输入图片的差异：

虽然一个网络组件通常对多个物体敏感，但是原始的特征图可视化通常直针对单个图片.

`objectives.diversity`的作用就是放大某个`layer`对不同物体的激活值差异，并在多个特征图上体现

In [None]:
batch_param_f = lambda: param.image(128, batch=8) # type: ignore

# objectives.channel -> 可视化某一层中某个通道的所有神经元，__call__返回目标函数本身，目标函数返回整个通道的激活值负均值
# objectives.diversity -> 是放大某个layer对不同物体的激活值差异，并在多个特征图上体现，目标函数返回整个层得激活值负均值

obj_channel = objectives.channel("mixed4a", 97)
_ = render.render_vis(model, obj_channel, batch_param_f, show_inline=True)
obj_diversity = objectives.diversity("mixed4a")
_ = render.render_vis(model, obj_diversity, batch_param_f, show_inline=True)
_ = render.render_vis(model, obj_channel - 1e2 * obj_diversity, batch_param_f, show_inline=True)

## 可视化某个`neuron`的特征:

In [None]:
"""Visualize a single neuron of a single channel.

Defaults to the center neuron. When width and height are even numbers, we
choose the neuron in the bottom right of the center 2x2 neurons.

Odd width & height:               Even width & height:

+---+---+---+                     +---+---+---+---+
|   |   |   |                     |   |   |   |   |
+---+---+---+                     +---+---+---+---+
|   | X |   |                     |   |   |   |   |
+---+---+---+                     +---+---+---+---+
|   |   |   |                     |   |   | X |   |
+---+---+---+                     +---+---+---+---+
									|   |   |   |   |
									+---+---+---+---+

"""

batch_param_f = lambda: param.image(128, batch=8) # type: ignore
obj_neuron = objectives.neuron("mixed4a", 97, 3, 3)
print(obj_neuron(model))

In [None]:
batch_param_f = lambda: param.image(128, batch=8) # type: ignore
obj_channel = objectives.channel("mixed4a", 97)
_ = render.render_vis(model, obj_channel, batch_param_f, show_inline=True)

## More Examples

More examples reproducing results from the Distill article [Feature Visualization](https://distill.pub/2017/feature-visualization/).

In [None]:
# Different curvy facets

obj = objectives.channel("mixed4a", 97) - 1e3 * objectives.diversity("mixed4a") # here we use a higher weight on the diversity term

_ = render.render_vis(model, obj, batch_param_f, show_inline=True)

In [None]:
# Different shapes with the same fur texture

obj = objectives.channel("mixed4a", 143) - 1e2 * objectives.diversity("mixed4a")

_ = render.render_vis(model, obj, batch_param_f, show_inline=True)

In [None]:
# Cars and Cats

obj = objectives.channel("mixed4e", 55) - 1e2 * objectives.diversity("mixed4e")

_ = render.render_vis(model, obj, batch_param_f, show_inline=True)

## Try it out!

Select your favorite channel or neuron or just pick a random one! Try adjusting the weight on the diversity term to see how that makes a difference!

In [None]:
# Flowers and err... other stuff?

batch_param_f = lambda: param.image(128, batch=4, decorrelate=False) # disable channel decorrelation for more trippy images

obj = objectives.channel("mixed4d_3x3_bottleneck_pre_relu_conv", 139) - 1e3 * objectives.diversity("mixed4d_3x3_bottleneck_pre_relu_conv")

_ = render.render_vis(model, obj, batch_param_f, show_inline=True)