-
Notifications
You must be signed in to change notification settings - Fork 1
/
attention_rollout.py
66 lines (48 loc) · 1.38 KB
/
attention_rollout.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
def attention_rollout(As):
    """Computes attention rollout from the given list of attention matrices.

    https://arxiv.org/abs/2005.00928

    Each layer's attention is averaged with the identity matrix
    (``0.5*A + 0.5*I``) to account for the residual (skip) connections,
    then the corrected layers are multiplied together from first to last.
    NOTE: the previous implementation seeded the product with the raw
    ``As[0]``, so the first layer never received the identity mix; the
    paper applies the correction uniformly to every layer.

    Args:
        As: list of square attention matrices, one per layer, ordered
            from the first layer to the last.  Assumed row-stochastic
            (rows sum to 1) — TODO confirm against the caller.

    Returns:
        The rolled-out attention matrix, same shape as an element of As.
    """
    eye = torch.eye(As[0].shape[-1], device=As[0].device)
    rollout = eye
    for A in As:
        # 0.5*A + 0.5*eye handles the skip connection; rows still sum to 1
        # when the rows of A do, so no extra renormalization is needed.
        rollout = torch.matmul(0.5*A + 0.5*eye, rollout)
    return rollout
def imshow(img):
    """Render a single (C, H, W) image tensor with matplotlib.

    Args:
        img: CPU tensor in channel-first layout; converted to the
            channel-last (H, W, C) layout that ``plt.imshow`` expects.
    """
    channel_last = np.transpose(img.numpy(), (1, 2, 0))
    plt.imshow(channel_last)
    plt.show()
def show_images(imgs, **kwargs):
    """Display a batch of images as a single grid figure.

    Args:
        imgs: batch of image tensors in (N, C, H, W) layout.
        **kwargs: forwarded to ``plt.figure`` (e.g. ``figsize``).

    Returns:
        The matplotlib figure the grid was drawn on.
    """
    grid = torchvision.utils.make_grid(imgs.clone())
    fig = plt.figure(**kwargs)
    imshow(grid)
    return fig
def show_attention(imgs, rollout, threshold=0.2, alpha=0.5, **kwargs):
    """Shows the images provided with the given attention masks.

    High-attention regions are highlighted by blending a red overlay
    into the images and showing the result as a grid.

    Args:
        imgs: batch of image tensors, (N, C, H, W); assumed to have
            values in [0, 1] — TODO confirm against the caller.
        rollout: one attention map per image (e.g. the output of
            ``attention_rollout``); each map is min-max normalized here.
        threshold: cutoff in [0, 1] on the normalized attention above
            which a pixel is marked (previously hard-coded to 0.2).
        alpha: blend weight of the red overlay, 0 = no overlay
            (previously hard-coded to 0.5).
        **kwargs: forwarded to ``plt.figure`` (e.g. ``figsize``).

    Returns:
        The matplotlib figure the grid was drawn on.
    """
    imgs = imgs.clone()
    rollout = rollout.clone()
    # Normalize each attention map to [0, 1] in place (safe: operating on
    # the clone).  Guard the division: a constant map has zero range and
    # would otherwise produce NaNs in the mask.
    for r in rollout:
        r -= r.min()
        peak = r.max()
        if peak > 0:
            r /= peak
    # Binary mask in the red channel where attention exceeds the threshold.
    mask = torch.zeros_like(imgs)
    mask[:, 0] = (rollout > threshold).squeeze()
    # Blend: keep the image where the mask is off, push toward red where on.
    im = torchvision.utils.make_grid(
        (1 - alpha)*imgs + alpha*((1 - mask)*imgs + mask))
    # show images
    fig = plt.figure(**kwargs)
    imshow(im)
    return fig