-
Notifications
You must be signed in to change notification settings - Fork 387
/
perceptual_path_length.py
185 lines (161 loc) · 8.53 KB
/
perceptual_path_length.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# Copyright The Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Literal, Optional, Tuple, Union
from torch import Tensor, nn
from torchmetrics.functional.image.lpips import _LPIPS
from torchmetrics.functional.image.perceptual_path_length import (
GeneratorType,
_perceptual_path_length_validate_arguments,
_validate_generator_model,
perceptual_path_length,
)
from torchmetrics.metric import Metric
from torchmetrics.utilities.imports import _TORCHVISION_AVAILABLE
# The class docstring example is registered as a doctest to skip when torchvision is unavailable,
# since the constructor below raises ModuleNotFoundError in that case.
if not _TORCHVISION_AVAILABLE:
    __doctest_skip__ = ["PerceptualPathLength"]
class PerceptualPathLength(Metric):
    r"""Computes the perceptual path length (`PPL`_) of a generator model.

    The perceptual path length can be used to measure the consistency of interpolation in latent-space models. It is
    defined as

    .. math::
        PPL = \mathbb{E}\left[\frac{1}{\epsilon^2} D(G(I(z_1, z_2, t)), G(I(z_1, z_2, t+\epsilon)))\right]

    where :math:`G` is the generator, :math:`I` is the interpolation function, :math:`D` is a similarity metric,
    :math:`z_1` and :math:`z_2` are two sets of latent points, and :math:`t` is a parameter between 0 and 1. The metric
    thus works by interpolating between two sets of latent points, and measuring the similarity between the generated
    images. The expectation is approximated by sampling :math:`z_1` and :math:`z_2` from the generator, and averaging
    the calculated distances. The similarity metric :math:`D` is by default the `LPIPS`_ metric, but can be changed by
    setting the `sim_net` argument.

    The provided generator model must have a `sample` method with signature `sample(num_samples: int) -> Tensor` where
    the returned tensor has shape `(num_samples, z_size)`. If the generator is conditional, it must also have a
    `num_classes` attribute. The `forward` method of the generator must have signature `forward(z: Tensor) -> Tensor`
    if `conditional=False`, and `forward(z: Tensor, labels: Tensor) -> Tensor` if `conditional=True`. The returned
    tensor should have shape `(num_samples, C, H, W)` and be scaled to the range [0, 255].

    .. note:: using this metric with the default feature extractor requires that ``torchvision`` is installed.
        Either install as ``pip install torchmetrics[image]`` or ``pip install torchvision``

    As input to ``forward`` and ``update`` the metric accepts the following input

    - ``generator`` (:class:`~torch.nn.Module`): Generator model, with specific requirements. See above.

    As output of `forward` and `compute` the metric returns the following output

    - ``ppl_mean`` (:class:`~torch.Tensor`): float scalar tensor with mean PPL value over distances
    - ``ppl_std`` (:class:`~torch.Tensor`): float scalar tensor with std PPL value over distances
    - ``ppl_raw`` (:class:`~torch.Tensor`): 1d float tensor with the raw PPL distances

    Args:
        num_samples: Number of samples to use for the PPL computation.
        conditional: Whether the generator is conditional or not (i.e. whether it takes labels as input).
        batch_size: Batch size to use for the PPL computation.
        interpolation_method: Interpolation method to use. Choose from 'lerp', 'slerp_any', 'slerp_unit'.
        epsilon: Spacing between the points on the path between latent points.
        resize: Resize images to this size before computing the similarity between generated images.
        lower_discard: Lower quantile to discard from the distances, before computing the mean and standard deviation.
        upper_discard: Upper quantile to discard from the distances, before computing the mean and standard deviation.
        sim_net: Similarity network to use. Can be a `nn.Module` or one of 'alex', 'vgg', 'squeeze', where the three
            latter options correspond to the pretrained networks from the `LPIPS`_ paper.
        kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.

    Raises:
        ModuleNotFoundError:
            If ``torchvision`` is not installed.
        ValueError:
            If ``num_samples`` is not a positive integer.
        ValueError:
            If `conditional` is not a boolean.
        ValueError:
            If ``batch_size`` is not a positive integer.
        ValueError:
            If ``interpolation_method`` is not one of 'lerp', 'slerp_any', 'slerp_unit'.
        ValueError:
            If ``epsilon`` is not a positive float.
        ValueError:
            If ``resize`` is not a positive integer.
        ValueError:
            If ``lower_discard`` is not a float between 0 and 1 or None.
        ValueError:
            If ``upper_discard`` is not a float between 0 and 1 or None.
        ValueError:
            If ``sim_net`` is neither a ``nn.Module`` nor one of 'alex', 'vgg', 'squeeze'.

    Example::
        >>> from torchmetrics.image import PerceptualPathLength
        >>> import torch
        >>> _ = torch.manual_seed(42)
        >>> class DummyGenerator(torch.nn.Module):
        ...     def __init__(self, z_size) -> None:
        ...         super().__init__()
        ...         self.z_size = z_size
        ...         self.model = torch.nn.Sequential(torch.nn.Linear(z_size, 3*128*128), torch.nn.Sigmoid())
        ...     def forward(self, z):
        ...         return 255 * (self.model(z).reshape(-1, 3, 128, 128) + 1)
        ...     def sample(self, num_samples):
        ...         return torch.randn(num_samples, self.z_size)
        >>> generator = DummyGenerator(2)
        >>> ppl = PerceptualPathLength(num_samples=10)
        >>> ppl(generator)  # doctest: +SKIP
        (tensor(0.2371),
         tensor(0.1763),
         tensor([0.3502, 0.1362, 0.2535, 0.0902, 0.1784, 0.0769, 0.5871, 0.0691, 0.3921]))

    """

    is_differentiable: bool = False
    # NOTE(review): PPL is commonly reported as "lower is better"; the flag is left unchanged here because
    # it is part of the public interface -- confirm the intended value before changing it.
    higher_is_better: Optional[bool] = True
    full_state_update: bool = True

    # Similarity network used to compare generated images; assigned in ``__init__``.
    net: nn.Module
    # Name of the attribute that holds the feature/similarity network.
    feature_network: str = "net"

    def __init__(
        self,
        num_samples: int = 10_000,
        conditional: bool = False,
        batch_size: int = 128,
        interpolation_method: Literal["lerp", "slerp_any", "slerp_unit"] = "lerp",
        epsilon: float = 1e-4,
        resize: Optional[int] = 64,
        lower_discard: Optional[float] = 0.01,
        upper_discard: Optional[float] = 0.99,
        sim_net: Union[nn.Module, Literal["alex", "vgg", "squeeze"]] = "vgg",
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        if not _TORCHVISION_AVAILABLE:
            raise ModuleNotFoundError(
                "Metric `PerceptualPathLength` requires torchvision which is not installed."
                " Install with `pip install torchvision` or `pip install torchmetrics[image]`"
            )
        # Shared validation helper from the functional implementation; see ``Raises`` in the class docstring.
        _perceptual_path_length_validate_arguments(
            num_samples, conditional, batch_size, interpolation_method, epsilon, resize, lower_discard, upper_discard
        )
        self.num_samples = num_samples
        self.conditional = conditional
        self.batch_size = batch_size
        self.interpolation_method = interpolation_method
        self.epsilon = epsilon
        self.resize = resize
        self.lower_discard = lower_discard
        self.upper_discard = upper_discard

        # A user-supplied module is used as-is; a string selects one of the pretrained LPIPS backbones.
        if isinstance(sim_net, nn.Module):
            self.net = sim_net
        elif sim_net in ["alex", "vgg", "squeeze"]:
            self.net = _LPIPS(pretrained=True, net=sim_net, resize=resize)
        else:
            raise ValueError(f"sim_net must be a nn.Module or one of 'alex', 'vgg', 'squeeze', got {sim_net}")

    def update(self, generator: GeneratorType) -> None:
        """Update the generator model.

        The generator is validated with ``_validate_generator_model`` against the ``conditional`` setting and
        then stored as a plain attribute on the metric, replacing any previously stored generator.

        Args:
            generator: Generator model to evaluate. See the class docstring for its requirements.

        """
        _validate_generator_model(generator, self.conditional)
        self.generator = generator

    def compute(self) -> Tuple[Tensor, Tensor, Tensor]:
        """Compute the perceptual path length.

        Delegates to the functional ``perceptual_path_length`` using the generator stored by ``update`` and the
        settings given at construction time, running on the metric's current device.

        Returns:
            A tuple ``(ppl_mean, ppl_std, ppl_raw)`` as described in the class docstring.

        """
        return perceptual_path_length(
            generator=self.generator,
            num_samples=self.num_samples,
            conditional=self.conditional,
            # Forward the configured batch size; otherwise the functional default would silently be used.
            batch_size=self.batch_size,
            interpolation_method=self.interpolation_method,
            epsilon=self.epsilon,
            resize=self.resize,
            lower_discard=self.lower_discard,
            upper_discard=self.upper_discard,
            sim_net=self.net,
            device=self.device,
        )