diff --git a/monai/networks/nets/autoencoder.py b/monai/networks/nets/autoencoder.py index b7dc309b71..f4a0451dc7 100644 --- a/monai/networks/nets/autoencoder.py +++ b/monai/networks/nets/autoencoder.py @@ -23,7 +23,69 @@ class AutoEncoder(nn.Module): """ - Base class for the architecture implementing :py:class:`monai.networks.nets.VarAutoEncoder`. + Simple definition of an autoencoder and base class for the architecture implementing + :py:class:`monai.networks.nets.VarAutoEncoder`. The network is composed of an encode sequence of blocks, followed + by an intermediary sequence of blocks, and finally a decode sequence of blocks. The encode and decode blocks are + default :py:class:`monai.networks.blocks.Convolution` instances with the encode blocks having the given stride + and the decode blocks having transpose convolutions with the same stride. If `num_res_units` is given residual + blocks are used instead. + + By default the intermediary sequence is empty but if `inter_channels` is given to specify the output channels of + blocks then this will become a sequence of Convolution blocks or of residual blocks if `num_inter_units` is + given. The optional parameter `inter_dilations` can be used to specify the dilation values of the convolutions in + these blocks, this allows a network to use dilated kernels in this middle section. Since the intermediary section + isn't meant to change the size of the output the strides for all these kernels are 1. + + Args: + spatial_dims: number of spatial dimensions. + in_channels: number of input channels. + out_channels: number of output channels. + channels: sequence of channels. Top block first. The length of `channels` should be no less than 2. + strides: sequence of convolution strides. The length of `stride` should equal to `len(channels) - 1`. + kernel_size: convolution kernel size, the value(s) should be odd. If sequence, + its length should equal to dimensions. Defaults to 3. 
+ up_kernel_size: upsampling convolution kernel size, the value(s) should be odd. If sequence, + its length should equal to dimensions. Defaults to 3. + num_res_units: number of residual units. Defaults to 0. + inter_channels: sequence of channels defining the blocks in the intermediate layer between encode and decode. + inter_dilations: defines the dilation value for each block of the intermediate layer. Defaults to 1. + num_inter_units: number of residual units for each block of the intermediate layer. Defaults to 0. + act: activation type and arguments. Defaults to PReLU. + norm: feature normalization type and arguments. Defaults to instance norm. + dropout: dropout ratio. Defaults to no dropout. + bias: whether to have a bias term in convolution blocks. Defaults to True. + According to `Performance Tuning Guide `_, + if a conv layer is directly followed by a batch norm layer, bias should be False. + + .. deprecated:: 0.6.0 + ``dimensions`` is deprecated, use ``spatial_dims`` instead. + + Examples:: + + from monai.networks.nets import AutoEncoder + + # 3 layers each down/up sampling their inputs by a factor 2 with no intermediate layer + net = AutoEncoder( + spatial_dims=2, + in_channels=1, + out_channels=1, + channels=(2, 4, 8), + strides=(2, 2, 2) + ) + + # 1 layer downsampling by 2, followed by a sequence of residual units with 2 convolutions defined by + # progressively increasing dilations, then final upsample layer + net = AutoEncoder( + spatial_dims=2, + in_channels=1, + out_channels=1, + channels=(4,), + strides=(2,), + inter_channels=(8, 8, 8), + inter_dilations=(1, 2, 4), + num_inter_units=2 + ) + """ @deprecated_arg( @@ -48,13 +110,6 @@ def __init__( bias: bool = True, dimensions: Optional[int] = None, ) -> None: - """ - Initialize the AutoEncoder. - - .. deprecated:: 0.6.0 - ``dimensions`` is deprecated, use ``spatial_dims`` instead. 
- - """ super().__init__() self.dimensions = spatial_dims if dimensions is None else dimensions @@ -87,6 +142,9 @@ def __init__( def _get_encode_module( self, in_channels: int, channels: Sequence[int], strides: Sequence[int] ) -> Tuple[nn.Sequential, int]: + """ + Returns the encode part of the network by building up a sequence of layers returned by `_get_encode_layer`. + """ encode = nn.Sequential() layer_channels = in_channels @@ -98,6 +156,10 @@ def _get_encode_module( return encode, layer_channels def _get_intermediate_module(self, in_channels: int, num_inter_units: int) -> Tuple[nn.Module, int]: + """ + Returns the intermediate block of the network which accepts input from the encoder and whose output goes + to the decoder. + """ # Define some types intermediate: nn.Module unit: nn.Module @@ -145,6 +207,9 @@ def _get_intermediate_module(self, in_channels: int, num_inter_units: int) -> Tu def _get_decode_module( self, in_channels: int, channels: Sequence[int], strides: Sequence[int] ) -> Tuple[nn.Sequential, int]: + """ + Returns the decode part of the network by building up a sequence of layers returned by `_get_decode_layer`. + """ decode = nn.Sequential() layer_channels = in_channels @@ -156,7 +221,9 @@ def _get_decode_module( return decode, layer_channels def _get_encode_layer(self, in_channels: int, out_channels: int, strides: int, is_last: bool) -> nn.Module: - + """ + Returns a single layer of the encoder part of the network. + """ mod: nn.Module if self.num_res_units > 0: mod = ResidualUnit( @@ -187,7 +254,9 @@ def _get_encode_layer(self, in_channels: int, out_channels: int, strides: int, i return mod def _get_decode_layer(self, in_channels: int, out_channels: int, strides: int, is_last: bool) -> nn.Sequential: - + """ + Returns a single layer of the decoder part of the network. 
+ """ decode = nn.Sequential() conv = Convolution( diff --git a/monai/networks/nets/classifier.py b/monai/networks/nets/classifier.py index 92fee4f566..a1f913ea23 100644 --- a/monai/networks/nets/classifier.py +++ b/monai/networks/nets/classifier.py @@ -25,6 +25,19 @@ class Classifier(Regressor): Defines a classification network from Regressor by specifying the output shape as a single dimensional tensor with size equal to the number of classes to predict. The final activation function can also be specified, eg. softmax or sigmoid. + + Args: + in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) + classes: integer stating the dimension of the final output tensor + channels: tuple of integers stating the output channels of each convolutional layer + strides: tuple of integers stating the stride (downscale factor) of each convolutional layer + kernel_size: integer or tuple of integers stating size of convolutional kernels + num_res_units: integer stating number of convolutions in residual units, 0 means no residual units + act: name or type defining activation layers + norm: name or type defining normalization layers + dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout + bias: boolean stating if convolution layers should have a bias component + last_act: name defining the last activation layer """ def __init__( @@ -41,20 +54,6 @@ def __init__( bias: bool = True, last_act: Optional[str] = None, ) -> None: - """ - Args: - in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) - classes: integer stating the dimension of the final output tensor - channels: tuple of integers stating the output channels of each convolutional layer - strides: tuple of integers stating the stride (downscale factor) of each convolutional layer - kernel_size: integer or tuple of integers stating size of convolutional kernels - num_res_units: integer stating 
number of convolutions in residual units, 0 means no residual units - act: name or type defining activation layers - norm: name or type defining normalization layers - dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout - bias: boolean stating if convolution layers should have a bias component - last_act: name defining the last activation layer - """ super().__init__(in_shape, (classes,), channels, strides, kernel_size, num_res_units, act, norm, dropout, bias) if last_act is not None: @@ -68,6 +67,18 @@ class Discriminator(Classifier): """ Defines a discriminator network from Classifier with a single output value and sigmoid activation by default. This is meant for use with GANs or other applications requiring a generic discriminator network. + + Args: + in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) + channels: tuple of integers stating the output channels of each convolutional layer + strides: tuple of integers stating the stride (downscale factor) of each convolutional layer + kernel_size: integer or tuple of integers stating size of convolutional kernels + num_res_units: integer stating number of convolutions in residual units, 0 means no residual units + act: name or type defining activation layers + norm: name or type defining normalization layers + dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout + bias: boolean stating if convolution layers should have a bias component + last_act: name defining the last activation layer """ def __init__( @@ -83,19 +94,6 @@ def __init__( bias: bool = True, last_act=Act.SIGMOID, ) -> None: - """ - Args: - in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) - channels: tuple of integers stating the output channels of each convolutional layer - strides: tuple of integers stating the stride (downscale factor) of each convolutional 
layer - kernel_size: integer or tuple of integers stating size of convolutional kernels - num_res_units: integer stating number of convolutions in residual units, 0 means no residual units - act: name or type defining activation layers - norm: name or type defining normalization layers - dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout - bias: boolean stating if convolution layers should have a bias component - last_act: name defining the last activation layer - """ super().__init__(in_shape, 1, channels, strides, kernel_size, num_res_units, act, norm, dropout, bias, last_act) @@ -104,6 +102,17 @@ class Critic(Classifier): Defines a critic network from Classifier with a single output value and no final activation. The final layer is `nn.Flatten` instead of `nn.Linear`, the final result is computed as the mean over the first dimension. This is meant to be used with Wasserstein GANs. + + Args: + in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) + channels: tuple of integers stating the output channels of each convolutional layer + strides: tuple of integers stating the stride (downscale factor) of each convolutional layer + kernel_size: integer or tuple of integers stating size of convolutional kernels + num_res_units: integer stating number of convolutions in residual units, 0 means no residual units + act: name or type defining activation layers + norm: name or type defining normalization layers + dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout + bias: boolean stating if convolution layers should have a bias component """ def __init__( @@ -118,18 +127,6 @@ def __init__( dropout: Optional[float] = 0.25, bias: bool = True, ) -> None: - """ - Args: - in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) - channels: tuple of integers stating the output channels of each 
convolutional layer - strides: tuple of integers stating the stride (downscale factor) of each convolutional layer - kernel_size: integer or tuple of integers stating size of convolutional kernels - num_res_units: integer stating number of convolutions in residual units, 0 means no residual units - act: name or type defining activation layers - norm: name or type defining normalization layers - dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout - bias: boolean stating if convolution layers should have a bias component - """ super().__init__(in_shape, 1, channels, strides, kernel_size, num_res_units, act, norm, dropout, bias, None) def _get_final_layer(self, in_shape: Sequence[int]): diff --git a/monai/networks/nets/fullyconnectednet.py b/monai/networks/nets/fullyconnectednet.py index b906bab015..19197bd58d 100644 --- a/monai/networks/nets/fullyconnectednet.py +++ b/monai/networks/nets/fullyconnectednet.py @@ -30,9 +30,24 @@ def _get_adn_layer( class FullyConnectedNet(nn.Sequential): """ - Plain full-connected layer neural network + Simple full-connected layer neural network composed of a sequence of linear layers with PReLU activation and + dropout. The network accepts input with `in_channels` channels, has output with `out_channels` channels, and + hidden layer output channels given in `hidden_channels`. If `bias` is True then linear units have a bias term. + + Args: + in_channels: number of input channels. + out_channels: number of output channels. + hidden_channels: number of output channels for each hidden layer. + dropout: dropout ratio. Defaults to no dropout. + act: activation type and arguments. Defaults to PReLU. + bias: whether to have a bias term in linear units. Defaults to True. + adn_ordering: order of operations in :py:class:`monai.networks.blocks.ADN`. 
+ + Examples:: + + # accepts 4 values and infers 3 values as output, has 3 hidden layers with 10, 20, 10 values as output + net = FullyConnectedNet(4, 3, [10, 20, 10], dropout=0.2) - The network uses dropout and, by default, PReLU activation """ def __init__( @@ -53,8 +68,11 @@ def __init__( self.in_channels = in_channels self.out_channels = out_channels self.hidden_channels = list(hidden_channels) + self.act = act + self.dropout = dropout + self.adn_ordering = adn_ordering + self.add_module("flatten", nn.Flatten()) - self.adn_layer = _get_adn_layer(act, dropout, adn_ordering) prev_channels = self.in_channels for i, c in enumerate(hidden_channels): @@ -64,13 +82,34 @@ def __init__( self.add_module("output", nn.Linear(prev_channels, out_channels, bias)) def _get_layer(self, in_channels: int, out_channels: int, bias: bool) -> nn.Sequential: - seq = nn.Sequential(nn.Linear(in_channels, out_channels, bias)) - seq.add_module("ADN", self.adn_layer) + seq = nn.Sequential( + nn.Linear(in_channels, out_channels, bias), _get_adn_layer(self.act, self.dropout, self.adn_ordering) + ) return seq class VarFullyConnectedNet(nn.Module): - """Variational fully-connected network.""" + """ + Variational fully-connected network. This is composed of an encode layer, reparameterization layer, and then a + decode layer. + + Args: + in_channels: number of input channels. + out_channels: number of output channels. + latent_size: number of latent variables to use. + encode_channels: number of output channels for each hidden layer of the encode half. + decode_channels: number of output channels for each hidden layer of the decode half. + dropout: dropout ratio. Defaults to no dropout. + act: activation type and arguments. Defaults to PReLU. + bias: whether to have a bias term in linear units. Defaults to True. + adn_ordering: order of operations in :py:class:`monai.networks.blocks.ADN`. 
+ + Examples:: + + # accepts inputs with 4 values, uses a latent space of 2 variables, and produces outputs of 3 values + net = VarFullyConnectedNet(4, 3, 2, [5, 10], [10, 5]) + + """ def __init__( self, diff --git a/monai/networks/nets/generator.py b/monai/networks/nets/generator.py index ea05787173..90aa26cd01 100644 --- a/monai/networks/nets/generator.py +++ b/monai/networks/nets/generator.py @@ -25,13 +25,35 @@ class Generator(nn.Module): """ Defines a simple generator network accepting a latent vector and through a sequence of convolution layers constructs an output tensor of greater size and high dimensionality. The method `_get_layer` is used to - create each of these layers, override this method to define layers beyond the default Convolution or - ResidualUnit layers. + create each of these layers, override this method to define layers beyond the default + :py:class:`monai.networks.blocks.Convolution` or :py:class:`monai.networks.blocks.ResidualUnit` layers. + + The layers are constructed using the values in the `channels` and `strides` arguments, the number being defined by + the length of these (which must match). Input is first passed through a :py:class:`torch.nn.Linear` layer to + convert the input vector to an image tensor with dimensions `start_shape`. This passes through the convolution + layers and is progressively upsampled if the `strides` values are greater than 1 using transpose convolutions. The + size of the final output is defined by the `start_shape` dimension and the amount of upsampling done through + strides. In the default definition the size of the output's spatial dimensions will be that of `start_shape` + multiplied by the product of `strides`, thus the example network below upsamples a starting size of (64, 8, 8) + to (1, 64, 64) since its `strides` are (2, 2, 2). 
+ + Args: + latent_shape: tuple of integers stating the dimension of the input latent vector (minus batch dimension) + start_shape: tuple of integers stating the dimension of the tensor to pass to convolution subnetwork + channels: tuple of integers stating the output channels of each convolutional layer + strides: tuple of integers stating the stride (upscale factor) of each convolutional layer + kernel_size: integer or tuple of integers stating size of convolutional kernels + num_res_units: integer stating number of convolutions in residual units, 0 means no residual units + act: name or type defining activation layers + norm: name or type defining normalization layers + dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout + bias: boolean stating if convolution layers should have a bias component + + Examples:: + + # 3 layers, latent input vector of shape (42, 24), output volume of shape (1, 64, 64) + net = Generator((42, 24), (64, 8, 8), (32, 16, 1), (2, 2, 2)) - For example, a generator accepting a latent vector if shape (42,24) and producing an output volume of - shape (1,64,64) can be constructed as: - - gen = Generator((42, 24), (64, 8, 8), (32, 16, 1), (2, 2, 2)) """ def __init__( @@ -47,26 +69,6 @@ def __init__( dropout: Optional[float] = None, bias: bool = True, ) -> None: - """ - Construct the generator network with the number of layers defined by `channels` and `strides`. In the - forward pass a `nn.Linear` layer relates the input latent vector to a tensor of dimensions `start_shape`, - this is then fed forward through the sequence of convolutional layers. The number of layers is defined by - the length of `channels` and `strides` which must match, each layer having the number of output channels - given in `channels` and an upsample factor given in `strides` (ie. a transpose convolution with that stride - size). 
- - Args: - latent_shape: tuple of integers stating the dimension of the input latent vector (minus batch dimension) - start_shape: tuple of integers stating the dimension of the tensor to pass to convolution subnetwork - channels: tuple of integers stating the output channels of each convolutional layer - strides: tuple of integers stating the stride (upscale factor) of each convolutional layer - kernel_size: integer or tuple of integers stating size of convolutional kernels - num_res_units: integer stating number of convolutions in residual units, 0 means no residual units - act: name or type defining activation layers - norm: name or type defining normalization layers - dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout - bias: boolean stating if convolution layers should have a bias component - """ super().__init__() self.in_channels, *self.start_shape = ensure_tuple(start_shape) diff --git a/monai/networks/nets/regressor.py b/monai/networks/nets/regressor.py index 0153014902..bc8feb7527 100644 --- a/monai/networks/nets/regressor.py +++ b/monai/networks/nets/regressor.py @@ -29,6 +29,30 @@ class Regressor(nn.Module): This defines a network for relating large-sized input tensors to small output tensors, ie. regressing large values to a prediction. An output of a single dimension can be used as value regression or multi-label classification prediction, an output of a single value can be used as a discriminator or critic prediction. + + The network is constructed as a sequence of layers, either :py:class:`monai.networks.blocks.Convolution` or + :py:class:`monai.networks.blocks.ResidualUnit`, with a final fully-connected layer resizing the output from the + blocks to the final size. Each block is defined with a stride value typically used to downsample the input using + strided convolutions. 
In this way each block progressively condenses information from the input into a deep + representation the final fully-connected layer relates to a final result. + + Args: + in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) + out_shape: tuple of integers stating the dimension of the final output tensor (minus batch dimension) + channels: tuple of integers stating the output channels of each convolutional layer + strides: tuple of integers stating the stride (downscale factor) of each convolutional layer + kernel_size: integer or tuple of integers stating size of convolutional kernels + num_res_units: integer stating number of convolutions in residual units, 0 means no residual units + act: name or type defining activation layers + norm: name or type defining normalization layers + dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout + bias: boolean stating if convolution layers should have a bias component + + Examples:: + + # infers a 2-value result (eg. a 2D cartesian coordinate) from a 64x64 image + net = Regressor((1, 64, 64), (2,), (2, 4, 8), (2, 2, 2)) + """ def __init__( @@ -44,23 +68,6 @@ def __init__( dropout: Optional[float] = None, bias: bool = True, ) -> None: - """ - Construct the regressor network with the number of layers defined by `channels` and `strides`. Inputs are - first passed through the convolutional layers in the forward pass, the output from this is then pass - through a fully connected layer to relate them to the final output tensor. 
- - Args: - in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) - out_shape: tuple of integers stating the dimension of the final output tensor - channels: tuple of integers stating the output channels of each convolutional layer - strides: tuple of integers stating the stride (downscale factor) of each convolutional layer - kernel_size: integer or tuple of integers stating size of convolutional kernels - num_res_units: integer stating number of convolutions in residual units, 0 means no residual units - act: name or type defining activation layers - norm: name or type defining normalization layers - dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout - bias: boolean stating if convolution layers should have a bias component - """ super().__init__() self.in_channels, *self.in_shape = ensure_tuple(in_shape) diff --git a/monai/networks/nets/unet.py b/monai/networks/nets/unet.py index 1dd52455d9..7d5f979330 100644 --- a/monai/networks/nets/unet.py +++ b/monai/networks/nets/unet.py @@ -26,6 +26,85 @@ @export("monai.networks.nets") @alias("Unet") class UNet(nn.Module): + """ + Enhanced version of UNet which has residual units implemented with the ResidualUnit class. + The residual part uses a convolution to change the input dimensions to match the output dimensions + if this is necessary but will use nn.Identity if not. + Refer to: https://link.springer.com/chapter/10.1007/978-3-030-12029-0_40. + + Each layer of the network has an encode and decode path with a skip connection between them. Data in the encode path + is downsampled using strided convolutions (if `strides` is given values greater than 1) and in the decode path + upsampled using strided transpose convolutions. These down or up sampling operations occur at the beginning of each + block rather than afterwards as is typical in UNet implementations. 
+ + To further explain this consider the first example network given below. This network has 3 layers with strides + of 2 for each of the middle layers (the last layer is the bottom connection which does not down/up sample). Input + data to this network is immediately reduced in the spatial dimensions by a factor of 2 by the first convolution of + the residual unit defining the first layer of the encode part. The last layer of the decode part will upsample its + input (data from the previous layer concatenated with data from the skip connection) in the first convolution. This + ensures the final output of the network has the same shape as the input. + + Padding values for the convolutions are chosen to ensure output sizes are even divisors/multiples of the input + sizes if the `strides` value for a layer is a factor of the input sizes. A typical case is to use `strides` values + of 2 and inputs that are multiples of powers of 2. An input can thus be downsampled evenly however many times its + dimensions can be divided by 2, so for the example network inputs would have to have dimensions that are multiples + of 4. In the second example network given below the input to the bottom layer will have shape (1, 64, 15, 15) for + an input of shape (1, 1, 240, 240) demonstrating the input being reduced in size spatially by 2**4. + + Args: + spatial_dims: number of spatial dimensions. + in_channels: number of input channels. + out_channels: number of output channels. + channels: sequence of channels. Top block first. The length of `channels` should be no less than 2. + strides: sequence of convolution strides. The length of `stride` should equal to `len(channels) - 1`. + kernel_size: convolution kernel size, the value(s) should be odd. If sequence, + its length should equal to dimensions. Defaults to 3. + up_kernel_size: upsampling convolution kernel size, the value(s) should be odd. If sequence, + its length should equal to dimensions. Defaults to 3. 
+ num_res_units: number of residual units. Defaults to 0. + act: activation type and arguments. Defaults to PReLU. + norm: feature normalization type and arguments. Defaults to instance norm. + dropout: dropout ratio. Defaults to no dropout. + bias: whether to have a bias term in convolution blocks. Defaults to True. + According to `Performance Tuning Guide `_, + if a conv layer is directly followed by a batch norm layer, bias should be False. + + Examples:: + + from monai.networks.nets import UNet + + # 3 layer network with down/upsampling by a factor of 2 at each layer with 2-convolution residual units + net = UNet( + spatial_dims=2, + in_channels=1, + out_channels=1, + channels=(4, 8, 16), + strides=(2, 2), + num_res_units=2 + ) + + # 5 layer network with simple convolution/normalization/dropout/activation blocks defining the layers + net=UNet( + spatial_dims=2, + in_channels=1, + out_channels=1, + channels=(4, 8, 16, 32, 64), + strides=(2, 2, 2, 2), + ) + + .. deprecated:: 0.6.0 + ``dimensions`` is deprecated, use ``spatial_dims`` instead. + + Note: The acceptable spatial size of input data depends on the parameters of the network, + to set appropriate spatial size, please check the tutorial for more details: + https://github.com/Project-MONAI/tutorials/blob/master/modules/UNet_input_size_constrains.ipynb. + Typically, when using a stride of 2 in down / up sampling, the output dimensions are either half of the + input when downsampling, or twice when upsampling. In this case with N numbers of layers in the network, + the inputs must have spatial dimensions that are all multiples of 2^N. + Usually, applying `resize`, `pad` or `crop` transforms can help adjust the spatial size of input data. + + """ + @deprecated_arg( name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead." 
) @@ -45,42 +124,7 @@ def __init__( bias: bool = True, dimensions: Optional[int] = None, ) -> None: - """ - Enhanced version of UNet which has residual units implemented with the ResidualUnit class. - The residual part uses a convolution to change the input dimensions to match the output dimensions - if this is necessary but will use nn.Identity if not. - Refer to: https://link.springer.com/chapter/10.1007/978-3-030-12029-0_40. - Args: - spatial_dims: number of spatial dimensions. - in_channels: number of input channels. - out_channels: number of output channels. - channels: sequence of channels. Top block first. The length of `channels` should be no less than 2. - strides: sequence of convolution strides. The length of `stride` should equal to `len(channels) - 1`. - kernel_size: convolution kernel size, the value(s) should be odd. If sequence, - its length should equal to dimensions. Defaults to 3. - up_kernel_size: upsampling convolution kernel size, the value(s) should be odd. If sequence, - its length should equal to dimensions. Defaults to 3. - num_res_units: number of residual units. Defaults to 0. - act: activation type and arguments. Defaults to PReLU. - norm: feature normalization type and arguments. Defaults to instance norm. - dropout: dropout ratio. Defaults to no dropout. - bias: whether to have a bias term in convolution blocks. Defaults to True. - According to `Performance Tuning Guide `_, - if a conv layer is directly followed by a batch norm layer, bias should be False. - - .. deprecated:: 0.6.0 - ``dimensions`` is deprecated, use ``spatial_dims`` instead. - - Note: The acceptable spatial size of input data depends on the parameters of the network, - to set appropriate spatial size, please check the tutorial for more details: - https://github.com/Project-MONAI/tutorials/blob/master/modules/UNet_input_size_constrains.ipynb. 
- Typically, when using a stride of 2 in down / up sampling, the output dimensions are either half of the - input when downsampling, or twice when upsampling. In this case with N numbers of layers in the network, - the inputs must have spatial dimensions that are all multiples of 2^N. - Usually, applying `resize`, `pad` or `crop` transforms can help adjust the spatial size of input data. - - """ super().__init__() if len(channels) < 2: diff --git a/monai/networks/nets/varautoencoder.py b/monai/networks/nets/varautoencoder.py index a228efab07..b4ef8be93d 100644 --- a/monai/networks/nets/varautoencoder.py +++ b/monai/networks/nets/varautoencoder.py @@ -28,10 +28,36 @@ class VarAutoEncoder(AutoEncoder): """ Variational Autoencoder based on the paper - https://arxiv.org/abs/1312.6114 - .. code-block:: python + Args: + spatial_dims: number of spatial dimensions. + in_shape: shape of input data starting with channel dimension. + out_channels: number of output channels. + latent_size: size of the latent variable. + channels: sequence of channels. Top block first. The length of `channels` should be no less than 2. + strides: sequence of convolution strides. The length of `stride` should equal to `len(channels) - 1`. + kernel_size: convolution kernel size, the value(s) should be odd. If sequence, + its length should equal to dimensions. Defaults to 3. + up_kernel_size: upsampling convolution kernel size, the value(s) should be odd. If sequence, + its length should equal to dimensions. Defaults to 3. + num_res_units: number of residual units. Defaults to 0. + inter_channels: sequence of channels defining the blocks in the intermediate layer between encode and decode. + inter_dilations: defines the dilation value for each block of the intermediate layer. Defaults to 1. + num_inter_units: number of residual units for each block of the intermediate layer. Defaults to 0. + act: activation type and arguments. Defaults to PReLU. + norm: feature normalization type and arguments. 
Defaults to instance norm. + dropout: dropout ratio. Defaults to no dropout. + bias: whether to have a bias term in convolution blocks. Defaults to True. + According to `Performance Tuning Guide `_, + if a conv layer is directly followed by a batch norm layer, bias should be False. + + .. deprecated:: 0.6.0 + ``dimensions`` is deprecated, use ``spatial_dims`` instead. + + Examples:: from monai.networks.nets import VarAutoEncoder + # 3 layer network accepting images with dimensions (1, 32, 32) and using a latent vector with 2 values model = VarAutoEncoder( dimensions=2, in_shape=(32, 32), # image spatial shape @@ -44,10 +70,6 @@ class VarAutoEncoder(AutoEncoder): see also: - Variational autoencoder network with MedNIST Dataset https://github.com/Project-MONAI/tutorials/blob/master/modules/varautoencoder_mednist.ipynb - - .. deprecated:: 0.6.0 - ``dimensions`` is deprecated, use ``spatial_dims`` instead. - """ @deprecated_arg(