diff --git a/.actions/helpers.py b/.actions/helpers.py
index e2abb58c1..2f53728e4 100644
--- a/.actions/helpers.py
+++ b/.actions/helpers.py
@@ -148,6 +148,7 @@ def _meta_file(folder: str) -> str:
     @staticmethod
     def augment_script(fpath: str):
         """Add template header and footer to the python base script.
+
         Args:
             fpath: path to python script
         """
@@ -313,6 +314,7 @@ def parse_requirements(dir_path: str):
     @staticmethod
     def copy_notebooks(path_root: str, path_docs_ipynb: str = "docs/source/notebooks"):
         """Copy all notebooks from a folder to doc folder.
+
         Args:
             path_root: source path to the project root in this tutorials
             path_docs_ipynb: destination path to the notebooks location
@@ -362,7 +364,7 @@ def update_env_details(dir_path: str):
         req = [r.strip() for r in req]

         def _parse(pkg: str, keys: str = " <=>") -> str:
-            """Parsing just the package name"""
+            """Parsing just the package name."""
             if any(c in pkg for c in keys):
                 ix = min(pkg.index(c) for c in keys if c in pkg)
                 pkg = pkg[:ix]
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 638e36141..04bbd219f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -29,6 +29,12 @@ repos:
       args: [--py36-plus]
       name: Upgrade code

+  - repo: https://github.com/myint/docformatter
+    rev: v1.4
+    hooks:
+      - id: docformatter
+        args: [--in-place, --wrap-summaries=115, --wrap-descriptions=120]
+
   - repo: https://github.com/PyCQA/isort
     rev: 5.9.2
     hooks:
diff --git a/course_UvA-DL/autoregressive-image-modeling/Autoregressive_Image_Modeling.py b/course_UvA-DL/autoregressive-image-modeling/Autoregressive_Image_Modeling.py
index 70087490d..3d970563d 100644
--- a/course_UvA-DL/autoregressive-image-modeling/Autoregressive_Image_Modeling.py
+++ b/course_UvA-DL/autoregressive-image-modeling/Autoregressive_Image_Modeling.py
@@ -184,14 +184,14 @@ def show_imgs(imgs):
 class MaskedConvolution(nn.Module):

     def __init__(self, c_in, c_out, mask, **kwargs):
-        """
-        Implements a convolution with mask applied on its weights.
-        Inputs:
-            c_in - Number of input channels
-            c_out - Number of output channels
-            mask - Tensor of shape [kernel_size_H, kernel_size_W] with 0s where
+        """Implements a convolution with a mask applied on its weights.
+
+        Args:
+            c_in: Number of input channels
+            c_out: Number of output channels
+            mask: Tensor of shape [kernel_size_H, kernel_size_W] with 0s where
                 the convolution should be masked, and 1s otherwise.
-            kwargs - Additional arguments for the convolution
+            kwargs: Additional arguments for the convolution
         """
         super().__init__()
         # For simplicity: calculate padding automatically
@@ -290,12 +290,12 @@ def __init__(self, c_in, c_out, kernel_size=3, mask_center=False, **kwargs):


 def show_center_recep_field(img, out):
-    """
-    Calculates the gradients of the input with respect to the output center pixel,
-    and visualizes the overall receptive field.
-    Inputs:
-        img - Input image for which we want to calculate the receptive field on.
-        out - Output features/loss which is used for backpropagation, and should be
+    """Calculates the gradients of the input with respect to the output center pixel, and visualizes the overall
+    receptive field.
+
+    Args:
+        img: Input image on which we want to calculate the receptive field.
+        out: Output features/loss which is used for backpropagation, and should be
            the output of the network/computation graph.
""" # Determine gradients @@ -476,9 +476,7 @@ def show_center_recep_field(img, out): class GatedMaskedConv(nn.Module): def __init__(self, c_in, **kwargs): - """ - Gated Convolution block implemented the computation graph shown above. - """ + """Gated Convolution block implemented the computation graph shown above.""" super().__init__() self.conv_vert = VerticalStackConvolution(c_in, c_out=2 * c_in, **kwargs) self.conv_horiz = HorizontalStackConvolution(c_in, c_out=2 * c_in, **kwargs) @@ -558,10 +556,10 @@ def __init__(self, c_in, c_hidden): self.example_input_array = train_set[0][0][None] def forward(self, x): - """ - Forward image through model and return logits for each pixel. - Inputs: - x - Image tensor with integer values between 0 and 255. + """Forward image through model and return logits for each pixel. + + Args: + x: Image tensor with integer values between 0 and 255. """ # Scale input from 0 to 255 back to -1 to 1 x = (x.float() / 255.0) * 2 - 1 @@ -589,11 +587,11 @@ def calc_likelihood(self, x): @torch.no_grad() def sample(self, img_shape, img=None): - """ - Sampling function for the autoregressive model. - Inputs: - img_shape - Shape of the image to generate (B,C,H,W) - img (optional) - If given, this tensor will be used as + """Sampling function for the autoregressive model. + + Args: + img_shape: Shape of the image to generate (B,C,H,W) + img (optional): If given, this tensor will be used as a starting image. The pixels to fill should be -1 in the input tensor. """ diff --git a/course_UvA-DL/deep-autoencoders/Deep_Autoencoders.py b/course_UvA-DL/deep-autoencoders/Deep_Autoencoders.py index 43d60a024..0cbaf4c53 100644 --- a/course_UvA-DL/deep-autoencoders/Deep_Autoencoders.py +++ b/course_UvA-DL/deep-autoencoders/Deep_Autoencoders.py @@ -133,11 +133,11 @@ def __init__( self, num_input_channels: int, base_channel_size: int, latent_dim: int, act_fn: object = nn.GELU ): """ - Inputs: - - num_input_channels : Number of input channels of the image. For CIFAR, this parameter is 3 - - base_channel_size : Number of channels we use in the first convolutional layers. Deeper layers might use a duplicate of it. - - latent_dim : Dimensionality of latent representation z - - act_fn : Activation function used throughout the encoder network + Args: + num_input_channels : Number of input channels of the image. For CIFAR, this parameter is 3 + base_channel_size : Number of channels we use in the first convolutional layers. Deeper layers might use a duplicate of it. + latent_dim : Dimensionality of latent representation z + act_fn : Activation function used throughout the encoder network """ super().__init__() c_hid = base_channel_size @@ -195,11 +195,11 @@ def __init__( self, num_input_channels: int, base_channel_size: int, latent_dim: int, act_fn: object = nn.GELU ): """ - Inputs: - - num_input_channels : Number of channels of the image to reconstruct. For CIFAR, this parameter is 3 - - base_channel_size : Number of channels we use in the last convolutional layers. Early layers might use a duplicate of it. - - latent_dim : Dimensionality of latent representation z - - act_fn : Activation function used throughout the decoder network + Args: + num_input_channels : Number of channels of the image to reconstruct. For CIFAR, this parameter is 3 + base_channel_size : Number of channels we use in the last convolutional layers. Early layers might use a duplicate of it. 
+           latent_dim : Dimensionality of latent representation z
+           act_fn : Activation function used throughout the decoder network
         """
         super().__init__()
         c_hid = base_channel_size
@@ -263,17 +263,13 @@ def __init__(
         self.example_input_array = torch.zeros(2, num_input_channels, width, height)

     def forward(self, x):
-        """
-        The forward function takes in an image and returns the reconstructed image
-        """
+        """The forward function takes in an image and returns the reconstructed image."""
         z = self.encoder(x)
         x_hat = self.decoder(z)
         return x_hat

     def _get_reconstruction_loss(self, batch):
-        """
-        Given a batch of images, this function returns the reconstruction loss (MSE in our case)
-        """
+        """Given a batch of images, this function returns the reconstruction loss (MSE in our case)."""
         x, _ = batch  # We do not need the labels
         x_hat = self.forward(x)
         loss = F.mse_loss(x, x_hat, reduction="none")
diff --git a/course_UvA-DL/deep-energy-based-generative-models/Deep_Energy_Models.py b/course_UvA-DL/deep-energy-based-generative-models/Deep_Energy_Models.py
index fedeff1cb..ecb5d5491 100644
--- a/course_UvA-DL/deep-energy-based-generative-models/Deep_Energy_Models.py
+++ b/course_UvA-DL/deep-energy-based-generative-models/Deep_Energy_Models.py
@@ -333,11 +333,11 @@ class Sampler:

     def __init__(self, model, img_shape, sample_size, max_len=8192):
         """
-        Inputs:
-            model - Neural network to use for modeling E_theta
-            img_shape - Shape of the images to model
-            sample_size - Batch size of the samples
-            max_len - Maximum number of data points to keep in the buffer
+        Args:
+            model: Neural network to use for modeling E_theta
+            img_shape: Shape of the images to model
+            sample_size: Batch size of the samples
+            max_len: Maximum number of data points to keep in the buffer
         """
         super().__init__()
         self.model = model
@@ -347,11 +347,11 @@ def __init__(self, model, img_shape, sample_size, max_len=8192):
         self.examples = [(torch.rand((1, ) + img_shape) * 2 - 1) for _ in range(self.sample_size)]

     def sample_new_exmps(self, steps=60, step_size=10):
-        """
-        Function for getting a new batch of "fake" images.
-        Inputs:
-            steps - Number of iterations in the MCMC algorithm
-            step_size - Learning rate nu in the algorithm above
+        """Function for getting a new batch of "fake" images.
+
+        Args:
+            steps: Number of iterations in the MCMC algorithm
+            step_size: Learning rate nu in the algorithm above
         """
         # Choose 95% of the batch from the buffer, 5% generate from scratch
         n_new = np.random.binomial(self.sample_size, 0.05)
@@ -369,14 +369,14 @@ def sample_new_exmps(self, steps=60, step_size=10):
     @staticmethod
     def generate_samples(model, inp_imgs, steps=60, step_size=10, return_img_per_step=False):
-        """
-        Function for sampling images for a given model.
-        Inputs:
-            model - Neural network to use for modeling E_theta
-            inp_imgs - Images to start from for sampling. If you want to generate new images, enter noise between -1 and 1.
-            steps - Number of iterations in the MCMC algorithm.
-            step_size - Learning rate nu in the algorithm above
-            return_img_per_step - If True, we return the sample at every iteration of the MCMC
+        """Function for sampling images for a given model.
+
+        Args:
+            model: Neural network to use for modeling E_theta
+            inp_imgs: Images to start from for sampling. If you want to generate new images, enter noise between -1 and 1.
+            steps: Number of iterations in the MCMC algorithm.
+            step_size: Learning rate nu in the algorithm above
+            return_img_per_step: If True, we return the sample at every iteration of the MCMC
         """
         # Before MCMC: set model parameters to "required_grad=False"
         # because we are only interested in the gradients of the input.
diff --git a/course_UvA-DL/graph-neural-networks/GNN_overview.py b/course_UvA-DL/graph-neural-networks/GNN_overview.py
index 460d434a5..5dd76395e 100644
--- a/course_UvA-DL/graph-neural-networks/GNN_overview.py
+++ b/course_UvA-DL/graph-neural-networks/GNN_overview.py
@@ -172,9 +172,9 @@ def __init__(self, c_in, c_out):

     def forward(self, node_feats, adj_matrix):
         """
-        Inputs:
-            node_feats - Tensor with node features of shape [batch_size, num_nodes, c_in]
-            adj_matrix - Batch of adjacency matrices of the graph. If there is an edge from i to j,
+        Args:
+            node_feats: Tensor with node features of shape [batch_size, num_nodes, c_in]
+            adj_matrix: Batch of adjacency matrices of the graph. If there is an edge from i to j,
                 adj_matrix[b,i,j]=1 else 0. Supports directed edges by non-symmetric matrices.
                 Assumes to already have added the identity connections.
                 Shape: [batch_size, num_nodes, num_nodes]
@@ -302,13 +302,13 @@ class GATLayer(nn.Module):

     def __init__(self, c_in, c_out, num_heads=1, concat_heads=True, alpha=0.2):
         """
-        Inputs:
-            c_in - Dimensionality of input features
-            c_out - Dimensionality of output features
-            num_heads - Number of heads, i.e. attention mechanisms to apply in parallel. The
+        Args:
+            c_in: Dimensionality of input features
+            c_out: Dimensionality of output features
+            num_heads: Number of heads, i.e. attention mechanisms to apply in parallel. The
                 output features are equally split up over the heads if concat_heads=True.
-            concat_heads - If True, the output of the different heads is concatenated instead of averaged.
-            alpha - Negative slope of the LeakyReLU activation.
+            concat_heads: If True, the output of the different heads is concatenated instead of averaged.
+            alpha: Negative slope of the LeakyReLU activation.
         """
         super().__init__()
         self.num_heads = num_heads
@@ -328,10 +328,10 @@ def __init__(self, c_in, c_out, num_heads=1, concat_heads=True, alpha=0.2):

     def forward(self, node_feats, adj_matrix, print_attn_probs=False):
         """
-        Inputs:
-            node_feats - Input features of the node. Shape: [batch_size, c_in]
-            adj_matrix - Adjacency matrix including self-connections. Shape: [batch_size, num_nodes, num_nodes]
-            print_attn_probs - If True, the attention weights are printed during the forward pass
+        Args:
+            node_feats: Input features of the node. Shape: [batch_size, c_in]
+            adj_matrix: Adjacency matrix including self-connections. Shape: [batch_size, num_nodes, num_nodes]
+            print_attn_probs: If True, the attention weights are printed during the forward pass
                 (for debugging purposes)
         """
         batch_size, num_nodes = node_feats.size(0), node_feats.size(1)
@@ -507,14 +507,14 @@ def __init__(
         **kwargs,
     ):
         """
-        Inputs:
-            c_in - Dimension of input features
-            c_hidden - Dimension of hidden features
-            c_out - Dimension of the output features. Usually number of classes in classification
-            num_layers - Number of "hidden" graph layers
-            layer_name - String of the graph layer to use
-            dp_rate - Dropout rate to apply throughout the network
-            kwargs - Additional arguments for the graph layer (e.g. number of heads for GAT)
+        Args:
+            c_in: Dimension of input features
+            c_hidden: Dimension of hidden features
+            c_out: Dimension of the output features. Usually number of classes in classification
+            num_layers: Number of "hidden" graph layers
+            layer_name: String of the graph layer to use
+            dp_rate: Dropout rate to apply throughout the network
+            kwargs: Additional arguments for the graph layer (e.g. number of heads for GAT)
         """
         super().__init__()
         gnn_layer = gnn_layer_by_name[layer_name]
@@ -533,9 +533,9 @@ def __init__(

     def forward(self, x, edge_index):
         """
-        Inputs:
-            x - Input features per node
-            edge_index - List of vertex index pairs representing the edges in the graph (PyTorch geometric notation)
+        Args:
+            x: Input features per node
+            edge_index: List of vertex index pairs representing the edges in the graph (PyTorch geometric notation)
         """
         for layer in self.layers:
             # For graph layers, we need to add the "edge_index" tensor as additional input
@@ -560,12 +560,12 @@ class MLPModel(nn.Module):

     def __init__(self, c_in, c_hidden, c_out, num_layers=2, dp_rate=0.1):
         """
-        Inputs:
-            c_in - Dimension of input features
-            c_hidden - Dimension of hidden features
-            c_out - Dimension of the output features. Usually number of classes in classification
-            num_layers - Number of hidden layers
-            dp_rate - Dropout rate to apply throughout the network
+        Args:
+            c_in: Dimension of input features
+            c_hidden: Dimension of hidden features
+            c_out: Dimension of the output features. Usually number of classes in classification
+            num_layers: Number of hidden layers
+            dp_rate: Dropout rate to apply throughout the network
         """
         super().__init__()
         layers = []
@@ -578,8 +578,8 @@ def __init__(self, c_in, c_hidden, c_out, num_layers=2, dp_rate=0.1):

     def forward(self, x, *args, **kwargs):
         """
-        Inputs:
-            x - Input features per node
+        Args:
+            x: Input features per node
         """
         return self.layers(x)
@@ -858,12 +858,12 @@ class GraphGNNModel(nn.Module):

     def __init__(self, c_in, c_hidden, c_out, dp_rate_linear=0.5, **kwargs):
         """
-        Inputs:
-            c_in - Dimension of input features
-            c_hidden - Dimension of hidden features
-            c_out - Dimension of output features (usually number of classes)
-            dp_rate_linear - Dropout rate before the linear layer (usually much higher than inside the GNN)
-            kwargs - Additional arguments for the GNNModel object
+        Args:
+            c_in: Dimension of input features
+            c_hidden: Dimension of hidden features
+            c_out: Dimension of output features (usually number of classes)
+            dp_rate_linear: Dropout rate before the linear layer (usually much higher than inside the GNN)
+            kwargs: Additional arguments for the GNNModel object
         """
         super().__init__()
         self.GNN = GNNModel(
@@ -876,10 +876,10 @@ def __init__(self, c_in, c_hidden, c_out, dp_rate_linear=0.5, **kwargs):

     def forward(self, x, edge_index, batch_idx):
         """
-        Inputs:
-            x - Input features per node
-            edge_index - List of vertex index pairs representing the edges in the graph (PyTorch geometric notation)
-            batch_idx - Index of batch element for each node
+        Args:
+            x: Input features per node
+            edge_index: List of vertex index pairs representing the edges in the graph (PyTorch geometric notation)
+            batch_idx: Index of batch element for each node
         """
         x = self.GNN(x, edge_index)
         x = geom_nn.global_mean_pool(x, batch_idx)  # Average pooling
diff --git a/course_UvA-DL/normalizing-flows/NF_image_modeling.py b/course_UvA-DL/normalizing-flows/NF_image_modeling.py
index 188881966..2ef9eb727 100644
--- a/course_UvA-DL/normalizing-flows/NF_image_modeling.py
+++ b/course_UvA-DL/normalizing-flows/NF_image_modeling.py
@@ -258,9 +258,9 @@ class ImageFlow(pl.LightningModule):

     def __init__(self, flows, import_samples=8):
         """
-        Inputs:
-            flows - A list of flows (each a nn.Module) that should be applied on the images.
-            import_samples - Number of importance samples to use during testing (see explanation below). Can be changed at any time
+        Args:
+            flows: A list of flows (each a nn.Module) that should be applied on the images.
+            import_samples: Number of importance samples to use during testing (see explanation below). Can be changed at any time
         """
         super().__init__()
         self.flows = nn.ModuleList(flows)
@@ -282,10 +282,10 @@ def encode(self, imgs):
         return z, ldj

     def _get_likelihood(self, imgs, return_ll=False):
-        """
-        Given a batch of images, return the likelihood of those.
-        If return_ll is True, this function returns the log likelihood of the input.
-        Otherwise, the ouptut metric is bits per dimension (scaled negative log likelihood)
+        """Given a batch of images, return the likelihood of those.
+
+        If return_ll is True, this function returns the log likelihood of the input. Otherwise, the output metric is
+        bits per dimension (scaled negative log likelihood)
         """
         z, ldj = self.encode(imgs)
         log_pz = self.prior.log_prob(z).sum(dim=[1, 2, 3])
@@ -297,9 +297,7 @@ def _get_likelihood(self, imgs, return_ll=False):
     @torch.no_grad()
     def sample(self, img_shape, z_init=None):
-        """
-        Sample a batch of images from the flow.
-        """
+        """Sample a batch of images from the flow."""
         # Sample latent representation from prior
         if z_init is None:
             z = self.prior.sample(sample_shape=img_shape).to(device)
@@ -401,10 +399,10 @@ class Dequantization(nn.Module):

     def __init__(self, alpha=1e-5, quants=256):
         """
-        Inputs:
-            alpha - small constant that is used to scale the original input.
+        Args:
+            alpha: small constant that is used to scale the original input.
                 Prevents dealing with values very close to 0 and 1 when inverting the sigmoid
-            quants - Number of possible discrete values (usually 256 for 8-bit image)
+            quants: Number of possible discrete values (usually 256 for 8-bit image)
         """
         super().__init__()
         self.alpha = alpha
@@ -484,9 +482,7 @@ def dequant(self, z, ldj):


 def visualize_dequantization(quants, prior=None):
-    """
-    Function for visualizing the dequantization values of discrete values in continuous space
-    """
+    """Function for visualizing the dequantization values of discrete values in continuous space."""
     # Prior over discrete values. If not given, a uniform is assumed
     if prior is None:
         prior = np.ones(quants, dtype=np.float32) / quants
@@ -592,9 +588,9 @@ class VariationalDequantization(Dequantization):

     def __init__(self, var_flows, alpha=1e-5):
         """
-        Inputs:
-            var_flows - A list of flow transformations to use for modeling q(u|x)
-            alpha - Small constant, see Dequantization for details
+        Args:
+            var_flows: A list of flow transformations to use for modeling q(u|x)
+            alpha: Small constant, see Dequantization for details
         """
         super().__init__(alpha=alpha)
         self.flows = nn.ModuleList(var_flows)
@@ -658,14 +654,14 @@ def dequant(self, z, ldj):
 class CouplingLayer(nn.Module):

     def __init__(self, network, mask, c_in):
-        """
-        Coupling layer inside a normalizing flow.
-        Inputs:
-            network - A PyTorch nn.Module constituting the deep neural network for mu and sigma.
+        """Coupling layer inside a normalizing flow.
+
+        Args:
+            network: A PyTorch nn.Module constituting the deep neural network for mu and sigma.
                 Output shape should be twice the channel size as the input.
-            mask - Binary mask (0 or 1) where 0 denotes that the element should be transformed,
+            mask: Binary mask (0 or 1) where 0 denotes that the element should be transformed,
                 while 1 means the latent will be used as input to the NN.
-            c_in - Number of input channels
+            c_in: Number of input channels
         """
         super().__init__()
         self.network = network
@@ -676,12 +672,12 @@ def __init__(self, network, mask, c_in):

     def forward(self, z, ldj, reverse=False, orig_img=None):
         """
-        Inputs:
-            z - Latent input to the flow
-            ldj - The current ldj of the previous flows.
+        Args:
+            z: Latent input to the flow
+            ldj: The current ldj of the previous flows.
                 The ldj of this layer will be added to this tensor.
-            reverse - If True, we apply the inverse of the layer.
-            orig_img (optional) - Only needed in VarDeq. Allows external
+            reverse: If True, we apply the inverse of the layer.
+            orig_img (optional): Only needed in VarDeq. Allows external
                 input to condition the flow on (e.g. original image)
         """
         # Apply network to masked input
@@ -794,8 +790,8 @@ def create_channel_mask(c_in, invert=False):

 # %%
 class ConcatELU(nn.Module):
-    """
-    Activation function that applies ELU in both direction (inverted and plain).
+    """Activation function that applies ELU in both directions (inverted and plain).
+
     Allows non-linearity while providing strong gradients for any input (important for final convolution)
     """

@@ -806,10 +802,10 @@ def forward(self, x):

 class LayerNormChannels(nn.Module):

     def __init__(self, c_in):
-        """
-        This module applies layer norm across channels in an image. Has been shown to work well with ResNet connections.
-        Inputs:
-            c_in - Number of channels of the input
+        """This module applies layer norm across channels in an image.
+
+        Has been shown to work well with ResNet connections.
+        Args:
+            c_in: Number of channels of the input
         """
         super().__init__()
         self.layer_norm = nn.LayerNorm(c_in)
@@ -826,9 +823,9 @@ class GatedConv(nn.Module):

     def __init__(self, c_in, c_hidden):
         """
         This module applies a two-layer convolutional ResNet block with input gate
-        Inputs:
-            c_in - Number of channels of the input
-            c_hidden - Number of hidden dimensions we want to model (usually similar to c_in)
+        Args:
+            c_in: Number of channels of the input
+            c_hidden: Number of hidden dimensions we want to model (usually similar to c_in)
         """
         super().__init__()
         self.net = nn.Sequential(
@@ -845,13 +842,13 @@ def forward(self, x):

 class GatedConvNet(nn.Module):

     def __init__(self, c_in, c_hidden=32, c_out=-1, num_layers=3):
-        """
-        Module that summarizes the previous blocks to a full convolutional neural network.
-        Inputs:
-            c_in - Number of input channels
-            c_hidden - Number of hidden dimensions to use within the network
-            c_out - Number of output channels. If -1, 2 times the input channels are used (affine coupling)
-            num_layers - Number of gated ResNet blocks to apply
+        """Module that summarizes the previous blocks to a full convolutional neural network.
+
+        Args:
+            c_in: Number of input channels
+            c_hidden: Number of hidden dimensions to use within the network
+            c_out: Number of output channels. If -1, 2 times the input channels are used (affine coupling)
+            num_layers: Number of gated ResNet blocks to apply
         """
         super().__init__()
         c_out = c_out if c_out > 0 else 2 * c_in
@@ -1257,10 +1254,10 @@ def print_num_params(model):
 @torch.no_grad()
 def interpolate(model, img1, img2, num_steps=8):
     """
-    Inputs:
-        model - object of ImageFlow class that represents the (trained) flow model
-        img1, img2 - Image tensors of shape [1, 28, 28]. Images between which should be interpolated.
-        num_steps - Number of interpolation steps. 8 interpolation steps mean 6 intermediate pictures besides img1 and img2
+    Args:
+        model: object of ImageFlow class that represents the (trained) flow model
+        img1, img2: Image tensors of shape [1, 28, 28]. Images between which should be interpolated.
+        num_steps: Number of interpolation steps. 8 interpolation steps mean 6 intermediate pictures besides img1 and img2
     """
     imgs = torch.stack([img1, img2], dim=0).to(model.device)
     z, _ = model.encode(imgs)
@@ -1331,9 +1328,9 @@ def interpolate(model, img1, img2, num_steps=8):

 # %%
 def visualize_dequant_distribution(model: ImageFlow, imgs: torch.Tensor, title: str = None):
     """
-    Inputs:
-        model - The flow of which we want to visualize the dequantization distribution
-        imgs - Example training images of which we want to visualize the dequantization distribution
+    Args:
+        model: The flow of which we want to visualize the dequantization distribution
+        imgs: Example training images of which we want to visualize the dequantization distribution
     """
     imgs = imgs.to(device)
     ldj = torch.zeros(imgs.shape[0], dtype=torch.float32).to(device)
diff --git a/course_UvA-DL/transformers-and-MH-attention/Transformers_MHAttention.py b/course_UvA-DL/transformers-and-MH-attention/Transformers_MHAttention.py
index a04e1474e..85215af77 100644
--- a/course_UvA-DL/transformers-and-MH-attention/Transformers_MHAttention.py
+++ b/course_UvA-DL/transformers-and-MH-attention/Transformers_MHAttention.py
@@ -459,11 +459,11 @@ class EncoderBlock(nn.Module):

     def __init__(self, input_dim, num_heads, dim_feedforward, dropout=0.0):
         """
-        Inputs:
-            input_dim - Dimensionality of the input
-            num_heads - Number of heads to use in the attention block
-            dim_feedforward - Dimensionality of the hidden layer in the MLP
-            dropout - Dropout probability to use in the dropout layers
+        Args:
+            input_dim: Dimensionality of the input
+            num_heads: Number of heads to use in the attention block
+            dim_feedforward: Dimensionality of the hidden layer in the MLP
+            dropout: Dropout probability to use in the dropout layers
         """
         super().__init__()
@@ -568,9 +568,9 @@ class PositionalEncoding(nn.Module):

     def __init__(self, d_model, max_len=5000):
         """
-        Inputs
-            d_model - Hidden dimensionality of the input.
-            max_len - Maximum length of a sequence to expect.
+        Args:
+            d_model: Hidden dimensionality of the input.
+            max_len: Maximum length of a sequence to expect.
         """
         super().__init__()
@@ -759,17 +759,17 @@ def __init__(
         input_dropout=0.0
     ):
         """
-        Inputs:
-            input_dim - Hidden dimensionality of the input
-            model_dim - Hidden dimensionality to use inside the Transformer
-            num_classes - Number of classes to predict per sequence element
-            num_heads - Number of heads to use in the Multi-Head Attention blocks
-            num_layers - Number of encoder blocks to use.
-            lr - Learning rate in the optimizer
-            warmup - Number of warmup steps. Usually between 50 and 500
-            max_iters - Number of maximum iterations the model is trained for. This is needed for the CosineWarmup scheduler
-            dropout - Dropout to apply inside the model
-            input_dropout - Dropout to apply on the input features
+        Args:
+            input_dim: Hidden dimensionality of the input
+            model_dim: Hidden dimensionality to use inside the Transformer
+            num_classes: Number of classes to predict per sequence element
+            num_heads: Number of heads to use in the Multi-Head Attention blocks
+            num_layers: Number of encoder blocks to use.
+            lr: Learning rate in the optimizer
+            warmup: Number of warmup steps. Usually between 50 and 500
+            max_iters: Number of maximum iterations the model is trained for. This is needed for the CosineWarmup scheduler
+            dropout: Dropout to apply inside the model
+            input_dropout: Dropout to apply on the input features
         """
         super().__init__()
         self.save_hyperparameters()
@@ -799,10 +799,10 @@ def _create_model(self):

     def forward(self, x, mask=None, add_positional_encoding=True):
         """
-        Inputs:
-            x - Input features of shape [Batch, SeqLen, input_dim]
-            mask - Mask to apply on the attention outputs (optional)
-            add_positional_encoding - If True, we add the positional encoding to the input.
+        Args:
+            x: Input features of shape [Batch, SeqLen, input_dim]
+            mask: Mask to apply on the attention outputs (optional)
+            add_positional_encoding: If True, we add the positional encoding to the input.
                 Might not be desired for some tasks.
         """
         x = self.input_net(x)
@@ -814,8 +814,8 @@ def forward(self, x, mask=None, add_positional_encoding=True):
     @torch.no_grad()
     def get_attention_maps(self, x, mask=None, add_positional_encoding=True):
-        """
-        Function for extracting the attention matrices of the whole Transformer for a single batch.
+        """Function for extracting the attention matrices of the whole Transformer for a single batch.
+
         Input arguments same as the forward pass.
         """
         x = self.input_net(x)
@@ -1280,11 +1280,11 @@ class SetAnomalyDataset(data.Dataset):

     def __init__(self, img_feats, labels, set_size=10, train=True):
         """
-        Inputs:
-            img_feats - Tensor of shape [num_imgs, img_dim]. Represents the high-level features.
-            labels - Tensor of shape [num_imgs], containing the class labels for the images
-            set_size - Number of elements in a set. N-1 are sampled from one class, and one from another one.
-            train - If True, a new set will be sampled every time __getitem__ is called.
+        Args:
+            img_feats: Tensor of shape [num_imgs, img_dim]. Represents the high-level features.
+            labels: Tensor of shape [num_imgs], containing the class labels for the images
+            set_size: Number of elements in a set. N-1 are sampled from one class, and one from another one.
+            train: If True, a new set will be sampled every time __getitem__ is called.
         """
         super().__init__()
         self.img_feats = img_feats
@@ -1309,8 +1309,8 @@ def _create_test_sets(self):
         return test_sets

     def sample_img_set(self, anomaly_label):
-        """
-        Samples a new set of images, given the label of the anomaly.
+        """Samples a new set of images, given the label of the anomaly.
+
         The sampled images come from a different class than anomaly_label
         """
         # Sample class from 0,...,num_classes-1 while skipping anomaly_label as class
diff --git a/lightning_examples/reinforce-learning-DQN/dqn.py b/lightning_examples/reinforce-learning-DQN/dqn.py
index 26176d00d..112916da3 100644
--- a/lightning_examples/reinforce-learning-DQN/dqn.py
+++ b/lightning_examples/reinforce-learning-DQN/dqn.py
@@ -7,6 +7,7 @@
 import numpy as np
 import torch
 from pytorch_lightning import LightningModule, Trainer
+from pytorch_lightning.utilities import DistributedType
 from torch import nn, Tensor
 from torch.optim import Adam, Optimizer
 from torch.utils.data import DataLoader
@@ -18,9 +19,7 @@

 # %%
 class DQN(nn.Module):
-    """
-    Simple MLP network
-    """
+    """Simple MLP network."""

     def __init__(self, obs_size: int, n_actions: int, hidden_size: int = 128):
         """
@@ -54,8 +53,7 @@ def forward(self, x):

 # %%
 class ReplayBuffer:
-    """
-    Replay Buffer for storing past experiences allowing the agent to learn from them
+    """Replay Buffer for storing past experiences allowing the agent to learn from them.

     Args:
         capacity: size of the buffer
@@ -68,8 +66,7 @@ def __len__(self) -> None:
         return len(self.buffer)

     def append(self, experience: Experience) -> None:
-        """
-        Add experience to the buffer
+        """Add experience to the buffer.

         Args:
             experience: tuple (state, action, reward, done, new_state)
@@ -91,9 +88,7 @@ def sample(self, batch_size: int) -> Tuple:

 # %%
 class RLDataset(IterableDataset):
-    """
-    Iterable Dataset containing the ExperienceBuffer
-    which will be updated with new experiences during training
+    """Iterable Dataset containing the ExperienceBuffer which will be updated with new experiences during training.

     Args:
         buffer: replay buffer
@@ -116,9 +111,7 @@ def __iter__(self) -> Tuple:

 # %%
 class Agent:
-    """
-    Base Agent class handeling the interaction with the environment
-    """
+    """Base Agent class handling the interaction with the environment."""

     def __init__(self, env: gym.Env, replay_buffer: ReplayBuffer) -> None:
         """
@@ -132,12 +125,11 @@ def __init__(self, env: gym.Env, replay_buffer: ReplayBuffer) -> None:
         self.state = self.env.reset()

     def reset(self) -> None:
-        """ Resents the environment and updates the state"""
+        """Resets the environment and updates the state."""
         self.state = self.env.reset()

     def get_action(self, net: nn.Module, epsilon: float, device: str) -> int:
-        """Using the given network, decide what action to carry out
-        using an epsilon-greedy policy
+        """Using the given network, decide what action to carry out using an epsilon-greedy policy.

         Args:
             net: DQN network
@@ -168,7 +160,7 @@ def play_step(
         epsilon: float = 0.0,
         device: str = 'cpu',
     ) -> Tuple[float, bool]:
-        """Carries out a single interaction step between the agent and the environment
+        """Carries out a single interaction step between the agent and the environment.

         Args:
             net: DQN network
@@ -200,7 +192,7 @@ def play_step(

 # %%
 class DQNLightning(LightningModule):
-    """ Basic DQN Model """
+    """Basic DQN Model."""

     def __init__(
         self,
@@ -249,9 +241,9 @@ def __init__(
         self.populate(self.hparams.warm_start_steps)

     def populate(self, steps: int = 1000) -> None:
-        """
-        Carries out several random steps through the environment to initially fill
-        up the replay buffer with experiences
+        """Carries out several random steps through the environment to initially fill up the replay buffer with
+        experiences.
         Args:
             steps: number of random steps to populate the buffer with
         """
         for i in range(steps):
             self.agent.play_step(self.net, epsilon=1.0)

     def forward(self, x: Tensor) -> Tensor:
-        """
-        Passes in a state x through the network and gets the q_values of each action as an output
+        """Passes in a state x through the network and gets the q_values of each action as an output.

         Args:
             x: environment state
@@ -273,8 +263,7 @@ def dqn_mse_loss(self, batch: Tuple[Tensor, Tensor]) -> Tensor:
-        """
-        Calculates the mse loss using a mini batch from the replay buffer
+        """Calculates the mse loss using a mini batch from the replay buffer.

         Args:
             batch: current mini batch of replay data
@@ -296,9 +285,8 @@ def dqn_mse_loss(self, batch: Tuple[Tensor, Tensor]) -> Tensor:
         return nn.MSELoss()(state_action_values, expected_state_action_values)

     def training_step(self, batch: Tuple[Tensor, Tensor], nb_batch) -> OrderedDict:
-        """
-        Carries out a single step through the environment to update the replay buffer.
-        Then calculates loss based on the minibatch recieved
+        """Carries out a single step through the environment to update the replay buffer. Then calculates loss
+        based on the minibatch received.

         Args:
             batch: current mini batch of replay data
@@ -320,7 +308,7 @@ def training_step(self, batch: Tuple[Tensor, Tensor], nb_batch) -> OrderedDict:
         # calculates training loss
         loss = self.dqn_mse_loss(batch)

-        if self.trainer.use_dp or self.trainer.use_ddp2:
+        if self.trainer._distrib_type in {DistributedType.DP, DistributedType.DDP2}:
             loss = loss.unsqueeze(0)

         if done:
@@ -344,12 +332,12 @@ def training_step(self, batch: Tuple[Tensor, Tensor], nb_batch) -> OrderedDict:
         return OrderedDict({'loss': loss, 'log': log, 'progress_bar': status})

     def configure_optimizers(self) -> List[Optimizer]:
-        """ Initialize Adam optimizer"""
+        """Initialize Adam optimizer."""
         optimizer = Adam(self.net.parameters(), lr=self.hparams.lr)
         return [optimizer]

     def __dataloader(self) -> DataLoader:
-        """Initialize the Replay Buffer dataset used for retrieving experiences"""
+        """Initialize the Replay Buffer dataset used for retrieving experiences."""
         dataset = RLDataset(self.buffer, self.hparams.episode_length)
         dataloader = DataLoader(
             dataset=dataset,
@@ -358,11 +346,11 @@ def __dataloader(self) -> DataLoader:
         return dataloader

     def train_dataloader(self) -> DataLoader:
-        """Get train loader"""
+        """Get train loader."""
         return self.__dataloader()

     def get_device(self, batch) -> str:
-        """Retrieve device currently being used by minibatch"""
+        """Retrieve device currently being used by minibatch."""
         return batch[0].device.index if self.on_gpu else 'cpu'
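
Reviewer note: the bulk of this patch converts free-form "Inputs:" docstring blocks to the Google-style "Args:" layout, which the new docformatter hook (args: --in-place, --wrap-summaries=115, --wrap-descriptions=120) can then keep wrapped automatically. A minimal sketch of the target shape, using a hypothetical scale_pixels helper that is not part of the patch:

import torch


def scale_pixels(x: torch.Tensor, low: float = -1.0, high: float = 1.0) -> torch.Tensor:
    """Scale an image tensor with integer values between 0 and 255 to the range [low, high].

    Args:
        x: Image tensor with integer values between 0 and 255
        low: Lower bound of the target range
        high: Upper bound of the target range
    """
    # Map 0..255 to 0..1, then stretch and shift into [low, high].
    return (x.float() / 255.0) * (high - low) + low

A one-line summary on the opening quotes, a blank line, then one "name: description" entry per argument, matching what the hunks above produce.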
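Reviewer note: the only behavioral change in the patch is the guard in training_step of dqn.py, where the removed Trainer.use_dp/use_ddp2 flags are replaced by a check against the DistributedType enum imported at the top of the file. A minimal sketch of that check factored into a helper, assuming a PyTorch Lightning 1.x Trainer that exposes the protected _distrib_type attribute; needs_batch_dim is a hypothetical name, not part of the patch:

from pytorch_lightning import Trainer
from pytorch_lightning.utilities import DistributedType


def needs_batch_dim(trainer: Trainer) -> bool:
    """Return True when a scalar loss must gain a leading dimension for DP/DDP2 reduction."""
    # getattr guards against Trainer versions that do not expose `_distrib_type`.
    return getattr(trainer, "_distrib_type", None) in {DistributedType.DP, DistributedType.DDP2}

Keeping the protected-attribute access in one place would make the next Trainer API rename a one-line fix.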