In [7]:
import torch.nn.functional as F
import torch

##### CLASSES
    Tensor

##### FUNCTIONS
    _adaptive_max_pool1d
    _adaptive_max_pool2d
    _adaptive_max_pool3d
    _add_docstr
    _fractional_max_pool2d
    _fractional_max_pool3d
    _get_softmax_dim
    _in_projection
    _in_projection_packed
    _infer_size
    _list_with_default
    _max_pool1d
    _max_pool2d
    _max_pool3d
    _no_grad_embedding_renorm_
    _overload
    _pad
    _pad_circular
    _pair
    _scaled_dot_product_attention
    _single
    _threshold
    _triple
    _unpool_output_size
    _verify_batch_size
    _verify_spatial_size
    adaptive_avg_pool1d
    adaptive_avg_pool2d
    adaptive_avg_pool3d
    adaptive_max_pool1d
    adaptive_max_pool1d_with_indices
    adaptive_max_pool2d
    adaptive_max_pool2d_with_indices
    adaptive_max_pool3d
    adaptive_max_pool3d_with_indices
    affine_grid
    alpha_dropout
    assert_int_or_pair
    avg_pool1d
    avg_pool2d
    avg_pool3d
    batch_norm
    bilinear
    binary_cross_entropy
    binary_cross_entropy_with_logits
    boolean_dispatch
    celu
    celu_
    channel_shuffle
    conv1d
    conv2d
    conv3d
    conv_tbc
    conv_transpose1d
    conv_transpose2d
    conv_transpose3d
    cosine_embedding_loss
    cosine_similarity
    cross_entropy
    ctc_loss
    dropout
    dropout2d
    dropout3d
    elu
    elu_
    embedding
    embedding_bag
    feature_alpha_dropout
    fold
    fractional_max_pool2d
    fractional_max_pool2d_with_indices
    fractional_max_pool3d
    fractional_max_pool3d_with_indices
    gaussian_nll_loss
    gelu
    glu
    grid_sample
    group_norm
    gumbel_softmax
    handle_torch_function
    hardshrink
    hardsigmoid
    hardswish
    hardtanh
    hardtanh_
    has_torch_function
    has_torch_function_unary
    has_torch_function_variadic
    hinge_embedding_loss
    huber_loss
    instance_norm
    interpolate
    kl_div
    l1_loss
    layer_norm
    leaky_relu
    leaky_relu_
    linear
    local_response_norm
    log_softmax
    logsigmoid
    lp_pool1d
    lp_pool2d
    margin_ranking_loss
    max_pool1d
    max_pool1d_with_indices
    max_pool2d
    max_pool2d_with_indices
    max_pool3d
    max_pool3d_with_indices
    max_unpool1d
    max_unpool2d
    max_unpool3d
    mish
    mse_loss
    multi_head_attention_forward
    multi_margin_loss
    multilabel_margin_loss
    multilabel_soft_margin_loss
    nll_loss
    normalize
    one_hot
    pad
    pairwise_distance
    pdist
    pixel_shuffle
    pixel_unshuffle
    poisson_nll_loss
    prelu
    relu
    relu6
    relu_
    rrelu
    rrelu_
    selu
    selu_
    sigmoid
    silu
    smooth_l1_loss
    soft_margin_loss
    softmax
    softmin
    softplus
    softshrink
    softsign
    tanh
    tanhshrink
    threshold
    threshold_
    triplet_margin_loss
    triplet_margin_with_distance_loss
    unfold
    upsample
    upsample_bilinear
    upsample_nearest

##### DATA
    BroadcastingList1 = <torch._jit_internal.BroadcastingListCls object>
    BroadcastingList2 = <torch._jit_internal.BroadcastingListCls object>
    BroadcastingList3 = <torch._jit_internal.BroadcastingListCls object>
    Callable = typing.Callable
    GRID_SAMPLE_INTERPOLATION_MODES = {'bicubic': 2, 'bilinear': 0, ...
    GRID_SAMPLE_PADDING_MODES = {'border': 1, 'reflection': 2, 'zeros': 0}
    List = typing.List
    Optional = typing.Optional
    Tuple = typing.Tuple
    reproducibility_notes = {'backward_reproducibility_note': 'This...
    tf32_notes = {'tf32_note': 'This operator supports :ref:...

# F.avg_pool2d()
```python
avg_pool2d(
    input,
    kernel_size,
    stride=None,
    padding=0,
    ceil_mode=False,
    count_include_pad=True,
    divisor_override=None
) -> Tensor
```
**Docstring**

以步长为 $S_h \times S_w$ 对一个 2 维特征图的每个 $k_h \times k_w$ 区域进行平均池化，输出特征图数量与输入特征图数量相等，更多细节参见`torch.nn.AvgPool2d`

**Args**:

- input: 形状为 $(N_i \times C_i \times H_i \times W_i)$ 的张量

- kernel_size: 可以为单一的整数，或为元组`(k_h, k_w)`

- stride: 可以为单一的整数，或为元组`(s_h, s_w)`，默认与`kernel_size`相同

- padding: 可以为单一的整数，或为元组`(pad_h, pad_w)`，默认为 0

- ceil_mode: 决定在步长不为一时输出特征图的形状，True 时进行向上取整，否则进行向下取整；默认为 False

- count_include_pad: True 时平均池化的运算会将边界扩充的元素考虑在内，默认 True

- divisor_override: 指明时将使用该参数作为除数（即分母），否则会使用池化区域的大小

**Type**:      builtin_function_or_method

# 

# 

## F.conv2d()
```python
F.conv2d(
    input,
    weight,
    bias=None,
    stride=1,
    padding=0,
    dilation=1,
    groups=1
)
```

对输入进行 2D 卷积，该算子支持 TensorFloat32（TF32，详见 PyTorch 文档中的 `tf32_on_ampere` 说明）；更多细节参见`torch.nn.Conv2d`的说明文档；

需要说明的是，对于需要使用 CUDA 的 cuDNN backend 的情况，该操作可能会选择一个具有不确定性的算法以提高性能；若需要算法保持稳定，可设置`torch.backends.cudnn.deterministic = True`，但这同时可能会损失一定的性能；更多背景知识可以参见 PyTorch 文档的 Reproducibility 说明（`notes/randomness`）

##### Args:
- input: input tensor of shape $(\text{minibatch} , \text{in\_channels} , iH , iW)$
- weight: filters of shape $(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , kH , kW)$
- bias: optional bias tensor of shape $(\text{out\_channels})$. Default: `None`
- stride: the stride of the convolving kernel. Can be a single number or a tuple `(sH, sW)`. Default: 1
- padding: implicit paddings on both sides of the input. Can be a string {'valid', 'same'}, single number or a tuple `(padH, padW)`. Default: 0
    - `padding='valid'` is the same as no padding.
    - `padding='same'` pads the input so the output has the same shape as the input. However, this mode doesn't support any stride values other than 1.
    - **Warning**: for `padding='same'`, if the `weight` is even-length and `dilation` is odd in any dimension, a full `F.pad` operation may be needed internally, lowering performance.
- dilation: the spacing between kernel elements. Can be a single number or a tuple `(dH, dW)`. Default: 1
- groups: split input into groups, $\text{in\_channels}$ should be divisible by the number of groups. Default: 1

##### Examples

In [None]:
# With square kernels and equal stride.
# A local seeded generator makes the displayed values reproducible on re-run
# without touching the global RNG state.
rng = torch.Generator().manual_seed(0)
filters = torch.randn(8, 4, 3, 3, generator=rng)  # (out_channels, in_channels / groups, kH, kW)
inputs = torch.randn(1, 4, 5, 5, generator=rng)   # (minibatch, in_channels, iH, iW)
# A 3x3 kernel with stride 1 and padding=1 keeps the 5x5 spatial size,
# so the result has shape (1, 8, 5, 5).
F.conv2d(inputs, filters, padding=1)

#  

# F.pad()
`F.pad(input, pad, mode='constant', value=0)`

对一个张量进行扩充。

需要说明的是，在使用 CUDA 后端时，该操作的反向传播可能具有不确定性，且这种不确定性无法简单地关闭；更多背景知识可以参见 PyTorch 文档的 Reproducibility 说明（`notes/randomness`）

**Args**:

- input: 略
- pad: 对张量各维度扩充的元素个数，应为含有 2m 个元素的元组，此时`input`最后 m 维将会被填充（即被填充的维数为 $\lfloor \text{len(pad)} / 2 \rfloor$）；例如只填充`input`最后一维，则`pad`形状应为 $(N_{left}, N_{right})$，最后两维则 $(N_{left}, N_{right}, N_{top}, N_{bottom})$，最后三维则 $(N_{left}, N_{right}, N_{top}, N_{bottom}, N_{front}, N_{back})$
- mode: 可以是`'constant'`、`'reflect'`、`'replicate'`或`'circular'`，默认`'constant'`；
    - `constant`模式可以对任意维度的张量进行填充；
    - `replicate`模式对 5D 张量的最后 3 个维度进行填充，对 4D 张量最后 2 个维度进行填充，对 3D 张量最后 1 个维度进行填充；
    - `reflect`模式只能对 4D 张量最后 2 个维度进行填充，对 3D 张量最后 1 个维度进行填充；
    - `circular`模式: Tensor values at the beginning are used to pad the end, and values at the
    end are used to pad the beginning. For example, consider a single dimension
    with values [0, 1, 2, 3]. With circular padding of (1, 1) it would be
    padded to [3, 0, 1, 2, 3, 0], and with padding (1, 2) it would be padded to
    [3, 0, 1, 2, 3, 0, 1]. If negative padding is applied then the ends of the
    tensor get removed. With circular padding of (-1, -1) the previous example
    would become [1, 2]. Circular padding of (-1, 1) would produce
    [1, 2, 3, 1]
    - 关于各种扩充机制详见`torch.nn.ConstantPad2d`、`torch.nn.ReflectionPad2d`、`torch.nn.ReplicationPad2d`；

- value: ``'constant'``模式下填充的值，默认 0

##### Examples
1. 在张量左侧扩充 0 列、右侧扩充 1 列、上部扩充 2 行、下部 1 行：

In [16]:
# Integer tensor of shape (2, 3, 3, 2) holding the values 1..36 in order.
x = torch.arange(start=1, end=2 * 3 * 3 * 2 + 1).reshape(2, 3, 3, 2)
# Pad values are given from the last dimension backwards:
# (left, right) for the last dim, then (top, bottom) for the second-to-last.
out = F.pad(x, pad=(0, 1, 2, 1), mode="constant", value=0)
first_plane = out[0, 0]
print(first_plane)

tensor([[0, 0, 0],
        [0, 0, 0],
        [1, 2, 0],
        [3, 4, 0],
        [5, 6, 0],
        [0, 0, 0]])


2. 在张量左侧扩充 0 列、右侧扩充 1 列、上部扩充 2 行、下部 1 行、前后各扩充 1 块：

In [17]:
# Six pad values cover the last three dimensions of `x` (defined in the cell above):
# (left, right, top, bottom, front, back).
out = F.pad(x, pad=(0, 1, 2, 1, 1, 1), mode="constant", value=0)
# Show the first batch element only.
print(out[0])

tensor([[[ 0,  0,  0],
         [ 0,  0,  0],
         [ 0,  0,  0],
         [ 0,  0,  0],
         [ 0,  0,  0],
         [ 0,  0,  0]],

        [[ 0,  0,  0],
         [ 0,  0,  0],
         [ 1,  2,  0],
         [ 3,  4,  0],
         [ 5,  6,  0],
         [ 0,  0,  0]],

        [[ 0,  0,  0],
         [ 0,  0,  0],
         [ 7,  8,  0],
         [ 9, 10,  0],
         [11, 12,  0],
         [ 0,  0,  0]],

        [[ 0,  0,  0],
         [ 0,  0,  0],
         [13, 14,  0],
         [15, 16,  0],
         [17, 18,  0],
         [ 0,  0,  0]],

        [[ 0,  0,  0],
         [ 0,  0,  0],
         [ 0,  0,  0],
         [ 0,  0,  0],
         [ 0,  0,  0],
         [ 0,  0,  0]]])


3. 对 ResNet 残差层的 pad 方案的实现

In [3]:
channels = 8


def lambdaLayer(lambda_, t):
    """Apply the callable ``lambda_`` to ``t`` and return whatever it returns."""
    result = lambda_(t)
    return result


# Demo input: an all-ones tensor of shape (1, 3, 8, 4).
demo_input = torch.ones(1, 3, 8, 4)

# The lambda keeps every second row and column of the last two dims (8x4 -> 4x2),
# then zero-pads only the third-from-last dim with channels // 4 planes on each
# side (3 -> 3 + 2 + 2 = 7). The two leading (0, 0) pairs leave the last two dims untouched.
out = lambdaLayer(
    lambda y: F.pad(
        y[:, :, ::2, ::2],
        pad=(0, 0, 0, 0, channels // 4, channels // 4),
        mode="constant",
        value=0,
    ),
    demo_input,
)

print(out.shape)
print(out)

torch.Size([1, 7, 4, 2])
tensor([[[[0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.]],

         [[1., 1.],
          [1., 1.],
          [1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.],
          [1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.],
          [1., 1.],
          [1., 1.]],

         [[0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.]]]])


In [33]:
# 3x4 float64 grid holding 0..11, with leading batch and channel dims of size 1.
canvas = torch.arange(3 * 4, dtype=torch.float64).reshape(1, 1, 3, 4)
# Circular padding wraps around: one column on each side and one row on the
# top and bottom are taken from the opposite edge.
F.pad(canvas, pad=(1, 1, 1, 1), mode="circular")

tensor([[[[11.,  8.,  9., 10., 11.,  8.],
          [ 3.,  0.,  1.,  2.,  3.,  0.],
          [ 7.,  4.,  5.,  6.,  7.,  4.],
          [11.,  8.,  9., 10., 11.,  8.],
          [ 3.,  0.,  1.,  2.,  3.,  0.]]]], dtype=torch.float64)