From 07bcdccdfc99f56db82ec58caada24b75cd18967 Mon Sep 17 00:00:00 2001 From: risemeup1 <515586620@qq.com> Date: Mon, 5 Dec 2022 09:39:57 +0000 Subject: [PATCH 01/15] move BatchNorm from flud.dygraph.nn to paddle.nn.layer.norm --- .../slim/tests/test_imperative_skip_op.py | 1 + python/paddle/fluid/dygraph/nn.py | 306 ----------------- .../unittests/dygraph_to_static/darknet.py | 2 +- .../dygraph_to_static/test_cycle_gan.py | 4 +- .../dygraph_to_static/test_resnet.py | 3 +- .../dygraph_to_static/test_se_resnet.py | 3 +- .../unittests/dygraph_to_static/test_tsm.py | 3 +- .../unittests/mlu/test_batch_norm_op_mlu.py | 8 +- .../mlu/test_batch_norm_op_mlu_v2.py | 8 +- .../unittests/npu/test_batch_norm_op_npu.py | 4 +- .../tests/unittests/test_batch_norm_op_v2.py | 13 +- .../test_imperative_load_static_param.py | 2 +- .../test_imperative_ocr_attention_model.py | 2 +- .../tests/unittests/test_imperative_resnet.py | 3 +- .../unittests/test_imperative_se_resnext.py | 3 +- .../unittests/xpu/test_batch_norm_op_xpu.py | 3 +- python/paddle/nn/layer/norm.py | 310 +++++++++++++++++- 17 files changed, 343 insertions(+), 335 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py index 131866095ad7b..3b19ee76c3554 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py @@ -26,6 +26,7 @@ from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, ReLU6 from paddle.nn import Linear, Conv2D, Softmax, BatchNorm +from paddle.nn.norm import BatchNorm from paddle.fluid.dygraph.nn import Pool2D from paddle.fluid.log_helper import get_logger diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 4c8b9d7f555f1..9cced53b6edd9 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -52,7 +52,6 @@ 'Conv3D', 'Pool2D', 'Linear', - 'BatchNorm', 'Dropout', 'Embedding', 'GRUUnit', @@ -1031,311 +1030,6 @@ def forward(self, input): return instance_norm_out -class BatchNorm(layers.Layer): - r""" - - This interface is used to construct a callable object of the ``BatchNorm`` class. - For more details, refer to code examples. - It implements the function of the Batch Normalization Layer and can be used - as a normalizer function for conv2d and fully connected operations. - The data is normalized by the mean and variance of the channel based on the current batch data. - Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing - Internal Covariate Shift `_ - for more details. - - When use_global_stats = False, the :math:`\mu_{\beta}` - and :math:`\sigma_{\beta}^{2}` are the statistics of one mini-batch. - Calculated as follows: - - .. math:: - - \mu_{\beta} &\gets \frac{1}{m} \sum_{i=1}^{m} x_i \qquad & - //\ mini-batch\ mean \\ - \sigma_{\beta}^{2} &\gets \frac{1}{m} \sum_{i=1}^{m}(x_i - \mu_{\beta})^2 \qquad & - //\ mini-batch\ variance \\ - - - :math:`x` : mini-batch data - - :math:`m` : the size of the mini-batch data - - When use_global_stats = True, the :math:`\\mu_{\\beta}` - and :math:`\\sigma_{\\beta}^{2}` are not the statistics of one mini-batch. - They are global or running statistics (moving_mean and moving_variance). It usually got from the - pre-trained model. Calculated as follows: - - .. math:: - moving\_mean = moving\_mean * momentum + \mu_{\beta} * (1. 
- momentum) \quad &// global mean \\ - moving\_variance = moving\_variance * momentum + \sigma_{\beta}^{2} * (1. - momentum) \quad &// global variance \\ - - The normalization function formula is as follows: - - .. math:: - - \hat{x_i} &\gets \frac{x_i - \mu_\beta} {\sqrt{\ - \sigma_{\beta}^{2} + \epsilon}} \qquad &//\ normalize \\ - y_i &\gets \gamma \hat{x_i} + \beta \qquad &//\ scale\ and\ shift - - - - :math:`\epsilon` : add a smaller value to the variance to prevent division by zero - - :math:`\gamma` : trainable proportional parameter - - :math:`\beta` : trainable deviation parameter - - Parameters: - num_channels(int): Indicate the number of channels of the input ``Tensor``. - act(str, optional): Activation to be applied to the output of batch normalization. Default: None. - is_test (bool, optional): A flag indicating whether it is in test phrase or not. - This flag only has effect on static graph mode. For dygraph mode, please use ``eval()``. - Default: False. - momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9. - epsilon(float, optional): The small value added to the variance to prevent division by zero. Default: 1e-5. - param_attr(ParamAttr, optional): The parameter attribute for Parameter `scale` - of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm - will create ParamAttr as param_attr. If the Initializer of the param_attr - is not set, the parameter is initialized with Xavier. Default: None. - bias_attr(ParamAttr, optional): The parameter attribute for the bias of batch_norm. - If it is set to None or one attribute of ParamAttr, batch_norm - will create ParamAttr as bias_attr. If the Initializer of the bias_attr - is not set, the bias is initialized zero. Default: None. - dtype(str, optional): Indicate the data type of the input ``Tensor``, - which can be float32 or float64. Default: float32. - data_layout(str, optional): Specify the input data format, the data format can be "NCHW" or "NHWC". Default: NCHW. - in_place(bool, optional): Make the input and output of batch norm reuse memory. Default: False. - moving_mean_name(str, optional): The name of moving_mean which store the global Mean. Default: None. - moving_variance_name(str, optional): The name of the moving_variance which store the global Variance. Default: None. - do_model_average_for_mean_and_var(bool, optional): Whether parameter mean and variance should do model - average when model average is enabled. Default: True. - use_global_stats(bool, optional): Whether to use global mean and - variance. In inference or test mode, set use_global_stats to true - or is_test to true, and the behavior is equivalent. - In train mode, when setting use_global_stats True, the global mean - and variance are also used during train period. Default: False. - trainable_statistics(bool, optional): Whether to calculate mean and var in eval mode. In eval mode, when - setting trainable_statistics True, mean and variance will be calculated by current batch statistics. - Default: False. - - Returns: - None - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - from paddle.fluid.dygraph.base import to_variable - import numpy as np - - x = np.random.random(size=(3, 10, 3, 7)).astype('float32') - with fluid.dygraph.guard(): - x = to_variable(x) - batch_norm = fluid.BatchNorm(10) - hidden1 = batch_norm(x) - """ - - def __init__( - self, - num_channels, - act=None, - is_test=False, - momentum=0.9, - epsilon=1e-05, - param_attr=None, - bias_attr=None, - dtype='float32', - data_layout='NCHW', - in_place=False, - moving_mean_name=None, - moving_variance_name=None, - do_model_average_for_mean_and_var=True, - use_global_stats=False, - trainable_statistics=False, - ): - super().__init__() - self._param_attr = param_attr - self._bias_attr = bias_attr - self._act = act - self._use_mkldnn = _global_flags()["FLAGS_use_mkldnn"] - - assert ( - bias_attr is not False - ), "bias_attr should not be False in batch_norm." - - if dtype == "float16": - self._dtype = "float32" - else: - self._dtype = dtype - - param_shape = [num_channels] - - # create parameter - self.weight = self.create_parameter( - attr=self._param_attr, - shape=param_shape, - dtype=self._dtype, - default_initializer=Constant(1.0), - ) - self.weight.stop_gradient = ( - use_global_stats and self._param_attr.learning_rate == 0.0 - ) - - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=param_shape, - dtype=self._dtype, - is_bias=True, - ) - self.bias.stop_gradient = ( - use_global_stats and self._param_attr.learning_rate == 0.0 - ) - - self._mean = self.create_parameter( - attr=ParamAttr( - name=moving_mean_name, - initializer=Constant(0.0), - trainable=False, - do_model_average=do_model_average_for_mean_and_var, - ), - shape=param_shape, - dtype=self._dtype, - ) - self._mean.stop_gradient = True - - self._variance = self.create_parameter( - attr=ParamAttr( - name=moving_variance_name, - initializer=Constant(1.0), - trainable=False, - do_model_average=do_model_average_for_mean_and_var, - ), - shape=param_shape, - dtype=self._dtype, - ) - self._variance.stop_gradient = True - - self._in_place = in_place - self._data_layout = data_layout - self._momentum = momentum - self._epsilon = epsilon - self._is_test = is_test - self._fuse_with_relu = False - self._use_global_stats = use_global_stats - self._trainable_statistics = trainable_statistics - - def forward(self, input): - # create output - # mean and mean_out share the same memory - mean_out = self._mean - # variance and variance out share the same memory - variance_out = self._variance - - if _non_static_mode(): - if in_dygraph_mode(): - batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm( - input, - self._mean, - self._variance, - self.weight, - self.bias, - not self.training, - self._momentum, - self._epsilon, - self._data_layout, - self._use_global_stats, - self._trainable_statistics, - ) - return dygraph_utils._append_activation_in_dygraph( - batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn - ) - - elif _in_legacy_dygraph(): - attrs = ( - "momentum", - self._momentum, - "epsilon", - self._epsilon, - "is_test", - not self.training, - "data_layout", - self._data_layout, - "use_mkldnn", - self._use_mkldnn, - "fuse_with_relu", - self._fuse_with_relu, - "use_global_stats", - self._use_global_stats, - 'trainable_statistics', - self._trainable_statistics, - ) - batch_norm_out, _, _, _, _, _ = _legacy_C_ops.batch_norm( - input, - self.weight, - self.bias, - self._mean, - self._variance, - None, - mean_out, - variance_out, - *attrs - ) - - return 
dygraph_utils._append_activation_in_dygraph( - batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn - ) - - check_variable_and_dtype( - input, 'input', ['float16', 'float32', 'float64'], 'BatchNorm' - ) - - attrs = { - "momentum": self._momentum, - "epsilon": self._epsilon, - "is_test": self._is_test, - "data_layout": self._data_layout, - "use_mkldnn": False, - "fuse_with_relu": self._fuse_with_relu, - "use_global_stats": self._use_global_stats, - "trainable_statistics": self._trainable_statistics, - } - - inputs = { - "X": [input], - "Scale": [self.weight], - "Bias": [self.bias], - "Mean": [self._mean], - "Variance": [self._variance], - } - - saved_mean = self._helper.create_variable_for_type_inference( - dtype=self._dtype, stop_gradient=True - ) - saved_variance = self._helper.create_variable_for_type_inference( - dtype=self._dtype, stop_gradient=True - ) - reserve_space = self._helper.create_variable_for_type_inference( - dtype=self._helper.input_dtype(input), stop_gradient=True - ) - - batch_norm_out = ( - input - if self._in_place - else self._helper.create_variable_for_type_inference(self._dtype) - ) - - outputs = { - "Y": [batch_norm_out], - "MeanOut": [mean_out], - "VarianceOut": [variance_out], - "SavedMean": [saved_mean], - "SavedVariance": [saved_variance], - } - if reserve_space is not None: - outputs["ReserveSpace"] = [reserve_space] - - self._helper.append_op( - type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs - ) - - # Currently, we don't support inplace in dygraph mode - return self._helper.append_activation(batch_norm_out, self._act) - - class Dropout(layers.Layer): """ This interface is used to construct a callable object of the ``Dropout`` class. diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py index b1cb22c57008d..1b6879e589df9 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py @@ -17,7 +17,7 @@ from paddle.fluid.param_attr import ParamAttr from paddle.fluid.regularizer import L2Decay -from paddle.fluid.dygraph.nn import BatchNorm +from paddle.nn.norm import BatchNorm class ConvBNLayer(fluid.dygraph.Layer): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py index ab79a05796de4..9de875c19f3ef 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py @@ -37,7 +37,9 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import to_variable, declarative, ProgramTranslator -from paddle.fluid.dygraph.nn import Conv2DTranspose, BatchNorm +from paddle.fluid.dygraph.nn import Conv2DTranspose +from paddle.nn.layer.norm import BatchNorm + # Note: Set True to eliminate randomness. # 1. 
For one operation, cuDNN has several algorithms, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py index 594a3fa71f894..91a0ec083d1d2 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py @@ -23,7 +23,8 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import ProgramTranslator -from paddle.fluid.dygraph.nn import BatchNorm, Linear, Pool2D +from paddle.fluid.dygraph.nn import Linear, Pool2D +from paddle.nn.layer.norm import BatchNorm from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from predictor_utils import PredictorTools diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py index ea87ba5ba68fd..6bfed0f1798d3 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py @@ -23,7 +23,8 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import BatchNorm, Linear, Pool2D +from paddle.fluid.dygraph.nn import Linear, Pool2D +from paddle.nn.layer.norm import BatchNorm from paddle.fluid.dygraph import declarative from paddle.fluid.dygraph import ProgramTranslator from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py index cc307e5a7bb16..4f8b5c4e872cc 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py @@ -21,7 +21,8 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import declarative, ProgramTranslator, to_variable -from paddle.fluid.dygraph.nn import BatchNorm, Linear, Pool2D +from paddle.fluid.dygraph.nn import Linear, Pool2D +from paddle.nn.layer.norm import BatchNorm from tsm_config_utils import merge_configs, parse_config, print_configs random.seed(0) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py index 53b78e18f8861..25c83440a030d 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py @@ -753,7 +753,7 @@ def test_errors(self): class TestDygraphBatchNormAPIError(unittest.TestCase): def test_errors(self): with program_guard(Program(), Program()): - batch_norm = fluid.dygraph.BatchNorm(10) + batch_norm = nn.layer.norm.BatchNorm(10) # the input of BatchNorm must be Variable. 
x1 = fluid.create_lod_tensor( np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace() @@ -776,7 +776,7 @@ def test_dygraph(self): def compute(x, is_test, trainable_statistics): with fluid.dygraph.guard(p): - bn = fluid.dygraph.BatchNorm( + bn = nn.layer.norm.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -799,7 +799,7 @@ def test_static(self): def compute(x_np, is_test, trainable_statistics): with program_guard(Program(), Program()): - bn = fluid.dygraph.BatchNorm( + bn = nn.layer.norm.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -824,7 +824,7 @@ def test_reservespace(self): x = fluid.data(name='x', shape=x.shape, dtype=x.dtype) # Set this FLAG, the BatchNorm API will pass "reserve_space" argument into batch_norm op. os.environ['FLAGS_cudnn_batchnorm_spatial_persistent'] = '1' - batch_norm = fluid.dygraph.BatchNorm(7, data_layout="NHWC") + batch_norm = nn.layer.norm.BatchNorm(7, data_layout="NHWC") hidden1 = batch_norm(x) os.environ['FLAGS_cudnn_batchnorm_spatial_persistent'] = '0' diff --git a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py index 72e7ac89caf36..c3e9a042233bd 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py @@ -95,7 +95,7 @@ def test_dygraph(self): def compute_v1(x, is_test, trainable_statistics): with fluid.dygraph.guard(p): - bn = fluid.dygraph.BatchNorm( + bn = nn.layer.norm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -111,7 +111,7 @@ def compute_v2(x): def compute_v3(x, is_test, trainable_statistics): with fluid.dygraph.guard(p): - bn = fluid.dygraph.BatchNorm( + bn = nn.layer.norm( shape[1], is_test=is_test, param_attr=fluid.ParamAttr( @@ -153,7 +153,7 @@ def test_static(self): def compute_v1(x_np, is_test, trainable_statistics): with program_guard(Program(), Program()): - bn = fluid.dygraph.BatchNorm( + bn = nn.layer.norm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -260,7 +260,7 @@ def test_global_stats(self): for p in self.places: with fluid.dygraph.guard(p): x = paddle.randn([2, 6, 6, 4]) - net1 = paddle.fluid.dygraph.BatchNorm( + net1 = paddle.nn.layer.norm( 6, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(1.0) diff --git a/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py index e39506eed7a9b..2eeacffe27058 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py @@ -562,7 +562,7 @@ def test_dygraph(self): def compute(x, is_test, trainable_statistics): with fluid.dygraph.guard(p): - bn = fluid.dygraph.BatchNorm( + bn = nn.layer.norm.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -583,7 +583,7 @@ def test_static(self): def compute(x_np, is_test, trainable_statistics): with program_guard(Program(), Program()): - bn = fluid.dygraph.BatchNorm( + bn = nn.layer.norm.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py index 614e058c6dead..0aa8f4cfa3c76 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py +++ 
b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py @@ -16,6 +16,7 @@ import numpy as np import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.nn as nn from paddle.fluid.framework import _test_eager_guard import paddle.fluid as fluid from paddle.fluid import Program, program_guard @@ -81,7 +82,7 @@ def error3d(): def test_large_batch(self): def compute_baseline(x): with fluid.dygraph.guard(p): - bn = fluid.dygraph.BatchNorm(shape[1]) + bn = nn.layer.norm.BatchNorm(shape[1]) x1 = paddle.to_tensor(x) x1.stop_gradient = False y = bn(x1) @@ -127,7 +128,7 @@ def test_eager_api(self): def compute_v1(x): with fluid.dygraph.guard(p): - bn = fluid.dygraph.BatchNorm(shape[1]) + bn = nn.layer.norm.BatchNorm() # bn = paddle.nn.BatchNorm2D(shape[1]) x1 = paddle.to_tensor(x) x1.stop_gradient = False @@ -161,7 +162,7 @@ def test_dygraph(self): def compute_v1(x, is_test, trainable_statistics): with fluid.dygraph.guard(p): - bn = fluid.dygraph.BatchNorm( + bn = nn.layer.norm.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -182,7 +183,7 @@ def compute_v2(x): def compute_v3(x, is_test, trainable_statistics): with fluid.dygraph.guard(p): - bn = fluid.dygraph.BatchNorm( + bn = nn.layer.norm.BatchNorm( shape[1], is_test=is_test, param_attr=fluid.ParamAttr( @@ -224,7 +225,7 @@ def test_static(self): def compute_v1(x_np, is_test, trainable_statistics): with program_guard(Program(), Program()): - bn = fluid.dygraph.BatchNorm( + bn = nn.layer.norm.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -378,7 +379,7 @@ def test_global_stats(self): for p in self.places: with fluid.dygraph.guard(p): x = paddle.randn([2, 6, 6, 4]) - net1 = paddle.fluid.dygraph.BatchNorm( + net1 = nn.layer.norm.BatchNorm( 6, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(1.0) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py index c0f6badfe2c97..1277b9f89d364 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -17,7 +17,6 @@ import paddle.fluid as fluid import paddle.fluid.framework as framework from paddle.fluid.dygraph.nn import ( - BatchNorm, Conv3D, Embedding, GroupNorm, @@ -26,6 +25,7 @@ NCE, PRelu, ) +from paddle.nn.layer.norm import BatchNorm import numpy as np import os import tempfile diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index 08a32aeaa9971..9a63c5ce700e6 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -20,10 +20,10 @@ from paddle.fluid.dygraph.nn import ( Pool2D, Linear, - BatchNorm, Embedding, GRUUnit, ) +from paddle.nn.layer.norm import BatchNorm from paddle.fluid.dygraph.base import to_variable from test_imperative_base import new_program_scope from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index 328245ab9c935..12abd58d8eda6 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -19,7 +19,8 @@ import 
paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid import Pool2D, BatchNorm, Linear +from paddle.fluid import Pool2D, Linear +from paddle.nn.layer.norm import BatchNorm from paddle.fluid.dygraph.base import to_variable from test_imperative_base import new_program_scope from utils import DyGraphProgramDescTracerTestHelper, is_equal_program diff --git a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py index a0518f7ba7b43..9ec2dc431f58f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py @@ -19,7 +19,8 @@ import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.dygraph.nn import Pool2D, BatchNorm, Linear +from paddle.fluid.dygraph.nn import Pool2D, Linear +from paddle.nn.layer.norm import BatchNorm from test_imperative_base import new_program_scope from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py index bf90dfd870584..bdcf2f5927330 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py @@ -20,6 +20,7 @@ import paddle.fluid.core as core import paddle import paddle.fluid as fluid +import paddle.nn as nn import paddle.nn.functional as F from xpu.get_test_cover_info import ( create_test_class, @@ -364,7 +365,7 @@ def test_global_stats(self): for p in self.places: with fluid.dygraph.guard(p): x = paddle.randn([2, 6, 6, 4]) - net1 = paddle.fluid.dygraph.BatchNorm( + net1 = nn.layer.norm.BatchNorm( 6, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(1.0) diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 01fd204cab9a4..769c9e13900f0 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -27,17 +27,17 @@ # TODO: define normalization api -from ...fluid.dygraph import BatchNorm # noqa: F401 + from ...fluid.dygraph import SpectralNorm # noqa: F401 from ...framework import get_default_dtype - +from ...fluid import dygraph_utils from ..initializer import Constant from ...framework import ParamAttr from ...fluid.data_feeder import check_variable_and_dtype from ..functional import batch_norm, layer_norm, instance_norm - +from ..framework import _non_static_mode, _global_flags import numpy as np import numbers import warnings @@ -730,6 +730,310 @@ def extra_repr(self): return main_str +class BatchNorm(Layer): + r""" + This interface is used to construct a callable object of the ``BatchNorm`` class. + For more details, refer to code examples. + It implements the function of the Batch Normalization Layer and can be used + as a normalizer function for conv2d and fully connected operations. + The data is normalized by the mean and variance of the channel based on the current batch data. + Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing + Internal Covariate Shift `_ + for more details. + + When use_global_stats = False, the :math:`\mu_{\beta}` + and :math:`\sigma_{\beta}^{2}` are the statistics of one mini-batch. + Calculated as follows: + + .. 
math:: + + \mu_{\beta} &\gets \frac{1}{m} \sum_{i=1}^{m} x_i \qquad & + //\ mini-batch\ mean \\ + \sigma_{\beta}^{2} &\gets \frac{1}{m} \sum_{i=1}^{m}(x_i - \mu_{\beta})^2 \qquad & + //\ mini-batch\ variance \\ + + - :math:`x` : mini-batch data + - :math:`m` : the size of the mini-batch data + + When use_global_stats = True, the :math:`\\mu_{\\beta}` + and :math:`\\sigma_{\\beta}^{2}` are not the statistics of one mini-batch. + They are global or running statistics (moving_mean and moving_variance). It usually got from the + pre-trained model. Calculated as follows: + + .. math:: + moving\_mean = moving\_mean * momentum + \mu_{\beta} * (1. - momentum) \quad &// global mean \\ + moving\_variance = moving\_variance * momentum + \sigma_{\beta}^{2} * (1. - momentum) \quad &// global variance \\ + + The normalization function formula is as follows: + + .. math:: + + \hat{x_i} &\gets \frac{x_i - \mu_\beta} {\sqrt{\ + \sigma_{\beta}^{2} + \epsilon}} \qquad &//\ normalize \\ + y_i &\gets \gamma \hat{x_i} + \beta \qquad &//\ scale\ and\ shift + + + - :math:`\epsilon` : add a smaller value to the variance to prevent division by zero + - :math:`\gamma` : trainable proportional parameter + - :math:`\beta` : trainable deviation parameter + + Parameters: + num_channels(int): Indicate the number of channels of the input ``Tensor``. + act(str, optional): Activation to be applied to the output of batch normalization. Default: None. + is_test (bool, optional): A flag indicating whether it is in test phrase or not. + This flag only has effect on static graph mode. For dygraph mode, please use ``eval()``. + Default: False. + momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9. + epsilon(float, optional): The small value added to the variance to prevent division by zero. Default: 1e-5. + param_attr(ParamAttr, optional): The parameter attribute for Parameter `scale` + of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm + will create ParamAttr as param_attr. If the Initializer of the param_attr + is not set, the parameter is initialized with Xavier. Default: None. + bias_attr(ParamAttr, optional): The parameter attribute for the bias of batch_norm. + If it is set to None or one attribute of ParamAttr, batch_norm + will create ParamAttr as bias_attr. If the Initializer of the bias_attr + is not set, the bias is initialized zero. Default: None. + dtype(str, optional): Indicate the data type of the input ``Tensor``, + which can be float32 or float64. Default: float32. + data_layout(str, optional): Specify the input data format, the data format can be "NCHW" or "NHWC". Default: NCHW. + in_place(bool, optional): Make the input and output of batch norm reuse memory. Default: False. + moving_mean_name(str, optional): The name of moving_mean which store the global Mean. Default: None. + moving_variance_name(str, optional): The name of the moving_variance which store the global Variance. Default: None. + do_model_average_for_mean_and_var(bool, optional): Whether parameter mean and variance should do model + average when model average is enabled. Default: True. + use_global_stats(bool, optional): Whether to use global mean and + variance. In inference or test mode, set use_global_stats to true + or is_test to true, and the behavior is equivalent. + In train mode, when setting use_global_stats True, the global mean + and variance are also used during train period. Default: False. 
+ trainable_statistics(bool, optional): Whether to calculate mean and var in eval mode. In eval mode, when + setting trainable_statistics True, mean and variance will be calculated by current batch statistics. + Default: False. + + Returns: + None + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + from paddle.fluid.dygraph.base import to_variable + import numpy as np + + x = np.random.random(size=(3, 10, 3, 7)).astype('float32') + with fluid.dygraph.guard(): + x = to_variable(x) + batch_norm = fluid.BatchNorm(10) + hidden1 = batch_norm(x) + """ + + def __init__( + self, + num_channels, + act=None, + is_test=False, + momentum=0.9, + epsilon=1e-05, + param_attr=None, + bias_attr=None, + dtype='float32', + data_layout='NCHW', + in_place=False, + moving_mean_name=None, + moving_variance_name=None, + do_model_average_for_mean_and_var=True, + use_global_stats=False, + trainable_statistics=False, + ): + super().__init__() + self._param_attr = param_attr + self._bias_attr = bias_attr + self._act = act + self._use_mkldnn = _global_flags()["FLAGS_use_mkldnn"] + + assert ( + bias_attr is not False + ), "bias_attr should not be False in batch_norm." + + if dtype == "float16": + self._dtype = "float32" + else: + self._dtype = dtype + + param_shape = [num_channels] + + # create parameter + self.weight = self.create_parameter( + attr=self._param_attr, + shape=param_shape, + dtype=self._dtype, + default_initializer=Constant(1.0), + ) + self.weight.stop_gradient = ( + use_global_stats and self._param_attr.learning_rate == 0.0 + ) + + self.bias = self.create_parameter( + attr=self._bias_attr, + shape=param_shape, + dtype=self._dtype, + is_bias=True, + ) + self.bias.stop_gradient = ( + use_global_stats and self._param_attr.learning_rate == 0.0 + ) + + self._mean = self.create_parameter( + attr=ParamAttr( + name=moving_mean_name, + initializer=Constant(0.0), + trainable=False, + do_model_average=do_model_average_for_mean_and_var, + ), + shape=param_shape, + dtype=self._dtype, + ) + self._mean.stop_gradient = True + + self._variance = self.create_parameter( + attr=ParamAttr( + name=moving_variance_name, + initializer=Constant(1.0), + trainable=False, + do_model_average=do_model_average_for_mean_and_var, + ), + shape=param_shape, + dtype=self._dtype, + ) + self._variance.stop_gradient = True + + self._in_place = in_place + self._data_layout = data_layout + self._momentum = momentum + self._epsilon = epsilon + self._is_test = is_test + self._fuse_with_relu = False + self._use_global_stats = use_global_stats + self._trainable_statistics = trainable_statistics + + def forward(self, input): + # create output + # mean and mean_out share the same memory + mean_out = self._mean + # variance and variance out share the same memory + variance_out = self._variance + + if _non_static_mode(): + if in_dygraph_mode(): + batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm( + input, + self._mean, + self._variance, + self.weight, + self.bias, + not self.training, + self._momentum, + self._epsilon, + self._data_layout, + self._use_global_stats, + self._trainable_statistics, + ) + return dygraph_utils._append_activation_in_dygraph( + batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn + ) + + elif _in_legacy_dygraph(): + attrs = ( + "momentum", + self._momentum, + "epsilon", + self._epsilon, + "is_test", + not self.training, + "data_layout", + self._data_layout, + "use_mkldnn", + self._use_mkldnn, + "fuse_with_relu", + self._fuse_with_relu, + "use_global_stats", + self._use_global_stats, + 
'trainable_statistics', + self._trainable_statistics, + ) + batch_norm_out, _, _, _, _, _ = _legacy_C_ops.batch_norm( + input, + self.weight, + self.bias, + self._mean, + self._variance, + None, + mean_out, + variance_out, + *attrs + ) + + return dygraph_utils._append_activation_in_dygraph( + batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn + ) + + check_variable_and_dtype( + input, 'input', ['float16', 'float32', 'float64'], 'BatchNorm' + ) + + attrs = { + "momentum": self._momentum, + "epsilon": self._epsilon, + "is_test": self._is_test, + "data_layout": self._data_layout, + "use_mkldnn": False, + "fuse_with_relu": self._fuse_with_relu, + "use_global_stats": self._use_global_stats, + "trainable_statistics": self._trainable_statistics, + } + + inputs = { + "X": [input], + "Scale": [self.weight], + "Bias": [self.bias], + "Mean": [self._mean], + "Variance": [self._variance], + } + + saved_mean = self._helper.create_variable_for_type_inference( + dtype=self._dtype, stop_gradient=True + ) + saved_variance = self._helper.create_variable_for_type_inference( + dtype=self._dtype, stop_gradient=True + ) + reserve_space = self._helper.create_variable_for_type_inference( + dtype=self._helper.input_dtype(input), stop_gradient=True + ) + + batch_norm_out = ( + input + if self._in_place + else self._helper.create_variable_for_type_inference(self._dtype) + ) + + outputs = { + "Y": [batch_norm_out], + "MeanOut": [mean_out], + "VarianceOut": [variance_out], + "SavedMean": [saved_mean], + "SavedVariance": [saved_variance], + } + if reserve_space is not None: + outputs["ReserveSpace"] = [reserve_space] + + self._helper.append_op( + type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs + ) + + # Currently, we don't support inplace in dygraph mode + return self._helper.append_activation(batch_norm_out, self._act) + + class BatchNorm1D(_BatchNormBase): r""" Applies Batch Normalization over a 2D or 3D input (a mini-batch of 1D inputswith additional channel dimension) as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift . 
From fc5a81eabfc1751de147fc9ac1b645b56e79c5de Mon Sep 17 00:00:00 2001 From: risemeup1 <515586620@qq.com> Date: Mon, 5 Dec 2022 11:21:31 +0000 Subject: [PATCH 02/15] modfiy conflict --- .../fluid/tests/unittests/dygraph_to_static/test_resnet.py | 1 - .../tests/unittests/test_imperative_ocr_attention_model.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py index b60327c318535..af44406a99afe 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py @@ -24,7 +24,6 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX -from paddle.fluid.dygraph.nn import BatchNorm from paddle.jit import ProgramTranslator from paddle.nn.layer.norm import BatchNorm diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index 7f17cbf6ef155..29a99d4130124 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -21,7 +21,7 @@ import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import BatchNorm, Embedding, GRUUnit, Linear +from paddle.fluid.dygraph.nn import Embedding, GRUUnit, Linear from paddle.fluid.framework import _test_eager_guard from paddle.nn import Linear from paddle.nn.layer.norm import BatchNorm From 91f1a4d9cda8eea639c2a7ac02a7232b5b0cec7e Mon Sep 17 00:00:00 2001 From: risemeup1 <515586620@qq.com> Date: Mon, 5 Dec 2022 12:46:16 +0000 Subject: [PATCH 03/15] modify pre-commit error --- .../tests/unittests/test_imperative_ocr_attention_model.py | 2 +- .../paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index 29a99d4130124..aeac8483ac283 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -21,7 +21,7 @@ import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import Embedding, GRUUnit, Linear +from paddle.fluid.dygraph.nn import Embedding, GRUUnit from paddle.fluid.framework import _test_eager_guard from paddle.nn import Linear from paddle.nn.layer.norm import BatchNorm diff --git a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py index a104ee7cd75a4..0da8a62031b6a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py @@ -14,8 +14,6 @@ import sys -import paddle - sys.path.append("..") import unittest From f383b37cc4f4adf92a1313bce5143c517e286b2d Mon Sep 17 00:00:00 2001 From: risemeup1 <515586620@qq.com> Date: Mon, 5 Dec 2022 14:20:13 +0000 Subject: [PATCH 04/15] modify static-check ci error --- python/paddle/nn/layer/norm.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 5e488d1863d1d..5dc2edd40d61e 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -846,7 +846,7 @@ class BatchNorm(Layer): x = np.random.random(size=(3, 10, 3, 7)).astype('float32') with fluid.dygraph.guard(): x = to_variable(x) - batch_norm = fluid.BatchNorm(10) + batch_norm = nn.layer.norm.BatchNorm(10) hidden1 = batch_norm(x) """ From c0f88d3aceaa0b520a67bfca9a45c9419ae89ebb Mon Sep 17 00:00:00 2001 From: risemeup1 <515586620@qq.com> Date: Tue, 6 Dec 2022 01:54:08 +0000 Subject: [PATCH 05/15] fix failed tests --- .../fluid/contrib/slim/tests/test_imperative_skip_op.py | 2 +- .../fluid/tests/unittests/dygraph_to_static/darknet.py | 2 +- .../tests/unittests/dygraph_to_static/test_mobile_net.py | 2 +- .../tests/unittests/dygraph_to_static/test_se_resnet.py | 2 +- .../paddle/fluid/tests/unittests/test_batch_norm_op.py | 9 +++++---- .../fluid/tests/unittests/test_batch_norm_op_v2.py | 2 +- 6 files changed, 10 insertions(+), 9 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py index 3b19ee76c3554..3d6bc7fa51ce8 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py @@ -26,7 +26,7 @@ from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, ReLU6 from paddle.nn import Linear, Conv2D, Softmax, BatchNorm -from paddle.nn.norm import BatchNorm +from paddle.nn.layer.norm import BatchNorm from paddle.fluid.dygraph.nn import Pool2D from paddle.fluid.log_helper import get_logger diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py index bae882f4ebc84..d6003d9103dc8 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py @@ -64,7 +64,7 @@ def forward(self, inputs): out = self.conv(inputs) out = self.batch_norm(out) if self.act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) + out = paddle.nn.functional.leaky_relu(x=out, alpha=0.1) return out diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index ff36720594c09..f23d0439ac68f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -18,12 +18,12 @@ import unittest import numpy as np +from nn.layer.norm import BatchNorm from predictor_utils import PredictorTools import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX -from paddle.fluid.dygraph.nn import BatchNorm from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr from paddle.jit import ProgramTranslator diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py index 70ee21713c7ed..ad62658fc861c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py @@ -26,10 +26,10 @@ import 
paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX -from paddle.fluid.dygraph.nn import BatchNorm from paddle.jit import ProgramTranslator from paddle.jit.api import declarative from paddle.nn import Linear +from paddle.nn.layer.norm import BatchNorm SEED = 2020 np.random.seed(SEED) diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py index 34b358130219d..ed66c6e7a5282 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py @@ -21,6 +21,7 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core +import paddle.nn as nn from paddle.fluid import Program, program_guard from paddle.fluid.framework import grad_var_name from paddle.fluid.op import Operator @@ -770,7 +771,7 @@ def test_errors(self): class TestDygraphBatchNormAPIError(unittest.TestCase): def test_errors(self): with program_guard(Program(), Program()): - batch_norm = fluid.dygraph.BatchNorm(10) + batch_norm = nn.layer.norm.BatchNorm(10) # the input of BatchNorm must be Variable. x1 = fluid.create_lod_tensor( np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace() @@ -793,7 +794,7 @@ def test_dygraph(self): def compute(x, is_test, trainable_statistics): with fluid.dygraph.guard(p): - bn = fluid.dygraph.BatchNorm( + bn = nn.layer.norm.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -816,7 +817,7 @@ def test_static(self): def compute(x_np, is_test, trainable_statistics): with program_guard(Program(), Program()): - bn = fluid.dygraph.BatchNorm( + bn = nn.layer.norm.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -841,7 +842,7 @@ def test_reservespace(self): x = fluid.data(name='x', shape=x.shape, dtype=x.dtype) # Set this FLAG, the BatchNorm API will pass "reserve_space" argument into batch_norm op. 
os.environ['FLAGS_cudnn_batchnorm_spatial_persistent'] = '1' - batch_norm = fluid.dygraph.BatchNorm(7, data_layout="NHWC") + batch_norm = nn.layer.norm.BatchNorm(7, data_layout="NHWC") hidden1 = batch_norm(x) os.environ['FLAGS_cudnn_batchnorm_spatial_persistent'] = '0' diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py index 3ca48ddaa5543..d27953d24e83c 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py @@ -129,7 +129,7 @@ def test_eager_api(self): def compute_v1(x): with fluid.dygraph.guard(p): - bn = nn.layer.norm.BatchNorm() + bn = nn.layer.norm.BatchNorm(shape[1]) # bn = paddle.nn.BatchNorm2D(shape[1]) x1 = paddle.to_tensor(x) x1.stop_gradient = False From 2141dac62d81cbfec566aa4810f576d2d617a9e7 Mon Sep 17 00:00:00 2001 From: risemeup1 <515586620@qq.com> Date: Tue, 6 Dec 2022 03:18:54 +0000 Subject: [PATCH 06/15] modify conflict --- .../fluid/contrib/slim/tests/test_imperative_skip_op.py | 3 +-- .../fluid/tests/unittests/test_imperative_se_resnext.py | 8 +++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py index 3d6bc7fa51ce8..91311b8c69534 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py @@ -25,9 +25,8 @@ from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, ReLU6 -from paddle.nn import Linear, Conv2D, Softmax, BatchNorm +from paddle.nn import Linear, Conv2D, Softmax from paddle.nn.layer.norm import BatchNorm -from paddle.fluid.dygraph.nn import Pool2D from paddle.fluid.log_helper import get_logger from imperative_test_utils import ( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py index 0c35855305cea..2e8205d3f86ed 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py @@ -104,8 +104,8 @@ def __init__(self, num_channels, reduction_ratio): super().__init__() self._num_channels = num_channels - self._pool = Pool2D(pool_size=0, pool_type='avg', global_pooling=True) - self._squeeze = Linear( + self._pool = paddle.nn.AdaptiveAvgPool2D(1) + self._squeeze = paddle.nn.Linear( num_channels, num_channels // reduction_ratio, param_attr=fluid.ParamAttr( @@ -288,9 +288,7 @@ def __init__(self, layers=50, class_dim=102): self.bottleneck_block_list.append(bottleneck_block) shortcut = True - self.pool2d_avg = Pool2D( - pool_size=7, pool_type='avg', global_pooling=True - ) + self.pool2d_avg = paddle.nn.AdaptiveAvgPool2D(1) import math stdv = 1.0 / math.sqrt(2048 * 1.0) From ffb4a446706d0af78fe6bdfb880965b1533807ee Mon Sep 17 00:00:00 2001 From: risemeup1 <515586620@qq.com> Date: Tue, 6 Dec 2022 03:22:29 +0000 Subject: [PATCH 07/15] modify conflict --- .../paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py | 2 +- .../paddle/fluid/tests/unittests/test_imperative_se_resnext.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py 
b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py index 91311b8c69534..e1aec4eeca5ec 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py @@ -25,8 +25,8 @@ from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, ReLU6 -from paddle.nn import Linear, Conv2D, Softmax from paddle.nn.layer.norm import BatchNorm +from paddle.nn import Linear, Conv2D, Softmax from paddle.fluid.log_helper import get_logger from imperative_test_utils import ( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py index 2e8205d3f86ed..65a8fc4f4ed80 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py @@ -287,7 +287,6 @@ def __init__(self, layers=50, class_dim=102): num_channels = bottleneck_block._num_channels_out self.bottleneck_block_list.append(bottleneck_block) shortcut = True - self.pool2d_avg = paddle.nn.AdaptiveAvgPool2D(1) import math From 04824b53b72f0e844051409b768760ee91a1652e Mon Sep 17 00:00:00 2001 From: risemeup1 <515586620@qq.com> Date: Tue, 6 Dec 2022 03:52:54 +0000 Subject: [PATCH 08/15] delete import modelu GRUUnit --- .../tests/unittests/test_imperative_ocr_attention_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index 865c0aa0d7b63..7f40aba3d6488 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -21,7 +21,7 @@ import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import Embedding, GRUUnit +from paddle.fluid.dygraph.nn import Embedding from paddle.fluid.framework import _test_eager_guard from paddle.nn import Linear from paddle.nn.layer.norm import BatchNorm From 7064e9f89df00f25a62bee581b26077c93fdd8cf Mon Sep 17 00:00:00 2001 From: risemeup1 <515586620@qq.com> Date: Tue, 6 Dec 2022 09:26:30 +0000 Subject: [PATCH 09/15] fix falied test --- .../paddle/fluid/tests/unittests/dygraph_to_static/darknet.py | 2 +- .../fluid/tests/unittests/dygraph_to_static/test_mobile_net.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py index d6003d9103dc8..bf312818f2541 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py @@ -64,7 +64,7 @@ def forward(self, inputs): out = self.conv(inputs) out = self.batch_norm(out) if self.act == 'leaky': - out = paddle.nn.functional.leaky_relu(x=out, alpha=0.1) + out = paddle.nn.functional.leaky_relu(out, 0.1) return out diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index c0df924fc43b1..75092c9e102d9 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ 
b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -18,7 +18,6 @@ import unittest import numpy as np -from nn.layer.norm import BatchNorm from predictor_utils import PredictorTools import paddle @@ -29,6 +28,7 @@ from paddle.jit import ProgramTranslator from paddle.jit.api import declarative from paddle.nn import Linear +from paddle.nn.layer.norm import BatchNorm # Note: Set True to eliminate randomness. # 1. For one operation, cuDNN has several algorithms, From 61d48ff143b15c902e1f34c8dcc11a2a31a7cf5d Mon Sep 17 00:00:00 2001 From: risemeup1 <515586620@qq.com> Date: Tue, 6 Dec 2022 10:55:55 +0000 Subject: [PATCH 10/15] fix failed testes --- python/paddle/nn/layer/norm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 5dc2edd40d61e..527bf58f5a42e 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -840,9 +840,11 @@ class BatchNorm(Layer): .. code-block:: python import paddle.fluid as fluid + import paddle.nn as nn from paddle.fluid.dygraph.base import to_variable import numpy as np + x = np.random.random(size=(3, 10, 3, 7)).astype('float32') with fluid.dygraph.guard(): x = to_variable(x) From e52ee55b45bd76b02daacb81f5a0913b2f442a64 Mon Sep 17 00:00:00 2001 From: risemeup1 <515586620@qq.com> Date: Tue, 6 Dec 2022 13:47:22 +0000 Subject: [PATCH 11/15] fix failed tests --- .../dygraph_to_static/test_mobile_net.py | 3 +- .../unittests/test_imperative_se_resnext.py | 50 +++++++++---------- 2 files changed, 24 insertions(+), 29 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index 75092c9e102d9..9f28eaa9fb9b6 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -28,7 +28,6 @@ from paddle.jit import ProgramTranslator from paddle.jit.api import declarative from paddle.nn import Linear -from paddle.nn.layer.norm import BatchNorm # Note: Set True to eliminate randomness. # 1. 
For one operation, cuDNN has several algorithms, @@ -69,7 +68,7 @@ def __init__( bias_attr=False, ) - self._batch_norm = BatchNorm( + self._batch_norm = paddle.nn.layer.nomr.BatchNorm( num_filters, act=act, param_attr=ParamAttr(name=self.full_name() + "_bn" + "_scale"), diff --git a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py index 65a8fc4f4ed80..353b02fe1b67d 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py @@ -19,7 +19,7 @@ import paddle import paddle.fluid as fluid -from paddle.fluid import Linear, Pool2D, core +from paddle.fluid import core from paddle.fluid.framework import _test_eager_guard from paddle.fluid.layer_helper import LayerHelper from paddle.nn.layer.norm import BatchNorm @@ -108,25 +108,27 @@ def __init__(self, num_channels, reduction_ratio): self._squeeze = paddle.nn.Linear( num_channels, num_channels // reduction_ratio, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.05) ), - act='relu', ) - self._excitation = Linear( + self.act_1 = paddle.nn.ReLU() + self._excitation = paddle.nn.Linear( num_channels // reduction_ratio, num_channels, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.05) ), - act='sigmoid', ) + self.act_2 = paddle.nn.Softmax() def forward(self, input): y = self._pool(input) - y = fluid.layers.reshape(y, shape=[-1, self._num_channels]) + y = paddle.reshape(y, shape=[-1, self._num_channels]) y = self._squeeze(y) + y = self.act_1(y) y = self._excitation(y) + y = self.act_2(y) y = fluid.layers.elementwise_mul(x=input, y=y, axis=0) return y @@ -187,7 +189,7 @@ def forward(self, inputs): else: short = self.short(inputs) - y = fluid.layers.elementwise_add(x=short, y=scale) + y = paddle.add(x=short, y=scale) layer_helper = LayerHelper(self.full_name(), act='relu') y = layer_helper.append_activation(y) @@ -218,9 +220,7 @@ def __init__(self, layers=50, class_dim=102): stride=2, act='relu', ) - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' - ) + self.pool = paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1) elif layers == 101: cardinality = 32 reduction_ratio = 16 @@ -233,9 +233,7 @@ def __init__(self, layers=50, class_dim=102): stride=2, act='relu', ) - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' - ) + self.pool = paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1) elif layers == 152: cardinality = 64 reduction_ratio = 16 @@ -262,9 +260,7 @@ def __init__(self, layers=50, class_dim=102): stride=1, act='relu', ) - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' - ) + self.pool = paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1) self.bottleneck_block_list = [] num_channels = 64 @@ -294,14 +290,14 @@ def __init__(self, layers=50, class_dim=102): self.pool2d_avg_output = num_filters[-1] * 4 * 1 * 1 - self.out = Linear( + self.out = paddle.nn.Linear( self.pool2d_avg_output, class_dim, - act='softmax', - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), ) + self.out_act = paddle.nn.Softmax() def 
forward(self, inputs): if self.layers == 50 or self.layers == 101: @@ -316,9 +312,9 @@ def forward(self, inputs): for bottleneck_block in self.bottleneck_block_list: y = bottleneck_block(y) y = self.pool2d_avg(y) - y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output]) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_output]) y = self.out(y) - return y + return self.out_act(y) class TestImperativeResneXt(unittest.TestCase): @@ -375,7 +371,7 @@ def run_dygraph(): label.stop_gradient = True out = se_resnext(img) - softmax_out = fluid.layers.softmax(out, use_cudnn=False) + softmax_out = paddle.nn.functional.softmax(out) loss = fluid.layers.cross_entropy( input=softmax_out, label=label ) @@ -455,7 +451,7 @@ def run_dygraph(): ) label = fluid.layers.data(name='label', shape=[1], dtype='int64') out = se_resnext(img) - softmax_out = fluid.layers.softmax(out, use_cudnn=False) + softmax_out = paddle.nn.functional.softmax(out) loss = fluid.layers.cross_entropy(input=softmax_out, label=label) avg_loss = paddle.mean(x=loss) optimizer.minimize(avg_loss) From 57bfe4ccb9136ad405a30c9502a79d81e7c6bc61 Mon Sep 17 00:00:00 2001 From: risemeup1 <515586620@qq.com> Date: Tue, 6 Dec 2022 14:20:39 +0000 Subject: [PATCH 12/15] fix failed tests --- .../fleet/test_imperative_auto_mixed_precision.py | 2 +- .../fleet/test_imperative_auto_mixed_precision_for_eager.py | 2 +- .../fluid/tests/unittests/dygraph_to_static/darknet.py | 2 +- .../tests/unittests/dygraph_to_static/test_mobile_net.py | 3 ++- .../fluid/tests/unittests/dygraph_to_static/test_op_attr.py | 2 +- .../tests/unittests/dygraph_to_static/test_resnet_v2.py | 2 +- .../fluid/tests/unittests/ir/test_fuse_resnet_unit.py | 2 +- python/paddle/fluid/tests/unittests/test_gradient_clip.py | 2 +- .../fluid/tests/unittests/test_imperative_se_resnext.py | 3 --- .../fluid/tests/unittests/test_inplace_addto_strategy.py | 2 +- python/paddle/fluid/tests/unittests/test_layout_autotune.py | 2 +- .../unittests/xpu/test_fused_resnet_basic_block_op_xpu.py | 6 +++--- python/paddle/vision/models/densenet.py | 2 +- 13 files changed, 15 insertions(+), 17 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision.py index 3141ed81a458b..1a59db5d504dd 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision.py @@ -708,7 +708,7 @@ def test_skip_BatchNorm_Layer_norm(self): for param in model.parameters(): self.assertEqual((param.dtype == paddle.float32), True) - model = paddle.nn.BatchNorm(1) + model = paddle.nn.layer.norm.BatchNorm(1) model = paddle.amp.decorate(models=model, level='O2') for param in model.parameters(): self.assertEqual((param.dtype == paddle.float32), True) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision_for_eager.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision_for_eager.py index dbcdf3ac46c9e..8df63a16969d7 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision_for_eager.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision_for_eager.py @@ -707,7 +707,7 @@ def test_skip_BatchNorm_Layer_norm(self): for param in model.parameters(): self.assertEqual((param.dtype ==
paddle.float32), True) - model = paddle.nn.BatchNorm(1) + model = paddle.nn.layer.norm.BatchNorm(1) model = paddle.amp.decorate(models=model, level='O2') for param in model.parameters(): self.assertEqual((param.dtype == paddle.float32), True) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py index bf312818f2541..2f776911bdce0 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py @@ -114,7 +114,7 @@ def __init__(self, ch_in, ch_out, is_test=True): def forward(self, inputs): conv1 = self.conv1(inputs) conv2 = self.conv2(conv1) - out = fluid.layers.elementwise_add(x=inputs, y=conv2, act=None) + out = paddle.add(x=inputs, y=conv2) return out diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index 9f28eaa9fb9b6..75092c9e102d9 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -28,6 +28,7 @@ from paddle.jit import ProgramTranslator from paddle.jit.api import declarative from paddle.nn import Linear +from paddle.nn.layer.norm import BatchNorm # Note: Set True to eliminate randomness. # 1. For one operation, cuDNN has several algorithms, @@ -68,7 +69,7 @@ def __init__( bias_attr=False, ) - self._batch_norm = paddle.nn.layer.nomr.BatchNorm( + self._batch_norm = BatchNorm( num_filters, act=act, param_attr=ParamAttr(name=self.full_name() + "_bn" + "_scale"), diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py index d474d80b63e60..d786ec0ed2d4d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py @@ -31,7 +31,7 @@ def __init__(self, in_num, out_num): super().__init__() self.linear = paddle.nn.Linear(in_num, out_num) - self.bn = paddle.nn.BatchNorm(out_num) + self.bn = paddle.nn.layer.norm.BatchNorm(out_num) self.sub = MySub() def forward(self, x): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py index ee01b71e29c78..3ddcd0738007b 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py @@ -76,7 +76,7 @@ def __init__( bias_attr=False, ) - self._batch_norm = paddle.nn.BatchNorm(num_filters, act=act) + self._batch_norm = paddle.nn.layer.norm.BatchNorm(num_filters, act=act) def forward(self, inputs): y = self._conv(inputs) diff --git a/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py b/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py index bcadccf5fd671..6a36d78e95e54 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py +++ b/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py @@ -43,7 +43,7 @@ def test_fuse_resenet_unit(self): conv2d = paddle.nn.Conv2D( 8, 32, 1, bias_attr=False, data_format='NHWC' ) - batch_norm = paddle.nn.BatchNorm( + batch_norm = paddle.nn.layer.norm.BatchNorm( 32, act='relu', data_layout='NHWC' ) out = batch_norm(conv2d(x)) diff --git 
a/python/paddle/fluid/tests/unittests/test_gradient_clip.py b/python/paddle/fluid/tests/unittests/test_gradient_clip.py index 4aa064921fe5c..c8684cfcc919d 100644 --- a/python/paddle/fluid/tests/unittests/test_gradient_clip.py +++ b/python/paddle/fluid/tests/unittests/test_gradient_clip.py @@ -531,7 +531,7 @@ class SimpleNet(paddle.nn.Layer): def __init__(self): super().__init__() self.linear = paddle.nn.Linear(5, 5) - self.batch_norm = paddle.nn.BatchNorm(5) + self.batch_norm = paddle.nn.layer.BatchNorm(5) def forward(self, x): x = self.linear(x) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py index 353b02fe1b67d..e84e1134f036c 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py @@ -24,9 +24,6 @@ from paddle.fluid.layer_helper import LayerHelper from paddle.nn.layer.norm import BatchNorm -# NOTE(zhiqiu): run with FLAGS_cudnn_deterministic=1 - - batch_size = 8 train_parameters = { "input_size": [3, 224, 224], diff --git a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py index 9f448e7f07a47..fb57ff7fb81d7 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py @@ -43,7 +43,7 @@ def __init__( data_format=data_format, ) - self._batch_norm = paddle.nn.BatchNorm( + self._batch_norm = paddle.nn.layer.norm.BatchNorm( num_filters, data_layout=data_format ) diff --git a/python/paddle/fluid/tests/unittests/test_layout_autotune.py b/python/paddle/fluid/tests/unittests/test_layout_autotune.py index 70c283a549a09..3512bbca63dcb 100644 --- a/python/paddle/fluid/tests/unittests/test_layout_autotune.py +++ b/python/paddle/fluid/tests/unittests/test_layout_autotune.py @@ -26,7 +26,7 @@ class SimpleNet(paddle.nn.Layer): def __init__(self, data_format="NCHW", class_num=2): super().__init__() self.conv = paddle.nn.Conv2D(3, 8, (3, 3)) - self.bn = paddle.nn.BatchNorm(num_channels=8) + self.bn = paddle.nn.layer.norm.BatchNorm(num_channels=8) self.relu = paddle.nn.ReLU() self.pool = paddle.nn.AvgPool2D(kernel_size=2, stride=2) self.flatten = paddle.nn.Flatten() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py index 68bf21abd1a06..d3657d7c54c6b 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py @@ -113,7 +113,7 @@ def Base(self): bias_attr=None, data_format='NCHW', ) - self.bn1 = nn.BatchNorm( + self.bn1 = nn.layer.nomr.BatchNorm( self.out_channels, act='relu', param_attr=bn1_weight, @@ -130,7 +130,7 @@ def Base(self): bias_attr=None, data_format='NCHW', ) - self.bn2 = nn.BatchNorm( + self.bn2 = nn.layer.norm.BatchNorm( self.out_channels, act=None, param_attr=bn2_weight, @@ -147,7 +147,7 @@ def Base(self): bias_attr=None, data_format='NCHW', ) - self.bn3 = nn.BatchNorm( + self.bn3 = nn.layer.norm.BatchNorm( self.out_channels, act=None, param_attr=bn3_weight, diff --git a/python/paddle/vision/models/densenet.py b/python/paddle/vision/models/densenet.py index f620e1d70956b..20747e828c838 100644 --- a/python/paddle/vision/models/densenet.py +++ b/python/paddle/vision/models/densenet.py @@ 
-20,13 +20,13 @@ from paddle.nn import ( AdaptiveAvgPool2D, AvgPool2D, - BatchNorm, Conv2D, Dropout, Linear, MaxPool2D, ) from paddle.nn.initializer import Uniform +from paddle.nn.layer.norm import BatchNorm from paddle.utils.download import get_weights_path_from_url __all__ = [] From b358a3c8e3c59df573eb009e23bcb504d5ddea17 Mon Sep 17 00:00:00 2001 From: risemeup1 <515586620@qq.com> Date: Wed, 7 Dec 2022 01:38:23 +0000 Subject: [PATCH 13/15] fix failed test --- python/paddle/fluid/tests/unittests/test_gradient_clip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_gradient_clip.py b/python/paddle/fluid/tests/unittests/test_gradient_clip.py index c8684cfcc919d..ff3f2bfff0afc 100644 --- a/python/paddle/fluid/tests/unittests/test_gradient_clip.py +++ b/python/paddle/fluid/tests/unittests/test_gradient_clip.py @@ -531,7 +531,7 @@ class SimpleNet(paddle.nn.Layer): def __init__(self): super().__init__() self.linear = paddle.nn.Linear(5, 5) - self.batch_norm = paddle.nn.layer.BatchNorm(5) + self.batch_norm = paddle.nn.layer.norm.BatchNorm(5) def forward(self, x): x = self.linear(x) From 8e06fa48f3d0ea344b33b04be84355bd16695b5a Mon Sep 17 00:00:00 2001 From: risemeup1 <515586620@qq.com> Date: Wed, 7 Dec 2022 05:15:21 +0000 Subject: [PATCH 14/15] fix error in test_fused_resnet_basic_block_op_xpu.py --- .../tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py index d3657d7c54c6b..1b9434da600ab 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py @@ -113,7 +113,7 @@ def Base(self): bias_attr=None, data_format='NCHW', ) - self.bn1 = nn.layer.nomr.BatchNorm( + self.bn1 = nn.layer.norm.BatchNorm( self.out_channels, act='relu', param_attr=bn1_weight, From 9fb71b4e3ae2582fd9494d69eaa1e2db7a939bb8 Mon Sep 17 00:00:00 2001 From: risemeup1 <515586620@qq.com> Date: Fri, 9 Dec 2022 07:48:59 +0000 Subject: [PATCH 15/15] modify after xiaoguang's review --- .../contrib/slim/tests/test_imperative_skip_op.py | 3 +-- .../fleet/test_imperative_auto_mixed_precision.py | 2 +- ...est_imperative_auto_mixed_precision_for_eager.py | 2 +- .../tests/unittests/dygraph_to_static/darknet.py | 2 +- .../unittests/dygraph_to_static/test_cycle_gan.py | 2 +- .../unittests/dygraph_to_static/test_mobile_net.py | 3 +-- .../unittests/dygraph_to_static/test_op_attr.py | 2 +- .../unittests/dygraph_to_static/test_resnet.py | 2 +- .../unittests/dygraph_to_static/test_resnet_v2.py | 2 +- .../unittests/dygraph_to_static/test_se_resnet.py | 3 +-- .../tests/unittests/dygraph_to_static/test_tsm.py | 3 +-- .../tests/unittests/ir/test_fuse_resnet_unit.py | 2 +- .../tests/unittests/mlu/test_batch_norm_op_mlu.py | 9 ++++----- .../unittests/mlu/test_batch_norm_op_mlu_v2.py | 9 ++++----- .../tests/unittests/npu/test_batch_norm_op_npu.py | 4 ++-- .../fluid/tests/unittests/test_batch_norm_op.py | 9 ++++----- .../fluid/tests/unittests/test_batch_norm_op_v2.py | 13 ++++++------- .../fluid/tests/unittests/test_gradient_clip.py | 2 +- .../unittests/test_imperative_load_static_param.py | 3 +-- .../test_imperative_ocr_attention_model.py | 3 +-- .../fluid/tests/unittests/test_imperative_resnet.py | 2 +-
.../tests/unittests/test_imperative_se_resnext.py | 2 +- .../tests/unittests/test_inplace_addto_strategy.py | 2 +- .../fluid/tests/unittests/test_layout_autotune.py | 2 +- .../tests/unittests/xpu/test_batch_norm_op_xpu.py | 3 +-- .../xpu/test_fused_resnet_basic_block_op_xpu.py | 6 +++--- python/paddle/nn/layer/norm.py | 1 + python/paddle/vision/models/densenet.py | 2 +- 28 files changed, 45 insertions(+), 55 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py index e1aec4eeca5ec..9b8ed24af2e55 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py @@ -25,8 +25,7 @@ from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, ReLU6 -from paddle.nn.layer.norm import BatchNorm -from paddle.nn import Linear, Conv2D, Softmax +from paddle.nn import Linear, Conv2D, Softmax, BatchNorm from paddle.fluid.log_helper import get_logger from imperative_test_utils import ( diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision.py index 1a59db5d504dd..3141ed81a458b 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision.py @@ -708,7 +708,7 @@ def test_skip_BatchNorm_Layer_norm(self): for param in model.parameters(): self.assertEqual((param.dtype == paddle.float32), True) - model = paddle.nn.layer.norm.BatchNorm(1) + model = paddle.nn.BatchNorm(1) model = paddle.amp.decorate(models=model, level='O2') for param in model.parameters(): self.assertEqual((param.dtype == paddle.float32), True) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision_for_eager.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision_for_eager.py index 8df63a16969d7..dbcdf3ac46c9e 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision_for_eager.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_imperative_auto_mixed_precision_for_eager.py @@ -707,7 +707,7 @@ def test_skip_BatchNorm_Layer_norm(self): for param in model.parameters(): self.assertEqual((param.dtype == paddle.float32), True) - model = paddle.nn.layer.norm.BatchNorm(1) + model = paddle.nn.BatchNorm(1) model = paddle.amp.decorate(models=model, level='O2') for param in model.parameters(): self.assertEqual((param.dtype == paddle.float32), True) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py index 2f776911bdce0..783dfff262e8f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py @@ -16,7 +16,7 @@ import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr from paddle.fluid.regularizer import L2Decay -from paddle.nn.layer.norm import BatchNorm +from paddle.nn import BatchNorm class ConvBNLayer(fluid.dygraph.Layer): diff --git 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py index a26c65c622315..09117e3054078 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py @@ -40,7 +40,7 @@ from paddle.fluid.dygraph import to_variable from paddle.jit import ProgramTranslator from paddle.jit.api import declarative -from paddle.nn.layer.norm import BatchNorm +from paddle.nn import BatchNorm # Note: Set True to eliminate randomness. # 1. For one operation, cuDNN has several algorithms, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index 5e9b6c2e8f1c2..d5a4ae996d68b 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -27,8 +27,7 @@ from paddle.fluid.param_attr import ParamAttr from paddle.jit import ProgramTranslator from paddle.jit.api import declarative -from paddle.nn import Linear -from paddle.nn.layer.norm import BatchNorm +from paddle.nn import BatchNorm, Linear # Note: Set True to eliminate randomness. # 1. For one operation, cuDNN has several algorithms, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py index d786ec0ed2d4d..d474d80b63e60 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py @@ -31,7 +31,7 @@ def __init__(self, in_num, out_num): super().__init__() self.linear = paddle.nn.Linear(in_num, out_num) - self.bn = paddle.nn.layer.norm.BatchNorm(out_num) + self.bn = paddle.nn.BatchNorm(out_num) self.sub = MySub() def forward(self, x): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py index 529548f1aee13..1ad5facd5d1ac 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py @@ -25,7 +25,7 @@ import paddle.fluid as fluid from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.jit import ProgramTranslator -from paddle.nn.layer.norm import BatchNorm +from paddle.nn import BatchNorm SEED = 2020 IMAGENET1000 = 1281167 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py index 3ddcd0738007b..ee01b71e29c78 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py @@ -76,7 +76,7 @@ def __init__( bias_attr=False, ) - self._batch_norm = paddle.nn.layer.norm.BatchNorm(num_filters, act=act) + self._batch_norm = paddle.nn.BatchNorm(num_filters, act=act) def forward(self, inputs): y = self._conv(inputs) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py index a68fe536267b6..b260bbc359ca9 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py +++ 
b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py @@ -28,8 +28,7 @@ from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.jit import ProgramTranslator from paddle.jit.api import declarative -from paddle.nn import Linear -from paddle.nn.layer.norm import BatchNorm +from paddle.nn import BatchNorm, Linear SEED = 2020 np.random.seed(SEED) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py index e0d6cdcf260d6..4d8ab7612aa4c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py @@ -26,8 +26,7 @@ from paddle.fluid.dygraph import to_variable from paddle.jit import ProgramTranslator from paddle.jit.api import declarative -from paddle.nn import Linear -from paddle.nn.layer.norm import BatchNorm +from paddle.nn import BatchNorm, Linear random.seed(0) np.random.seed(0) diff --git a/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py b/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py index 6a36d78e95e54..bcadccf5fd671 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py +++ b/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py @@ -43,7 +43,7 @@ def test_fuse_resenet_unit(self): conv2d = paddle.nn.Conv2D( 8, 32, 1, bias_attr=False, data_format='NHWC' ) - batch_norm = paddle.nn.layer.norm.BatchNorm( + batch_norm = paddle.nn.BatchNorm( 32, act='relu', data_layout='NHWC' ) out = batch_norm(conv2d(x)) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py index 3de7dd924d279..66876ddb79294 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py @@ -18,7 +18,6 @@ import paddle import paddle.fluid.core as core from paddle.fluid.op import Operator -import paddle.fluid as fluid import sys sys.path.append('..') @@ -753,7 +752,7 @@ def test_errors(self): class TestDygraphBatchNormAPIError(unittest.TestCase): def test_errors(self): with program_guard(Program(), Program()): - batch_norm = nn.layer.norm.BatchNorm(10) + batch_norm = paddle.nn.BatchNorm(10) # the input of BatchNorm must be Variable. x1 = fluid.create_lod_tensor( np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace() @@ -776,7 +775,7 @@ def test_dygraph(self): def compute(x, is_test, trainable_statistics): with fluid.dygraph.guard(p): - bn = nn.layer.norm.BatchNorm( + bn = paddle.nn.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -799,7 +798,7 @@ def test_static(self): def compute(x_np, is_test, trainable_statistics): with program_guard(Program(), Program()): - bn = nn.layer.norm.BatchNorm( + bn = paddle.nn.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -824,7 +823,7 @@ def test_reservespace(self): x = fluid.data(name='x', shape=x.shape, dtype=x.dtype) # Set this FLAG, the BatchNorm API will pass "reserve_space" argument into batch_norm op. 
os.environ['FLAGS_cudnn_batchnorm_spatial_persistent'] = '1' - batch_norm = nn.layer.norm.BatchNorm(7, data_layout="NHWC") + batch_norm = paddle.nn.BatchNorm(7, data_layout="NHWC") hidden1 = batch_norm(x) os.environ['FLAGS_cudnn_batchnorm_spatial_persistent'] = '0' diff --git a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py index c3e9a042233bd..17672d668d38a 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py @@ -17,7 +17,6 @@ import numpy as np import paddle.fluid.core as core from paddle.fluid.op import Operator -import paddle.fluid as fluid import sys sys.path.append("..") @@ -95,7 +94,7 @@ def test_dygraph(self): def compute_v1(x, is_test, trainable_statistics): with fluid.dygraph.guard(p): - bn = nn.layer.norm( + bn = paddle.nn.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -111,7 +110,7 @@ def compute_v2(x): def compute_v3(x, is_test, trainable_statistics): with fluid.dygraph.guard(p): - bn = nn.layer.norm( + bn = paddle.nn.BatchNorm( shape[1], is_test=is_test, param_attr=fluid.ParamAttr( @@ -153,7 +152,7 @@ def test_static(self): def compute_v1(x_np, is_test, trainable_statistics): with program_guard(Program(), Program()): - bn = nn.layer.norm( + bn = paddle.nn.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -260,7 +259,7 @@ def test_global_stats(self): for p in self.places: with fluid.dygraph.guard(p): x = paddle.randn([2, 6, 6, 4]) - net1 = paddle.nn.layer.norm( + net1 = paddle.nn.BatchNorm( 6, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(1.0) diff --git a/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py index 2eeacffe27058..353fd250a5e1b 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py @@ -562,7 +562,7 @@ def test_dygraph(self): def compute(x, is_test, trainable_statistics): with fluid.dygraph.guard(p): - bn = nn.layer.norm.BatchNorm( + bn = paddle.nn.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -583,7 +583,7 @@ def test_static(self): def compute(x_np, is_test, trainable_statistics): with program_guard(Program(), Program()): - bn = nn.layer.norm.BatchNorm( + bn = paddle.nn.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py index 2e92d3d27f5eb..6802a8a9ea995 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py @@ -21,7 +21,6 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.nn as nn from paddle.fluid import Program, program_guard from paddle.fluid.framework import grad_var_name from paddle.fluid.op import Operator @@ -771,7 +770,7 @@ def test_errors(self): class TestDygraphBatchNormAPIError(unittest.TestCase): def test_errors(self): with program_guard(Program(), Program()): - batch_norm = nn.layer.norm.BatchNorm(10) + batch_norm = paddle.nn.BatchNorm(10) # the input of BatchNorm must be Variable. 
x1 = fluid.create_lod_tensor( np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace() @@ -794,7 +793,7 @@ def test_dygraph(self): def compute(x, is_test, trainable_statistics): with fluid.dygraph.guard(p): - bn = nn.layer.norm.BatchNorm( + bn = paddle.nn.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -817,7 +816,7 @@ def test_static(self): def compute(x_np, is_test, trainable_statistics): with program_guard(Program(), Program()): - bn = nn.layer.norm.BatchNorm( + bn = paddle.nn.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -842,7 +841,7 @@ def test_reservespace(self): x = fluid.data(name='x', shape=x.shape, dtype=x.dtype) # Set this FLAG, the BatchNorm API will pass "reserve_space" argument into batch_norm op. os.environ['FLAGS_cudnn_batchnorm_spatial_persistent'] = '1' - batch_norm = nn.layer.norm.BatchNorm(7, data_layout="NHWC") + batch_norm = paddle.nn.BatchNorm(7, data_layout="NHWC") hidden1 = batch_norm(x) os.environ['FLAGS_cudnn_batchnorm_spatial_persistent'] = '0' diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py index d27953d24e83c..74edcd61d343e 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py @@ -19,7 +19,6 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.nn as nn from paddle.fluid import Program, program_guard from paddle.fluid.framework import _test_eager_guard @@ -83,7 +82,7 @@ def error3d(): def test_large_batch(self): def compute_baseline(x): with fluid.dygraph.guard(p): - bn = nn.layer.norm.BatchNorm(shape[1]) + bn = paddle.nn.BatchNorm(shape[1]) x1 = paddle.to_tensor(x) x1.stop_gradient = False y = bn(x1) @@ -129,7 +128,7 @@ def test_eager_api(self): def compute_v1(x): with fluid.dygraph.guard(p): - bn = nn.layer.norm.BatchNorm(shape[1]) + bn = paddle.nn.BatchNorm(shape[1]) # bn = paddle.nn.BatchNorm2D(shape[1]) x1 = paddle.to_tensor(x) x1.stop_gradient = False @@ -163,7 +162,7 @@ def test_dygraph(self): def compute_v1(x, is_test, trainable_statistics): with fluid.dygraph.guard(p): - bn = nn.layer.norm.BatchNorm( + bn = paddle.nn.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -184,7 +183,7 @@ def compute_v2(x): def compute_v3(x, is_test, trainable_statistics): with fluid.dygraph.guard(p): - bn = nn.layer.norm.BatchNorm( + bn = paddle.nn.BatchNorm( shape[1], is_test=is_test, param_attr=fluid.ParamAttr( @@ -226,7 +225,7 @@ def test_static(self): def compute_v1(x_np, is_test, trainable_statistics): with program_guard(Program(), Program()): - bn = nn.layer.norm.BatchNorm( + bn = paddle.nn.BatchNorm( shape[1], is_test=is_test, trainable_statistics=trainable_statistics, @@ -380,7 +379,7 @@ def test_global_stats(self): for p in self.places: with fluid.dygraph.guard(p): x = paddle.randn([2, 6, 6, 4]) - net1 = nn.layer.norm.BatchNorm( + net1 = paddle.nn.BatchNorm( 6, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(1.0) diff --git a/python/paddle/fluid/tests/unittests/test_gradient_clip.py b/python/paddle/fluid/tests/unittests/test_gradient_clip.py index f9b377cd2c2c9..0c89e000538d6 100644 --- a/python/paddle/fluid/tests/unittests/test_gradient_clip.py +++ b/python/paddle/fluid/tests/unittests/test_gradient_clip.py @@ -529,7 +529,7 @@ class SimpleNet(paddle.nn.Layer): def __init__(self): super().__init__() self.linear = 
paddle.nn.Linear(5, 5) - self.batch_norm = paddle.nn.layer.norm.BatchNorm(5) + self.batch_norm = paddle.nn.BatchNorm(5) def forward(self, x): x = self.linear(x) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py index 12bea8ce1f65b..2477d2e4785f7 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -22,8 +22,7 @@ import paddle.fluid as fluid import paddle.fluid.framework as framework from paddle.fluid.dygraph.nn import Embedding -from paddle.nn import Linear -from paddle.nn.layer.norm import BatchNorm +from paddle.nn import BatchNorm, Linear class TestDygraphLoadStatic(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index 7f40aba3d6488..58be44dc8ef6d 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -23,8 +23,7 @@ from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.nn import Embedding from paddle.fluid.framework import _test_eager_guard -from paddle.nn import Linear -from paddle.nn.layer.norm import BatchNorm +from paddle.nn import BatchNorm, Linear class Config: diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index 7fc6fca074f5a..54b771134457a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -24,7 +24,7 @@ from paddle.fluid.dygraph.base import to_variable from paddle.fluid.framework import _test_eager_guard from paddle.fluid.layer_helper import LayerHelper -from paddle.nn.layer.norm import BatchNorm +from paddle.nn import BatchNorm # NOTE(zhiqiu): run with FLAGS_cudnn_deterministic=1 diff --git a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py index e84e1134f036c..5a1d986b8a29b 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py @@ -22,7 +22,7 @@ from paddle.fluid import core from paddle.fluid.framework import _test_eager_guard from paddle.fluid.layer_helper import LayerHelper -from paddle.nn.layer.norm import BatchNorm +from paddle.nn import BatchNorm batch_size = 8 train_parameters = { diff --git a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py index fb57ff7fb81d7..9f448e7f07a47 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py @@ -43,7 +43,7 @@ def __init__( data_format=data_format, ) - self._batch_norm = paddle.nn.layer.norm.BatchNorm( + self._batch_norm = paddle.nn.BatchNorm( num_filters, data_layout=data_format ) diff --git a/python/paddle/fluid/tests/unittests/test_layout_autotune.py b/python/paddle/fluid/tests/unittests/test_layout_autotune.py index 3512bbca63dcb..70c283a549a09 100644 --- a/python/paddle/fluid/tests/unittests/test_layout_autotune.py +++ b/python/paddle/fluid/tests/unittests/test_layout_autotune.py @@ -26,7 +26,7 
@@ class SimpleNet(paddle.nn.Layer): def __init__(self, data_format="NCHW", class_num=2): super().__init__() self.conv = paddle.nn.Conv2D(3, 8, (3, 3)) - self.bn = paddle.nn.layer.norm.BatchNorm(num_channels=8) + self.bn = paddle.nn.BatchNorm(num_channels=8) self.relu = paddle.nn.ReLU() self.pool = paddle.nn.AvgPool2D(kernel_size=2, stride=2) self.flatten = paddle.nn.Flatten() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py index 0da8a62031b6a..d3909193cd6ce 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py @@ -27,7 +27,6 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.nn as nn import paddle.nn.functional as F paddle.enable_static() @@ -367,7 +366,7 @@ def test_global_stats(self): for p in self.places: with fluid.dygraph.guard(p): x = paddle.randn([2, 6, 6, 4]) - net1 = nn.layer.norm.BatchNorm( + net1 = paddle.nn.BatchNorm( 6, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(1.0) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py index 1b9434da600ab..3518083d75678 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py @@ -113,7 +113,7 @@ def Base(self): bias_attr=None, data_format='NCHW', ) - self.bn1 = nn.layer.norm.BatchNorm( + self.bn1 = paddle.nn.BatchNorm( self.out_channels, act='relu', param_attr=bn1_weight, @@ -130,7 +130,7 @@ def Base(self): bias_attr=None, data_format='NCHW', ) - self.bn2 = nn.layer.norm.BatchNorm( + self.bn2 = paddle.nn.BatchNorm( self.out_channels, act=None, param_attr=bn2_weight, @@ -147,7 +147,7 @@ def Base(self): bias_attr=None, data_format='NCHW', ) - self.bn3 = nn.layer.norm.BatchNorm( + self.bn3 = paddle.nn.BatchNorm( self.out_channels, act=None, param_attr=bn3_weight, diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index d8f4026aadb73..f446970ee0ef6 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -26,6 +26,7 @@ # limitations under the License. # TODO: define normalization api + import numbers import os import warnings diff --git a/python/paddle/vision/models/densenet.py b/python/paddle/vision/models/densenet.py index 20747e828c838..f620e1d70956b 100644 --- a/python/paddle/vision/models/densenet.py +++ b/python/paddle/vision/models/densenet.py @@ -20,13 +20,13 @@ from paddle.nn import ( AdaptiveAvgPool2D, AvgPool2D, + BatchNorm, Conv2D, Dropout, Linear, MaxPool2D, ) from paddle.nn.initializer import Uniform -from paddle.nn.layer.norm import BatchNorm from paddle.utils.download import get_weights_path_from_url __all__ = []
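After this series, the legacy ``BatchNorm`` layer is defined in paddle/nn/layer/norm.py and, as the final patch shows, remains importable as ``paddle.nn.BatchNorm``. A minimal usage sketch of the relocated layer follows; the input shape and the ``act``/``data_layout`` arguments are illustrative values rather than anything taken from these patches.

.. code-block:: python

    import numpy as np
    import paddle
    # After this series, BatchNorm lives in paddle/nn/layer/norm.py but is
    # still re-exported from the paddle.nn namespace.
    from paddle.nn import BatchNorm

    # Example input: NCHW layout with 10 channels (hypothetical shape).
    x = paddle.to_tensor(np.random.random(size=(3, 10, 3, 7)).astype('float32'))
    bn = BatchNorm(10, act='relu', data_layout='NCHW')
    y = bn(x)
    print(y.shape)  # [3, 10, 3, 7]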
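The test updates in patches 11 and 12 also swap a number of legacy fluid calls for their paddle 2.x counterparts. The sketch below restates those substitutions as standalone calls so the mapping is easy to scan; the tensor names and sizes are made up for illustration and do not come from the patches.

.. code-block:: python

    import paddle

    x = paddle.randn([8, 16, 4, 4])

    # fluid.layers.reshape(y, shape=[...])       -> paddle.reshape(y, shape=[...])
    flat = paddle.reshape(x, shape=[-1, 16 * 4 * 4])

    # fluid.layers.elementwise_add(x=a, y=b)     -> paddle.add(x=a, y=b)
    summed = paddle.add(x=x, y=x)

    # fluid.layers.softmax(out, use_cudnn=False) -> paddle.nn.functional.softmax(out)
    probs = paddle.nn.functional.softmax(flat)

    # Pool2D(pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
    #                                            -> paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
    pool = paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
    pooled = pool(x)

    # Linear(..., act='relu') -> paddle.nn.Linear(...) plus an explicit activation layer
    fc = paddle.nn.Linear(16 * 4 * 4, 10)
    act = paddle.nn.ReLU()
    out = act(fc(flat))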