
Commit

Use normalize instead of l2_normalize (#7113)
* use normalize instead of l2_normalize

* refine

* fix l2_norm

* reformat

Co-authored-by: oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
mosout and oneflow-ci-bot authored Jan 17, 2022
1 parent f46f6e3 commit 3e8be2e
Showing 6 changed files with 24 additions and 170 deletions.
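
For downstream code that used the removed Python binding, the replacement is the generic normalize entry point with p=2. A minimal migration sketch (note that the defaults differ: l2_normalize defaulted to dim=0, while normalize defaults to dim=1, so pass dim explicitly):

    import oneflow as flow

    x = flow.tensor([[1.0, 2.0], [3.0, 4.0]])

    # Before this commit:
    #   out = flow.nn.functional.l2_normalize(x, dim=0)
    # After: the same L2 normalization through the generic entry point.
    out = flow.nn.functional.normalize(x, p=2.0, dim=0)
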
1 change: 0 additions & 1 deletion docs/source/functional.rst
@@ -14,7 +14,6 @@ Functional operations for neural networks
.. autofunction:: hardswish
.. autofunction:: hardtanh
.. autofunction:: normalize
.. autofunction:: l2_normalize
.. autofunction:: leaky_relu
.. autofunction:: elu
.. autofunction:: celu
4 changes: 2 additions & 2 deletions oneflow/core/functional/functional_api.yaml
@@ -1762,12 +1762,12 @@
   bind_python: True
 
 - name: "normalize"
-  signature: "Tensor (Tensor input, Float p=2.0, Int32 dim=1, Float eps=1e-12) => Normalize"
+  signature: "Tensor (Tensor input, Float p=2.0, Int32 dim=1, Float eps=1e-12, Bool use_l2_norm_kernel=True) => Normalize"
   bind_python: True
 
 - name: "l2_normalize"
   signature: "Tensor (Tensor input, Int32 axis=0, Float epsilon=1e-12) => L2Normalize"
-  bind_python: True
+  bind_python: False
 
 - name: "l2_normalize_grad"
   signature: "Tensor (Tensor dy, Tensor y, Tensor square_x_sum, Int32 axis, Float epsilon) => L2NormalizeGrad"
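
The new use_l2_norm_kernel flag defaults to True and acts as a dispatch hint rather than part of the documented normalize signature. A sketch of toggling it, assuming the generated binding exposes the keyword under the name given in the YAML above (flow._C.normalize with use_l2_norm_kernel is an assumption, not documented API):

    import oneflow as flow

    x = flow.randn(2, 3)

    # Default path: with p == 2 the functor dispatches to the fused L2 kernel.
    y_fused = flow.nn.functional.normalize(x, p=2.0, dim=1)

    # Hypothetical opt-out into the generic p-norm path, if the binding
    # accepts the keyword:
    y_generic = flow._C.normalize(x, p=2.0, dim=1, eps=1e-12, use_l2_norm_kernel=False)
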
32 changes: 21 additions & 11 deletions oneflow/core/functional/impl/nn_functor.cpp
@@ -1662,21 +1662,27 @@ class L2NormalizeFunctor {
   }
   Maybe<Tensor> operator()(const std::shared_ptr<one::Tensor>& input, const int32_t& axis,
                            const float& epsilon) const {
+    const auto ndims = input->shape()->NumAxes();
+    const auto final_dim = ndims - 1;
+
+    auto axis_ = axis >= 0 ? axis : axis + ndims;
+    CHECK_GE_OR_RETURN(axis_, 0) << "Axis should >=0 but axis is " << axis_ << " now.";
+    CHECK_LE_OR_RETURN(axis_, final_dim)
+        << "Axis should <" << ndims << " but axis is " << axis_ << " now.";
+
     MutableAttrMap attrs;
-    JUST(attrs.SetAttr<int32_t>("axis", 0));
     JUST(attrs.SetAttr<float>("epsilon", epsilon));
+    JUST(attrs.SetAttr<int32_t>("axis", final_dim));
 
-    if (axis != 0) {
-      std::vector<int> input_perm(input->shape()->dim_vec().size(), 0);
-      for (size_t i = 0; i < input_perm.size(); ++i) { input_perm[i] = static_cast<int>(i); }
-      std::swap(input_perm[0], input_perm[static_cast<size_t>(axis)]);
+    if (axis_ == final_dim) { return OpInterpUtil::Dispatch<Tensor>(*op_, {input}, attrs); }
 
-      const auto result = JUST(OpInterpUtil::Dispatch<TensorTuple>(
-          *op_, {JUST(functional::Transpose(input, input_perm))}, attrs));
-      return functional::Transpose(result->at(0), input_perm);
-    }
+    std::vector<int> input_perm(input->shape()->dim_vec().size(), 0);
+    for (size_t i = 0; i < input_perm.size(); ++i) { input_perm[i] = static_cast<int>(i); }
+    std::swap(input_perm[final_dim], input_perm[static_cast<size_t>(axis_)]);
 
-    return OpInterpUtil::Dispatch<Tensor>(*op_, {input}, attrs);
+    const auto result = JUST(OpInterpUtil::Dispatch<TensorTuple>(
+        *op_, {JUST(functional::Transpose(input, input_perm))}, attrs));
+    return functional::Transpose(result->at(0), input_perm);
   }
 
  private:
@@ -1686,7 +1692,11 @@ class L2NormalizeFunctor {
 class NormalizeFunctor {
  public:
   Maybe<Tensor> operator()(const std::shared_ptr<one::Tensor>& input, const float& p,
-                           const int32_t& dim, const float& eps) const {
+                           const int32_t& dim, const float& eps,
+                           const bool& use_l2_norm_kernel) const {
+    if (use_l2_norm_kernel && (std::fabs(p - 2.0f) < std::numeric_limits<float>::min())) {
+      return functional::L2Normalize(input, dim, eps);
+    }
     return SequenceFunction<Maybe<Tensor>(const std::shared_ptr<Tensor>&, const float&,
                                           const int32_t&)>(
         [](const auto& x, const float& p, const int32_t& dim) -> Maybe<Tensor> {
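
For illustration, the axis handling of the reworked L2NormalizeFunctor can be mirrored in NumPy. This is a sketch following the reference formula used by the (now removed) test, x / sqrt(max(sum(x^2), eps)), not the actual kernel; l2_normalize_ref is an illustrative name:

    import numpy as np

    def l2_normalize_ref(x: np.ndarray, axis: int, epsilon: float = 1e-12) -> np.ndarray:
        """NumPy mirror of the functor: the kernel normalizes along the last
        dimension, so any other axis is swapped to the end and swapped back."""
        ndims = x.ndim
        final_dim = ndims - 1
        axis = axis if axis >= 0 else axis + ndims  # same wrap-around as axis_

        def last_dim_l2(t):
            square_sum = np.sum(t * t, axis=-1, keepdims=True)
            return t / np.sqrt(np.maximum(square_sum, epsilon))

        if axis == final_dim:
            return last_dim_l2(x)
        perm = list(range(ndims))
        perm[final_dim], perm[axis] = perm[axis], perm[final_dim]
        # Swapping two entries is its own inverse, so the same perm undoes the transpose.
        return np.transpose(last_dim_l2(np.transpose(x, perm)), perm)

NormalizeFunctor now routes to this L2 path whenever use_l2_norm_kernel is set and p is exactly 2, and falls back to the generic p-norm sequence otherwise.
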
37 changes: 0 additions & 37 deletions python/oneflow/framework/docstr/norm.py
@@ -304,40 +304,3 @@
""",
)

add_docstr(
oneflow._C.l2_normalize,
"""nn.functional.l2_normalize(input: Tensor, dim: int=0, epsilon: float=1e-12) -> Tensor
Use L2 norm to normalizes along dimension `dim`
The equation is:
.. math::
out = \\frac{x}{max(\\sqrt{\\Sigma{x^2}}, \\epsilon)}
Args:
input (oneflow.Tensor): Input Tensor
dim (int): The axis on which to apply L2 normalization. Defaults to 0.
epsilon (float): The epsilon value is used to avoid division by zero. Defaults to 1e-12.
Returns:
oneflow.Tensor: The normalized Tensor
For example:
.. code-block:: python
>>> import oneflow as flow
>>> x = flow.tensor([[1, 2], [3, 4]], dtype=flow.float32)
>>> out = flow.nn.functional.l2_normalize(x, 0)
>>> out
tensor([[0.3162, 0.4472],
[0.9487, 0.8944]], dtype=oneflow.float32)
>>> out = flow.nn.functional.l2_normalize(x, 1)
>>> out
tensor([[0.4472, 0.8944],
[0.6000, 0.8000]], dtype=oneflow.float32)
""",
)
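
The example from the removed docstring can be reproduced with normalize; a sketch, with the printed values carried over from the deleted docstring above:

    >>> import oneflow as flow
    >>> x = flow.tensor([[1, 2], [3, 4]], dtype=flow.float32)
    >>> flow.nn.functional.normalize(x, p=2.0, dim=0)
    tensor([[0.3162, 0.4472],
            [0.9487, 0.8944]], dtype=oneflow.float32)
    >>> flow.nn.functional.normalize(x, p=2.0, dim=1)
    tensor([[0.4472, 0.8944],
            [0.6000, 0.8000]], dtype=oneflow.float32)
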
1 change: 0 additions & 1 deletion python/oneflow/nn/functional/__init__.py
@@ -57,7 +57,6 @@
from oneflow._C import triplet_margin_loss
from oneflow._C import ctc_greedy_decoder
from oneflow._C import one_hot
from oneflow._C import l2_normalize
from oneflow._C import normalize
from oneflow.nn.modules.sparse import embedding
from oneflow.nn.modules.linear import linear
119 changes: 1 addition & 118 deletions python/oneflow/test/modules/test_normalize.py
@@ -15,131 +15,14 @@
"""

import unittest
from collections import OrderedDict
from test_util import GenArgList
from oneflow.test_utils.automated_test_util import *
import numpy as np
import oneflow as flow
import oneflow.unittest


def _count(shape, begin_axis, end_axis):
cnt = 1
for i in range(begin_axis, end_axis):
cnt *= shape[i]
return cnt


def _l2_norm_numpy(x, dim, epsilon=1e-12):
axes = [k for k in range(len(list(x.shape)))]
axes[0], axes[dim] = axes[dim], axes[0]
axes_tuple = tuple(axes)

x = np.transpose(x, axes_tuple)

square_x_sum_shape = list(x.shape)
square_x_sum_shape[0] = 1

c = x.shape[0]
n = int(x.size / c)
d = _count(x.shape, 1, len(x.shape))

square_x_sum = np.zeros(square_x_sum_shape)

square_x_sum_flatten = square_x_sum.reshape(-1)
in_flatten = x.reshape(-1)
out = np.zeros(x.size)

for i in range(0, n):
offset = int(int((i / d)) * d * c + (i % d))
for j in range(0, c):
item = in_flatten[offset + j * d]
square_x_sum_flatten[i] = square_x_sum_flatten[i] + item * item

norm = np.sqrt(np.maximum(square_x_sum_flatten[i], epsilon))
for j in range(0, c):
index = offset + j * d
out[index] = in_flatten[index] / norm

square_x_sum = square_x_sum_flatten.reshape(square_x_sum.shape)
out = out.reshape(x.shape)
return np.transpose(out, axes_tuple), np.transpose(square_x_sum, axes_tuple)


def _l2_norm_backward_np(dy, y, square_x_sum, dim, epsilon=1e-12):
axes = [k for k in range(len(list(y.shape)))]
axes[0], axes[dim] = axes[dim], axes[0]
axes_tuple = tuple(axes)

dy = np.transpose(dy, axes_tuple)
y = np.transpose(y, axes_tuple)
square_x_sum = np.transpose(square_x_sum, axes_tuple)

c = dy.shape[0]
n = int(dy.size / c)
d = _count(dy.shape, 1, len(y.shape))

dx = np.zeros(dy.shape).reshape(-1)
dy_flatten = dy.reshape(-1)
y_flatten = y.reshape(-1)
square_x_sum_flatten = square_x_sum.reshape(-1)

for i in range(0, n):
norm = np.sqrt(np.maximum(square_x_sum_flatten[i], epsilon))
offset = int(int(int((i / d)) * d * c) + (i % d))
if square_x_sum_flatten[i] >= epsilon:
y_dy_inner_prod = 0
for j in range(0, c):
index = offset + j * d
y_dy_inner_prod = y_dy_inner_prod + dy_flatten[index] * y_flatten[index]
for j in range(0, c):
index = offset + j * d
dx[index] = (1 / norm) * (
dy_flatten[index] - y_dy_inner_prod * y_flatten[index]
)
else:
for j in range(0, c):
index = offset + j * d
dx[index] = (1 / norm) * dy_flatten[index]

return np.transpose(dx.reshape(y.shape), axes_tuple)


def _test_l2_normalize(test_case, device, dim, shape):
input = np.random.randn(*shape)
np_out, square_x_sum = _l2_norm_numpy(input, dim)
of_input = flow.tensor(
input, dtype=flow.float32, requires_grad=True, device=flow.device(device)
)
of_out = flow.nn.functional.l2_normalize(of_input, dim)

test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))

z = of_out.sum()
z.backward()
dx = _l2_norm_backward_np(np.ones(np_out.shape), np_out, square_x_sum, dim)
test_case.assertTrue(np.allclose(of_input.grad.numpy(), dx, 1e-4, 1e-4))


@flow.unittest.skip_unless_1n1d()
class TestL2Normalize(flow.unittest.TestCase):
def test_l2_normalize(test_case):
arg_dict = OrderedDict()
arg_dict["test_fun"] = [
_test_l2_normalize,
]
arg_dict["device"] = ["cpu", "cuda"]
arg_dict["dim"] = [0, 1, 2, 3]
arg_dict["shape"] = [
(10, 10, 20, 30),
]
for arg in GenArgList(arg_dict):
arg[0](test_case, *arg[1:])


@flow.unittest.skip_unless_1n1d()
class TestFunctionalNormalize(flow.unittest.TestCase):
-@autotest(check_graph=False)
+@autotest()
def test_functional_normalize(test_case):
device = random_device()
ndim = random(low=2)
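
The removed element-wise NumPy reference reduces to the closed form below. A sketch of an equivalence check against normalize, reusing the shape and tolerances of the deleted test (l2_norm_ref is an illustrative helper, not part of the test suite):

    import numpy as np
    import oneflow as flow

    def l2_norm_ref(x, dim, epsilon=1e-12):
        # Closed-form version of the removed _l2_norm_numpy forward reference.
        square_sum = np.sum(np.square(x), axis=dim, keepdims=True)
        return x / np.sqrt(np.maximum(square_sum, epsilon))

    x = np.random.randn(10, 10, 20, 30)
    for dim in range(4):
        of_out = flow.nn.functional.normalize(
            flow.tensor(x, dtype=flow.float32), p=2.0, dim=dim
        )
        assert np.allclose(of_out.numpy(), l2_norm_ref(x, dim), 1e-4, 1e-4)
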
