diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc
index 323207df20371..efb5b4c9d098f 100644
--- a/paddle/phi/backends/xpu/xpu2_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu2_op_list.cc
@@ -595,7 +595,10 @@ XPUOpMap& get_kl2_ops() {
        XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
       {"mean_grad",
        XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
-      {"mean", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
+      {"mean",
+       XPUKernelSet({phi::DataType::FLOAT32,
+                     phi::DataType::FLOAT16,
+                     phi::DataType::BFLOAT16})},
       {"merged_adam", XPUKernelSet({phi::DataType::FLOAT32})},
       {"merged_momentum",
        XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
diff --git a/paddle/phi/backends/xpu/xpu3_op_list.cc b/paddle/phi/backends/xpu/xpu3_op_list.cc
index aeb51998f4d7a..29e9c7e0f8901 100644
--- a/paddle/phi/backends/xpu/xpu3_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu3_op_list.cc
@@ -571,7 +571,10 @@ XPUOpMap& get_kl3_ops() {
        XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
       {"mean_grad",
        XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
-      {"mean", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
+      {"mean",
+       XPUKernelSet({phi::DataType::FLOAT32,
+                     phi::DataType::FLOAT16,
+                     phi::DataType::BFLOAT16})},
       {"merged_momentum",
        XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
       {"mish_grad", XPUKernelSet({phi::DataType::FLOAT32})},
diff --git a/paddle/phi/kernels/funcs/tensor_formatter.cc b/paddle/phi/kernels/funcs/tensor_formatter.cc
index 16d3b38bced7c..7c4cd28fe20c7 100644
--- a/paddle/phi/kernels/funcs/tensor_formatter.cc
+++ b/paddle/phi/kernels/funcs/tensor_formatter.cc
@@ -107,6 +107,10 @@ std::string TensorFormatter::Format(const phi::DenseTensor& print_tensor,
     FormatData<int64_t>(print_tensor, log_stream);
   } else if (dtype == phi::DataType::BOOL) {
     FormatData<bool>(print_tensor, log_stream);
+  } else if (dtype == phi::DataType::FLOAT16) {
+    FormatData<phi::dtype::float16>(print_tensor, log_stream);
+  } else if (dtype == phi::DataType::BFLOAT16) {
+    FormatData<phi::dtype::bfloat16>(print_tensor, log_stream);
   } else {
     log_stream << " - data: unprintable type: " << dtype << std::endl;
   }
@@ -153,6 +157,10 @@ template void TensorFormatter::FormatData<int>(
     const phi::DenseTensor& print_tensor, std::stringstream& log_stream);
 template void TensorFormatter::FormatData<int64_t>(
     const phi::DenseTensor& print_tensor, std::stringstream& log_stream);
+template void TensorFormatter::FormatData<phi::dtype::float16>(
+    const phi::DenseTensor& print_tensor, std::stringstream& log_stream);
+template void TensorFormatter::FormatData<phi::dtype::bfloat16>(
+    const phi::DenseTensor& print_tensor, std::stringstream& log_stream);
 
 }  // namespace funcs
 }  // namespace paddle
diff --git a/test/legacy_test/test_print_op.py b/test/legacy_test/test_print_op.py
index 95c1dd420626d..1ce1a08643210 100755
--- a/test/legacy_test/test_print_op.py
+++ b/test/legacy_test/test_print_op.py
@@ -15,6 +15,7 @@
 import unittest
 
 import numpy as np
+from op_test import convert_float_to_uint16
 from simple_nets import init_data, simple_fc_net
 
 import paddle
@@ -30,14 +31,17 @@ class TestPrintOpCPU(unittest.TestCase):
     def setUp(self):
+        self.dtype = 'float32'
         self.place = paddle.CPUPlace()
         self.x_tensor = base.core.LoDTensor()
-        tensor_np = np.random.random(size=(2, 3)).astype('float32')
+        tensor_np = np.random.random(size=(2, 3)).astype(self.dtype)
         self.x_tensor.set(tensor_np, self.place)
         self.x_tensor.set_recursive_sequence_lengths([[1, 1]])
 
     def build_network(self, only_forward, **kargs):
-        x = paddle.static.data('x', shape=[-1, 3], dtype='float32', lod_level=1)
+        x = paddle.static.data(
+            'x', shape=[-1, 3], dtype=self.dtype, lod_level=1
+        )
         x.stop_gradient = False
         paddle.static.Print(input=x, **kargs)
         loss = paddle.mean(x)
@@ -77,7 +81,7 @@ def test_all_parameters(self):
         prog = paddle.static.Program()
         with paddle.static.program_guard(prog, paddle.static.Program()):
             x = paddle.static.data(
-                'x', shape=[-1, 3], dtype='float32', lod_level=1
+                'x', shape=[-1, 3], dtype=self.dtype, lod_level=1
             )
             x.stop_gradient = False
@@ -136,9 +140,36 @@ def test_errors(self):
 )
 class TestPrintOpGPU(TestPrintOpCPU):
     def setUp(self):
+        self.dtype = 'float32'
         self.place = paddle.CUDAPlace(0)
         self.x_tensor = base.core.LoDTensor()
-        tensor_np = np.random.random(size=(2, 3)).astype('float32')
+        tensor_np = np.random.random(size=(2, 3)).astype(self.dtype)
+        self.x_tensor.set(tensor_np, self.place)
+        self.x_tensor.set_recursive_sequence_lengths([[1, 1]])
+
+
+@unittest.skipIf(
+    not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
+)
+class TestPrintOpGPUFP16(TestPrintOpCPU):
+    def setUp(self):
+        self.dtype = 'float16'
+        self.place = paddle.CUDAPlace(0)
+        self.x_tensor = base.core.LoDTensor()
+        tensor_np = np.random.random(size=(2, 3)).astype(self.dtype)
+        self.x_tensor.set(tensor_np, self.place)
+        self.x_tensor.set_recursive_sequence_lengths([[1, 1]])
+
+
+@unittest.skipIf(
+    not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
+)
+class TestPrintOpGPUBFP16(TestPrintOpCPU):
+    def setUp(self):
+        self.dtype = 'bfloat16'
+        self.place = paddle.CUDAPlace(0)
+        self.x_tensor = base.core.LoDTensor()
+        tensor_np = convert_float_to_uint16(np.random.random(size=(2, 3)))
         self.x_tensor.set(tensor_np, self.place)
         self.x_tensor.set_recursive_sequence_lengths([[1, 1]])
diff --git a/test/xpu/op_test_xpu.py b/test/xpu/op_test_xpu.py
index 7ea5359de5044..09ee428714bd6 100644
--- a/test/xpu/op_test_xpu.py
+++ b/test/xpu/op_test_xpu.py
@@ -183,8 +183,8 @@ def check_grad_with_place(
             if not core.is_float16_supported(place):
                 return
 
-        if self.dtype == np.float16:
-            max_relative_error = 1.0
+        if self.dtype == np.float16 or self.dtype == np.uint16:
+            max_relative_error = 0.1
         return super().check_grad_with_place(
             place,
             inputs_to_check,
diff --git a/test/xpu/test_adamw_op_xpu.py b/test/xpu/test_adamw_op_xpu.py
index 1a777f2d23578..8584360837d79 100644
--- a/test/xpu/test_adamw_op_xpu.py
+++ b/test/xpu/test_adamw_op_xpu.py
@@ -84,7 +84,7 @@ def setUp(self):
         # Test AdamW Op with supplied attributes
         self.op_type = "adamw"
         self.init_shape()
-        self.dtype = self.in_type_str
+        self.dtype = self.in_type
         param = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
         grad = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
         moment1 = np.random.uniform(-1, 1, self.shape).astype("float32")
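
For reference, a minimal usage sketch (not part of the patch) of the bfloat16 print path this diff enables. It mirrors the new TestPrintOpGPUBFP16 case above and assumes a CUDA-enabled build with the op_test helper convert_float_to_uint16 importable, as in the test file:

import numpy as np
import paddle
from paddle import base
from op_test import convert_float_to_uint16  # same test helper imported in the diff above

paddle.enable_static()
place = paddle.CUDAPlace(0)

main_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, paddle.static.Program()):
    # Same graph as TestPrintOpCPU.build_network, but with a bfloat16 input.
    x = paddle.static.data('x', shape=[-1, 3], dtype='bfloat16', lod_level=1)
    paddle.static.Print(input=x)  # hits the new BFLOAT16 branch in TensorFormatter::Format
    loss = paddle.mean(x)

# bfloat16 feed data is prepared as uint16, as in TestPrintOpGPUBFP16.setUp.
x_tensor = base.core.LoDTensor()
x_tensor.set(convert_float_to_uint16(np.random.random(size=(2, 3))), place)
x_tensor.set_recursive_sequence_lengths([[1, 1]])

exe = paddle.static.Executor(place)
exe.run(main_prog, feed={'x': x_tensor}, fetch_list=[loss], return_numpy=False)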