-
Notifications
You must be signed in to change notification settings - Fork 806
Open
Description
NEConvolution takes more time to process the new i8 / i8 / f32 / f32 (src0 / src1 / src2 / dst) case than the f32 / f32 / f32 / f32 case.
The benchmark was run on an Apple M2 Pro.
Benchmark results:
i8 / i8 / f32 / f32: 43 - 47 ms
f32 / f32 / f32 / f32: 37 - 41 ms
i8 / i8 / i32 / i8: 29 - 30 ms
Benchmark program:
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/utils/misc/MMappedFile.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "tests/Utils.h"
#include "tests/NEON/Accessor.h"
#include <chrono>
#include <cstdint>
#include <iostream>
#include <vector>
using namespace arm_compute;
int main(int argc, char *argv[]) {
DataLayout dl = DataLayout::NHWC;
TensorInfo srcTensorInfo = TensorInfo(TensorShape(64, 56, 56), 1, DataType::F32, dl);
TensorInfo weiTensorInfo = TensorInfo(TensorShape(64, 3, 3, 64), 1, DataType::F32, dl);
TensorInfo biaTensorInfo = TensorInfo(TensorShape(64), 1, DataType::F32, dl);
TensorInfo dstTensorInfo = TensorInfo(TensorShape(64, 56, 56), 1, DataType::F32, dl);
if(is_data_type_quantized(dt)) {
srcTensorInfo.set_quantization_info(QuantizationInfo(1.0));
}
PadStrideInfo strideInfo = PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::FLOOR);
auto status = NEConvolutionLayer::validate(&srcTensorInfo, &weiTensorInfo, &biaTensorInfo, &dstTensorInfo, strideInfo);
if(status.error_code() != ErrorCode::OK) {
std::cout << "ERROR: " << status.error_description().c_str() << std::endl;
exit(1);
}
std::cout << "PASSED VALIDATION" << std::endl;
Tensor srcTensor;
Tensor weiTensor;
Tensor biaTensor;
Tensor dstTensor;
srcTensor.allocator()->init(srcTensorInfo);
weiTensor.allocator()->init(weiTensorInfo);
biaTensor.allocator()->init(biaTensorInfo);
dstTensor.allocator()->init(dstTensorInfo);
NEConvolutionLayer conv;
conv.configure(&srcTensor, &weiTensor, &biaTensor, &dstTensor, strideInfo);
std::cout << "PASSED CONFIGURATION" << std::endl;
srcTensor.allocator()->allocate();
weiTensor.allocator()->allocate();
dstTensor.allocator()->allocate();
// warm-up
conv.run();
std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < 100; i++) conv.run();
std::chrono::high_resolution_clock::time_point finish = std::chrono::high_resolution_clock::now();
uint64_t total_duration = std::chrono::duration_cast<std::chrono::microseconds>(finish - start).count();
std::cout << "time: " << total_duration << std::endl;
srcTensor.allocator()->free();
weiTensor.allocator()->free();
biaTensor.allocator()->free();
dstTensor.allocator()->free();
return 0;
}
Metadata
Metadata
Assignees
Labels
No labels