Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

light api无法执行量化模型推理 #5896

Closed
wanghaoshuang opened this issue Apr 14, 2021 · 3 comments
Closed

light api无法执行量化模型推理 #5896

wanghaoshuang opened this issue Apr 14, 2021 · 3 comments

Comments

@wanghaoshuang
Copy link

PaddleSlim通过静态图量化训练产出模型ppyolo_tiny_x_coco.zip,并测试了两套推理API:

  1. full api 加载 __model__ 和 __params__ 文件,推理正常;
  2. light api加载opt转换后的nb文件,推理执行失败。

Paddle-Lite版本:2.7
opt版本:2.7
操作系统:android

Run inference by using light api

使用Paddle-Lite 2.7版本的opt工具将量化训练产出模型转为『nb』文件,命令如下:

paddle_lite_opt --model_file=./__model__ \
                --param_file=./__params__  \
                --optimize_out_type=naive_buffer \
                --optimize_out=ppyolo_tiny_final \
                --valid_targets=arm

转换过程出现warning如下:

5b459b4e3999c7e34577c8af06124845

使用以下代码执行推理:

5fb958bc43d922b69ab8549c3f7cc9f8

447fe5149e55dde01876345593ad3d26

报错信息如下:

# 待 @余永佳 补充

Run inference by using full api with CxxConfig

用以下代码加载量化模型,可以正常执行。其中重点设置了cxx_config.set_valid_places.

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle_api.h"

#include <arm_neon.h>
#include <stdio.h>
#include <sys/time.h>
#include <unistd.h>

#include <fstream>
#include <iostream>
#include <limits>
#include <vector>

#include <opencv2/opencv.hpp>

// Benchmark configuration: warm-up iterations are run and discarded,
// then REPEAT_COUNT timed runs are averaged.
int WARMUP_COUNT = 5;
int REPEAT_COUNT = 10;
const int CPU_THREAD_NUM = 1;
const paddle::lite_api::PowerMode CPU_POWER_MODE =
    paddle::lite_api::PowerMode::LITE_POWER_HIGH;
// Model input layout is NCHW: {batch, channels, height, width}.
const std::vector<int64_t> INPUT_SHAPE = {1, 3, 416, 416};
// Per-channel normalization constants (ImageNet mean/std, RGB order).
const std::vector<float> INPUT_MEAN = {0.485f, 0.456f, 0.406f};
const std::vector<float> INPUT_STD = {0.229f, 0.224f, 0.225f};
// Detections scoring below this confidence are discarded in postprocess().
const float SCORE_THRESHOLD = 0.2f;

// One decoded detection: class label, draw color, confidence, and the
// bounding box in result-image pixel coordinates (x/y = top-left corner,
// w/h = width/height).
struct RESULT {
  std::string class_name;
  cv::Scalar fill_color;
  float score;
  float x;
  float y;
  float w;
  float h;
};

// Return the current wall-clock time in microseconds since the epoch.
inline int64_t get_current_us() {
  struct timeval tv;
  gettimeofday(&tv, nullptr);
  return static_cast<int64_t>(tv.tv_sec) * 1000000LL +
         static_cast<int64_t>(tv.tv_usec);
}

// Read one label per line from the text file at `path`.
// Returns labels in file order; returns an empty vector (with a message on
// stderr) if the file cannot be opened.
// Fix: the original `while (file) { getline; push_back; }` loop tested the
// stream BEFORE reading, so the failed read at EOF appended one spurious
// empty label. Using getline() itself as the loop condition avoids that.
std::vector<std::string> load_labels(const std::string &path) {
  std::vector<std::string> labels;
  std::ifstream file(path);
  if (!file.is_open()) {
    fprintf(stderr, "Failed to open label file: %s\n", path.c_str());
    return labels;
  }
  std::string line;
  while (std::getline(file, line)) {
    labels.push_back(line);
  }
  return labels;
}

// Build a deterministic per-class color palette by spreading the bits of
// each class id across the R/G/B channels (PASCAL VOC-style colormap).
// Class 0 maps to black; higher ids get progressively mixed colors.
std::vector<cv::Scalar> generate_color_map(int num_of_classes) {
  std::vector<cv::Scalar> color_map(num_of_classes);
  for (int cls = 0; cls < num_of_classes; ++cls) {
    int r = 0;
    int g = 0;
    int b = 0;
    int remaining = cls;
    // Consume three bits of the id per round, placing them at successively
    // lower bit positions of each channel (starting at bit 7).
    for (int shift = 7; remaining != 0; --shift, remaining >>= 3) {
      r |= (remaining & 0x1) << shift;
      g |= ((remaining >> 1) & 0x1) << shift;
      b |= ((remaining >> 2) & 0x1) << shift;
    }
    color_map[cls] = cv::Scalar(r, g, b);
  }
  return color_map;
}

// Resize `input_image` to (input_width, input_height), normalize each pixel
// with the per-channel mean/std, and write the result into `input_data` in
// planar NCHW layout (all of channel 0, then channel 1, then channel 2).
// The bulk of the work uses ARM NEON intrinsics (vld3q_f32 etc.), so this
// translation unit must be compiled for ARM with <arm_neon.h> available.
// NOTE(review): only 4-channel input is converted (BGRA->RGB); a 3-channel
// image from cv::imread stays in BGR order — confirm the model expects the
// channel order produced here.
void preprocess(cv::Mat &input_image, const std::vector<float> &input_mean,
                const std::vector<float> &input_std, int input_width,
                int input_height, float *input_data) {
  cv::Mat resize_image;
  cv::resize(input_image, resize_image, cv::Size(input_width, input_height), 0, 0);
  if (resize_image.channels() == 4) {
    cv::cvtColor(resize_image, resize_image, cv::COLOR_BGRA2RGB);
  }
  // Scale 8-bit pixels to floats in [0, 1] before mean/std normalization.
  cv::Mat norm_image;
  resize_image.convertTo(norm_image, CV_32FC3, 1 / 255.f);
  // NHWC->NCHW
  int image_size = input_height * input_width;
  const float *image_data = reinterpret_cast<const float *>(norm_image.data);
  // One output pointer per destination channel plane.
  float *input_data_c0 = input_data;
  float *input_data_c1 = input_data + image_size;
  float *input_data_c2 = input_data + image_size * 2;
  int i = 0;
  // Broadcast mean and reciprocal std into NEON registers; multiplying by
  // 1/std replaces a per-pixel division.
  float32x4_t vmean0 = vdupq_n_f32(input_mean[0]);
  float32x4_t vmean1 = vdupq_n_f32(input_mean[1]);
  float32x4_t vmean2 = vdupq_n_f32(input_mean[2]);
  float32x4_t vscale0 = vdupq_n_f32(1.0f / input_std[0]);
  float32x4_t vscale1 = vdupq_n_f32(1.0f / input_std[1]);
  float32x4_t vscale2 = vdupq_n_f32(1.0f / input_std[2]);
  // Vectorized main loop: vld3q_f32 de-interleaves 4 RGB pixels (12 floats)
  // into three 4-lane registers, normalizes, and stores each channel plane.
  for (; i < image_size - 3; i += 4) {
    float32x4x3_t vin3 = vld3q_f32(image_data);
    float32x4_t vsub0 = vsubq_f32(vin3.val[0], vmean0);
    float32x4_t vsub1 = vsubq_f32(vin3.val[1], vmean1);
    float32x4_t vsub2 = vsubq_f32(vin3.val[2], vmean2);
    float32x4_t vs0 = vmulq_f32(vsub0, vscale0);
    float32x4_t vs1 = vmulq_f32(vsub1, vscale1);
    float32x4_t vs2 = vmulq_f32(vsub2, vscale2);
    vst1q_f32(input_data_c0, vs0);
    vst1q_f32(input_data_c1, vs1);
    vst1q_f32(input_data_c2, vs2);
    image_data += 12;
    input_data_c0 += 4;
    input_data_c1 += 4;
    input_data_c2 += 4;
  }
  // Scalar tail loop for the last (image_size % 4) pixels.
  for (; i < image_size; i++) {
    *(input_data_c0++) = (*(image_data++) - input_mean[0]) / input_std[0];
    *(input_data_c1++) = (*(image_data++) - input_mean[1]) / input_std[1];
    *(input_data_c2++) = (*(image_data++) - input_mean[2]) / input_std[2];
  }
}

// Decode the detector's raw output into RESULT objects and draw the boxes,
// labels, and scores onto `result_image`.
//
// Each detection occupies 6 consecutive floats:
//   [class_id, score, x_min, y_min, x_max, y_max] in network-input
// coordinates; boxes are rescaled to `result_image` pixel coordinates.
// Detections with score < SCORE_THRESHOLD are skipped.
//
// Fixes vs. original:
//  - loop bound `i + 5 < output_size` prevents reading past the buffer when
//    output_size is not a multiple of 6 (original read output_data[i+5]
//    whenever i < output_size);
//  - label_list/color_map sizes cast to int to avoid signed/unsigned
//    comparison with class_id;
//  - thickness variables are int, matching the OpenCV API (getTextSize,
//    rectangle, putText all take int thickness);
//  - "Unknow" typo in the fallback label corrected to "Unknown".
std::vector<RESULT> postprocess(const float *output_data, int64_t output_size, int input_width, int input_height, const std::vector<std::string> &label_list, const std::vector<cv::Scalar> &color_map, cv::Mat &result_image) {
  int image_width = result_image.cols;
  int image_height = result_image.rows;
  std::vector<RESULT> results;
  for (int64_t i = 0; i + 5 < output_size; i += 6) {
    // Class id
    auto class_id = static_cast<int>(round(output_data[i]));
    // Confidence score
    auto score = output_data[i + 1];
    if (score < SCORE_THRESHOLD)
      continue;
    RESULT object;
    object.class_name = class_id >= 0 &&
                                class_id < static_cast<int>(label_list.size())
                            ? label_list[class_id]
                            : "Unknown";
    object.fill_color = class_id >= 0 &&
                                class_id < static_cast<int>(color_map.size())
                            ? color_map[class_id]
                            : cv::Scalar(0, 0, 0);
    object.score = score;
    // Rescale box from network-input coordinates to image coordinates.
    object.x = (output_data[i + 2] / input_width) * image_width;
    object.y = (output_data[i + 3] / input_height) * image_height;
    object.w = ((output_data[i + 4] - output_data[i + 2] + 1) / input_width) * image_width;
    object.h = ((output_data[i + 5] - output_data[i + 3] + 1) / input_height) * image_height;
    results.push_back(object);
    // Visualize results: clip the box to the image bounds before drawing.
    cv::Rect bounding_box =
        cv::Rect(object.x, object.y, object.w, object.h) &
        cv::Rect(0, 0, image_width - 1, image_height - 1);
    // Configure text size
    std::string text = object.class_name + " ";
    text += std::to_string(static_cast<int>(object.score * 100)) + "%";
    int font_face = cv::FONT_HERSHEY_PLAIN;
    double font_scale = 1.2;
    int font_thickness = 1;  // OpenCV drawing APIs take int thickness
    cv::Size text_size =
        cv::getTextSize(text, font_face, font_scale, font_thickness, nullptr);
    // Draw roi object, text, and background
    cv::rectangle(result_image, bounding_box, object.fill_color, 2);
    cv::rectangle(result_image,
                  cv::Point2d(bounding_box.x,
                              bounding_box.y - round(text_size.height * 1.25f)),
                  cv::Point2d(bounding_box.x + bounding_box.width, bounding_box.y),
                  object.fill_color, -1);
    cv::putText(result_image, text, cv::Point2d(bounding_box.x, bounding_box.y),
                font_face, font_scale, cv::Scalar(255, 255, 255), font_thickness);
  }
  return results;
}

// Run the full pipeline on `input_image` with `predictor`:
// preprocess -> (warm-up + timed repeated inference) -> postprocess,
// printing timing statistics and detections along the way.
// Returns a copy of the input image annotated with the detected boxes.
//
// Fixes vs. original:
//  - prediction_time is initialized (it was only assigned inside the repeat
//    loop, so REPEAT_COUNT == 0 printed an uninitialized value);
//  - the average is computed once after the loop instead of every iteration;
//  - min_time_cost initialized from numeric_limits<double>, not <float>;
//  - results.size() printed with %zu (passing size_t to %d is undefined
//    behavior) and the results loop index is size_t.
cv::Mat process(cv::Mat &input_image, const std::vector<std::string> &label_list, const std::vector<cv::Scalar> &color_map, std::shared_ptr<paddle::lite_api::PaddlePredictor> &predictor) {
  // Preprocess image and fill the data of input tensor
  auto input_tensor = predictor->GetInput(0);
  input_tensor->Resize(INPUT_SHAPE);
  int input_width = INPUT_SHAPE[3];
  int input_height = INPUT_SHAPE[2];
  auto *input_data = input_tensor->mutable_data<float>();
  // Second input carries the target image size expected by the model.
  auto size_tensor = predictor->GetInput(1);
  size_tensor->Resize({1, 2});
  auto *size_data = size_tensor->mutable_data<int32_t>();
  size_data[0] = input_width;
  size_data[1] = input_height;
  double preprocess_start_time = get_current_us();
  preprocess(input_image, INPUT_MEAN, INPUT_STD, input_width, input_height,
             input_data);
  double preprocess_end_time = get_current_us();
  double preprocess_time =
      (preprocess_end_time - preprocess_start_time) / 1000.0;

  double prediction_time = 0.0;
  // Run predictor
  // warm up to skip the first inference and get more stable time, remove it in
  // actual products
  for (int i = 0; i < WARMUP_COUNT; i++) {
    predictor->Run();
  }
  // repeat to obtain the average time, set REPEAT_COUNT=1 in actual products
  double max_time_cost = 0.0;
  double min_time_cost = std::numeric_limits<double>::max();
  double total_time_cost = 0.0;
  for (int i = 0; i < REPEAT_COUNT; i++) {
    // Refill the input tensor: Run() may leave it clobbered between runs.
    preprocess(input_image, INPUT_MEAN, INPUT_STD, input_width, input_height,
               input_data);
    auto start = get_current_us();
    predictor->Run();
    auto end = get_current_us();
    double cur_time_cost = (end - start) / 1000.0;
    if (cur_time_cost > max_time_cost) {
      max_time_cost = cur_time_cost;
    }
    if (cur_time_cost < min_time_cost) {
      min_time_cost = cur_time_cost;
    }
    total_time_cost += cur_time_cost;
    printf("iter %d cost: %f ms\n", i, cur_time_cost);
  }
  if (REPEAT_COUNT > 0) {
    prediction_time = total_time_cost / REPEAT_COUNT;
  }
  printf("warmup: %d repeat: %d, average: %f ms, max: %f ms, min: %f ms\n",
         WARMUP_COUNT, REPEAT_COUNT, prediction_time, max_time_cost,
         min_time_cost);

  // Get the data of output tensor and postprocess to output detected objects
  auto output_tensor = predictor->GetOutput(0);
  const float *output_data = output_tensor->data<float>();
  int64_t output_size = 1;
  for (auto dim : output_tensor->shape()) {
    output_size *= dim;
  }
  cv::Mat result_image = input_image.clone();
  double postprocess_start_time = get_current_us();
  std::vector<RESULT> results = postprocess(output_data, output_size, input_width, input_height, label_list, color_map, result_image);
  double postprocess_end_time = get_current_us();
  double postprocess_time =
      (postprocess_end_time - postprocess_start_time) / 1000.0;

  printf("results: %zu\n", results.size());
  for (size_t i = 0; i < results.size(); i++) {
    printf("[%zu] %s - %f %f,%f,%f,%f\n", i, results[i].class_name.c_str(),
           results[i].score, results[i].x, results[i].y, results[i].w,
           results[i].h);
  }
  printf("Preprocess time: %f ms\n", preprocess_time);
  printf("Prediction time: %f ms\n", prediction_time);
  printf("Postprocess time: %f ms\n\n", postprocess_time);
  return result_image;
}

int main(int argc, char **argv) {
  if (argc < 6) {
    printf("Usage: \n ./object_detection_demo model_dir model_type label_path image_path result_path");
    return -1;
  }
  std::string model_dir = argv[1];
  int model_type = atoi(argv[2]);
  std::string label_path = argv[3];
  std::string image_path = argv[4];
  std::string result_path = argv[5];
  std::shared_ptr<paddle::lite_api::PaddlePredictor> predictor = nullptr;

  // Load label and image file
  std::vector<std::string> label_list = load_labels(label_path);
  std::vector<cv::Scalar> color_map = generate_color_map(label_list.size());
  cv::Mat input_image = cv::imread(image_path, 1);

#ifdef USE_FULL_API
  // Run inference by using full api with CxxConfig
  paddle::lite_api::CxxConfig cxx_config;
  if (model_type) { // combined model
    cxx_config.set_model_file(model_dir + "/model");
    cxx_config.set_param_file(model_dir + "/params");
  } else {
    cxx_config.set_model_dir(model_dir);
  }
  cxx_config.set_threads(CPU_THREAD_NUM);
  cxx_config.set_power_mode(CPU_POWER_MODE);
  cxx_config.set_valid_places(
      {paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)},
       paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)}});
  // cxx_config.set_subgraph_model_cache_dir(model_dir.substr(0, model_dir.find_last_of("/")));
  try {
    predictor = paddle::lite_api::CreatePaddlePredictor(cxx_config);
    predictor->SaveOptimizedModel(
        model_dir, paddle::lite_api::LiteModelType::kNaiveBuffer);
  } catch (std::exception e) {
    std::cout << "An internal error occurred in PaddleLite(cxx config)."
              << std::endl;
  }
#endif

  // Run inference by using light api with MobileConfig
  paddle::lite_api::MobileConfig mobile_config;
  mobile_config.set_model_from_file(model_dir + ".nb");
  mobile_config.set_threads(CPU_THREAD_NUM);
  mobile_config.set_power_mode(CPU_POWER_MODE);
  // mobile_config.set_subgraph_model_cache_dir(model_dir.substr(0, model_dir.find_last_of("/")));
  try {
    predictor = paddle::lite_api::CreatePaddlePredictor(mobile_config);
    cv::Mat result_image = process(input_image, label_list, color_map, predictor);
    cv::imwrite(result_path, result_image);
  } catch (std::exception e) {
    std::cout << "An internal error occurred in PaddleLite(mobile config)."
              << std::endl;
  }

  return 0;
}

@paddle-bot-old
Copy link

您好,我们已经收到了您的问题,会安排技术人员尽快解答您的问题,请耐心等待。请您再次检查是否提供了清晰的问题描述、复现代码、环境&版本、报错信息等。同时,您也可以通过查看官网文档常见问题历史Issue来寻求解答。祝您生活愉快~

Hi! We've received your issue and please be patient to get responded. We will arrange technicians to answer your questions as soon as possible. Please make sure that you have posted enough message to demo your request. You may also check out the API docs, FAQ and Github Issues to get the answer. Have a nice day!

@yyjFish
Copy link

yyjFish commented Apr 14, 2021

[机型]:骁龙845
[cpu/gpu]:cpu
[问题描述]:Int8模型可以正常加载,但是运行的时候发生crash;

补充:FP16模型可以正常加载并执行。

————————————————————crash log——————————————————————
2021-04-14 11:24:22.085 27369-27369/? E/libc: Access denied finding property "ro.vendor.net.upload.benchmark.default"
2021-04-14 11:24:22.160 27373-27373/? E/libc: Access denied finding property "ro.vendor.net.upload.benchmark.default"
2021-04-14 11:24:25.656 27299-27444/com.baidu.graph.lensdemo A/libc: Fatal signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x0 in tid 27444 (.graph.lensdemo), pid 27299 (.graph.lensdemo)
2021-04-14 11:24:25.756 27475-27475/? A/DEBUG: Softversion: PD1805_A_8.11.5
2021-04-14 11:24:25.756 27475-27475/? A/DEBUG: Time: 2021-04-14 11:24:25
2021-04-14 11:24:25.756 27475-27475/? A/DEBUG: *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
2021-04-14 11:24:25.756 27475-27475/? A/DEBUG: Build fingerprint: 'vivo/PD1805/PD1805:10/QP1A.190711.020/compiler02252051:user/release-keys'
2021-04-14 11:24:25.756 27475-27475/? A/DEBUG: Revision: '0'
2021-04-14 11:24:25.756 27475-27475/? A/DEBUG: ABI: 'arm'
2021-04-14 11:24:25.756 27475-27475/? A/DEBUG: Timestamp: 2021-04-14 11:24:25+0800
2021-04-14 11:24:25.756 27475-27475/? A/DEBUG: pid: 27299, tid: 27444, name: .graph.lensdemo >>> com.baidu.graph.lensdemo <<<
2021-04-14 11:24:25.757 27475-27475/? A/DEBUG: uid: 10350
2021-04-14 11:24:25.757 27475-27475/? A/DEBUG: signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x0
2021-04-14 11:24:25.757 27475-27475/? A/DEBUG: Cause: null pointer dereference
2021-04-14 11:24:25.757 27475-27475/? A/DEBUG: r0 0000000b r1 00000018 r2 00000001 r3 00000000
2021-04-14 11:24:25.757 27475-27475/? A/DEBUG: r4 00000018 r5 00000000 r6 00000000 r7 c2e68950
2021-04-14 11:24:25.757 27475-27475/? A/DEBUG: r8 bb2ffc40 r9 00000000 r10 bb2ffc40 r11 bb2ffc60
2021-04-14 11:24:25.757 27475-27475/? A/DEBUG: ip 00000000 sp c4b94650 lr c5f1aa93 pc c5f1aad6
2021-04-14 11:24:25.759 27475-27475/? A/DEBUG: backtrace:
2021-04-14 11:24:25.759 27475-27475/? A/DEBUG: #00 pc 000f5ad6 /data/app/com.baidu.graph.lensdemo-ZLO6KtkVCHX8xAVIaQ_0zg==/lib/arm/libmml_framework.so (BuildId: 56ae870d51afebff408996e9c19296521b07f431)
2021-04-14 11:24:25.759 27475-27475/? A/DEBUG: #1 pc 000d46df /data/app/com.baidu.graph.lensdemo-ZLO6KtkVCHX8xAVIaQ_0zg==/lib/arm/libmml_framework.so (BuildId: 56ae870d51afebff408996e9c19296521b07f431)
2021-04-14 11:24:25.759 27475-27475/? A/DEBUG: #2 pc 000d46b5 /data/app/com.baidu.graph.lensdemo-ZLO6KtkVCHX8xAVIaQ_0zg==/lib/arm/libmml_framework.so (paddle::lite::RuntimeProgram::Run()+20) (BuildId: 56ae870d51afebff408996e9c19296521b07f431)
2021-04-14 11:24:25.759 27475-27475/? A/DEBUG: #3 pc 000b2f19 /data/app/com.baidu.graph.lensdemo-ZLO6KtkVCHX8xAVIaQ_0zg==/lib/arm/libmml_framework.so (PaddleLiteMachinePredictor::predict()+12) (BuildId: 56ae870d51afebff408996e9c19296521b07f431)
2021-04-14 11:24:25.759 27475-27475/? A/DEBUG: #4 pc 00063df7 /data/app/com.baidu.graph.lensdemo-ZLO6KtkVCHX8xAVIaQ_0zg==/lib/arm/libwalle_sdk.so (BuildId: 75a19959af0742d60cb1c316a6f9ac6d5e3cdb72)
2021-04-14 11:24:25.759 27475-27475/? A/DEBUG: #5 pc 000648a9 /data/app/com.baidu.graph.lensdemo-ZLO6KtkVCHX8xAVIaQ_0zg==/lib/arm/libwalle_sdk.so (BuildId: 75a19959af0742d60cb1c316a6f9ac6d5e3cdb72)
2021-04-14 11:24:25.759 27475-27475/? A/DEBUG: #6 pc 000f0a19 /data/app/com.baidu.graph.lensdemo-ZLO6KtkVCHX8xAVIaQ_0zg==/lib/arm/libwalle_sdk.so (BuildId: 75a19959af0742d60cb1c316a6f9ac6d5e3cdb72)
2021-04-14 11:24:25.759 27475-27475/? A/DEBUG: #7 pc 000f1b83 /data/app/com.baidu.graph.lensdemo-ZLO6KtkVCHX8xAVIaQ_0zg==/lib/arm/libwalle_sdk.so (BuildId: 75a19959af0742d60cb1c316a6f9ac6d5e3cdb72)
2021-04-14 11:24:25.759 27475-27475/? A/DEBUG: #8 pc 000a0e8b /apex/com.android.runtime/lib/bionic/libc.so (__pthread_start(void*)+20) (BuildId: 40c385d66551117e4460b96805e3b500)
2021-04-14 11:24:25.759 27475-27475/? A/DEBUG: #9 pc 00058123 /apex/com.android.runtime/lib/bionic/libc.so (__start_thread+30) (BuildId: 40c385d66551117e4460b96805e3b500)
2021-04-14 11:24:26.938 27541-27541/? E/.graph.lensdem: Unknown bits set in runtime_flags: 0x8000
2021-04-14 11:24:27.010 27541-27572/com.baidu.graph.lensdemo E/VPerformance: Fail to get file list com.baidu.graph.lensdemo
2021-04-14 11:24:27.010 27541-27572/com.baidu.graph.lensdemo E/VPerformance: Fail to get file list com.baidu.graph.lensdemo
2021-04-14 11:24:27.012 27541-27572/com.baidu.graph.lensdemo E/VPerformance: Fail to get file list oat

@wanghaoshuang
Copy link
Author

@xingjing1

@qili93 qili93 closed this as completed Feb 5, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants