Description
平台(如果交叉编译请再附上交叉编译目标平台):
Platform(Include target platform as well if cross-compiling):
x86 ubuntu22.04
Github版本:
Github Version:
MNN2.8.1
pytorch模型转换为onnx之后(设置了动态输入),转换为mnn进行推理,其中有三个输入,两个输入是固定不变的,一个输入的size每次增加,本模型需要循环推理,第一次推理的输出结果可以与原始模型对应上,第二次resizeSession之后,模型推理的结果不符合预期。代码如下:
`int Decoder::deInfer(const std::vector src, const std::vector src_mask, int input_h, std::vector<std::vector<int32_t>> ids, float* next_token_logits){
#if MODULE
int input_ids_size = (int)(ids[0].size());
std::vector<int32_t> idxx;
for (int i=0; i<ids.size(); i++){
for (int j=0; j<ids[i].size(); j++){
idxx.push_back(ids[i][j]);
}
}
LOG(INFO) << "idxx.size(): " << idxx.size();
for (int i=0; i<idxx.size(); i++){
std::cout<< idxx[i] <<" ";
}
std::cout<<std::endl;
LOG(INFO) << "input_h: " << input_h << " input_ids_size: " << input_ids_size;
auto input_0 = MNN::Express::_Input({2, 13, input_h, 256}, MNN::Express::NCHW, halide_type_of<float>());
auto input_1 = MNN::Express::_Input({2, 13, input_h}, MNN::Express::NCHW, halide_type_of<int>());
auto input_2 = MNN::Express::_Input({2, input_ids_size}, MNN::Express::NCHW, halide_type_of<int32_t>());
::memcpy(input_0->writeMap<float>(), src.data(), src.size() * sizeof(float));
::memcpy(input_1->writeMap<bool>(), src_mask.data(), src_mask.size() * sizeof(bool));
::memcpy(input_2->writeMap<int32_t>(), idxx.data(), idxx.size() * sizeof(int32_t));
std::vector<VARP> outputs;
try {
std::ostringstream fileNameOs;
outputs = module->onForward({input_0, input_1, input_2});
std::ostringstream dimInfo;
auto info = outputs[0]->getInfo();
for (int d=0; d<info->dim.size(); ++d) {
dimInfo << info->dim[d] << "_";
}
auto fileName = fileNameOs.str();
MNN_PRINT("Output Name: %s, Dim: %s\n", fileName.c_str(), dimInfo.str().c_str());
// module->traceOrOptimize(MNN::Interpreter::Session_Resize_Fix);
auto ptr = outputs[0]->readMap<float>();
for (int i=0; i<input_ids_size*2; i++){
LOG(INFO) << "ptr[0 + "<<i<<"*46]: " << ptr[0 + i*46] << " ptr[1 + "<<i<<"*46]: " << ptr[1 + i*46] << " ptr[2 + "<<i<<"*46]: " << ptr[2 + i*46] << " ptr[3 + "<<i<<"*46]: " << ptr[3 + i*46] << " ptr[4 + i*46]: " << ptr[4 + i*46];
}
memcpy(next_token_logits, ptr+(input_ids_size-1)*46, 46*sizeof(float));
memcpy(next_token_logits+46, ptr+(input_ids_size*2-1)*46, 46*sizeof(float));
}
catch (std::exception const &e) {
LOG(ERROR) << "Error when run decoder onnx forword: " << (e.what());
}
#else
if (!m_mnnNet_decoder){
printf("error: CFaceDetection::FaceDetectImp(), m_mnnNet_det is null.\n");
cout<< 1 <<endl;
return -1;
}
int input_ids_size = (int)(ids[0].size());
m_mnnNet_decoder->resizeTensor(input_img, {2, 13, input_h, 256});
m_mnnNet_decoder->resizeTensor(input_mask, {2, 13, input_h});
m_mnnNet_decoder->resizeTensor(input_ids, {2, input_ids_size});
m_mnnNet_decoder->resizeSession(m_mnnSession_decoder);
m_mnnNet_decoder->resizeTensor(output_vector, {2, input_ids_size, 46});
int i_modelW2 = input_img->width();
int i_modelH2 = input_img->height();
int i_modelC2 = input_img->channel();
int i_modelB2 = input_img->batch();
int i2_modelW2 = input_mask->width();
int i2_modelH2 = input_mask->height();
int i2_modelC2 = input_mask->channel();
int i2_modelB2 = input_mask->batch();
int m_modelW2 = input_ids->width();
int m_modelH2 = input_ids->height();
int m_modelC2 = input_ids->channel();
int m_modelB2 = input_ids->batch();
int o_modelW2 = output_vector->width();
int o_modelH2 = output_vector->height();
int o_modelC2 = output_vector->channel();
int o_modelB2 = output_vector->batch();
LOG(INFO) << i_modelB2 << " " << i_modelC2 << " " << i_modelH2 << " " << i_modelW2;
LOG(INFO) << i2_modelB2 << " " << i2_modelC2 << " " << i2_modelH2 << " " << i_modelB2;
LOG(INFO) << m_modelB2 << " " << m_modelC2 << " " << m_modelH2 << " " << m_modelW2;
LOG(INFO) << o_modelB2 << " " << o_modelC2 << " " << o_modelH2 << " " << o_modelW2;
auto input_img_buffer = new Tensor(input_img, Tensor::CAFFE);
for (int i=0; i<2*13*input_h*256; i++){
input_img_buffer->host<float>()[i] = src[i];
// input_img_buffer->host<float>()[i] = 0.0;
}
input_img->copyFromHostTensor(input_img_buffer);
// auto input_imgxx = new Tensor(input_img, Tensor::CAFFE);
// input_img->copyToHostTensor(input_imgxx);
// auto dataid1 = input_imgxx->host<float>();
// LOG(INFO) <<"---------------------------------------------";
// for (int i=0; i<5; i++){
// LOG(INFO) << dataid1[i];
// }
// input mask
auto input_mask_buffer = new Tensor(input_mask, Tensor::CAFFE);
for (int i=0; i<2*13*input_h; i++){
input_mask_buffer->host<int>()[i] = src_mask[i];
// input_mask_buffer->host<bool>()[i] = false;
}
input_mask->copyFromHostTensor(input_mask_buffer);
// auto input_maskxx = new Tensor(input_ids, Tensor::CAFFE);
// input_mask->copyToHostTensor(input_maskxx);
// auto dataid2 = input_maskxx->host<int>();
// LOG(INFO) <<"---------------------------------------------";
// for (int i=0; i<5; i++){
// LOG(INFO) << dataid2[i];
// }
// input ids
auto input_ids_buffer = new Tensor(input_ids, Tensor::CAFFE);
for (int i=0; i<ids[0].size(); i++){
input_ids_buffer->host<int32_t>()[i] = ids[0][i];
}
for (int i=0; i<ids[1].size(); i++){
input_ids_buffer->host<int32_t>()[i+ids[0].size()] = ids[1][i];
}
input_ids->copyFromHostTensor(input_ids_buffer);
// auto input_idxx = new Tensor(input_ids, Tensor::CAFFE);
// input_ids->copyToHostTensor(input_idxx);
// auto dataid = input_idxx->host<int>();
// LOG(INFO) <<"---------------------------------------------";
// for (int i=0; i<input_ids_size*2; i++){
// LOG(INFO) << dataid[i];
// }
m_mnnNet_decoder->runSession(m_mnnSession_decoder);
auto nchwTensor_feature = new Tensor(output_vector, Tensor::CAFFE);
output_vector->copyToHostTensor(nchwTensor_feature);
auto data_feature = nchwTensor_feature->host<float>();
for (int i=0; i<input_ids_size*2; i++){
LOG(INFO) << "data_feature[0 + "<<i<<"*46]: " << data_feature[0 + i*46] << " data_feature[1 + "<<i<<"*46]: " << data_feature[1 + i*46] << " data_feature[2 + "<<i<<"*46]: " << data_feature[2 + i*46] << " data_feature[3 + "<<i<<"*46]: " << data_feature[3 + i*46] << " data_feature[4 + i*46]: " << data_feature[4 + i*46];
}
memcpy(next_token_logits, data_feature+(input_ids_size-1)*46, 46*sizeof(float));
memcpy(next_token_logits+46, data_feature+(input_ids_size*2-1)*46, 46*sizeof(float));
delete input_img_buffer;
delete input_mask_buffer;
delete input_ids_buffer;
delete nchwTensor_feature;
#endif
return 0;
}
`
使用Session和Module两种推理方式,结果都不对
模型导出onnx代码如下:
```python
torch.onnx.export(decoder, (src[0].to(device), src_mask[0].to(device), input_ids.to(device)), "decoder_0515.onnx", input_names=["input1","input2","input3"], output_names=["output"], dynamic_axes={"input1":{2:"input_width"},"input2":{2:"input_width"}, "input3":{1:"length"}}, verbose=True, opset_version=19)
```