forked from opencv/opencv
-
Notifications
You must be signed in to change notification settings - Fork 0
/
onnx_importer.cpp
4179 lines (3666 loc) · 158 KB
/
onnx_importer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/dnn/layer_reg.private.hpp>
#include <opencv2/core/utils/fp_control_utils.hpp>
#include <opencv2/core/utils/logger.defines.hpp>
#undef CV_LOG_STRIP_LEVEL
#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1
#include <opencv2/core/utils/logger.hpp>
#include <opencv2/core/utils/configuration.private.hpp>
#ifdef HAVE_PROTOBUF
#include <iostream>
#include <fstream>
#include <string>
#include <limits>
#include <algorithm>
#if defined _MSC_VER && _MSC_VER < 1910/*MSVS 2017*/
#pragma warning(push)
#pragma warning(disable: 4503) // decorated name length exceeded, name was truncated
#endif
#if defined(__GNUC__) && __GNUC__ >= 5
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsuggest-override"
#endif
#include "opencv-onnx.pb.h"
#if defined(__GNUC__) && __GNUC__ >= 5
#pragma GCC diagnostic pop
#endif
#include "onnx_graph_simplifier.hpp"
#endif
namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN
extern bool DNN_DIAGNOSTICS_RUN;
#ifdef HAVE_PROTOBUF
class ONNXLayerHandler;
// Read the single element of a one-element Mat as type T.
template <typename T>
static T getScalarFromMat(Mat m)
{
    CV_Assert(m.total() == 1);
    const T value = m.at<T>(0);
    return value;
}
// Parses an ONNX ModelProto (read from a file or an in-memory buffer) and
// translates it, node by node, into a cv::dnn::Net. Each supported op type is
// handled by a parse* member routed through a per-domain dispatch table.
class ONNXImporter
{
    // RAII hint: ignore FP denormals for the lifetime of the importer.
    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
    opencv_onnx::ModelProto model_proto;
    // Location of a produced blob in the target net: layer id, output port, depth.
    struct LayerInfo {
        int layerId;
        int outputId;
        int depth;
        LayerInfo(int _layerId = 0, int _outputId = 0, int _depth = CV_32F)
            :layerId(_layerId), outputId(_outputId), depth(_depth) {}
    };
    // Extra metadata kept for constant tensors: the original rank of the tensor
    // as declared in the ONNX file (before any Mat-shape normalization).
    struct TensorInfo {
        int real_ndims;
        TensorInfo(int _real_ndims = 0) : real_ndims(_real_ndims) {}
    };
    // Load all GraphProto.initializer tensors into a name -> Mat map.
    std::map<std::string, Mat> getGraphTensors(
        const opencv_onnx::GraphProto& graph_proto);
    // Look up a constant blob by node input index / by name (CV_Error if absent).
    Mat getBlob(const opencv_onnx::NodeProto& node_proto, int index);
    Mat getBlob(const std::string& input_name);
    TensorInfo getBlobExtraInfo(const opencv_onnx::NodeProto& node_proto, int index);
    TensorInfo getBlobExtraInfo(const std::string& input_name);
    // Convert a node's ONNX attributes into OpenCV LayerParams.
    LayerParams getLayerParams(const opencv_onnx::NodeProto& node_proto);
    // Register a constant tensor (and its shape) under the given name.
    void addConstant(const std::string& name, const Mat& blob);
    // Add a layer to dstNet, connect its inputs and record its output shapes.
    void addLayer(LayerParams& layerParams,
                  const opencv_onnx::NodeProto& node_proto);
    // Decide the output depth ("depth" param) based on int8 input propagation.
    void setParamsDtype(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    // Make n Identity copies of 'input' and rewire node_proto to consume them.
    void expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto,
                   const std::string& input, size_t n);
    // LSTM import helpers (constant extraction and reshape/slice/transpose plumbing).
    void lstm_extractConsts(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, size_t idx, int* blobShape_, int size);
    void lstm_add_reshape(const std::string& input_name, const std::string& output_name, int* layerShape, size_t n);
    std::string lstm_add_slice(int index, const std::string& input_name, int* begin, int* end, size_t n);
    std::string lstm_fix_dims(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto,
                              int batch_size, int num_directions, int hidden_size, bool need_y, const std::string& y_name,
                              const int index);
    void lstm_add_transform(int num_directions, int batch_size, int hidden_size,
                            int index, const std::string& input_name, const std::string& output_name);
public:
    ONNXImporter(Net& net, const char *onnxFile);
    ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer);
    // Main entry point: translate model_proto into dstNet.
    void populateNet();
protected:
    // Diagnostics-mode registry of unsupported layers (null outside diagnostics).
    std::unique_ptr<ONNXLayerHandler> layerHandler;
    Net& dstNet;
    opencv_onnx::GraphProto graph_proto;
    std::string framework_name;
    // Constant tensors (initializers and folded nodes) available at import time.
    std::map<std::string, Mat> constBlobs;
    std::map<std::string, TensorInfo> constBlobsExtraInfo;
    std::map<std::string, MatShape> outShapes;  // List of internal blobs shapes.
    bool hasDynamicShapes;  // Whether the model has inputs with dynamic shapes
    typedef std::map<std::string, MatShape>::iterator IterShape_t;
    // Maps every produced blob name to its location in dstNet.
    std::map<std::string, LayerInfo> layer_id;
    typedef std::map<std::string, LayerInfo>::iterator IterLayerId_t;
    typedef std::map<std::string, LayerInfo>::const_iterator ConstIterLayerId_t;
    // Dispatch a single node to the matching parse* handler.
    void handleNode(const opencv_onnx::NodeProto& node_proto);
private:
    friend class ONNXLayerHandler;
    typedef void (ONNXImporter::*ONNXImporterNodeParser)(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    typedef std::map<std::string, ONNXImporterNodeParser> DispatchMap;
    typedef std::map<std::string, DispatchMap> DomainDispatchMap;
    // domain name -> (op type -> parser member function).
    DomainDispatchMap domain_dispatch_map;
    std::string getLayerTypeDomain(const opencv_onnx::NodeProto& node_proto);
    const DispatchMap& getDispatchMap(const opencv_onnx::NodeProto& node_proto);
    void buildDispatchMap_ONNX_AI(int opset_version);
    void buildDispatchMap_COM_MICROSOFT(int opset_version);
    // Domain: 'ai.onnx' (default)
    // URL: https://github.com/onnx/onnx/blob/master/docs/Operators.md
    void parseArg                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseMaxUnpool            (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseMaxPool              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseAveragePool          (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseGlobalPool           (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseReduce               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSlice                (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSplit                (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseNeg                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseConstant             (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseLSTM                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseGRU                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseImageScaler          (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseClip                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseLeakyRelu            (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseRelu                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseElu                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseTanh                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseAbs                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parsePRelu                (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseLRN                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseBatchNormalization   (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseGemm                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseMatMul               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseConv                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseConvTranspose        (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseTranspose            (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSqueeze              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseFlatten              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseUnsqueeze            (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseExpand               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseReshape              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parsePad                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseShape                (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseCast                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseConstantFill         (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseGather               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseConcat               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseResize               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseUpsample             (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSoftMax              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseDetectionOutput      (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseCumSum               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseElementWise          (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseDepthToSpace         (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseRange                (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseScatter              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseTile                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseLayerNorm            (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSimpleLayers         (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    // Domain: com.microsoft
    // URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md
    void parseQuantDequant         (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQConv                (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQMatMul              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQEltwise             (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQLeakyRelu           (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQSigmoid             (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQAvgPool             (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQConcat              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQGemm                (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQSoftmax             (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    // '???' domain or '???' layer type
    void parseCustomLayer          (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    int onnx_opset;  // OperatorSetIdProto for 'onnx' domain
    std::map<std::string, int> onnx_opset_map;  // map from OperatorSetIdProto
    void parseOperatorSet();
    const std::string str_domain_ai_onnx = "ai.onnx";
    // Legacy layer-naming mode, toggled via environment configuration.
    bool useLegacyNames;
    bool getParamUseLegacyNames()
    {
        bool param = utils::getConfigurationParameterBool("OPENCV_DNN_ONNX_USE_LEGACY_NAMES", false);
        return param;
    }
    // Produce the layer name used in dstNet for a node (legacy vs. modern naming).
    std::string extractNodeName(const opencv_onnx::NodeProto& node_proto);
};
// Diagnostics-mode helper: scans the whole graph up front and reports every
// node whose op type has no registered parser (see DNN_DIAGNOSTICS_RUN).
class ONNXLayerHandler : public detail::LayerHandler
{
public:
    explicit ONNXLayerHandler(ONNXImporter* importer_);
    // Record every node whose type is absent from the importer's dispatch map.
    void fillRegistry(const opencv_onnx::GraphProto& net);
protected:
    ONNXImporter* importer;  // non-owning back-pointer to the owning importer
};
// Stores the non-owning importer pointer; no other setup is required.
ONNXLayerHandler::ONNXLayerHandler(ONNXImporter* importer_) : importer(importer_){}
// Walk every node of the graph and register those whose op type has no parser
// in the importer's dispatch map; finally print the collected report.
void ONNXLayerHandler::fillRegistry(const opencv_onnx::GraphProto &net)
{
    const int nodeCount = net.node_size();
    for (int idx = 0; idx < nodeCount; ++idx)
    {
        const opencv_onnx::NodeProto& node = net.node(idx);
        const std::string& nodeName = node.output(0);
        const std::string& opType = node.op_type();
        const std::string& domain = importer->getLayerTypeDomain(node);
        const auto& dispatch = importer->getDispatchMap(node);
        const bool supported = dispatch.count(opType) > 0;
        if (!supported)
            addMissing(nodeName, cv::format("%s.%s", domain.c_str(), opType.c_str()));
    }
    printMissing();
}
// Construct an importer from an ONNX model file on disk.
// Throws (CV_Error) when the file cannot be read or parsed, then immediately
// populates the destination network.
ONNXImporter::ONNXImporter(Net& net, const char *onnxFile)
    // layerHandler is only needed in diagnostics mode.
    : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr)
    , dstNet(net)
    , onnx_opset(0)
    , useLegacyNames(getParamUseLegacyNames())
{
    hasDynamicShapes = false;
    CV_Assert(onnxFile);
    CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFile);
    // Binary mode is required: protobuf wire format is not text.
    std::fstream input(onnxFile, std::ios::in | std::ios::binary);
    if (!input)
    {
        CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", onnxFile));
    }
    if (!model_proto.ParseFromIstream(&input))
    {
        CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX model: %s", onnxFile));
    }
    populateNet();
}
// Construct an importer from a serialized ONNX model held in memory.
// Throws (CV_Error) on parse failure, then immediately populates the network.
ONNXImporter::ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer)
    : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr)
    , dstNet(net)
    , onnx_opset(0)
    , useLegacyNames(getParamUseLegacyNames())
{
    hasDynamicShapes = false;
    CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)");
    // Zero-copy streambuf over the caller's buffer. The const_cast is required
    // by the setg() API; the buffer is only read, never written.
    struct _Buf : public std::streambuf
    {
        _Buf(const char* buffer, size_t sizeBuffer)
        {
            char* p = const_cast<char*>(buffer);
            setg(p, p, p + sizeBuffer);
        }
    };
    _Buf buf(buffer, sizeBuffer);
    std::istream input(&buf);
    if (!model_proto.ParseFromIstream(&input))
        CV_Error(Error::StsUnsupportedFormat, "Failed to parse onnx model from in-memory byte array.");
    populateNet();
}
// Rename a LayerParams key (if present), preserving its value.
inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, const String& newKey)
{
    if (!layerParams.has(oldKey))
        return;
    layerParams.set(newKey, layerParams.get(oldKey));
    layerParams.erase(oldKey);
}
// Log the name and shape of one graph input/output entry, flagging dynamic
// dimensions and any dimension denotations.
static
void dumpValueInfoProto(int i, const opencv_onnx::ValueInfoProto& valueInfoProto, const std::string& prefix)
{
    CV_Assert(valueInfoProto.has_name());
    CV_Assert(valueInfoProto.has_type());
    const opencv_onnx::TypeProto& typeProto = valueInfoProto.type();
    CV_Assert(typeProto.has_tensor_type());
    const opencv_onnx::TypeProto::Tensor& tensorType = typeProto.tensor_type();
    CV_Assert(tensorType.has_shape());
    const opencv_onnx::TensorShapeProto& shapeProto = tensorType.shape();

    const int ndims = shapeProto.dim_size();
    CV_CheckGE(ndims, 0, "");
    MatShape shape(ndims);
    for (int d = 0; d < ndims; ++d)
    {
        const opencv_onnx::TensorShapeProto_Dimension& dim = shapeProto.dim(d);
        // A symbolic dim_param (e.g. "batch") means the dimension is dynamic.
        if (dim.has_dim_param())
        {
            CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << d << "] = <" << dim.dim_param() << "> (dynamic)");
        }
        // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
        if (dim.has_denotation())
        {
            CV_LOG_INFO(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << d << "] denotation is '" << dim.denotation() << "'");
        }
        shape[d] = dim.dim_value();
    }
    CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << " as '" << valueInfoProto.name() << "'] shape=" << toString(shape));
}
// Verbose-level-only dump of an initializer tensor's shape and data type.
static
void dumpTensorProto(int i, const opencv_onnx::TensorProto& tensorProto, const std::string& prefix)
{
    if (utils::logging::getLogLevel() < utils::logging::LOG_LEVEL_VERBOSE)
        return;
    const int ndims = tensorProto.dims_size();
    CV_CheckGE(ndims, 0, "");
    MatShape shape(ndims);
    for (int d = 0; d < ndims; ++d)
        shape[d] = static_cast<int>(tensorProto.dims(d));
    CV_LOG_VERBOSE(NULL, 0, "DNN/ONNX: " << prefix << "[" << i << " as '" << tensorProto.name() << "'] shape=" << toString(shape) << " data_type=" << (int)tensorProto.data_type());
}
// Free a tensor's raw byte payload once its contents have been copied out.
void releaseONNXTensor(opencv_onnx::TensorProto& tensor_proto)
{
    if (tensor_proto.raw_data().empty())
        return;
    delete tensor_proto.release_raw_data();
}
// Instantiate a layer from 'params' and execute it immediately on the given
// inputs (used for constant folding during import). Output Mats are created
// by this function.
void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
              std::vector<Mat>& outputs)
{
    Ptr<Layer> layer = LayerFactory::createLayerInstance(params.type, params);
    CV_Assert((bool)layer);

    std::vector<MatShape> inputShapes(inputs.size());
    int ddepth = params.get<int>("depth", CV_32F);
    for (size_t k = 0; k < inputs.size(); ++k)
    {
        inputShapes[k] = shape(inputs[k]);
        // Inputs after the first must match the running depth.
        if (k > 0 && ddepth != inputs[k].depth())
            CV_Error(Error::StsNotImplemented, "Mixed input data types.");
        // Quantize and Dequantize layer have different output type than input.
        if (params.type != "Quantize" && params.type != "Dequantize")
            ddepth = inputs[k].depth();
    }

    std::vector<MatShape> outputShapes, internalShapes;
    layer->getMemoryShapes(inputShapes, 0, outputShapes, internalShapes);

    std::vector<Mat> internals(internalShapes.size());
    outputs.resize(outputShapes.size());
    for (size_t k = 0; k < outputShapes.size(); ++k)
        outputs[k].create(outputShapes[k], ddepth);
    for (size_t k = 0; k < internalShapes.size(); ++k)
        internals[k].create(internalShapes[k], ddepth);

    layer->finalize(inputs, outputs);
    layer->forward(inputs, outputs, internals);
}
// Load every GraphProto.initializer into a name -> Mat map, recording each
// tensor's original rank in constBlobsExtraInfo as a side effect.
std::map<std::string, Mat> ONNXImporter::getGraphTensors(
        const opencv_onnx::GraphProto& graph_proto)
{
    std::map<std::string, Mat> weights;
    const int initializerCount = graph_proto.initializer_size();
    for (int i = 0; i < initializerCount; ++i)
    {
        const opencv_onnx::TensorProto& tensor_proto = graph_proto.initializer(i);
        dumpTensorProto(i, tensor_proto, "initializer");
        Mat mat = getMatFromTensor(tensor_proto);
        releaseONNXTensor(const_cast<opencv_onnx::TensorProto&>(tensor_proto)); // drop already loaded data
        // In diagnostics mode an unconvertible tensor is skipped instead of failing.
        if (DNN_DIAGNOSTICS_RUN && mat.empty())
            continue;
        weights.insert({tensor_proto.name(), mat});
        constBlobsExtraInfo.insert({tensor_proto.name(), TensorInfo(tensor_proto.dims_size())});
    }
    return weights;
}
// Convert a repeated int64 protobuf field into a DictValue of int32 values.
// Note: uses dst.data() rather than &dst[0] — dereferencing element 0 of an
// empty vector is undefined behavior when the repeated field has no entries.
static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) {
    std::vector<int32_t> dst(src.size());
    convertInt64ToInt32(src, dst, src.size());
    return DictValue::arrayInt(dst.data(), src.size());
}
// Wrap a repeated string protobuf field as a DictValue string array.
static DictValue parseStr(const ::google::protobuf::RepeatedPtrField< ::std::string>& src) {
    const int count = static_cast<int>(src.size());
    return DictValue::arrayString(src.begin(), count);
}
// Translate a node's ONNX attributes into OpenCV LayerParams.
// Convolution/pooling attributes are remapped to OpenCV naming
// ("kernel_shape" -> "kernel_size", "strides" -> "stride", "pads" -> "pad",
// "dilations" -> "dilation"); other scalar/array attributes are stored under
// their original names. Unsupported attribute kinds raise CV_Error, or are
// logged and skipped when DNN_DIAGNOSTICS_RUN is set.
LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto)
{
    LayerParams lp;
    for(int i = 0; i < node_proto.attribute_size(); i++)
    {
        opencv_onnx::AttributeProto attribute_proto = node_proto.attribute(i);
        std::string attribute_name = attribute_proto.name();
        try
        {
            if(attribute_name == "kernel_shape")
            {
                // Only 1D/2D/3D kernels are supported.
                CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
                lp.set("kernel_size", parse(attribute_proto.ints()));
            }
            else if(attribute_name == "strides")
            {
                CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
                lp.set("stride", parse(attribute_proto.ints()));
            }
            else if(attribute_name == "pads")
            {
                if (node_proto.op_type() == "Pad")
                {
                    // Padding layer.
                    // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN.
                    // We need to shuffle it to begin0, end0, begin1, end1, ...
                    CV_Assert(attribute_proto.ints_size() % 2 == 0);
                    const int dims = attribute_proto.ints_size() / 2;
                    std::vector<int32_t> paddings;
                    paddings.reserve(attribute_proto.ints_size());
                    for (int i = 0; i < dims; ++i)
                    {
                        paddings.push_back(attribute_proto.ints(i));
                        paddings.push_back(attribute_proto.ints(dims + i));
                    }
                    lp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
                }
                else
                {
                    // Convolution or pooling.
                    CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6);
                    lp.set("pad", parse(attribute_proto.ints()));
                }
            }
            else if(attribute_name == "auto_pad")
            {
                // "NOTSET" (explicit pads) intentionally leaves pad_mode unset.
                if (attribute_proto.s() == "SAME_UPPER" || attribute_proto.s() == "SAME_LOWER") {
                    lp.set("pad_mode", "SAME");
                }
                else if (attribute_proto.s() == "VALID") {
                    lp.set("pad_mode", "VALID");
                }
            }
            else if(attribute_name == "dilations")
            {
                CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
                lp.set("dilation", parse(attribute_proto.ints()));
            }
            else if(attribute_name == "activations" && node_proto.op_type() == "LSTM")
            {
                lp.set(attribute_name, parseStr(attribute_proto.strings()));
            }
            else if (attribute_proto.has_i())
            {
                // Single int64 attribute: range-checked down-cast to int32.
                ::google::protobuf::int64 src = attribute_proto.i();
                if (src < std::numeric_limits<int32_t>::min() || src > std::numeric_limits<int32_t>::max())
                    CV_Error(Error::StsOutOfRange, "Input is out of OpenCV 32S range");
                else
                    lp.set(attribute_name, saturate_cast<int32_t>(src));
            }
            else if (attribute_proto.has_f())
            {
                lp.set(attribute_name, attribute_proto.f());
            }
            else if (attribute_proto.has_s())
            {
                lp.set(attribute_name, attribute_proto.s());
            }
            else if (attribute_proto.floats_size() > 0)
            {
                lp.set(attribute_name, DictValue::arrayReal(
                    attribute_proto.floats().data(), attribute_proto.floats_size()));
            }
            else if (attribute_proto.ints_size() > 0)
            {
                lp.set(attribute_name, parse(attribute_proto.ints()));
            }
            else if (attribute_proto.has_t())
            {
                // Tensor attribute: store as a blob, remembering its original rank.
                opencv_onnx::TensorProto tensor = attribute_proto.t();
                Mat blob = getMatFromTensor(tensor);
                lp.blobs.push_back(blob);
                lp.set("original_dims_of_mat", tensor.dims_size());
            }
            else if (attribute_proto.has_g())
            {
                CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: 'Graph' is not supported", attribute_name.c_str()));
            }
            else if (attribute_proto.graphs_size() > 0)
            {
                CV_Error(Error::StsNotImplemented,
                    cv::format("DNN/ONNX/Attribute[%s]: 'Graphs' (%d) in attributes is not supported",
                        attribute_name.c_str(), attribute_proto.graphs_size())
                );
            }
            else if (attribute_proto.strings_size() > 0)
            {
                // Log each offending string before raising, to aid model debugging.
                std::string msg = cv::format("DNN/ONNX/Attribute[%s]: 'Strings' (%d) are not supported",
                    attribute_name.c_str(), attribute_proto.strings_size());
                CV_LOG_ERROR(NULL, msg);
                for (int i = 0; i < attribute_proto.strings_size(); i++)
                {
                    CV_LOG_ERROR(NULL, "    Attribute[" << attribute_name << "].string(" << i << ") = '" << attribute_proto.strings(i) << "'");
                }
                CV_Error(Error::StsNotImplemented, msg);
            }
            else if (attribute_proto.tensors_size() > 0)
            {
                CV_Error(Error::StsNotImplemented,
                    cv::format("DNN/ONNX/Attribute[%s]: 'Tensors' (%d) in attributes are not supported",
                        attribute_name.c_str(), attribute_proto.tensors_size())
                );
            }
            else
            {
                CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: unsupported attribute format", attribute_name.c_str()));
            }
        }
        catch (const cv::Exception& e)
        {
            CV_UNUSED(e);
            // Diagnostics mode: report and move on to the next attribute.
            if (DNN_DIAGNOSTICS_RUN)
            {
                CV_LOG_ERROR(NULL, "DNN/ONNX: Potential problem with processing attributes for node " << node_proto.name() << " Attribute " << attribute_name.c_str()
                );
                continue;
            }
            throw;
        }
    }
    return lp;
}
// Resolve the node's index-th input name and look up the matching constant blob.
Mat ONNXImporter::getBlob(const opencv_onnx::NodeProto& node_proto, int index)
{
    CV_Assert(index < node_proto.input_size());
    return getBlob(node_proto.input(index));
}
// Fetch a constant tensor by name; raises if it was never registered.
Mat ONNXImporter::getBlob(const std::string& input_name)
{
    auto it = constBlobs.find(input_name);
    if (it == constBlobs.end())
    {
        CV_Error(Error::StsBadArg, std::string("Blob ") + input_name + " not found in const blobs");
    }
    return it->second;
}
// Resolve the node's index-th input name and look up its extra tensor info.
ONNXImporter::TensorInfo ONNXImporter::getBlobExtraInfo(const opencv_onnx::NodeProto &node_proto, int index)
{
    CV_Assert(index < node_proto.input_size());
    return getBlobExtraInfo(node_proto.input(index));
}
// Fetch the recorded TensorInfo (original rank) for a constant tensor by name.
ONNXImporter::TensorInfo ONNXImporter::getBlobExtraInfo(const std::string& input_name)
{
    auto it = constBlobsExtraInfo.find(input_name);
    if (it == constBlobsExtraInfo.end())
    {
        CV_Error(Error::StsBadArg, std::string("Blob ") + input_name + " not found in const blobs of extra info");
    }
    return it->second;
}
// Add a layer described by layerParams/node_proto to dstNet:
//  1) register every non-empty output name in layer_id,
//  2) connect inputs produced by previously added layers,
//  3) infer and record output shapes in outShapes.
void ONNXImporter::addLayer(LayerParams& layerParams,
                            const opencv_onnx::NodeProto& node_proto)
{
    // Output depth (data type) of the layer; defaults to 32-bit float.
    int depth = layerParams.get<int>("depth", CV_32F);
    int id = dstNet.addLayer(layerParams.name, layerParams.type, depth, layerParams);
    for (int i = 0; i < node_proto.output_size(); ++i)
    {
        const std::string& output_name = node_proto.output(i);
        if (!output_name.empty())
        {
            layer_id.insert(std::make_pair(output_name, LayerInfo(id, i, depth)));
        }
    }
    // Connect inputs that are produced by already-added layers; constant inputs
    // (found in constBlobs, not in layer_id) are intentionally skipped here.
    std::vector<MatShape> layerInpShapes, layerOutShapes, layerInternalShapes;
    int inpNum = 0;
    for (int j = 0; j < node_proto.input_size(); j++)
    {
        const std::string& input_name = node_proto.input(j);
        IterLayerId_t layerId = layer_id.find(input_name);
        if (layerId != layer_id.end()) {
            dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum);
            ++inpNum;
            // Collect input shapes.
            IterShape_t shapeIt = outShapes.find(input_name);
            CV_Assert(shapeIt != outShapes.end());
            layerInpShapes.push_back(shapeIt->second);
        }
    }
    // Compute shape of output blob for this layer.
    Ptr<Layer> layer = dstNet.getLayer(id);  // FIXIT: avoid instantiation of layers during the import stage
    layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes);
    for (int i = 0; i < node_proto.output_size() && i < (int)layerOutShapes.size(); ++i)
    {
        const std::string& output_name = node_proto.output(i);
        if (!output_name.empty())
        {
            // Consistency fix: reuse the already-bound name instead of
            // re-evaluating node_proto.output(i).
            outShapes[output_name] = layerOutShapes[i];
        }
    }
}
/** @brief Make N copies of input layer and set them as input to node_proto.
* @param prefix prefix of new layers' names
* @param node_proto node which will contain all copies as inputs
* @param input name of the node to copy
* @param n number of copies
*/
// Create n Identity copies of 'input' and rewire node_proto so its inputs are
// exactly those n copies (see the Doxygen comment above).
void ONNXImporter::expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto,
                             const std::string& input, size_t n)
{
    std::vector<std::string> copyNames;
    copyNames.reserve(n);
    for (size_t copyIdx = 0; copyIdx < n; ++copyIdx)
    {
        LayerParams copyLP;
        copyLP.name = format("%s/copy_%zu", prefix.c_str(), copyIdx);
        copyLP.type = "Identity";
        CV_Assert((layer_id.find(copyLP.name) == layer_id.end()) &&
            "Couldn't copy the node: generated name already exists in the graph.");
        copyNames.push_back(copyLP.name);
        // Reuse node_proto as a scratch one-input/one-output node description.
        node_proto.set_input(0, input);
        node_proto.set_output(0, copyLP.name);
        addLayer(copyLP, node_proto);
    }
    // Replace the node's inputs with the freshly created copies.
    node_proto.clear_input();
    for (const std::string& copyName : copyNames)
        node_proto.add_input(copyName);
}
// Register a constant tensor under 'name' and remember its shape for inference.
void ONNXImporter::addConstant(const std::string& name, const Mat& blob)
{
    CV_LOG_DEBUG(NULL, "DNN/ONNX: add constant '" << name << "' shape=" << toString(shape(blob)) << ": " << toString(blob));
    constBlobs.insert({name, blob});
    outShapes.insert({name, shape(blob)});
}
// Read opset_import entries from the model, record the declared opset version
// per domain, then build the per-domain dispatch maps used by handleNode().
// Models with IR version < 3 predate opset_import and are skipped.
void ONNXImporter::parseOperatorSet()
{
    int ir_version = model_proto.has_ir_version() ? static_cast<int>(model_proto.ir_version()) : -1;
    if (ir_version < 3)
        return;
    int opset_size = model_proto.opset_import_size();
    if (opset_size <= 0)
    {
        CV_LOG_INFO(NULL, "DNN/ONNX: missing opset information");  // bugfix: added missing ';'
        return;
    }
    for (int i = 0; i < opset_size; ++i)
    {
        const ::opencv_onnx::OperatorSetIdProto& opset_entry = model_proto.opset_import(i);
        const std::string& domain = opset_entry.has_domain() ? opset_entry.domain() : std::string();
        int version = opset_entry.has_version() ? opset_entry.version() : -1;
        if (domain.empty() || domain == str_domain_ai_onnx)
        {
            // ONNX opset covered by specification: https://github.com/onnx/onnx/blob/master/docs/Operators.md
            onnx_opset = std::max(onnx_opset, version);
            onnx_opset_map[str_domain_ai_onnx] = onnx_opset;
        }
        else
        {
            CV_LOG_DEBUG(NULL, "DNN/ONNX: using non-standard ONNX opset[" << i << "]: domain='" << domain << "' version=" << version);
            // Bugfix: store the domain's own declared version (was onnx_opset,
            // i.e. the ai.onnx version); the dispatch-map builders below rely
            // on the per-domain value.
            onnx_opset_map[domain] = version;
        }
    }
    CV_LOG_INFO(NULL, "DNN/ONNX: ONNX opset version = " << onnx_opset);
    buildDispatchMap_ONNX_AI(onnx_opset);
    for (const auto& pair : onnx_opset_map)
    {
        if (pair.first == str_domain_ai_onnx)
        {
            continue;  // done above
        }
        else if (pair.first == "com.microsoft")
        {
            buildDispatchMap_COM_MICROSOFT(pair.second);
        }
        else
        {
            CV_LOG_INFO(NULL, "DNN/ONNX: unknown domain='" << pair.first << "' version=" << pair.second << ". No dispatch map, you may need to register 'custom' layers.");
        }
    }
}
static bool ifInt8Output(const String& layerType)
{
// Contains all node types whose output should be int8 when it get int8 input.
// ai.onnx opset 15
static std::vector<String> input8output8List = {
"QuantizeLinear",
"QLinearAdd",
"QLinearMul",
"QLinearAveragePool",
"QLinearGlobalAveragePool",
"QLinearLeakyRelu",
"QLinearSigmoid",
"QLinearConcat",
"QGemm",
"QLinearSoftmax",
"QLinearConv",
"QLinearMatMul",
"MaxPool",
"ReduceMax",
"ReduceMin",
"Split",
"Clip",
"Abs",
"Transpose",
"Squeeze",
"Flatten",
"Unsqueeze",
"Expand",
"Reshape",
"Pad",
"Gather",
"Concat",
"Resize",
"SpaceToDepth",
"DepthToSpace",
"Pow",
"Add",
"Sub",
"Mul",
"Div"
};
auto layerIt = std::find(input8output8List.begin(), input8output8List.end(), layerType);
return layerIt != input8output8List.end();
}
// Decide the output depth ("depth" param) of the layer being imported:
// CV_8S when the layer type propagates int8 (see ifInt8Output) AND at least
// one of its inputs comes from an int8 layer; CV_32F otherwise.
void ONNXImporter::setParamsDtype(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    const std::string& layer_type = node_proto.op_type();
    // The type predicate is loop-invariant: evaluate it once up front
    // (previously it was re-checked for every input).
    if (ifInt8Output(layer_type))
    {
        for (int i = 0; i < node_proto.input_size(); ++i)
        {
            // Single map lookup; bind the result by reference to avoid the
            // double find() and the LayerInfo copy of the original code.
            const auto it = layer_id.find(node_proto.input(i));
            if (it != layer_id.end() && it->second.depth == CV_8S)
            {
                layerParams.set("depth", CV_8S);
                return;
            }
        }
    }
    layerParams.set("depth", CV_32F);
}
// Build dstNet from the parsed ONNX model, in order: log model metadata,
// resolve opsets and dispatch maps, simplify known subgraph patterns,
// collect constant initializers, declare real network inputs (with shapes
// when fully static), import every graph node as a layer, and finally
// register the graph outputs.
void ONNXImporter::populateNet()
{
    CV_Assert(model_proto.has_graph());
    graph_proto = model_proto.graph();
    // Producer name/version are informational only (used in the log below).
    std::string framework_version;
    if (model_proto.has_producer_name())
        framework_name = model_proto.producer_name();
    if (model_proto.has_producer_version())
        framework_version = model_proto.producer_version();
    CV_LOG_INFO(NULL, "DNN/ONNX: loading ONNX"
                << (model_proto.has_ir_version() ? cv::format(" v%d", (int)model_proto.ir_version()) : cv::String())
                << " model produced by '" << framework_name << "'"
                << (framework_version.empty() ? cv::String() : cv::format(":%s", framework_version.c_str()))
                << ". Number of nodes = " << graph_proto.node_size()
                << ", initializers = " << graph_proto.initializer_size()
                << ", inputs = " << graph_proto.input_size()
                << ", outputs = " << graph_proto.output_size()
                );
    // Determine opset version(s) and build per-domain dispatch maps.
    parseOperatorSet();
    // Fuse/replace known ONNX subgraph patterns before node-by-node import.
    simplifySubgraphs(graph_proto);
    const int layersSize = graph_proto.node_size();
    CV_LOG_DEBUG(NULL, "DNN/ONNX: graph simplified to " << layersSize << " nodes");
    constBlobs = getGraphTensors(graph_proto); // scan GraphProto.initializer
    std::vector<String> netInputs; // network inputs only (constant blobs excluded)
    // Record shapes for all graph inputs; this includes pre-initialized
    // (constant) inputs as well as real network inputs.
    for (int i = 0; i < graph_proto.input_size(); ++i)
    {
        const opencv_onnx::ValueInfoProto& valueInfoProto = graph_proto.input(i);
        CV_Assert(valueInfoProto.has_name());
        const std::string& name = valueInfoProto.name();
        CV_Assert(valueInfoProto.has_type());
        const opencv_onnx::TypeProto& typeProto = valueInfoProto.type();
        CV_Assert(typeProto.has_tensor_type());
        const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type();
        CV_Assert(tensor.has_shape());
        const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape();
        int dim_size = tensorShape.dim_size();
        CV_CheckGE(dim_size, 0, ""); // some inputs are scalars (dims=0), e.g. in Test_ONNX_nets.Resnet34_kinetics test
        MatShape inpShape(dim_size);
        for (int j = 0; j < dim_size; ++j)
        {
            const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j);
            // A dim_param (symbolic name) instead of a fixed value marks a dynamic dimension.
            if (dimension.has_dim_param())
            {
                CV_LOG_DEBUG(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)");
            }
            // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
            if (dimension.has_denotation())
            {
                CV_LOG_INFO(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'");
            }
            inpShape[j] = dimension.dim_value(); // 0 when the dimension is symbolic (proto default)
            // NHW, NCHW(NHWC), NCDHW(NDHWC); do not set this flag if only N is dynamic
            if (dimension.has_dim_param() && !(j == 0 && inpShape.size() >= 3))
            {
                hasDynamicShapes = true;
            }
        }
        // An input that also appears in the initializer list is a constant,
        // not a real network input.
        bool isInitialized = ((constBlobs.find(name) != constBlobs.end()));
        CV_LOG_IF_DEBUG(NULL, !isInitialized, "DNN/ONNX: input[" << i << " as '" << name << "'] shape=" << toString(inpShape));
        CV_LOG_IF_VERBOSE(NULL, 0, isInitialized, "DNN/ONNX: pre-initialized input[" << i << " as '" << name << "'] shape=" << toString(inpShape));
        if (dim_size > 0 && !hasDynamicShapes) // FIXIT result is not reliable for models with multiple inputs
        {
            inpShape[0] = std::max(inpShape[0], 1); // It's OK to have undetermined batch size
        }
        outShapes[valueInfoProto.name()] = inpShape;
        // fill map: push layer name, layer id and output id
        if (!isInitialized)
        {
            netInputs.push_back(name);
            layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1)));
        }
    }
    dstNet.setInputsNames(netInputs);
    // With fully static shapes we can pre-set input shapes on the network.
    if (!hasDynamicShapes)
    {
        for (int i = 0; i < netInputs.size(); ++i)
            dstNet.setInputShape(netInputs[i], outShapes[netInputs[i]]);
    }
    // dump outputs
    for (int i = 0; i < graph_proto.output_size(); ++i)
    {
        dumpValueInfoProto(i, graph_proto.output(i), "output");
    }
    if (DNN_DIAGNOSTICS_RUN) {
        CV_LOG_INFO(NULL, "DNN/ONNX: start diagnostic run!");
        layerHandler->fillRegistry(graph_proto);
    }
    // Import nodes in the order they appear in the (simplified) graph.
    for(int li = 0; li < layersSize; li++)
    {
        const opencv_onnx::NodeProto& node_proto = graph_proto.node(li);
        handleNode(node_proto);
    }
    // register outputs
    for (int i = 0; i < graph_proto.output_size(); ++i)
    {
        const std::string& output_name = graph_proto.output(i).name();
        if (output_name.empty())
        {
            CV_LOG_ERROR(NULL, "DNN/ONNX: can't register output without name: " << i);
            continue;
        }
        ConstIterLayerId_t layerIt = layer_id.find(output_name);
        if (layerIt == layer_id.end())
        {
            CV_LOG_ERROR(NULL, "DNN/ONNX: can't find layer for output name: '" << output_name << "'. Does model imported properly?");
            continue;
        }
        const LayerInfo& li = layerIt->second;
        int outputId = dstNet.registerOutput(output_name, li.layerId, li.outputId); CV_UNUSED(outputId);
        // no need to duplicate message from engine: CV_LOG_DEBUG(NULL, "DNN/ONNX: registered output='" << output_name << "' with id=" << outputId);
    }
    CV_LOG_DEBUG(NULL, (DNN_DIAGNOSTICS_RUN ? "DNN/ONNX: diagnostic run completed!" : "DNN/ONNX: import completed!"));
}
// Resolve the operator-set domain of a node. An absent or empty domain
// field means the default ONNX operator set ("ai.onnx").
std::string ONNXImporter::getLayerTypeDomain(const opencv_onnx::NodeProto& node_proto)
{
    if (node_proto.has_domain())
    {
        const std::string& nodeDomain = node_proto.domain();
        if (!nodeDomain.empty())
            return nodeDomain;
    }
    return str_domain_ai_onnx;
}
// Look up the handler dispatch map for the node's operator-set domain.
// Returns a reference to a shared empty map when the domain is unknown,
// so callers never deal with a null/absent map.
const ONNXImporter::DispatchMap& ONNXImporter::getDispatchMap(const opencv_onnx::NodeProto& node_proto)
{
    static DispatchMap empty_map;
    const std::string& domain = getLayerTypeDomain(node_proto);
    const auto found = domain_dispatch_map.find(domain);
    return (found == domain_dispatch_map.end()) ? empty_map : found->second;
}
// Derive a unique DNN layer name for a node: prefer the node's own name,
// otherwise fall back to its first non-empty output name. Unless legacy
// naming is requested, names are prefixed ("onnx_node!...") to keep them
// distinct from tensor names.
// We need to rework DNN outputs API, this is a workaround for #21698
std::string ONNXImporter::extractNodeName(const opencv_onnx::NodeProto& node_proto)
{
    if (node_proto.has_name() && !node_proto.name().empty())
    {
        if (useLegacyNames)
            return node_proto.name();
        return cv::format("onnx_node!%s", node_proto.name().c_str());
    }
    for (int i = 0; i < node_proto.output_size(); ++i)
    {
        const std::string& name = node_proto.output(i);
        // There are two ways to leave an optional input or output unspecified:
        // the first, available only for trailing inputs and outputs, is to simply not provide that input;
        // the second method is to use an empty string in place of an input or output name.
        if (!name.empty())
        {
            if (useLegacyNames)
                return name; // was 'name.c_str()': redundant char* round-trip, truncates at embedded NUL
            return cv::format("onnx_node_output_%d!%s", i, name.c_str());
        }
    }
    CV_Error(Error::StsAssert, "Couldn't deduce Node name.");
}
void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto)
{
CV_Assert(node_proto.output_size() >= 1);
const std::string& name = extractNodeName(node_proto);
const std::string& layer_type = node_proto.op_type();
const std::string& layer_type_domain = getLayerTypeDomain(node_proto);
const auto& dispatch = getDispatchMap(node_proto);