diff --git a/common/dab.fbs b/common/dab.fbs index 95a7b3d..91db9c8 100644 --- a/common/dab.fbs +++ b/common/dab.fbs @@ -10,6 +10,7 @@ table Tensor { float32_data: [float32]; shape: [uint32]; name: string; + align_hwc_to_128: bool; } table Input { diff --git a/common/dab_generated.h b/common/dab_generated.h index 8d27629..ced21d0 100644 --- a/common/dab_generated.h +++ b/common/dab_generated.h @@ -67,7 +67,8 @@ inline const char * const *EnumNamesDataType() { } inline const char *EnumNameDataType(DataType e) { - const size_t index = static_cast(e); + if (e < DataType::Float32 || e > DataType::Bit) return ""; + const size_t index = static_cast(e); return EnumNamesDataType()[index]; } @@ -129,17 +130,19 @@ inline const char * const *EnumNamesLayerType() { } inline const char *EnumNameLayerType(LayerType e) { - const size_t index = static_cast(e); + if (e < LayerType::FpConv2D || e > LayerType::Shuffle) return ""; + const size_t index = static_cast(e); return EnumNamesLayerType()[index]; } struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_DATA_TYPE = 4, VT_BIN_DATA = 6, VT_FLOAT32_DATA = 8, VT_SHAPE = 10, - VT_NAME = 12 + VT_NAME = 12, + VT_ALIGN_HWC_TO_128 = 14 }; DataType data_type() const { return static_cast(GetField(VT_DATA_TYPE, 0)); @@ -156,6 +159,9 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const flatbuffers::String *name() const { return GetPointer(VT_NAME); } + bool align_hwc_to_128() const { + return GetField(VT_ALIGN_HWC_TO_128, 0) != 0; + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_DATA_TYPE) && @@ -167,6 +173,7 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { verifier.VerifyVector(shape()) && VerifyOffset(verifier, VT_NAME) && verifier.VerifyString(name()) && + VerifyField(verifier, VT_ALIGN_HWC_TO_128) && verifier.EndTable(); } }; @@ 
-189,6 +196,9 @@ struct TensorBuilder { void add_name(flatbuffers::Offset name) { fbb_.AddOffset(Tensor::VT_NAME, name); } + void add_align_hwc_to_128(bool align_hwc_to_128) { + fbb_.AddElement(Tensor::VT_ALIGN_HWC_TO_128, static_cast(align_hwc_to_128), 0); + } explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); @@ -207,12 +217,14 @@ inline flatbuffers::Offset CreateTensor( flatbuffers::Offset> bin_data = 0, flatbuffers::Offset> float32_data = 0, flatbuffers::Offset> shape = 0, - flatbuffers::Offset name = 0) { + flatbuffers::Offset name = 0, + bool align_hwc_to_128 = false) { TensorBuilder builder_(_fbb); builder_.add_name(name); builder_.add_shape(shape); builder_.add_float32_data(float32_data); builder_.add_bin_data(bin_data); + builder_.add_align_hwc_to_128(align_hwc_to_128); builder_.add_data_type(data_type); return builder_.Finish(); } @@ -223,18 +235,24 @@ inline flatbuffers::Offset CreateTensorDirect( const std::vector *bin_data = nullptr, const std::vector *float32_data = nullptr, const std::vector *shape = nullptr, - const char *name = nullptr) { + const char *name = nullptr, + bool align_hwc_to_128 = false) { + auto bin_data__ = bin_data ? _fbb.CreateVector(*bin_data) : 0; + auto float32_data__ = float32_data ? _fbb.CreateVector(*float32_data) : 0; + auto shape__ = shape ? _fbb.CreateVector(*shape) : 0; + auto name__ = name ? _fbb.CreateString(name) : 0; return flatbnn::CreateTensor( _fbb, data_type, - bin_data ? _fbb.CreateVector(*bin_data) : 0, - float32_data ? _fbb.CreateVector(*float32_data) : 0, - shape ? _fbb.CreateVector(*shape) : 0, - name ? 
_fbb.CreateString(name) : 0); + bin_data__, + float32_data__, + shape__, + name__, + align_hwc_to_128); } struct Input FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_SHAPE = 4, VT_NAME = 6 }; @@ -289,14 +307,16 @@ inline flatbuffers::Offset CreateInputDirect( flatbuffers::FlatBufferBuilder &_fbb, const std::vector *shape = nullptr, const char *name = nullptr) { + auto shape__ = shape ? _fbb.CreateVector(*shape) : 0; + auto name__ = name ? _fbb.CreateString(name) : 0; return flatbnn::CreateInput( _fbb, - shape ? _fbb.CreateVector(*shape) : 0, - name ? _fbb.CreateString(name) : 0); + shape__, + name__); } struct Binarize FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_INPUT = 4, VT_OUTPUT = 6 }; @@ -351,14 +371,16 @@ inline flatbuffers::Offset CreateBinarizeDirect( flatbuffers::FlatBufferBuilder &_fbb, const char *input = nullptr, const char *output = nullptr) { + auto input__ = input ? _fbb.CreateString(input) : 0; + auto output__ = output ? _fbb.CreateString(output) : 0; return flatbnn::CreateBinarize( _fbb, - input ? _fbb.CreateString(input) : 0, - output ? _fbb.CreateString(output) : 0); + input__, + output__); } struct BinConv2D FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_INPUT = 4, VT_WEIGHT = 6, VT_BIAS = 8, @@ -476,19 +498,26 @@ inline flatbuffers::Offset CreateBinConv2DDirect( const std::vector *strides = nullptr, const std::vector *dilations = nullptr, const char *output = nullptr) { + auto input__ = input ? _fbb.CreateString(input) : 0; + auto weight__ = weight ? _fbb.CreateString(weight) : 0; + auto bias__ = bias ? _fbb.CreateString(bias) : 0; + auto pads__ = pads ? _fbb.CreateVector(*pads) : 0; + auto strides__ = strides ? _fbb.CreateVector(*strides) : 0; + auto dilations__ = dilations ? 
_fbb.CreateVector(*dilations) : 0; + auto output__ = output ? _fbb.CreateString(output) : 0; return flatbnn::CreateBinConv2D( _fbb, - input ? _fbb.CreateString(input) : 0, - weight ? _fbb.CreateString(weight) : 0, - bias ? _fbb.CreateString(bias) : 0, - pads ? _fbb.CreateVector(*pads) : 0, - strides ? _fbb.CreateVector(*strides) : 0, - dilations ? _fbb.CreateVector(*dilations) : 0, - output ? _fbb.CreateString(output) : 0); + input__, + weight__, + bias__, + pads__, + strides__, + dilations__, + output__); } struct FpConv2D FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_INPUT = 4, VT_WEIGHT = 6, VT_BIAS = 8, @@ -606,19 +635,26 @@ inline flatbuffers::Offset CreateFpConv2DDirect( const std::vector *strides = nullptr, const std::vector *dilations = nullptr, const char *output = nullptr) { + auto input__ = input ? _fbb.CreateString(input) : 0; + auto weight__ = weight ? _fbb.CreateString(weight) : 0; + auto bias__ = bias ? _fbb.CreateString(bias) : 0; + auto pads__ = pads ? _fbb.CreateVector(*pads) : 0; + auto strides__ = strides ? _fbb.CreateVector(*strides) : 0; + auto dilations__ = dilations ? _fbb.CreateVector(*dilations) : 0; + auto output__ = output ? _fbb.CreateString(output) : 0; return flatbnn::CreateFpConv2D( _fbb, - input ? _fbb.CreateString(input) : 0, - weight ? _fbb.CreateString(weight) : 0, - bias ? _fbb.CreateString(bias) : 0, - pads ? _fbb.CreateVector(*pads) : 0, - strides ? _fbb.CreateVector(*strides) : 0, - dilations ? _fbb.CreateVector(*dilations) : 0, - output ? 
_fbb.CreateString(output) : 0); + input__, + weight__, + bias__, + pads__, + strides__, + dilations__, + output__); } struct AvePool FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_INPUT = 4, VT_KERNEL_SHAPE = 6, VT_PADS = 8, @@ -712,17 +748,22 @@ inline flatbuffers::Offset CreateAvePoolDirect( const std::vector *pads = nullptr, const std::vector *strides = nullptr, const char *output = nullptr) { + auto input__ = input ? _fbb.CreateString(input) : 0; + auto kernel_shape__ = kernel_shape ? _fbb.CreateVector(*kernel_shape) : 0; + auto pads__ = pads ? _fbb.CreateVector(*pads) : 0; + auto strides__ = strides ? _fbb.CreateVector(*strides) : 0; + auto output__ = output ? _fbb.CreateString(output) : 0; return flatbnn::CreateAvePool( _fbb, - input ? _fbb.CreateString(input) : 0, - kernel_shape ? _fbb.CreateVector(*kernel_shape) : 0, - pads ? _fbb.CreateVector(*pads) : 0, - strides ? _fbb.CreateVector(*strides) : 0, - output ? _fbb.CreateString(output) : 0); + input__, + kernel_shape__, + pads__, + strides__, + output__); } struct MaxPool FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_INPUT = 4, VT_KERNEL_SHAPE = 6, VT_PADS = 8, @@ -816,17 +857,22 @@ inline flatbuffers::Offset CreateMaxPoolDirect( const std::vector *pads = nullptr, const std::vector *strides = nullptr, const char *output = nullptr) { + auto input__ = input ? _fbb.CreateString(input) : 0; + auto kernel_shape__ = kernel_shape ? _fbb.CreateVector(*kernel_shape) : 0; + auto pads__ = pads ? _fbb.CreateVector(*pads) : 0; + auto strides__ = strides ? _fbb.CreateVector(*strides) : 0; + auto output__ = output ? _fbb.CreateString(output) : 0; return flatbnn::CreateMaxPool( _fbb, - input ? _fbb.CreateString(input) : 0, - kernel_shape ? _fbb.CreateVector(*kernel_shape) : 0, - pads ? _fbb.CreateVector(*pads) : 0, - strides ? 
_fbb.CreateVector(*strides) : 0, - output ? _fbb.CreateString(output) : 0); + input__, + kernel_shape__, + pads__, + strides__, + output__); } struct Relu FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_INPUT = 4, VT_OUTPUT = 6 }; @@ -881,14 +927,16 @@ inline flatbuffers::Offset CreateReluDirect( flatbuffers::FlatBufferBuilder &_fbb, const char *input = nullptr, const char *output = nullptr) { + auto input__ = input ? _fbb.CreateString(input) : 0; + auto output__ = output ? _fbb.CreateString(output) : 0; return flatbnn::CreateRelu( _fbb, - input ? _fbb.CreateString(input) : 0, - output ? _fbb.CreateString(output) : 0); + input__, + output__); } struct Softmax FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_INPUT = 4, VT_OUTPUT = 6 }; @@ -943,14 +991,16 @@ inline flatbuffers::Offset CreateSoftmaxDirect( flatbuffers::FlatBufferBuilder &_fbb, const char *input = nullptr, const char *output = nullptr) { + auto input__ = input ? _fbb.CreateString(input) : 0; + auto output__ = output ? _fbb.CreateString(output) : 0; return flatbnn::CreateSoftmax( _fbb, - input ? _fbb.CreateString(input) : 0, - output ? _fbb.CreateString(output) : 0); + input__, + output__); } struct FC FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_INPUT = 4, VT_WEIGHT = 6, VT_BIAS = 8, @@ -1029,16 +1079,20 @@ inline flatbuffers::Offset CreateFCDirect( const char *weight = nullptr, const char *bias = nullptr, const char *output = nullptr) { + auto input__ = input ? _fbb.CreateString(input) : 0; + auto weight__ = weight ? _fbb.CreateString(weight) : 0; + auto bias__ = bias ? _fbb.CreateString(bias) : 0; + auto output__ = output ? _fbb.CreateString(output) : 0; return flatbnn::CreateFC( _fbb, - input ? _fbb.CreateString(input) : 0, - weight ? 
_fbb.CreateString(weight) : 0, - bias ? _fbb.CreateString(bias) : 0, - output ? _fbb.CreateString(output) : 0); + input__, + weight__, + bias__, + output__); } struct Add FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_INPUT1 = 4, VT_INPUT2 = 6, VT_OUTPUT = 8 @@ -1105,15 +1159,18 @@ inline flatbuffers::Offset CreateAddDirect( const char *input1 = nullptr, const char *input2 = nullptr, const char *output = nullptr) { + auto input1__ = input1 ? _fbb.CreateString(input1) : 0; + auto input2__ = input2 ? _fbb.CreateString(input2) : 0; + auto output__ = output ? _fbb.CreateString(output) : 0; return flatbnn::CreateAdd( _fbb, - input1 ? _fbb.CreateString(input1) : 0, - input2 ? _fbb.CreateString(input2) : 0, - output ? _fbb.CreateString(output) : 0); + input1__, + input2__, + output__); } struct Concat FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_INPUTS = 4, VT_AXIS = 6, VT_OUTPUT = 8 @@ -1180,15 +1237,17 @@ inline flatbuffers::Offset CreateConcatDirect( const std::vector> *inputs = nullptr, int32_t axis = 0, const char *output = nullptr) { + auto inputs__ = inputs ? _fbb.CreateVector>(*inputs) : 0; + auto output__ = output ? _fbb.CreateString(output) : 0; return flatbnn::CreateConcat( _fbb, - inputs ? _fbb.CreateVector>(*inputs) : 0, + inputs__, axis, - output ? _fbb.CreateString(output) : 0); + output__); } struct Shuffle FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_INPUT = 4, VT_OUTPUT = 6 }; @@ -1243,14 +1302,16 @@ inline flatbuffers::Offset CreateShuffleDirect( flatbuffers::FlatBufferBuilder &_fbb, const char *input = nullptr, const char *output = nullptr) { + auto input__ = input ? _fbb.CreateString(input) : 0; + auto output__ = output ? 
_fbb.CreateString(output) : 0; return flatbnn::CreateShuffle( _fbb, - input ? _fbb.CreateString(input) : 0, - output ? _fbb.CreateString(output) : 0); + input__, + output__); } struct Split FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_INPUT = 4, VT_OUTPUTS = 6 }; @@ -1306,14 +1367,16 @@ inline flatbuffers::Offset CreateSplitDirect( flatbuffers::FlatBufferBuilder &_fbb, const char *input = nullptr, const std::vector> *outputs = nullptr) { + auto input__ = input ? _fbb.CreateString(input) : 0; + auto outputs__ = outputs ? _fbb.CreateVector>(*outputs) : 0; return flatbnn::CreateSplit( _fbb, - input ? _fbb.CreateString(input) : 0, - outputs ? _fbb.CreateVector>(*outputs) : 0); + input__, + outputs__); } struct Affine FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_INPUT = 4, VT_A = 6, VT_B = 8, @@ -1392,16 +1455,20 @@ inline flatbuffers::Offset CreateAffineDirect( const char *a = nullptr, const char *b = nullptr, const char *output = nullptr) { + auto input__ = input ? _fbb.CreateString(input) : 0; + auto a__ = a ? _fbb.CreateString(a) : 0; + auto b__ = b ? _fbb.CreateString(b) : 0; + auto output__ = output ? _fbb.CreateString(output) : 0; return flatbnn::CreateAffine( _fbb, - input ? _fbb.CreateString(input) : 0, - a ? _fbb.CreateString(a) : 0, - b ? _fbb.CreateString(b) : 0, - output ? _fbb.CreateString(output) : 0); + input__, + a__, + b__, + output__); } struct Layer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_TYPE = 4, VT_FP_CONV2D_PARAM = 6, VT_BIN_CONV2D_PARAM = 8, @@ -1611,6 +1678,7 @@ inline flatbuffers::Offset CreateLayerDirect( flatbuffers::Offset split_param = 0, flatbuffers::Offset shuffle_param = 0, const char *name = nullptr) { + auto name__ = name ? 
_fbb.CreateString(name) : 0; return flatbnn::CreateLayer( _fbb, type, @@ -1627,11 +1695,11 @@ inline flatbuffers::Offset CreateLayerDirect( binarize_param, split_param, shuffle_param, - name ? _fbb.CreateString(name) : 0); + name__); } struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - enum { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_LAYERS = 4, VT_INITIALIZERS = 6, VT_INPUTS = 8, @@ -1712,11 +1780,14 @@ inline flatbuffers::Offset CreateModelDirect( const std::vector> *initializers = nullptr, const std::vector> *inputs = nullptr, uint32_t version = 0) { + auto layers__ = layers ? _fbb.CreateVector>(*layers) : 0; + auto initializers__ = initializers ? _fbb.CreateVector>(*initializers) : 0; + auto inputs__ = inputs ? _fbb.CreateVector>(*inputs) : 0; return flatbnn::CreateModel( _fbb, - layers ? _fbb.CreateVector>(*layers) : 0, - initializers ? _fbb.CreateVector>(*initializers) : 0, - inputs ? _fbb.CreateVector>(*inputs) : 0, + layers__, + initializers__, + inputs__, version); } diff --git a/dabnn/layers/BinConv.cpp b/dabnn/layers/BinConv.cpp index 3dd4543..f66d08b 100644 --- a/dabnn/layers/BinConv.cpp +++ b/dabnn/layers/BinConv.cpp @@ -78,7 +78,6 @@ BinConv::BinConv(NetCP net, const std::string &name, css input, css weight, bool BinConv::direct_conv_compatible() const { #ifdef __aarch64__ - return false; if (weight_mat->h == 3 && weight_mat->w == 3 && input_mat->elem_c == 64 && stride_h == stride_w) { return true; diff --git a/tools/onnx2bnn/OnnxConverter.cpp b/tools/onnx2bnn/OnnxConverter.cpp index 461ed6f..72b48aa 100644 --- a/tools/onnx2bnn/OnnxConverter.cpp +++ b/tools/onnx2bnn/OnnxConverter.cpp @@ -51,7 +51,7 @@ void OnnxConverter::AddBinConv(const std::string &input_name, flatbnn::CreateLayer(builder_, flatbnn::LayerType::BinConv2D, 0, param); const auto flat_tensor = flatbnn::CreateTensorDirect( builder_, flatbnn::DataType::Bit, &bin_weight.data, nullptr, - &bin_weight.shape, weight_name.c_str()); + 
&bin_weight.shape, weight_name.c_str(), bin_weight.align_hwc_to_128); tensors_.push_back(flat_tensor); layers_.push_back(layer); } @@ -123,27 +123,36 @@ OnnxConverter::BTensor OnnxConverter::bitpack(OnnxConverter::FTensor ftensor) { "bitpack requires bin_t is 64 bit"); const auto N = Shaper::kn(ftensor.shape); + const auto C = Shaper::kc(ftensor.shape); const auto HWC = Shaper::total(ftensor.shape) / N; vector packed_data; bin_t tmp; - FORZ(n, N) { - FORZS(i, HWC, 128) { - const size_t eff_bits = std::min(HWC - i, 128); - pack_64_bitset(&ftensor.data[n * HWC + i], &tmp, - std::min(eff_bits, 64)); - packed_data.push_back(tmp); - pack_64_bitset( - &ftensor.data[n * HWC + i + 64], &tmp, - std::min(std::max(0, eff_bits - 64), 64)); + Shape shape = {ftensor.shape[0], ftensor.shape[1], ftensor.shape[2], + ftensor.shape[3]}; + bool align_hwc_to_128 = (C != 64); + if (align_hwc_to_128) { + FORZ(n, N) { + FORZS(i, HWC, 128) { + const size_t eff_bits = std::min(HWC - i, 128); + pack_64_bitset(&ftensor.data[n * HWC + i], &tmp, + std::min(eff_bits, 64)); + packed_data.push_back(tmp); + pack_64_bitset( + &ftensor.data[n * HWC + i + 64], &tmp, + std::min(std::max(0, eff_bits - 64), 64)); + packed_data.push_back(tmp); + } + } + } else { + FORZS(i, Shaper::total(ftensor.shape), 64) { + pack_64_bitset(&ftensor.data[i], &tmp); packed_data.push_back(tmp); } } - Shape shape = {ftensor.shape[0], ftensor.shape[1], ftensor.shape[2], - ftensor.shape[3]}; - return {packed_data, shape}; + return {packed_data, shape, align_hwc_to_128}; } std::vector OnnxConverter::split( @@ -208,7 +217,7 @@ std::vector OnnxConverter::Convert( : tensor.float_data().data(); auto data_vec = vector(ptr, ptr + Product(shape)); - onnx_float_tensors_[tensor.name()] = {data_vec, shape}; + onnx_float_tensors_[tensor.name()] = {data_vec, shape, false}; } operands_.push_back(tensor.name()); } diff --git a/tools/onnx2bnn/OnnxConverter.h b/tools/onnx2bnn/OnnxConverter.h index a6b61e9..f884fd3 100644 --- 
a/tools/onnx2bnn/OnnxConverter.h +++ b/tools/onnx2bnn/OnnxConverter.h @@ -22,6 +22,10 @@ class OnnxConverter { struct Tensor { std::vector<T> data; Shaper::Shape shape; + bool align_hwc_to_128 = false; + Tensor() = default; + Tensor(const std::vector<T> &data, const Shaper::Shape &shape, const bool align_hwc_to_128): + data(data), shape(shape), align_hwc_to_128(align_hwc_to_128) {} inline T get(const std::vector &x) { auto step = get_shape_for_accessing_element(); for (int i = shape.size() - 2; i >= 0; i--) {