Enable Dynamic shape support via tensor virtual and physical resizing (…

…pytorch#2340) Summary: X-link: pytorch/pytorch#121598 ## Context This changeset lays the foundations for supporting dynamic shapes in the ExecuTorch Vulkan delegate via allowing Tensors to be resized in one of two ways: 1. Discarding underlying `vkImage` or `vkBuffer` and reallocating a new `vkImage` or `vkBuffer` with updated sizes. This method is intended to be used when the current `vkImage` or `vkBuffer` is not large enough to contain the new sizes. 2. Update the tensor's size metadata without reallocating any new resources. This allows shaders to interpret the underlying `vkImage` or `vkBuffer` as if it were smaller than it actually is, and allows command buffers to be preserved when sizes are changed. Differential Revision: D54728401
SS-JIA · Mar 10, 2024 · c3990de · c3990de
1 parent ceb1f1d
commit c3990de
Show file tree

Hide file tree

Showing 10 changed files with 776 additions and 239 deletions.
diff --git a/backends/vulkan/test/glsl/all_shaders.yaml b/backends/vulkan/test/glsl/all_shaders.yaml
@@ -0,0 +1,42 @@
+binary_op_nobroadcast__test:
+  parameter_names_with_default_values:
+    OPERATOR: X + Y
+  shader_variants:
+    - NAME: binary_add_nobroadcast__test
+      OPERATOR: X + Y
+    - NAME: binary_sub_nobroadcast__test
+      OPERATOR: X - Y
+    - NAME: binary_mul_nobroadcast__test
+      OPERATOR: X * Y
+    - NAME: binary_div_nobroadcast__test
+      OPERATOR: X / Y
+    - NAME: binary_pow_nobroadcast__test
+      OPERATOR: pow(X, Y)
+
+image_to_nchw__test:
+  parameter_names_with_default_values:
+    NDIM: 3
+    DTYPE: float
+    PACKING: CHANNELS_PACKED
+  generate_variant_forall:
+    DTYPE:
+      - VALUE: "half"
+        SUFFIX: "half"
+      - VALUE: "float"
+        SUFFIX: "float"
+  shader_variants:
+    - NAME: image3d_to_nchw_C_packed
+
+nchw_to_image__test:
+  parameter_names_with_default_values:
+    NDIM: 3
+    DTYPE: float
+    PACKING: CHANNELS_PACKED
+  generate_variant_forall:
+    DTYPE:
+      - VALUE: "half"
+        SUFFIX: "half"
+      - VALUE: "float"
+        SUFFIX: "float"
+  shader_variants:
+    - NAME: nchw_to_image3d_C_packed
diff --git a/backends/vulkan/test/glsl/binary_op_nobroadcast__test.glsl b/backends/vulkan/test/glsl/binary_op_nobroadcast__test.glsl
@@ -0,0 +1,35 @@
+#version 450 core
+// clang-format off
+#define PRECISION ${PRECISION}
+#define FORMAT ${FORMAT}
+
+#define OP(X, Y) ${OPERATOR}
+// clang-format on
+
+layout(std430) buffer;
+
+// clang-format off
+layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D image_out;
+// clang-format on
+layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
+layout(set = 0, binding = 2) uniform PRECISION sampler3D image_other;
+
+layout(set = 0, binding = 3) uniform PRECISION restrict OutExtents {
+  uvec4 data;
+}
+out_extents;
+
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+void main() {
+  const ivec3 pos = ivec3(gl_GlobalInvocationID);
+
+  if (any(greaterThanEqual(pos, out_extents.data.xyz))) {
+    return;
+  }
+
+  vec4 in_texel = texelFetch(image_in, pos, 0);
+  vec4 other_texel = texelFetch(image_other, pos, 0);
+
+  imageStore(image_out, pos, OP(in_texel, other_texel));
+}
diff --git a/backends/vulkan/test/glsl/common.h b/backends/vulkan/test/glsl/common.h
diff --git a/backends/vulkan/test/glsl/fill_texture__test.glsl b/backends/vulkan/test/glsl/fill_texture__test.glsl
@@ -0,0 +1,28 @@
+#version 450 core
+#define PRECISION ${PRECISION}
+#define FORMAT ${FORMAT}
+
+layout(std430) buffer;
+
+/* Qualifiers: layout - storage - precision - memory */
+
+// clang-format off
+layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;
+// clang-format on
+layout(set = 0, binding = 1) uniform PRECISION restrict Block {
+  ivec3 size;
+  int fill;
+  vec4 vals;
+} params;
+
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+void main() {
+  const ivec3 pos = ivec3(gl_GlobalInvocationID);
+
+  if (any(greaterThanEqual(pos, params.size))) {
+    return;
+  }
+
+  imageStore(uOutput, pos, params.vals);
+}
diff --git a/backends/vulkan/test/glsl/image_to_nchw__test.glsl b/backends/vulkan/test/glsl/image_to_nchw__test.glsl
@@ -0,0 +1,54 @@
+#version 450 core
+// clang-format off
+#define PRECISION ${PRECISION}
+// clang-format on
+
+#include "indexing_utils.h"
+
+layout(std430) buffer;
+
+layout(set = 0, binding = 0) uniform PRECISION ${SAMPLER_T[NDIM][DTYPE]} image_in;
+layout(set = 0, binding = 1) buffer PRECISION restrict writeonly Buffer {
+  ${T[DTYPE]} data[];
+}
+buffer_out;
+
+layout(set = 0, binding = 2) uniform PRECISION restrict GpuSizes {
+  ivec4 data;
+}
+gpu_sizes;
+
+layout(set = 0, binding = 3) uniform PRECISION restrict CpuSizes {
+  ivec4 data;
+}
+cpu_sizes;
+
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+void main() {
+  const ivec3 pos = ivec3(gl_GlobalInvocationID);
+  const ivec4 coord = POS_TO_COORD_${PACKING}(pos, gpu_sizes.data);
+
+  if (any(greaterThanEqual(coord, gpu_sizes.data))) {
+    return;
+  }
+
+  const ${VEC4_T[DTYPE]} intex = texelFetch(image_in, pos, 0);
+
+  const int base_index = COORD_TO_BUFFER_IDX(coord, cpu_sizes.data);
+  const ivec4 buf_indices =
+      base_index + ivec4(0, 1, 2, 3) * (gpu_sizes.data.x * gpu_sizes.data.y);
+
+  if (coord.z < cpu_sizes.data.z) {
+    buffer_out.data[buf_indices.x] = intex.x;
+  }
+  if (coord.z + 1 < cpu_sizes.data.z) {
+    buffer_out.data[buf_indices.y] = intex.y;
+  }
+  if (coord.z + 2 < cpu_sizes.data.z) {
+    buffer_out.data[buf_indices.z] = intex.z;
+  }
+  if (coord.z + 3 < cpu_sizes.data.z) {
+    buffer_out.data[buf_indices.w] = intex.w;
+  }
+}
diff --git a/backends/vulkan/test/glsl/indexing_utils.h b/backends/vulkan/test/glsl/indexing_utils.h
@@ -0,0 +1,14 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#define POS_TO_COORD_CHANNELS_PACKED(pos, sizes) \
+  ivec4(pos.x, pos.y, (pos.z * 4) % sizes.z, (pos.z * 4) / sizes.z)
+
+#define COORD_TO_BUFFER_IDX(coord, sizes)                  \
+  coord.x + coord.y* sizes.x + coord.z* sizes.y* sizes.x + \
+      coord.w* sizes.z* sizes.y* sizes.x;
diff --git a/backends/vulkan/test/glsl/nchw_to_image__test.glsl b/backends/vulkan/test/glsl/nchw_to_image__test.glsl
@@ -0,0 +1,56 @@
+#version 450 core
+// clang-format off
+#define PRECISION ${PRECISION}
+// clang-format on
+
+#include "indexing_utils.h"
+
+layout(std430) buffer;
+
+// clang-format off
+layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
+// clang-format on
+layout(set = 0, binding = 1) buffer  PRECISION restrict readonly Buffer {
+  ${T[DTYPE]} data[];
+}
+buffer_in;
+
+layout(set = 0, binding = 2) uniform PRECISION restrict GpuSizes {
+  ivec4 data;
+}
+gpu_sizes;
+
+layout(set = 0, binding = 3) uniform PRECISION restrict CpuSizes {
+  ivec4 data;
+}
+cpu_sizes;
+
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+void main() {
+  const ivec3 pos = ivec3(gl_GlobalInvocationID);
+  const ivec4 coord = POS_TO_COORD_${PACKING}(pos, gpu_sizes.data);
+
+  if (any(greaterThanEqual(coord, gpu_sizes.data))) {
+    return;
+  }
+
+  const int base_index = COORD_TO_BUFFER_IDX(coord, cpu_sizes.data);
+  const ivec4 buf_indices =
+      base_index + ivec4(0, 1, 2, 3) * (gpu_sizes.data.x * gpu_sizes.data.y);
+
+  ${T[DTYPE]} val_x = buffer_in.data[buf_indices.x];
+  ${T[DTYPE]} val_y = buffer_in.data[buf_indices.y];
+  ${T[DTYPE]} val_z = buffer_in.data[buf_indices.z];
+  ${T[DTYPE]} val_w = buffer_in.data[buf_indices.w];
+
+  ${VEC4_T[DTYPE]} texel = ${VEC4_T[DTYPE]}(val_x, val_y, val_z, val_w);
+
+  if (coord.z + 3 >= cpu_sizes.data.z) {
+    ivec4 c_ind = ivec4(coord.z) + ivec4(0, 1, 2, 3);
+    vec4 valid_c = vec4(lessThan(c_ind, ivec4(cpu_sizes.data.z)));
+    texel = texel * valid_c;
+  }
+
+  imageStore(image_out, ${GET_POS[NDIM]("pos")}, texel);
+}