Skip to content

Commit

Permalink
Enable Dynamic shape support via tensor virtual and physical resizing (
Browse files Browse the repository at this point in the history
…pytorch#2340)

Summary:
X-link: pytorch/pytorch#121598


## Context

This changeset lays the foundation for supporting dynamic shapes in the ExecuTorch Vulkan delegate by allowing tensors to be resized in one of two ways:

1. Discard the underlying `vkImage` or `vkBuffer` and reallocate a new `vkImage` or `vkBuffer` with updated sizes. This method is intended to be used when the current `vkImage` or `vkBuffer` is not large enough to contain the new sizes.
2. Update the tensor's size metadata without reallocating any new resources. This allows shaders to interpret the underlying `vkImage` or `vkBuffer` as if it were smaller than it actually is, and allows command buffers to be preserved when sizes are changed.

Differential Revision: D54728401
  • Loading branch information
SS-JIA authored and facebook-github-bot committed Mar 10, 2024
1 parent ceb1f1d commit c3990de
Show file tree
Hide file tree
Showing 10 changed files with 776 additions and 239 deletions.
42 changes: 42 additions & 0 deletions backends/vulkan/test/glsl/all_shaders.yaml
@@ -0,0 +1,42 @@
# Variants of the binary_op_nobroadcast__test shader template. Each variant
# supplies the expression substituted for OPERATOR, which the template uses as
# the body of its OP(X, Y) macro.
binary_op_nobroadcast__test:
  parameter_names_with_default_values:
    OPERATOR: X + Y
  shader_variants:
    - NAME: binary_add_nobroadcast__test
      OPERATOR: X + Y
    - NAME: binary_sub_nobroadcast__test
      OPERATOR: X - Y
    - NAME: binary_mul_nobroadcast__test
      OPERATOR: X * Y
    - NAME: binary_div_nobroadcast__test
      OPERATOR: X / Y
    - NAME: binary_pow_nobroadcast__test
      OPERATOR: pow(X, Y)

# Variants of the image_to_nchw__test shader template. NDIM, DTYPE and PACKING
# are template parameters; one variant is generated for every DTYPE value
# listed under generate_variant_forall.
image_to_nchw__test:
  parameter_names_with_default_values:
    NDIM: 3
    DTYPE: float
    PACKING: CHANNELS_PACKED
  generate_variant_forall:
    DTYPE:
      - VALUE: "half"
        SUFFIX: "half"
      - VALUE: "float"
        SUFFIX: "float"
  shader_variants:
    - NAME: image3d_to_nchw_C_packed

# Variants of the nchw_to_image__test shader template. NDIM, DTYPE and PACKING
# are template parameters; one variant is generated for every DTYPE value
# listed under generate_variant_forall.
nchw_to_image__test:
  parameter_names_with_default_values:
    NDIM: 3
    DTYPE: float
    PACKING: CHANNELS_PACKED
  generate_variant_forall:
    DTYPE:
      - VALUE: "half"
        SUFFIX: "half"
      - VALUE: "float"
        SUFFIX: "float"
  shader_variants:
    - NAME: nchw_to_image3d_C_packed
35 changes: 35 additions & 0 deletions backends/vulkan/test/glsl/binary_op_nobroadcast__test.glsl
@@ -0,0 +1,35 @@
#version 450 core
// Interface declarations for the no-broadcast binary-op test shader.
// ${...} tokens are template placeholders (presumably filled in by the shader
// generator before compilation; OPERATOR is supplied per variant).
// clang-format off
#define PRECISION ${PRECISION}
#define FORMAT ${FORMAT}

// Element-wise operation applied to one texel from each input.
#define OP(X, Y) ${OPERATOR}
// clang-format on

layout(std430) buffer;

// Binding 0: output image, written with imageStore.
// clang-format off
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D image_out;
// clang-format on
// Bindings 1 and 2: the two operands, read with texelFetch at the same
// position as the output texel.
layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
layout(set = 0, binding = 2) uniform PRECISION sampler3D image_other;

// Binding 3: extents whose xyz components bound the invocation position;
// the w component is not read by this shader.
layout(set = 0, binding = 3) uniform PRECISION restrict OutExtents {
uvec4 data;
}
out_extents;

// Work group size is taken from specialization constants 0, 1 and 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

// Applies OP to the texels of image_in and image_other found at this
// invocation's position and stores the result at the same position in
// image_out. Invocations outside out_extents do nothing.
void main() {
  const ivec3 texel_pos = ivec3(gl_GlobalInvocationID);

  // Equivalent to returning early when any component is >= its extent.
  if (all(lessThan(texel_pos, out_extents.data.xyz))) {
    const vec4 lhs = texelFetch(image_in, texel_pos, 0);
    const vec4 rhs = texelFetch(image_other, texel_pos, 0);

    imageStore(image_out, texel_pos, OP(lhs, rhs));
  }
}
Empty file.
28 changes: 28 additions & 0 deletions backends/vulkan/test/glsl/fill_texture__test.glsl
@@ -0,0 +1,28 @@
#version 450 core
// Interface declarations for the texture-fill test shader.
// ${...} tokens are template placeholders (presumably filled in by the shader
// generator before compilation).
#define PRECISION ${PRECISION}
#define FORMAT ${FORMAT}

layout(std430) buffer;

/* Qualifiers: layout - storage - precision - memory */

// Binding 0: image that receives the fill value.
// clang-format off
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;
// clang-format on
// Binding 1: fill parameters.
//   size - bounds for the invocation position; positions at or beyond it are skipped
//   fill - not read by main() in this shader
//   vals - texel value stored at every in-bounds position
layout(set = 0, binding = 1) uniform PRECISION restrict Block {
ivec3 size;
int fill;
vec4 vals;
} params;

// Work group size is taken from specialization constants 0, 1 and 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

// Stores params.vals into uOutput at this invocation's position, provided
// the position lies inside params.size on every axis.
void main() {
  const ivec3 texel_pos = ivec3(gl_GlobalInvocationID);

  // Equivalent to returning early when any component is >= its bound.
  if (all(lessThan(texel_pos, params.size))) {
    imageStore(uOutput, texel_pos, params.vals);
  }
}
54 changes: 54 additions & 0 deletions backends/vulkan/test/glsl/image_to_nchw__test.glsl
@@ -0,0 +1,54 @@
#version 450 core
// Interface declarations for the image -> buffer copy test shader.
// ${...} tokens are template placeholders (presumably filled in by the shader
// generator before compilation; NDIM, DTYPE and PACKING are per-variant).
// clang-format off
#define PRECISION ${PRECISION}
// clang-format on

// Provides the POS_TO_COORD_* and COORD_TO_BUFFER_IDX macros used by main().
#include "indexing_utils.h"

layout(std430) buffer;

// Binding 0: source image, read with texelFetch.
layout(set = 0, binding = 0) uniform PRECISION ${SAMPLER_T[NDIM][DTYPE]} image_in;
// Binding 1: destination buffer; one scalar is written per valid texel lane.
layout(set = 0, binding = 1) buffer PRECISION restrict writeonly Buffer {
${T[DTYPE]} data[];
}
buffer_out;

// Binding 2: sizes used to turn the invocation position into a 4-component
// coordinate and to bounds-check that coordinate.
layout(set = 0, binding = 2) uniform PRECISION restrict GpuSizes {
ivec4 data;
}
gpu_sizes;

// Binding 3: sizes used to compute the linear buffer index; the z component
// is also the limit for which texel lanes are written out.
layout(set = 0, binding = 3) uniform PRECISION restrict CpuSizes {
ivec4 data;
}
cpu_sizes;

// Work group size is taken from specialization constants 0, 1 and 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

// Copies the four lanes of the texel at this invocation's position into
// buffer_out. Lane k is written only when coordinate z + k is below
// cpu_sizes.data.z; consecutive lanes are gpu_sizes x * y elements apart.
void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);
  const ivec4 tensor_coord = POS_TO_COORD_${PACKING}(pos, gpu_sizes.data);

  if (any(greaterThanEqual(tensor_coord, gpu_sizes.data))) {
    return;
  }

  const ${VEC4_T[DTYPE]} texel = texelFetch(image_in, pos, 0);

  // Kept as a stand-alone initializer so the macro expansion is never
  // embedded in a larger expression.
  const int first_index = COORD_TO_BUFFER_IDX(tensor_coord, cpu_sizes.data);
  const int lane_stride = gpu_sizes.data.x * gpu_sizes.data.y;
  const int lane_limit = cpu_sizes.data.z;

  for (int lane = 0; lane < 4; ++lane) {
    if (tensor_coord.z + lane < lane_limit) {
      buffer_out.data[first_index + lane * lane_stride] = texel[lane];
    }
  }
}
14 changes: 14 additions & 0 deletions backends/vulkan/test/glsl/indexing_utils.h
@@ -0,0 +1,14 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

/*
 * Builds an ivec4 coordinate from a texel position `pos` and `sizes`:
 * x and y are copied from pos, z is (pos.z * 4) modulo sizes.z, and w is
 * (pos.z * 4) divided by sizes.z.
 *
 * Both parameters are parenthesized so that expression arguments keep their
 * intended precedence. Each argument is evaluated more than once, so avoid
 * arguments with side effects.
 */
#define POS_TO_COORD_CHANNELS_PACKED(pos, sizes)      \
  ivec4(                                              \
      (pos).x,                                        \
      (pos).y,                                        \
      ((pos).z * 4) % (sizes).z,                      \
      ((pos).z * 4) / (sizes).z)

/*
 * Linear index of `coord` given `sizes`:
 *   x + y * sizes.x + z * sizes.y * sizes.x + w * sizes.z * sizes.y * sizes.x
 *
 * The expansion is fully parenthesized and carries no trailing semicolon, so
 * it can be used inside a larger expression as well as a stand-alone
 * initializer. Each argument is evaluated more than once, so avoid arguments
 * with side effects.
 */
#define COORD_TO_BUFFER_IDX(coord, sizes)             \
  ((coord).x + (coord).y * (sizes).x +                \
   (coord).z * (sizes).y * (sizes).x +                \
   (coord).w * (sizes).z * (sizes).y * (sizes).x)
56 changes: 56 additions & 0 deletions backends/vulkan/test/glsl/nchw_to_image__test.glsl
@@ -0,0 +1,56 @@
#version 450 core
// Interface declarations for the buffer -> image copy test shader.
// ${...} tokens are template placeholders (presumably filled in by the shader
// generator before compilation; NDIM, DTYPE and PACKING are per-variant).
// clang-format off
#define PRECISION ${PRECISION}
// clang-format on

// Provides the POS_TO_COORD_* and COORD_TO_BUFFER_IDX macros used by main().
#include "indexing_utils.h"

layout(std430) buffer;

// Binding 0: destination image, written with imageStore.
// clang-format off
layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
// clang-format on
// Binding 1: source buffer; up to four scalars are gathered per texel.
layout(set = 0, binding = 1) buffer PRECISION restrict readonly Buffer {
${T[DTYPE]} data[];
}
buffer_in;

// Binding 2: sizes used to turn the invocation position into a 4-component
// coordinate and to bounds-check that coordinate.
layout(set = 0, binding = 2) uniform PRECISION restrict GpuSizes {
ivec4 data;
}
gpu_sizes;

// Binding 3: sizes used to compute the linear buffer index; the z component
// is also the limit for which texel lanes hold real data.
layout(set = 0, binding = 3) uniform PRECISION restrict CpuSizes {
ivec4 data;
}
cpu_sizes;

// Work group size is taken from specialization constants 0, 1 and 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

// Gathers up to four scalars from buffer_in into one texel and stores it in
// image_out at this invocation's position.
//
// Lane k of the texel is loaded only when coordinate z + k is below
// cpu_sizes.data.z; every other lane is left at zero. Guarding the load
// (instead of loading unconditionally and multiplying by a 0/1 mask) keeps
// padded lanes from indexing past the end of buffer_in, guarantees they are
// exactly zero even if the stray element would have been NaN/Inf, and avoids
// mixing vec4 with the DTYPE-dependent texel type. This mirrors the per-lane
// guards used when copying from the image back to a buffer.
void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);
  const ivec4 coord = POS_TO_COORD_${PACKING}(pos, gpu_sizes.data);

  if (any(greaterThanEqual(coord, gpu_sizes.data))) {
    return;
  }

  // Kept as a stand-alone initializer so the macro expansion is never
  // embedded in a larger expression.
  const int base_index = COORD_TO_BUFFER_IDX(coord, cpu_sizes.data);
  // NOTE(review): the lane stride uses gpu_sizes x * y while base_index uses
  // cpu_sizes; this assumes both agree on x and y — confirm.
  const ivec4 buf_indices =
      base_index + ivec4(0, 1, 2, 3) * (gpu_sizes.data.x * gpu_sizes.data.y);

  ${VEC4_T[DTYPE]} texel = ${VEC4_T[DTYPE]}(0);
  for (int lane = 0; lane < 4; ++lane) {
    if (coord.z + lane < cpu_sizes.data.z) {
      texel[lane] = buffer_in.data[buf_indices[lane]];
    }
  }

  imageStore(image_out, ${GET_POS[NDIM]("pos")}, texel);
}

0 comments on commit c3990de

Please sign in to comment.