[ET-VK] Enable Dynamic shape support via tensor virtual and physical resizing (pytorch#121598)

Summary:

X-link: pytorch/executorch#2340

## Context

This changeset lays the foundation for supporting dynamic shapes in the ExecuTorch Vulkan delegate by allowing tensors to be resized in one of two ways:

1. Discarding the underlying `vkImage` or `vkBuffer` and reallocating a new `vkImage` or `vkBuffer` with updated sizes. This method is intended for when the current `vkImage` or `vkBuffer` is not large enough to contain the new sizes.
2. Updating the tensor's size metadata without reallocating any new resources. This lets shaders interpret the underlying `vkImage` or `vkBuffer` as if it were smaller than it actually is, and allows previously recorded command buffers to be preserved when sizes change (both paths are sketched below).
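As a rough illustration, here is how the two paths look from the caller's side (a minimal sketch: the wrapper function and example sizes are assumptions, but `reallocate()` and `virtual_resize()` are the APIs added by this change):

```cpp
#include <vector>

// Minimal sketch, not from this diff: assumes `tensor` was already
// constructed with some initial sizes.
void resize_example(at::native::vulkan::vTensor& tensor) {
  // Method 2: the new sizes still fit inside the existing vkImage/vkBuffer,
  // so only the size metadata (and any size UBOs already handed to shaders)
  // is updated in place; recorded command buffers remain valid.
  tensor.virtual_resize({1, 4, 16, 16});

  // Method 1: the new sizes exceed the current allocation, so the old
  // vkImage/vkBuffer is discarded and a larger one is allocated.
  tensor.reallocate({1, 4, 64, 64});
}
```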

Test Plan: Check CI. Tests exercising both resizing methods have also been added to `vulkan_compute_api_test`.
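For reference, one of the new checks might look roughly like this (illustrative only: `make_test_tensor` is a hypothetical helper, not the fixture actually used in `vulkan_compute_api_test`):

```cpp
#include <gtest/gtest.h>

TEST(VulkanComputeAPITest, VirtualResizeRejectsGrowth) {
  // Hypothetical helper standing in for however the real suite builds a
  // vTensor with the given sizes.
  vTensor tensor = make_test_tensor({1, 4, 32, 32});

  // Shrinking within the existing allocation only touches metadata...
  tensor.virtual_resize({1, 2, 16, 16});

  // ...while growing past it should throw; reallocate() handles that case.
  EXPECT_ANY_THROW(tensor.virtual_resize({1, 8, 64, 64}));
  tensor.reallocate({1, 8, 64, 64});
}
```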

Differential Revision: D54728401
SS-JIA authored and facebook-github-bot committed Mar 11, 2024
1 parent fac06a1 commit 345d3c3
Showing 6 changed files with 328 additions and 18 deletions.
29 changes: 28 additions & 1 deletion aten/src/ATen/native/vulkan/api/Context.h
@@ -205,6 +205,7 @@ class Context final {
class UniformParamsBuffer final {
private:
Context* context_p_;
size_t nbytes_;
VulkanBuffer vulkan_buffer_;

public:
@@ -213,6 +214,7 @@ class UniformParamsBuffer final {
template <typename Block>
UniformParamsBuffer(Context* context_p, const Block& block)
: context_p_(context_p),
nbytes_(sizeof(block)),
vulkan_buffer_(
context_p_->adapter_ptr()->vma().create_params_buffer(block)) {}

@@ -231,13 +233,29 @@ class UniformParamsBuffer final {
VulkanBuffer& buffer() {
return vulkan_buffer_;
}

template <typename Block>
void update(const Block& block) {
if (sizeof(block) != nbytes_) {
VK_THROW(
"Attempted to update UniformParamsBuffer with data of different size");
}
// Fill the uniform buffer with data in block
{
MemoryMap mapping(vulkan_buffer_, MemoryAccessType::WRITE);
Block* data_ptr = mapping.template data<Block>();

*data_ptr = block;
}
}
};

class StorageBuffer final {
private:
Context* context_p_;
ScalarType dtype_;
size_t numel_;
size_t nbytes_;
VulkanBuffer vulkan_buffer_;

public:
@@ -249,8 +267,9 @@ class StorageBuffer final {
: context_p_(context_p),
dtype_(dtype),
numel_(numel),
nbytes_(element_size(dtype_) * numel_),
vulkan_buffer_(context_p_->adapter_ptr()->vma().create_storage_buffer(
element_size(dtype_) * numel_,
nbytes_,
gpuonly)) {}

StorageBuffer(const StorageBuffer&) = delete;
@@ -270,6 +289,14 @@ class StorageBuffer final {
inline VulkanBuffer& buffer() {
return vulkan_buffer_;
}

inline size_t numel() {
return numel_;
}

inline size_t nbytes() {
return nbytes_;
}
};

bool available();
8 changes: 8 additions & 0 deletions aten/src/ATen/native/vulkan/api/Resource.h
@@ -151,6 +151,10 @@ class VulkanBuffer final {
return (memory_.allocation != VK_NULL_HANDLE);
}

inline bool owns_memory() const {
return owns_memory_;
}

operator bool() const {
return (handle_ != VK_NULL_HANDLE);
}
@@ -372,6 +376,10 @@ class VulkanImage final {
return (memory_.allocation != VK_NULL_HANDLE);
}

inline bool owns_memory() const {
return owns_memory_;
}

inline operator bool() const {
return (handles_.image != VK_NULL_HANDLE);
}
147 changes: 136 additions & 11 deletions aten/src/ATen/native/vulkan/api/Tensor.cpp
@@ -347,12 +347,13 @@ vTensor::vTensor(
strides_{calc_strides(sizes, memory_layout_, storage_type)},
gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
gpu_strides_{calc_strides(gpu_sizes_, memory_layout_, storage_type)},
// Vulkan uniform buffer containing sizes and stride info
metadata_uniform_{make_metadata_uniform(
context,
gpu_sizes_,
gpu_strides_,
storage_type)},
virtual_extents_(
create_image_extents(gpu_sizes_, storage_type, memory_layout)),
// Utility Uniform Buffers that can be passed to shaders as arguments
metadata_uniform_(),
cpu_sizes_uniform_(nullptr),
gpu_sizes_uniform_(nullptr),
extents_uniform_(nullptr),
// Construct Tensor storage
view_(std::make_shared<vTensorStorage>(
context,
@@ -377,12 +378,13 @@ vTensor::vTensor(
strides_{calc_strides(sizes, memory_layout_, storage_type)},
gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
gpu_strides_{calc_strides(gpu_sizes_, memory_layout_, storage_type)},
virtual_extents_(
create_image_extents(gpu_sizes_, storage_type, memory_layout)),
// Vulkan uniform buffer containing sizes and stride info
metadata_uniform_{make_metadata_uniform(
context,
gpu_sizes_,
gpu_strides_,
storage_type)},
metadata_uniform_(),
cpu_sizes_uniform_(nullptr),
gpu_sizes_uniform_(nullptr),
extents_uniform_(nullptr),
// Quantization params
is_quantized_{true},
q_scale_{q_scale},
@@ -425,6 +427,43 @@ api::VulkanBuffer& vTensor::buffer(
return view_->buffer_;
}

api::VulkanBuffer& vTensor::buffer_metadata() {
if (!metadata_uniform_.buffer()) {
metadata_uniform_ = make_metadata_uniform(
view_->context_, gpu_sizes_, gpu_strides_, storage_type());
}
return metadata_uniform_.buffer();
}

std::shared_ptr<api::UniformParamsBuffer> vTensor::cpu_sizes_ubo() {
if (!cpu_sizes_uniform_) {
cpu_sizes_uniform_.reset(new api::UniformParamsBuffer(
view_->context_, api::utils::make_whcn_ivec4(sizes_)));
}
return cpu_sizes_uniform_;
}

std::shared_ptr<api::UniformParamsBuffer> vTensor::gpu_sizes_ubo() {
if (!gpu_sizes_uniform_) {
gpu_sizes_uniform_.reset(new api::UniformParamsBuffer(
view_->context_, api::utils::make_whcn_ivec4(gpu_sizes_)));
}
return gpu_sizes_uniform_;
}

std::shared_ptr<api::UniformParamsBuffer> vTensor::extents_ubo() {
if (!extents_uniform_) {
extents_uniform_.reset(new api::UniformParamsBuffer(
view_->context_,
api::utils::uvec4(
{view_->extents_.data[0],
view_->extents_.data[1],
view_->extents_.data[2],
1u})));
}
return extents_uniform_;
}

vTensor::BufferMetadata vTensor::get_cpu_buffer_metadata() const {
return {
api::utils::make_nchw_uvec4(sizes_),
@@ -473,6 +512,65 @@ void vTensor::bind_allocation(const api::MemoryAllocation& allocation) {
}
}

void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
sizes_ = new_sizes;
gpu_sizes_ = calc_gpu_sizes(sizes_, memory_layout_, storage_type());
virtual_extents_ =
create_image_extents(gpu_sizes_, storage_type(), memory_layout_);

if (cpu_sizes_uniform_) {
cpu_sizes_uniform_->update(api::utils::make_whcn_ivec4(sizes_));
}

if (gpu_sizes_uniform_) {
gpu_sizes_uniform_->update(api::utils::make_whcn_ivec4(gpu_sizes_));
}

if (extents_uniform_) {
extents_uniform_->update(api::utils::uvec4(
{virtual_extents_.data[0],
virtual_extents_.data[1],
virtual_extents_.data[2],
1u}));
}
}

void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
update_size_metadata(new_sizes);
view_->discard_and_reallocate(
calc_gpu_sizes(new_sizes, memory_layout_, storage_type()),
memory_layout_,
dtype_);
}

void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
update_size_metadata(new_sizes);
if (storage_type() == api::StorageType::BUFFER) {
if (gpu_nbytes() > view_->buffer_.mem_size()) {
VK_THROW(
"Cannot virtual_resize a vTensor with sizes that require a larger "
"buffer! reallocate() should be used instead.");
}
} else {
bool valid_resize = true;
if (virtual_extents_.data[0] > view_->extents_.data[0]) {
valid_resize = false;
}
if (virtual_extents_.data[1] > view_->extents_.data[1]) {
valid_resize = false;
}
if (virtual_extents_.data[2] > view_->extents_.data[2]) {
valid_resize = false;
}

if (!valid_resize) {
VK_THROW(
"Cannot virtual_resize a vTensor with sizes that require a larger "
"image texture! reallocate() should be used instead.");
}
}
}

//
// vTensorStorage
//
@@ -569,11 +667,16 @@ vTensorStorage::vTensorStorage(
last_access_{} {}

vTensorStorage::~vTensorStorage() {
flush();
}

void vTensorStorage::flush() {
if (image_) {
context_->register_image_cleanup(image_);
} else if (buffer_) {
context_->register_buffer_cleanup(buffer_);
}
last_access_ = {};
}

void vTensorStorage::transition(
@@ -663,6 +766,28 @@ void add_buffer_barrier(
}
}

void vTensorStorage::discard_and_reallocate(
const std::vector<int64_t>& gpu_sizes,
const api::GPUMemoryLayout gpu_memory_layout,
const api::ScalarType dtype) {
const bool image_owns_memory = image_.owns_memory();
const bool buffer_owns_memory = buffer_.owns_memory();

flush();

extents_ = create_image_extents(gpu_sizes, storage_type_, gpu_memory_layout);
image_ = allocate_image(
context_,
extents_,
storage_type_,
api::to_vkformat(dtype),
image_owns_memory);

buffer_length_ = api::utils::multiply_integers(gpu_sizes);
buffer_ = allocate_buffer(
context_, buffer_length_, storage_type_, dtype, buffer_owns_memory);
}

} // namespace vulkan
} // namespace native
} // namespace at
