| /* Copyright (c) 2017-2022 Hans-Kristian Arntzen | |
| * | |
| * Permission is hereby granted, free of charge, to any person obtaining | |
| * a copy of this software and associated documentation files (the | |
| * "Software"), to deal in the Software without restriction, including | |
| * without limitation the rights to use, copy, modify, merge, publish, | |
| * distribute, sublicense, and/or sell copies of the Software, and to | |
| * permit persons to whom the Software is furnished to do so, subject to | |
| * the following conditions: | |
| * | |
| * The above copyright notice and this permission notice shall be | |
| * included in all copies or substantial portions of the Software. | |
| * | |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
| * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |
| * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
| * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
| * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
| */ | |
| #include "render_graph.hpp" | |
| #include "type_to_string.hpp" | |
| #include "format.hpp" | |
| #include "quirks.hpp" | |
| #include "muglm/muglm_impl.hpp" | |
| #include "thread_group.hpp" | |
| #include "task_composer.hpp" | |
| #include "vulkan_prerotate.hpp" | |
| #include <algorithm> | |
| namespace Granite | |
| { | |
| bool RenderPassInterface::render_pass_is_conditional() const | |
| { | |
| return false; | |
| } | |
| bool RenderPassInterface::render_pass_is_separate_layered() const | |
| { | |
| return false; | |
| } | |
| bool RenderPassInterface::need_render_pass() const | |
| { | |
| return true; | |
| } | |
| bool RenderPassInterface::get_clear_depth_stencil(VkClearDepthStencilValue *value) const | |
| { | |
| if (value) | |
| *value = { 1.0f, 0u }; | |
| return true; | |
| } | |
| bool RenderPassInterface::get_clear_color(unsigned, VkClearColorValue *value) const | |
| { | |
| if (value) | |
| *value = {}; | |
| return true; | |
| } | |
| void RenderPassInterface::build_render_pass(Vulkan::CommandBuffer &) | |
| { | |
| } | |
| void RenderPassInterface::build_render_pass_separate_layer(Vulkan::CommandBuffer &, unsigned) | |
| { | |
| } | |
| void RenderPassInterface::enqueue_prepare_render_pass(RenderGraph &, TaskComposer &) | |
| { | |
| } | |
| void RenderPassInterface::setup(Vulkan::Device &) | |
| { | |
| } | |
| void RenderPassInterface::setup_dependencies(RenderPass &, RenderGraph &) | |
| { | |
| } | |
| static const RenderGraphQueueFlags compute_queues = RENDER_GRAPH_QUEUE_ASYNC_COMPUTE_BIT | | |
| RENDER_GRAPH_QUEUE_COMPUTE_BIT; | |
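| // The add_* declarations below record how this pass reads or writes a logical resource: | |
| // which passes touch it, which queues use it, and which Vulkan usage flags it needs. | |
| // The graph later bakes this metadata into physical resources, barriers and render passes. | |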
| RenderTextureResource &RenderPass::add_attachment_input(const std::string &name) | |
| { | |
| auto &res = graph.get_texture_resource(name); | |
| res.add_queue(queue); | |
| res.read_in_pass(index); | |
| res.add_image_usage(VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT); | |
| attachments_inputs.push_back(&res); | |
| return res; | |
| } | |
| RenderTextureResource &RenderPass::add_history_input(const std::string &name) | |
| { | |
| auto &res = graph.get_texture_resource(name); | |
| res.add_queue(queue); | |
| res.add_image_usage(VK_IMAGE_USAGE_SAMPLED_BIT); | |
| // History inputs are not marked as read in any particular pass; they are consumed next frame. | |
| history_inputs.push_back(&res); | |
| return res; | |
| } | |
| RenderBufferResource &RenderPass::add_generic_buffer_input(const std::string &name, VkPipelineStageFlags stages, | |
| VkAccessFlags access, VkBufferUsageFlags usage) | |
| { | |
| auto &res = graph.get_buffer_resource(name); | |
| res.add_queue(queue); | |
| res.read_in_pass(index); | |
| res.add_buffer_usage(usage); | |
| AccessedBufferResource acc; | |
| acc.buffer = &res; | |
| acc.layout = VK_IMAGE_LAYOUT_GENERAL; | |
| acc.access = access; | |
| acc.stages = stages; | |
| generic_buffer.push_back(acc); | |
| return res; | |
| } | |
| RenderBufferResource &RenderPass::add_vertex_buffer_input(const std::string &name) | |
| { | |
| return add_generic_buffer_input(name, | |
| VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, | |
| VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, | |
| VK_BUFFER_USAGE_VERTEX_BUFFER_BIT); | |
| } | |
| RenderBufferResource &RenderPass::add_index_buffer_input(const std::string &name) | |
| { | |
| return add_generic_buffer_input(name, | |
| VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, | |
| VK_ACCESS_INDEX_READ_BIT, | |
| VK_BUFFER_USAGE_INDEX_BUFFER_BIT); | |
| } | |
| RenderBufferResource &RenderPass::add_indirect_buffer_input(const std::string &name) | |
| { | |
| return add_generic_buffer_input(name, | |
| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, | |
| VK_ACCESS_INDIRECT_COMMAND_READ_BIT, | |
| VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT); | |
| } | |
| RenderBufferResource &RenderPass::add_uniform_input(const std::string &name, VkPipelineStageFlags stages) | |
| { | |
| if (stages == 0) | |
| { | |
| if ((queue & compute_queues) != 0) | |
| stages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; | |
| else | |
| stages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; | |
| } | |
| return add_generic_buffer_input(name, stages, VK_ACCESS_UNIFORM_READ_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT); | |
| } | |
| RenderBufferResource &RenderPass::add_storage_read_only_input(const std::string &name, VkPipelineStageFlags stages) | |
| { | |
| if (stages == 0) | |
| { | |
| if ((queue & compute_queues) != 0) | |
| stages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; | |
| else | |
| stages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; | |
| } | |
| return add_generic_buffer_input(name, stages, VK_ACCESS_SHADER_READ_BIT, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); | |
| } | |
| RenderBufferResource &RenderPass::add_storage_output(const std::string &name, const BufferInfo &info, const std::string &input) | |
| { | |
| auto &res = graph.get_buffer_resource(name); | |
| res.add_queue(queue); | |
| res.set_buffer_info(info); | |
| res.written_in_pass(index); | |
| res.add_buffer_usage(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); | |
| storage_outputs.push_back(&res); | |
| if (!input.empty()) | |
| { | |
| auto &input_res = graph.get_buffer_resource(input); | |
| input_res.read_in_pass(index); | |
| input_res.add_buffer_usage(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); | |
| storage_inputs.push_back(&input_res); | |
| } | |
| else | |
| storage_inputs.push_back(nullptr); | |
| return res; | |
| } | |
| RenderBufferResource &RenderPass::add_transfer_output(const std::string &name, const BufferInfo &info) | |
| { | |
| auto &res = graph.get_buffer_resource(name); | |
| res.add_queue(queue); | |
| res.set_buffer_info(info); | |
| res.written_in_pass(index); | |
| res.add_buffer_usage(VK_BUFFER_USAGE_TRANSFER_DST_BIT); | |
| transfer_outputs.push_back(&res); | |
| return res; | |
| } | |
| RenderTextureResource &RenderPass::add_texture_input(const std::string &name, VkPipelineStageFlags stages) | |
| { | |
| auto &res = graph.get_texture_resource(name); | |
| res.add_queue(queue); | |
| res.read_in_pass(index); | |
| res.add_image_usage(VK_IMAGE_USAGE_SAMPLED_BIT); | |
| // Support duplicate add_texture_inputs. | |
| auto itr = find_if(begin(generic_texture), end(generic_texture), [&](const AccessedTextureResource &acc) { | |
| return acc.texture == &res; | |
| }); | |
| if (itr != end(generic_texture)) | |
| return *itr->texture; | |
| AccessedTextureResource acc; | |
| acc.texture = &res; | |
| acc.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; | |
| acc.access = VK_ACCESS_SHADER_READ_BIT; | |
| if (stages != 0) | |
| acc.stages = stages; | |
| else if ((queue & compute_queues) != 0) | |
| acc.stages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; | |
| else | |
| acc.stages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; | |
| generic_texture.push_back(acc); | |
| return res; | |
| } | |
| RenderTextureResource &RenderPass::add_resolve_output(const std::string &name, const AttachmentInfo &info) | |
| { | |
| auto &res = graph.get_texture_resource(name); | |
| res.add_queue(queue); | |
| res.written_in_pass(index); | |
| res.set_attachment_info(info); | |
| res.add_image_usage(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); | |
| resolve_outputs.push_back(&res); | |
| return res; | |
| } | |
| RenderTextureResource &RenderPass::add_color_output(const std::string &name, const AttachmentInfo &info, const std::string &input) | |
| { | |
| auto &res = graph.get_texture_resource(name); | |
| res.add_queue(queue); | |
| res.written_in_pass(index); | |
| res.set_attachment_info(info); | |
| res.add_image_usage(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); | |
| if (info.levels != 1) | |
| res.add_image_usage(VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT); | |
| color_outputs.push_back(&res); | |
| if (!input.empty()) | |
| { | |
| auto &input_res = graph.get_texture_resource(input); | |
| input_res.read_in_pass(index); | |
| input_res.add_image_usage(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); | |
| color_inputs.push_back(&input_res); | |
| color_scale_inputs.push_back(nullptr); | |
| } | |
| else | |
| { | |
| color_inputs.push_back(nullptr); | |
| color_scale_inputs.push_back(nullptr); | |
| } | |
| return res; | |
| } | |
| RenderTextureResource &RenderPass::add_storage_texture_output(const std::string &name, const AttachmentInfo &info, | |
| const std::string &input) | |
| { | |
| auto &res = graph.get_texture_resource(name); | |
| res.add_queue(queue); | |
| res.written_in_pass(index); | |
| res.set_attachment_info(info); | |
| res.add_image_usage(VK_IMAGE_USAGE_STORAGE_BIT); | |
| storage_texture_outputs.push_back(&res); | |
| if (!input.empty()) | |
| { | |
| auto &input_res = graph.get_texture_resource(input); | |
| input_res.read_in_pass(index); | |
| input_res.add_image_usage(VK_IMAGE_USAGE_STORAGE_BIT); | |
| storage_texture_inputs.push_back(&input_res); | |
| } | |
| else | |
| storage_texture_inputs.push_back(nullptr); | |
| return res; | |
| } | |
| void RenderPass::add_proxy_output(const std::string &name, VkPipelineStageFlags stages) | |
| { | |
| auto &res = graph.get_proxy_resource(name); | |
| res.add_queue(queue); | |
| res.written_in_pass(index); | |
| assert(stages != 0); | |
| AccessedProxyResource proxy; | |
| proxy.proxy = &res; | |
| proxy.layout = VK_IMAGE_LAYOUT_GENERAL; | |
| proxy.stages = stages; | |
| proxy_outputs.push_back(proxy); | |
| } | |
| void RenderPass::add_proxy_input(const std::string &name, VkPipelineStageFlags stages) | |
| { | |
| auto &res = graph.get_proxy_resource(name); | |
| res.add_queue(queue); | |
| res.read_in_pass(index); | |
| assert(stages != 0); | |
| AccessedProxyResource proxy; | |
| proxy.proxy = &res; | |
| proxy.layout = VK_IMAGE_LAYOUT_GENERAL; | |
| proxy.stages = stages; | |
| proxy_inputs.push_back(proxy); | |
| } | |
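| // Declares that "to" refers to the same underlying resource as "from", but is considered | |
| // written by this pass: the alias copies the attachment info, gets fresh read/write pass | |
| // tracking, and is later assigned the same physical index as "from". | |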
| void RenderPass::add_fake_resource_write_alias(const std::string &from, const std::string &to) | |
| { | |
| auto &from_res = graph.get_texture_resource(from); | |
| auto &to_res = graph.get_texture_resource(to); | |
| to_res = from_res; | |
| to_res.get_read_passes().clear(); | |
| to_res.get_write_passes().clear(); | |
| to_res.written_in_pass(index); | |
| fake_resource_alias.emplace_back(&from_res, &to_res); | |
| } | |
| RenderTextureResource &RenderPass::add_blit_texture_read_only_input(const std::string &name) | |
| { | |
| auto &res = graph.get_texture_resource(name); | |
| res.add_queue(queue); | |
| res.read_in_pass(index); | |
| res.add_image_usage(VK_IMAGE_USAGE_TRANSFER_SRC_BIT); | |
| AccessedTextureResource acc; | |
| acc.texture = &res; | |
| acc.layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; | |
| acc.access = VK_ACCESS_TRANSFER_READ_BIT; | |
| acc.stages = VK_PIPELINE_STAGE_TRANSFER_BIT; | |
| generic_texture.push_back(acc); | |
| return res; | |
| } | |
| RenderTextureResource &RenderPass::add_blit_texture_output(const std::string &name, const AttachmentInfo &info, | |
| const std::string &input) | |
| { | |
| auto &res = graph.get_texture_resource(name); | |
| res.add_queue(queue); | |
| res.written_in_pass(index); | |
| res.set_attachment_info(info); | |
| res.add_image_usage(VK_IMAGE_USAGE_TRANSFER_DST_BIT); | |
| blit_texture_outputs.push_back(&res); | |
| if (!input.empty()) | |
| { | |
| auto &input_res = graph.get_texture_resource(input); | |
| input_res.read_in_pass(index); | |
| input_res.add_image_usage(VK_IMAGE_USAGE_TRANSFER_DST_BIT); | |
| blit_texture_inputs.push_back(&input_res); | |
| } | |
| else | |
| blit_texture_inputs.push_back(nullptr); | |
| return res; | |
| } | |
| RenderTextureResource &RenderPass::set_depth_stencil_output(const std::string &name, const AttachmentInfo &info) | |
| { | |
| auto &res = graph.get_texture_resource(name); | |
| res.add_queue(queue); | |
| res.written_in_pass(index); | |
| res.set_attachment_info(info); | |
| res.add_image_usage(VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); | |
| depth_stencil_output = &res; | |
| return res; | |
| } | |
| void RenderPass::add_external_lock(const std::string &name, VkPipelineStageFlags stages) | |
| { | |
| auto *iface = graph.find_external_lock_interface(name); | |
| if (iface) | |
| { | |
| for (auto &lock : lock_interfaces) | |
| { | |
| if (lock.iface == iface) | |
| { | |
| lock.stages |= stages; | |
| return; | |
| } | |
| } | |
| lock_interfaces.push_back({ iface, stages }); | |
| } | |
| } | |
| RenderTextureResource &RenderPass::set_depth_stencil_input(const std::string &name) | |
| { | |
| auto &res = graph.get_texture_resource(name); | |
| res.add_queue(queue); | |
| res.read_in_pass(index); | |
| res.add_image_usage(VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); | |
| depth_stencil_input = &res; | |
| return res; | |
| } | |
| RenderGraph::RenderGraph() | |
| { | |
| EVENT_MANAGER_REGISTER_LATCH(RenderGraph, on_swapchain_changed, on_swapchain_destroyed, Vulkan::SwapchainParameterEvent); | |
| EVENT_MANAGER_REGISTER_LATCH(RenderGraph, on_device_created, on_device_destroyed, Vulkan::DeviceCreatedEvent); | |
| } | |
| void RenderGraph::on_swapchain_destroyed(const Vulkan::SwapchainParameterEvent &) | |
| { | |
| physical_image_attachments.clear(); | |
| physical_history_image_attachments.clear(); | |
| physical_events.clear(); | |
| physical_history_events.clear(); | |
| } | |
| void RenderGraph::on_swapchain_changed(const Vulkan::SwapchainParameterEvent &) | |
| { | |
| } | |
| void RenderGraph::on_device_created(const Vulkan::DeviceCreatedEvent &) | |
| { | |
| } | |
| void RenderGraph::on_device_destroyed(const Vulkan::DeviceCreatedEvent &) | |
| { | |
| reset(); | |
| } | |
| RenderTextureResource &RenderGraph::get_texture_resource(const std::string &name) | |
| { | |
| auto itr = resource_to_index.find(name); | |
| if (itr != end(resource_to_index)) | |
| { | |
| assert(resources[itr->second]->get_type() == RenderResource::Type::Texture); | |
| return static_cast<RenderTextureResource &>(*resources[itr->second]); | |
| } | |
| else | |
| { | |
| unsigned index = resources.size(); | |
| resources.emplace_back(new RenderTextureResource(index)); | |
| resources.back()->set_name(name); | |
| resource_to_index[name] = index; | |
| return static_cast<RenderTextureResource &>(*resources.back()); | |
| } | |
| } | |
| RenderBufferResource &RenderGraph::get_buffer_resource(const std::string &name) | |
| { | |
| auto itr = resource_to_index.find(name); | |
| if (itr != end(resource_to_index)) | |
| { | |
| assert(resources[itr->second]->get_type() == RenderResource::Type::Buffer); | |
| return static_cast<RenderBufferResource &>(*resources[itr->second]); | |
| } | |
| else | |
| { | |
| unsigned index = resources.size(); | |
| resources.emplace_back(new RenderBufferResource(index)); | |
| resources.back()->set_name(name); | |
| resource_to_index[name] = index; | |
| return static_cast<RenderBufferResource &>(*resources.back()); | |
| } | |
| } | |
| RenderResource &RenderGraph::get_proxy_resource(const std::string &name) | |
| { | |
| auto itr = resource_to_index.find(name); | |
| if (itr != end(resource_to_index)) | |
| { | |
| assert(resources[itr->second]->get_type() == RenderResource::Type::Proxy); | |
| return *resources[itr->second]; | |
| } | |
| else | |
| { | |
| unsigned index = resources.size(); | |
| resources.emplace_back(new RenderResource(RenderResource::Type::Proxy, index)); | |
| resources.back()->set_name(name); | |
| resource_to_index[name] = index; | |
| return *resources.back(); | |
| } | |
| } | |
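| // Physical buffer handles can be consumed by the application after a bake and reinstalled | |
| // before the next one, so persistent buffer contents survive graph rebuilds. | |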
| std::vector<Vulkan::BufferHandle> RenderGraph::consume_physical_buffers() const | |
| { | |
| return physical_buffers; | |
| } | |
| void RenderGraph::install_physical_buffers(std::vector<Vulkan::BufferHandle> buffers) | |
| { | |
| physical_buffers = std::move(buffers); | |
| } | |
| Vulkan::BufferHandle RenderGraph::consume_persistent_physical_buffer_resource(unsigned index) const | |
| { | |
| if (index >= physical_buffers.size()) | |
| return {}; | |
| if (!physical_buffers[index]) | |
| return {}; | |
| return physical_buffers[index]; | |
| } | |
| void RenderGraph::install_persistent_physical_buffer_resource(unsigned index, Vulkan::BufferHandle buffer) | |
| { | |
| if (index >= physical_buffers.size()) | |
| throw std::logic_error("Out of range."); | |
| physical_buffers[index] = buffer; | |
| } | |
| RenderPass &RenderGraph::add_pass(const std::string &name, RenderGraphQueueFlagBits queue) | |
| { | |
| auto itr = pass_to_index.find(name); | |
| if (itr != end(pass_to_index)) | |
| { | |
| return *passes[itr->second]; | |
| } | |
| else | |
| { | |
| unsigned index = passes.size(); | |
| passes.emplace_back(new RenderPass(*this, index, queue)); | |
| passes.back()->set_name(name); | |
| pass_to_index[name] = index; | |
| return *passes.back(); | |
| } | |
| } | |
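| // Typical usage (illustrative sketch; the pass/resource names and the AttachmentInfo | |
| // variable are hypothetical): | |
| //   auto &lighting = graph.add_pass("lighting", RENDER_GRAPH_QUEUE_GRAPHICS_BIT); | |
| //   lighting.add_color_output("hdr", color_info); | |
| //   lighting.set_depth_stencil_input("depth"); | |
| //   lighting.add_texture_input("shadow-map"); | |
| //   graph.set_backbuffer_source("tonemapped"); | |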
| RenderPass *RenderGraph::find_pass(const std::string &name) | |
| { | |
| auto itr = pass_to_index.find(name); | |
| if (itr != end(pass_to_index)) | |
| return passes[itr->second].get(); | |
| else | |
| return nullptr; | |
| } | |
| void RenderGraph::set_backbuffer_source(const std::string &name) | |
| { | |
| backbuffer_source = name; | |
| } | |
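| // Pre-bake sanity checks: every RMW pairing (color, storage buffer, blit, storage image) | |
| // must have matching input/output counts and compatible dimensions or buffer info. | |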
| void RenderGraph::validate_passes() | |
| { | |
| for (auto &pass_ptr : passes) | |
| { | |
| auto &pass = *pass_ptr; | |
| if (pass.get_color_inputs().size() != pass.get_color_outputs().size()) | |
| throw std::logic_error("Size of color inputs must match color outputs."); | |
| if (pass.get_storage_inputs().size() != pass.get_storage_outputs().size()) | |
| throw std::logic_error("Size of storage inputs must match storage outputs."); | |
| if (pass.get_blit_texture_inputs().size() != pass.get_blit_texture_outputs().size()) | |
| throw std::logic_error("Size of blit inputs must match blit outputs."); | |
| if (pass.get_storage_texture_inputs().size() != pass.get_storage_texture_outputs().size()) | |
| throw std::logic_error("Size of storage texture inputs must match storage texture outputs."); | |
| if (!pass.get_resolve_outputs().empty() && pass.get_resolve_outputs().size() != pass.get_color_outputs().size()) | |
| throw std::logic_error("Must have one resolve output for each color output."); | |
| unsigned num_inputs = pass.get_color_inputs().size(); | |
| for (unsigned i = 0; i < num_inputs; i++) | |
| { | |
| if (!pass.get_color_inputs()[i]) | |
| continue; | |
| if (get_resource_dimensions(*pass.get_color_inputs()[i]) != get_resource_dimensions(*pass.get_color_outputs()[i])) | |
| pass.make_color_input_scaled(i); | |
| } | |
| if (!pass.get_storage_outputs().empty()) | |
| { | |
| unsigned num_outputs = pass.get_storage_outputs().size(); | |
| for (unsigned i = 0; i < num_outputs; i++) | |
| { | |
| if (!pass.get_storage_inputs()[i]) | |
| continue; | |
| if (pass.get_storage_outputs()[i]->get_buffer_info() != pass.get_storage_inputs()[i]->get_buffer_info()) | |
| throw std::logic_error("Doing RMW on a storage buffer, but usage and sizes do not match."); | |
| } | |
| } | |
| if (!pass.get_blit_texture_outputs().empty()) | |
| { | |
| unsigned num_outputs = pass.get_blit_texture_outputs().size(); | |
| for (unsigned i = 0; i < num_outputs; i++) | |
| { | |
| if (!pass.get_blit_texture_inputs()[i]) | |
| continue; | |
| if (get_resource_dimensions(*pass.get_blit_texture_inputs()[i]) != get_resource_dimensions(*pass.get_blit_texture_outputs()[i])) | |
| throw std::logic_error("Doing RMW on a blit image, but usage and sizes do not match."); | |
| } | |
| } | |
| if (!pass.get_storage_texture_outputs().empty()) | |
| { | |
| unsigned num_outputs = pass.get_storage_texture_outputs().size(); | |
| for (unsigned i = 0; i < num_outputs; i++) | |
| { | |
| if (!pass.get_storage_texture_inputs()[i]) | |
| continue; | |
| if (get_resource_dimensions(*pass.get_storage_texture_outputs()[i]) != get_resource_dimensions(*pass.get_storage_texture_inputs()[i])) | |
| throw std::logic_error("Doing RMW on a storage texture image, but sizes do not match."); | |
| } | |
| } | |
| if (pass.get_depth_stencil_input() && pass.get_depth_stencil_output()) | |
| { | |
| if (get_resource_dimensions(*pass.get_depth_stencil_input()) != get_resource_dimensions(*pass.get_depth_stencil_output())) | |
| throw std::logic_error("Dimension mismatch."); | |
| } | |
| } | |
| } | |
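| // Assigns every logical resource a physical index. RMW input/output pairs and fake aliases | |
| // collapse onto the same physical resource, and queue/usage flags are accumulated per | |
| // physical resource. | |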
| void RenderGraph::build_physical_resources() | |
| { | |
| unsigned phys_index = 0; | |
| // Find resources which can alias safely. | |
| for (auto &pass_index : pass_stack) | |
| { | |
| auto &pass = *passes[pass_index]; | |
| for (auto &input : pass.get_generic_texture_inputs()) | |
| { | |
| if (input.texture->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*input.texture)); | |
| input.texture->set_physical_index(phys_index++); | |
| } | |
| else | |
| { | |
| physical_dimensions[input.texture->get_physical_index()].queues |= input.texture->get_used_queues(); | |
| physical_dimensions[input.texture->get_physical_index()].image_usage |= input.texture->get_image_usage(); | |
| } | |
| } | |
| for (auto &input : pass.get_generic_buffer_inputs()) | |
| { | |
| if (input.buffer->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*input.buffer)); | |
| input.buffer->set_physical_index(phys_index++); | |
| } | |
| else | |
| { | |
| physical_dimensions[input.buffer->get_physical_index()].queues |= input.buffer->get_used_queues(); | |
| physical_dimensions[input.buffer->get_physical_index()].buffer_info.usage |= input.buffer->get_buffer_usage(); | |
| } | |
| } | |
| for (auto *input : pass.get_color_scale_inputs()) | |
| { | |
| if (input && input->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*input)); | |
| input->set_physical_index(phys_index++); | |
| physical_dimensions[input->get_physical_index()].image_usage |= VK_IMAGE_USAGE_SAMPLED_BIT; | |
| } | |
| else if (input) | |
| { | |
| physical_dimensions[input->get_physical_index()].queues |= input->get_used_queues(); | |
| physical_dimensions[input->get_physical_index()].image_usage |= input->get_image_usage(); | |
| physical_dimensions[input->get_physical_index()].image_usage |= VK_IMAGE_USAGE_SAMPLED_BIT; | |
| } | |
| } | |
| if (!pass.get_color_inputs().empty()) | |
| { | |
| unsigned size = pass.get_color_inputs().size(); | |
| for (unsigned i = 0; i < size; i++) | |
| { | |
| auto *input = pass.get_color_inputs()[i]; | |
| if (input) | |
| { | |
| if (input->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*input)); | |
| input->set_physical_index(phys_index++); | |
| } | |
| else | |
| { | |
| physical_dimensions[input->get_physical_index()].queues |= input->get_used_queues(); | |
| physical_dimensions[input->get_physical_index()].image_usage |= input->get_image_usage(); | |
| } | |
| if (pass.get_color_outputs()[i]->get_physical_index() == RenderResource::Unused) | |
| pass.get_color_outputs()[i]->set_physical_index(input->get_physical_index()); | |
| else if (pass.get_color_outputs()[i]->get_physical_index() != input->get_physical_index()) | |
| throw std::logic_error("Cannot alias resources. Index already claimed."); | |
| } | |
| } | |
| } | |
| if (!pass.get_storage_inputs().empty()) | |
| { | |
| unsigned size = pass.get_storage_inputs().size(); | |
| for (unsigned i = 0; i < size; i++) | |
| { | |
| auto *input = pass.get_storage_inputs()[i]; | |
| if (input) | |
| { | |
| if (input->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*input)); | |
| input->set_physical_index(phys_index++); | |
| } | |
| else | |
| { | |
| physical_dimensions[input->get_physical_index()].queues |= input->get_used_queues(); | |
| physical_dimensions[input->get_physical_index()].buffer_info.usage |= input->get_buffer_usage(); | |
| } | |
| if (pass.get_storage_outputs()[i]->get_physical_index() == RenderResource::Unused) | |
| pass.get_storage_outputs()[i]->set_physical_index(input->get_physical_index()); | |
| else if (pass.get_storage_outputs()[i]->get_physical_index() != input->get_physical_index()) | |
| throw std::logic_error("Cannot alias resources. Index already claimed."); | |
| } | |
| } | |
| } | |
| if (!pass.get_blit_texture_inputs().empty()) | |
| { | |
| unsigned size = pass.get_blit_texture_inputs().size(); | |
| for (unsigned i = 0; i < size; i++) | |
| { | |
| auto *input = pass.get_blit_texture_inputs()[i]; | |
| if (input) | |
| { | |
| if (input->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*input)); | |
| input->set_physical_index(phys_index++); | |
| } | |
| else | |
| { | |
| physical_dimensions[input->get_physical_index()].queues |= input->get_used_queues(); | |
| physical_dimensions[input->get_physical_index()].image_usage |= input->get_image_usage(); | |
| } | |
| if (pass.get_blit_texture_outputs()[i]->get_physical_index() == RenderResource::Unused) | |
| pass.get_blit_texture_outputs()[i]->set_physical_index(input->get_physical_index()); | |
| else if (pass.get_blit_texture_outputs()[i]->get_physical_index() != input->get_physical_index()) | |
| throw std::logic_error("Cannot alias resources. Index already claimed."); | |
| } | |
| } | |
| } | |
| if (!pass.get_storage_texture_inputs().empty()) | |
| { | |
| unsigned size = pass.get_storage_texture_inputs().size(); | |
| for (unsigned i = 0; i < size; i++) | |
| { | |
| auto *input = pass.get_storage_texture_inputs()[i]; | |
| if (input) | |
| { | |
| if (input->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*input)); | |
| input->set_physical_index(phys_index++); | |
| } | |
| else | |
| { | |
| physical_dimensions[input->get_physical_index()].queues |= input->get_used_queues(); | |
| physical_dimensions[input->get_physical_index()].image_usage |= input->get_image_usage(); | |
| } | |
| if (pass.get_storage_texture_outputs()[i]->get_physical_index() == RenderResource::Unused) | |
| pass.get_storage_texture_outputs()[i]->set_physical_index(input->get_physical_index()); | |
| else if (pass.get_storage_texture_outputs()[i]->get_physical_index() != input->get_physical_index()) | |
| throw std::logic_error("Cannot alias resources. Index already claimed."); | |
| } | |
| } | |
| } | |
| for (auto &input : pass.get_proxy_inputs()) | |
| { | |
| if (input.proxy->get_physical_index() == RenderResource::Unused) | |
| { | |
| ResourceDimensions dim = {}; | |
| dim.flags |= ATTACHMENT_INFO_INTERNAL_PROXY_BIT; | |
| physical_dimensions.push_back(dim); | |
| input.proxy->set_physical_index(phys_index++); | |
| } | |
| else | |
| physical_dimensions[input.proxy->get_physical_index()].queues |= input.proxy->get_used_queues(); | |
| } | |
| for (auto *output : pass.get_color_outputs()) | |
| { | |
| if (output->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*output)); | |
| output->set_physical_index(phys_index++); | |
| } | |
| else | |
| { | |
| physical_dimensions[output->get_physical_index()].queues |= output->get_used_queues(); | |
| physical_dimensions[output->get_physical_index()].image_usage |= output->get_image_usage(); | |
| } | |
| } | |
| for (auto *output : pass.get_resolve_outputs()) | |
| { | |
| if (output->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*output)); | |
| output->set_physical_index(phys_index++); | |
| } | |
| else | |
| { | |
| physical_dimensions[output->get_physical_index()].queues |= output->get_used_queues(); | |
| physical_dimensions[output->get_physical_index()].image_usage |= output->get_image_usage(); | |
| } | |
| } | |
| for (auto *output : pass.get_storage_outputs()) | |
| { | |
| if (output->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*output)); | |
| output->set_physical_index(phys_index++); | |
| } | |
| else | |
| { | |
| physical_dimensions[output->get_physical_index()].queues |= output->get_used_queues(); | |
| physical_dimensions[output->get_physical_index()].buffer_info.usage |= output->get_buffer_usage(); | |
| } | |
| } | |
| for (auto &output : pass.get_proxy_outputs()) | |
| { | |
| if (output.proxy->get_physical_index() == RenderResource::Unused) | |
| { | |
| ResourceDimensions dim = {}; | |
| dim.flags |= ATTACHMENT_INFO_INTERNAL_PROXY_BIT; | |
| physical_dimensions.push_back(dim); | |
| output.proxy->set_physical_index(phys_index++); | |
| } | |
| else | |
| physical_dimensions[output.proxy->get_physical_index()].queues |= output.proxy->get_used_queues(); | |
| } | |
| for (auto *output : pass.get_transfer_outputs()) | |
| { | |
| if (output->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*output)); | |
| output->set_physical_index(phys_index++); | |
| } | |
| else | |
| { | |
| physical_dimensions[output->get_physical_index()].queues |= output->get_used_queues(); | |
| physical_dimensions[output->get_physical_index()].buffer_info.usage |= output->get_buffer_usage(); | |
| } | |
| } | |
| for (auto *output : pass.get_blit_texture_outputs()) | |
| { | |
| if (output->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*output)); | |
| output->set_physical_index(phys_index++); | |
| } | |
| else | |
| { | |
| physical_dimensions[output->get_physical_index()].queues |= output->get_used_queues(); | |
| physical_dimensions[output->get_physical_index()].image_usage |= output->get_image_usage(); | |
| } | |
| } | |
| for (auto *output : pass.get_storage_texture_outputs()) | |
| { | |
| if (output->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*output)); | |
| output->set_physical_index(phys_index++); | |
| } | |
| else | |
| { | |
| physical_dimensions[output->get_physical_index()].queues |= output->get_used_queues(); | |
| physical_dimensions[output->get_physical_index()].image_usage |= output->get_image_usage(); | |
| } | |
| } | |
| auto *ds_output = pass.get_depth_stencil_output(); | |
| auto *ds_input = pass.get_depth_stencil_input(); | |
| if (ds_input) | |
| { | |
| if (ds_input->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*ds_input)); | |
| ds_input->set_physical_index(phys_index++); | |
| } | |
| else | |
| { | |
| physical_dimensions[ds_input->get_physical_index()].queues |= ds_input->get_used_queues(); | |
| physical_dimensions[ds_input->get_physical_index()].image_usage |= ds_input->get_image_usage(); | |
| } | |
| if (ds_output) | |
| { | |
| if (ds_output->get_physical_index() == RenderResource::Unused) | |
| ds_output->set_physical_index(ds_input->get_physical_index()); | |
| else if (ds_output->get_physical_index() != ds_input->get_physical_index()) | |
| throw std::logic_error("Cannot alias resources. Index already claimed."); | |
| physical_dimensions[ds_output->get_physical_index()].queues |= ds_output->get_used_queues(); | |
| physical_dimensions[ds_output->get_physical_index()].image_usage |= ds_output->get_image_usage(); | |
| } | |
| } | |
| else if (ds_output) | |
| { | |
| if (ds_output->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*ds_output)); | |
| ds_output->set_physical_index(phys_index++); | |
| } | |
| else | |
| { | |
| physical_dimensions[ds_output->get_physical_index()].queues |= ds_output->get_used_queues(); | |
| physical_dimensions[ds_output->get_physical_index()].image_usage |= ds_output->get_image_usage(); | |
| } | |
| } | |
| // Assign input attachments last so they can alias properly with existing color/depth attachments in the | |
| // same subpass. | |
| for (auto *input : pass.get_attachment_inputs()) | |
| { | |
| if (input->get_physical_index() == RenderResource::Unused) | |
| { | |
| physical_dimensions.push_back(get_resource_dimensions(*input)); | |
| input->set_physical_index(phys_index++); | |
| } | |
| else | |
| { | |
| physical_dimensions[input->get_physical_index()].queues |= input->get_used_queues(); | |
| physical_dimensions[input->get_physical_index()].image_usage |= input->get_image_usage(); | |
| } | |
| } | |
| for (auto &pair : pass.get_fake_resource_aliases()) | |
| pair.second->set_physical_index(pair.first->get_physical_index()); | |
| } | |
| // Figure out which physical resources need to have history. | |
| physical_image_has_history.clear(); | |
| physical_image_has_history.resize(physical_dimensions.size()); | |
| for (auto &pass_index : pass_stack) | |
| { | |
| auto &pass = *passes[pass_index]; | |
| for (auto &history : pass.get_history_inputs()) | |
| { | |
| unsigned history_phys_index = history->get_physical_index(); | |
| if (history_phys_index == RenderResource::Unused) | |
| throw std::logic_error("History input is used, but it was never written to."); | |
| physical_image_has_history[history_phys_index] = true; | |
| } | |
| } | |
| } | |
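| // Decides which physical images may be transient attachments: images only (never buffers or | |
| // storage images), no history usage, only accessed within a single physical pass, and | |
| // subject to the implementation quirks for depth/stencil and color. | |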
| void RenderGraph::build_transients() | |
| { | |
| std::vector<unsigned> physical_pass_used(physical_dimensions.size()); | |
| for (auto &u : physical_pass_used) | |
| u = RenderPass::Unused; | |
| for (auto &dim : physical_dimensions) | |
| { | |
| // Buffers are never transient. | |
| // Storage images are never transient. | |
| if (dim.is_buffer_like()) | |
| dim.flags &= ~ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT; | |
| else | |
| dim.flags |= ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT; | |
| auto index = unsigned(&dim - physical_dimensions.data()); | |
| if (physical_image_has_history[index]) | |
| dim.flags &= ~ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT; | |
| if (Vulkan::format_has_depth_or_stencil_aspect(dim.format) && !Vulkan::ImplementationQuirks::get().use_transient_depth_stencil) | |
| dim.flags &= ~ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT; | |
| if (!Vulkan::format_has_depth_or_stencil_aspect(dim.format) && !Vulkan::ImplementationQuirks::get().use_transient_color) | |
| dim.flags &= ~ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT; | |
| } | |
| for (auto &resource : resources) | |
| { | |
| if (resource->get_type() != RenderResource::Type::Texture) | |
| continue; | |
| unsigned physical_index = resource->get_physical_index(); | |
| if (physical_index == RenderResource::Unused) | |
| continue; | |
| for (auto &pass : resource->get_write_passes()) | |
| { | |
| unsigned phys = passes[pass]->get_physical_pass_index(); | |
| if (phys != RenderPass::Unused) | |
| { | |
| if (physical_pass_used[physical_index] != RenderPass::Unused && | |
| phys != physical_pass_used[physical_index]) | |
| { | |
| physical_dimensions[physical_index].flags &= ~ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT; | |
| break; | |
| } | |
| physical_pass_used[physical_index] = phys; | |
| } | |
| } | |
| for (auto &pass : resource->get_read_passes()) | |
| { | |
| unsigned phys = passes[pass]->get_physical_pass_index(); | |
| if (phys != RenderPass::Unused) | |
| { | |
| if (physical_pass_used[physical_index] != RenderPass::Unused && | |
| phys != physical_pass_used[physical_index]) | |
| { | |
| physical_dimensions[physical_index].flags &= ~ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT; | |
| break; | |
| } | |
| physical_pass_used[physical_index] = phys; | |
| } | |
| } | |
| } | |
| } | |
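| // Translates each baked physical pass into a Vulkan::RenderPassInfo: per-subpass color, | |
| // resolve, input and depth attachment lists, plus clear/load/store decisions based on how | |
| // each attachment is first used within the pass. | |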
| void RenderGraph::build_render_pass_info() | |
| { | |
| for (auto &physical_pass : physical_passes) | |
| { | |
| auto &rp = physical_pass.render_pass_info; | |
| physical_pass.subpasses.resize(physical_pass.passes.size()); | |
| rp.subpasses = physical_pass.subpasses.data(); | |
| rp.num_subpasses = physical_pass.subpasses.size(); | |
| rp.clear_attachments = 0; | |
| rp.load_attachments = 0; | |
| rp.store_attachments = ~0u; | |
| physical_pass.color_clear_requests.clear(); | |
| physical_pass.depth_clear_request = {}; | |
| auto &colors = physical_pass.physical_color_attachments; | |
| colors.clear(); | |
| const auto add_unique_color = [&](unsigned index) -> std::pair<unsigned, bool> { | |
| auto itr = find(begin(colors), end(colors), index); | |
| if (itr != end(colors)) | |
| return std::make_pair(unsigned(itr - begin(colors)), false); | |
| else | |
| { | |
| unsigned ret = colors.size(); | |
| colors.push_back(index); | |
| return std::make_pair(ret, true); | |
| } | |
| }; | |
| const auto add_unique_input_attachment = [&](unsigned index) -> std::pair<unsigned, bool> { | |
| if (index == physical_pass.physical_depth_stencil_attachment) | |
| return std::make_pair(unsigned(colors.size()), false); // The N + 1 attachment refers to depth. | |
| else | |
| return add_unique_color(index); | |
| }; | |
| for (auto &subpass : physical_pass.passes) | |
| { | |
| std::vector<ScaledClearRequests> scaled_clear_requests; | |
| auto &pass = *passes[subpass]; | |
| auto subpass_index = unsigned(&subpass - physical_pass.passes.data()); | |
| // Add color attachments. | |
| unsigned num_color_attachments = pass.get_color_outputs().size(); | |
| physical_pass.subpasses[subpass_index].num_color_attachments = num_color_attachments; | |
| for (unsigned i = 0; i < num_color_attachments; i++) | |
| { | |
| auto res = add_unique_color(pass.get_color_outputs()[i]->get_physical_index()); | |
| physical_pass.subpasses[subpass_index].color_attachments[i] = res.first; | |
| if (res.second) // This is the first time the color attachment is used, check if we need LOAD, or if we can clear it. | |
| { | |
| bool has_color_input = !pass.get_color_inputs().empty() && pass.get_color_inputs()[i]; | |
| bool has_scaled_color_input = !pass.get_color_scale_inputs().empty() && pass.get_color_scale_inputs()[i]; | |
| if (!has_color_input && !has_scaled_color_input) | |
| { | |
| if (pass.get_clear_color(i)) | |
| { | |
| rp.clear_attachments |= 1u << res.first; | |
| physical_pass.color_clear_requests.push_back({ &pass, &rp.clear_color[res.first], i }); | |
| } | |
| } | |
| else | |
| { | |
| if (has_scaled_color_input) | |
| scaled_clear_requests.push_back({ i, pass.get_color_scale_inputs()[i]->get_physical_index() }); | |
| else | |
| rp.load_attachments |= 1u << res.first; | |
| } | |
| } | |
| } | |
| if (!pass.get_resolve_outputs().empty()) | |
| { | |
| physical_pass.subpasses[subpass_index].num_resolve_attachments = num_color_attachments; | |
| for (unsigned i = 0; i < num_color_attachments; i++) | |
| { | |
| auto res = add_unique_color(pass.get_resolve_outputs()[i]->get_physical_index()); | |
| physical_pass.subpasses[subpass_index].resolve_attachments[i] = res.first; | |
| // Resolve attachments are always don't care, since the resolve fully overwrites them. | |
| } | |
| } | |
| physical_pass.scaled_clear_requests.push_back(std::move(scaled_clear_requests)); | |
| auto *ds_input = pass.get_depth_stencil_input(); | |
| auto *ds_output = pass.get_depth_stencil_output(); | |
| const auto add_unique_ds = [&](unsigned index) -> std::pair<unsigned, bool> { | |
| assert(physical_pass.physical_depth_stencil_attachment == RenderResource::Unused || | |
| physical_pass.physical_depth_stencil_attachment == index); | |
| bool new_attachment = physical_pass.physical_depth_stencil_attachment == RenderResource::Unused; | |
| physical_pass.physical_depth_stencil_attachment = index; | |
| return std::make_pair(index, new_attachment); | |
| }; | |
| if (ds_output && ds_input) | |
| { | |
| auto res = add_unique_ds(ds_output->get_physical_index()); | |
| // If this is the first subpass in which the attachment is used, we need to load it. | |
| if (res.second) | |
| rp.load_attachments |= 1u << res.first; | |
| rp.op_flags |= Vulkan::RENDER_PASS_OP_STORE_DEPTH_STENCIL_BIT; | |
| physical_pass.subpasses[subpass_index].depth_stencil_mode = Vulkan::RenderPassInfo::DepthStencil::ReadWrite; | |
| } | |
| else if (ds_output) | |
| { | |
| auto res = add_unique_ds(ds_output->get_physical_index()); | |
| // If this is the first subpass in which the attachment is used, we need to either clear or discard it. | |
| if (res.second && pass.get_clear_depth_stencil()) | |
| { | |
| rp.op_flags |= Vulkan::RENDER_PASS_OP_CLEAR_DEPTH_STENCIL_BIT; | |
| physical_pass.depth_clear_request.pass = &pass; | |
| physical_pass.depth_clear_request.target = &rp.clear_depth_stencil; | |
| } | |
| rp.op_flags |= Vulkan::RENDER_PASS_OP_STORE_DEPTH_STENCIL_BIT; | |
| physical_pass.subpasses[subpass_index].depth_stencil_mode = Vulkan::RenderPassInfo::DepthStencil::ReadWrite; | |
| assert(physical_pass.physical_depth_stencil_attachment == RenderResource::Unused || | |
| physical_pass.physical_depth_stencil_attachment == ds_output->get_physical_index()); | |
| physical_pass.physical_depth_stencil_attachment = ds_output->get_physical_index(); | |
| } | |
| else if (ds_input) | |
| { | |
| auto res = add_unique_ds(ds_input->get_physical_index()); | |
| // If this is the first subpass in which the attachment is used, we need to load it. | |
| if (res.second) | |
| { | |
| rp.op_flags |= Vulkan::RENDER_PASS_OP_DEPTH_STENCIL_READ_ONLY_BIT | | |
| Vulkan::RENDER_PASS_OP_LOAD_DEPTH_STENCIL_BIT; | |
| auto current_physical_pass = unsigned(&physical_pass - physical_passes.data()); | |
| const auto check_preserve = [this, current_physical_pass](const RenderResource &tex) -> bool { | |
| for (auto &read_pass : tex.get_read_passes()) | |
| if (passes[read_pass]->get_physical_pass_index() > current_physical_pass) | |
| return true; | |
| return false; | |
| }; | |
| bool preserve_depth = check_preserve(*ds_input); | |
| if (!preserve_depth) | |
| { | |
| for (auto &logical_pass : passes) | |
| { | |
| for (auto &alias : logical_pass->get_fake_resource_aliases()) | |
| { | |
| if (alias.first == ds_input && check_preserve(*alias.second)) | |
| { | |
| preserve_depth = true; | |
| break; | |
| } | |
| } | |
| } | |
| } | |
| if (preserve_depth) | |
| { | |
| // Have to store here, or the attachment becomes undefined in future passes. | |
| rp.op_flags |= Vulkan::RENDER_PASS_OP_STORE_DEPTH_STENCIL_BIT; | |
| } | |
| } | |
| physical_pass.subpasses[subpass_index].depth_stencil_mode = Vulkan::RenderPassInfo::DepthStencil::ReadOnly; | |
| } | |
| else | |
| { | |
| physical_pass.subpasses[subpass_index].depth_stencil_mode = Vulkan::RenderPassInfo::DepthStencil::None; | |
| } | |
| } | |
| for (auto &subpass : physical_pass.passes) | |
| { | |
| auto &pass = *passes[subpass]; | |
| unsigned subpass_index = unsigned(&subpass - physical_pass.passes.data()); | |
| // Add input attachments. | |
| // Have to do these in a separate loop so we can pick up depth stencil input attachments properly. | |
| unsigned num_input_attachments = pass.get_attachment_inputs().size(); | |
| physical_pass.subpasses[subpass_index].num_input_attachments = num_input_attachments; | |
| for (unsigned i = 0; i < num_input_attachments; i++) | |
| { | |
| auto res = add_unique_input_attachment(pass.get_attachment_inputs()[i]->get_physical_index()); | |
| physical_pass.subpasses[subpass_index].input_attachments[i] = res.first; | |
| // If this is the first subpass in which the attachment is used, we need to load it. | |
| if (res.second) | |
| rp.load_attachments |= 1u << res.first; | |
| } | |
| } | |
| physical_pass.render_pass_info.num_color_attachments = physical_pass.physical_color_attachments.size(); | |
| } | |
| } | |
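| // Greedily merges adjacent graphics passes on the same queue into multi-subpass physical | |
| // passes when that keeps color or depth data on-tile and introduces no dependency that | |
| // cannot be expressed as a subpass dependency. | |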
| void RenderGraph::build_physical_passes() | |
| { | |
| physical_passes.clear(); | |
| PhysicalPass physical_pass; | |
| const auto find_attachment = [](const std::vector<RenderTextureResource *> &resource_list, const RenderTextureResource *resource) -> bool { | |
| if (!resource) | |
| return false; | |
| auto itr = find_if(begin(resource_list), end(resource_list), [resource](const RenderTextureResource *res) { | |
| return res->get_physical_index() == resource->get_physical_index(); | |
| }); | |
| return itr != end(resource_list); | |
| }; | |
| const auto find_buffer = [](const std::vector<RenderBufferResource *> &resource_list, const RenderBufferResource *resource) -> bool { | |
| if (!resource) | |
| return false; | |
| auto itr = find_if(begin(resource_list), end(resource_list), [resource](const RenderBufferResource *res) { | |
| return res->get_physical_index() == resource->get_physical_index(); | |
| }); | |
| return itr != end(resource_list); | |
| }; | |
| const auto should_merge = [&](const RenderPass &prev, const RenderPass &next) -> bool { | |
| // Can only merge graphics in same queue. | |
| if ((prev.get_queue() & compute_queues) || (next.get_queue() != prev.get_queue())) | |
| return false; | |
| if (!Vulkan::ImplementationQuirks::get().merge_subpasses) | |
| return false; | |
| for (auto *output : prev.get_color_outputs()) | |
| { | |
| // Need to mip-map after this pass, so cannot merge. | |
| if ((physical_dimensions[output->get_physical_index()].levels > 1) && | |
| (physical_dimensions[output->get_physical_index()].flags & ATTACHMENT_INFO_MIPGEN_BIT) != 0) | |
| return false; | |
| } | |
| // Need non-local dependency, cannot merge. | |
| for (auto &input : next.get_generic_texture_inputs()) | |
| { | |
| if (find_attachment(prev.get_color_outputs(), input.texture)) | |
| return false; | |
| if (find_attachment(prev.get_resolve_outputs(), input.texture)) | |
| return false; | |
| if (find_attachment(prev.get_storage_texture_outputs(), input.texture)) | |
| return false; | |
| if (find_attachment(prev.get_blit_texture_outputs(), input.texture)) | |
| return false; | |
| if (input.texture && prev.get_depth_stencil_output() == input.texture) | |
| return false; | |
| } | |
| // Need non-local dependency, cannot merge. | |
| for (auto &input : next.get_generic_buffer_inputs()) | |
| if (find_buffer(prev.get_storage_outputs(), input.buffer)) | |
| return false; | |
| // Need non-local dependency, cannot merge. | |
| for (auto *input : next.get_blit_texture_inputs()) | |
| if (find_attachment(prev.get_blit_texture_inputs(), input)) | |
| return false; | |
| // Need non-local dependency, cannot merge. | |
| for (auto *input : next.get_storage_inputs()) | |
| if (find_buffer(prev.get_storage_outputs(), input)) | |
| return false; | |
| // Need non-local dependency, cannot merge. | |
| for (auto *input : next.get_storage_texture_inputs()) | |
| if (find_attachment(prev.get_storage_texture_outputs(), input)) | |
| return false; | |
| // Need non-local dependency, cannot merge. | |
| for (auto *input : next.get_color_scale_inputs()) | |
| { | |
| if (find_attachment(prev.get_storage_texture_outputs(), input)) | |
| return false; | |
| if (find_attachment(prev.get_blit_texture_outputs(), input)) | |
| return false; | |
| if (find_attachment(prev.get_color_outputs(), input)) | |
| return false; | |
| if (find_attachment(prev.get_resolve_outputs(), input)) | |
| return false; | |
| } | |
| const auto different_attachment = [](const RenderResource *a, const RenderResource *b) { | |
| return a && b && a->get_physical_index() != b->get_physical_index(); | |
| }; | |
| const auto same_attachment = [](const RenderResource *a, const RenderResource *b) { | |
| return a && b && a->get_physical_index() == b->get_physical_index(); | |
| }; | |
| // Need a different depth attachment, break up the pass. | |
| if (different_attachment(next.get_depth_stencil_input(), prev.get_depth_stencil_input())) | |
| return false; | |
| if (different_attachment(next.get_depth_stencil_output(), prev.get_depth_stencil_input())) | |
| return false; | |
| if (different_attachment(next.get_depth_stencil_input(), prev.get_depth_stencil_output())) | |
| return false; | |
| if (different_attachment(next.get_depth_stencil_output(), prev.get_depth_stencil_output())) | |
| return false; | |
| for (auto *input : next.get_color_inputs()) | |
| { | |
| if (!input) | |
| continue; | |
| if (find_attachment(prev.get_storage_texture_outputs(), input)) | |
| return false; | |
| if (find_attachment(prev.get_blit_texture_outputs(), input)) | |
| return false; | |
| } | |
| // Now, we have found all failure cases, try to see if we *should* merge. | |
| // Keep color on tile. | |
| for (auto *input : next.get_color_inputs()) | |
| { | |
| if (!input) | |
| continue; | |
| if (find_attachment(prev.get_color_outputs(), input)) | |
| return true; | |
| if (find_attachment(prev.get_resolve_outputs(), input)) | |
| return true; | |
| } | |
| // Keep depth on tile. | |
| if (same_attachment(next.get_depth_stencil_input(), prev.get_depth_stencil_input()) || | |
| same_attachment(next.get_depth_stencil_input(), prev.get_depth_stencil_output())) | |
| { | |
| return true; | |
| } | |
| // Keep depth attachment or color on-tile. | |
| for (auto *input : next.get_attachment_inputs()) | |
| { | |
| if (find_attachment(prev.get_color_outputs(), input)) | |
| return true; | |
| if (find_attachment(prev.get_resolve_outputs(), input)) | |
| return true; | |
| if (input && prev.get_depth_stencil_output() == input) | |
| return true; | |
| } | |
| // No reason to merge, so don't. | |
| return false; | |
| }; | |
| for (unsigned index = 0; index < pass_stack.size(); ) | |
| { | |
| unsigned merge_end = index + 1; | |
| for (; merge_end < pass_stack.size(); merge_end++) | |
| { | |
| bool merge = true; | |
| for (unsigned merge_start = index; merge_start < merge_end; merge_start++) | |
| { | |
| if (!should_merge(*passes[pass_stack[merge_start]], *passes[pass_stack[merge_end]])) | |
| { | |
| merge = false; | |
| break; | |
| } | |
| } | |
| if (!merge) | |
| break; | |
| } | |
| physical_pass.passes.insert(end(physical_pass.passes), begin(pass_stack) + index, begin(pass_stack) + merge_end); | |
| physical_passes.push_back(std::move(physical_pass)); | |
| index = merge_end; | |
| } | |
| for (auto &phys_pass : physical_passes) | |
| { | |
| unsigned index = unsigned(&phys_pass - physical_passes.data()); | |
| for (auto &pass : phys_pass.passes) | |
| passes[pass]->set_physical_pass_index(index); | |
| } | |
| } | |
| void RenderGraph::log() | |
| { | |
| for (auto &resource : physical_dimensions) | |
| { | |
| if (resource.buffer_info.size) | |
| { | |
| LOGI("Resource #%u (%s): size: %u\n", | |
| unsigned(&resource - physical_dimensions.data()), | |
| resource.name.c_str(), | |
| unsigned(resource.buffer_info.size)); | |
| } | |
| else | |
| { | |
| LOGI("Resource #%u (%s): %u x %u (fmt: %u), samples: %u, transient: %s%s\n", | |
| unsigned(&resource - physical_dimensions.data()), | |
| resource.name.c_str(), | |
| resource.width, resource.height, unsigned(resource.format), resource.samples, | |
| (resource.flags & ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT) ? "yes" : "no", | |
| unsigned(&resource - physical_dimensions.data()) == swapchain_physical_index ? " (swapchain)" : ""); | |
| } | |
| } | |
| auto barrier_itr = begin(pass_barriers); | |
| const auto swap_str = [this](const Barrier &barrier) -> const char * { | |
| return barrier.resource_index == swapchain_physical_index ? | |
| " (swapchain)" : ""; | |
| }; | |
| for (auto &subpasses : physical_passes) | |
| { | |
| LOGI("Physical pass #%u:\n", unsigned(&subpasses - physical_passes.data())); | |
| for (auto &barrier : subpasses.invalidate) | |
| { | |
| LOGI(" Invalidate: %u%s, layout: %s, access: %s, stages: %s\n", | |
| barrier.resource_index, | |
| swap_str(barrier), | |
| Vulkan::layout_to_string(barrier.layout), | |
| Vulkan::access_flags_to_string(barrier.access).c_str(), | |
| Vulkan::stage_flags_to_string(barrier.stages).c_str()); | |
| } | |
| for (auto &subpass : subpasses.passes) | |
| { | |
| LOGI(" Subpass #%u (%s):\n", unsigned(&subpass - subpasses.passes.data()), this->passes[subpass]->get_name().c_str()); | |
| auto &pass = *this->passes[subpass]; | |
| auto &barriers = *barrier_itr; | |
| for (auto &barrier : barriers.invalidate) | |
| { | |
| if ((physical_dimensions[barrier.resource_index].flags & ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT) == 0) | |
| { | |
| LOGI(" Invalidate: %u%s, layout: %s, access: %s, stages: %s\n", | |
| barrier.resource_index, | |
| swap_str(barrier), | |
| Vulkan::layout_to_string(barrier.layout), | |
| Vulkan::access_flags_to_string(barrier.access).c_str(), | |
| Vulkan::stage_flags_to_string(barrier.stages).c_str()); | |
| } | |
| } | |
| if (pass.get_depth_stencil_output()) | |
| LOGI(" DepthStencil RW: %u\n", pass.get_depth_stencil_output()->get_physical_index()); | |
| else if (pass.get_depth_stencil_input()) | |
| LOGI(" DepthStencil ReadOnly: %u\n", pass.get_depth_stencil_input()->get_physical_index()); | |
| for (auto &output : pass.get_color_outputs()) | |
| LOGI(" ColorAttachment #%u: %u\n", unsigned(&output - pass.get_color_outputs().data()), output->get_physical_index()); | |
| for (auto &output : pass.get_resolve_outputs()) | |
| LOGI(" ResolveAttachment #%u: %u\n", unsigned(&output - pass.get_resolve_outputs().data()), output->get_physical_index()); | |
| for (auto &input : pass.get_attachment_inputs()) | |
| LOGI(" InputAttachment #%u: %u\n", unsigned(&input - pass.get_attachment_inputs().data()), input->get_physical_index()); | |
| for (auto &input : pass.get_generic_texture_inputs()) | |
| LOGI(" Read-only texture #%u: %u\n", unsigned(&input - pass.get_generic_texture_inputs().data()), input.texture->get_physical_index()); | |
| for (auto &input : pass.get_generic_buffer_inputs()) | |
| LOGI(" Read-only buffer #%u: %u\n", unsigned(&input - pass.get_generic_buffer_inputs().data()), input.buffer->get_physical_index()); | |
| for (auto &input : pass.get_color_scale_inputs()) | |
| { | |
| if (input) | |
| { | |
| LOGI(" ColorScaleInput #%u: %u\n", | |
| unsigned(&input - pass.get_color_scale_inputs().data()), | |
| input->get_physical_index()); | |
| } | |
| } | |
| for (auto &barrier : barriers.flush) | |
| { | |
| if ((physical_dimensions[barrier.resource_index].flags & ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT) == 0 && | |
| barrier.resource_index != swapchain_physical_index) | |
| { | |
| LOGI(" Flush: %u, layout: %s, access: %s, stages: %s\n", | |
| barrier.resource_index, Vulkan::layout_to_string(barrier.layout), | |
| Vulkan::access_flags_to_string(barrier.access).c_str(), | |
| Vulkan::stage_flags_to_string(barrier.stages).c_str()); | |
| } | |
| } | |
| ++barrier_itr; | |
| } | |
| for (auto &barrier : subpasses.flush) | |
| { | |
| LOGI(" Flush: %u%s, layout: %s, access: %s, stages: %s\n", | |
| barrier.resource_index, | |
| swap_str(barrier), | |
| Vulkan::layout_to_string(barrier.layout), | |
| Vulkan::access_flags_to_string(barrier.access).c_str(), | |
| Vulkan::stage_flags_to_string(barrier.stages).c_str()); | |
| } | |
| } | |
| } | |
| void RenderGraph::enqueue_mipmap_requests(Vulkan::CommandBuffer &cmd, const std::vector<MipmapRequests> &requests) | |
| { | |
| if (requests.empty()) | |
| return; | |
| for (auto &req : requests) | |
| { | |
| auto &image = physical_attachments[req.physical_resource]->get_image(); | |
| cmd.begin_region("render-graph-mipgen"); | |
| cmd.barrier_prepare_generate_mipmap(image, req.layout, req.stages, req.access); | |
| cmd.generate_mipmap(image); | |
| cmd.end_region(); | |
| } | |
| } | |
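| // Scaled color inputs cannot simply be loaded, so they are read back by drawing a | |
| // fullscreen quad that samples each source attachment into its target. | |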
| void RenderGraph::enqueue_scaled_requests(Vulkan::CommandBuffer &cmd, const std::vector<ScaledClearRequests> &requests) | |
| { | |
| if (requests.empty()) | |
| return; | |
| std::vector<std::pair<std::string, int>> defines; | |
| defines.reserve(requests.size()); | |
| for (auto &req : requests) | |
| { | |
| defines.emplace_back(std::string("HAVE_TARGET_") + std::to_string(req.target), 1); | |
| cmd.set_texture(0, req.target, *physical_attachments[req.physical_resource], Vulkan::StockSampler::LinearClamp); | |
| } | |
| Vulkan::CommandBufferUtil::draw_fullscreen_quad(cmd, "builtin://shaders/quad.vert", | |
| "builtin://shaders/scaled_readback.frag", defines); | |
| } | |
| void RenderGraph::build_aliases() | |
| { | |
| struct Range | |
| { | |
| unsigned first_write_pass = ~0u; | |
| unsigned last_write_pass = 0; | |
| unsigned first_read_pass = ~0u; | |
| unsigned last_read_pass = 0; | |
| bool block_alias = false; | |
| bool has_writer() const | |
| { | |
| return first_write_pass <= last_write_pass; | |
| } | |
| bool has_reader() const | |
| { | |
| return first_read_pass <= last_read_pass; | |
| } | |
| bool is_used() const | |
| { | |
| return has_writer() || has_reader(); | |
| } | |
| bool can_alias() const | |
| { | |
| // If we read before we have completely written to a resource we need to preserve it, so no alias is possible. | |
| if (has_reader() && has_writer() && first_read_pass <= first_write_pass) | |
| return false; | |
| if (block_alias) | |
| return false; | |
| return true; | |
| } | |
| unsigned last_used_pass() const | |
| { | |
| unsigned last_pass = 0; | |
| if (has_writer()) | |
| last_pass = std::max(last_pass, last_write_pass); | |
| if (has_reader()) | |
| last_pass = std::max(last_pass, last_read_pass); | |
| return last_pass; | |
| } | |
| unsigned first_used_pass() const | |
| { | |
| unsigned first_pass = ~0u; | |
| if (has_writer()) | |
| first_pass = std::min(first_pass, first_write_pass); | |
| if (has_reader()) | |
| first_pass = std::min(first_pass, first_read_pass); | |
| return first_pass; | |
| } | |
| bool disjoint_lifetime(const Range &range) const | |
| { | |
| if (!is_used() || !range.is_used()) | |
| return false; | |
| if (!can_alias() || !range.can_alias()) | |
| return false; | |
| bool left = last_used_pass() < range.first_used_pass(); | |
| bool right = range.last_used_pass() < first_used_pass(); | |
| return left || right; | |
| } | |
| }; | |
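| // Illustrative example: a resource first written in pass 2 and last read in pass 4 has lifetime [2, 4]. | |
| // Another resource living entirely in [6, 8] has a disjoint lifetime, so the two can share one physical image, | |
| // provided neither range is blocked from aliasing (block_alias or read-before-complete-write). | |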
| std::vector<Range> pass_range(physical_dimensions.size()); | |
| const auto register_reader = [&pass_range](const RenderTextureResource *resource, unsigned pass_index) { | |
| if (resource && pass_index != RenderPass::Unused) | |
| { | |
| unsigned phys = resource->get_physical_index(); | |
| if (phys != RenderResource::Unused) | |
| { | |
| auto &range = pass_range[phys]; | |
| range.last_read_pass = std::max(range.last_read_pass, pass_index); | |
| range.first_read_pass = std::min(range.first_read_pass, pass_index); | |
| } | |
| } | |
| }; | |
| const auto register_writer = [&pass_range](const RenderTextureResource *resource, unsigned pass_index, bool block_alias) { | |
| if (resource && pass_index != RenderPass::Unused) | |
| { | |
| unsigned phys = resource->get_physical_index(); | |
| if (phys != RenderResource::Unused) | |
| { | |
| auto &range = pass_range[phys]; | |
| range.last_write_pass = std::max(range.last_write_pass, pass_index); | |
| range.first_write_pass = std::min(range.first_write_pass, pass_index); | |
| if (block_alias) | |
| range.block_alias = block_alias; | |
| } | |
| } | |
| }; | |
| for (auto &pass : pass_stack) | |
| { | |
| auto &subpass = *passes[pass]; | |
| for (auto *input : subpass.get_color_inputs()) | |
| register_reader(input, subpass.get_physical_pass_index()); | |
| for (auto *input : subpass.get_color_scale_inputs()) | |
| register_reader(input, subpass.get_physical_pass_index()); | |
| for (auto *input : subpass.get_attachment_inputs()) | |
| register_reader(input, subpass.get_physical_pass_index()); | |
| for (auto &input : subpass.get_generic_texture_inputs()) | |
| register_reader(input.texture, subpass.get_physical_pass_index()); | |
| for (auto *input : subpass.get_blit_texture_inputs()) | |
| register_reader(input, subpass.get_physical_pass_index()); | |
| for (auto *input : subpass.get_storage_texture_inputs()) | |
| register_reader(input, subpass.get_physical_pass_index()); | |
| if (subpass.get_depth_stencil_input()) | |
| register_reader(subpass.get_depth_stencil_input(), subpass.get_physical_pass_index()); | |
| // If a subpass may not execute, we cannot alias with that resource because some other pass may invalidate it. | |
| bool block_alias = subpass.may_not_need_render_pass(); | |
| if (subpass.get_depth_stencil_output()) | |
| register_writer(subpass.get_depth_stencil_output(), subpass.get_physical_pass_index(), block_alias); | |
| for (auto *output : subpass.get_color_outputs()) | |
| register_writer(output, subpass.get_physical_pass_index(), block_alias); | |
| for (auto *output : subpass.get_resolve_outputs()) | |
| register_writer(output, subpass.get_physical_pass_index(), block_alias); | |
| for (auto *output : subpass.get_blit_texture_outputs()) | |
| register_writer(output, subpass.get_physical_pass_index(), block_alias); | |
| // Storage textures are not aliased, because they are implicitly preserved. | |
| for (auto *output : subpass.get_storage_texture_outputs()) | |
| register_writer(output, subpass.get_physical_pass_index(), true); | |
| } | |
| std::vector<std::vector<unsigned>> alias_chains(physical_dimensions.size()); | |
| physical_aliases.resize(physical_dimensions.size()); | |
| for (auto &v : physical_aliases) | |
| v = RenderResource::Unused; | |
| for (unsigned i = 0; i < physical_dimensions.size(); i++) | |
| { | |
| // No aliases for buffers. | |
| if (physical_dimensions[i].buffer_info.size) | |
| continue; | |
| // No aliases for images with history. | |
| if (physical_image_has_history[i]) | |
| continue; | |
| // Only try to alias with lower-indexed resources, because we allocate them one-by-one starting from index 0. | |
| for (unsigned j = 0; j < i; j++) | |
| { | |
| if (physical_image_has_history[j]) | |
| continue; | |
| if (physical_dimensions[i] == physical_dimensions[j]) | |
| { | |
| // Only alias if the resources are used in the same queue; this way we avoid introducing | |
| // multi-queue shenanigans, since we can only use events to pass aliasing barriers. | |
| // Also, only alias if the resource is used by one single queue. | |
| bool same_single_queue = physical_dimensions[i].queues == physical_dimensions[j].queues; | |
| if ((physical_dimensions[i].queues & (physical_dimensions[i].queues - 1)) != 0) | |
| same_single_queue = false; | |
| if (pass_range[i].disjoint_lifetime(pass_range[j]) && same_single_queue) | |
| { | |
| // We can alias! | |
| physical_aliases[i] = j; | |
| if (alias_chains[j].empty()) | |
| alias_chains[j].push_back(j); | |
| alias_chains[j].push_back(i); | |
| // We might have different image usage, propagate this information. | |
| physical_dimensions[j].image_usage |= physical_dimensions[i].image_usage; | |
| physical_dimensions[i].image_usage = physical_dimensions[j].image_usage; | |
| break; | |
| } | |
| } | |
| } | |
| } | |
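| // Example: if resources 5 and 9 both alias resource 2, the chain becomes {2, 5, 9} (sorted by usage order below). | |
| // Each entry hands its state over to the next at its last use, and the final entry hands back to the head of the | |
| // chain; the physical image is shared, only the event/layout state is transferred between aliases. | |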
| // Now we've found the aliases, so set up the transfer barriers in order of use. | |
| for (auto &chain : alias_chains) | |
| { | |
| if (chain.empty()) | |
| continue; | |
| sort(begin(chain), end(chain), [&](unsigned a, unsigned b) -> bool { | |
| return pass_range[a].last_used_pass() < pass_range[b].first_used_pass(); | |
| }); | |
| for (unsigned i = 0; i < chain.size(); i++) | |
| { | |
| if (i + 1 < chain.size()) | |
| physical_passes[pass_range[chain[i]].last_used_pass()].alias_transfer.push_back(std::make_pair(chain[i], chain[i + 1])); | |
| else | |
| physical_passes[pass_range[chain[i]].last_used_pass()].alias_transfer.push_back(std::make_pair(chain[i], chain[0])); | |
| } | |
| } | |
| } | |
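| // invalidated_in_stage[] tracks, per pipeline stage bit, which access masks have already been made visible. | |
| // A barrier only needs a new cache invalidation if it requests an access mask in some stage that has not | |
| // been invalidated there yet. | |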
| bool RenderGraph::need_invalidate(const Barrier &barrier, const PipelineEvent &event) | |
| { | |
| bool need_invalidate = false; | |
| Util::for_each_bit(barrier.stages, [&](uint32_t bit) { | |
| if (barrier.access & ~event.invalidated_in_stage[bit]) | |
| need_invalidate = true; | |
| }); | |
| return need_invalidate; | |
| } | |
| bool RenderGraph::physical_pass_requires_work(const PhysicalPass &physical_pass) const | |
| { | |
| for (auto &pass : physical_pass.passes) | |
| if (passes[pass]->need_render_pass()) | |
| return true; | |
| return false; | |
| } | |
| void RenderGraph::physical_pass_transfer_ownership(const PhysicalPass &pass) | |
| { | |
| // Need to wait on this event before we can transfer ownership to another alias. | |
| for (auto &transfer : pass.alias_transfer) | |
| { | |
| auto &phys_events = physical_events[transfer.second]; | |
| phys_events = physical_events[transfer.first]; | |
| for (auto &e : phys_events.invalidated_in_stage) | |
| e = 0; | |
| // If we have pending writes, we have a problem. We cannot safely alias unless we first flush caches, | |
| // but we cannot flush caches from UNDEFINED layout. | |
| // "Write-only" resources should be transient to begin with, and not hit this path. | |
| // If required, we could inject a pipeline barrier here which flushes caches. | |
| // Generally, the last pass a resource is used, it will be *read*, not written to. | |
| assert(phys_events.to_flush_access == 0); | |
| phys_events.to_flush_access = 0; | |
| phys_events.layout = VK_IMAGE_LAYOUT_UNDEFINED; | |
| } | |
| } | |
| static void get_queue_type(Vulkan::CommandBuffer::Type &queue_type, bool &graphics, RenderGraphQueueFlagBits flag) | |
| { | |
| switch (flag) | |
| { | |
| default: | |
| case RENDER_GRAPH_QUEUE_GRAPHICS_BIT: | |
| graphics = true; | |
| queue_type = Vulkan::CommandBuffer::Type::Generic; | |
| break; | |
| case RENDER_GRAPH_QUEUE_COMPUTE_BIT: | |
| graphics = false; | |
| queue_type = Vulkan::CommandBuffer::Type::Generic; | |
| break; | |
| case RENDER_GRAPH_QUEUE_ASYNC_COMPUTE_BIT: | |
| graphics = false; | |
| queue_type = Vulkan::CommandBuffer::Type::AsyncCompute; | |
| break; | |
| case RENDER_GRAPH_QUEUE_ASYNC_GRAPHICS_BIT: | |
| graphics = true; | |
| queue_type = Vulkan::CommandBuffer::Type::AsyncGraphics; | |
| break; | |
| } | |
| } | |
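| // Batches all pre-pass synchronization into a single vkCmdPipelineBarrier: semaphore hand-over layout changes, | |
| // immediate (first-use, UNDEFINED-layout) transitions and regular image/buffer barriers are combined, | |
| // falling back to TOP_OF_PIPE as the source stage when there is nothing to wait on. | |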
| void RenderGraph::PassSubmissionState::emit_pre_pass_barriers() | |
| { | |
| cmd->begin_region("render-graph-sync-pre"); | |
| // Submit barriers. | |
| if (!semaphore_handover_barriers.empty() || !immediate_image_barriers.empty() || | |
| !image_barriers.empty() || !buffer_barriers.empty()) | |
| { | |
| Util::SmallVector<VkImageMemoryBarrier, 64> combined_barriers; | |
| combined_barriers.reserve(semaphore_handover_barriers.size() + | |
| immediate_image_barriers.size() + | |
| image_barriers.size()); | |
| combined_barriers.insert(combined_barriers.end(), semaphore_handover_barriers.begin(), semaphore_handover_barriers.end()); | |
| combined_barriers.insert(combined_barriers.end(), immediate_image_barriers.begin(), immediate_image_barriers.end()); | |
| combined_barriers.insert(combined_barriers.end(), image_barriers.begin(), image_barriers.end()); | |
| auto src = handover_stages | pre_src_stages; | |
| auto dst = handover_stages | immediate_dst_stages | pre_dst_stages; | |
| if (!src) | |
| src = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; | |
| cmd->barrier(src, dst, | |
| 0, nullptr, | |
| buffer_barriers.size(), buffer_barriers.empty() ? nullptr : buffer_barriers.data(), | |
| combined_barriers.size(), | |
| combined_barriers.empty() ? nullptr : combined_barriers.data()); | |
| } | |
| cmd->end_region(); | |
| } | |
| static void wait_for_semaphore_in_queue(Vulkan::Device &device_, Vulkan::Semaphore &sem, | |
| Vulkan::CommandBuffer::Type queue_type, VkPipelineStageFlags stages) | |
| { | |
| if (sem->get_semaphore() != VK_NULL_HANDLE && !sem->is_pending_wait()) | |
| device_.add_wait_semaphore(queue_type, sem, stages, true); | |
| } | |
| void RenderGraph::PassSubmissionState::submit() | |
| { | |
| if (!cmd) | |
| return; | |
| auto &device_ = cmd->get_device(); | |
| for (auto &lock : external_locks) | |
| { | |
| auto sem = lock.iface->external_acquire(); | |
| if (sem) | |
| { | |
| wait_semaphores.push_back(std::move(sem)); | |
| wait_semaphore_stages.push_back(lock.stages); | |
| } | |
| } | |
| size_t num_semaphores = wait_semaphores.size(); | |
| for (size_t i = 0; i < num_semaphores; i++) | |
| wait_for_semaphore_in_queue(device_, wait_semaphores[i], queue_type, wait_semaphore_stages[i]); | |
| if (need_submission_semaphore || !external_locks.empty()) | |
| { | |
| Vulkan::Semaphore semaphores[3]; | |
| uint32_t sem_count = 0; | |
| if (need_submission_semaphore) | |
| sem_count += 2; | |
| if (!external_locks.empty()) | |
| sem_count += 1; | |
| device_.submit(cmd, nullptr, sem_count, semaphores); | |
| if (need_submission_semaphore) | |
| { | |
| *proxy_semaphores[0] = std::move(*semaphores[0]); | |
| *proxy_semaphores[1] = std::move(*semaphores[1]); | |
| } | |
| if (!external_locks.empty()) | |
| { | |
| auto &release_semaphore = semaphores[need_submission_semaphore ? 2 : 0]; | |
| for (auto &lock : external_locks) | |
| lock.iface->external_release(release_semaphore); | |
| } | |
| } | |
| else | |
| device_.submit(cmd); | |
| if (Vulkan::ImplementationQuirks::get().queue_wait_on_submission) | |
| device_.flush_frame(); | |
| } | |
| void RenderGraph::physical_pass_invalidate_attachments(const PhysicalPass &physical_pass) | |
| { | |
| // Before invalidating, force the layout to UNDEFINED. | |
| // This will be required for resource aliasing later. | |
| // Storage textures are preserved over multiple frames, don't discard. | |
| for (auto &discard : physical_pass.discards) | |
| if (!physical_dimensions[discard].is_buffer_like()) | |
| physical_events[discard].layout = VK_IMAGE_LAYOUT_UNDEFINED; | |
| } | |
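| // For each invalidate barrier, decide how the dependency is satisfied: | |
| //  - a vkCmdPipelineBarrier if we know the source stages from an earlier flush on this queue, | |
| //  - waiting on the graphics/compute proxy semaphore if the previous use signalled one (cross-queue hand-over), | |
| //  - or an immediate barrier from TOP_OF_PIPE when this is the first use and the layout is still UNDEFINED. | |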
| void RenderGraph::physical_pass_handle_invalidate_barrier(const Barrier &barrier, PassSubmissionState &state, | |
| bool physical_graphics_queue) | |
| { | |
| auto &event = barrier.history ? physical_history_events[barrier.resource_index] : | |
| physical_events[barrier.resource_index]; | |
| bool need_pipeline_barrier = false; | |
| bool layout_change = false; | |
| bool need_wait_semaphore = false; | |
| auto &wait_semaphore = physical_graphics_queue ? event.wait_graphics_semaphore : event.wait_compute_semaphore; | |
| auto &phys = physical_dimensions[barrier.resource_index]; | |
| if (phys.buffer_info.size || (phys.flags & ATTACHMENT_INFO_INTERNAL_PROXY_BIT) != 0) | |
| { | |
| // Buffers. | |
| bool need_sync = (event.to_flush_access != 0) || need_invalidate(barrier, event); | |
| if (need_sync) | |
| { | |
| need_pipeline_barrier = event.pipeline_barrier_src_stages != 0; | |
| // Signalling and waiting for a semaphore satisfies the memory barrier automatically. | |
| need_wait_semaphore = bool(wait_semaphore); | |
| } | |
| if (need_pipeline_barrier) | |
| { | |
| VK_ASSERT(physical_buffers[barrier.resource_index]); | |
| auto &buffer = *physical_buffers[barrier.resource_index]; | |
| VkBufferMemoryBarrier b = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; | |
| b.srcAccessMask = event.to_flush_access; | |
| b.dstAccessMask = barrier.access; | |
| b.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | |
| b.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | |
| b.buffer = buffer.get_buffer(); | |
| b.offset = 0; | |
| b.size = VK_WHOLE_SIZE; | |
| state.buffer_barriers.push_back(b); | |
| } | |
| } | |
| else | |
| { | |
| // Images. | |
| const Vulkan::Image *image = barrier.history ? | |
| physical_history_image_attachments[barrier.resource_index].get() : | |
| &physical_attachments[barrier.resource_index]->get_image(); | |
| if (!image) | |
| { | |
| // Can happen for history inputs if this is the first frame. | |
| return; | |
| } | |
| VkImageMemoryBarrier b = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER }; | |
| b.oldLayout = event.layout; | |
| b.newLayout = barrier.layout; | |
| b.srcAccessMask = event.to_flush_access; | |
| b.dstAccessMask = barrier.access; | |
| b.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | |
| b.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | |
| b.image = image->get_image(); | |
| b.subresourceRange.aspectMask = Vulkan::format_to_aspect_mask(image->get_format()); | |
| b.subresourceRange.layerCount = image->get_create_info().layers; | |
| b.subresourceRange.levelCount = image->get_create_info().levels; | |
| event.layout = barrier.layout; | |
| layout_change = b.oldLayout != b.newLayout; | |
| bool need_sync = | |
| layout_change || | |
| (event.to_flush_access != 0) || | |
| need_invalidate(barrier, event); | |
| if (need_sync) | |
| { | |
| if (event.pipeline_barrier_src_stages) | |
| { | |
| // Either we wait for a pipeline barrier ... | |
| state.image_barriers.push_back(b); | |
| need_pipeline_barrier = true; | |
| } | |
| else if (wait_semaphore) | |
| { | |
| // We wait for a semaphore ... | |
| if (layout_change) | |
| { | |
| // When the semaphore was signalled, caches were flushed, so we don't need to do that again. | |
| // We still need dstAccessMask however, because layout changes may perform writes. | |
| b.srcAccessMask = 0; | |
| state.semaphore_handover_barriers.push_back(b); | |
| state.handover_stages |= barrier.stages; | |
| } | |
| // If we don't need a layout transition, signalling and waiting for semaphores satisfies | |
| // all requirements we have of srcAccessMask/dstAccessMask. | |
| need_wait_semaphore = true; | |
| } | |
| else | |
| { | |
| // ... or vkCmdPipelineBarrier from TOP_OF_PIPE_BIT if this is the first time we use the resource. | |
| state.immediate_image_barriers.push_back(b); | |
| if (b.oldLayout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Cannot do immediate image barriers from a layout other than UNDEFINED."); | |
| state.immediate_dst_stages |= barrier.stages; | |
| } | |
| } | |
| } | |
| // Any pending writes or layout changes means we have to invalidate caches. | |
| if (event.to_flush_access || layout_change) | |
| { | |
| for (auto &e : event.invalidated_in_stage) | |
| e = 0; | |
| } | |
| event.to_flush_access = 0; | |
| if (need_pipeline_barrier) | |
| { | |
| state.pre_dst_stages |= barrier.stages; | |
| assert(event.pipeline_barrier_src_stages != 0); | |
| state.pre_src_stages |= event.pipeline_barrier_src_stages; | |
| // Mark appropriate caches as invalidated now. | |
| Util::for_each_bit(barrier.stages, [&](uint32_t bit) { | |
| event.invalidated_in_stage[bit] |= barrier.access; | |
| }); | |
| } | |
| else if (need_wait_semaphore) | |
| { | |
| assert(wait_semaphore); | |
| // Wait for a semaphore, unless it has already been waited for ... | |
| state.wait_semaphores.push_back(wait_semaphore); | |
| state.wait_semaphore_stages.push_back(barrier.stages); | |
| // Waiting for a semaphore makes data visible to all access bits in relevant stages. | |
| // The exception is if we perform a layout change ... | |
| // In this case we only invalidate the access bits which we placed in the vkCmdPipelineBarrier. | |
| Util::for_each_bit(barrier.stages, [&](uint32_t bit) { | |
| if (layout_change) | |
| event.invalidated_in_stage[bit] |= barrier.access; | |
| else | |
| event.invalidated_in_stage[bit] |= ~0u; | |
| }); | |
| } | |
| } | |
| void RenderGraph::physical_pass_handle_signal(Vulkan::Device &device_, const PhysicalPass &physical_pass, PassSubmissionState &state) | |
| { | |
| for (auto &barrier : physical_pass.flush) | |
| { | |
| if (physical_dimensions[barrier.resource_index].uses_semaphore()) | |
| state.need_submission_semaphore = true; | |
| else | |
| state.post_pipeline_barrier_stages |= barrier.stages; | |
| } | |
| if (state.need_submission_semaphore) | |
| { | |
| state.proxy_semaphores[0] = device_.request_proxy_semaphore(); | |
| state.proxy_semaphores[1] = device_.request_proxy_semaphore(); | |
| } | |
| } | |
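| // Records what this pass leaves behind for later consumers: pending writes (to_flush_access), the final image | |
| // layout, and either the proxy semaphores (when the resource is shared across queues) or the pipeline stages | |
| // future passes must synchronize against with a plain pipeline barrier. | |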
| void RenderGraph::physical_pass_handle_flush_barrier(const Barrier &barrier, PassSubmissionState &state) | |
| { | |
| auto &event = barrier.history ? | |
| physical_history_events[barrier.resource_index] : | |
| physical_events[barrier.resource_index]; | |
| // A render pass might have changed the final layout. | |
| if (!physical_dimensions[barrier.resource_index].buffer_info.size) | |
| { | |
| auto *image = barrier.history ? | |
| physical_history_image_attachments[barrier.resource_index].get() : | |
| &physical_attachments[barrier.resource_index]->get_image(); | |
| if (!image) | |
| return; | |
| physical_events[barrier.resource_index].layout = barrier.layout; | |
| } | |
| // Mark if there are pending writes from this pass. | |
| event.to_flush_access = barrier.access; | |
| if (physical_dimensions[barrier.resource_index].uses_semaphore()) | |
| { | |
| assert(state.proxy_semaphores[0]); | |
| assert(state.proxy_semaphores[1]); | |
| event.wait_graphics_semaphore = state.proxy_semaphores[0]; | |
| event.wait_compute_semaphore = state.proxy_semaphores[1]; | |
| event.pipeline_barrier_src_stages = 0; | |
| } | |
| else | |
| event.pipeline_barrier_src_stages = state.post_pipeline_barrier_stages; | |
| } | |
| void RenderGraph::physical_pass_enqueue_graphics_commands(const PhysicalPass &physical_pass, PassSubmissionState &state) | |
| { | |
| auto &cmd = *state.cmd; | |
| for (auto &clear_req : physical_pass.color_clear_requests) | |
| clear_req.pass->get_clear_color(clear_req.index, clear_req.target); | |
| if (physical_pass.depth_clear_request.pass) | |
| { | |
| physical_pass.depth_clear_request.pass->get_clear_depth_stencil( | |
| physical_pass.depth_clear_request.target); | |
| } | |
| Vulkan::QueryPoolHandle start_graphics, end_graphics; | |
| if (enabled_timestamps) | |
| start_graphics = cmd.write_timestamp(VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT); | |
| VK_ASSERT(physical_pass.layers != ~0u); | |
| auto rp_info = physical_pass.render_pass_info; | |
| unsigned layer_iterations = 1; | |
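| // layer_iterations == 1 means a single begin/end of the render pass (possibly covering all layers via multiview); | |
| // otherwise the render pass is re-recorded once per layer, with base_layer advancing each iteration. | |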
| if (physical_pass.layers > 1) | |
| { | |
| unsigned multiview_count = 0; | |
| unsigned separate_count = 0; | |
| for (auto pass : physical_pass.passes) | |
| { | |
| auto &subpass = passes[pass]; | |
| if (subpass->render_pass_is_multiview()) | |
| multiview_count++; | |
| else | |
| separate_count++; | |
| } | |
| if (multiview_count && separate_count) | |
| { | |
| LOGE("Mismatch in physical pass w.r.t. multiview vs separate layers. Do not mix and match! Render pass will be dropped.\n"); | |
| layer_iterations = 0; | |
| } | |
| else if (multiview_count) | |
| { | |
| if (device->get_device_features().multiview_features.multiview) | |
| { | |
| rp_info.num_layers = physical_pass.layers; | |
| rp_info.base_layer = 0; | |
| } | |
| else | |
| { | |
| LOGE("VK_KHR_multiview is not supported on this device. Falling back to separate layering.\n"); | |
| layer_iterations = physical_pass.layers; | |
| } | |
| } | |
| else | |
| { | |
| layer_iterations = physical_pass.layers; | |
| } | |
| } | |
| for (unsigned layer = 0; layer < layer_iterations; layer++) | |
| { | |
| rp_info.base_layer = layer; | |
| cmd.begin_region("begin-render-pass"); | |
| cmd.begin_render_pass(rp_info, state.subpass_contents[0]); | |
| cmd.end_region(); | |
| for (auto &subpass : physical_pass.passes) | |
| { | |
| auto subpass_index = unsigned(&subpass - physical_pass.passes.data()); | |
| auto &scaled_requests = physical_pass.scaled_clear_requests[subpass_index]; | |
| enqueue_scaled_requests(cmd, scaled_requests); | |
| auto &pass = *passes[subpass]; | |
| // If we have started the render pass, we have to do it, even if a lone subpass might not be required, | |
| // due to clearing and so on. | |
| // This should be an extremely unlikely scenario. | |
| // Either you need all subpasses or none. | |
| cmd.begin_region(pass.get_name().c_str()); | |
| pass.build_render_pass(cmd, layer); | |
| cmd.end_region(); | |
| if (&subpass != &physical_pass.passes.back()) | |
| cmd.next_subpass(state.subpass_contents[subpass_index + 1]); | |
| } | |
| cmd.begin_region("end-render-pass"); | |
| cmd.end_render_pass(); | |
| cmd.end_region(); | |
| } | |
| if (enabled_timestamps) | |
| { | |
| end_graphics = cmd.write_timestamp(VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT); | |
| std::string name; | |
| if (physical_pass.passes.size() == 1) | |
| name = passes[physical_pass.passes.front()]->get_name(); | |
| else | |
| { | |
| for (auto &pass : physical_pass.passes) | |
| { | |
| name += passes[pass]->get_name(); | |
| if (&pass != &physical_pass.passes.back()) | |
| name += " + "; | |
| } | |
| } | |
| device->register_time_interval("graphics", std::move(start_graphics), std::move(end_graphics), name.c_str()); | |
| } | |
| enqueue_mipmap_requests(cmd, physical_pass.mipmap_requests); | |
| } | |
| void RenderGraph::physical_pass_enqueue_compute_commands(const PhysicalPass &physical_pass, PassSubmissionState &state) | |
| { | |
| assert(physical_pass.passes.size() == 1); | |
| auto &cmd = *state.cmd; | |
| auto &pass = *passes[physical_pass.passes.front()]; | |
| Vulkan::QueryPoolHandle start_ts, end_ts; | |
| if (enabled_timestamps) | |
| start_ts = cmd.write_timestamp(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); | |
| cmd.begin_region(pass.get_name().c_str()); | |
| pass.build_render_pass(cmd, 0); | |
| cmd.end_region(); | |
| if (enabled_timestamps) | |
| { | |
| end_ts = cmd.write_timestamp(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); | |
| device->register_time_interval("compute", std::move(start_ts), std::move(end_ts), pass.get_name()); | |
| } | |
| } | |
| void RenderGraph::physical_pass_handle_external_acquire(const PhysicalPass &physical_pass, PassSubmissionState &state) | |
| { | |
| for (auto pass_index : physical_pass.passes) | |
| { | |
| auto &locks = passes[pass_index]->get_lock_interfaces(); | |
| for (auto &lock : locks) | |
| if (lock.iface) | |
| state.external_locks.push_back(lock); | |
| } | |
| } | |
| void RenderGraph::physical_pass_handle_cpu_timeline(Vulkan::Device &device_, | |
| const PhysicalPass &physical_pass, | |
| PassSubmissionState &state, | |
| TaskComposer &incoming_composer) | |
| { | |
| get_queue_type(state.queue_type, state.graphics, passes[physical_pass.passes.front()]->get_queue()); | |
| physical_pass_invalidate_attachments(physical_pass); | |
| // Queue up invalidates and change layouts. | |
| for (auto &barrier : physical_pass.invalidate) | |
| { | |
| bool physical_graphics = device->get_physical_queue_type(state.queue_type) == Vulkan::QUEUE_INDEX_GRAPHICS; | |
| physical_pass_handle_invalidate_barrier(barrier, state, physical_graphics); | |
| } | |
| physical_pass_handle_external_acquire(physical_pass, state); | |
| physical_pass_handle_signal(device_, physical_pass, state); | |
| for (auto &barrier : physical_pass.flush) | |
| physical_pass_handle_flush_barrier(barrier, state); | |
| // Hand over aliases to some future pass. | |
| physical_pass_transfer_ownership(physical_pass); | |
| // Create preparation tasks. | |
| state.subpass_contents.resize(physical_pass.passes.size()); | |
| for (auto &c : state.subpass_contents) | |
| c = VK_SUBPASS_CONTENTS_INLINE; | |
| auto &group = incoming_composer.get_thread_group(); | |
| TaskComposer composer(group); | |
| composer.set_incoming_task(incoming_composer.get_pipeline_stage_dependency()); | |
| composer.begin_pipeline_stage(); | |
| for (auto &pass : physical_pass.passes) | |
| { | |
| auto &subpass = *passes[pass]; | |
| subpass.prepare_render_pass(composer); | |
| } | |
| state.rendering_dependency = composer.get_outgoing_task(); | |
| } | |
| void RenderGraph::physical_pass_handle_gpu_timeline(ThreadGroup &group, Vulkan::Device &device_, | |
| const PhysicalPass &physical_pass, | |
| PassSubmissionState &state) | |
| { | |
| auto task = group.create_task([&]() { | |
| state.cmd = device_.request_command_buffer(state.queue_type); | |
| state.emit_pre_pass_barriers(); | |
| if (state.graphics) | |
| physical_pass_enqueue_graphics_commands(physical_pass, state); | |
| else | |
| physical_pass_enqueue_compute_commands(physical_pass, state); | |
| // Explicitly end command buffer recording here in the worker thread; ending it in the submission task | |
| // would break threading rules. | |
| state.cmd->end_debug_channel(); | |
| state.cmd->end_threaded_recording(); | |
| }); | |
| task->set_desc((passes[physical_pass.passes.front()]->get_name() + "-build-gpu-commands").c_str()); | |
| if (state.rendering_dependency) | |
| group.add_dependency(*task, *state.rendering_dependency); | |
| state.rendering_dependency = task; | |
| } | |
| void RenderGraph::enqueue_render_pass(Vulkan::Device &device_, PhysicalPass &physical_pass, PassSubmissionState &state, | |
| TaskComposer &composer) | |
| { | |
| if (!physical_pass_requires_work(physical_pass)) | |
| { | |
| physical_pass_transfer_ownership(physical_pass); | |
| return; | |
| } | |
| state.active = true; | |
| // Runs serially on the CPU to resolve barrier states. | |
| physical_pass_handle_cpu_timeline(device_, physical_pass, state, composer); | |
| } | |
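| // Used when the backbuffer source could not alias the swapchain image directly: acquire or transition the | |
| // source, then draw a fullscreen quad which samples it into the swapchain attachment. | |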
| void RenderGraph::enqueue_swapchain_scale_pass(Vulkan::Device &device_) | |
| { | |
| unsigned resource_index = resource_to_index[backbuffer_source]; | |
| auto &source_resource = *this->resources[resource_index]; | |
| auto queue_type = (physical_dimensions[resource_index].queues & RENDER_GRAPH_QUEUE_GRAPHICS_BIT) != 0 ? | |
| Vulkan::CommandBuffer::Type::Generic : Vulkan::CommandBuffer::Type::AsyncGraphics; | |
| auto physical_queue_type = device_.get_physical_queue_type(queue_type); | |
| auto cmd = device_.request_command_buffer(queue_type); | |
| cmd->begin_region("render-graph-copy-to-swapchain"); | |
| unsigned index = source_resource.get_physical_index(); | |
| auto &image = physical_attachments[index]->get_image(); | |
| auto &wait_semaphore = physical_queue_type == Vulkan::QUEUE_INDEX_GRAPHICS ? | |
| physical_events[index].wait_graphics_semaphore : physical_events[index].wait_compute_semaphore; | |
| auto target_layout = physical_dimensions[index].is_storage_image() ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; | |
| if (physical_events[index].pipeline_barrier_src_stages != 0) | |
| { | |
| VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER }; | |
| barrier.image = image.get_image(); | |
| barrier.oldLayout = physical_events[index].layout; | |
| barrier.newLayout = target_layout; | |
| barrier.srcAccessMask = physical_events[index].to_flush_access; | |
| barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; | |
| barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | |
| barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | |
| barrier.subresourceRange.levelCount = image.get_create_info().levels; | |
| barrier.subresourceRange.layerCount = image.get_create_info().layers; | |
| barrier.subresourceRange.aspectMask = Vulkan::format_to_aspect_mask(physical_attachments[index]->get_format()); | |
| cmd->barrier(physical_events[index].pipeline_barrier_src_stages, | |
| VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, | |
| 0, nullptr, | |
| 0, nullptr, | |
| 1, &barrier); | |
| physical_events[index].layout = target_layout; | |
| } | |
| else if (wait_semaphore) | |
| { | |
| if (wait_semaphore->get_semaphore() != VK_NULL_HANDLE && | |
| !wait_semaphore->is_pending_wait()) | |
| { | |
| device_.add_wait_semaphore(queue_type, | |
| wait_semaphore, | |
| VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, true); | |
| } | |
| if (physical_events[index].layout != target_layout) | |
| { | |
| cmd->image_barrier(image, physical_events[index].layout, target_layout, | |
| VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, | |
| VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT); | |
| physical_events[index].layout = target_layout; | |
| } | |
| } | |
| else | |
| { | |
| throw std::logic_error("Swapchain resource was not written to."); | |
| } | |
| Vulkan::RenderPassInfo rp_info; | |
| rp_info.num_color_attachments = 1; | |
| rp_info.clear_attachments = 0; | |
| rp_info.store_attachments = 1; | |
| rp_info.color_attachments[0] = swapchain_attachment; | |
| cmd->begin_render_pass(rp_info); | |
| enqueue_scaled_requests(*cmd, {{ 0, index }}); | |
| cmd->end_render_pass(); | |
| // Set a write-after-read barrier on this resource. | |
| physical_events[index].to_flush_access = 0; | |
| for (auto &e : physical_events[index].invalidated_in_stage) | |
| e = 0; | |
| physical_events[index].invalidated_in_stage[trailing_zeroes(VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT)] = VK_ACCESS_SHADER_READ_BIT; | |
| cmd->end_region(); | |
| if (physical_dimensions[index].uses_semaphore()) | |
| { | |
| Vulkan::Semaphore semaphores[2]; | |
| device_.submit(cmd, nullptr, 2, semaphores); | |
| physical_events[index].wait_graphics_semaphore = semaphores[0]; | |
| physical_events[index].wait_compute_semaphore = semaphores[1]; | |
| } | |
| else | |
| { | |
| device_.submit(cmd); | |
| } | |
| if (Vulkan::ImplementationQuirks::get().queue_wait_on_submission) | |
| device_.flush_frame(); | |
| } | |
| void RenderGraph::enqueue_render_passes(Vulkan::Device &device_, TaskComposer &composer) | |
| { | |
| pass_submission_state.clear(); | |
| size_t count = physical_passes.size(); | |
| pass_submission_state.resize(count); | |
| auto &thread_group = composer.get_thread_group(); | |
| for (size_t i = 0; i < count; i++) | |
| enqueue_render_pass(device_, physical_passes[i], pass_submission_state[i], composer); | |
| for (size_t i = 0; i < count; i++) | |
| { | |
| // Could be run in parallel. | |
| if (pass_submission_state[i].active) | |
| physical_pass_handle_gpu_timeline(thread_group, device_, physical_passes[i], pass_submission_state[i]); | |
| } | |
| for (auto &state : pass_submission_state) | |
| { | |
| auto &group = composer.begin_pipeline_stage(); | |
| group.set_desc("render-graph-submit"); | |
| if (state.rendering_dependency) | |
| { | |
| thread_group.add_dependency(group, *state.rendering_dependency); | |
| state.rendering_dependency.reset(); | |
| } | |
| group.enqueue_task([&state]() { | |
| state.submit(); | |
| }); | |
| } | |
| // Scale to swapchain if needed. | |
| if (swapchain_physical_index == RenderResource::Unused) | |
| { | |
| auto &group = composer.begin_pipeline_stage(); | |
| group.set_desc("render-queue-swapchain-scale"); | |
| group.enqueue_task([this, &device_]() { | |
| enqueue_swapchain_scale_pass(device_); | |
| device_.flush_frame(); | |
| }); | |
| } | |
| else | |
| { | |
| auto &group = composer.begin_pipeline_stage(); | |
| group.set_desc("render-queue-flush"); | |
| group.enqueue_task([&device_]() { | |
| device_.flush_frame(); | |
| }); | |
| } | |
| } | |
| void RenderGraph::setup_physical_buffer(Vulkan::Device &device_, unsigned attachment) | |
| { | |
| auto &att = physical_dimensions[attachment]; | |
| Vulkan::BufferCreateInfo info = {}; | |
| info.size = att.buffer_info.size; | |
| info.usage = att.buffer_info.usage; | |
| info.domain = Vulkan::BufferDomain::Device; | |
| // Zero-initialize buffers. TODO: Make this configurable. | |
| info.misc = Vulkan::BUFFER_MISC_ZERO_INITIALIZE_BIT; | |
| bool need_buffer = true; | |
| if (physical_buffers[attachment]) | |
| { | |
| if ((att.flags & ATTACHMENT_INFO_PERSISTENT_BIT) != 0 && | |
| physical_buffers[attachment]->get_create_info().size == info.size && | |
| (physical_buffers[attachment]->get_create_info().usage & info.usage) == info.usage) | |
| { | |
| need_buffer = false; | |
| } | |
| } | |
| if (need_buffer) | |
| { | |
| physical_buffers[attachment] = device_.create_buffer(info, nullptr); | |
| device_.set_name(*physical_buffers[attachment], att.name.c_str()); | |
| physical_events[attachment] = {}; | |
| } | |
| } | |
| void RenderGraph::setup_physical_image(Vulkan::Device &device_, unsigned attachment) | |
| { | |
| auto &att = physical_dimensions[attachment]; | |
| if (physical_aliases[attachment] != RenderResource::Unused) | |
| { | |
| physical_image_attachments[attachment] = physical_image_attachments[physical_aliases[attachment]]; | |
| physical_attachments[attachment] = &physical_image_attachments[attachment]->get_view(); | |
| physical_events[attachment] = {}; | |
| return; | |
| } | |
| bool need_image = true; | |
| VkImageUsageFlags usage = att.image_usage; | |
| Vulkan::ImageMiscFlags misc = 0; | |
| VkImageCreateFlags flags = 0; | |
| if ((att.flags & ATTACHMENT_INFO_UNORM_SRGB_ALIAS_BIT) != 0) | |
| misc |= Vulkan::IMAGE_MISC_MUTABLE_SRGB_BIT; | |
| if (att.is_storage_image()) | |
| flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; | |
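| // Reuse last frame's image only if the attachment is marked persistent and the existing image is compatible | |
| // (same format, dimensions and sample count, and its usage/flags are a superset of what we need now). | |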
| if (physical_image_attachments[attachment]) | |
| { | |
| if ((att.flags & ATTACHMENT_INFO_PERSISTENT_BIT) != 0 && | |
| physical_image_attachments[attachment]->get_create_info().format == att.format && | |
| physical_image_attachments[attachment]->get_create_info().width == att.width && | |
| physical_image_attachments[attachment]->get_create_info().height == att.height && | |
| physical_image_attachments[attachment]->get_create_info().depth == att.depth && | |
| physical_image_attachments[attachment]->get_create_info().samples == att.samples && | |
| (physical_image_attachments[attachment]->get_create_info().usage & usage) == usage && | |
| (physical_image_attachments[attachment]->get_create_info().flags & flags) == flags) | |
| { | |
| need_image = false; | |
| } | |
| } | |
| if (need_image) | |
| { | |
| Vulkan::ImageCreateInfo info; | |
| info.format = att.format; | |
| info.type = att.depth > 1 ? VK_IMAGE_TYPE_3D : VK_IMAGE_TYPE_2D; | |
| info.width = att.width; | |
| info.height = att.height; | |
| info.depth = att.depth; | |
| info.domain = Vulkan::ImageDomain::Physical; | |
| info.levels = att.levels; | |
| info.layers = att.layers; | |
| info.usage = usage; | |
| info.initial_layout = VK_IMAGE_LAYOUT_UNDEFINED; | |
| info.samples = static_cast<VkSampleCountFlagBits>(att.samples); | |
| info.flags = flags; | |
| if (Vulkan::format_has_depth_or_stencil_aspect(info.format)) | |
| info.usage &= ~VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; | |
| info.misc = misc; | |
| if (att.queues & (RENDER_GRAPH_QUEUE_GRAPHICS_BIT | RENDER_GRAPH_QUEUE_COMPUTE_BIT)) | |
| info.misc |= Vulkan::IMAGE_MISC_CONCURRENT_QUEUE_GRAPHICS_BIT; | |
| if (att.queues & RENDER_GRAPH_QUEUE_ASYNC_COMPUTE_BIT) | |
| info.misc |= Vulkan::IMAGE_MISC_CONCURRENT_QUEUE_ASYNC_COMPUTE_BIT; | |
| if (att.queues & RENDER_GRAPH_QUEUE_ASYNC_GRAPHICS_BIT) | |
| info.misc |= Vulkan::IMAGE_MISC_CONCURRENT_QUEUE_ASYNC_GRAPHICS_BIT; | |
| physical_image_attachments[attachment] = device_.create_image(info, nullptr); | |
| if (!physical_image_attachments[attachment]) | |
| LOGE("Failed to create render graph image!\n"); | |
| physical_image_attachments[attachment]->set_surface_transform(att.transform); | |
| // Just keep storage images in GENERAL layout. | |
| // There is no reason to try enabling compression. | |
| if (att.is_storage_image()) | |
| physical_image_attachments[attachment]->set_layout(Vulkan::Layout::General); | |
| device_.set_name(*physical_image_attachments[attachment], att.name.c_str()); | |
| physical_events[attachment] = {}; | |
| } | |
| physical_attachments[attachment] = &physical_image_attachments[attachment]->get_view(); | |
| } | |
| void RenderGraph::setup_attachments(Vulkan::Device &device_, Vulkan::ImageView *swapchain) | |
| { | |
| physical_attachments.clear(); | |
| physical_attachments.resize(physical_dimensions.size()); | |
| // Try to reuse the buffers if possible. | |
| physical_buffers.resize(physical_dimensions.size()); | |
| // Try to reuse render targets if possible. | |
| physical_image_attachments.resize(physical_dimensions.size()); | |
| physical_history_image_attachments.resize(physical_dimensions.size()); | |
| physical_events.resize(physical_dimensions.size()); | |
| physical_history_events.resize(physical_dimensions.size()); | |
| swapchain_attachment = swapchain; | |
| unsigned num_attachments = physical_dimensions.size(); | |
| for (unsigned i = 0; i < num_attachments; i++) | |
| { | |
| // Move over history attachments and events. | |
| if (physical_image_has_history[i]) | |
| { | |
| std::swap(physical_history_image_attachments[i], physical_image_attachments[i]); | |
| std::swap(physical_history_events[i], physical_events[i]); | |
| } | |
| auto &att = physical_dimensions[i]; | |
| if ((att.flags & ATTACHMENT_INFO_INTERNAL_PROXY_BIT) != 0) | |
| continue; | |
| if (att.buffer_info.size != 0) | |
| { | |
| setup_physical_buffer(device_, i); | |
| } | |
| else | |
| { | |
| if (att.is_storage_image()) | |
| setup_physical_image(device_, i); | |
| else if (i == swapchain_physical_index) | |
| physical_attachments[i] = swapchain; | |
| else if ((att.flags & ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT) != 0) | |
| { | |
| physical_image_attachments[i] = device_.get_transient_attachment( | |
| att.width, att.height, att.format, i, | |
| att.samples, att.layers); | |
| physical_attachments[i] = &physical_image_attachments[i]->get_view(); | |
| } | |
| else | |
| setup_physical_image(device_, i); | |
| } | |
| } | |
| // Assign concrete ImageViews to the render pass. | |
| for (auto &physical_pass : physical_passes) | |
| { | |
| unsigned layers = ~0u; | |
| unsigned num_color_attachments = physical_pass.physical_color_attachments.size(); | |
| for (unsigned i = 0; i < num_color_attachments; i++) | |
| { | |
| auto &att = physical_pass.render_pass_info.color_attachments[i]; | |
| att = physical_attachments[physical_pass.physical_color_attachments[i]]; | |
| if (att->get_image().get_create_info().domain == Vulkan::ImageDomain::Physical) | |
| layers = std::min(layers, att->get_image().get_create_info().layers); | |
| } | |
| if (physical_pass.physical_depth_stencil_attachment != RenderResource::Unused) | |
| { | |
| auto &ds = physical_pass.render_pass_info.depth_stencil; | |
| ds = physical_attachments[physical_pass.physical_depth_stencil_attachment]; | |
| if (ds->get_image().get_create_info().domain == Vulkan::ImageDomain::Physical) | |
| layers = std::min(layers, ds->get_image().get_create_info().layers); | |
| } | |
| else | |
| physical_pass.render_pass_info.depth_stencil = nullptr; | |
| physical_pass.layers = layers; | |
| } | |
| } | |
| void RenderGraph::traverse_dependencies(const RenderPass &pass, unsigned stack_count) | |
| { | |
| // For these kinds of resources, | |
| // make sure that we pull in the dependency right away so we can merge render passes if possible. | |
| if (pass.get_depth_stencil_input()) | |
| { | |
| depend_passes_recursive(pass, pass.get_depth_stencil_input()->get_write_passes(), | |
| stack_count, false, false, true); | |
| } | |
| for (auto *input : pass.get_attachment_inputs()) | |
| { | |
| bool self_dependency = pass.get_depth_stencil_output() == input; | |
| if (find(begin(pass.get_color_outputs()), end(pass.get_color_outputs()), input) != end(pass.get_color_outputs())) | |
| self_dependency = true; | |
| if (!self_dependency) | |
| depend_passes_recursive(pass, input->get_write_passes(), stack_count, false, false, true); | |
| } | |
| for (auto *input : pass.get_color_inputs()) | |
| { | |
| if (input) | |
| depend_passes_recursive(pass, input->get_write_passes(), stack_count, false, false, true); | |
| } | |
| for (auto *input : pass.get_color_scale_inputs()) | |
| { | |
| if (input) | |
| depend_passes_recursive(pass, input->get_write_passes(), stack_count, false, false, false); | |
| } | |
| for (auto *input : pass.get_blit_texture_inputs()) | |
| { | |
| if (input) | |
| depend_passes_recursive(pass, input->get_write_passes(), stack_count, false, false, false); | |
| } | |
| for (auto &input : pass.get_generic_texture_inputs()) | |
| depend_passes_recursive(pass, input.texture->get_write_passes(), stack_count, false, false, false); | |
| for (auto &input : pass.get_proxy_inputs()) | |
| depend_passes_recursive(pass, input.proxy->get_write_passes(), stack_count, false, false, false); | |
| for (auto *input : pass.get_storage_inputs()) | |
| { | |
| if (input) | |
| { | |
| // There might be no writers of this resource if it's used in a feedback fashion. | |
| depend_passes_recursive(pass, input->get_write_passes(), stack_count, true, false, false); | |
| // Deal with write-after-read hazards if a storage buffer is read in other passes | |
| // (feedback) before being updated. | |
| depend_passes_recursive(pass, input->get_read_passes(), stack_count, true, true, false); | |
| } | |
| } | |
| for (auto *input : pass.get_storage_texture_inputs()) | |
| { | |
| if (input) | |
| depend_passes_recursive(pass, input->get_write_passes(), stack_count, false, false, false); | |
| } | |
| for (auto &input : pass.get_generic_buffer_inputs()) | |
| { | |
| // There might be no writers of this resource if it's used in a feedback fashion. | |
| depend_passes_recursive(pass, input.buffer->get_write_passes(), stack_count, true, false, false); | |
| } | |
| } | |
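| // no_check: allow the written-pass set to be empty (feedback resources may have no writer). | |
| // ignore_self: skip the depender itself if it appears in the set instead of treating it as a cycle. | |
| // merge_dependency: the dependency is also a candidate for subpass merging (attachment-style inputs). | |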
| void RenderGraph::depend_passes_recursive(const RenderPass &self, const std::unordered_set<unsigned> &written_passes, | |
| unsigned stack_count, bool no_check, bool ignore_self, bool merge_dependency) | |
| { | |
| if (!no_check && written_passes.empty()) | |
| throw std::logic_error("No pass exists which writes to resource."); | |
| if (stack_count > passes.size()) | |
| throw std::logic_error("Cycle detected."); | |
| for (auto &pass : written_passes) | |
| if (pass != self.get_index()) | |
| pass_dependencies[self.get_index()].insert(pass); | |
| if (merge_dependency) | |
| for (auto &pass : written_passes) | |
| if (pass != self.get_index()) | |
| pass_merge_dependencies[self.get_index()].insert(pass); | |
| stack_count++; | |
| for (auto &pushed_pass : written_passes) | |
| { | |
| if (ignore_self && pushed_pass == self.get_index()) | |
| continue; | |
| else if (pushed_pass == self.get_index()) | |
| throw std::logic_error("Pass depends on itself."); | |
| pass_stack.push_back(pushed_pass); | |
| auto &pass = *passes[pushed_pass]; | |
| traverse_dependencies(pass, stack_count); | |
| } | |
| } | |
| void RenderGraph::reorder_passes(std::vector<unsigned> &flattened_passes) | |
| { | |
| // If a pass depends on an earlier pass via merge dependencies, | |
| // copy over dependencies to the dependees to avoid cases which can break subpass merging. | |
| // This is a "soft" dependency. If we ignore it, it's not a real problem. | |
| for (auto &pass_merge_deps : pass_merge_dependencies) | |
| { | |
| auto pass_index = unsigned(&pass_merge_deps - pass_merge_dependencies.data()); | |
| auto &pass_deps = pass_dependencies[pass_index]; | |
| for (auto &merge_dep : pass_merge_deps) | |
| { | |
| for (auto &dependee : pass_deps) | |
| { | |
| // Avoid cycles. | |
| if (depends_on_pass(dependee, merge_dep)) | |
| continue; | |
| if (merge_dep != dependee) | |
| pass_dependencies[merge_dep].insert(dependee); | |
| } | |
| } | |
| } | |
| // TODO: This is very inefficient, but should work okay for a reasonable amount of passes ... | |
| // But, reasonable amounts are always one more than what you'd think ... | |
| // Clarity in the algorithm is pretty important, because these things tend to be very annoying to debug. | |
| if (flattened_passes.size() <= 2) | |
| return; | |
| std::vector<unsigned> unscheduled_passes; | |
| unscheduled_passes.reserve(passes.size()); | |
| swap(flattened_passes, unscheduled_passes); | |
| const auto schedule = [&](unsigned index) { | |
| // Need to preserve the order of remaining elements. | |
| flattened_passes.push_back(unscheduled_passes[index]); | |
| std::move(unscheduled_passes.begin() + index + 1, | |
| unscheduled_passes.end(), | |
| unscheduled_passes.begin() + index); | |
| unscheduled_passes.pop_back(); | |
| }; | |
| schedule(0); | |
| while (!unscheduled_passes.empty()) | |
| { | |
| // Find the next pass to schedule. | |
| // We can pick any pass N, if the pass does not depend on anything left in unscheduled_passes. | |
| // unscheduled_passes[0] is always okay as a fallback, so unless we find something better, | |
| // we will at least pick that. | |
| // Ideally, we pick a pass which does not introduce any hard barrier. | |
| // A "hard barrier" here is where a pass depends directly on the pass before it forcing something ala vkCmdPipelineBarrier, | |
| // we would like to avoid this if possible. | |
| // Find the pass with the best overlap factor, i.e. the number of passes which can be scheduled in-between | |
| // the depender and the dependee. | |
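| // Example: if candidate Q only depends on a pass three entries back in the scheduled list, Q's overlap factor | |
| // is 3; picking Q leaves room for other passes to execute between its dependency and itself. | |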
| unsigned best_candidate = 0; | |
| unsigned best_overlap_factor = 0; | |
| for (unsigned i = 0; i < unscheduled_passes.size(); i++) | |
| { | |
| unsigned overlap_factor = 0; | |
| // Always try to merge passes if possible on tilers. | |
| // This might not make sense on desktop however, | |
| // so we can conditionally enable this path depending on our GPU. | |
| if (pass_merge_dependencies[unscheduled_passes[i]].count(flattened_passes.back())) | |
| { | |
| overlap_factor = ~0u; | |
| } | |
| else | |
| { | |
| for (auto itr = flattened_passes.rbegin(); itr != flattened_passes.rend(); ++itr) | |
| { | |
| if (depends_on_pass(unscheduled_passes[i], *itr)) | |
| break; | |
| overlap_factor++; | |
| } | |
| } | |
| if (overlap_factor <= best_overlap_factor) | |
| continue; | |
| bool possible_candidate = true; | |
| for (unsigned j = 0; j < i; j++) | |
| { | |
| if (depends_on_pass(unscheduled_passes[i], unscheduled_passes[j])) | |
| { | |
| possible_candidate = false; | |
| break; | |
| } | |
| } | |
| if (!possible_candidate) | |
| continue; | |
| best_candidate = i; | |
| best_overlap_factor = overlap_factor; | |
| } | |
| schedule(best_candidate); | |
| } | |
| } | |
| bool RenderGraph::depends_on_pass(unsigned dst_pass, unsigned src_pass) | |
| { | |
| if (dst_pass == src_pass) | |
| return true; | |
| for (auto &dep : pass_dependencies[dst_pass]) | |
| { | |
| if (depends_on_pass(dep, src_pass)) | |
| return true; | |
| } | |
| return false; | |
| } | |
| void RenderGraph::bake() | |
| { | |
| for (auto &pass : passes) | |
| pass->setup_dependencies(); | |
| // First, validate that the graph is sane. | |
| validate_passes(); | |
| auto itr = resource_to_index.find(backbuffer_source); | |
| if (itr == end(resource_to_index)) | |
| throw std::logic_error("Backbuffer source does not exist."); | |
| pass_stack.clear(); | |
| pass_dependencies.clear(); | |
| pass_merge_dependencies.clear(); | |
| pass_dependencies.resize(passes.size()); | |
| pass_merge_dependencies.resize(passes.size()); | |
| // Work our way back from the backbuffer, and sort out all the dependencies. | |
| auto &backbuffer_resource = *resources[itr->second]; | |
| if (backbuffer_resource.get_write_passes().empty()) | |
| throw std::logic_error("No pass exists which writes to resource."); | |
| for (auto &pass : backbuffer_resource.get_write_passes()) | |
| pass_stack.push_back(pass); | |
| auto tmp_pass_stack = pass_stack; | |
| for (auto &pushed_pass : tmp_pass_stack) | |
| { | |
| auto &pass = *passes[pushed_pass]; | |
| traverse_dependencies(pass, 0); | |
| } | |
| reverse(begin(pass_stack), end(pass_stack)); | |
| filter_passes(pass_stack); | |
| // Now, reorder passes to extract better pipelining. | |
| reorder_passes(pass_stack); | |
| // Now, we have a linear list of passes to submit in-order which would obey the dependencies. | |
| // Figure out which physical resources we need. Here we will alias resources which can trivially alias via renaming. | |
| // E.g. depth input -> depth output is just one physical attachment, similar with color. | |
| build_physical_resources(); | |
| // Next, try to merge adjacent passes together. | |
| build_physical_passes(); | |
| // After merging physical passes and resources, if an image resource is only used in a single physical pass, make it transient. | |
| build_transients(); | |
| // Now that we are done, we can make render passes. | |
| build_render_pass_info(); | |
| // For each render pass in isolation, figure out the barriers required. | |
| build_barriers(); | |
| // Check if we need to blit to the swapchain, either because the geometry does not match the backbuffer, | |
| // or because the usage of the image makes aliasing impossible. | |
| swapchain_physical_index = resources[resource_to_index[backbuffer_source]]->get_physical_index(); | |
| auto &backbuffer_dim = physical_dimensions[swapchain_physical_index]; | |
| // If resource is touched in async-compute, we cannot alias with swapchain. | |
| // If the resource is not transient, it is being used in multiple physical passes, | |
| // so we can't use the implicit subpass dependencies to deal with the swapchain. | |
| bool can_alias_backbuffer = (backbuffer_dim.queues & compute_queues) == 0 && | |
| (backbuffer_dim.flags & ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT) != 0; | |
| // Resources which do not alias with the backbuffer should not be pre-rotated. | |
| for (auto &dim : physical_dimensions) | |
| if (&dim != &backbuffer_dim) | |
| dim.transform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; | |
| LOGI("Backbuffer transform: %u\n", backbuffer_dim.transform); | |
| if (Vulkan::surface_transform_swaps_xy(backbuffer_dim.transform)) | |
| std::swap(backbuffer_dim.width, backbuffer_dim.height); | |
| backbuffer_dim.flags &= ~(ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT | ATTACHMENT_INFO_SUPPORTS_PREROTATE_BIT); | |
| backbuffer_dim.flags |= swapchain_dimensions.flags & ATTACHMENT_INFO_PERSISTENT_BIT; | |
| if (!can_alias_backbuffer || backbuffer_dim != swapchain_dimensions) | |
| { | |
| LOGW("Cannot alias with backbuffer, requires extra blit pass!\n"); | |
| LOGW(" Backbuffer: %u x %u, fmt: %u, transform: %u\n", | |
| backbuffer_dim.width, backbuffer_dim.height, | |
| backbuffer_dim.format, backbuffer_dim.transform); | |
| LOGW(" Swapchain: %u x %u, fmt: %u, transform: %u\n", | |
| swapchain_dimensions.width, swapchain_dimensions.height, | |
| swapchain_dimensions.format, swapchain_dimensions.transform); | |
| swapchain_physical_index = RenderResource::Unused; | |
| if ((backbuffer_dim.queues & RENDER_GRAPH_QUEUE_GRAPHICS_BIT) == 0) | |
| backbuffer_dim.queues |= RENDER_GRAPH_QUEUE_ASYNC_GRAPHICS_BIT; | |
| else | |
| backbuffer_dim.queues |= RENDER_GRAPH_QUEUE_GRAPHICS_BIT; | |
| // We will need to sample from the image to blit to backbuffer. | |
| backbuffer_dim.image_usage |= VK_IMAGE_USAGE_SAMPLED_BIT; | |
| // Don't use pre-transform if we can't alias anyways. | |
| if (Vulkan::surface_transform_swaps_xy(backbuffer_dim.transform)) | |
| std::swap(backbuffer_dim.width, backbuffer_dim.height); | |
| backbuffer_dim.transform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; | |
| } | |
| else | |
| physical_dimensions[swapchain_physical_index].flags |= ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT; | |
| // Based on our render graph, figure out the barriers we actually need. | |
| // Some barriers are implicit (transients), and some are redundant, i.e. same texture read in multiple passes. | |
| build_physical_barriers(); | |
| // Figure out which images can alias with each other. | |
| // Also build virtual "transfer" barriers. These things only copy events over to other physical resources. | |
| build_aliases(); | |
| for (auto &physical_pass : physical_passes) | |
| for (auto pass : physical_pass.passes) | |
| passes[pass]->setup(*device); | |
| } | |
| ResourceDimensions RenderGraph::get_resource_dimensions(const RenderBufferResource &resource) const | |
| { | |
| ResourceDimensions dim; | |
| auto &info = resource.get_buffer_info(); | |
| dim.buffer_info = info; | |
| dim.buffer_info.usage |= resource.get_buffer_usage(); | |
| dim.flags |= info.flags; | |
| dim.name = resource.get_name(); | |
| return dim; | |
| } | |
| ResourceDimensions RenderGraph::get_resource_dimensions(const RenderTextureResource &resource) const | |
| { | |
| ResourceDimensions dim; | |
| auto &info = resource.get_attachment_info(); | |
| dim.layers = info.layers; | |
| dim.samples = info.samples; | |
| dim.format = info.format; | |
| dim.queues = resource.get_used_queues(); | |
| dim.image_usage = info.aux_usage | resource.get_image_usage(); | |
| dim.name = resource.get_name(); | |
| dim.flags = info.flags & ~(ATTACHMENT_INFO_SUPPORTS_PREROTATE_BIT | ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT); | |
| if (resource.get_transient_state()) | |
| dim.flags |= ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT; | |
| // Mark the resource as potentially supporting pre-rotate. | |
| // If this resource ends up aliasing with the swapchain, it might go through. | |
| if ((info.flags & ATTACHMENT_INFO_SUPPORTS_PREROTATE_BIT) != 0) | |
| dim.transform = swapchain_dimensions.transform; | |
| switch (info.size_class) | |
| { | |
| case SizeClass::SwapchainRelative: | |
| dim.width = std::max(unsigned(muglm::ceil(info.size_x * swapchain_dimensions.width)), 1u); | |
| dim.height = std::max(unsigned(muglm::ceil(info.size_y * swapchain_dimensions.height)), 1u); | |
| dim.depth = std::max(unsigned(muglm::ceil(info.size_z)), 1u); | |
| if (Vulkan::surface_transform_swaps_xy(swapchain_dimensions.transform)) | |
| std::swap(dim.width, dim.height); | |
| break; | |
| case SizeClass::Absolute: | |
| dim.width = std::max(unsigned(info.size_x), 1u); | |
| dim.height = std::max(unsigned(info.size_y), 1u); | |
| dim.depth = std::max(unsigned(info.size_z), 1u); | |
| break; | |
| case SizeClass::InputRelative: | |
| { | |
| auto itr = resource_to_index.find(info.size_relative_name); | |
| if (itr == end(resource_to_index)) | |
| throw std::logic_error("Resource does not exist."); | |
| auto &input = static_cast<RenderTextureResource &>(*resources[itr->second]); | |
| auto input_dim = get_resource_dimensions(input); | |
| dim.width = std::max(unsigned(muglm::ceil(input_dim.width * info.size_x)), 1u); | |
| dim.height = std::max(unsigned(muglm::ceil(input_dim.height * info.size_y)), 1u); | |
| dim.depth = std::max(unsigned(muglm::ceil(input_dim.depth * info.size_z)), 1u); | |
| break; | |
| } | |
| } | |
| if (dim.format == VK_FORMAT_UNDEFINED) | |
| dim.format = swapchain_dimensions.format; | |
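| // num_levels computes the length of a full mip chain, i.e. floor(log2(max_dim)) + 1. | |
| // For example, a 1920x1080 attachment yields 11 levels. A nonzero declared level count caps the result below. | |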
| const auto num_levels = [](unsigned width, unsigned height, unsigned depth) -> unsigned { | |
| unsigned levels = 0; | |
| unsigned max_dim = std::max(std::max(width, height), depth); | |
| while (max_dim) | |
| { | |
| levels++; | |
| max_dim >>= 1; | |
| } | |
| return levels; | |
| }; | |
| dim.levels = std::min(num_levels(dim.width, dim.height, dim.depth), info.levels == 0 ? ~0u : info.levels); | |
| return dim; | |
| } | |
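| // Fold the per-pass barriers recorded by build_barriers() into per-physical-pass invalidate/flush lists. | |
| // Within a physical pass, only the first invalidate and the last flush of each resource need to be | |
| // visible externally; anything in between is covered by subpass dependencies or is redundant. | |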
| void RenderGraph::build_physical_barriers() | |
| { | |
| auto barrier_itr = begin(pass_barriers); | |
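| // Helpers used when a pass's first use of a resource is a flush (write) with no prior invalidate: | |
| // the flush access/stages are reused as the matching invalidate, widened so that write access | |
| // also covers the corresponding read access, and late fragment tests also cover early fragment tests. | |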
| const auto flush_access_to_invalidate = [](VkAccessFlags flags) -> VkAccessFlags { | |
| if (flags & VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT) | |
| flags |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; | |
| if (flags & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT) | |
| flags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; | |
| if (flags & VK_ACCESS_SHADER_WRITE_BIT) | |
| flags |= VK_ACCESS_SHADER_READ_BIT; | |
| return flags; | |
| }; | |
| const auto flush_stage_to_invalidate = [](VkPipelineStageFlags flags) -> VkPipelineStageFlags { | |
| if (flags & VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT) | |
| flags |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; | |
| return flags; | |
| }; | |
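| // Per-resource state accumulated while walking the subpasses of one physical pass: the layout on | |
| // first use (initial) and last use (final), plus the access/stage masks that must be invalidated | |
| // before and flushed after the physical pass. | |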
| struct ResourceState | |
| { | |
| VkImageLayout initial_layout = VK_IMAGE_LAYOUT_UNDEFINED; | |
| VkImageLayout final_layout = VK_IMAGE_LAYOUT_UNDEFINED; | |
| VkAccessFlags invalidated_types = 0; | |
| VkAccessFlags flushed_types = 0; | |
| VkPipelineStageFlags invalidated_stages = 0; | |
| VkPipelineStageFlags flushed_stages = 0; | |
| }; | |
| // To handle state inside a physical pass. | |
| std::vector<ResourceState> resource_state; | |
| resource_state.reserve(physical_dimensions.size()); | |
| for (auto &physical_pass : physical_passes) | |
| { | |
| resource_state.clear(); | |
| resource_state.resize(physical_dimensions.size()); | |
| // Go over each subpass in this physical pass and observe its use of barriers. | |
| // With multipass, only the first and last use of a resource needs to be considered externally. | |
| // Compute never has multipass. | |
| unsigned subpasses = unsigned(physical_pass.passes.size()); | |
| for (unsigned i = 0; i < subpasses; i++, ++barrier_itr) | |
| { | |
| auto &barriers = *barrier_itr; | |
| auto &invalidates = barriers.invalidate; | |
| auto &flushes = barriers.flush; | |
| for (auto &invalidate : invalidates) | |
| { | |
| auto &res = resource_state[invalidate.resource_index]; | |
| // Transients and swapchain images are handled implicitly. | |
| if ((physical_dimensions[invalidate.resource_index].flags & ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT) != 0 || | |
| invalidate.resource_index == swapchain_physical_index) | |
| { | |
| continue; | |
| } | |
| if (invalidate.history) | |
| { | |
| auto itr = find_if(begin(physical_pass.invalidate), end(physical_pass.invalidate), [&](const Barrier &b) -> bool { | |
| return b.resource_index == invalidate.resource_index && b.history; | |
| }); | |
| if (itr == end(physical_pass.invalidate)) | |
| { | |
| // Storage images should just be in GENERAL all the time instead of SHADER_READ_ONLY_OPTIMAL. | |
| auto layout = physical_dimensions[invalidate.resource_index].is_storage_image() ? | |
| VK_IMAGE_LAYOUT_GENERAL : | |
| invalidate.layout; | |
| // Special-case history barriers. They are a bit different from other barriers. | |
| // We just need to ensure the layout is right and that we avoid write-after-read. | |
| // Even if we see these barriers in multiple render passes, we will not emit duplicates. | |
| physical_pass.invalidate.push_back( | |
| { invalidate.resource_index, layout, invalidate.access, invalidate.stages, true }); | |
| physical_pass.flush.push_back( | |
| { invalidate.resource_index, layout, 0, invalidate.stages, true }); | |
| } | |
| continue; | |
| } | |
| // Only the first use of a resource in a physical pass needs to be handled externally. | |
| if (res.initial_layout == VK_IMAGE_LAYOUT_UNDEFINED) | |
| { | |
| res.invalidated_types |= invalidate.access; | |
| res.invalidated_stages |= invalidate.stages; | |
| // Storage images should just be in GENERAL all the time instead of SHADER_READ_ONLY_OPTIMAL. | |
| if (physical_dimensions[invalidate.resource_index].is_storage_image()) | |
| res.initial_layout = VK_IMAGE_LAYOUT_GENERAL; | |
| else | |
| res.initial_layout = invalidate.layout; | |
| } | |
| // A read-only invalidation can change the layout. | |
| if (physical_dimensions[invalidate.resource_index].is_storage_image()) | |
| res.final_layout = VK_IMAGE_LAYOUT_GENERAL; | |
| else | |
| res.final_layout = invalidate.layout; | |
| // All pending flushes have been invalidated in the appropriate stages already. | |
| // This is relevant if the invalidate happens in subpass #1 and beyond. | |
| res.flushed_types = 0; | |
| res.flushed_stages = 0; | |
| } | |
| for (auto &flush : flushes) | |
| { | |
| auto &res = resource_state[flush.resource_index]; | |
| // Transients and swapchain images are handled implicitly. | |
| if ((physical_dimensions[flush.resource_index].flags & ATTACHMENT_INFO_INTERNAL_TRANSIENT_BIT) != 0 || | |
| flush.resource_index == swapchain_physical_index) | |
| { | |
| continue; | |
| } | |
| // The last use of a resource in a physical pass needs to be handled externally. | |
| res.flushed_types |= flush.access; | |
| res.flushed_stages |= flush.stages; | |
| // Storage images should just be in GENERAL all the time instead of SHADER_READ_ONLY_OPTIMAL. | |
| if (physical_dimensions[flush.resource_index].is_storage_image()) | |
| res.final_layout = VK_IMAGE_LAYOUT_GENERAL; | |
| else | |
| res.final_layout = flush.layout; | |
| // If we didn't have an invalidation before the first flush, we must invalidate first. | |
| // Only the first flush in a render pass needs a matching invalidation. | |
| if (res.initial_layout == VK_IMAGE_LAYOUT_UNDEFINED) | |
| { | |
| // If we end in TRANSFER_SRC_OPTIMAL, we actually start in COLOR_ATTACHMENT_OPTIMAL. | |
| if (flush.layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) | |
| { | |
| res.initial_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; | |
| res.invalidated_stages = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; | |
| res.invalidated_types = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; | |
| } | |
| else | |
| { | |
| res.initial_layout = flush.layout; | |
| res.invalidated_stages = flush_stage_to_invalidate(flush.stages); | |
| res.invalidated_types = flush_access_to_invalidate(flush.access); | |
| } | |
| // We're not reading the resource in this pass, so we might as well transition from UNDEFINED to discard the resource. | |
| physical_pass.discards.push_back(flush.resource_index); | |
| } | |
| } | |
| } | |
| // Now that the subpasses have been studied, look at each resource individually and see how we need to deal | |
| // with the physical render pass as a whole. | |
| for (auto &resource : resource_state) | |
| { | |
| // Resource was not touched in this pass. | |
| if (resource.final_layout == VK_IMAGE_LAYOUT_UNDEFINED && resource.initial_layout == VK_IMAGE_LAYOUT_UNDEFINED) | |
| continue; | |
| VK_ASSERT(resource.final_layout != VK_IMAGE_LAYOUT_UNDEFINED); | |
| unsigned index = unsigned(&resource - resource_state.data()); | |
| physical_pass.invalidate.push_back( | |
| { index, resource.initial_layout, resource.invalidated_types, resource.invalidated_stages, false }); | |
| if (resource.flushed_types) | |
| { | |
| // Did this pass write anything which needs to be flushed? | |
| physical_pass.flush.push_back({ index, resource.final_layout, resource.flushed_types, resource.flushed_stages, false }); | |
| } | |
| else if (resource.invalidated_types) | |
| { | |
| // Did this pass read anything which needs to be protected before it can be written? | |
| // Implement this as a flush with 0 access bits. | |
| // This is essentially how Vulkan expresses a write-after-read hazard. | |
| // The only purpose of this flush barrier is to record the last pass (and stage) in which the resource was used. | |
| // Do not clear last_invalidate_pass, because we can still keep tacking on new access flags, etc. | |
| physical_pass.flush.push_back({ index, resource.final_layout, 0, resource.invalidated_stages, false }); | |
| } | |
| // If we end in TRANSFER_SRC_OPTIMAL, that is a sentinel for needing mipmap generation, so enqueue that here. | |
| if (resource.final_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) | |
| { | |
| physical_pass.mipmap_requests.push_back({ index, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, | |
| VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL }); | |
| } | |
| } | |
| } | |
| } | |
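| // Translate every pass's logical inputs and outputs into per-pass invalidate (read) and flush (write) | |
| // barriers, including the image layout each use requires. The resulting pass_barriers list is consumed | |
| // by build_physical_barriers(). | |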
| void RenderGraph::build_barriers() | |
| { | |
| pass_barriers.clear(); | |
| pass_barriers.reserve(pass_stack.size()); | |
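| // get_access finds the barrier entry for a resource in the given list, or creates a fresh one with | |
| // UNDEFINED layout, so that multiple uses within the same pass merge their access/stage masks. | |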
| const auto get_access = [&](std::vector<Barrier> &barriers, unsigned index, bool history) -> Barrier & { | |
| auto itr = find_if(begin(barriers), end(barriers), [index, history](const Barrier &b) { | |
| return index == b.resource_index && history == b.history; | |
| }); | |
| if (itr != end(barriers)) | |
| return *itr; | |
| else | |
| { | |
| barriers.push_back({ index, VK_IMAGE_LAYOUT_UNDEFINED, 0, 0, history }); | |
| return barriers.back(); | |
| } | |
| }; | |
| for (auto &index : pass_stack) | |
| { | |
| auto &pass = *passes[index]; | |
| Barriers barriers; | |
| const auto get_invalidate_access = [&](unsigned i, bool history) -> Barrier & { | |
| return get_access(barriers.invalidate, i, history); | |
| }; | |
| const auto get_flush_access = [&](unsigned i) -> Barrier & { | |
| return get_access(barriers.flush, i, false); | |
| }; | |
| for (auto &input : pass.get_generic_buffer_inputs()) | |
| { | |
| auto &barrier = get_invalidate_access(input.buffer->get_physical_index(), false); | |
| barrier.access |= input.access; | |
| barrier.stages |= input.stages; | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = input.layout; | |
| } | |
| for (auto &input : pass.get_generic_texture_inputs()) | |
| { | |
| auto &barrier = get_invalidate_access(input.texture->get_physical_index(), false); | |
| barrier.access |= input.access; | |
| barrier.stages |= input.stages; | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = input.layout; | |
| } | |
| for (auto *input : pass.get_history_inputs()) | |
| { | |
| auto &barrier = get_invalidate_access(input->get_physical_index(), true); | |
| barrier.access |= VK_ACCESS_SHADER_READ_BIT; | |
| if ((pass.get_queue() & compute_queues) == 0) | |
| barrier.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; // TODO: Pick appropriate stage. | |
| else | |
| barrier.stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; | |
| } | |
| for (auto *input : pass.get_attachment_inputs()) | |
| { | |
| if (pass.get_queue() & compute_queues) | |
| throw std::logic_error("Only graphics passes can have input attachments."); | |
| auto &barrier = get_invalidate_access(input->get_physical_index(), false); | |
| barrier.access |= VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; | |
| barrier.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; | |
| if (Vulkan::format_has_depth_or_stencil_aspect(input->get_attachment_info().format)) | |
| { | |
| // Need DEPTH_STENCIL_ATTACHMENT_READ here to satisfy loadOp = LOAD. | |
| barrier.access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; | |
| barrier.stages |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; | |
| } | |
| else | |
| { | |
| // Need COLOR_ATTACHMENT_READ here to satisfy loadOp = LOAD. | |
| barrier.access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; | |
| barrier.stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; | |
| } | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; | |
| } | |
| for (auto *input : pass.get_storage_inputs()) | |
| { | |
| if (!input) | |
| continue; | |
| auto &barrier = get_invalidate_access(input->get_physical_index(), false); | |
| barrier.access |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; | |
| if ((pass.get_queue() & compute_queues) == 0) | |
| barrier.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; // TODO: Pick appropriate stage. | |
| else | |
| barrier.stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = VK_IMAGE_LAYOUT_GENERAL; | |
| } | |
| for (auto &input : pass.get_proxy_inputs()) | |
| { | |
| auto &barrier = get_invalidate_access(input.proxy->get_physical_index(), false); | |
| // We will use semaphores to deal with proxies, so skip access flags. | |
| barrier.stages |= input.stages; | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = input.layout; | |
| } | |
| for (auto *input : pass.get_storage_texture_inputs()) | |
| { | |
| if (!input) | |
| continue; | |
| auto &barrier = get_invalidate_access(input->get_physical_index(), false); | |
| barrier.access |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; | |
| if ((pass.get_queue() & compute_queues) == 0) | |
| barrier.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; // TODO: Pick appropriate stage. | |
| else | |
| barrier.stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = VK_IMAGE_LAYOUT_GENERAL; | |
| } | |
| for (auto *input : pass.get_blit_texture_inputs()) | |
| { | |
| if (!input) | |
| continue; | |
| auto &barrier = get_invalidate_access(input->get_physical_index(), false); | |
| barrier.access |= VK_ACCESS_TRANSFER_WRITE_BIT; | |
| barrier.stages |= VK_PIPELINE_STAGE_TRANSFER_BIT; | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; | |
| } | |
| for (auto *input : pass.get_color_inputs()) | |
| { | |
| if (!input) | |
| continue; | |
| if (pass.get_queue() & compute_queues) | |
| throw std::logic_error("Only graphics passes can have color inputs."); | |
| auto &barrier = get_invalidate_access(input->get_physical_index(), false); | |
| barrier.access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; | |
| barrier.stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; | |
| // If the attachment is also bound as an input attachment (programmable blending) | |
| // we need VK_IMAGE_LAYOUT_GENERAL. | |
| if (barrier.layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) | |
| barrier.layout = VK_IMAGE_LAYOUT_GENERAL; | |
| else if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| else | |
| barrier.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; | |
| } | |
| for (auto *input : pass.get_color_scale_inputs()) | |
| { | |
| if (!input) | |
| continue; | |
| if (pass.get_queue() & compute_queues) | |
| throw std::logic_error("Only graphics passes can have scaled color inputs."); | |
| auto &barrier = get_invalidate_access(input->get_physical_index(), false); | |
| barrier.access |= VK_ACCESS_SHADER_READ_BIT; | |
| barrier.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; | |
| } | |
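| // Color outputs: attachments with more than one level and automatic mipmap generation are flushed | |
| // in TRANSFER_SRC_OPTIMAL; build_physical_barriers() treats that layout as a sentinel and records a | |
| // mipmap request for the physical pass. | |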
| for (auto *output : pass.get_color_outputs()) | |
| { | |
| if (pass.get_queue() & compute_queues) | |
| throw std::logic_error("Only graphics passes can have color outputs."); | |
| auto &barrier = get_flush_access(output->get_physical_index()); | |
| if ((physical_dimensions[output->get_physical_index()].levels > 1) && | |
| (physical_dimensions[output->get_physical_index()].flags & ATTACHMENT_INFO_MIPGEN_BIT) != 0) | |
| { | |
| // Access should be 0 here; generate_mipmaps will take care of the invalidation it needs. | |
| barrier.access |= VK_ACCESS_TRANSFER_READ_BIT; // Validation layers complain without this. | |
| barrier.stages |= VK_PIPELINE_STAGE_TRANSFER_BIT; | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; | |
| } | |
| else | |
| { | |
| barrier.access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; | |
| barrier.stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; | |
| // If the attachment is also bound as an input attachment (programmable blending) | |
| // we need VK_IMAGE_LAYOUT_GENERAL. | |
| if (barrier.layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL || | |
| barrier.layout == VK_IMAGE_LAYOUT_GENERAL) | |
| { | |
| barrier.layout = VK_IMAGE_LAYOUT_GENERAL; | |
| } | |
| else if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| else | |
| barrier.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; | |
| } | |
| } | |
| for (auto *output : pass.get_resolve_outputs()) | |
| { | |
| if (pass.get_queue() & compute_queues) | |
| throw std::logic_error("Only graphics passes can resolve outputs."); | |
| auto &barrier = get_flush_access(output->get_physical_index()); | |
| barrier.access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; | |
| barrier.stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; | |
| } | |
| for (auto *output : pass.get_blit_texture_outputs()) | |
| { | |
| auto &barrier = get_invalidate_access(output->get_physical_index(), false); | |
| barrier.access |= VK_ACCESS_TRANSFER_WRITE_BIT; | |
| barrier.stages |= VK_PIPELINE_STAGE_TRANSFER_BIT; | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; | |
| } | |
| for (auto *output : pass.get_storage_outputs()) | |
| { | |
| auto &barrier = get_flush_access(output->get_physical_index()); | |
| barrier.access |= VK_ACCESS_SHADER_WRITE_BIT; | |
| if ((pass.get_queue() & compute_queues) == 0) | |
| barrier.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; // TODO: Pick appropriate stage. | |
| else | |
| barrier.stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = VK_IMAGE_LAYOUT_GENERAL; | |
| } | |
| for (auto &output : pass.get_proxy_outputs()) | |
| { | |
| auto &barrier = get_flush_access(output.proxy->get_physical_index()); | |
| // We will use semaphores to deal with proxies, so skip access flags. | |
| barrier.stages |= output.stages; | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = output.layout; | |
| } | |
| for (auto *output : pass.get_transfer_outputs()) | |
| { | |
| auto &barrier = get_flush_access(output->get_physical_index()); | |
| barrier.access |= VK_ACCESS_TRANSFER_WRITE_BIT; | |
| barrier.stages |= VK_PIPELINE_STAGE_TRANSFER_BIT; | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = VK_IMAGE_LAYOUT_GENERAL; | |
| } | |
| for (auto *output : pass.get_storage_texture_outputs()) | |
| { | |
| auto &barrier = get_flush_access(output->get_physical_index()); | |
| barrier.access |= VK_ACCESS_SHADER_WRITE_BIT; | |
| if ((pass.get_queue() & compute_queues) == 0) | |
| barrier.stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; // TODO: Pick appropriate stage. | |
| else | |
| barrier.stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; | |
| if (barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| barrier.layout = VK_IMAGE_LAYOUT_GENERAL; | |
| } | |
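| // Depth/stencil is handled separately: read-write (output and input), read-only (input only) and | |
| // write-only (output only) each get their own layout and access handling. | |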
| auto *output = pass.get_depth_stencil_output(); | |
| auto *input = pass.get_depth_stencil_input(); | |
| if ((output || input) && (pass.get_queue() & compute_queues)) | |
| throw std::logic_error("Only graphics passes can have depth attachments."); | |
| if (output && input) | |
| { | |
| auto &dst_barrier = get_invalidate_access(input->get_physical_index(), false); | |
| auto &src_barrier = get_flush_access(output->get_physical_index()); | |
| if (dst_barrier.layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) | |
| dst_barrier.layout = VK_IMAGE_LAYOUT_GENERAL; | |
| else if (dst_barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| else | |
| dst_barrier.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; | |
| dst_barrier.access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; | |
| dst_barrier.stages |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; | |
| src_barrier.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; | |
| src_barrier.access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; | |
| src_barrier.stages |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; | |
| } | |
| else if (input) | |
| { | |
| auto &dst_barrier = get_invalidate_access(input->get_physical_index(), false); | |
| if (dst_barrier.layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) | |
| dst_barrier.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; | |
| else if (dst_barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| else | |
| dst_barrier.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; | |
| dst_barrier.access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; | |
| dst_barrier.stages |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; | |
| } | |
| else if (output) | |
| { | |
| auto &src_barrier = get_flush_access(output->get_physical_index()); | |
| if (src_barrier.layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) | |
| src_barrier.layout = VK_IMAGE_LAYOUT_GENERAL; | |
| else if (src_barrier.layout != VK_IMAGE_LAYOUT_UNDEFINED) | |
| throw std::logic_error("Layout mismatch."); | |
| else | |
| src_barrier.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; | |
| src_barrier.access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; | |
| src_barrier.stages |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; | |
| } | |
| pass_barriers.push_back(std::move(barriers)); | |
| } | |
| } | |
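| // Stable de-duplication of a pass list: keep only the first occurrence of each pass index while | |
| // preserving order, e.g. {3, 1, 3, 2, 1} becomes {3, 1, 2}. | |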
| void RenderGraph::filter_passes(std::vector<unsigned> &list) | |
| { | |
| std::unordered_set<unsigned> seen; | |
| auto output_itr = begin(list); | |
| for (auto itr = begin(list); itr != end(list); ++itr) | |
| { | |
| if (!seen.count(*itr)) | |
| { | |
| *output_itr = *itr; | |
| seen.insert(*itr); | |
| ++output_itr; | |
| } | |
| } | |
| list.erase(output_itr, end(list)); | |
| } | |
| void RenderGraph::enable_timestamps(bool enable) | |
| { | |
| enabled_timestamps = enable; | |
| } | |
| void RenderGraph::add_external_lock_interface(const std::string &name, RenderPassExternalLockInterface *iface) | |
| { | |
| external_lock_interfaces[name] = iface; | |
| } | |
| RenderPassExternalLockInterface *RenderGraph::find_external_lock_interface(const std::string &name) const | |
| { | |
| auto itr = external_lock_interfaces.find(name); | |
| if (itr != end(external_lock_interfaces)) | |
| return itr->second; | |
| else | |
| return nullptr; | |
| } | |
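| // Drop all logical and physical state so passes and resources can be declared and baked again from scratch. | |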
| void RenderGraph::reset() | |
| { | |
| passes.clear(); | |
| resources.clear(); | |
| pass_to_index.clear(); | |
| resource_to_index.clear(); | |
| external_lock_interfaces.clear(); | |
| physical_passes.clear(); | |
| physical_dimensions.clear(); | |
| physical_attachments.clear(); | |
| physical_buffers.clear(); | |
| physical_image_attachments.clear(); | |
| physical_events.clear(); | |
| physical_history_events.clear(); | |
| physical_history_image_attachments.clear(); | |
| } | |
| } |