Permalink
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Vulkan/examples/computecullandlod/computecullandlod.cpp
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixes #748
853 lines (722 sloc)
31.4 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| * Vulkan Example - Compute shader culling and LOD using indirect rendering | |
| * | |
| * Copyright (C) 2016-2022 by Sascha Willems - www.saschawillems.de | |
| * | |
| * This code is licensed under the MIT license (MIT) (http://opensource.org/licenses/MIT) | |
| * | |
| */ | |
| #include "vulkanexamplebase.h" | |
| #include "VulkanglTFModel.h" | |
| #include "frustum.hpp" | |
| #define VERTEX_BUFFER_BIND_ID 0 | |
| #define INSTANCE_BUFFER_BIND_ID 1 | |
| #define ENABLE_VALIDATION false | |
| // Total number of objects (^3) in the scene | |
| #if defined(__ANDROID__) | |
| #define OBJECT_COUNT 32 | |
| #else | |
| #define OBJECT_COUNT 64 | |
| #endif | |
| #define MAX_LOD_LEVEL 5 | |
| class VulkanExample : public VulkanExampleBase | |
| { | |
| public: | |
| bool fixedFrustum = false; | |
| // The model contains multiple versions of a single object with different levels of detail | |
| vkglTF::Model lodModel; | |
| // Per-instance data block | |
| struct InstanceData { | |
| glm::vec3 pos; | |
| float scale; | |
| }; | |
| // Contains the instanced data | |
| vks::Buffer instanceBuffer; | |
| // Contains the indirect drawing commands | |
| vks::Buffer indirectCommandsBuffer; | |
| vks::Buffer indirectDrawCountBuffer; | |
| // Indirect draw statistics (updated via compute) | |
| struct { | |
| uint32_t drawCount; // Total number of indirect draw counts to be issued | |
| uint32_t lodCount[MAX_LOD_LEVEL + 1]; // Statistics for number of draws per LOD level (written by compute shader) | |
| } indirectStats; | |
| // Store the indirect draw commands containing index offsets and instance count per object | |
| std::vector<VkDrawIndexedIndirectCommand> indirectCommands; | |
| struct { | |
| glm::mat4 projection; | |
| glm::mat4 modelview; | |
| glm::vec4 cameraPos; | |
| glm::vec4 frustumPlanes[6]; | |
| } uboScene; | |
| struct { | |
| vks::Buffer scene; | |
| } uniformData; | |
| struct { | |
| VkPipeline plants; | |
| } pipelines; | |
| VkPipelineLayout pipelineLayout; | |
| VkDescriptorSet descriptorSet; | |
| VkDescriptorSetLayout descriptorSetLayout; | |
| // Resources for the compute part of the example | |
| struct { | |
| vks::Buffer lodLevelsBuffers; // Contains index start and counts for the different lod levels | |
| VkQueue queue; // Separate queue for compute commands (queue family may differ from the one used for graphics) | |
| VkCommandPool commandPool; // Use a separate command pool (queue family may differ from the one used for graphics) | |
| VkCommandBuffer commandBuffer; // Command buffer storing the dispatch commands and barriers | |
| VkFence fence; // Synchronization fence to avoid rewriting compute CB if still in use | |
| VkSemaphore semaphore; // Used as a wait semaphore for graphics submission | |
| VkDescriptorSetLayout descriptorSetLayout; // Compute shader binding layout | |
| VkDescriptorSet descriptorSet; // Compute shader bindings | |
| VkPipelineLayout pipelineLayout; // Layout of the compute pipeline | |
| VkPipeline pipeline; // Compute pipeline for updating particle positions | |
| } compute; | |
| // View frustum for culling invisible objects | |
| vks::Frustum frustum; | |
| uint32_t objectCount = 0; | |
| VulkanExample() : VulkanExampleBase(ENABLE_VALIDATION) | |
| { | |
| title = "Vulkan Example - Compute cull and lod"; | |
| camera.type = Camera::CameraType::firstperson; | |
| camera.setPerspective(60.0f, (float)width / (float)height, 0.1f, 512.0f); | |
| camera.setTranslation(glm::vec3(0.5f, 0.0f, 0.0f)); | |
| camera.movementSpeed = 5.0f; | |
| memset(&indirectStats, 0, sizeof(indirectStats)); | |
| } | |
| ~VulkanExample() | |
| { | |
| vkDestroyPipeline(device, pipelines.plants, nullptr); | |
| vkDestroyPipelineLayout(device, pipelineLayout, nullptr); | |
| vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr); | |
| instanceBuffer.destroy(); | |
| indirectCommandsBuffer.destroy(); | |
| uniformData.scene.destroy(); | |
| indirectDrawCountBuffer.destroy(); | |
| compute.lodLevelsBuffers.destroy(); | |
| vkDestroyPipelineLayout(device, compute.pipelineLayout, nullptr); | |
| vkDestroyDescriptorSetLayout(device, compute.descriptorSetLayout, nullptr); | |
| vkDestroyPipeline(device, compute.pipeline, nullptr); | |
| vkDestroyFence(device, compute.fence, nullptr); | |
| vkDestroyCommandPool(device, compute.commandPool, nullptr); | |
| vkDestroySemaphore(device, compute.semaphore, nullptr); | |
| } | |
| virtual void getEnabledFeatures() | |
| { | |
| // Enable multi draw indirect if supported | |
| if (deviceFeatures.multiDrawIndirect) { | |
| enabledFeatures.multiDrawIndirect = VK_TRUE; | |
| } | |
| } | |
| void buildCommandBuffers() | |
| { | |
| VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo(); | |
| VkClearValue clearValues[2]; | |
| clearValues[0].color = { { 0.18f, 0.27f, 0.5f, 0.0f } }; | |
| clearValues[1].depthStencil = { 1.0f, 0 }; | |
| VkRenderPassBeginInfo renderPassBeginInfo = vks::initializers::renderPassBeginInfo(); | |
| renderPassBeginInfo.renderPass = renderPass; | |
| renderPassBeginInfo.renderArea.extent.width = width; | |
| renderPassBeginInfo.renderArea.extent.height = height; | |
| renderPassBeginInfo.clearValueCount = 2; | |
| renderPassBeginInfo.pClearValues = clearValues; | |
| for (int32_t i = 0; i < drawCmdBuffers.size(); ++i) | |
| { | |
| // Set target frame buffer | |
| renderPassBeginInfo.framebuffer = frameBuffers[i]; | |
| VK_CHECK_RESULT(vkBeginCommandBuffer(drawCmdBuffers[i], &cmdBufInfo)); | |
| // Acquire barrier | |
| if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) | |
| { | |
| VkBufferMemoryBarrier buffer_barrier = | |
| { | |
| VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | |
| nullptr, | |
| 0, | |
| VK_ACCESS_INDIRECT_COMMAND_READ_BIT, | |
| vulkanDevice->queueFamilyIndices.compute, | |
| vulkanDevice->queueFamilyIndices.graphics, | |
| indirectCommandsBuffer.buffer, | |
| 0, | |
| indirectCommandsBuffer.descriptor.range | |
| }; | |
| vkCmdPipelineBarrier( | |
| drawCmdBuffers[i], | |
| VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, | |
| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, | |
| 0, | |
| 0, nullptr, | |
| 1, &buffer_barrier, | |
| 0, nullptr); | |
| } | |
| vkCmdBeginRenderPass(drawCmdBuffers[i], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE); | |
| VkViewport viewport = vks::initializers::viewport((float)width, (float)height, 0.0f, 1.0f); | |
| vkCmdSetViewport(drawCmdBuffers[i], 0, 1, &viewport); | |
| VkRect2D scissor = vks::initializers::rect2D(width, height, 0, 0); | |
| vkCmdSetScissor(drawCmdBuffers[i], 0, 1, &scissor); | |
| VkDeviceSize offsets[1] = { 0 }; | |
| vkCmdBindDescriptorSets(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &descriptorSet, 0, NULL); | |
| // Mesh containing the LODs | |
| vkCmdBindPipeline(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, pipelines.plants); | |
| vkCmdBindVertexBuffers(drawCmdBuffers[i], VERTEX_BUFFER_BIND_ID, 1, &lodModel.vertices.buffer, offsets); | |
| vkCmdBindVertexBuffers(drawCmdBuffers[i], INSTANCE_BUFFER_BIND_ID, 1, &instanceBuffer.buffer, offsets); | |
| vkCmdBindIndexBuffer(drawCmdBuffers[i], lodModel.indices.buffer, 0, VK_INDEX_TYPE_UINT32); | |
| if (vulkanDevice->features.multiDrawIndirect) | |
| { | |
| vkCmdDrawIndexedIndirect(drawCmdBuffers[i], indirectCommandsBuffer.buffer, 0, static_cast<uint32_t>(indirectCommands.size()), sizeof(VkDrawIndexedIndirectCommand)); | |
| } | |
| else | |
| { | |
| // If multi draw is not available, we must issue separate draw commands | |
| for (auto j = 0; j < indirectCommands.size(); j++) | |
| { | |
| vkCmdDrawIndexedIndirect(drawCmdBuffers[i], indirectCommandsBuffer.buffer, j * sizeof(VkDrawIndexedIndirectCommand), 1, sizeof(VkDrawIndexedIndirectCommand)); | |
| } | |
| } | |
| drawUI(drawCmdBuffers[i]); | |
| vkCmdEndRenderPass(drawCmdBuffers[i]); | |
| // Release barrier | |
| if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) | |
| { | |
| VkBufferMemoryBarrier buffer_barrier = | |
| { | |
| VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | |
| nullptr, | |
| VK_ACCESS_INDIRECT_COMMAND_READ_BIT, | |
| 0, | |
| vulkanDevice->queueFamilyIndices.graphics, | |
| vulkanDevice->queueFamilyIndices.compute, | |
| indirectCommandsBuffer.buffer, | |
| 0, | |
| indirectCommandsBuffer.descriptor.range | |
| }; | |
| vkCmdPipelineBarrier( | |
| drawCmdBuffers[i], | |
| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, | |
| VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, | |
| 0, | |
| 0, nullptr, | |
| 1, &buffer_barrier, | |
| 0, nullptr); | |
| } | |
| VK_CHECK_RESULT(vkEndCommandBuffer(drawCmdBuffers[i])); | |
| } | |
| } | |
| void loadAssets() | |
| { | |
| const uint32_t glTFLoadingFlags = vkglTF::FileLoadingFlags::PreTransformVertices | vkglTF::FileLoadingFlags::PreMultiplyVertexColors | vkglTF::FileLoadingFlags::FlipY; | |
| lodModel.loadFromFile(getAssetPath() + "models/suzanne_lods.gltf", vulkanDevice, queue, glTFLoadingFlags); | |
| } | |
| void buildComputeCommandBuffer() | |
| { | |
| VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo(); | |
| VK_CHECK_RESULT(vkBeginCommandBuffer(compute.commandBuffer, &cmdBufInfo)); | |
| // Acquire barrier | |
| // Add memory barrier to ensure that the indirect commands have been consumed before the compute shader updates them | |
| if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) | |
| { | |
| VkBufferMemoryBarrier buffer_barrier = | |
| { | |
| VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | |
| nullptr, | |
| 0, | |
| VK_ACCESS_SHADER_WRITE_BIT, | |
| vulkanDevice->queueFamilyIndices.graphics, | |
| vulkanDevice->queueFamilyIndices.compute, | |
| indirectCommandsBuffer.buffer, | |
| 0, | |
| indirectCommandsBuffer.descriptor.range | |
| }; | |
| vkCmdPipelineBarrier( | |
| compute.commandBuffer, | |
| VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, | |
| VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | |
| VK_FLAGS_NONE, | |
| 0, nullptr, | |
| 1, &buffer_barrier, | |
| 0, nullptr); | |
| } | |
| vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipeline); | |
| vkCmdBindDescriptorSets(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineLayout, 0, 1, &compute.descriptorSet, 0, 0); | |
| // Clear the buffer that the compute shader pass will write statistics and draw calls to | |
| vkCmdFillBuffer(compute.commandBuffer, indirectDrawCountBuffer.buffer, 0, indirectCommandsBuffer.descriptor.range, 0); | |
| // This barrier ensures that the fill command is finished before the compute shader can start writing to the buffer | |
| VkMemoryBarrier memoryBarrier = vks::initializers::memoryBarrier(); | |
| memoryBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | |
| memoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; | |
| vkCmdPipelineBarrier( | |
| compute.commandBuffer, | |
| VK_PIPELINE_STAGE_TRANSFER_BIT, | |
| VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | |
| VK_FLAGS_NONE, | |
| 1, &memoryBarrier, | |
| 0, nullptr, | |
| 0, nullptr); | |
| // Dispatch the compute job | |
| // The compute shader will do the frustum culling and adjust the indirect draw calls depending on object visibility. | |
| // It also determines the lod to use depending on distance to the viewer. | |
| vkCmdDispatch(compute.commandBuffer, objectCount / 16, 1, 1); | |
| // Release barrier | |
| // Add memory barrier to ensure that the compute shader has finished writing the indirect command buffer before it's consumed | |
| if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) | |
| { | |
| VkBufferMemoryBarrier buffer_barrier = | |
| { | |
| VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | |
| nullptr, | |
| VK_ACCESS_SHADER_WRITE_BIT, | |
| 0, | |
| vulkanDevice->queueFamilyIndices.compute, | |
| vulkanDevice->queueFamilyIndices.graphics, | |
| indirectCommandsBuffer.buffer, | |
| 0, | |
| indirectCommandsBuffer.descriptor.range | |
| }; | |
| vkCmdPipelineBarrier( | |
| compute.commandBuffer, | |
| VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | |
| VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, | |
| VK_FLAGS_NONE, | |
| 0, nullptr, | |
| 1, &buffer_barrier, | |
| 0, nullptr); | |
| } | |
| // todo: barrier for indirect stats buffer? | |
| vkEndCommandBuffer(compute.commandBuffer); | |
| } | |
| void setupDescriptorPool() | |
| { | |
| std::vector<VkDescriptorPoolSize> poolSizes = { | |
| vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2), | |
| vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 4) | |
| }; | |
| VkDescriptorPoolCreateInfo descriptorPoolInfo = vks::initializers::descriptorPoolCreateInfo(poolSizes, 2); | |
| VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolInfo, nullptr, &descriptorPool)); | |
| } | |
| void setupDescriptorSetLayout() | |
| { | |
| std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings = { | |
| // Binding 0: Vertex shader uniform buffer | |
| vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_VERTEX_BIT,0), | |
| }; | |
| VkDescriptorSetLayoutCreateInfo descriptorLayout = vks::initializers::descriptorSetLayoutCreateInfo(setLayoutBindings); | |
| VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &descriptorSetLayout)); | |
| VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = vks::initializers::pipelineLayoutCreateInfo(&descriptorSetLayout, 1); | |
| VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pPipelineLayoutCreateInfo, nullptr, &pipelineLayout)); | |
| } | |
| void setupDescriptorSet() | |
| { | |
| VkDescriptorSetAllocateInfo allocInfo = vks::initializers::descriptorSetAllocateInfo(descriptorPool, &descriptorSetLayout, 1); | |
| VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet)); | |
| std::vector<VkWriteDescriptorSet> writeDescriptorSets = { | |
| // Binding 0: Vertex shader uniform buffer | |
| vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0, &uniformData.scene.descriptor), | |
| }; | |
| vkUpdateDescriptorSets(device, static_cast<uint32_t>(writeDescriptorSets.size()), writeDescriptorSets.data(), 0, nullptr); | |
| } | |
| void preparePipelines() | |
| { | |
| // This example uses two different input states, one for the instanced part and one for non-instanced rendering | |
| VkPipelineVertexInputStateCreateInfo inputState = vks::initializers::pipelineVertexInputStateCreateInfo(); | |
| std::vector<VkVertexInputBindingDescription> bindingDescriptions; | |
| std::vector<VkVertexInputAttributeDescription> attributeDescriptions; | |
| // Vertex input bindings | |
| // The instancing pipeline uses a vertex input state with two bindings | |
| bindingDescriptions = { | |
| // Binding point 0: Mesh vertex layout description at per-vertex rate | |
| vks::initializers::vertexInputBindingDescription(VERTEX_BUFFER_BIND_ID, sizeof(vkglTF::Vertex), VK_VERTEX_INPUT_RATE_VERTEX), | |
| // Binding point 1: Instanced data at per-instance rate | |
| vks::initializers::vertexInputBindingDescription(INSTANCE_BUFFER_BIND_ID, sizeof(InstanceData), VK_VERTEX_INPUT_RATE_INSTANCE) | |
| }; | |
| // Vertex attribute bindings | |
| attributeDescriptions = { | |
| // Per-vertex attributes | |
| // These are advanced for each vertex fetched by the vertex shader | |
| vks::initializers::vertexInputAttributeDescription(VERTEX_BUFFER_BIND_ID, 0, VK_FORMAT_R32G32B32_SFLOAT, offsetof(vkglTF::Vertex, pos)), // Location 0: Position | |
| vks::initializers::vertexInputAttributeDescription(VERTEX_BUFFER_BIND_ID, 1, VK_FORMAT_R32G32B32_SFLOAT, offsetof(vkglTF::Vertex, normal)), // Location 1: Normal | |
| vks::initializers::vertexInputAttributeDescription(VERTEX_BUFFER_BIND_ID, 2, VK_FORMAT_R32G32B32_SFLOAT, offsetof(vkglTF::Vertex, color)), // Location 2: Texture coordinates | |
| // Per-Instance attributes | |
| // These are fetched for each instance rendered | |
| vks::initializers::vertexInputAttributeDescription(INSTANCE_BUFFER_BIND_ID, 4, VK_FORMAT_R32G32B32_SFLOAT, offsetof(InstanceData, pos)), // Location 4: Position | |
| vks::initializers::vertexInputAttributeDescription(INSTANCE_BUFFER_BIND_ID, 5, VK_FORMAT_R32_SFLOAT, offsetof(InstanceData, scale)), // Location 5: Scale | |
| }; | |
| inputState.pVertexBindingDescriptions = bindingDescriptions.data(); | |
| inputState.pVertexAttributeDescriptions = attributeDescriptions.data(); | |
| inputState.vertexBindingDescriptionCount = static_cast<uint32_t>(bindingDescriptions.size()); | |
| inputState.vertexAttributeDescriptionCount = static_cast<uint32_t>(attributeDescriptions.size()); | |
| VkPipelineInputAssemblyStateCreateInfo inputAssemblyState = vks::initializers::pipelineInputAssemblyStateCreateInfo(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 0, VK_FALSE); | |
| VkPipelineRasterizationStateCreateInfo rasterizationState = vks::initializers::pipelineRasterizationStateCreateInfo(VK_POLYGON_MODE_FILL, VK_CULL_MODE_BACK_BIT, VK_FRONT_FACE_COUNTER_CLOCKWISE, 0); | |
| VkPipelineColorBlendAttachmentState blendAttachmentState = vks::initializers::pipelineColorBlendAttachmentState(0xf, VK_FALSE); | |
| VkPipelineColorBlendStateCreateInfo colorBlendState = vks::initializers::pipelineColorBlendStateCreateInfo(1, &blendAttachmentState); | |
| VkPipelineDepthStencilStateCreateInfo depthStencilState = vks::initializers::pipelineDepthStencilStateCreateInfo(VK_TRUE, VK_TRUE, VK_COMPARE_OP_LESS_OR_EQUAL); | |
| VkPipelineViewportStateCreateInfo viewportState = vks::initializers::pipelineViewportStateCreateInfo(1, 1, 0); | |
| VkPipelineMultisampleStateCreateInfo multisampleState = vks::initializers::pipelineMultisampleStateCreateInfo(VK_SAMPLE_COUNT_1_BIT, 0); | |
| std::vector<VkDynamicState> dynamicStateEnables = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}; | |
| VkPipelineDynamicStateCreateInfo dynamicState = vks::initializers::pipelineDynamicStateCreateInfo(dynamicStateEnables); | |
| std::array<VkPipelineShaderStageCreateInfo, 2> shaderStages; | |
| VkGraphicsPipelineCreateInfo pipelineCreateInfo = vks::initializers::pipelineCreateInfo(pipelineLayout, renderPass); | |
| pipelineCreateInfo.pVertexInputState = &inputState; | |
| pipelineCreateInfo.pInputAssemblyState = &inputAssemblyState; | |
| pipelineCreateInfo.pRasterizationState = &rasterizationState; | |
| pipelineCreateInfo.pColorBlendState = &colorBlendState; | |
| pipelineCreateInfo.pMultisampleState = &multisampleState; | |
| pipelineCreateInfo.pViewportState = &viewportState; | |
| pipelineCreateInfo.pDepthStencilState = &depthStencilState; | |
| pipelineCreateInfo.pDynamicState = &dynamicState; | |
| pipelineCreateInfo.stageCount = static_cast<uint32_t>(shaderStages.size()); | |
| pipelineCreateInfo.pStages = shaderStages.data(); | |
| // Indirect (and instanced) pipeline for the plants | |
| shaderStages[0] = loadShader(getShadersPath() + "computecullandlod/indirectdraw.vert.spv", VK_SHADER_STAGE_VERTEX_BIT); | |
| shaderStages[1] = loadShader(getShadersPath() + "computecullandlod/indirectdraw.frag.spv", VK_SHADER_STAGE_FRAGMENT_BIT); | |
| VK_CHECK_RESULT(vkCreateGraphicsPipelines(device, pipelineCache, 1, &pipelineCreateInfo, nullptr, &pipelines.plants)); | |
| } | |
| void prepareBuffers() | |
| { | |
| objectCount = OBJECT_COUNT * OBJECT_COUNT * OBJECT_COUNT; | |
| vks::Buffer stagingBuffer; | |
| std::vector<InstanceData> instanceData(objectCount); | |
| indirectCommands.resize(objectCount); | |
| // Indirect draw commands | |
| for (uint32_t x = 0; x < OBJECT_COUNT; x++) | |
| { | |
| for (uint32_t y = 0; y < OBJECT_COUNT; y++) | |
| { | |
| for (uint32_t z = 0; z < OBJECT_COUNT; z++) | |
| { | |
| uint32_t index = x + y * OBJECT_COUNT + z * OBJECT_COUNT * OBJECT_COUNT; | |
| indirectCommands[index].instanceCount = 1; | |
| indirectCommands[index].firstInstance = index; | |
| // firstIndex and indexCount are written by the compute shader | |
| } | |
| } | |
| } | |
| indirectStats.drawCount = static_cast<uint32_t>(indirectCommands.size()); | |
| VK_CHECK_RESULT(vulkanDevice->createBuffer( | |
| VK_BUFFER_USAGE_TRANSFER_SRC_BIT, | |
| VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, | |
| &stagingBuffer, | |
| indirectCommands.size() * sizeof(VkDrawIndexedIndirectCommand), | |
| indirectCommands.data())); | |
| VK_CHECK_RESULT(vulkanDevice->createBuffer( | |
| VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | |
| VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, | |
| &indirectCommandsBuffer, | |
| stagingBuffer.size)); | |
| vulkanDevice->copyBuffer(&stagingBuffer, &indirectCommandsBuffer, queue); | |
| stagingBuffer.destroy(); | |
| VK_CHECK_RESULT(vulkanDevice->createBuffer( | |
| VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | |
| VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, | |
| &indirectDrawCountBuffer, | |
| sizeof(indirectStats))); | |
| // Map for host access | |
| VK_CHECK_RESULT(indirectDrawCountBuffer.map()); | |
| // Instance data | |
| for (uint32_t x = 0; x < OBJECT_COUNT; x++) | |
| { | |
| for (uint32_t y = 0; y < OBJECT_COUNT; y++) | |
| { | |
| for (uint32_t z = 0; z < OBJECT_COUNT; z++) | |
| { | |
| uint32_t index = x + y * OBJECT_COUNT + z * OBJECT_COUNT * OBJECT_COUNT; | |
| instanceData[index].pos = glm::vec3((float)x, (float)y, (float)z) - glm::vec3((float)OBJECT_COUNT / 2.0f); | |
| instanceData[index].scale = 2.0f; | |
| } | |
| } | |
| } | |
| VK_CHECK_RESULT(vulkanDevice->createBuffer( | |
| VK_BUFFER_USAGE_TRANSFER_SRC_BIT, | |
| VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, | |
| &stagingBuffer, | |
| instanceData.size() * sizeof(InstanceData), | |
| instanceData.data())); | |
| VK_CHECK_RESULT(vulkanDevice->createBuffer( | |
| VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | |
| VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, | |
| &instanceBuffer, | |
| stagingBuffer.size)); | |
| // Copy from staging buffer to instance buffer | |
| VkCommandBuffer copyCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); | |
| VkBufferCopy copyRegion = {}; | |
| copyRegion.size = stagingBuffer.size; | |
| vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, instanceBuffer.buffer, 1, ©Region); | |
| // Add an initial release barrier to the graphics queue, | |
| // so that when the compute command buffer executes for the first time | |
| // it doesn't complain about a lack of a corresponding "release" to its "acquire" | |
| if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) | |
| { VkBufferMemoryBarrier buffer_barrier = | |
| { | |
| VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | |
| nullptr, | |
| VK_ACCESS_INDIRECT_COMMAND_READ_BIT, | |
| 0, | |
| vulkanDevice->queueFamilyIndices.graphics, | |
| vulkanDevice->queueFamilyIndices.compute, | |
| indirectCommandsBuffer.buffer, | |
| 0, | |
| indirectCommandsBuffer.descriptor.range | |
| }; | |
| vkCmdPipelineBarrier( | |
| copyCmd, | |
| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, | |
| VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, | |
| 0, | |
| 0, nullptr, | |
| 1, &buffer_barrier, | |
| 0, nullptr); | |
| } | |
| vulkanDevice->flushCommandBuffer(copyCmd, queue, true); | |
| stagingBuffer.destroy(); | |
| // Shader storage buffer containing index offsets and counts for the LODs | |
| struct LOD | |
| { | |
| uint32_t firstIndex; | |
| uint32_t indexCount; | |
| float distance; | |
| float _pad0; | |
| }; | |
| std::vector<LOD> LODLevels; | |
| uint32_t n = 0; | |
| for (auto node : lodModel.nodes) | |
| { | |
| LOD lod; | |
| lod.firstIndex = node->mesh->primitives[0]->firstIndex; // First index for this LOD | |
| lod.indexCount = node->mesh->primitives[0]->indexCount; // Index count for this LOD | |
| lod.distance = 5.0f + n * 5.0f; // Starting distance (to viewer) for this LOD | |
| n++; | |
| LODLevels.push_back(lod); | |
| } | |
| VK_CHECK_RESULT(vulkanDevice->createBuffer( | |
| VK_BUFFER_USAGE_TRANSFER_SRC_BIT, | |
| VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, | |
| &stagingBuffer, | |
| LODLevels.size() * sizeof(LOD), | |
| LODLevels.data())); | |
| VK_CHECK_RESULT(vulkanDevice->createBuffer( | |
| VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | |
| VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, | |
| &compute.lodLevelsBuffers, | |
| stagingBuffer.size)); | |
| vulkanDevice->copyBuffer(&stagingBuffer, &compute.lodLevelsBuffers, queue); | |
| stagingBuffer.destroy(); | |
| // Scene uniform buffer | |
| VK_CHECK_RESULT(vulkanDevice->createBuffer( | |
| VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, | |
| VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, | |
| &uniformData.scene, | |
| sizeof(uboScene))); | |
| VK_CHECK_RESULT(uniformData.scene.map()); | |
| updateUniformBuffer(true); | |
| } | |
| void prepareCompute() | |
| { | |
| // Get a compute capable device queue | |
| vkGetDeviceQueue(device, vulkanDevice->queueFamilyIndices.compute, 0, &compute.queue); | |
| // Create compute pipeline | |
| // Compute pipelines are created separate from graphics pipelines even if they use the same queue (family index) | |
| std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings = { | |
| // Binding 0: Instance input data buffer | |
| vks::initializers::descriptorSetLayoutBinding( | |
| VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
| VK_SHADER_STAGE_COMPUTE_BIT, | |
| 0), | |
| // Binding 1: Indirect draw command output buffer (input) | |
| vks::initializers::descriptorSetLayoutBinding( | |
| VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
| VK_SHADER_STAGE_COMPUTE_BIT, | |
| 1), | |
| // Binding 2: Uniform buffer with global matrices (input) | |
| vks::initializers::descriptorSetLayoutBinding( | |
| VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, | |
| VK_SHADER_STAGE_COMPUTE_BIT, | |
| 2), | |
| // Binding 3: Indirect draw stats (output) | |
| vks::initializers::descriptorSetLayoutBinding( | |
| VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
| VK_SHADER_STAGE_COMPUTE_BIT, | |
| 3), | |
| // Binding 4: LOD info (input) | |
| vks::initializers::descriptorSetLayoutBinding( | |
| VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
| VK_SHADER_STAGE_COMPUTE_BIT, | |
| 4), | |
| }; | |
| VkDescriptorSetLayoutCreateInfo descriptorLayout = | |
| vks::initializers::descriptorSetLayoutCreateInfo( | |
| setLayoutBindings.data(), | |
| static_cast<uint32_t>(setLayoutBindings.size())); | |
| VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &compute.descriptorSetLayout)); | |
| VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = | |
| vks::initializers::pipelineLayoutCreateInfo( | |
| &compute.descriptorSetLayout, | |
| 1); | |
| VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pPipelineLayoutCreateInfo, nullptr, &compute.pipelineLayout)); | |
| VkDescriptorSetAllocateInfo allocInfo = | |
| vks::initializers::descriptorSetAllocateInfo( | |
| descriptorPool, | |
| &compute.descriptorSetLayout, | |
| 1); | |
| VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &compute.descriptorSet)); | |
| std::vector<VkWriteDescriptorSet> computeWriteDescriptorSets = | |
| { | |
| // Binding 0: Instance input data buffer | |
| vks::initializers::writeDescriptorSet( | |
| compute.descriptorSet, | |
| VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
| 0, | |
| &instanceBuffer.descriptor), | |
| // Binding 1: Indirect draw command output buffer | |
| vks::initializers::writeDescriptorSet( | |
| compute.descriptorSet, | |
| VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
| 1, | |
| &indirectCommandsBuffer.descriptor), | |
| // Binding 2: Uniform buffer with global matrices | |
| vks::initializers::writeDescriptorSet( | |
| compute.descriptorSet, | |
| VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, | |
| 2, | |
| &uniformData.scene.descriptor), | |
| // Binding 3: Atomic counter (written in shader) | |
| vks::initializers::writeDescriptorSet( | |
| compute.descriptorSet, | |
| VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
| 3, | |
| &indirectDrawCountBuffer.descriptor), | |
| // Binding 4: LOD info | |
| vks::initializers::writeDescriptorSet( | |
| compute.descriptorSet, | |
| VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
| 4, | |
| &compute.lodLevelsBuffers.descriptor) | |
| }; | |
| vkUpdateDescriptorSets(device, static_cast<uint32_t>(computeWriteDescriptorSets.size()), computeWriteDescriptorSets.data(), 0, NULL); | |
| // Create pipeline | |
| VkComputePipelineCreateInfo computePipelineCreateInfo = vks::initializers::computePipelineCreateInfo(compute.pipelineLayout, 0); | |
| computePipelineCreateInfo.stage = loadShader(getShadersPath() + "computecullandlod/cull.comp.spv", VK_SHADER_STAGE_COMPUTE_BIT); | |
| // Use specialization constants to pass max. level of detail (determined by no. of meshes) | |
| VkSpecializationMapEntry specializationEntry{}; | |
| specializationEntry.constantID = 0; | |
| specializationEntry.offset = 0; | |
| specializationEntry.size = sizeof(uint32_t); | |
| uint32_t specializationData = static_cast<uint32_t>(lodModel.nodes.size()) - 1; | |
| VkSpecializationInfo specializationInfo; | |
| specializationInfo.mapEntryCount = 1; | |
| specializationInfo.pMapEntries = &specializationEntry; | |
| specializationInfo.dataSize = sizeof(specializationData); | |
| specializationInfo.pData = &specializationData; | |
| computePipelineCreateInfo.stage.pSpecializationInfo = &specializationInfo; | |
| VK_CHECK_RESULT(vkCreateComputePipelines(device, pipelineCache, 1, &computePipelineCreateInfo, nullptr, &compute.pipeline)); | |
| // Separate command pool as queue family for compute may be different than graphics | |
| VkCommandPoolCreateInfo cmdPoolInfo = {}; | |
| cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; | |
| cmdPoolInfo.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; | |
| cmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; | |
| VK_CHECK_RESULT(vkCreateCommandPool(device, &cmdPoolInfo, nullptr, &compute.commandPool)); | |
| // Create a command buffer for compute operations | |
| VkCommandBufferAllocateInfo cmdBufAllocateInfo = | |
| vks::initializers::commandBufferAllocateInfo( | |
| compute.commandPool, | |
| VK_COMMAND_BUFFER_LEVEL_PRIMARY, | |
| 1); | |
| VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &compute.commandBuffer)); | |
| // Fence for compute CB sync | |
| VkFenceCreateInfo fenceCreateInfo = vks::initializers::fenceCreateInfo(VK_FENCE_CREATE_SIGNALED_BIT); | |
| VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &compute.fence)); | |
| VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo(); | |
| VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &compute.semaphore)); | |
| // Build a single command buffer containing the compute dispatch commands | |
| buildComputeCommandBuffer(); | |
| } | |
| void updateUniformBuffer(bool viewChanged) | |
| { | |
| if (viewChanged) | |
| { | |
| uboScene.projection = camera.matrices.perspective; | |
| uboScene.modelview = camera.matrices.view; | |
| if (!fixedFrustum) | |
| { | |
| uboScene.cameraPos = glm::vec4(camera.position, 1.0f) * -1.0f; | |
| frustum.update(uboScene.projection * uboScene.modelview); | |
| memcpy(uboScene.frustumPlanes, frustum.planes.data(), sizeof(glm::vec4) * 6); | |
| } | |
| } | |
| memcpy(uniformData.scene.mapped, &uboScene, sizeof(uboScene)); | |
| } | |
| void draw() | |
| { | |
| VulkanExampleBase::prepareFrame(); | |
| // Submit compute shader for frustum culling | |
| // Wait for fence to ensure that compute buffer writes have finished | |
| vkWaitForFences(device, 1, &compute.fence, VK_TRUE, UINT64_MAX); | |
| vkResetFences(device, 1, &compute.fence); | |
| VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo(); | |
| computeSubmitInfo.commandBufferCount = 1; | |
| computeSubmitInfo.pCommandBuffers = &compute.commandBuffer; | |
| computeSubmitInfo.signalSemaphoreCount = 1; | |
| computeSubmitInfo.pSignalSemaphores = &compute.semaphore; | |
| VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, VK_NULL_HANDLE)); | |
| // Submit graphics command buffer | |
| submitInfo.commandBufferCount = 1; | |
| submitInfo.pCommandBuffers = &drawCmdBuffers[currentBuffer]; | |
| // Wait on present and compute semaphores | |
| std::array<VkPipelineStageFlags,2> stageFlags = { | |
| VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, | |
| VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | |
| }; | |
| std::array<VkSemaphore,2> waitSemaphores = { | |
| semaphores.presentComplete, // Wait for presentation to finished | |
| compute.semaphore // Wait for compute to finish | |
| }; | |
| submitInfo.pWaitSemaphores = waitSemaphores.data(); | |
| submitInfo.waitSemaphoreCount = static_cast<uint32_t>(waitSemaphores.size()); | |
| submitInfo.pWaitDstStageMask = stageFlags.data(); | |
| // Submit to queue | |
| VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, compute.fence)); | |
| VulkanExampleBase::submitFrame(); | |
| // Get draw count from compute | |
| memcpy(&indirectStats, indirectDrawCountBuffer.mapped, sizeof(indirectStats)); | |
| } | |
| void prepare() | |
| { | |
| VulkanExampleBase::prepare(); | |
| loadAssets(); | |
| prepareBuffers(); | |
| setupDescriptorSetLayout(); | |
| preparePipelines(); | |
| setupDescriptorPool(); | |
| setupDescriptorSet(); | |
| prepareCompute(); | |
| buildCommandBuffers(); | |
| prepared = true; | |
| } | |
| virtual void render() | |
| { | |
| if (!prepared) | |
| { | |
| return; | |
| } | |
| draw(); | |
| if (camera.updated) | |
| { | |
| updateUniformBuffer(true); | |
| } | |
| } | |
| virtual void viewChanged() | |
| { | |
| updateUniformBuffer(true); | |
| } | |
| virtual void OnUpdateUIOverlay(vks::UIOverlay *overlay) | |
| { | |
| if (overlay->header("Settings")) { | |
| if (overlay->checkBox("Freeze frustum", &fixedFrustum)) { | |
| updateUniformBuffer(true); | |
| } | |
| } | |
| if (overlay->header("Statistics")) { | |
| overlay->text("Visible objects: %d", indirectStats.drawCount); | |
| for (uint32_t i = 0; i < MAX_LOD_LEVEL + 1; i++) { | |
| overlay->text("LOD %d: %d", i, indirectStats.lodCount[i]); | |
| } | |
| } | |
| } | |
| }; | |
| VULKAN_EXAMPLE_MAIN() |