diff --git a/.gitignore b/.gitignore index fedf170..1cf8391 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,6 @@ compile_commands.json *.manifest # generated folders +_Bin/ _Build/ -_Compiler/ _NRD_SDK/ diff --git a/1-Deploy.bat b/1-Deploy.bat index 54e3af2..4cd318a 100644 --- a/1-Deploy.bat +++ b/1-Deploy.bat @@ -2,8 +2,8 @@ git submodule update --init --recursive -mkdir "_Compiler" +mkdir "_Build" -cd "_Compiler" +cd "_Build" cmake .. -A x64 cd .. diff --git a/1-Deploy.sh b/1-Deploy.sh index 9445ad5..3bfd2b9 100644 --- a/1-Deploy.sh +++ b/1-Deploy.sh @@ -6,8 +6,8 @@ chmod +x "4-Clean.sh" git submodule update --init --recursive -mkdir -p "_Compiler" +mkdir -p "_Build" -cd "_Compiler" +cd "_Build" cmake .. cd .. diff --git a/2-Build.bat b/2-Build.bat index b160ec3..ea85fbd 100644 --- a/2-Build.bat +++ b/2-Build.bat @@ -1,6 +1,6 @@ @echo off -cd "_Compiler" +cd "_Build" cmake --build . --config Release -j 4 cmake --build . --config Debug -j 4 cd .. diff --git a/2-Build.sh b/2-Build.sh index ae972c4..4368a7d 100644 --- a/2-Build.sh +++ b/2-Build.sh @@ -1,8 +1,8 @@ #!/bin/sh -mkdir -p "_Compiler" +mkdir -p "_Build" -cd "_Compiler" +cd "_Build" cmake .. cmake --build . --config Release -j 4 cmake --build . 
--config Debug -j 4 diff --git a/3-Prepare NRD SDK.bat b/3-Prepare NRD SDK.bat index ecfdaac..66e9d34 100644 --- a/3-Prepare NRD SDK.bat +++ b/3-Prepare NRD SDK.bat @@ -34,12 +34,12 @@ mkdir "_NRD_SDK\Shaders\Include" cd "_NRD_SDK" copy "..\%NRD_DIR%\Include\*" "Include" -copy "..\_Build\Debug\NRD.dll" "Lib\Debug" -copy "..\_Build\Debug\NRD.lib" "Lib\Debug" -copy "..\_Build\Debug\NRD.pdb" "Lib\Debug" -copy "..\_Build\Release\NRD.dll" "Lib\Release" -copy "..\_Build\Release\NRD.lib" "Lib\Release" -copy "..\_Build\Release\NRD.pdb" "Lib\Release" +copy "..\_Bin\Debug\NRD.dll" "Lib\Debug" +copy "..\_Bin\Debug\NRD.lib" "Lib\Debug" +copy "..\_Bin\Debug\NRD.pdb" "Lib\Debug" +copy "..\_Bin\Release\NRD.dll" "Lib\Release" +copy "..\_Bin\Release\NRD.lib" "Lib\Release" +copy "..\_Bin\Release\NRD.pdb" "Lib\Release" copy "..\%NRD_DIR%\Shaders\Include\NRD.hlsli" "Shaders\Include" copy "..\%NRD_DIR%\Shaders\Include\NRDEncoding.hlsli" "Shaders\Include" copy "..\%NRD_DIR%\LICENSE.txt" "." diff --git a/3-Prepare NRD SDK.sh b/3-Prepare NRD SDK.sh index 43388c9..958619e 100644 --- a/3-Prepare NRD SDK.sh +++ b/3-Prepare NRD SDK.sh @@ -13,8 +13,8 @@ mkdir -p "_NRD_SDK/Shaders/Include" cd "_NRD_SDK" cp -r ../$NRD_DIR/Include/ "Include" -cp -H ../_Build/Debug/libNRD.so "Lib/Debug" -cp -H ../_Build/Release/libNRD.so "Lib/Release" +cp -H ../_Bin/Debug/libNRD.so "Lib/Debug" +cp -H ../_Bin/Release/libNRD.so "Lib/Release" cp ../$NRD_DIR/Shaders/Include/NRD.hlsli "Shaders/Include" cp ../$NRD_DIR/Shaders/Include/NRDEncoding.hlsli "Shaders/Include" cp ../$NRD_DIR/LICENSE.txt "." 
diff --git a/4-Clean.bat b/4-Clean.bat index d01f753..46f07bb 100644 --- a/4-Clean.bat +++ b/4-Clean.bat @@ -2,7 +2,7 @@ if exist "build" rd /q /s "build" +if exist "_Bin" rd /q /s "_Bin" if exist "_Build" rd /q /s "_Build" -if exist "_Compiler" rd /q /s "_Compiler" if exist "_Shaders" rd /q /s "_Shaders" if exist "_NRD_SDK" rd /q /s "_NRD_SDK" diff --git a/4-Clean.sh b/4-Clean.sh index ce455c0..d2e5838 100644 --- a/4-Clean.sh +++ b/4-Clean.sh @@ -2,7 +2,7 @@ rm -rf "build" +rm -rf "_Bin" rm -rf "_Build" -rm -rf "_Compiler" rm -rf "_Shaders" rm -rf "_NRD_SDK" diff --git a/CMakeLists.txt b/CMakeLists.txt index 936eafc..0029541 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,17 +23,17 @@ endif () # Cached if (NOT IS_SUBMODULE) set (CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "") + set (CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/_Bin" CACHE STRING "") endif () -# Generate PDB for Release builds -set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi") -set (CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF") - set (NRD_DXC_CUSTOM_PATH "custom/path/to/dxc" CACHE STRING "Custom DXC to use if Vulkan SDK is not installed") set (NRD_SHADERS_PATH "" CACHE STRING "Shader output path override") set (NRD_NORMAL_ENCODING "2" CACHE STRING "Normal encoding variant (0-4, matches nrd::NormalEncoding)") set (NRD_ROUGHNESS_ENCODING "1" CACHE STRING "Roughness encoding variant (0-2, matches nrd::RoughnessEncoding)") -set (GLOBAL_BIN_OUTPUT_PATH "${CMAKE_SOURCE_DIR}/_Build" CACHE STRING "") + +# Generate PDB for Release builds +set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi") +set (CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF") # Create project file (READ "Include/NRD.h" ver_h) @@ -149,8 +149,8 @@ target_compile_options (${PROJECT_NAME} PRIVATE ${COMPILE_OPTIONS}) set_property (TARGET ${PROJECT_NAME} PROPERTY FOLDER "${PROJECT_NAME}") 
-set_target_properties (${PROJECT_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${GLOBAL_BIN_OUTPUT_PATH}/$") -message ("NRD output path: '${GLOBAL_BIN_OUTPUT_PATH}'") +set_target_properties (${PROJECT_NAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") +message ("NRD output path: '${CMAKE_RUNTIME_OUTPUT_DIRECTORY}'") # Shaders if (NOT NRD_DISABLE_SHADER_COMPILATION) @@ -161,7 +161,7 @@ if (NOT NRD_DISABLE_SHADER_COMPILATION) # External/ShaderMake if (NOT TARGET ShaderMake) - set (SHADERMAKE_BIN_OUTPUT_PATH ${GLOBAL_BIN_OUTPUT_PATH} CACHE STRING "") + set (SHADERMAKE_BIN_OUTPUT_PATH ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} CACHE STRING "") add_subdirectory (External/ShaderMake) endif () diff --git a/External/ShaderMake b/External/ShaderMake index 8a82d0b..1cb630c 160000 --- a/External/ShaderMake +++ b/External/ShaderMake @@ -1 +1 @@ -Subproject commit 8a82d0bc646e01ec7ae718b0f240904f5ac79662 +Subproject commit 1cb630c7caad44c2133dcf39f5061ab8ee82d6f8 diff --git a/Include/NRD.h b/Include/NRD.h index 6b31ed4..f7bcf9b 100644 --- a/Include/NRD.h +++ b/Include/NRD.h @@ -29,8 +29,8 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #define NRD_VERSION_MAJOR 4 #define NRD_VERSION_MINOR 3 -#define NRD_VERSION_BUILD 4 -#define NRD_VERSION_DATE "20 October 2023" +#define NRD_VERSION_BUILD 5 +#define NRD_VERSION_DATE "25 October 2023" #if defined(_MSC_VER) #define NRD_CALL __fastcall diff --git a/Integration/NRDIntegration.h b/Integration/NRDIntegration.h index 610aa9c..2f09117 100644 --- a/Integration/NRDIntegration.h +++ b/Integration/NRDIntegration.h @@ -23,15 +23,15 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
#include #define NRD_INTEGRATION_MAJOR 1 -#define NRD_INTEGRATION_MINOR 6 -#define NRD_INTEGRATION_DATE "30 April 2023" +#define NRD_INTEGRATION_MINOR 7 +#define NRD_INTEGRATION_DATE "25 October 2023" #define NRD_INTEGRATION 1 #define NRD_INTEGRATION_DEBUG_LOGGING 0 #ifndef NRD_INTEGRATION_ASSERT #include - #define NRD_INTEGRATION_ASSERT(expr, msg) (assert(expr && msg)) + #define NRD_INTEGRATION_ASSERT(expr, msg) assert(expr && msg) #endif // User inputs / outputs are not mipmapped, thus only 1 entry is needed. @@ -58,12 +58,16 @@ inline void NrdIntegration_SetResource(NrdUserPool& pool, nrd::ResourceType slot class NrdIntegration { public: - // The application must provide number of buffered frames, it's needed to guarantee that - // constant data and descriptor sets are not overwritten while being executed on the GPU. - // Usually it's 2-3 frames. - NrdIntegration(uint32_t bufferedFramesNum, const char* persistentName = "") : + // bufferedFramesNum (usually 2-3 frames): + // The application must provide number of buffered frames, it's needed to guarantee that + // constant data and descriptor sets are not overwritten while being executed on the GPU. + // enableDescriptorCaching: + // true - enables descriptor caching for the whole lifetime of an NrdIntegration instance + // false - descriptors are cached only within a single "Denoise" call + NrdIntegration(uint32_t bufferedFramesNum, bool enableDescriptorCaching, const char* persistentName = "") : m_Name(persistentName) , m_BufferedFramesNum(bufferedFramesNum) + , m_IsDescriptorCachingEnabled(enableDescriptorCaching) {} ~NrdIntegration() @@ -72,22 +76,16 @@ class NrdIntegration // There is no "Resize" functionality, because NRD full recreation costs nothing. 
// The main cost comes from render targets resizing, which needs to be done in any case // (call Destroy beforehand) - bool Initialize(const nrd::InstanceCreationDesc& instanceCreationDesc, nri::Device& nriDevice, - const nri::CoreInterface& nriCore, const nri::HelperInterface& nriHelper); + bool Initialize(const nrd::InstanceCreationDesc& instanceCreationDesc, nri::Device& nriDevice, const nri::CoreInterface& nriCore, const nri::HelperInterface& nriHelper); // Must be called once on a frame start - void NewFrame(uint32_t frameIndex); + void NewFrame(); - // Explcitly calls eponymous NRD API functions + // Explicitly calls eponymous NRD API functions bool SetCommonSettings(const nrd::CommonSettings& commonSettings); bool SetDenoiserSettings(nrd::Identifier denoiser, const void* denoiserSettings); - // Better use "enableDescriptorCaching = true" if resources are not changing between frames - // (i.e. not suballocated from a heap) - void Denoise(const nrd::Identifier* denoisers, uint32_t denoisersNum, - nri::CommandBuffer& commandBuffer, const NrdUserPool& userPool, - bool enableDescriptorCaching - ); + void Denoise(const nrd::Identifier* denoisers, uint32_t denoisersNum, nri::CommandBuffer& commandBuffer, const NrdUserPool& userPool); // This function assumes that the device is in the IDLE state, i.e. 
there is no work in flight void Destroy(); @@ -110,22 +108,19 @@ class NrdIntegration void CreateResources(); void AllocateAndBindMemory(); - void Dispatch - ( - nri::CommandBuffer& commandBuffer, nri::DescriptorPool& descriptorPool, - const nrd::DispatchDesc& dispatchDesc, const NrdUserPool& userPool, - bool enableDescriptorCaching - ); + void Dispatch(nri::CommandBuffer& commandBuffer, nri::DescriptorPool& descriptorPool, const nrd::DispatchDesc& dispatchDesc, const NrdUserPool& userPool); private: std::vector m_TexturePool; - std::map m_Descriptors; + std::map m_CachedDescriptors; + std::vector> m_DescriptorsInFlight; std::vector m_ResourceState; std::vector m_PipelineLayouts; std::vector m_Pipelines; std::vector m_MemoryAllocations; std::vector m_Samplers; - std::array m_DescriptorPools = {}; + std::vector m_DescriptorPools = {}; + std::vector m_DescriptorSetSamplers = {}; const nri::CoreInterface* m_NRI = nullptr; const nri::HelperInterface* m_NRIHelper = nullptr; nri::Device* m_Device = nullptr; @@ -140,7 +135,9 @@ class NrdIntegration uint32_t m_ConstantBufferOffset = 0; uint32_t m_BufferedFramesNum = 0; uint32_t m_DescriptorPoolIndex = 0; + uint32_t m_FrameIndex = 0; bool m_IsShadersReloadRequested = false; + bool m_IsDescriptorCachingEnabled = false; }; #define NRD_INTEGRATION_ABORT_ON_FAILURE(result) if ((result) != nri::Result::SUCCESS) NRD_INTEGRATION_ASSERT(false, "Abort on failure!") diff --git a/Integration/NRDIntegration.hpp b/Integration/NRDIntegration.hpp index cb3599d..2789ced 100644 --- a/Integration/NRDIntegration.hpp +++ b/Integration/NRDIntegration.hpp @@ -10,13 +10,11 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
#include "NRDIntegration.h" -static_assert(NRD_VERSION_MAJOR >= 4 && NRD_VERSION_MINOR >= 2, "Unsupported NRD version!"); +static_assert(NRD_VERSION_MAJOR >= 4 && NRD_VERSION_MINOR >= 3, "Unsupported NRD version!"); static_assert(NRI_VERSION_MAJOR >= 1 && NRI_VERSION_MINOR >= 93, "Unsupported NRI version!"); -#if _WIN32 - #define NRD_INTEGRATION_ALLOCA _alloca -#else - #define NRD_INTEGRATION_ALLOCA alloca +#ifdef _WIN32 + #define alloca _alloca #endif constexpr std::array g_NRD_NrdToNriFormat = @@ -90,6 +88,8 @@ template constexpr T NRD_GetAlignedSize(const T& size, A bool NrdIntegration::Initialize(const nrd::InstanceCreationDesc& instanceCreationDesc, nri::Device& nriDevice, const nri::CoreInterface& nriCore, const nri::HelperInterface& nriHelper) { + NRD_INTEGRATION_ASSERT(!m_Instance, "Already initialized! Did you forget to call 'Destroy'?"); + const nri::DeviceDesc& deviceDesc = nriCore.GetDeviceDesc(nriDevice); if (deviceDesc.nriVersionMajor != NRI_VERSION_MAJOR || deviceDesc.nriVersionMinor != NRI_VERSION_MINOR) { @@ -149,7 +149,7 @@ void NrdIntegration::CreatePipelines() uint32_t descriptorSetResourcesIndex = instanceDesc.resourcesSpaceIndex == instanceDesc.constantBufferSpaceIndex ? 0 : (instanceDesc.resourcesSpaceIndex == instanceDesc.samplersSpaceIndex ? 
descriptorSetSamplersIndex : descriptorSetSamplersIndex + 1); uint32_t descriptorSetNum = std::max(descriptorSetSamplersIndex, descriptorSetResourcesIndex) + 1; - nri::DescriptorSetDesc* descriptorSetDescs = (nri::DescriptorSetDesc*)NRD_INTEGRATION_ALLOCA(sizeof(nri::DescriptorSetDesc) * descriptorSetNum); + nri::DescriptorSetDesc* descriptorSetDescs = (nri::DescriptorSetDesc*)alloca(sizeof(nri::DescriptorSetDesc) * descriptorSetNum); memset(descriptorSetDescs, 0, sizeof(nri::DescriptorSetDesc) * descriptorSetNum); nri::DescriptorSetDesc& descriptorSetConstantBuffer = descriptorSetDescs[0]; @@ -170,7 +170,7 @@ void NrdIntegration::CreatePipelines() } resourceRangesNum += 1; // samplers - nri::DescriptorRangeDesc* descriptorRanges = (nri::DescriptorRangeDesc*)NRD_INTEGRATION_ALLOCA(sizeof(nri::DescriptorRangeDesc) * resourceRangesNum); + nri::DescriptorRangeDesc* descriptorRanges = (nri::DescriptorRangeDesc*)alloca(sizeof(nri::DescriptorRangeDesc) * resourceRangesNum); memset(descriptorRanges, 0, sizeof(nri::DescriptorRangeDesc) * resourceRangesNum); nri::DescriptorRangeDesc* samplersRange = descriptorRanges; @@ -327,9 +327,9 @@ void NrdIntegration::CreateResources() #endif } - #if( NRD_INTEGRATION_DEBUG_LOGGING == 1 ) - printf("%s: %.1f Mb (permanent), %.1f Mb (transient)\n\n", m_Name, double(m_PermanentPoolSize) / (1024.0f * 1024.0f), double(m_TransientPoolSize) / (1024.0f * 1024.0f)); - #endif +#if( NRD_INTEGRATION_DEBUG_LOGGING == 1 ) + printf("%s: %.1f Mb (permanent), %.1f Mb (transient)\n\n", m_Name, double(m_PermanentPoolSize) / (1024.0f * 1024.0f), double(m_TransientPoolSize) / (1024.0f * 1024.0f)); +#endif // Samplers for (uint32_t i = 0; i < instanceDesc.samplersNum; i++) @@ -386,8 +386,15 @@ void NrdIntegration::CreateResources() descriptorPoolDesc.dynamicConstantBufferMaxNum = instanceDesc.descriptorPoolDesc.constantBuffersMaxNum; descriptorPoolDesc.samplerMaxNum = instanceDesc.descriptorPoolDesc.samplersMaxNum; - for (nri::DescriptorPool*& 
descriptorPool : m_DescriptorPools) + for (uint32_t i = 0; i < m_BufferedFramesNum; i++) + { + nri::DescriptorPool* descriptorPool = nullptr; NRD_INTEGRATION_ABORT_ON_FAILURE(m_NRI->CreateDescriptorPool(*m_Device, descriptorPoolDesc, descriptorPool)); + m_DescriptorPools.push_back(descriptorPool); + + m_DescriptorSetSamplers.push_back(nullptr); + m_DescriptorsInFlight.push_back({}); + } } void NrdIntegration::AllocateAndBindMemory() @@ -416,39 +423,64 @@ void NrdIntegration::AllocateAndBindMemory() NRD_INTEGRATION_ABORT_ON_FAILURE(m_NRIHelper->AllocateAndBindMemory(*m_Device, resourceGroupDesc, m_MemoryAllocations.data() + baseAllocation)); } -void NrdIntegration::NewFrame(uint32_t frameIndex) +void NrdIntegration::NewFrame() { - #if( NRD_INTEGRATION_DEBUG_LOGGING == 1 ) + NRD_INTEGRATION_ASSERT(m_Instance, "Uninitialized! Did you forget to call 'Initialize'?"); + +#if( NRD_INTEGRATION_DEBUG_LOGGING == 1 ) - printf("%s (frame %u) ==============================================================================\n\n", m_Name, frameIndex); + printf("%s (frame %u) ==============================================================================\n\n", m_Name, m_FrameIndex); #endif - m_DescriptorPoolIndex = frameIndex % m_BufferedFramesNum; + m_DescriptorPoolIndex = m_FrameIndex % m_BufferedFramesNum; nri::DescriptorPool* descriptorPool = m_DescriptorPools[m_DescriptorPoolIndex]; m_NRI->ResetDescriptorPool(*descriptorPool); + + // Needs to be reset because the corresponding descriptor pool has been just reset + m_DescriptorSetSamplers[m_DescriptorPoolIndex] = nullptr; + + // Descriptors still referenced by the GPU can't be destroyed, so they are destroyed only when their frame slot is reused + if (!m_IsDescriptorCachingEnabled) + { + for (const auto& entry : m_DescriptorsInFlight[m_DescriptorPoolIndex]) + m_NRI->DestroyDescriptor(*entry); + m_DescriptorsInFlight[m_DescriptorPoolIndex].clear(); + } + + m_FrameIndex++; } bool NrdIntegration::SetCommonSettings(const nrd::CommonSettings& commonSettings) { + NRD_INTEGRATION_ASSERT(m_Instance, "Uninitialized! 
Did you forget to call 'Initialize'?"); + nrd::Result result = nrd::SetCommonSettings(*m_Instance, commonSettings); - assert(result == nrd::Result::SUCCESS); + NRD_INTEGRATION_ASSERT(result == nrd::Result::SUCCESS, "nrd::SetCommonSettings(): failed!"); return result == nrd::Result::SUCCESS; } bool NrdIntegration::SetDenoiserSettings(nrd::Identifier denoiser, const void* denoiserSettings) { + NRD_INTEGRATION_ASSERT(m_Instance, "Uninitialized! Did you forget to call 'Initialize'?"); + nrd::Result result = nrd::SetDenoiserSettings(*m_Instance, denoiser, denoiserSettings); - assert(result == nrd::Result::SUCCESS); + NRD_INTEGRATION_ASSERT(result == nrd::Result::SUCCESS, "nrd::SetDenoiserSettings(): failed!"); return result == nrd::Result::SUCCESS; } -void NrdIntegration::Denoise(const nrd::Identifier* denoisers, uint32_t denoisersNum, nri::CommandBuffer& commandBuffer, const NrdUserPool& userPool, bool enableDescriptorCaching) +void NrdIntegration::Denoise(const nrd::Identifier* denoisers, uint32_t denoisersNum, nri::CommandBuffer& commandBuffer, const NrdUserPool& userPool) { + NRD_INTEGRATION_ASSERT(m_Instance, "Uninitialized! 
Did you forget to call 'Initialize'?"); + const nrd::DispatchDesc* dispatchDescs = nullptr; uint32_t dispatchDescsNum = 0; nrd::GetComputeDispatches(*m_Instance, denoisers, denoisersNum, dispatchDescs, dispatchDescsNum); + // Even if descriptor caching is disabled it's better to cache descriptors inside a single "Denoise" call + if (!m_IsDescriptorCachingEnabled) + m_CachedDescriptors.clear(); + nri::DescriptorPool* descriptorPool = m_DescriptorPools[m_DescriptorPoolIndex]; m_NRI->CmdSetDescriptorPool(commandBuffer, *descriptorPool); @@ -457,13 +489,13 @@ void NrdIntegration::Denoise(const nrd::Identifier* denoisers, uint32_t denoiser const nrd::DispatchDesc& dispatchDesc = dispatchDescs[i]; m_NRI->CmdBeginAnnotation(commandBuffer, dispatchDesc.name); - Dispatch(commandBuffer, *descriptorPool, dispatchDesc, userPool, enableDescriptorCaching); + Dispatch(commandBuffer, *descriptorPool, dispatchDesc, userPool); m_NRI->CmdEndAnnotation(commandBuffer); } } -void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::DescriptorPool& descriptorPool, const nrd::DispatchDesc& dispatchDesc, const NrdUserPool& userPool, bool enableDescriptorCaching) +void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::DescriptorPool& descriptorPool, const nrd::DispatchDesc& dispatchDesc, const NrdUserPool& userPool) { const nrd::InstanceDesc& instanceDesc = nrd::GetInstanceDesc(*m_Instance); const nrd::PipelineDesc& pipelineDesc = instanceDesc.pipelines[dispatchDesc.pipelineIndex]; @@ -472,13 +504,13 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor for (uint32_t i = 0; i < dispatchDesc.resourcesNum; i++) transitionNum += dispatchDesc.resources[i].mipNum; - nri::Descriptor** descriptors = (nri::Descriptor**)NRD_INTEGRATION_ALLOCA(sizeof(nri::Descriptor*) * dispatchDesc.resourcesNum); + nri::Descriptor** descriptors = (nri::Descriptor**)alloca(sizeof(nri::Descriptor*) * dispatchDesc.resourcesNum); memset(descriptors, 0, 
sizeof(nri::Descriptor*) * dispatchDesc.resourcesNum); - nri::DescriptorRangeUpdateDesc* resourceRanges = (nri::DescriptorRangeUpdateDesc*)NRD_INTEGRATION_ALLOCA(sizeof(nri::DescriptorRangeUpdateDesc) * pipelineDesc.resourceRangesNum); + nri::DescriptorRangeUpdateDesc* resourceRanges = (nri::DescriptorRangeUpdateDesc*)alloca(sizeof(nri::DescriptorRangeUpdateDesc) * pipelineDesc.resourceRangesNum); memset(resourceRanges, 0, sizeof(nri::DescriptorRangeUpdateDesc) * pipelineDesc.resourceRangesNum); - nri::TextureTransitionBarrierDesc* transitions = (nri::TextureTransitionBarrierDesc*)NRD_INTEGRATION_ALLOCA(sizeof(nri::TextureTransitionBarrierDesc) * transitionNum); + nri::TextureTransitionBarrierDesc* transitions = (nri::TextureTransitionBarrierDesc*)alloca(sizeof(nri::TextureTransitionBarrierDesc) * transitionNum); memset(transitions, 0, sizeof(nri::TextureTransitionBarrierDesc) * transitionNum); nri::TransitionBarrierDesc transitionBarriers = {}; @@ -507,7 +539,7 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor nrdTexture = (NrdIntegrationTexture*)&userPool[(uint32_t)nrdResource.type]; NRD_INTEGRATION_ASSERT(nrdTexture && nrdTexture->subresourceStates && nrdTexture->subresourceStates->texture, "'userPool' entry can't be NULL if it's in use!"); - NRD_INTEGRATION_ASSERT(nrdTexture->format != nri::Format::UNKNOWN, "Format must be a valid format!"); + NRD_INTEGRATION_ASSERT(nrdTexture->format != nri::Format::UNKNOWN, "Format must be valid!"); } const nri::AccessBits nextAccess = nrdResource.stateNeeded == nrd::DescriptorType::TEXTURE ? 
nri::AccessBits::SHADER_RESOURCE : nri::AccessBits::SHADER_RESOURCE_STORAGE; @@ -523,14 +555,15 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor uint64_t resource = m_NRI->GetTextureNativeObject(*nrdTexture->subresourceStates->texture, 0); uint64_t key = NRD_CreateDescriptorKey(resource, isStorage, (uint8_t)nrdResource.mipOffset, (uint8_t)nrdResource.mipNum); - const auto& entry = enableDescriptorCaching ? m_Descriptors.find(key) : m_Descriptors.end(); + const auto& entry = m_CachedDescriptors.find(key); nri::Descriptor* descriptor = nullptr; - if (entry == m_Descriptors.end()) + if (entry == m_CachedDescriptors.end()) { nri::Texture2DViewDesc desc = {nrdTexture->subresourceStates->texture, isStorage ? nri::Texture2DViewType::SHADER_RESOURCE_STORAGE_2D : nri::Texture2DViewType::SHADER_RESOURCE_2D, nrdTexture->format, nrdResource.mipOffset, nrdResource.mipNum}; NRD_INTEGRATION_ABORT_ON_FAILURE(m_NRI->CreateTexture2DView(desc, descriptor)); - m_Descriptors.insert( std::make_pair(key, descriptor) ); + m_CachedDescriptors.insert( std::make_pair(key, descriptor) ); + m_DescriptorsInFlight[m_DescriptorPoolIndex].push_back(descriptor); } else descriptor = entry->second; @@ -543,12 +576,16 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor uint32_t descriptorSetSamplersIndex = instanceDesc.constantBufferSpaceIndex == instanceDesc.samplersSpaceIndex ? 0 : 1; uint32_t descriptorSetResourcesIndex = instanceDesc.resourcesSpaceIndex == instanceDesc.constantBufferSpaceIndex ? 0 : (instanceDesc.resourcesSpaceIndex == instanceDesc.samplersSpaceIndex ? 
descriptorSetSamplersIndex : descriptorSetSamplersIndex + 1); uint32_t descriptorSetNum = std::max(descriptorSetSamplersIndex, descriptorSetResourcesIndex) + 1; + bool samplersAreInSeparateSet = instanceDesc.samplersSpaceIndex != instanceDesc.constantBufferSpaceIndex && instanceDesc.samplersSpaceIndex != instanceDesc.resourcesSpaceIndex; - nri::DescriptorSet** descriptorSets = (nri::DescriptorSet**)NRD_INTEGRATION_ALLOCA(sizeof(nri::DescriptorSet*) * descriptorSetNum); + nri::DescriptorSet** descriptorSets = (nri::DescriptorSet**)alloca(sizeof(nri::DescriptorSet*) * descriptorSetNum); nri::PipelineLayout* pipelineLayout = m_PipelineLayouts[dispatchDesc.pipelineIndex]; for (uint32_t i = 0; i < descriptorSetNum; i++) - NRD_INTEGRATION_ABORT_ON_FAILURE(m_NRI->AllocateDescriptorSets(descriptorPool, *pipelineLayout, i, &descriptorSets[i], 1, nri::WHOLE_DEVICE_GROUP, 0)); + { + if (!samplersAreInSeparateSet || i != descriptorSetSamplersIndex) + NRD_INTEGRATION_ABORT_ON_FAILURE(m_NRI->AllocateDescriptorSets(descriptorPool, *pipelineLayout, i, &descriptorSets[i], 1, nri::WHOLE_DEVICE_GROUP, 0)); + } // Updating constants uint32_t dynamicConstantBufferOffset = 0; @@ -569,8 +606,20 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor } // Updating samplers - nri::DescriptorRangeUpdateDesc samplersDescriptorRange = {m_Samplers.data(), instanceDesc.samplersNum, 0}; - m_NRI->UpdateDescriptorRanges(*descriptorSets[descriptorSetSamplersIndex], nri::WHOLE_DEVICE_GROUP, 0, 1, &samplersDescriptorRange); + const nri::DescriptorRangeUpdateDesc samplersDescriptorRange = {m_Samplers.data(), instanceDesc.samplersNum, 0}; + if (samplersAreInSeparateSet) + { + nri::DescriptorSet*& descriptorSetSamplers = m_DescriptorSetSamplers[m_DescriptorPoolIndex]; + if (!descriptorSetSamplers) + { + NRD_INTEGRATION_ABORT_ON_FAILURE(m_NRI->AllocateDescriptorSets(descriptorPool, *pipelineLayout, descriptorSetSamplersIndex, &descriptorSetSamplers, 1, nri::WHOLE_DEVICE_GROUP, 
0)); + m_NRI->UpdateDescriptorRanges(*descriptorSetSamplers, nri::WHOLE_DEVICE_GROUP, 0, 1, &samplersDescriptorRange); + } + + descriptorSets[descriptorSetSamplersIndex] = descriptorSetSamplers; + } + else + m_NRI->UpdateDescriptorRanges(*descriptorSets[descriptorSetSamplersIndex], nri::WHOLE_DEVICE_GROUP, 0, 1, &samplersDescriptorRange); // Updating resources m_NRI->UpdateDescriptorRanges(*descriptorSets[descriptorSetResourcesIndex], nri::WHOLE_DEVICE_GROUP, instanceDesc.samplersSpaceIndex == instanceDesc.resourcesSpaceIndex ? 1 : 0, pipelineDesc.resourceRangesNum, resourceRanges); @@ -587,14 +636,6 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor m_NRI->CmdDispatch(commandBuffer, dispatchDesc.gridWidth, dispatchDesc.gridHeight, 1); - // Cleanup - if (!enableDescriptorCaching) - { - for (const auto& entry : m_Descriptors) - m_NRI->DestroyDescriptor(*entry.second); - m_Descriptors.clear(); - } - // Debug logging #if( NRD_INTEGRATION_DEBUG_LOGGING == 1 ) printf("Pipeline #%u : %s\n\t", dispatchDesc.pipelineIndex, dispatchDesc.name); @@ -623,15 +664,21 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor void NrdIntegration::Destroy() { - m_NRI->DestroyDescriptor(*m_ConstantBufferView); - m_ConstantBufferView = nullptr; + NRD_INTEGRATION_ASSERT(m_Instance, "Already destroyed! 
Did you forget to call 'Initialize'?"); + m_ResourceState.clear(); + + m_NRI->DestroyDescriptor(*m_ConstantBufferView); m_NRI->DestroyBuffer(*m_ConstantBuffer); - m_ConstantBuffer = nullptr; - for (const auto& entry : m_Descriptors) - m_NRI->DestroyDescriptor(*entry.second); - m_Descriptors.clear(); + for (auto& descriptors : m_DescriptorsInFlight) + { + for (const auto& entry : descriptors) + m_NRI->DestroyDescriptor(*entry); + descriptors.clear(); + } + m_DescriptorsInFlight.clear(); + m_CachedDescriptors.clear(); for (const NrdIntegrationTexture& nrdTexture : m_TexturePool) m_NRI->DestroyTexture(*(nri::Texture*)nrdTexture.subresourceStates->texture); @@ -655,16 +702,24 @@ void NrdIntegration::Destroy() for (nri::DescriptorPool* descriptorPool : m_DescriptorPools) m_NRI->DestroyDescriptorPool(*descriptorPool); + m_DescriptorPools.clear(); + m_DescriptorSetSamplers.clear(); nrd::DestroyInstance(*m_Instance); - m_Instance = nullptr; m_NRI = nullptr; + m_NRIHelper = nullptr; m_Device = nullptr; + m_ConstantBuffer = nullptr; + m_ConstantBufferView = nullptr; + m_Instance = nullptr; m_PermanentPoolSize = 0; m_TransientPoolSize = 0; m_ConstantBufferSize = 0; m_ConstantBufferViewSize = 0; m_ConstantBufferOffset = 0; + m_DescriptorPoolIndex = 0; + m_FrameIndex = 0; m_IsShadersReloadRequested = false; + // NOTE: m_Name, m_BufferedFramesNum and m_IsDescriptorCachingEnabled are set only by the constructor and must survive "Destroy", otherwise a following "Initialize" creates no descriptor pools and "NewFrame" divides by zero } diff --git a/README.md b/README.md index 2e122ea..02560dc 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# NVIDIA REAL-TIME DENOISERS v4.3.4 (NRD) +# NVIDIA REAL-TIME DENOISERS v4.3.5 (NRD) [![Build NRD SDK](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml/badge.svg)](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml) diff --git a/Resources/Version.h b/Resources/Version.h index 354ebd3..a24b198 100644 --- a/Resources/Version.h +++ b/Resources/Version.h @@ -23,6 +23,6 @@ Versioning rules: #define VERSION_MAJOR 4 
#define VERSION_MINOR 3 -#define VERSION_BUILD 4 +#define VERSION_BUILD 5 #define VERSION_STRING STR(VERSION_MAJOR.VERSION_MINOR.VERSION_BUILD encoding=NRD_NORMAL_ENCODING.NRD_ROUGHNESS_ENCODING) diff --git a/Source/InstanceImpl.cpp b/Source/InstanceImpl.cpp index c30f1ae..959dfb6 100644 --- a/Source/InstanceImpl.cpp +++ b/Source/InstanceImpl.cpp @@ -612,6 +612,10 @@ void nrd::InstanceImpl::PrepareDesc() m_Desc.transientPool = m_TransientPool.data(); m_Desc.transientPoolSize = (uint32_t)m_TransientPool.size(); + const bool samplersAreInSeparateSet = NRD_SAMPLERS_SPACE_INDEX != NRD_CONSTANT_BUFFER_SPACE_INDEX && NRD_SAMPLERS_SPACE_INDEX != NRD_RESOURCES_SPACE_INDEX; + if (samplersAreInSeparateSet) + m_Desc.descriptorPoolDesc.samplersMaxNum += m_Desc.samplersNum; + // Calculate descriptor heap (sets) requirements for (InternalDispatchDesc& dispatchDesc : m_Dispatches) { @@ -628,7 +632,9 @@ void nrd::InstanceImpl::PrepareDesc() } m_Desc.descriptorPoolDesc.setsMaxNum += dispatchDesc.maxRepeatsNum; - m_Desc.descriptorPoolDesc.samplersMaxNum += dispatchDesc.maxRepeatsNum * m_Desc.samplersNum; + + if (!samplersAreInSeparateSet) + m_Desc.descriptorPoolDesc.samplersMaxNum += dispatchDesc.maxRepeatsNum * m_Desc.samplersNum; if (dispatchDesc.constantBufferDataSize != 0) { @@ -641,7 +647,9 @@ void nrd::InstanceImpl::PrepareDesc() uint32_t clearNum = (uint32_t)m_ClearResources.size(); m_Desc.descriptorPoolDesc.storageTexturesMaxNum += clearNum; m_Desc.descriptorPoolDesc.setsMaxNum += clearNum; - m_Desc.descriptorPoolDesc.samplersMaxNum += clearNum * m_Desc.samplersNum; + + if (!samplersAreInSeparateSet) + m_Desc.descriptorPoolDesc.samplersMaxNum += clearNum * m_Desc.samplersNum; // Assign resources for (PipelineDesc& pipelineDesc : m_Pipelines)