Skip to content

Commit

Permalink
Merge branch 'ralston/dev/dlfg_image_index_race_fix' into 'main'
Browse files Browse the repository at this point in the history
Attempt to fix a data race in DLFG

See merge request lightspeedrtx/dxvk-remix-nv!708
  • Loading branch information
anon-apple committed Feb 23, 2024
2 parents 03866c2 + 51098d0 commit 8cd5568
Show file tree
Hide file tree
Showing 10 changed files with 59 additions and 29 deletions.
7 changes: 4 additions & 3 deletions src/d3d9/d3d9_swapchain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1166,7 +1166,7 @@ namespace dxvk {
if (i + 1 >= SyncInterval)
m_context->signal(m_frameLatencySignal, m_frameId);

SubmitPresent(sync, i);
SubmitPresent(sync, i, imageIndex);
}

// Rotate swap chain buffers so that the back
Expand Down Expand Up @@ -1194,7 +1194,7 @@ namespace dxvk {
}


void D3D9SwapChainEx::SubmitPresent(const vk::PresenterSync& Sync, uint32_t FrameId) {
void D3D9SwapChainEx::SubmitPresent(const vk::PresenterSync& Sync, uint32_t FrameId, uint32_t imageIndex) {
ScopedCpuProfileZone();

// NV-DXVK start: Reflex integration
Expand All @@ -1209,6 +1209,7 @@ namespace dxvk {

m_parent->EmitCs([this,
cReflexFrameId = currentReflexFrameId,
cAcquiredImageIndex = imageIndex,
cFrameId = FrameId,
cSync = Sync,
cHud = m_hud,
Expand All @@ -1234,7 +1235,7 @@ namespace dxvk {
// (unless the workaround is enabled as this requires that the Present markers stay where they usually are).
const bool insertReflexPresentMarkers = !m_context->isDLFGEnabled() || (__DLFG_REFLEX_WORKAROUND != 0);

m_device->presentImage(cReflexFrameId, insertReflexPresentMarkers, GetPresenter(), &m_presentStatus);
m_device->presentImage(cReflexFrameId, insertReflexPresentMarkers, cAcquiredImageIndex, GetPresenter(), &m_presentStatus);
// NV-DXVK end
});

Expand Down
2 changes: 1 addition & 1 deletion src/d3d9/d3d9_swapchain.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ namespace dxvk {

void PresentImage(UINT PresentInterval);

void SubmitPresent(const vk::PresenterSync& Sync, uint32_t FrameId);
void SubmitPresent(const vk::PresenterSync& Sync, uint32_t FrameId, uint32_t imageIndex);

void SynchronizePresent();

Expand Down
3 changes: 3 additions & 0 deletions src/dxvk/dxvk_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ namespace dxvk {
void DxvkDevice::presentImage(
std::uint64_t cachedReflexFrameId,
bool insertReflexPresentMarkers,
std::uint32_t cachedAcquiredImageIndex,
const Rc<vk::Presenter>& presenter,
DxvkSubmitStatus* status
) {
Expand All @@ -370,6 +371,8 @@ namespace dxvk {
presentInfo.presenter = presenter;
presentInfo.cachedReflexFrameId = cachedReflexFrameId;
presentInfo.insertReflexPresentMarkers = insertReflexPresentMarkers;
presentInfo.cachedAcquiredImageIndex = cachedAcquiredImageIndex;

m_submissionQueue.present(presentInfo, status);

incrementPresentCount();
Expand Down
4 changes: 4 additions & 0 deletions src/dxvk/dxvk_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -480,13 +480,17 @@ namespace dxvk {
* can be retrieved with \ref waitForSubmission.
* \param [in] cachedReflexFrameId The Reflex frame ID at the time of calling, cached so Reflex can have
* consistent frame IDs throughout the dispatches of an application frame.
* \param [in] insertReflexPresentMarkers A flag to indicate if Reflex present begin/end markers should be placed
* around the image presentation.
* \param [in] cachedAcquiredImageIndex The acquired image index to use for presentation at the time of calling.
* \param [in] presenter The presenter
* \param [out] status Present status
* \param [in] swapchainImage DxvkImageView for the swapchain image being presented
*/
void presentImage(
std::uint64_t cachedReflexFrameId,
bool insertReflexPresentMarkers,
std::uint32_t cachedAcquiredImageIndex,
const Rc<vk::Presenter>& presenter,
DxvkSubmitStatus* status
);
Expand Down
6 changes: 5 additions & 1 deletion src/dxvk/dxvk_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,12 @@ namespace dxvk {
}
// NV-DXVK end

// NV-DXVK start: DLFG acquired image information retrieval
const auto cachedAcquiredImageIndex = entry.present.cachedAcquiredImageIndex;
// NV-DXVK end

// m_device->vkd()->vkQueueWaitIdle(m_device->queues().graphics.queueHandle);
status = entry.present.presenter->presentImage(&entry.status->result, entry.present, m_currentFrameInterpolationData);
status = entry.present.presenter->presentImage(&entry.status->result, entry.present, m_currentFrameInterpolationData, cachedAcquiredImageIndex);
// if both submit and DLFG+present run on the same queue, then we need to wait for present to avoid racing on the queue
#if __DLFG_USE_GRAPHICS_QUEUE
entry.present.presenter->synchronize();
Expand Down
3 changes: 3 additions & 0 deletions src/dxvk/dxvk_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,15 @@ namespace dxvk {
*/
struct DxvkPresentInfo {
Rc<vk::Presenter> presenter;
// Represents the Reflex Frame ID at the time of image presentation for usage in presentation submission.
uint64_t cachedReflexFrameId;
// Note: This flag is specifically used when the DXVK Queue should insert Reflex present
// markers rather than the presenter currently in use. This is done because some presenters
// (namely the DLFG Presenter) will insert their own Reflex markers due to having more complex
// requirements.
bool insertReflexPresentMarkers;
// Represents the acquired presenter image index at the time of image presentation for usage in presentation submission.
uint32_t cachedAcquiredImageIndex;
};


Expand Down
49 changes: 29 additions & 20 deletions src/dxvk/rtx_render/rtx_dlfg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,8 @@ namespace dxvk {

VkResult DxvkDLFGPresenter::presentImage(std::atomic<VkResult>* status,
const DxvkPresentInfo& presentInfo,
const DxvkFrameInterpolationInfo& frameInterpolationInfo) {
const DxvkFrameInterpolationInfo& frameInterpolationInfo,
std::uint32_t acquiredImageIndex) {
VkResult lastStatus = m_lastPresentStatus;
if (lastStatus != VK_SUCCESS) {
*status = lastStatus;
Expand All @@ -197,11 +198,16 @@ namespace dxvk {

*status = VK_EVENT_SET;

std::unique_lock<dxvk::mutex> lock(m_presentThread.mutex);
assert(m_backbufferInFlight[m_backbufferIndex] == false);
m_backbufferInFlight[m_backbufferIndex] = true;
m_presentQueue.push({ status, m_backbufferIndex, presentInfo, frameInterpolationInfo });
m_presentThread.condWorkAvailable.notify_all();
{
std::unique_lock<dxvk::mutex> lock(m_presentThread.mutex);

assert(m_backbufferInFlight[acquiredImageIndex] == false);
m_backbufferInFlight[acquiredImageIndex] = true;

m_presentQueue.push({ status, acquiredImageIndex, presentInfo, frameInterpolationInfo });

m_presentThread.condWorkAvailable.notify_all();
}

return VK_EVENT_SET;
}
Expand Down Expand Up @@ -349,7 +355,10 @@ namespace dxvk {
DxvkDLFGScopeGuard signalWorkConsumed([&]() {
// m_device->vkd()->vkQueueWaitIdle(m_device->queues().__DLFG_QUEUE.queueHandle);
present.status->store(m_lastPresentStatus);
m_backbufferInFlight[present.m_backbufferIndex] = false;

assert(m_backbufferInFlight[present.acquiredImageIndex] == true);
m_backbufferInFlight[present.acquiredImageIndex] = false;

m_presentQueue.pop();
m_presentThread.condWorkConsumed.notify_all();
});
Expand Down Expand Up @@ -379,7 +388,7 @@ namespace dxvk {
DxvkDLFGCommandList* m_cmdList = nullptr;
DxvkDLFGImageBarrierSet<4> barriers;

VkSemaphore backbufferWaitSemaphore = m_backbufferPresentSemaphores[present.m_backbufferIndex]->handle();
VkSemaphore backbufferWaitSemaphore = m_backbufferPresentSemaphores[present.acquiredImageIndex]->handle();

// if true, present only interpolated frames (for debugging)
constexpr bool kSkipRealFrames = false;
Expand Down Expand Up @@ -424,7 +433,7 @@ namespace dxvk {
m_swapchainImageLayouts[swapchainIndex],
VK_IMAGE_LAYOUT_GENERAL);

barriers.addBarrier(m_backbufferImages[present.m_backbufferIndex]->handle(),
barriers.addBarrier(m_backbufferImages[present.acquiredImageIndex]->handle(),
VK_IMAGE_ASPECT_COLOR_BIT,
VK_ACCESS_NONE,
VK_ACCESS_SHADER_READ_BIT,
Expand Down Expand Up @@ -460,7 +469,7 @@ namespace dxvk {
present.frameInterpolation.camera,
m_swapchainImageViews[swapchainIndex],
m_dlfgBeginSemaphore->handle(),
m_backbufferViews[present.m_backbufferIndex],
m_backbufferViews[present.acquiredImageIndex],
present.frameInterpolation.motionVectors,
present.frameInterpolation.depth,
false);
Expand All @@ -478,7 +487,7 @@ namespace dxvk {
VK_IMAGE_LAYOUT_GENERAL,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR);

barriers.addBarrier(m_backbufferImages[present.m_backbufferIndex]->handle(),
barriers.addBarrier(m_backbufferImages[present.acquiredImageIndex]->handle(),
VK_IMAGE_ASPECT_COLOR_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
Expand Down Expand Up @@ -509,7 +518,7 @@ namespace dxvk {
m_cmdList->addSignalSemaphore(swapchainSync.present);
if (kSkipRealFrames) {
// if we're skipping real frames, then signal the acquire and frame ID semaphores here
m_cmdList->addSignalSemaphore(m_backbufferAcquireSemaphores[present.m_backbufferIndex]->handle());
m_cmdList->addSignalSemaphore(m_backbufferAcquireSemaphores[present.acquiredImageIndex]->handle());

// skip signaling if the semaphore is already at the value we are signaling to, since the VK spec forbids this (yes, really)
// this happens after init where the value is already 0 on frame 0, and may also happen during wraparound
Expand All @@ -532,7 +541,7 @@ namespace dxvk {

// present the interpolated frame
reflex.beginOutOfBandPresent(present.present.cachedReflexFrameId);
m_lastPresentStatus = vk::Presenter::presentImage(present.status, present.present, present.frameInterpolation);
m_lastPresentStatus = vk::Presenter::presentImage(present.status, present.present, present.frameInterpolation, present.acquiredImageIndex);
reflex.endOutOfBandPresent(present.present.cachedReflexFrameId);

if (m_lastPresentStatus != VK_SUCCESS) {
Expand Down Expand Up @@ -567,7 +576,7 @@ namespace dxvk {
{
ScopedGpuProfileZone_Present(m_device, m_cmdList->getCmdBuffer(), "DLFG real frame blit barriers");

barriers.addBarrier(m_backbufferImages[present.m_backbufferIndex]->handle(),
barriers.addBarrier(m_backbufferImages[present.acquiredImageIndex]->handle(),
VK_IMAGE_ASPECT_COLOR_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
VK_ACCESS_TRANSFER_READ_BIT,
Expand All @@ -578,7 +587,7 @@ namespace dxvk {
VK_IMAGE_ASPECT_COLOR_BIT,
VK_ACCESS_MEMORY_READ_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT,
m_swapchainImageLayouts[present.m_backbufferIndex],
m_swapchainImageLayouts[present.acquiredImageIndex],
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

barriers.record(m_device, *m_cmdList, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
Expand All @@ -592,13 +601,13 @@ namespace dxvk {
};

m_device->vkd()->vkCmdCopyImage(m_cmdList->getCmdBuffer(),
m_backbufferImages[present.m_backbufferIndex]->handle(),
m_backbufferImages[present.acquiredImageIndex]->handle(),
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
swapchainImage.image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1, &copy);

barriers.addBarrier(m_backbufferImages[present.m_backbufferIndex]->handle(),
barriers.addBarrier(m_backbufferImages[present.acquiredImageIndex]->handle(),
VK_IMAGE_ASPECT_COLOR_BIT,
VK_ACCESS_TRANSFER_READ_BIT,
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
Expand All @@ -612,7 +621,7 @@ namespace dxvk {
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR);

m_swapchainImageLayouts[present.m_backbufferIndex] = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
m_swapchainImageLayouts[present.acquiredImageIndex] = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;

barriers.record(m_device, *m_cmdList, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
}
Expand All @@ -628,7 +637,7 @@ namespace dxvk {
// * the frame end semaphore
m_cmdList->addWaitSemaphore(backbufferWaitSemaphore); // if we ran interpolation, this will be null (which is a no-op)
m_cmdList->addWaitSemaphore(swapchainSync.acquire);
m_cmdList->addSignalSemaphore(m_backbufferAcquireSemaphores[present.m_backbufferIndex]->handle());
m_cmdList->addSignalSemaphore(m_backbufferAcquireSemaphores[present.acquiredImageIndex]->handle());
if (!present.frameInterpolation.valid() || kSkipPacerSemaphoreWait) {
m_cmdList->addSignalSemaphore(swapchainSync.present);
}
Expand Down Expand Up @@ -677,7 +686,7 @@ namespace dxvk {
// see comments around __DLFG_REFLEX_WORKAROUND define in rtx_ngx_wrapper.h for more details
reflex.beginOutOfBandPresent(present.present.cachedReflexFrameId);
#endif
m_lastPresentStatus = vk::Presenter::presentImage(present.status, present.present, present.frameInterpolation);
m_lastPresentStatus = vk::Presenter::presentImage(present.status, present.present, present.frameInterpolation, present.acquiredImageIndex);
#if !__DLFG_REFLEX_WORKAROUND
reflex.endPresentation(present.present.cachedReflexFrameId);
#else
Expand Down
7 changes: 5 additions & 2 deletions src/dxvk/rtx_render/rtx_dlfg.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,10 @@ namespace dxvk {
vk::PresenterImage getImage(uint32_t index) const override;

VkResult acquireNextImage(vk::PresenterSync& sync, uint32_t& index) override;
VkResult presentImage(std::atomic<VkResult>* status, const DxvkPresentInfo& presentInfo, const DxvkFrameInterpolationInfo& frameInterpolationInfo) override;
VkResult presentImage(std::atomic<VkResult>* status,
const DxvkPresentInfo& presentInfo,
const DxvkFrameInterpolationInfo& frameInterpolationInfo,
std::uint32_t acquiredImageIndex) override;
VkResult recreateSwapChain(const vk::PresenterDesc& desc) override;

// waits for all queued frames to be consumed
Expand Down Expand Up @@ -159,7 +162,7 @@ namespace dxvk {

struct PresentJob {
std::atomic<VkResult>* status;
uint32_t m_backbufferIndex;
uint32_t acquiredImageIndex;
DxvkPresentInfo present;
DxvkFrameInterpolationInfo frameInterpolation;
};
Expand Down
3 changes: 2 additions & 1 deletion src/vulkan/vulkan_presenter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ namespace dxvk::vk {
// NV-DXVK start: DLFG integration
std::atomic<VkResult>*,
const DxvkPresentInfo&,
const DxvkFrameInterpolationInfo&
const DxvkFrameInterpolationInfo&,
std::uint32_t
// NV-DXVK end
) {
ScopedCpuProfileZone();
Expand Down
4 changes: 3 additions & 1 deletion src/vulkan/vulkan_presenter.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
*/
#pragma once

#include <cstdint>
#include <vector>

#include "../util/log/log.h"
Expand Down Expand Up @@ -203,7 +204,8 @@ namespace dxvk::vk {
// xxxnsubtil: these are never used by vk::Presenter, only the DLFG presenter needs them --- split this method!
std::atomic<VkResult>* presentStatus,
const DxvkPresentInfo& presentInfo,
const DxvkFrameInterpolationInfo& frameInterpolationInfo
const DxvkFrameInterpolationInfo& frameInterpolationInfo,
std::uint32_t acquiredImageIndex
// NV-DXVK end
);

Expand Down

0 comments on commit 8cd5568

Please sign in to comment.