Skip to content

Commit

Permalink
Merge branch 'yaobin2/neeDarkening_mergeTask4' into 'main'
Browse files Browse the repository at this point in the history
[REMIX-3000] Improve the cache's ability to store more lights, and thus lower emissive noise.

* Support triangle ranges in the cache so that a NEE candidate can store more than one triangle.
* Explore new triangle candidates. Randomly sample a few triangles beyond the given candidate range to discover more emissive triangles.
* Add spatial reuse. Spatial reuse allows neighboring cells to share information with each other.

See merge request lightspeedrtx/dxvk-remix-nv!838
  • Loading branch information
Yaobin Ouyang committed May 10, 2024
2 parents 17b5f6f + 66f059b commit f012e38
Show file tree
Hide file tree
Showing 8 changed files with 219 additions and 18 deletions.
6 changes: 6 additions & 0 deletions RtxOptions.md
Original file line number Diff line number Diff line change
Expand Up @@ -366,11 +366,17 @@ Tables below enumerate all the options and their defaults set by RTX Remix. Note
|rtx.neeCache.enableMIS|bool|True|Enable MIS\.|
|rtx.neeCache.enableModeAfterFirstBounce|int|1|NEE Cache enable mode on a second and higher bounces\. 0 means off, 1 means enabled for specular rays only, 2 means always enabled\.|
|rtx.neeCache.enableOnFirstBounce|bool|True|Enable NEE Cache on a first bounce\.|
|rtx.neeCache.enableSpatialReuse|bool|True|Enable NEE cell share statistics information with neighbors\.|
|rtx.neeCache.enableTriangleExploration|bool|True|Explore emissive triangle candidates in the same object\.|
|rtx.neeCache.enableUpdate|bool|True|Enable Update\.|
|rtx.neeCache.learningRate|float|0.02|Learning rate\. Higher values makes the cache adapt to lighting changes more quickly\.|
|rtx.neeCache.minRange|float|400|The range for lowest level cells\.|
|rtx.neeCache.resolution|float|8|Cell resolution\. Higher values mean smaller cells\.|
|rtx.neeCache.specularFactor|float|1|Specular component factor\.|
|rtx.neeCache.triangleExplorationAcceptRangeRatio|float|0.33|Accept index range to search range ratio, when triangle exploration is enabled\.|
|rtx.neeCache.triangleExplorationMaxRange|int|20|Index range to explore, when triangle exploration is enabled\.|
|rtx.neeCache.triangleExplorationProbability|float|0.05|The probability to explore new triangles\.|
|rtx.neeCache.triangleExplorationRangeRatio|float|0.1|Index range to triangle count ratio, when triangle exploration is enabled\.|
|rtx.neeCache.uniformSamplingProbability|float|0.1|Uniform sampling probability\.|
|rtx.nisPreset|int|1|Adjusts NIS scaling factor, trades quality for performance\.|
|rtx.numFramesToKeepBLAS|int|4||
Expand Down
11 changes: 11 additions & 0 deletions src/dxvk/rtx_render/rtx_nee_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ namespace dxvk {
ImGui::DragFloat("Cell Resolution", &resolutionObject(), 0.01f, 0.01f, 100.0f, "%.3f");
ImGui::DragFloat("Min Range", &minRangeObject(), 1.f, 0.1f, 10000.0f, "%.3f");
ImGui::Checkbox("Approximate Particle Lighting", &approximateParticleLightingObject());
ImGui::Checkbox("Enable Triangle Exploration", &enableTriangleExplorationObject());
ImGui::DragFloat("Triangle Exploration Probability", &triangleExplorationProbabilityObject(), 1.f, 0.0f, 1.0f, "%.3f");
ImGui::DragFloat("Triangle Exploration Range Ratio", &triangleExplorationRangeRatioObject(), 1.f, 0.0f, 1.0f, "%.3f");
ImGui::DragFloat("Triangle Exploration Accept Range Ratio", &triangleExplorationAcceptRangeRatioObject(), 1.f, 0.0f, 1.0f, "%.3f");
ImGui::DragInt("Triangle Exploration Max Range", &triangleExplorationMaxRangeObject(), 0.1f, 1, 1000, "%d", ImGuiSliderFlags_AlwaysClamp);
ImGui::Checkbox("Enable Spatial Reuse", &enableSpatialReuseObject());
}

void NeeCachePass::setRaytraceArgs(RaytraceArgs& constants, bool resetHistory) const {
Expand All @@ -108,6 +114,11 @@ namespace dxvk {
constants.neeCacheArgs.resolution = resolution();
constants.neeCacheArgs.minRange = minRange() * RtxOptions::Get()->sceneScale();
constants.neeCacheArgs.approximateParticleLighting = approximateParticleLighting();
constants.neeCacheArgs.triangleExplorationRangeRatio = triangleExplorationRangeRatio();
constants.neeCacheArgs.triangleExplorationAcceptRangeRatio = triangleExplorationAcceptRangeRatio();
constants.neeCacheArgs.triangleExplorationMaxRange = triangleExplorationMaxRange();
constants.neeCacheArgs.triangleExplorationProbability = enableTriangleExploration() ? triangleExplorationProbability() : 0.0f;
constants.neeCacheArgs.enableSpatialReuse = enableSpatialReuse();

static uvec2 oldResolution {0, 0};
constants.neeCacheArgs.clearCache = resetHistory || oldResolution.x != constants.camera.resolution.x || oldResolution.y != constants.camera.resolution.y;
Expand Down
6 changes: 6 additions & 0 deletions src/dxvk/rtx_render/rtx_nee_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ namespace dxvk {
RTX_OPTION("rtx.neeCache", float, emissiveTextureSampleFootprintScale, 1.0, "Emissive texture sample footprint scale.");
RTX_OPTION("rtx.neeCache", bool, approximateParticleLighting, true, "Use particle albedo as emissive color.");
RTX_OPTION("rtx.neeCache", float, ageCullingSpeed, 0.02, "This threshold determines culling speed of an old triangle. A triangle that is not detected for several frames will be deemed less important and culled quicker.");
RTX_OPTION("rtx.neeCache", bool, enableTriangleExploration, true, "Explore emissive triangle candidates in the same object.");
RTX_OPTION("rtx.neeCache", float, triangleExplorationProbability, 0.05, "The probability to explore new triangles.");
RTX_OPTION("rtx.neeCache", int, triangleExplorationMaxRange, 20, "Index range to explore, when triangle exploration is enabled.");
RTX_OPTION("rtx.neeCache", float, triangleExplorationRangeRatio, 0.1, "Index range to triangle count ratio, when triangle exploration is enabled.");
RTX_OPTION("rtx.neeCache", float, triangleExplorationAcceptRangeRatio, 0.33, "Accept index range to search range ratio, when triangle exploration is enabled.");
RTX_OPTION("rtx.neeCache", bool, enableSpatialReuse, true, "Enable NEE cell share statistics information with neighbors.");
private:
Rc<vk::DeviceFn> m_vkd;
};
Expand Down
26 changes: 20 additions & 6 deletions src/dxvk/shaders/rtx/algorithm/nee_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,12 +241,23 @@ struct NEECandidate
m_data.x = (m_data.x & 0xffffff) | (thresholdI << 24);
}

static NEECandidate create(uint surfaceID, uint primitiveID)
// Returns the candidate's triangle range, which is stored in the top 8 bits of m_data.y.
int getRange()
{
  int rangeBits = (m_data.y >> 24) & 0xff;
  return rangeBits;
}

// Stores the triangle range in the top 8 bits of m_data.y, leaving the low 24 bits untouched.
[mutating] void setRange(uint range)
{
  // Clear the previous range byte first, then merge in the new one.
  m_data.y &= 0xffffff;
  m_data.y |= (range << 24);
}

// Builds a candidate for surface `surfaceID` whose first primitive is `primitiveID`
// and which carries the given triangle `range`.
static NEECandidate create(uint surfaceID, uint primitiveID, uint range)
{
  NEECandidate candidate;
  // Zero the packed data before the setters fill in their individual fields.
  candidate.m_data = 0;
  candidate.setSurfaceID(surfaceID);
  candidate.setPrimitiveID(primitiveID);
  candidate.setRange(range);
  return candidate;
}

Expand Down Expand Up @@ -564,9 +575,11 @@ struct NEECell
for (; address < endAddress; address += getCandidateSize())
{
NEECandidate candidate = NEECandidate.create(NeeCache.Load2(address));
if (candidate.getPrimitiveID() == primitiveID && candidate.getSurfaceID() == surfaceID)
int firstPrimitiveID = candidate.getPrimitiveID();
int range = candidate.getRange();
if (primitiveID >= firstPrimitiveID && primitiveID < firstPrimitiveID + range && candidate.getSurfaceID() == surfaceID)
{
return candidate.getSampleProbability();
return candidate.getSampleProbability() / float(range);
}
}
return float16_t(0.0);
Expand Down Expand Up @@ -623,17 +636,18 @@ struct NEECell

// This function is mainly for debug purposes. The function "getCachedLightSample()" is an optimized version of this function.
// Samples from "getCachedLightSample()" should converge to the same result as this function.
// randomNumber.x selects the candidate; the remaining components are forwarded to
// NEECacheUtils.calculateLightSampleFromCandidate() to sample within that candidate.
// triangleID is always written as -1 by this function.
LightSample getLightSample(StructuredBuffer<uint> PrimitiveIDPrefixSumBuffer, vec3 randomNumber, vec3 position, float16_t coneRadius, float16_t coneSpreadAngle, out uint triangleID)
LightSample getLightSample(StructuredBuffer<uint> PrimitiveIDPrefixSumBuffer, vec4 randomNumber, vec3 position, float16_t coneRadius, float16_t coneSpreadAngle, out uint triangleID)
{
LightSample lightSampleTriangle;
// Sample cached triangles
float lightObjectPdf = 0;
NEECandidate candidate = sampleCandidate(randomNumber.x, lightObjectPdf);
// Sample the selected triangle
vec2 uv = vec2(randomNumber.y, randomNumber.z);
vec3 uvw = vec3(randomNumber.y, randomNumber.z, randomNumber.w);
float area;
// Kept in a local variable so it can be passed by name to the sampling helper.
int primitiveIndex = candidate.getPrimitiveID();
lightSampleTriangle = NEECacheUtils.calculateLightSampleFromCandidate(
candidate.getSurfaceID(), candidate.getPrimitiveID(), PrimitiveIDPrefixSumBuffer, uv, lightObjectPdf, position, coneRadius, coneSpreadAngle, area);
candidate.getSurfaceID(), primitiveIndex, candidate.getRange(), PrimitiveIDPrefixSumBuffer, uvw, lightObjectPdf, position, coneRadius, coneSpreadAngle, area);
// No triangle ID is reported from this path.
triangleID = -1;
return lightSampleTriangle;
}
Expand Down
43 changes: 39 additions & 4 deletions src/dxvk/shaders/rtx/algorithm/nee_cache_light.slangh
Original file line number Diff line number Diff line change
Expand Up @@ -228,22 +228,57 @@ struct NEECacheUtils
}

// Produces a light sample from a cached candidate.
// - surfaceIndex / primitiveIndex / range: the candidate's surface, its first primitive and its triangle range.
//   primitiveIndex is updated to the primitive that was actually sampled (or reset to 0 when the
//   whole object is sampled, see the end of the function).
// - uvw: uvw.xy is forwarded to getInteractions(); uvw.z selects which primitive is sampled.
// - trianglePdf: probability of having selected the candidate; it is divided by the number of
//   primitives the selection is spread over before being passed to calculateLightSample().
// - triangleArea: output, filled in by calculateLightSample().
static LightSample calculateLightSampleFromCandidate(
int surfaceIndex, int primitiveIndex, StructuredBuffer<uint> PrimitiveIDPrefixSumBuffer, vec2 uv, float trianglePdf, vec3 shadingPosition, float16_t coneRadius, float16_t spreadAngle,
int surfaceIndex, inout int primitiveIndex, int range, StructuredBuffer<uint> PrimitiveIDPrefixSumBuffer, vec3 uvw, float trianglePdf, vec3 shadingPosition, float16_t coneRadius, float16_t spreadAngle,
out float triangleArea, bool useSolidAnglePdf = true, int footprintMode = kFootprintFromRayOrigin)
{
Surface surface = getSurface(surfaceIndex);

int triangleCount = NEECacheUtils.getSurfacePrimitiveCount(surfaceIndex, PrimitiveIDPrefixSumBuffer);
if (shouldSampleObject(surface))
{
int triangleCount = NEECacheUtils.getSurfacePrimitiveCount(surfaceIndex, PrimitiveIDPrefixSumBuffer);
primitiveIndex = int(triangleCount * uv.x);
// Sample the whole object for emissive particles.
primitiveIndex = int(triangleCount * uvw.z);
trianglePdf /= triangleCount;
}
else
{
// The candidate covers `range` primitives, so the per-triangle probability is split evenly among them.
trianglePdf /= float(range);
float triangleExplorationProbability = cb.neeCacheArgs.triangleExplorationProbability;
if (uvw.z < triangleExplorationProbability && range < 255)
{
// Sample a triangle outside the given range with a low probability to search for potential emissive triangles.
// The code here is slightly biased because we reuse the probability of the triangles inside the range.
// An unbiased approach would consider these triangles in NEECell.searchCandidate() and do MIS for them,
// but that increases code complexity and lowers performance.
// "gab" (presumably "gap") is the exploration distance in primitives, proportional to the surface's triangle count.
int gab = cb.neeCacheArgs.triangleExplorationRangeRatio * triangleCount;
// Remap uvw.z from [0, triangleExplorationProbability) to [0, 1) so it can be reused as a random number.
float rand = uvw.z / triangleExplorationProbability;
if (rand < 0.5)
{
// Lower half: step backwards from the first primitive, clamped to index 0.
int randOffset = min(cb.neeCacheArgs.triangleExplorationMaxRange, (rand / 0.5) * gab + 1);
primitiveIndex = max(primitiveIndex - randOffset, 0);
}
else
{
// Upper half: step forwards from the first primitive, clamped to the last primitive of the surface.
int randOffset = min(cb.neeCacheArgs.triangleExplorationMaxRange, ((rand - 0.5) / 0.5) * gab + 1);
primitiveIndex = min(primitiveIndex + randOffset, triangleCount - 1);
}
}
else
{
// Sample a triangle inside the given range.
// NOTE(review): this branch is also taken when range >= 255 while uvw.z < triangleExplorationProbability,
// in which case the remapped uvw.z below is negative; it also divides by zero when the probability is 1.
// Confirm that neither case can occur with the values the callers provide.
uvw.z = (uvw.z - triangleExplorationProbability) / (1.0 - triangleExplorationProbability);
primitiveIndex += range * uvw.z;
}
}

RayInteraction rayInteracton;
SurfaceInteraction surfaceInteraction;
getInteractions(surface, primitiveIndex, uv, shadingPosition, coneRadius, spreadAngle, footprintMode, rayInteracton, surfaceInteraction);
getInteractions(surface, primitiveIndex, uvw.xy, shadingPosition, coneRadius, spreadAngle, footprintMode, rayInteracton, surfaceInteraction);

// When the whole object was sampled, report primitive 0 back to the caller instead of the randomly picked one.
if (shouldSampleObject(surface))
{
primitiveIndex = 0;
}
return calculateLightSample(surface, surfaceInteraction, rayInteracton, shadingPosition, trianglePdf, triangleArea, useSolidAnglePdf, footprintMode);
}

Expand Down
10 changes: 8 additions & 2 deletions src/dxvk/shaders/rtx/pass/integrate/integrate_nee.comp.slang
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,12 @@ void main(uint2 threadIndex : SV_DispatchThreadID, uint2 LocalIndex : SV_GroupTh
float lightObjectPdf = 0;
NEECandidate candidate = cell.sampleCandidate(RAB_GetNextRandom(rtxdiRNG), lightObjectPdf);
int primitiveIndex = candidate.getPrimitiveID();
vec2 uv = vec2(RAB_GetNextRandom(rtxdiRNG), RAB_GetNextRandom(rtxdiRNG));
int range = candidate.getRange();
vec3 uvw = vec3(RAB_GetNextRandom(rtxdiRNG), RAB_GetNextRandom(rtxdiRNG), RAB_GetNextRandom(rtxdiRNG));
float triangleArea;
float16_t spreadAngle = getLobeAngleFromMaterial(opaqueSurfaceMaterialInteraction);
lightSample = NEECacheUtils.calculateLightSampleFromCandidate(
candidate.getSurfaceID(), candidate.getPrimitiveID(), PrimitiveIDPrefixSum, uv, lightObjectPdf, minimalSurfaceInteraction.position,
candidate.getSurfaceID(), primitiveIndex, candidate.getRange(), PrimitiveIDPrefixSum, uvw, lightObjectPdf, minimalSurfaceInteraction.position,
minimalRayInteraction.coneRadius, spreadAngle, triangleArea, true, kFootprintFromRayOriginClamped);

bool isVisible = false;
Expand All @@ -162,6 +163,11 @@ void main(uint2 threadIndex : SV_DispatchThreadID, uint2 LocalIndex : SV_GroupTh
if (any(diffuseLight + specularLight) > 0 && (threadIndex.x + threadIndex.y) % 4 == 0)
{
int prefixTask = NEECacheUtils.convertIDToPrefixSumID(candidate.getSurfaceID(), primitiveIndex, PrimitiveIDPrefixSum);
if (cb.neeCacheArgs.enableSpatialReuse)
{
jitter = sampleUniformIntRng(rtxdiRNG);
cell = NEECache.getCell(NEECache.pointToOffset(minimalSurfaceInteraction.position, minimalSurfaceInteraction.triangleNormal, jitter));
}
cell.insertSlotTask(prefixTask, diffuseLight + specularLight, false);
}

Expand Down
127 changes: 121 additions & 6 deletions src/dxvk/shaders/rtx/pass/nee_cache/update_nee_cache.comp.slang
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ int convertLightIndex(int lightIndex)
return lightMapping[lightIndex + (currentToPrevious ? 0 : cb.lightCount)];
}

// Sort tasks in descending order based on y component
void sortTask(uint batchIndex)
{
int listCount = min(32, s_candidateCount[batchIndex]);
Expand Down Expand Up @@ -196,7 +197,9 @@ void updateTriangleTask(NEECell cell, uint2 localIndex)
int taskID = localIndex.x;
uint2 value = cell.getSlotTaskValue(taskID);
int surfaceID, primitiveID;
value.x = convertToThisFramePrefixSumID(value.x, surfaceID, primitiveID);
uint oldRange = (value.x & 0xff000000);
value.x = convertToThisFramePrefixSumID(value.x & 0xffffff, surfaceID, primitiveID);
value.x |= oldRange;
int delta = max(value.y * cb.neeCacheArgs.learningRate, 1);
value.y = clamp(int(value.y) - delta, 0, 1 << 25);
if (value.x != NEE_CACHE_INVALID_ID && value.y > 0)
Expand Down Expand Up @@ -235,7 +238,108 @@ void updateTriangleTask(NEECell cell, uint2 localIndex)
}
}

mergeTask(value, existingCount, localIndex.y);
// Search if the given triangle is inside an existing candidate range.
// If not, insert it to the candidate list.
if (value.x != NEE_CACHE_INVALID_ID && value.y != 0)
{
bool found = false;
for (int j = 0; j < existingCount; ++j)
{
int firstPrimitiveID = (s_candidateList[j][localIndex.y].x & 0xffffff);
int range = (s_candidateList[j][localIndex.y].x >> 24);
if (value.x >= firstPrimitiveID && value.x < firstPrimitiveID + range)
{
s_candidateList[j][localIndex.y].y += value.y;
found = true;
break;
}
}

if(!found)
{
value.x = ((1 << 24) | value.x);
insertTask(value, localIndex.y);
}
}

}


GroupMemoryBarrierWithGroupSync();

// Change format. Put primitive ID to y component for sorting.
for (int i = localIndex.x; i < NEE_CACHE_HASH_TASK_COUNT && cb.neeCacheArgs.triangleExplorationProbability > 0; i += CANDIDATE_GROUP_SIZE)
{
uint2 data = s_candidateList[i][localIndex.y];
uint range = (data.x >> 24);
uint primitiveID = (data.x & 0xffffff);
uint2 newData = uint2(data.y, (primitiveID << 8) | range);
s_candidateList[i][localIndex.y] = newData;
}

GroupMemoryBarrierWithGroupSync();

// sort task based on primitive id
if (localIndex.x == 0 && cb.neeCacheArgs.triangleExplorationProbability > 0)
{
// Sort candidates based on their triangle IDs, which is in the y component.
sortTask(localIndex.y);

int count = s_candidateCount[localIndex.y];

uint totalValue = 0;
for (int i = 0; i < count; i++)
{
uint2 data = s_candidateList[i][localIndex.y];
uint newRange = (data.y & 0xff);
uint newPrefixSumID = (data.y >> 8);
uint newValue = data.x;
totalValue += newValue;
}
uint maxMergeValue = totalValue * 0.1;
int oldSurfaceID = -1;
int oldPrimitiveID = -1;
int oldPrefixSumID = -1;
int oldRange = 0;
int oldIndex = -1;
int oldValue = 0;
for (int i = 0; i < count; i++)
{
uint2 data = s_candidateList[i][localIndex.y];
uint newRange = (data.y & 0xff);
uint newPrefixSumID = (data.y >> 8);
uint newValue = data.x;

int newSurfaceID, newPrimitiveID;
NEECacheUtils.convertPrefixSumIDToID(newPrefixSumID, PrimitiveIDPrefixSum, newSurfaceID, newPrimitiveID);
int primitiveCount = NEECacheUtils.getSurfacePrimitiveCount(newSurfaceID, PrimitiveIDPrefixSum);
int gab = min(cb.neeCacheArgs.triangleExplorationMaxRange, cb.neeCacheArgs.triangleExplorationRangeRatio * primitiveCount) *
cb.neeCacheArgs.triangleExplorationAcceptRangeRatio + 1;
if (newSurfaceID == oldSurfaceID && oldPrimitiveID < newPrimitiveID + newRange + gab && oldPrimitiveID + oldRange - newPrimitiveID < 255 &&
oldValue + newValue < maxMergeValue)
{
// Merge
oldRange = oldPrimitiveID + oldRange - newPrimitiveID;
oldPrefixSumID = newPrefixSumID;
oldPrimitiveID = newPrimitiveID;
oldValue += newValue;
s_candidateList[oldIndex][localIndex.y] = uint2((oldRange << 24) | oldPrefixSumID, oldValue);
newValue = 0;
newRange = 0;
}
else
{
// New entry
oldSurfaceID = newSurfaceID;
oldPrimitiveID = newPrimitiveID;
oldPrefixSumID = newPrefixSumID;
oldRange = newRange;
oldIndex = i;
oldValue = newValue;
}

s_candidateList[i][localIndex.y] = uint2((newRange << 24) | newPrefixSumID, newValue);
}
}

GroupMemoryBarrierWithGroupSync();
Expand All @@ -248,6 +352,16 @@ void updateTriangleTask(NEECell cell, uint2 localIndex)
}
GroupMemoryBarrierWithGroupSync();

// Force every candidate contain only 1 triangle, if triangle exploration is disabled.
if (cb.neeCacheArgs.triangleExplorationProbability == 0.0)
{
uint2 task = s_candidateList[localIndex.x][localIndex.y];
task.x = (task.x & 0xffffff) | (1 << 24);

s_candidateList[localIndex.x][localIndex.y].x = task.x;
GroupMemoryBarrierWithGroupSync();
}

cell.setSlotTaskValue(localIndex.x, s_candidateList[localIndex.x][localIndex.y]);

if (localIndex.x == 0)
Expand All @@ -264,7 +378,7 @@ void updateTriangleTask(NEECell cell, uint2 localIndex)
uint2 task = s_candidateList[candidateID][localIndex.y];
int surfaceID, primitiveID;
NEECacheUtils.convertPrefixSumIDToID(task.x & 0xffffff, PrimitiveIDPrefixSum, surfaceID, primitiveID);
NEECandidate candidate = NEECandidate.create(surfaceID, primitiveID);
NEECandidate candidate = NEECandidate.create(surfaceID, primitiveID, max(1, task.x >> 24));

candidate.setSampleProbability(s_candidateLight[candidateID][localIndex.y]);
cell.setCandidate(candidateID, candidate);
Expand All @@ -286,17 +400,18 @@ void updateTriangleSample(NEECell cell, uint2 localIndex, uint3 threadIndex)
{
float16_t spreadAngle = float16_t(cb.screenSpacePixelSpreadHalfAngle);
float16_t coneRadius = 0.0;
vec2 uv = vec2(getNextSampleBlueNoise(rng), getNextSampleBlueNoise(rng));
vec3 uvw = vec3(getNextSampleBlueNoise(rng), getNextSampleBlueNoise(rng), getNextSampleBlueNoise(rng));
float area;
int primitiveID = candidate.getPrimitiveID();
LightSample lightSample = NEECacheUtils.calculateLightSampleFromCandidate(
candidate.getSurfaceID(), candidate.getPrimitiveID(), PrimitiveIDPrefixSum, uv, lightObjectPdf,
candidate.getSurfaceID(), primitiveID, candidate.getRange(), PrimitiveIDPrefixSum, uvw, lightObjectPdf,
/*cellCenter*/ 0, coneRadius, spreadAngle, area, false, kFootprintFromTextureCoordDiff);

sample.position = lightSample.position;
sample.normal = lightSample.normal;
sample.pdf = lightSample.solidAnglePdf;
sample.radiance = lightSample.radiance;
sample.triangleID = NEECacheUtils.convertIDToPrefixSumID(candidate.getSurfaceID(), candidate.getPrimitiveID(), PrimitiveIDPrefixSum);
sample.triangleID = NEECacheUtils.convertIDToPrefixSumID(candidate.getSurfaceID(), primitiveID, PrimitiveIDPrefixSum);
}
cell.setSample(i, sample);
}
Expand Down

0 comments on commit f012e38

Please sign in to comment.