Skip to content
Permalink
master
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
#version 450
// Workgroup configuration. With SUBGROUPS enabled the shader uses subgroup
// ballots and takes its local X size from specialization constant 0;
// otherwise it runs with a fixed 32 invocations per workgroup (one invocation
// per bit of the 32-bit light mask written by main()).
#if SUBGROUPS
#extension GL_KHR_shader_subgroup_ballot : require
layout(local_size_x_id = 0) in;
#else
layout(local_size_x = 32) in;
#endif
// Defined before the include; presumably selects the bindless variants of the
// declarations in clusterer_data.h (ClustererParametersBindless,
// ClustererBindlessTransforms) -- confirm against that header.
#define CLUSTERER_BINDLESS
#include "clusterer_data.h"
// Uniform block (std140, set 1, binding 0) holding the clusterer parameters.
// The struct type is declared outside this file. Members read by this shader:
// clip_scale.zw, inv_resolution_xy, resolution_xy, num_lights and num_lights_32.
layout(std140, set = 1, binding = 0) uniform ClustererParameters
{
ClustererParametersBindless parameters;
};
// Upper bound on the triangle count stored per light; test_spot_light()
// accepts unconditionally when the stored count exceeds this value.
const uint MAX_TRIANGLES = 8;
// Per-light culling record: 4 vec4 slots for each of MAX_TRIANGLES triangles.
// As read by test_point_light(): data[0] = screen bounding box,
// data[1] = transformed ranges, data[2] = 2x2 clip transform (two columns),
// data[3] = ellipse parameters (.x != 0 selects the ellipse test, .yz scale).
// As read by test_spot_light(), for triangle i: data[4i].xyz = base values,
// data[4i + 1].xyz = d/dx, data[4i + 2].xyz = d/dy, data[4i + 3] = bounding
// box; data[0].w additionally carries the triangle count as raw uint bits.
struct CullSetup
{
vec4 data[4 * MAX_TRIANGLES];
};
// Read-only storage buffer (set 0, binding 0). The struct type is declared
// outside this file; this shader only reads type_mask[chunk], a 32-bit mask
// whose set bits mark lights handled by test_point_light() and whose clear
// bits mark lights handled by test_spot_light().
layout(std430, set = 0, binding = 0) readonly buffer ClusterTransforms
{
ClustererBindlessTransforms cluster_transforms;
};
// Read-only storage buffer (set 0, binding 2) with one CullSetup record per
// light, indexed by the global light index (32 * chunk + bit).
layout(std430, set = 0, binding = 2) readonly buffer CullingSetup
{
CullSetup data[];
} culling_setup;
// Write-only output (set 0, binding 3). main() stores one 32-bit light mask
// per (pixel, chunk) at index
// (pixel.y * resolution_xy.x + pixel.x) * num_lights_32 + chunk.
layout(std430, set = 0, binding = 3) writeonly buffer Bitmask
{
uint bitmask[];
};
// Tests the screen rectangle [uv, uv + uv_stride] against the culling record
// of light `light_index`, interpreting the record with the point-light layout.
//
// When slot 3 has a non-zero .x, the four rectangle corners are scaled by
// parameters.clip_scale.zw, transformed by the 2x2 matrix in slot 2, recentred
// on the midpoint of the ranges in slot 1 and scaled by slot 3's .yz. The
// test passes when every corner's squared length is below
// (1 + longest diagonal of the transformed rectangle)^2.
//
// Otherwise the test is a plain overlap check against the bounding box in
// slot 0 (xy = min, zw = max).
bool test_point_light(vec2 uv, vec2 uv_stride, uint light_index)
{
    vec4 ellipse_params = culling_setup.data[light_index].data[3];

    // Fallback: axis-aligned bounding-box overlap.
    if (ellipse_params.x == 0.0)
    {
        vec4 bounds = culling_setup.data[light_index].data[0];
        bvec2 above_min = greaterThan(uv + uv_stride, bounds.xy);
        bvec2 below_max = lessThan(uv, bounds.zw);
        return all(above_min) && all(below_max);
    }

    vec4 ranges = culling_setup.data[light_index].data[1];
    vec4 xform = culling_setup.data[light_index].data[2];
    mat2 to_ellipse = mat2(xform.xy, xform.zw);
    vec2 center = 0.5 * (ranges.xz + ranges.yw);
    vec2 inv_radius = ellipse_params.yz;

    // Rectangle corners after clip scaling.
    vec2 lo = uv * parameters.clip_scale.zw;
    vec2 hi = (uv + uv_stride) * parameters.clip_scale.zw;

    // Corners expressed relative to the centre, in scaled ellipse space.
    vec2 c00 = (to_ellipse * vec2(lo.x, lo.y) - center) * inv_radius;
    vec2 c01 = (to_ellipse * vec2(lo.x, hi.y) - center) * inv_radius;
    vec2 c10 = (to_ellipse * vec2(hi.x, lo.y) - center) * inv_radius;
    vec2 c11 = (to_ellipse * vec2(hi.x, hi.y) - center) * inv_radius;

    // Squared distance threshold: one unit plus the longer diagonal.
    float reach = 1.0 + max(distance(c00, c11), distance(c01, c10));
    float reach_sq = reach * reach;

    vec4 corner_sq = vec4(dot(c00, c00), dot(c01, c01), dot(c10, c10), dot(c11, c11));
    return all(lessThan(corner_sq, vec4(reach_sq)));
}
// Tests the screen rectangle [uv, uv + uv_stride] against the culling record
// of light `light_index`, interpreting the record as a list of triangles.
//
// The triangle count is stored as raw uint bits in slot 0's .w. A count above
// MAX_TRIANGLES always passes. Otherwise the test passes as soon as one
// triangle both overlaps the rectangle with its bounding box and has all three
// of its linear functions (presumably edge equations -- confirm against the
// code that fills the buffer) positive at the rectangle corner that maximises
// each of them.
bool test_spot_light(vec2 uv, vec2 uv_stride, uint light_index)
{
    uint triangle_count = floatBitsToUint(culling_setup.data[light_index].data[0].w);

    // Too many triangles to have been stored: accept unconditionally.
    if (triangle_count > MAX_TRIANGLES)
        return true;

    for (uint tri = 0; tri < triangle_count; tri++)
    {
        uint slot = 4u * tri;

        // Reject early when the triangle's bounding box misses the rectangle.
        vec4 bounds = culling_setup.data[light_index].data[slot + 3u];
        bool overlaps = all(greaterThan(uv + uv_stride, bounds.xy)) && all(lessThan(uv, bounds.zw));
        if (!overlaps)
            continue;

        vec3 value = culling_setup.data[light_index].data[slot].xyz;
        vec3 grad_x = culling_setup.data[light_index].data[slot + 1u].xyz;
        vec3 grad_y = culling_setup.data[light_index].data[slot + 2u].xyz;

        // Evaluate at uv, then step by the stride only along increasing
        // directions so each component is sampled where it is largest.
        value += grad_x * uv.x;
        value += grad_y * uv.y;
        value += mix(vec3(0.0), uv_stride.x * grad_x, greaterThan(grad_x, vec3(0.0)));
        value += mix(vec3(0.0), uv_stride.y * grad_y, greaterThan(grad_y, vec3(0.0)));

        if (all(greaterThan(value, vec3(0.0))))
            return true;
    }

    return false;
}
// Fallback path only: workgroup-shared accumulator into which each of the 32
// invocations ORs its light's bit before invocation 0 writes the result out.
#if !SUBGROUPS
shared uint shared_mask;
#endif
// Entry point. For the chunk of 32 lights selected by gl_WorkGroupID.x
// (lights 32 * chunk .. 32 * chunk + 31), writes a 32-bit mask per pixel with
// one bit set for every light whose culling test passes for that pixel.
//
// SUBGROUPS path: each subgroup owns a tile of 8 x (gl_SubgroupSize / 8)
// pixels. The first 32 lanes each test one light against the whole tile, the
// results are combined with a ballot, and every lane then re-tests only the
// surviving lights against its own pixel.
//
// Fallback path: each workgroup owns one pixel (gl_WorkGroupID.yz) and each
// of its 32 invocations tests one light, combining results via shared memory.
void main()
{
#if SUBGROUPS
    uvec2 TILE_SIZE = uvec2(8u, gl_SubgroupSize >> 3u);
    uint local_index = gl_SubgroupInvocationID;

    // Subgroups of one workgroup are stacked along Y.
    uvec2 tile = gl_WorkGroupID.yz;
    tile.y = tile.y * gl_NumSubgroups + gl_SubgroupID;
    uint chunk_index = gl_WorkGroupID.x;

    vec2 tile_uv = 2.0 * vec2(tile * TILE_SIZE) * parameters.inv_resolution_xy - 1.0;
    vec2 tile_uv_stride = (2.0 * vec2(TILE_SIZE)) * parameters.inv_resolution_xy;

    uint type_mask = cluster_transforms.type_mask[chunk_index];

    // Coarse pass: lane N tests light N of the chunk against the whole tile.
    // Lanes that map to no light (N >= 32, or past the end of the light list
    // in the last chunk) must not read the culling buffer and never pass;
    // this matches the num_lights guard used by the non-subgroup path.
    bool passed = false;
    uint coarse_light_index = 32u * chunk_index + local_index;
    if (local_index < 32u && coarse_light_index < parameters.num_lights)
    {
        bool is_point = (type_mask & (1u << local_index)) != 0u;
        if (is_point)
            passed = test_point_light(tile_uv, tile_uv_stride, coarse_light_index);
        else
            passed = test_spot_light(tile_uv, tile_uv_stride, coarse_light_index);
    }

    // Bit N of the ballot is set when light N survived the coarse pass.
    uint ballot = subgroupBallot(passed).x;

    // Fine pass: each lane covers one pixel of the 8-wide tile.
    uvec2 pixel = tile * TILE_SIZE + uvec2(local_index & 7u, local_index >> 3u);
    vec2 uv = 2.0 * vec2(pixel) * parameters.inv_resolution_xy - 1.0;
    vec2 uv_stride = 2.0 * parameters.inv_resolution_xy;

    uint pixel_mask = 0u;
    while (ballot != 0u)
    {
        // Pop the lowest surviving light; unsigned math keeps bit 31 well-defined.
        uint bit_index = uint(findLSB(ballot));
        uint bit = 1u << bit_index;
        ballot &= ~bit;

        uint light_index = chunk_index * 32u + bit_index;
        if ((type_mask & bit) != 0u)
            passed = test_point_light(uv, uv_stride, light_index);
        else
            passed = test_spot_light(uv, uv_stride, light_index);

        if (passed)
            pixel_mask |= bit;
    }

    uint linear_coord = pixel.y * parameters.resolution_xy.x + pixel.x;
    uint base_index = linear_coord * parameters.num_lights_32;
    bitmask[base_index + chunk_index] = pixel_mask;
#else
    uvec2 pixel = gl_WorkGroupID.yz;
    uint linear_coord = pixel.y * parameters.resolution_xy.x + pixel.x;
    uint base_index = linear_coord * parameters.num_lights_32;
    uint chunk_index = gl_WorkGroupID.x;
    uint type_mask = cluster_transforms.type_mask[chunk_index];
    uint local_index = gl_LocalInvocationIndex;

    // Invocation 0 clears the accumulator; the barrier publishes the clear.
    if (local_index == 0u)
        shared_mask = 0u;

    vec2 uv = 2.0 * vec2(pixel) * parameters.inv_resolution_xy - 1.0;
    vec2 uv_stride = 2.0 * parameters.inv_resolution_xy;
    barrier();

    // Invocation N tests light N of the chunk, skipping slots past the end
    // of the light list.
    uint light_index = chunk_index * 32u + local_index;
    if (light_index < parameters.num_lights)
    {
        bool hit;
        if ((type_mask & (1u << local_index)) != 0u)
            hit = test_point_light(uv, uv_stride, light_index);
        else
            hit = test_spot_light(uv, uv_stride, light_index);

        if (hit)
            atomicOr(shared_mask, 1u << local_index);
    }

    // Wait for every invocation's contribution before writing the mask.
    barrier();
    if (local_index == 0u)
        bitmask[base_index + chunk_index] = shared_mask;
#endif
}