Permalink
Cannot retrieve contributors at this time
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
216 lines (178 sloc)
6.54 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#version 450

// SUBGROUPS selects between the two implementations of main() in this file.
// Default it to 0 when the build did not define it, so that the `#if SUBGROUPS`
// and `#if !SUBGROUPS` tests never evaluate an undefined macro.
#ifndef SUBGROUPS
#define SUBGROUPS 0
#endif

#if SUBGROUPS
// Subgroup variant: main() uses subgroupBallot(), and the workgroup width is
// taken from specialization constant 0.
#extension GL_KHR_shader_subgroup_ballot : require
layout(local_size_x_id = 0) in;
#else
// Fallback variant: fixed workgroup width of 32 invocations.
layout(local_size_x = 32) in;
#endif

// CLUSTERER_BINDLESS must be defined before the include below.
#define CLUSTERER_BINDLESS
#include "clusterer_data.h"
// Clusterer parameters. The ClustererParametersBindless type is not declared in
// this file (presumably it comes from clusterer_data.h).
layout(std140, set = 1, binding = 0) uniform ClustererParameters {
    ClustererParametersBindless parameters;
};

// Largest triangle count test_spot_light() will iterate over for one light.
const uint MAX_TRIANGLES = 8;

// Per-light culling record: four vec4 slots for each of MAX_TRIANGLES triangles.
// test_point_light() reads slots 0..3 only; test_spot_light() reads slots
// 4*i .. 4*i+3 for triangle i and the triangle count from the bits of slot 0's w.
struct CullSetup {
    vec4 data[4 * MAX_TRIANGLES];
};

// Read-only cluster transforms; main() reads type_mask[chunk] from it.
layout(std430, set = 0, binding = 0) readonly buffer ClusterTransforms {
    ClustererBindlessTransforms cluster_transforms;
};

// Read-only array of culling records, indexed by light index.
layout(std430, set = 0, binding = 2) readonly buffer CullingSetup {
    CullSetup data[];
} culling_setup;

// Write-only output: one uint of light bits per (pixel, 32-light chunk) pair.
layout(std430, set = 0, binding = 3) writeonly buffer Bitmask {
    uint bitmask[];
};
// Overlap test between a rectangle and the culling record of a point light.
//
// uv          - lower corner of the rectangle.
// uv_stride   - size of the rectangle; its upper corner is uv + uv_stride.
// light_index - index of the light's record in culling_setup.data.
//
// Returns true when the rectangle passes the test for this light.
bool test_point_light(vec2 uv, vec2 uv_stride, uint light_index)
{
    vec2 rect_lo = uv;
    vec2 rect_hi = uv + uv_stride;

    vec4 inv_radius = culling_setup.data[light_index].data[3];

    // A zero x component selects the plain bounding-box test.
    if (inv_radius.x == 0.0)
    {
        vec4 bb = culling_setup.data[light_index].data[0];
        return all(greaterThan(rect_hi, bb.xy)) && all(lessThan(rect_lo, bb.zw));
    }

    // Otherwise the rectangle is tested against an ellipse: its corners are
    // scaled, transformed by a 2x2 matrix, re-centered, and scaled by the
    // inverse radii stored in inv_radius.yz.
    vec4 ranges = culling_setup.data[light_index].data[1];
    vec4 packed_transform = culling_setup.data[light_index].data[2];
    mat2 transform = mat2(packed_transform.xy, packed_transform.zw);
    vec2 center = 0.5 * (ranges.xz + ranges.yw);

    vec2 lo = rect_lo * parameters.clip_scale.zw;
    vec2 hi = rect_hi * parameters.clip_scale.zw;
    vec2 inv_r = inv_radius.yz;

    vec2 c00 = (transform * vec2(lo.x, lo.y) - center) * inv_r;
    vec2 c01 = (transform * vec2(lo.x, hi.y) - center) * inv_r;
    vec2 c10 = (transform * vec2(hi.x, lo.y) - center) * inv_r;
    vec2 c11 = (transform * vec2(hi.x, hi.y) - center) * inv_r;

    // The acceptance radius is 1 plus the longer diagonal of the transformed
    // rectangle; the comparison below is done on squared lengths.
    float reach = 1.0 + max(distance(c00, c11), distance(c01, c10));
    float reach_sq = reach * reach;

    vec4 corner_sq = vec4(dot(c00, c00), dot(c01, c01), dot(c10, c10), dot(c11, c11));

    // Pass only when every corner lies within the acceptance radius.
    return all(lessThan(corner_sq, vec4(reach_sq)));
}
// Overlap test between a rectangle and the triangle list stored in the culling
// record of a spot light.
//
// uv          - lower corner of the rectangle.
// uv_stride   - size of the rectangle; its upper corner is uv + uv_stride.
// light_index - index of the light's record in culling_setup.data.
//
// Returns true as soon as one triangle accepts the rectangle, and also when
// the stored triangle count exceeds MAX_TRIANGLES.
bool test_spot_light(vec2 uv, vec2 uv_stride, uint light_index)
{
    // The triangle count is stored as raw uint bits in the w lane of slot 0.
    uint triangle_count = floatBitsToUint(culling_setup.data[light_index].data[0].w);

    // More triangles than a record can hold: accept without testing.
    if (triangle_count > MAX_TRIANGLES)
        return true;

    vec2 rect_hi = uv + uv_stride;

    for (uint tri = 0u; tri < triangle_count; tri++)
    {
        uint slot = 4u * tri;

        // Slot 3 of each triangle is a bounding box; skip triangles it rejects.
        vec4 bb = culling_setup.data[light_index].data[slot + 3u];
        bool inside_bb = all(greaterThan(rect_hi, bb.xy)) && all(lessThan(uv, bb.zw));
        if (!inside_bb)
            continue;

        // Slots 0..2 hold three linear functions of uv: value at the origin
        // and the derivatives along x and y.
        vec3 edges = culling_setup.data[light_index].data[slot].xyz;
        vec3 dx = culling_setup.data[light_index].data[slot + 1u].xyz;
        vec3 dy = culling_setup.data[light_index].data[slot + 2u].xyz;

        edges += dx * uv.x;
        edges += dy * uv.y;

        // Step to the far side of the rectangle only along axes where the
        // derivative is positive, so each function is evaluated at the
        // rectangle corner where it is largest.
        edges += mix(vec3(0.0), uv_stride.x * dx, greaterThan(dx, vec3(0.0)));
        edges += mix(vec3(0.0), uv_stride.y * dy, greaterThan(dy, vec3(0.0)));

        if (all(greaterThan(edges, vec3(0.0))))
            return true;
    }

    return false;
}
#if !SUBGROUPS
// Light mask of the workgroup's (pixel, chunk) pair, OR-ed together by its invocations.
shared uint shared_mask;
#endif

// Entry point. gl_WorkGroupID.x selects a chunk of 32 consecutive lights. For
// each pixel handled, a 32-bit mask of the chunk's lights that pass the
// point/spot test is written to
// bitmask[(pixel.y * resolution_xy.x + pixel.x) * num_lights_32 + chunk].
void main()
{
#if SUBGROUPS
    // One subgroup handles one tile that is 8 pixels wide; the tile height
    // follows from the subgroup size so that each lane owns one pixel.
    uvec2 TILE_SIZE = uvec2(8u, gl_SubgroupSize >> 3u);
    uint local_index = gl_SubgroupInvocationID;

    uvec2 tile = gl_WorkGroupID.yz;
    tile.y = tile.y * gl_NumSubgroups + gl_SubgroupID;
    uint chunk_index = gl_WorkGroupID.x;

    vec2 tile_uv = 2.0 * vec2(tile * TILE_SIZE) * parameters.inv_resolution_xy - 1.0;
    vec2 tile_uv_stride = (2.0 * vec2(TILE_SIZE)) * parameters.inv_resolution_xy;

    uint type_mask = cluster_transforms.type_mask[chunk_index];

    // Pass 1: lane i tests light i of the chunk against the whole tile.
    // Lanes at or above 32, and lanes whose light index is past num_lights,
    // contribute nothing. The num_lights check matches the fallback path
    // and avoids reading culling records for lights that do not exist.
    bool passed = false;
    uint tile_light_index = 32u * chunk_index + local_index;
    if (local_index < 32u && tile_light_index < parameters.num_lights)
    {
        bool point = (type_mask & (1u << local_index)) != 0u;
        if (point)
            passed = test_point_light(tile_uv, tile_uv_stride, tile_light_index);
        else
            passed = test_spot_light(tile_uv, tile_uv_stride, tile_light_index);
    }

    // Only the low 32 bits are needed: higher lanes always voted false.
    uint ballot = subgroupBallot(passed).x;

    // Pass 2: every lane refines the tile-level survivors for its own pixel.
    uvec2 pixel = tile * TILE_SIZE + uvec2(local_index & 7u, local_index >> 3u);
    vec2 uv = 2.0 * vec2(pixel) * parameters.inv_resolution_xy - 1.0;
    vec2 uv_stride = 2.0 * parameters.inv_resolution_xy;

    uint pixel_mask = 0u;
    while (ballot != 0u)
    {
        // ballot is non-zero here, so findLSB() returns a value in [0, 31].
        uint bit = uint(findLSB(ballot));
        uint bit_mask = 1u << bit;
        ballot &= ~bit_mask;

        uint light_index = chunk_index * 32u + bit;
        bool point = (type_mask & bit_mask) != 0u;
        if (point)
            passed = test_point_light(uv, uv_stride, light_index);
        else
            passed = test_spot_light(uv, uv_stride, light_index);

        if (passed)
            pixel_mask |= bit_mask;
    }

    // Skip pixels outside the grid: with a row stride of resolution_xy.x an
    // out-of-range x would alias a pixel of the next row.
    if (pixel.x < parameters.resolution_xy.x && pixel.y < parameters.resolution_xy.y)
    {
        uint linear_coord = pixel.y * parameters.resolution_xy.x + pixel.x;
        uint base_index = linear_coord * parameters.num_lights_32;
        bitmask[base_index + chunk_index] = pixel_mask;
    }
#else
    // One workgroup per (chunk, pixel); invocation i tests light i of the chunk.
    uvec2 pixel = gl_WorkGroupID.yz;
    uint linear_coord = pixel.y * parameters.resolution_xy.x + pixel.x;
    uint base_index = linear_coord * parameters.num_lights_32;
    uint chunk_index = gl_WorkGroupID.x;
    uint type_mask = cluster_transforms.type_mask[chunk_index];
    uint local_index = gl_LocalInvocationIndex;

    if (local_index == 0u)
        shared_mask = 0u;

    vec2 uv = 2.0 * vec2(pixel) * parameters.inv_resolution_xy - 1.0;
    vec2 uv_stride = 2.0 * parameters.inv_resolution_xy;

    // The mask must be cleared before any invocation ORs a bit into it.
    barrier();

    uint light_index = chunk_index * 32u + local_index;
    if (light_index < parameters.num_lights)
    {
        uint bit_mask = 1u << local_index;
        bool passed;
        if ((type_mask & bit_mask) != 0u)
            passed = test_point_light(uv, uv_stride, light_index);
        else
            passed = test_spot_light(uv, uv_stride, light_index);

        if (passed)
            atomicOr(shared_mask, bit_mask);
    }

    // All bits must be accumulated before invocation 0 publishes the mask.
    barrier();

    if (local_index == 0u)
        bitmask[base_index + chunk_index] = shared_mask;
#endif
}