Permalink
Cannot retrieve contributors at this time
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
567 lines (481 sloc)
21.9 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #version 450 | |
| /* Copyright (c) 2020-2022 Hans-Kristian Arntzen | |
| * | |
| * Permission is hereby granted, free of charge, to any person obtaining | |
| * a copy of this software and associated documentation files (the | |
| * "Software"), to deal in the Software without restriction, including | |
| * without limitation the rights to use, copy, modify, merge, publish, | |
| * distribute, sublicense, and/or sell copies of the Software, and to | |
| * permit persons to whom the Software is furnished to do so, subject to | |
| * the following conditions: | |
| * | |
| * The above copyright notice and this permission notice shall be | |
| * included in all copies or substantial portions of the Software. | |
| * | |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
| * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |
| * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
| * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
| * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
| */ | |
| #extension GL_EXT_samplerless_texture_functions : require | |
| layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; | |
| #include "bitextract.h" | |
| layout(set = 0, binding = 0, rgba8ui) writeonly uniform uimage2D uOutput; | |
| layout(set = 0, binding = 1) uniform utexture2D uInput; | |
| layout(push_constant) uniform Registers | |
| { | |
| ivec2 resolution; | |
| } registers; | |
| ivec2 build_coord() | |
| { | |
| ivec2 base = ivec2(gl_WorkGroupID.xy) * 8; | |
| base.x += 4 * (int(gl_LocalInvocationID.z) & 1); | |
| base.y += 2 * (int(gl_LocalInvocationID.z) & 2); | |
| base += ivec2(gl_LocalInvocationID.xy); | |
| return base; | |
| } | |
| const int weight_table2[4] = int[](0, 21, 43, 64); | |
| const int weight_table3[8] = int[](0, 9, 18, 27, 37, 46, 55, 64); | |
| const int weight_table4[16] = int[](0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64); | |
| #define P3(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \ | |
| (((a) << 0) | ((b) << 2) | ((c) << 4) | ((d) << 6) | \ | |
| ((e) << 8) | ((f) << 10) | ((g) << 12) | ((h) << 14) | \ | |
| ((i) << 16) | ((j) << 18) | ((k) << 20) | ((l) << 22) | \ | |
| ((m) << 24) | ((n) << 26) | ((o) << 28) | ((p) << 30)) | |
| const int partition_table3[64] = int[]( | |
| P3(0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2), | |
| P3(0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1), | |
| P3(0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1), | |
| P3(0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1), | |
| P3(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2), | |
| P3(0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2), | |
| P3(0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1), | |
| P3(0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1), | |
| P3(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2), | |
| P3(0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2), | |
| P3(0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2), | |
| P3(0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2), | |
| P3(0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2), | |
| P3(0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2), | |
| P3(0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2), | |
| P3(0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0), | |
| P3(0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2), | |
| P3(0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0), | |
| P3(0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2), | |
| P3(0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1), | |
| P3(0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2), | |
| P3(0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1), | |
| P3(0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2), | |
| P3(0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0), | |
| P3(0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0), | |
| P3(0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2), | |
| P3(0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0), | |
| P3(0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1), | |
| P3(0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2), | |
| P3(0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2), | |
| P3(0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1), | |
| P3(0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1), | |
| P3(0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2), | |
| P3(0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1), | |
| P3(0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2), | |
| P3(0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0), | |
| P3(0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0), | |
| P3(0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0), | |
| P3(0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0), | |
| P3(0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1), | |
| P3(0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1), | |
| P3(0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2), | |
| P3(0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1), | |
| P3(0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2), | |
| P3(0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1), | |
| P3(0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1), | |
| P3(0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1), | |
| P3(0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1), | |
| P3(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2), | |
| P3(0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1), | |
| P3(0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2), | |
| P3(0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2), | |
| P3(0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2), | |
| P3(0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2), | |
| P3(0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2), | |
| P3(0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2), | |
| P3(0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2), | |
| P3(0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2), | |
| P3(0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2), | |
| P3(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2), | |
| P3(0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1), | |
| P3(0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2), | |
| P3(0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), | |
| P3(0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0)); | |
| #define P2(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \ | |
| (((a) << 0) | ((b) << 1) | ((c) << 2) | ((d) << 3) | \ | |
| ((e) << 4) | ((f) << 5) | ((g) << 6) | ((h) << 7) | \ | |
| ((i) << 8) | ((j) << 9) | ((k) << 10) | ((l) << 11) | \ | |
| ((m) << 12) | ((n) << 13) | ((o) << 14) | ((p) << 15)) | |
| const int partition_table2[64] = int[]( | |
| P2(0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1), | |
| P2(0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1), | |
| P2(0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1), | |
| P2(0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1), | |
| P2(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1), | |
| P2(0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1), | |
| P2(0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1), | |
| P2(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1), | |
| P2(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1), | |
| P2(0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), | |
| P2(0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1), | |
| P2(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1), | |
| P2(0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), | |
| P2(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1), | |
| P2(0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), | |
| P2(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1), | |
| P2(0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1), | |
| P2(0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), | |
| P2(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0), | |
| P2(0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0), | |
| P2(0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), | |
| P2(0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0), | |
| P2(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0), | |
| P2(0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1), | |
| P2(0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0), | |
| P2(0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0), | |
| P2(0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0), | |
| P2(0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0), | |
| P2(0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0), | |
| P2(0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0), | |
| P2(0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0), | |
| P2(0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0), | |
| P2(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1), | |
| P2(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1), | |
| P2(0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0), | |
| P2(0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0), | |
| P2(0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0), | |
| P2(0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0), | |
| P2(0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1), | |
| P2(0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1), | |
| P2(0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0), | |
| P2(0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0), | |
| P2(0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0), | |
| P2(0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0), | |
| P2(0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0), | |
| P2(0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1), | |
| P2(0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1), | |
| P2(0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0), | |
| P2(0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0), | |
| P2(0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0), | |
| P2(0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0), | |
| P2(0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0), | |
| P2(0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1), | |
| P2(0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1), | |
| P2(0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0), | |
| P2(0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0), | |
| P2(0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1), | |
| P2(0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1), | |
| P2(0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1), | |
| P2(0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1), | |
| P2(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1), | |
| P2(0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0), | |
| P2(0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0), | |
| P2(0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1)); | |
| const int anchor_table2[64] = int[]( | |
| 15, 15, 15, 15, 15, 15, 15, 15, | |
| 15, 15, 15, 15, 15, 15, 15, 15, | |
| 15, 2, 8, 2, 2, 8, 8, 15, | |
| 2, 8, 2, 2, 8, 8, 2, 2, | |
| 15, 15, 6, 8, 2, 8, 15, 15, | |
| 2, 8, 2, 2, 2, 15, 15, 6, | |
| 6, 2, 6, 8, 15, 15, 2, 2, | |
| 15, 15, 15, 15, 15, 2, 2, 15); | |
| const ivec2 anchor_table3[64] = ivec2[]( | |
| ivec2(3, 15), ivec2(3, 8), ivec2(15, 8), ivec2(15, 3), ivec2(8, 15), ivec2(3, 15), ivec2(15, 3), ivec2(15, 8), | |
| ivec2(8, 15), ivec2(8, 15), ivec2(6, 15), ivec2(6, 15), ivec2(6, 15), ivec2(5, 15), ivec2(3, 15), ivec2(3, 8), | |
| ivec2(3, 15), ivec2(3, 8), ivec2(8, 15), ivec2(15, 3), ivec2(3, 15), ivec2(3, 8), ivec2(6, 15), ivec2(10, 8), | |
| ivec2(5, 3), ivec2(8, 15), ivec2(8, 6), ivec2(6, 10), ivec2(8, 15), ivec2(5, 15), ivec2(15, 10), ivec2(15, 8), | |
| ivec2(8, 15), ivec2(15, 3), ivec2(3, 15), ivec2(5, 10), ivec2(6, 10), ivec2(10, 8), ivec2(8, 9), ivec2(15, 10), | |
| ivec2(15, 6), ivec2(3, 15), ivec2(15, 8), ivec2(5, 15), ivec2(15, 3), ivec2(15, 6), ivec2(15, 6), ivec2(15, 8), | |
| ivec2(3, 15), ivec2(15, 3), ivec2(5, 15), ivec2(5, 15), ivec2(5, 15), ivec2(8, 15), ivec2(5, 15), ivec2(10, 15), | |
| ivec2(5, 15), ivec2(10, 15), ivec2(8, 15), ivec2(13, 15), ivec2(15, 3), ivec2(12, 15), ivec2(3, 15), ivec2(3, 8) | |
| ); | |
| struct DecodedInterpolation | |
| { | |
| uvec4 ep0, ep1; | |
| uint color_weight; | |
| uint alpha_weight; | |
| uint rotation; | |
| }; | |
| DecodedInterpolation decode_bc7_mode0(uvec4 payload, int linear_pixel) | |
| { | |
| int part_index = extract_bits(payload, 1, 4); | |
| int part = (partition_table3[part_index] >> (2 * linear_pixel)) & 3; | |
| int bit_offset = part * 8; | |
| int r0 = extract_bits(payload, 5 + bit_offset, 4); | |
| int r1 = extract_bits(payload, 9 + bit_offset, 4); | |
| int g0 = extract_bits(payload, 29 + bit_offset, 4); | |
| int g1 = extract_bits(payload, 33 + bit_offset, 4); | |
| int b0 = extract_bits(payload, 53 + bit_offset, 4); | |
| int b1 = extract_bits(payload, 57 + bit_offset, 4); | |
| int sep0 = extract_bits(payload, 77 + part * 2, 1); | |
| int sep1 = extract_bits(payload, 78 + part * 2, 1); | |
| ivec2 anchor_pixels = anchor_table3[part_index]; | |
| int index = extract_bits( | |
| payload, | |
| max(82 + linear_pixel * 3 - int(linear_pixel > anchor_pixels.x) - int(linear_pixel > anchor_pixels.y), 83), | |
| (linear_pixel == anchor_pixels.y || linear_pixel == anchor_pixels.x || linear_pixel == 0) ? 2 : 3); | |
| ivec3 rgb0 = ivec3(r0, g0, b0); | |
| ivec3 rgb1 = ivec3(r1, g1, b1); | |
| rgb0 = (rgb0 << 4) | (sep0 << 3) | (rgb0 >> 1); | |
| rgb1 = (rgb1 << 4) | (sep1 << 3) | (rgb1 >> 1); | |
| int w = weight_table3[index]; | |
| return DecodedInterpolation(uvec4(rgb0, 0xff), uvec4(rgb1, 0xff), w, w, 0); | |
| } | |
| DecodedInterpolation decode_bc7_mode1(uvec4 payload, int linear_pixel) | |
| { | |
| int part_index = extract_bits(payload, 2, 6); | |
| int part = (partition_table2[part_index] >> linear_pixel) & 1; | |
| int bit_offset = part * 12; | |
| int r0 = extract_bits(payload, 8 + bit_offset, 6); | |
| int r1 = extract_bits(payload, 14 + bit_offset, 6); | |
| int g0 = extract_bits(payload, 32 + bit_offset, 6); | |
| int g1 = extract_bits(payload, 38 + bit_offset, 6); | |
| int b0 = extract_bits(payload, 56 + bit_offset, 6); | |
| int b1 = extract_bits(payload, 62 + bit_offset, 6); | |
| int sep = extract_bits(payload, 80 + part, 1) << 1; | |
| int anchor_pixel = anchor_table2[part_index]; | |
| int index = extract_bits( | |
| payload, | |
| max(81 + linear_pixel * 3 - int(linear_pixel > anchor_pixel), 82), | |
| (linear_pixel == anchor_pixel || linear_pixel == 0) ? 2 : 3); | |
| ivec3 rgb0 = ivec3(r0, g0, b0); | |
| ivec3 rgb1 = ivec3(r1, g1, b1); | |
| rgb0 = (rgb0 << 2) | sep | (rgb0 >> 5); | |
| rgb1 = (rgb1 << 2) | sep | (rgb1 >> 5); | |
| int w = weight_table3[index]; | |
| return DecodedInterpolation(uvec4(rgb0, 0xff), uvec4(rgb1, 0xff), w, w, 0); | |
| } | |
| DecodedInterpolation decode_bc7_mode2(uvec4 payload, int linear_pixel) | |
| { | |
| int part_index = extract_bits(payload, 3, 6); | |
| int part = (partition_table3[part_index] >> (2 * linear_pixel)) & 3; | |
| int bit_offset = part * 10; | |
| int r0 = extract_bits(payload, 9 + bit_offset, 5); | |
| int r1 = extract_bits(payload, 14 + bit_offset, 5); | |
| int g0 = extract_bits(payload, 39 + bit_offset, 5); | |
| int g1 = extract_bits(payload, 44 + bit_offset, 5); | |
| int b0 = extract_bits(payload, 69 + bit_offset, 5); | |
| int b1 = extract_bits(payload, 74 + bit_offset, 5); | |
| ivec2 anchor_pixels = anchor_table3[part_index]; | |
| int index = extract_bits( | |
| payload, | |
| max(98 + linear_pixel * 2 - int(linear_pixel > anchor_pixels.x) - int(linear_pixel > anchor_pixels.y), 99), | |
| (linear_pixel == anchor_pixels.y || linear_pixel == anchor_pixels.x || linear_pixel == 0) ? 1 : 2); | |
| ivec3 rgb0 = ivec3(r0, g0, b0); | |
| ivec3 rgb1 = ivec3(r1, g1, b1); | |
| rgb0 = (rgb0 << 3) | (rgb0 >> 2); | |
| rgb1 = (rgb1 << 3) | (rgb1 >> 2); | |
| int w = weight_table2[index]; | |
| return DecodedInterpolation(uvec4(rgb0, 0xff), uvec4(rgb1, 0xff), w, w, 0); | |
| } | |
| DecodedInterpolation decode_bc7_mode3(uvec4 payload, int linear_pixel) | |
| { | |
| int part_index = extract_bits(payload, 4, 6); | |
| int part = (partition_table2[part_index] >> linear_pixel) & 1; | |
| int bit_offset = part * 14; | |
| int r0 = extract_bits(payload, 10 + bit_offset, 7); | |
| int r1 = extract_bits(payload, 17 + bit_offset, 7); | |
| int g0 = extract_bits(payload, 38 + bit_offset, 7); | |
| int g1 = extract_bits(payload, 45 + bit_offset, 7); | |
| int b0 = extract_bits(payload, 66 + bit_offset, 7); | |
| int b1 = extract_bits(payload, 73 + bit_offset, 7); | |
| int sep0 = extract_bits(payload, 94 + part * 2, 1); | |
| int sep1 = extract_bits(payload, 95 + part * 2, 1); | |
| int anchor_pixel = anchor_table2[part_index]; | |
| int index = extract_bits( | |
| payload, | |
| max(97 + linear_pixel * 2 - int(linear_pixel > anchor_pixel), 98), | |
| (linear_pixel == anchor_pixel || linear_pixel == 0) ? 1 : 2); | |
| ivec3 rgb0 = ivec3(r0, g0, b0); | |
| ivec3 rgb1 = ivec3(r1, g1, b1); | |
| rgb0 = (rgb0 << 1) | sep0; | |
| rgb1 = (rgb1 << 1) | sep1; | |
| int w = weight_table2[index]; | |
| return DecodedInterpolation(uvec4(rgb0, 0xff), uvec4(rgb1, 0xff), w, w, 0); | |
| } | |
| DecodedInterpolation decode_bc7_mode4(uvec4 payload, int linear_pixel) | |
| { | |
| int rot = extract_bits(payload, 5, 2); | |
| bool isb = (payload.x & 0x80u) != 0u; | |
| int r0 = extract_bits(payload, 8, 5); | |
| int r1 = extract_bits(payload, 13, 5); | |
| int g0 = extract_bits(payload, 18, 5); | |
| int g1 = extract_bits(payload, 23, 5); | |
| int b0 = extract_bits(payload, 28, 5); | |
| int b1 = extract_bits(payload, 33, 5); | |
| int a0 = extract_bits(payload, 38, 6); | |
| int a1 = extract_bits(payload, 44, 6); | |
| int primary_index = extract_bits( | |
| payload, | |
| max(49 + linear_pixel * 2, 50), | |
| linear_pixel == 0 ? 1 : 2); | |
| int secondary_index = extract_bits( | |
| payload, | |
| max(80 + linear_pixel * 3, 81), | |
| linear_pixel == 0 ? 2 : 3); | |
| int color_weight = weight_table2[primary_index]; | |
| int alpha_weight = weight_table3[secondary_index]; | |
| if (isb) | |
| { | |
| int tmp = color_weight; | |
| color_weight = alpha_weight; | |
| alpha_weight = tmp; | |
| } | |
| ivec3 rgb0 = ivec3(r0, g0, b0); | |
| ivec3 rgb1 = ivec3(r1, g1, b1); | |
| rgb0 = (rgb0 << 3) | (rgb0 >> 2); | |
| rgb1 = (rgb1 << 3) | (rgb1 >> 2); | |
| a0 = (a0 << 2) | (a0 >> 4); | |
| a1 = (a1 << 2) | (a1 >> 4); | |
| return DecodedInterpolation(uvec4(rgb0, a0), uvec4(rgb1, a1), color_weight, alpha_weight, rot); | |
| } | |
| DecodedInterpolation decode_bc7_mode5(uvec4 payload, int linear_pixel) | |
| { | |
| int rot = extract_bits(payload, 6, 2); | |
| int r0 = extract_bits(payload, 8, 7); | |
| int r1 = extract_bits(payload, 15, 7); | |
| int g0 = extract_bits(payload, 22, 7); | |
| int g1 = extract_bits(payload, 29, 7); | |
| int b0 = extract_bits(payload, 36, 7); | |
| int b1 = extract_bits(payload, 43, 7); | |
| int a0 = extract_bits(payload, 50, 8); | |
| int a1 = extract_bits(payload, 58, 8); | |
| int primary_index = extract_bits( | |
| payload, | |
| max(65 + linear_pixel * 2, 66), | |
| linear_pixel == 0 ? 1 : 2); | |
| int secondary_index = extract_bits( | |
| payload, | |
| max(96 + linear_pixel * 2, 97), | |
| linear_pixel == 0 ? 1 : 2); | |
| int color_weight = weight_table2[primary_index]; | |
| int alpha_weight = weight_table2[secondary_index]; | |
| ivec3 rgb0 = ivec3(r0, g0, b0); | |
| ivec3 rgb1 = ivec3(r1, g1, b1); | |
| rgb0 = (rgb0 << 1) | (rgb0 >> 6); | |
| rgb1 = (rgb1 << 1) | (rgb1 >> 6); | |
| return DecodedInterpolation(uvec4(rgb0, a0), uvec4(rgb1, a1), color_weight, alpha_weight, rot); | |
| } | |
| DecodedInterpolation decode_bc7_mode6(uvec4 payload, int linear_pixel) | |
| { | |
| int sep0 = extract_bits(payload, 63, 1); | |
| int sep1 = extract_bits(payload, 64, 1); | |
| int r0 = extract_bits(payload, 7, 7); | |
| int r1 = extract_bits(payload, 14, 7); | |
| int g0 = extract_bits(payload, 21, 7); | |
| int g1 = extract_bits(payload, 28, 7); | |
| int b0 = extract_bits(payload, 35, 7); | |
| int b1 = extract_bits(payload, 42, 7); | |
| int a0 = extract_bits(payload, 49, 7); | |
| int a1 = extract_bits(payload, 56, 7); | |
| ivec4 ep0 = ivec4(r0, g0, b0, a0) * 2 + sep0; | |
| ivec4 ep1 = ivec4(r1, g1, b1, a1) * 2 + sep1; | |
| int index = extract_bits( | |
| payload, | |
| max(64 + linear_pixel * 4, 65), | |
| linear_pixel == 0 ? 3 : 4); | |
| int w = weight_table4[index]; | |
| return DecodedInterpolation(ep0, ep1, w, w, 0); | |
| } | |
| DecodedInterpolation decode_bc7_mode7(uvec4 payload, int linear_pixel) | |
| { | |
| int part_index = extract_bits(payload, 8, 6); | |
| int part = (partition_table2[part_index] >> linear_pixel) & 1; | |
| int bit_offset = part * 10; | |
| int r0 = extract_bits(payload, 14 + bit_offset, 5); | |
| int r1 = extract_bits(payload, 19 + bit_offset, 5); | |
| int g0 = extract_bits(payload, 34 + bit_offset, 5); | |
| int g1 = extract_bits(payload, 39 + bit_offset, 5); | |
| int b0 = extract_bits(payload, 54 + bit_offset, 5); | |
| int b1 = extract_bits(payload, 59 + bit_offset, 5); | |
| int a0 = extract_bits(payload, 74 + bit_offset, 5); | |
| int a1 = extract_bits(payload, 79 + bit_offset, 5); | |
| int sep0 = extract_bits(payload, 94 + part * 2, 1); | |
| int sep1 = extract_bits(payload, 95 + part * 2, 1); | |
| int anchor_pixel = anchor_table2[part_index]; | |
| int index = extract_bits( | |
| payload, | |
| max(97 + linear_pixel * 2 - int(linear_pixel > anchor_pixel), 98), | |
| (linear_pixel == anchor_pixel || linear_pixel == 0) ? 1 : 2); | |
| ivec4 rgba0 = ivec4(r0, g0, b0, a0); | |
| ivec4 rgba1 = ivec4(r1, g1, b1, a1); | |
| rgba0 = (rgba0 << 3) | (rgba0 >> 3) | (sep0 << 2); | |
| rgba1 = (rgba1 << 3) | (rgba1 >> 3) | (sep1 << 2); | |
| int w = weight_table2[index]; | |
| return DecodedInterpolation(rgba0, rgba1, w, w, 0); | |
| } | |
| uvec4 interpolate_endpoint(DecodedInterpolation interp) | |
| { | |
| uvec3 rgb = (((64u - interp.color_weight) * interp.ep0.rgb + interp.color_weight * interp.ep1.rgb + 32) >> 6); | |
| uint a = (((64u - interp.alpha_weight) * interp.ep0.a + interp.alpha_weight * interp.ep1.a + 32) >> 6); | |
| uvec4 rgba = uvec4(rgb, a); | |
| switch (interp.rotation) | |
| { | |
| default: | |
| break; | |
| case 1u: | |
| rgba = rgba.agbr; | |
| break; | |
| case 2u: | |
| rgba = rgba.rabg; | |
| break; | |
| case 3u: | |
| rgba = rgba.rgab; | |
| break; | |
| } | |
| return rgba; | |
| } | |
| void main() | |
| { | |
| ivec2 coord = build_coord(); | |
| if (any(greaterThanEqual(coord, registers.resolution))) | |
| return; | |
| ivec2 tile_coord = coord >> 2; | |
| ivec2 pixel_coord = coord & 3; | |
| int linear_pixel = 4 * pixel_coord.y + pixel_coord.x; | |
| uvec4 payload = texelFetch(uInput, tile_coord, 0); | |
| DecodedInterpolation interp; | |
| int mode = findLSB(payload.x); | |
| switch (mode) | |
| { | |
| case 0: | |
| interp = decode_bc7_mode0(payload, linear_pixel); | |
| break; | |
| case 1: | |
| interp = decode_bc7_mode1(payload, linear_pixel); | |
| break; | |
| case 2: | |
| interp = decode_bc7_mode2(payload, linear_pixel); | |
| break; | |
| case 3: | |
| interp = decode_bc7_mode3(payload, linear_pixel); | |
| break; | |
| case 4: | |
| interp = decode_bc7_mode4(payload, linear_pixel); | |
| break; | |
| case 5: | |
| interp = decode_bc7_mode5(payload, linear_pixel); | |
| break; | |
| case 6: | |
| interp = decode_bc7_mode6(payload, linear_pixel); | |
| break; | |
| case 7: | |
| interp = decode_bc7_mode7(payload, linear_pixel); | |
| break; | |
| default: | |
| interp = DecodedInterpolation(uvec4(0), uvec4(0), 0, 0, 0); | |
| break; | |
| } | |
| uvec4 rgba_result = interpolate_endpoint(interp); | |
| imageStore(uOutput, coord, rgba_result); | |
| } |