// BC7 texture block decoder (compute shader).
#version 450
/* Copyright (c) 2020-2022 Hans-Kristian Arntzen
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
// Lets texelFetch() be called on a texture that has no sampler attached (uInput below).
#extension GL_EXT_samplerless_texture_functions : require
// 4 * 4 * 4 = 64 invocations per workgroup; build_coord() spreads them over an 8x8 pixel area.
layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;
// extract_bits() is not defined in this file; presumably it comes from this header.
#include "bitextract.h"
// Destination image: main() stores one unsigned RGBA8 texel per invocation.
layout(set = 0, binding = 0, rgba8ui) writeonly uniform uimage2D uOutput;
// Source texture: main() fetches one uvec4 per 4x4 pixel tile and decodes it as a BC7 block.
layout(set = 0, binding = 1) uniform utexture2D uInput;
layout(push_constant) uniform Registers
{
// Exclusive upper bound for output pixel coordinates; invocations at or past it return early.
ivec2 resolution;
} registers;
// Computes the output pixel handled by the current invocation.
// Every workgroup covers an 8x8 pixel area. The local z index picks one of
// four 4x4 sub-areas (bit 0 adds 4 to x, bit 1 adds 4 to y) and the local
// x/y indices address the pixel inside that sub-area.
ivec2 build_coord()
{
    int z = int(gl_LocalInvocationID.z);
    ivec2 sub_area_origin = ivec2(4 * (z & 1), 2 * (z & 2));
    ivec2 workgroup_origin = ivec2(gl_WorkGroupID.xy) * 8;
    return workgroup_origin + sub_area_origin + ivec2(gl_LocalInvocationID.xy);
}
// Interpolation weights out of 64, looked up by a 2-, 3- or 4-bit index.
// interpolate_endpoint() blends as (64 - w) * ep0 + w * ep1, so weight 0
// yields ep0 and weight 64 yields ep1.
const int weight_table2[4] = int[](0, 21, 43, 64);
const int weight_table3[8] = int[](0, 9, 18, 27, 37, 46, 55, 64);
const int weight_table4[16] = int[](0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64);
// Packs sixteen 2-bit values into one 32-bit int: argument a occupies bits 0-1
// and argument p occupies bits 30-31. A last argument of 2 or 3 sets the sign
// bit; readers shift right and mask with & 3, so the sign extension is discarded.
#define P3(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
(((a) << 0) | ((b) << 2) | ((c) << 4) | ((d) << 6) | \
((e) << 8) | ((f) << 10) | ((g) << 12) | ((h) << 14) | \
((i) << 16) | ((j) << 18) | ((k) << 20) | ((l) << 22) | \
((m) << 24) | ((n) << 26) | ((o) << 28) | ((p) << 30))
// Subset assignment for the three-subset partition patterns. Each entry packs the
// subset id (0, 1 or 2) of all 16 pixels of a 4x4 block, two bits per pixel, with
// pixel 0 in the lowest bits. Read by decode_bc7_mode0() (which uses a 4-bit
// partition id, i.e. only the first 16 entries) and decode_bc7_mode2() (6-bit id).
const int partition_table3[64] = int[](
P3(0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2),
P3(0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1),
P3(0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1),
P3(0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1),
P3(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2),
P3(0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2),
P3(0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1),
P3(0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1),
P3(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2),
P3(0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2),
P3(0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2),
P3(0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2),
P3(0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2),
P3(0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2),
P3(0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2),
P3(0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0),
P3(0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2),
P3(0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0),
P3(0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2),
P3(0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1),
P3(0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2),
P3(0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1),
P3(0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2),
P3(0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0),
P3(0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0),
P3(0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2),
P3(0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0),
P3(0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1),
P3(0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2),
P3(0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2),
P3(0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1),
P3(0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1),
P3(0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2),
P3(0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1),
P3(0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2),
P3(0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0),
P3(0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0),
P3(0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0),
P3(0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0),
P3(0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1),
P3(0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1),
P3(0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2),
P3(0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1),
P3(0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2),
P3(0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1),
P3(0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1),
P3(0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1),
P3(0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1),
P3(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2),
P3(0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1),
P3(0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2),
P3(0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2),
P3(0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2),
P3(0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2),
P3(0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2),
P3(0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2),
P3(0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2),
P3(0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2),
P3(0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2),
P3(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2),
P3(0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1),
P3(0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2),
P3(0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
P3(0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0));
// Packs sixteen 1-bit values into the low 16 bits of an int: argument a is
// bit 0 and argument p is bit 15.
#define P2(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
(((a) << 0) | ((b) << 1) | ((c) << 2) | ((d) << 3) | \
((e) << 4) | ((f) << 5) | ((g) << 6) | ((h) << 7) | \
((i) << 8) | ((j) << 9) | ((k) << 10) | ((l) << 11) | \
((m) << 12) | ((n) << 13) | ((o) << 14) | ((p) << 15))
// Subset assignment for the two-subset partition patterns. Each entry holds one
// bit per pixel of a 4x4 block (bit i = subset id of pixel i). Indexed by the
// 6-bit partition id in decode_bc7_mode1(), decode_bc7_mode3() and decode_bc7_mode7().
const int partition_table2[64] = int[](
P2(0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1),
P2(0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1),
P2(0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1),
P2(0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1),
P2(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1),
P2(0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1),
P2(0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1),
P2(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1),
P2(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1),
P2(0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
P2(0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1),
P2(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1),
P2(0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
P2(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1),
P2(0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
P2(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1),
P2(0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1),
P2(0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),
P2(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0),
P2(0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0),
P2(0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),
P2(0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0),
P2(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0),
P2(0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1),
P2(0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0),
P2(0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0),
P2(0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0),
P2(0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0),
P2(0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0),
P2(0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0),
P2(0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0),
P2(0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0),
P2(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1),
P2(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1),
P2(0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0),
P2(0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0),
P2(0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0),
P2(0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0),
P2(0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1),
P2(0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1),
P2(0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0),
P2(0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0),
P2(0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0),
P2(0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0),
P2(0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0),
P2(0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1),
P2(0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1),
P2(0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0),
P2(0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0),
P2(0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0),
P2(0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0),
P2(0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0),
P2(0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1),
P2(0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1),
P2(0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0),
P2(0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0),
P2(0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1),
P2(0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1),
P2(0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1),
P2(0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1),
P2(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1),
P2(0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0),
P2(0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0),
P2(0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1));
// For each two-subset partition id: the pixel index that the decoders treat as an
// anchor in addition to pixel 0. An anchor's index is stored with one bit less
// than the other pixels, which shifts the index offsets of all later pixels.
const int anchor_table2[64] = int[](
15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15,
15, 2, 8, 2, 2, 8, 8, 15,
2, 8, 2, 2, 8, 8, 2, 2,
15, 15, 6, 8, 2, 8, 15, 15,
2, 8, 2, 2, 2, 15, 15, 6,
6, 2, 6, 8, 15, 15, 2, 2,
15, 15, 15, 15, 15, 2, 2, 15);
// For each three-subset partition id: the two pixel indices (.x and .y) that the
// decoders treat as anchors in addition to pixel 0. The two values are not sorted;
// decode_bc7_mode0() and decode_bc7_mode2() compare against each one independently.
const ivec2 anchor_table3[64] = ivec2[](
ivec2(3, 15), ivec2(3, 8), ivec2(15, 8), ivec2(15, 3), ivec2(8, 15), ivec2(3, 15), ivec2(15, 3), ivec2(15, 8),
ivec2(8, 15), ivec2(8, 15), ivec2(6, 15), ivec2(6, 15), ivec2(6, 15), ivec2(5, 15), ivec2(3, 15), ivec2(3, 8),
ivec2(3, 15), ivec2(3, 8), ivec2(8, 15), ivec2(15, 3), ivec2(3, 15), ivec2(3, 8), ivec2(6, 15), ivec2(10, 8),
ivec2(5, 3), ivec2(8, 15), ivec2(8, 6), ivec2(6, 10), ivec2(8, 15), ivec2(5, 15), ivec2(15, 10), ivec2(15, 8),
ivec2(8, 15), ivec2(15, 3), ivec2(3, 15), ivec2(5, 10), ivec2(6, 10), ivec2(10, 8), ivec2(8, 9), ivec2(15, 10),
ivec2(15, 6), ivec2(3, 15), ivec2(15, 8), ivec2(5, 15), ivec2(15, 3), ivec2(15, 6), ivec2(15, 6), ivec2(15, 8),
ivec2(3, 15), ivec2(15, 3), ivec2(5, 15), ivec2(5, 15), ivec2(5, 15), ivec2(8, 15), ivec2(5, 15), ivec2(10, 15),
ivec2(5, 15), ivec2(10, 15), ivec2(8, 15), ivec2(13, 15), ivec2(15, 3), ivec2(12, 15), ivec2(3, 15), ivec2(3, 8)
);
// Per-pixel result of a mode decoder, consumed by interpolate_endpoint().
// Member order matters: the decoders build this struct with positional constructors.
struct DecodedInterpolation
{
    // First endpoint (RGBA); selected entirely when the weight is 0.
    uvec4 ep0;
    // Second endpoint (RGBA); selected entirely when the weight is 64.
    uvec4 ep1;
    // Blend weight in 0..64 applied to the RGB channels.
    uint color_weight;
    // Blend weight in 0..64 applied to the alpha channel.
    uint alpha_weight;
    // 0 = leave channels as is; 1, 2, 3 = swap alpha with R, G, B after blending.
    uint rotation;
};
// Decodes what one pixel of a BC7 mode 0 block needs for interpolation.
//   payload      - the block's 128 bits as four 32-bit words.
//   linear_pixel - pixel position inside the 4x4 block, y * 4 + x (main() passes 0..15).
// Returns opaque endpoints (alpha fixed to 0xff), a single weight used for both
// color and alpha, and rotation 0.
DecodedInterpolation decode_bc7_mode0(uvec4 payload, int linear_pixel)
{
    // 4-bit partition id right after the single mode bit; the table yields the pixel's subset.
    int partition = extract_bits(payload, 1, 4);
    int subset = (partition_table3[partition] >> (linear_pixel * 2)) & 3;

    // Every subset owns an 8-bit slice (two 4-bit endpoints) inside each channel's field.
    int subset_shift = 8 * subset;
    int red_lo = extract_bits(payload, 5 + subset_shift, 4);
    int red_hi = extract_bits(payload, 9 + subset_shift, 4);
    int green_lo = extract_bits(payload, 29 + subset_shift, 4);
    int green_hi = extract_bits(payload, 33 + subset_shift, 4);
    int blue_lo = extract_bits(payload, 53 + subset_shift, 4);
    int blue_hi = extract_bits(payload, 57 + subset_shift, 4);

    // One extra low bit per endpoint (the BC7 P-bit).
    int pbit_lo = extract_bits(payload, 77 + 2 * subset, 1);
    int pbit_hi = extract_bits(payload, 78 + 2 * subset, 1);

    // Indices are 3 bits wide except at the anchors (pixel 0 and the two table
    // entries), which store 2 bits. Each anchor located before this pixel
    // therefore pulls the pixel's bit offset back by one.
    ivec2 anchors = anchor_table3[partition];
    bool is_anchor = linear_pixel == 0 || linear_pixel == anchors.x || linear_pixel == anchors.y;
    int anchors_passed = int(linear_pixel > anchors.x) + int(linear_pixel > anchors.y);
    int index_offset = max(82 + 3 * linear_pixel - anchors_passed, 83);
    int index = extract_bits(payload, index_offset, is_anchor ? 2 : 3);

    // 4 endpoint bits plus the P-bit give 5 bits; repeat the top bits to fill 8.
    ivec3 lo = ivec3(red_lo, green_lo, blue_lo);
    ivec3 hi = ivec3(red_hi, green_hi, blue_hi);
    lo = (lo << 4) | (pbit_lo << 3) | (lo >> 1);
    hi = (hi << 4) | (pbit_hi << 3) | (hi >> 1);

    int weight = weight_table3[index];
    return DecodedInterpolation(uvec4(lo, 0xff), uvec4(hi, 0xff), weight, weight, 0);
}
// Decodes what one pixel of a BC7 mode 1 block needs for interpolation.
//   payload      - the block's 128 bits as four 32-bit words.
//   linear_pixel - pixel position inside the 4x4 block, y * 4 + x (main() passes 0..15).
// Returns opaque endpoints (alpha fixed to 0xff), a single weight used for both
// color and alpha, and rotation 0.
DecodedInterpolation decode_bc7_mode1(uvec4 payload, int linear_pixel)
{
    // 6-bit partition id after the two mode bits; one table bit per pixel selects the subset.
    int partition = extract_bits(payload, 2, 6);
    int subset = (partition_table2[partition] >> linear_pixel) & 1;

    // Every subset owns a 12-bit slice (two 6-bit endpoints) inside each channel's field.
    int subset_shift = 12 * subset;
    int red_lo = extract_bits(payload, 8 + subset_shift, 6);
    int red_hi = extract_bits(payload, 14 + subset_shift, 6);
    int green_lo = extract_bits(payload, 32 + subset_shift, 6);
    int green_hi = extract_bits(payload, 38 + subset_shift, 6);
    int blue_lo = extract_bits(payload, 56 + subset_shift, 6);
    int blue_hi = extract_bits(payload, 62 + subset_shift, 6);

    // A single extra bit per subset is shared by both endpoints; it is kept
    // pre-shifted into bit 1 of the final 8-bit value.
    int shared_pbit = extract_bits(payload, 80 + subset, 1) << 1;

    // 3-bit indices, 2 bits at the anchors (pixel 0 and the table entry); an
    // anchor located before this pixel moves the pixel's offset back by one bit.
    int anchor = anchor_table2[partition];
    bool is_anchor = linear_pixel == 0 || linear_pixel == anchor;
    int index_offset = max(81 + 3 * linear_pixel - int(linear_pixel > anchor), 82);
    int index = extract_bits(payload, index_offset, is_anchor ? 2 : 3);

    // 6 endpoint bits plus the shared bit give 7 bits; the top bit is repeated to fill 8.
    ivec3 lo = ivec3(red_lo, green_lo, blue_lo);
    ivec3 hi = ivec3(red_hi, green_hi, blue_hi);
    lo = (lo << 2) | shared_pbit | (lo >> 5);
    hi = (hi << 2) | shared_pbit | (hi >> 5);

    int weight = weight_table3[index];
    return DecodedInterpolation(uvec4(lo, 0xff), uvec4(hi, 0xff), weight, weight, 0);
}
// Decodes what one pixel of a BC7 mode 2 block needs for interpolation.
//   payload      - the block's 128 bits as four 32-bit words.
//   linear_pixel - pixel position inside the 4x4 block, y * 4 + x (main() passes 0..15).
// Returns opaque endpoints (alpha fixed to 0xff), a single weight used for both
// color and alpha, and rotation 0.
DecodedInterpolation decode_bc7_mode2(uvec4 payload, int linear_pixel)
{
    // 6-bit partition id after the three mode bits; two table bits per pixel select the subset.
    int partition = extract_bits(payload, 3, 6);
    int subset = (partition_table3[partition] >> (linear_pixel * 2)) & 3;

    // Every subset owns a 10-bit slice (two 5-bit endpoints) inside each channel's field.
    int subset_shift = 10 * subset;
    int red_lo = extract_bits(payload, 9 + subset_shift, 5);
    int red_hi = extract_bits(payload, 14 + subset_shift, 5);
    int green_lo = extract_bits(payload, 39 + subset_shift, 5);
    int green_hi = extract_bits(payload, 44 + subset_shift, 5);
    int blue_lo = extract_bits(payload, 69 + subset_shift, 5);
    int blue_hi = extract_bits(payload, 74 + subset_shift, 5);

    // 2-bit indices, 1 bit at the anchors (pixel 0 and the two table entries);
    // each anchor located before this pixel moves its offset back by one bit.
    ivec2 anchors = anchor_table3[partition];
    bool is_anchor = linear_pixel == 0 || linear_pixel == anchors.x || linear_pixel == anchors.y;
    int anchors_passed = int(linear_pixel > anchors.x) + int(linear_pixel > anchors.y);
    int index_offset = max(98 + 2 * linear_pixel - anchors_passed, 99);
    int index = extract_bits(payload, index_offset, is_anchor ? 1 : 2);

    // Widen 5 bits to 8 by repeating the top three bits at the bottom.
    ivec3 lo = ivec3(red_lo, green_lo, blue_lo);
    ivec3 hi = ivec3(red_hi, green_hi, blue_hi);
    lo = (lo << 3) | (lo >> 2);
    hi = (hi << 3) | (hi >> 2);

    int weight = weight_table2[index];
    return DecodedInterpolation(uvec4(lo, 0xff), uvec4(hi, 0xff), weight, weight, 0);
}
// Decodes what one pixel of a BC7 mode 3 block needs for interpolation.
//   payload      - the block's 128 bits as four 32-bit words.
//   linear_pixel - pixel position inside the 4x4 block, y * 4 + x (main() passes 0..15).
// Returns opaque endpoints (alpha fixed to 0xff), a single weight used for both
// color and alpha, and rotation 0.
DecodedInterpolation decode_bc7_mode3(uvec4 payload, int linear_pixel)
{
    // 6-bit partition id after the four mode bits; one table bit per pixel selects the subset.
    int partition = extract_bits(payload, 4, 6);
    int subset = (partition_table2[partition] >> linear_pixel) & 1;

    // Every subset owns a 14-bit slice (two 7-bit endpoints) inside each channel's field.
    int subset_shift = 14 * subset;
    int red_lo = extract_bits(payload, 10 + subset_shift, 7);
    int red_hi = extract_bits(payload, 17 + subset_shift, 7);
    int green_lo = extract_bits(payload, 38 + subset_shift, 7);
    int green_hi = extract_bits(payload, 45 + subset_shift, 7);
    int blue_lo = extract_bits(payload, 66 + subset_shift, 7);
    int blue_hi = extract_bits(payload, 73 + subset_shift, 7);

    // One extra low bit per endpoint (the BC7 P-bit).
    int pbit_lo = extract_bits(payload, 94 + 2 * subset, 1);
    int pbit_hi = extract_bits(payload, 95 + 2 * subset, 1);

    // 2-bit indices, 1 bit at the anchors (pixel 0 and the table entry); an
    // anchor located before this pixel moves the pixel's offset back by one bit.
    int anchor = anchor_table2[partition];
    bool is_anchor = linear_pixel == 0 || linear_pixel == anchor;
    int index_offset = max(97 + 2 * linear_pixel - int(linear_pixel > anchor), 98);
    int index = extract_bits(payload, index_offset, is_anchor ? 1 : 2);

    // 7 endpoint bits plus the P-bit already make a full 8-bit value.
    ivec3 lo = ivec3(red_lo, green_lo, blue_lo);
    ivec3 hi = ivec3(red_hi, green_hi, blue_hi);
    lo = (lo << 1) | pbit_lo;
    hi = (hi << 1) | pbit_hi;

    int weight = weight_table2[index];
    return DecodedInterpolation(uvec4(lo, 0xff), uvec4(hi, 0xff), weight, weight, 0);
}
// Decodes what one pixel of a BC7 mode 4 block needs for interpolation.
// This mode has a single subset, separate color and alpha weights, a channel
// rotation, and a selector bit that decides which of the two index sets
// drives color and which drives alpha.
//   payload      - the block's 128 bits as four 32-bit words.
//   linear_pixel - pixel position inside the 4x4 block, y * 4 + x (main() passes 0..15).
DecodedInterpolation decode_bc7_mode4(uvec4 payload, int linear_pixel)
{
    int rotation = extract_bits(payload, 5, 2);
    // Bit 7: when set, the 3-bit index set drives color and the 2-bit set drives alpha.
    bool swap_index_sets = ((payload.x >> 7) & 1u) == 1u;

    // 5-bit color endpoints followed by 6-bit alpha endpoints.
    ivec3 lo = ivec3(
        extract_bits(payload, 8, 5),
        extract_bits(payload, 18, 5),
        extract_bits(payload, 28, 5));
    ivec3 hi = ivec3(
        extract_bits(payload, 13, 5),
        extract_bits(payload, 23, 5),
        extract_bits(payload, 33, 5));
    int alpha_lo = extract_bits(payload, 38, 6);
    int alpha_hi = extract_bits(payload, 44, 6);

    // Pixel 0 is the only anchor, so it alone stores one bit less in each index set.
    int is_first = int(linear_pixel == 0);
    int index2 = extract_bits(payload, max(49 + 2 * linear_pixel, 50), 2 - is_first);
    int index3 = extract_bits(payload, max(80 + 3 * linear_pixel, 81), 3 - is_first);

    int weight2 = weight_table2[index2];
    int weight3 = weight_table3[index3];
    int color_weight = swap_index_sets ? weight3 : weight2;
    int alpha_weight = swap_index_sets ? weight2 : weight3;

    // Widen to 8 bits by repeating the top bits at the bottom.
    lo = (lo << 3) | (lo >> 2);
    hi = (hi << 3) | (hi >> 2);
    alpha_lo = (alpha_lo << 2) | (alpha_lo >> 4);
    alpha_hi = (alpha_hi << 2) | (alpha_hi >> 4);

    return DecodedInterpolation(uvec4(lo, alpha_lo), uvec4(hi, alpha_hi), color_weight, alpha_weight, rotation);
}
// Decodes what one pixel of a BC7 mode 5 block needs for interpolation.
// This mode has a single subset, separate 2-bit index sets for color and alpha,
// and a channel rotation.
//   payload      - the block's 128 bits as four 32-bit words.
//   linear_pixel - pixel position inside the 4x4 block, y * 4 + x (main() passes 0..15).
DecodedInterpolation decode_bc7_mode5(uvec4 payload, int linear_pixel)
{
    int rotation = extract_bits(payload, 6, 2);

    // 7-bit color endpoints followed by full 8-bit alpha endpoints.
    ivec3 lo = ivec3(
        extract_bits(payload, 8, 7),
        extract_bits(payload, 22, 7),
        extract_bits(payload, 36, 7));
    ivec3 hi = ivec3(
        extract_bits(payload, 15, 7),
        extract_bits(payload, 29, 7),
        extract_bits(payload, 43, 7));
    int alpha_lo = extract_bits(payload, 50, 8);
    int alpha_hi = extract_bits(payload, 58, 8);

    // Pixel 0 is the only anchor, so it alone stores a 1-bit index in both sets.
    int index_bits = 2 - int(linear_pixel == 0);
    int color_index = extract_bits(payload, max(65 + 2 * linear_pixel, 66), index_bits);
    int alpha_index = extract_bits(payload, max(96 + 2 * linear_pixel, 97), index_bits);

    // Widen 7 bits to 8 by repeating the top bit at the bottom.
    lo = (lo << 1) | (lo >> 6);
    hi = (hi << 1) | (hi >> 6);

    return DecodedInterpolation(
        uvec4(lo, alpha_lo), uvec4(hi, alpha_hi),
        weight_table2[color_index], weight_table2[alpha_index], rotation);
}
// Decodes what one pixel of a BC7 mode 6 block needs for interpolation.
// This mode has a single subset, 7-bit RGBA endpoints with one extra low bit
// each, and 4-bit indices.
//   payload      - the block's 128 bits as four 32-bit words.
//   linear_pixel - pixel position inside the 4x4 block, y * 4 + x (main() passes 0..15).
// Returns one weight used for both color and alpha, and rotation 0.
DecodedInterpolation decode_bc7_mode6(uvec4 payload, int linear_pixel)
{
    // One extra low bit per endpoint (the BC7 P-bit).
    int pbit_lo = extract_bits(payload, 63, 1);
    int pbit_hi = extract_bits(payload, 64, 1);

    // Channel fields are interleaved: first/second endpoint of R, then G, B, A.
    ivec4 lo = ivec4(
        extract_bits(payload, 7, 7),
        extract_bits(payload, 21, 7),
        extract_bits(payload, 35, 7),
        extract_bits(payload, 49, 7));
    ivec4 hi = ivec4(
        extract_bits(payload, 14, 7),
        extract_bits(payload, 28, 7),
        extract_bits(payload, 42, 7),
        extract_bits(payload, 56, 7));

    // 7 endpoint bits plus the P-bit already make a full 8-bit value.
    lo = (lo << 1) + pbit_lo;
    hi = (hi << 1) + pbit_hi;

    // Pixel 0 is the only anchor and stores a 3-bit index instead of 4 bits.
    int index_bits = 4 - int(linear_pixel == 0);
    int index = extract_bits(payload, max(64 + 4 * linear_pixel, 65), index_bits);

    int weight = weight_table4[index];
    return DecodedInterpolation(uvec4(lo), uvec4(hi), weight, weight, 0);
}
// Decodes what one pixel of a BC7 mode 7 block needs for interpolation.
// This mode has two subsets, 5-bit RGBA endpoints with one extra low bit each,
// and 2-bit indices.
//   payload      - the block's 128 bits as four 32-bit words.
//   linear_pixel - pixel position inside the 4x4 block, y * 4 + x (main() passes 0..15).
// Returns one weight used for both color and alpha, and rotation 0.
DecodedInterpolation decode_bc7_mode7(uvec4 payload, int linear_pixel)
{
    // 6-bit partition id after the eight mode bits; one table bit per pixel selects the subset.
    int partition = extract_bits(payload, 8, 6);
    int subset = (partition_table2[partition] >> linear_pixel) & 1;

    // Every subset owns a 10-bit slice (two 5-bit endpoints) inside each channel's field.
    int subset_shift = 10 * subset;
    ivec4 lo = ivec4(
        extract_bits(payload, 14 + subset_shift, 5),
        extract_bits(payload, 34 + subset_shift, 5),
        extract_bits(payload, 54 + subset_shift, 5),
        extract_bits(payload, 74 + subset_shift, 5));
    ivec4 hi = ivec4(
        extract_bits(payload, 19 + subset_shift, 5),
        extract_bits(payload, 39 + subset_shift, 5),
        extract_bits(payload, 59 + subset_shift, 5),
        extract_bits(payload, 79 + subset_shift, 5));

    // One extra low bit per endpoint (the BC7 P-bit).
    int pbit_lo = extract_bits(payload, 94 + 2 * subset, 1);
    int pbit_hi = extract_bits(payload, 95 + 2 * subset, 1);

    // 2-bit indices, 1 bit at the anchors (pixel 0 and the table entry); an
    // anchor located before this pixel moves the pixel's offset back by one bit.
    int anchor = anchor_table2[partition];
    bool is_anchor = linear_pixel == 0 || linear_pixel == anchor;
    int index_offset = max(97 + 2 * linear_pixel - int(linear_pixel > anchor), 98);
    int index = extract_bits(payload, index_offset, is_anchor ? 1 : 2);

    // 5 endpoint bits plus the P-bit give 6 bits; repeat the top two bits to fill 8.
    lo = (lo << 3) | (pbit_lo << 2) | (lo >> 3);
    hi = (hi << 3) | (pbit_hi << 2) | (hi >> 3);

    int weight = weight_table2[index];
    return DecodedInterpolation(uvec4(lo), uvec4(hi), weight, weight, 0);
}
// Blends the two endpoints with the decoded weights and applies the channel rotation.
// Each channel is ((64 - w) * ep0 + w * ep1 + 32) >> 6, i.e. a rounded blend with
// weights out of 64. RGB uses color_weight and alpha uses alpha_weight.
// Rotation 1, 2 or 3 swaps alpha with R, G or B; any other value leaves the result untouched.
uvec4 interpolate_endpoint(DecodedInterpolation interp)
{
    uint cw = interp.color_weight;
    uint aw = interp.alpha_weight;

    uvec3 color = ((64u - cw) * interp.ep0.rgb + cw * interp.ep1.rgb + 32u) >> 6;
    uint alpha = ((64u - aw) * interp.ep0.a + aw * interp.ep1.a + 32u) >> 6;
    uvec4 blended = uvec4(color, alpha);

    if (interp.rotation == 1u)
        return blended.agbr;
    if (interp.rotation == 2u)
        return blended.rabg;
    if (interp.rotation == 3u)
        return blended.rgab;
    return blended;
}
// Entry point: every invocation decodes one pixel of one BC7 block and stores it to uOutput.
void main()
{
    ivec2 pixel = build_coord();

    // Invocations that land outside the output image have nothing to write.
    if (pixel.x >= registers.resolution.x || pixel.y >= registers.resolution.y)
        return;

    // One input texel carries a whole 4x4 block, so the block address is pixel / 4.
    uvec4 block = texelFetch(uInput, pixel >> 2, 0);
    ivec2 in_block = pixel & 3;
    int linear_pixel = in_block.y * 4 + in_block.x;

    // Fallback for mode values not handled below: both endpoints and weights are
    // zero, so the stored texel is all zeroes.
    DecodedInterpolation interp = DecodedInterpolation(uvec4(0), uvec4(0), 0, 0, 0);

    // The mode number is the position of the lowest set bit of the first word.
    switch (findLSB(block.x))
    {
    case 0: interp = decode_bc7_mode0(block, linear_pixel); break;
    case 1: interp = decode_bc7_mode1(block, linear_pixel); break;
    case 2: interp = decode_bc7_mode2(block, linear_pixel); break;
    case 3: interp = decode_bc7_mode3(block, linear_pixel); break;
    case 4: interp = decode_bc7_mode4(block, linear_pixel); break;
    case 5: interp = decode_bc7_mode5(block, linear_pixel); break;
    case 6: interp = decode_bc7_mode6(block, linear_pixel); break;
    case 7: interp = decode_bc7_mode7(block, linear_pixel); break;
    }

    imageStore(uOutput, pixel, interpolate_endpoint(interp));
}