Begin adding glslang validation.

Themaister · Oct 28, 2015 · 10b50cf · 10b50cf
1 parent e4a0a9e
commit 10b50cf
Show file tree

Hide file tree

Showing 15 changed files with 471 additions and 54 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -4,3 +4,6 @@
 [submodule "muFFT"]
 	path = muFFT
 	url = git://github.com/Themaister/muFFT
+[submodule "glslang"]
+	path = glslang
+	url = git://github.com/KhronosGroup/glslang
diff --git a/GNUmakefile b/GNUmakefile
@@ -28,9 +28,6 @@ ifeq ($(BACKEND), glfw)
 		LDFLAGS += -lmufft $(shell pkg-config glfw3 --libs) -lGL
 	endif
 	GLSYM := test/glfw/glsym/rglgen.c test/glfw/glsym/glsym_gl.c
-
-	CC = clang
-	CXX = clang++
 endif
 
 ifeq ($(PLATFORM),win)
@@ -55,6 +52,37 @@ ifneq ($(TOOLCHAIN_PREFIX),)
 	CXX = $(TOOLCHAIN_PREFIX)g++
 endif
 
+GLSLANG_YACC := glslang/glslang/MachineIndependent/glslang.y
+GLSLANG_YACC_TAB := glslang/glslang_tab.cpp
+GLSLANG_YACC_TAB_INCLUDE := glslang/glslang_tab.cpp.h
+GLSLANG_SOURCES := \
+	$(wildcard glslang/SPIRV/*.cpp) \
+	$(wildcard glslang/glslang/GenericCodeGen/*.cpp) \
+	$(wildcard glslang/OGLCompilersDLL/*.cpp) \
+	$(wildcard glslang/glslang/MachineIndependent/*.cpp) \
+	$(wildcard glslang/glslang/MachineIndependent/preprocessor/*.cpp) \
+	$(wildcard glslang/glslang/OSDependent/Linux/*.cpp) \
+	$(GLSLANG_YACC_TAB)
+
+GLSLANG_OBJECTS := $(GLSLANG_SOURCES:.cpp=.o)
+GLSLANG_LIB := libglslang.a
+CXXFLAGS += -Iglslang/glslang/OSDependent/Linux \
+			-Iglslang \
+			-Iglslang/glslang/MachineIndependent \
+			-Iglslang/glslang/Public \
+			-Iglslang/SPIRV
+
+LDFLAGS += -pthread
+
+all: $(GLSLANG_YACC_TAB) build_fft_inc
+	@+$(MAKE) $(TARGET)
+
+$(GLSLANG_LIB): $(GLSLANG_OBJECTS)
+	$(AR) rcs $@ $(GLSLANG_OBJECTS)
+
+$(GLSLANG_YACC_TAB): $(GLSLANG_YACC)
+	bison --defines=$(GLSLANG_YACC_TAB_INCLUDE) -t $(GLSLANG_YACC) -o $(GLSLANG_YACC_TAB)
+
 CXX_SOURCES := $(wildcard *.cpp) $(wildcard test/*.cpp) $(wildcard test/$(BACKEND)/*.cpp)
 C_SOURCES := $(GLSYM)
 OBJDIR := obj
@@ -65,11 +93,6 @@ CXXFLAGS += -Wall -Wextra -pedantic -std=c++11 $(EXTERNAL_INCLUDE_DIRS) -DGLFFT_
 CFLAGS += -Wall -Wextra -std=c99 $(EXTERNAL_INCLUDE_DIRS)
 LDFLAGS += $(EXTERNAL_LIB_DIRS) -lm
 
-
-all: $(TARGET)
-
-glfft.cpp: build_fft_inc
-
 build_fft_inc:
 	$(MAKE) -C glsl
 
@@ -78,8 +101,8 @@ build_fft_inc:
 muFFT/libmufft.a:
 	$(MAKE) -C muFFT static PLATFORM=$(PLATFORM) TOOLCHAIN_PREFIX=$(TOOLCHAIN_PREFIX)
 
-$(TARGET): $(OBJECTS) $(MUFFT_LIB)
-	$(CXX) -o $@ $(OBJECTS) $(LDFLAGS) $(EXTERNAL_LIBS)
+$(TARGET): $(OBJECTS) $(MUFFT_LIB) $(GLSLANG_LIB)
+	$(CXX) -o $@ $(OBJECTS) $(LDFLAGS) $(EXTERNAL_LIBS) $(GLSLANG_LIB)
 
 $(OBJDIR)/%.o: %.cpp
 	@mkdir -p $(dir $@)
@@ -94,5 +117,8 @@ clean:
 	$(MAKE) -C muFFT clean PLATFORM=$(PLATFORM)
 	$(MAKE) -C glsl clean
 	rm -f muFFT/libmufft.a
+	rm -f $(GLSLANG_LIB)
+	rm -f $(GLSLANG_YACC_TAB)
+	rm -f $(GLSLANG_YACC_TAB_INCLUDE)
 
 .PHONY: clean
diff --git a/glfft.cpp b/glfft.cpp
@@ -56,6 +56,17 @@ struct Radix
     bool shared_banked;
 };
 
+static unsigned next_pow2(unsigned v)
+{
+    v--;
+    v |= v >> 16;
+    v |= v >> 8;
+    v |= v >> 4;
+    v |= v >> 2;
+    v |= v >> 1;
+    return v + 1;
+}
+
 static void reduce(unsigned &wg_size, unsigned &divisor)
 {
     if (divisor > 1 && wg_size >= divisor)
@@ -427,7 +438,6 @@ FFT::FFT(Context *context, unsigned Nx, unsigned Ny,
         input_target,
         output_target,
         p == 1,
-        false,
         res.shared_banked,
         options.type.fp16, options.type.input_fp16, options.type.output_fp16,
         options.type.normalize,
@@ -443,6 +453,7 @@ FFT::FFT(Context *context, unsigned Nx, unsigned Ny,
         params,
         res.num_workgroups_x, res.num_workgroups_y,
         uv_scale_x,
+        next_pow2(res.num_workgroups_x * params.workgroup_size_x),
         get_program(params),
     };
 
@@ -596,10 +607,6 @@ FFT::FFT(Context *context, unsigned Nx, unsigned Ny,
         unsigned p = 1;
         unsigned i = 0;
 
-        // If we have R2C or C2R, we have a padded buffer to accomodate 2^n + 1 elements horizontally.
-        // For simplicity, this is implemented as a shader variant.
-        bool pow2_stride = expand && modes[index] == Vertical;
-
         for (auto &radix : radix_direction)
         {
             // If this is the last pass and we're writing to an image, use a special shader variant.
@@ -622,7 +629,6 @@ FFT::FFT(Context *context, unsigned Nx, unsigned Ny,
                 in_target,
                 out_target,
                 p == 1,
-                pow2_stride,
                 radix.shared_banked,
                 options.type.fp16, input_fp16, options.type.output_fp16,
                 options.type.normalize,
@@ -632,6 +638,7 @@ FFT::FFT(Context *context, unsigned Nx, unsigned Ny,
                 params,
                 radix.num_workgroups_x, radix.num_workgroups_y,
                 uv_scale_x,
+                next_pow2(radix.num_workgroups_x * params.workgroup_size_x),
                 get_program(params),
             };
 
@@ -671,7 +678,6 @@ FFT::FFT(Context *context, unsigned Nx, unsigned Ny,
                 out_target,
                 true,
                 false,
-                false,
                 base_opts.type.fp16, base_opts.type.input_fp16, base_opts.type.output_fp16,
                 base_opts.type.normalize,
             };
@@ -681,6 +687,7 @@ FFT::FFT(Context *context, unsigned Nx, unsigned Ny,
                 Nx / res.size.x,
                 Ny / res.size.y,
                 uv_scale_x,
+                next_pow2(Nx),
                 get_program(params),
             };
 
@@ -726,7 +733,6 @@ unique_ptr<Program> FFT::build_program(const Parameters &params)
             "   Mode:      %u\n"
             "   InTarget:  %u\n"
             "   OutTarget: %u\n"
-            "   POW2:      %u\n"
             "   FP16:      %u\n"
             "   InFP16:    %u\n"
             "   OutFP16:   %u\n"
@@ -740,7 +746,6 @@ unique_ptr<Program> FFT::build_program(const Parameters &params)
             params.mode,
             params.input_target,
             params.output_target,
-            params.pow2_stride,
             params.fft_fp16,
             params.input_fp16,
             params.output_fp16,
@@ -752,11 +757,6 @@ unique_ptr<Program> FFT::build_program(const Parameters &params)
         str += "#define FFT_P1\n";
     }
 
-    if (params.pow2_stride)
-    {
-        str += "#define FFT_POW2_STRIDE\n";
-    }
-
     if (params.fft_fp16)
     {
         str += "#define FFT_FP16\n";
@@ -1022,7 +1022,8 @@ void FFT::process(CommandBuffer *cmd, Resource *output, Resource *input, Resourc
     struct FFTConstantData
     {
         uint32_t p;
-        uint32_t padding[3];
+        uint32_t stride;
+        uint32_t padding[2];
         float offset_x, offset_y;
         float scale_x, scale_y;
     };
@@ -1042,6 +1043,7 @@ void FFT::process(CommandBuffer *cmd, Resource *output, Resource *input, Resourc
 
         FFTConstantData constant_data;
         constant_data.p = p;
+        constant_data.stride = pass.stride;
         p *= pass.parameters.radix;
 
         if (pass.parameters.input_target != SSBO)
@@ -1125,7 +1127,7 @@ void FFT::process(CommandBuffer *cmd, Resource *output, Resource *input, Resourc
         // so let barrier decisions be up to the API user.
         if (pass_index + 1 < passes.size())
         {
-            cmd->barrier(static_cast<Buffer*>(buffers[0]));
+            cmd->barrier(static_cast<Buffer*>(buffers[1]));
         }
 
         if (pass_index == 0)

diff --git a/glfft.hpp b/glfft.hpp
@@ -186,6 +186,7 @@ class FFT
             unsigned workgroups_x;
             unsigned workgroups_y;
             unsigned uv_scale_x;
+            unsigned stride;
             Program *program;
         };
 

diff --git a/glfft_common.hpp b/glfft_common.hpp
@@ -93,7 +93,6 @@ struct Parameters
     Target input_target;
     Target output_target;
     bool p1;
-    bool pow2_stride;
     bool shared_banked;
     bool fft_fp16, input_fp16, output_fp16;
     bool fft_normalize;

diff --git a/glfft_gl_interface.cpp b/glfft_gl_interface.cpp
@@ -17,6 +17,7 @@
  */
 
 #include "glfft_gl_interface.hpp"
+#include "glfft_validate.hpp"
 #include <cstdarg>
 #include <cstring>
 #include <vector>
@@ -133,6 +134,11 @@ unique_ptr<Buffer> GLContext::create_buffer(const void *initial_data, size_t siz
 
 unique_ptr<Program> GLContext::compile_compute_shader(const char *source)
 {
+#ifdef GLFFT_GL_DEBUG
+    //if (!validate_glsl_source(source))
+    //    return nullptr;
+#endif
+
     GLuint program = glCreateProgram();
     if (!program)
     {

diff --git a/glsl/fft_common.comp b/glsl/fft_common.comp
@@ -22,9 +22,10 @@ precision mediump float;
 
 layout(std140, binding = 0) uniform UBO
 {
-    uvec4 p;
+    uvec4 p_stride_padding;
     vec4 texture_offset_scale;
 } constant_data;
+#define uStride constant_data.p_stride_padding.y
 
 // cfloat is the "generic" type used to hold complex data.
 // GLFFT supports vec2, vec4 and "vec8" for its complex data
@@ -831,20 +832,3 @@ void FFT_complex_to_real(uvec2 i)
 }
 #endif
 
-// For vertical transforms with real-to-complex and complex-to-real,
-// our transform space is N / 2 + 1, but we pad the arrays to N anways to avoid weird,
-// unaligned strides.
-uint next_pow2(uint x)
-{
-    return 1u << uint(findMSB(x - 1u) + 1);
-}
-
-uint get_stride(uint samples)
-{
-#ifdef FFT_POW2_STRIDE
-    return next_pow2(samples);
-#else
-    return samples;
-#endif
-}
-
diff --git a/glsl/fft_main.comp b/glsl/fft_main.comp
@@ -21,7 +21,7 @@
 // Used to compute twiddle factors.
 
 #ifndef FFT_P1
-#define uP constant_data.p.x
+#define uP constant_data.p_stride_padding.x
 #endif
 
 #if FFT_RADIX == 4

diff --git a/glsl/fft_radix16.comp b/glsl/fft_radix16.comp
@@ -113,7 +113,7 @@ void FFT16_horiz(uvec2 i, uint p)
 void FFT16_p1_vert(uvec2 i)
 {
     uvec2 quarter_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
-    uint stride = get_stride(quarter_samples.x);
+    uint stride = uStride;
     uint y_stride = stride * quarter_samples.y;
     uint offset = stride * i.y;
 
@@ -151,7 +151,7 @@ void FFT16_p1_vert(uvec2 i)
 void FFT16_vert(uvec2 i, uint p)
 {
     uvec2 quarter_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
-    uint stride = get_stride(quarter_samples.x);
+    uint stride = uStride;
     uint y_stride = stride * quarter_samples.y;
     uint offset = stride * i.y;
 

diff --git a/glsl/fft_radix4.comp b/glsl/fft_radix4.comp
@@ -80,7 +80,7 @@ void FFT4_p1_horiz(uvec2 i)
 void FFT4_p1_vert(uvec2 i)
 {
     uvec2 quarter_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
-    uint stride = get_stride(quarter_samples.x);
+    uint stride = uStride;
     uint y_stride = stride * quarter_samples.y;
     uint offset = stride * i.y;
 
@@ -135,7 +135,7 @@ void FFT4_horiz(uvec2 i, uint p)
 void FFT4_vert(uvec2 i, uint p)
 {
     uvec2 quarter_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
-    uint stride = get_stride(quarter_samples.x);
+    uint stride = uStride;
     uint y_stride = stride * quarter_samples.y;
     uint offset = stride * i.y;
 

diff --git a/glsl/fft_radix64.comp b/glsl/fft_radix64.comp
@@ -121,7 +121,7 @@ void FFT64_horiz(uvec2 i, uint p)
 void FFT64_p1_vert(uvec2 i)
 {
     uvec2 octa_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
-    uint stride = get_stride(octa_samples.x);
+    uint stride = uStride;
     uint y_stride = stride * octa_samples.y;
     uint offset = stride * i.y;
 
@@ -172,7 +172,7 @@ void FFT64_p1_vert(uvec2 i)
 void FFT64_vert(uvec2 i, uint p)
 {
     uvec2 octa_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
-    uint stride = get_stride(octa_samples.x);
+    uint stride = uStride;
     uint y_stride = stride * octa_samples.y;
     uint offset = stride * i.y;
 

diff --git a/glsl/fft_radix8.comp b/glsl/fft_radix8.comp
@@ -124,7 +124,7 @@ void FFT8_p1_horiz(uvec2 i)
 void FFT8_p1_vert(uvec2 i)
 {
     uvec2 octa_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
-    uint stride = get_stride(octa_samples.x);
+    uint stride = uStride;
     uint y_stride = stride * octa_samples.y;
     uint offset = stride * i.y;
 
@@ -205,7 +205,7 @@ void FFT8_horiz(uvec2 i, uint p)
 void FFT8_vert(uvec2 i, uint p)
 {
     uvec2 octa_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
-    uint stride = get_stride(octa_samples.x);
+    uint stride = uStride;
     uint y_stride = stride * octa_samples.y;
     uint offset = stride * i.y;
 

diff --git a/glslang b/glslang