Skip to content

Commit

Permalink
Begin adding glslang validation.
Browse files Browse the repository at this point in the history
  • Loading branch information
Themaister committed Oct 28, 2015
1 parent e4a0a9e commit 10b50cf
Show file tree
Hide file tree
Showing 15 changed files with 471 additions and 54 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Expand Up @@ -4,3 +4,6 @@
[submodule "muFFT"]
path = muFFT
url = git://github.com/Themaister/muFFT
[submodule "glslang"]
path = glslang
url = git://github.com/KhronosGroup/glslang
46 changes: 36 additions & 10 deletions GNUmakefile
Expand Up @@ -28,9 +28,6 @@ ifeq ($(BACKEND), glfw)
LDFLAGS += -lmufft $(shell pkg-config glfw3 --libs) -lGL
endif
GLSYM := test/glfw/glsym/rglgen.c test/glfw/glsym/glsym_gl.c

CC = clang
CXX = clang++
endif

ifeq ($(PLATFORM),win)
Expand All @@ -55,6 +52,37 @@ ifneq ($(TOOLCHAIN_PREFIX),)
CXX = $(TOOLCHAIN_PREFIX)g++
endif

GLSLANG_YACC := glslang/glslang/MachineIndependent/glslang.y
GLSLANG_YACC_TAB := glslang/glslang_tab.cpp
GLSLANG_YACC_TAB_INCLUDE := glslang/glslang_tab.cpp.h
GLSLANG_SOURCES := \
$(wildcard glslang/SPIRV/*.cpp) \
$(wildcard glslang/glslang/GenericCodeGen/*.cpp) \
$(wildcard glslang/OGLCompilersDLL/*.cpp) \
$(wildcard glslang/glslang/MachineIndependent/*.cpp) \
$(wildcard glslang/glslang/MachineIndependent/preprocessor/*.cpp) \
$(wildcard glslang/glslang/OSDependent/Linux/*.cpp) \
$(GLSLANG_YACC_TAB)

GLSLANG_OBJECTS := $(GLSLANG_SOURCES:.cpp=.o)
GLSLANG_LIB := libglslang.a
CXXFLAGS += -Iglslang/glslang/OSDependent/Linux \
-Iglslang \
-Iglslang/glslang/MachineIndependent \
-Iglslang/glslang/Public \
-Iglslang/SPIRV

LDFLAGS += -pthread

all: $(GLSLANG_YACC_TAB) build_fft_inc
@+$(MAKE) $(TARGET)

$(GLSLANG_LIB): $(GLSLANG_OBJECTS)
$(AR) rcs $@ $(GLSLANG_OBJECTS)

$(GLSLANG_YACC_TAB): $(GLSLANG_YACC)
bison --defines=$(GLSLANG_YACC_TAB_INCLUDE) -t $(GLSLANG_YACC) -o $(GLSLANG_YACC_TAB)

CXX_SOURCES := $(wildcard *.cpp) $(wildcard test/*.cpp) $(wildcard test/$(BACKEND)/*.cpp)
C_SOURCES := $(GLSYM)
OBJDIR := obj
Expand All @@ -65,11 +93,6 @@ CXXFLAGS += -Wall -Wextra -pedantic -std=c++11 $(EXTERNAL_INCLUDE_DIRS) -DGLFFT_
CFLAGS += -Wall -Wextra -std=c99 $(EXTERNAL_INCLUDE_DIRS)
LDFLAGS += $(EXTERNAL_LIB_DIRS) -lm


all: $(TARGET)

glfft.cpp: build_fft_inc

build_fft_inc:
$(MAKE) -C glsl

Expand All @@ -78,8 +101,8 @@ build_fft_inc:
muFFT/libmufft.a:
$(MAKE) -C muFFT static PLATFORM=$(PLATFORM) TOOLCHAIN_PREFIX=$(TOOLCHAIN_PREFIX)

$(TARGET): $(OBJECTS) $(MUFFT_LIB)
$(CXX) -o $@ $(OBJECTS) $(LDFLAGS) $(EXTERNAL_LIBS)
$(TARGET): $(OBJECTS) $(MUFFT_LIB) $(GLSLANG_LIB)
$(CXX) -o $@ $(OBJECTS) $(LDFLAGS) $(EXTERNAL_LIBS) $(GLSLANG_LIB)

$(OBJDIR)/%.o: %.cpp
@mkdir -p $(dir $@)
Expand All @@ -94,5 +117,8 @@ clean:
$(MAKE) -C muFFT clean PLATFORM=$(PLATFORM)
$(MAKE) -C glsl clean
rm -f muFFT/libmufft.a
rm -f $(GLSLANG_LIB)
rm -f $(GLSLANG_YACC_TAB)
rm -f $(GLSLANG_YACC_TAB_INCLUDE)

.PHONY: clean
34 changes: 18 additions & 16 deletions glfft.cpp
Expand Up @@ -56,6 +56,17 @@ struct Radix
bool shared_banked;
};

static unsigned next_pow2(unsigned v)
{
v--;
v |= v >> 16;
v |= v >> 8;
v |= v >> 4;
v |= v >> 2;
v |= v >> 1;
return v + 1;
}

static void reduce(unsigned &wg_size, unsigned &divisor)
{
if (divisor > 1 && wg_size >= divisor)
Expand Down Expand Up @@ -427,7 +438,6 @@ FFT::FFT(Context *context, unsigned Nx, unsigned Ny,
input_target,
output_target,
p == 1,
false,
res.shared_banked,
options.type.fp16, options.type.input_fp16, options.type.output_fp16,
options.type.normalize,
Expand All @@ -443,6 +453,7 @@ FFT::FFT(Context *context, unsigned Nx, unsigned Ny,
params,
res.num_workgroups_x, res.num_workgroups_y,
uv_scale_x,
next_pow2(res.num_workgroups_x * params.workgroup_size_x),
get_program(params),
};

Expand Down Expand Up @@ -596,10 +607,6 @@ FFT::FFT(Context *context, unsigned Nx, unsigned Ny,
unsigned p = 1;
unsigned i = 0;

// If we have R2C or C2R, we have a padded buffer to accomodate 2^n + 1 elements horizontally.
// For simplicity, this is implemented as a shader variant.
bool pow2_stride = expand && modes[index] == Vertical;

for (auto &radix : radix_direction)
{
// If this is the last pass and we're writing to an image, use a special shader variant.
Expand All @@ -622,7 +629,6 @@ FFT::FFT(Context *context, unsigned Nx, unsigned Ny,
in_target,
out_target,
p == 1,
pow2_stride,
radix.shared_banked,
options.type.fp16, input_fp16, options.type.output_fp16,
options.type.normalize,
Expand All @@ -632,6 +638,7 @@ FFT::FFT(Context *context, unsigned Nx, unsigned Ny,
params,
radix.num_workgroups_x, radix.num_workgroups_y,
uv_scale_x,
next_pow2(radix.num_workgroups_x * params.workgroup_size_x),
get_program(params),
};

Expand Down Expand Up @@ -671,7 +678,6 @@ FFT::FFT(Context *context, unsigned Nx, unsigned Ny,
out_target,
true,
false,
false,
base_opts.type.fp16, base_opts.type.input_fp16, base_opts.type.output_fp16,
base_opts.type.normalize,
};
Expand All @@ -681,6 +687,7 @@ FFT::FFT(Context *context, unsigned Nx, unsigned Ny,
Nx / res.size.x,
Ny / res.size.y,
uv_scale_x,
next_pow2(Nx),
get_program(params),
};

Expand Down Expand Up @@ -726,7 +733,6 @@ unique_ptr<Program> FFT::build_program(const Parameters &params)
" Mode: %u\n"
" InTarget: %u\n"
" OutTarget: %u\n"
" POW2: %u\n"
" FP16: %u\n"
" InFP16: %u\n"
" OutFP16: %u\n"
Expand All @@ -740,7 +746,6 @@ unique_ptr<Program> FFT::build_program(const Parameters &params)
params.mode,
params.input_target,
params.output_target,
params.pow2_stride,
params.fft_fp16,
params.input_fp16,
params.output_fp16,
Expand All @@ -752,11 +757,6 @@ unique_ptr<Program> FFT::build_program(const Parameters &params)
str += "#define FFT_P1\n";
}

if (params.pow2_stride)
{
str += "#define FFT_POW2_STRIDE\n";
}

if (params.fft_fp16)
{
str += "#define FFT_FP16\n";
Expand Down Expand Up @@ -1022,7 +1022,8 @@ void FFT::process(CommandBuffer *cmd, Resource *output, Resource *input, Resourc
struct FFTConstantData
{
uint32_t p;
uint32_t padding[3];
uint32_t stride;
uint32_t padding[2];
float offset_x, offset_y;
float scale_x, scale_y;
};
Expand All @@ -1042,6 +1043,7 @@ void FFT::process(CommandBuffer *cmd, Resource *output, Resource *input, Resourc

FFTConstantData constant_data;
constant_data.p = p;
constant_data.stride = pass.stride;
p *= pass.parameters.radix;

if (pass.parameters.input_target != SSBO)
Expand Down Expand Up @@ -1125,7 +1127,7 @@ void FFT::process(CommandBuffer *cmd, Resource *output, Resource *input, Resourc
// so let barrier decisions be up to the API user.
if (pass_index + 1 < passes.size())
{
cmd->barrier(static_cast<Buffer*>(buffers[0]));
cmd->barrier(static_cast<Buffer*>(buffers[1]));
}

if (pass_index == 0)
Expand Down
1 change: 1 addition & 0 deletions glfft.hpp
Expand Up @@ -186,6 +186,7 @@ class FFT
unsigned workgroups_x;
unsigned workgroups_y;
unsigned uv_scale_x;
unsigned stride;
Program *program;
};

Expand Down
1 change: 0 additions & 1 deletion glfft_common.hpp
Expand Up @@ -93,7 +93,6 @@ struct Parameters
Target input_target;
Target output_target;
bool p1;
bool pow2_stride;
bool shared_banked;
bool fft_fp16, input_fp16, output_fp16;
bool fft_normalize;
Expand Down
6 changes: 6 additions & 0 deletions glfft_gl_interface.cpp
Expand Up @@ -17,6 +17,7 @@
*/

#include "glfft_gl_interface.hpp"
#include "glfft_validate.hpp"
#include <cstdarg>
#include <cstring>
#include <vector>
Expand Down Expand Up @@ -133,6 +134,11 @@ unique_ptr<Buffer> GLContext::create_buffer(const void *initial_data, size_t siz

unique_ptr<Program> GLContext::compile_compute_shader(const char *source)
{
#ifdef GLFFT_GL_DEBUG
//if (!validate_glsl_source(source))
// return nullptr;
#endif

GLuint program = glCreateProgram();
if (!program)
{
Expand Down
20 changes: 2 additions & 18 deletions glsl/fft_common.comp
Expand Up @@ -22,9 +22,10 @@ precision mediump float;

layout(std140, binding = 0) uniform UBO
{
uvec4 p;
uvec4 p_stride_padding;
vec4 texture_offset_scale;
} constant_data;
#define uStride constant_data.p_stride_padding.y

// cfloat is the "generic" type used to hold complex data.
// GLFFT supports vec2, vec4 and "vec8" for its complex data
Expand Down Expand Up @@ -831,20 +832,3 @@ void FFT_complex_to_real(uvec2 i)
}
#endif

// For vertical transforms with real-to-complex and complex-to-real,
// our transform space is N / 2 + 1, but we pad the arrays to N anways to avoid weird,
// unaligned strides.
uint next_pow2(uint x)
{
return 1u << uint(findMSB(x - 1u) + 1);
}

uint get_stride(uint samples)
{
#ifdef FFT_POW2_STRIDE
return next_pow2(samples);
#else
return samples;
#endif
}

2 changes: 1 addition & 1 deletion glsl/fft_main.comp
Expand Up @@ -21,7 +21,7 @@
// Used to compute twiddle factors.

#ifndef FFT_P1
#define uP constant_data.p.x
#define uP constant_data.p_stride_padding.x
#endif

#if FFT_RADIX == 4
Expand Down
4 changes: 2 additions & 2 deletions glsl/fft_radix16.comp
Expand Up @@ -113,7 +113,7 @@ void FFT16_horiz(uvec2 i, uint p)
void FFT16_p1_vert(uvec2 i)
{
uvec2 quarter_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
uint stride = get_stride(quarter_samples.x);
uint stride = uStride;
uint y_stride = stride * quarter_samples.y;
uint offset = stride * i.y;

Expand Down Expand Up @@ -151,7 +151,7 @@ void FFT16_p1_vert(uvec2 i)
void FFT16_vert(uvec2 i, uint p)
{
uvec2 quarter_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
uint stride = get_stride(quarter_samples.x);
uint stride = uStride;
uint y_stride = stride * quarter_samples.y;
uint offset = stride * i.y;

Expand Down
4 changes: 2 additions & 2 deletions glsl/fft_radix4.comp
Expand Up @@ -80,7 +80,7 @@ void FFT4_p1_horiz(uvec2 i)
void FFT4_p1_vert(uvec2 i)
{
uvec2 quarter_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
uint stride = get_stride(quarter_samples.x);
uint stride = uStride;
uint y_stride = stride * quarter_samples.y;
uint offset = stride * i.y;

Expand Down Expand Up @@ -135,7 +135,7 @@ void FFT4_horiz(uvec2 i, uint p)
void FFT4_vert(uvec2 i, uint p)
{
uvec2 quarter_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
uint stride = get_stride(quarter_samples.x);
uint stride = uStride;
uint y_stride = stride * quarter_samples.y;
uint offset = stride * i.y;

Expand Down
4 changes: 2 additions & 2 deletions glsl/fft_radix64.comp
Expand Up @@ -121,7 +121,7 @@ void FFT64_horiz(uvec2 i, uint p)
void FFT64_p1_vert(uvec2 i)
{
uvec2 octa_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
uint stride = get_stride(octa_samples.x);
uint stride = uStride;
uint y_stride = stride * octa_samples.y;
uint offset = stride * i.y;

Expand Down Expand Up @@ -172,7 +172,7 @@ void FFT64_p1_vert(uvec2 i)
void FFT64_vert(uvec2 i, uint p)
{
uvec2 octa_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
uint stride = get_stride(octa_samples.x);
uint stride = uStride;
uint y_stride = stride * octa_samples.y;
uint offset = stride * i.y;

Expand Down
4 changes: 2 additions & 2 deletions glsl/fft_radix8.comp
Expand Up @@ -124,7 +124,7 @@ void FFT8_p1_horiz(uvec2 i)
void FFT8_p1_vert(uvec2 i)
{
uvec2 octa_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
uint stride = get_stride(octa_samples.x);
uint stride = uStride;
uint y_stride = stride * octa_samples.y;
uint offset = stride * i.y;

Expand Down Expand Up @@ -205,7 +205,7 @@ void FFT8_horiz(uvec2 i, uint p)
void FFT8_vert(uvec2 i, uint p)
{
uvec2 octa_samples = gl_NumWorkGroups.xy * gl_WorkGroupSize.xy;
uint stride = get_stride(octa_samples.x);
uint stride = uStride;
uint y_stride = stride * octa_samples.y;
uint offset = stride * i.y;

Expand Down
1 change: 1 addition & 0 deletions glslang
Submodule glslang added at 40f6f6

0 comments on commit 10b50cf

Please sign in to comment.