Permalink
Browse files

OpenGL Renderer: Do some minor performance-improving tweaks.

- Most notably, fix a performance regression where polygon drawing was no longer getting batched due to an incorrect polygon-facing test. (Regression from commit dab414c.)
  • Loading branch information...
rogerman committed Dec 27, 2018
1 parent 062d9a6 commit c1357c1451f8ae947375b089d745409d82667c11
Showing with 38 additions and 37 deletions.
  1. +21 −23 desmume/src/OGLRender.cpp
  2. +12 −14 desmume/src/OGLRender_3_2.cpp
  3. +5 −0 desmume/src/render3D.cpp
@@ -54,7 +54,7 @@ typedef struct
static OGLVersion _OGLDriverVersion = {0, 0, 0};

// Lookup Tables
static CACHE_ALIGN GLfloat material_8bit_to_float[256] = {0};
static CACHE_ALIGN GLfloat material_6bit_to_float[64] = {0};
CACHE_ALIGN const GLfloat divide5bitBy31_LUT[32] = {0.0, 0.0322580645161, 0.0645161290323, 0.0967741935484,
0.1290322580645, 0.1612903225806, 0.1935483870968, 0.2258064516129,
0.2580645161290, 0.2903225806452, 0.3225806451613, 0.3548387096774,
@@ -292,7 +292,7 @@ void main() \n\
\n\
vtxPosition = inPosition; \n\
vtxTexCoord = texScaleMtx * inTexCoord0; \n\
vtxColor = vec4(inColor * 4.0, polyAlpha); \n\
vtxColor = vec4(inColor / 63.0, polyAlpha); \n\
\n\
gl_Position = vtxPosition; \n\
} \n\
@@ -330,18 +330,6 @@ void main()\n\
#endif\n\
#if ENABLE_FOG\n\
vec4 newFogAttributes = vec4(0.0, 0.0, 0.0, 0.0);\n\
#endif\n\
\n\
#if USE_NDS_DEPTH_CALCULATION || ENABLE_FOG\n\
float depthOffset = (polyDepthOffsetMode == 0) ? 0.0 : ((polyDepthOffsetMode == 1) ? -DEPTH_EQUALS_TEST_TOLERANCE : DEPTH_EQUALS_TEST_TOLERANCE);\n\
\n\
#if ENABLE_W_DEPTH\n\
float newFragDepthValue = clamp( ( (vtxPosition.w * 4096.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#else\n\
float vertW = (vtxPosition.w == 0.0) ? 0.00000001 : vtxPosition.w;\n\
// hack: when using z-depth, drop some LSBs so that the overworld map in Dragon Quest IV shows up correctly\n\
float newFragDepthValue = clamp( ( (floor(((vtxPosition.z/vertW) * 0.5 + 0.5) * 4194303.0) * 4.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#endif\n\
#endif\n\
\n\
if ((polyMode != 3) || polyDrawShadow)\n\
@@ -416,6 +404,16 @@ void main()\n\
gl_FragData[2] = newFogAttributes;\n\
#endif\n\
#if USE_NDS_DEPTH_CALCULATION || ENABLE_FOG\n\
float depthOffset = (polyDepthOffsetMode == 0) ? 0.0 : ((polyDepthOffsetMode == 1) ? -DEPTH_EQUALS_TEST_TOLERANCE : DEPTH_EQUALS_TEST_TOLERANCE);\n\
\n\
#if ENABLE_W_DEPTH\n\
float newFragDepthValue = clamp( ( (vtxPosition.w * 4096.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#else\n\
float vertW = (vtxPosition.w == 0.0) ? 0.00000001 : vtxPosition.w;\n\
// hack: when using z-depth, drop some LSBs so that the overworld map in Dragon Quest IV shows up correctly\n\
float newFragDepthValue = clamp( ( (floor(((vtxPosition.z/vertW) * 0.5 + 0.5) * 4194303.0) * 4.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#endif\n\
\n\
gl_FragDepth = newFragDepthValue;\n\
#endif\n\
}\n\
@@ -1908,7 +1906,7 @@ size_t OpenGLRenderer::DrawPolygonsForIndexRange(const POLYLIST *polyList, const
polyPrimitive != GL_LINE_STRIP &&
oglPrimitiveType[nextPoly.vtxFormat] != GL_LINE_LOOP &&
oglPrimitiveType[nextPoly.vtxFormat] != GL_LINE_STRIP &&
this->_isPolyFrontFacing[i] != this->_isPolyFrontFacing[i+1])
this->_isPolyFrontFacing[i] == this->_isPolyFrontFacing[i+1])
{
continue;
}
@@ -2765,7 +2763,7 @@ Render3DError OpenGLRenderer_1_2::CreateVAOs()
glEnableVertexAttribArray(OGLVertexAttributeID_Color);
glVertexAttribPointer(OGLVertexAttributeID_Position, 4, GL_FLOAT, GL_FALSE, sizeof(VERT), (const GLvoid *)offsetof(VERT, coord));
glVertexAttribPointer(OGLVertexAttributeID_TexCoord0, 2, GL_FLOAT, GL_FALSE, sizeof(VERT), (const GLvoid *)offsetof(VERT, texcoord));
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(VERT), (const GLvoid *)offsetof(VERT, color));
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_FALSE, sizeof(VERT), (const GLvoid *)offsetof(VERT, color));

glBindVertexArray(0);

@@ -3758,8 +3756,8 @@ Render3DError OpenGLRenderer_1_2::InitTables()

if (needTableInit)
{
for (size_t i = 0; i < 256; i++)
material_8bit_to_float[i] = (GLfloat)(i * 4) / 255.0f;
for (size_t i = 0; i < 63; i++)
material_6bit_to_float[i] = ((GLfloat)i * (255.0f/63.0f)) / 255.0f;

needTableInit = false;
}
@@ -3931,7 +3929,7 @@ Render3DError OpenGLRenderer_1_2::EnableVertexAttributes()
glEnableVertexAttribArray(OGLVertexAttributeID_Color);
glVertexAttribPointer(OGLVertexAttributeID_Position, 4, GL_FLOAT, GL_FALSE, sizeof(VERT), OGLRef.vtxPtrPosition);
glVertexAttribPointer(OGLVertexAttributeID_TexCoord0, 2, GL_FLOAT, GL_FALSE, sizeof(VERT), OGLRef.vtxPtrTexCoord);
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(VERT), OGLRef.vtxPtrColor);
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_FALSE, sizeof(VERT), OGLRef.vtxPtrColor);
}
else
{
@@ -4356,9 +4354,9 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
// Consolidate the vertex color and the poly alpha to our internal color buffer
// so that OpenGL can use it.
const VERT *vertForAlpha = &engine.vertList[vertIndex];
OGLRef.color4fBuffer[colorIndex+0] = material_8bit_to_float[vertForAlpha->color[0]];
OGLRef.color4fBuffer[colorIndex+1] = material_8bit_to_float[vertForAlpha->color[1]];
OGLRef.color4fBuffer[colorIndex+2] = material_8bit_to_float[vertForAlpha->color[2]];
OGLRef.color4fBuffer[colorIndex+0] = material_6bit_to_float[vertForAlpha->color[0]];
OGLRef.color4fBuffer[colorIndex+1] = material_6bit_to_float[vertForAlpha->color[1]];
OGLRef.color4fBuffer[colorIndex+2] = material_6bit_to_float[vertForAlpha->color[2]];
OGLRef.color4fBuffer[colorIndex+3] = thePolyAlpha;

// While we're looping through our vertices, add each vertex index to a
@@ -5515,7 +5513,7 @@ Render3DError OpenGLRenderer_2_0::EnableVertexAttributes()
glEnableVertexAttribArray(OGLVertexAttributeID_Color);
glVertexAttribPointer(OGLVertexAttributeID_Position, 4, GL_FLOAT, GL_FALSE, sizeof(VERT), OGLRef.vtxPtrPosition);
glVertexAttribPointer(OGLVertexAttributeID_TexCoord0, 2, GL_FLOAT, GL_FALSE, sizeof(VERT), OGLRef.vtxPtrTexCoord);
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(VERT), OGLRef.vtxPtrColor);
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_FALSE, sizeof(VERT), OGLRef.vtxPtrColor);
}

return OGLERROR_NOERR;
@@ -147,7 +147,7 @@ void main() \n\
\n\
vtxPosition = inPosition; \n\
vtxTexCoord = texScaleMtx * inTexCoord0; \n\
vtxColor = vec4(inColor * 4.0, polyAlpha); \n\
vtxColor = vec4(inColor / 63.0, polyAlpha); \n\
\n\
gl_Position = vtxPosition; \n\
} \n\
@@ -206,18 +206,6 @@ void main()\n\
#endif\n\
#if ENABLE_FOG\n\
vec4 newFogAttributes = vec4(0.0, 0.0, 0.0, 0.0);\n\
#endif\n\
\n\
#if USE_NDS_DEPTH_CALCULATION || ENABLE_FOG\n\
float depthOffset = (polyDepthOffsetMode == 0) ? 0.0 : ((polyDepthOffsetMode == 1) ? -DEPTH_EQUALS_TEST_TOLERANCE : DEPTH_EQUALS_TEST_TOLERANCE);\n\
\n\
#if ENABLE_W_DEPTH\n\
float newFragDepthValue = clamp( ( (vtxPosition.w * 4096.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#else\n\
float vertW = (vtxPosition.w == 0.0) ? 0.00000001 : vtxPosition.w;\n\
// hack: when using z-depth, drop some LSBs so that the overworld map in Dragon Quest IV shows up correctly\n\
float newFragDepthValue = clamp( ( (floor(((vtxPosition.z/vertW) * 0.5 + 0.5) * 4194303.0) * 4.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#endif\n\
#endif\n\
\n\
if ((polyMode != 3u) || polyDrawShadow)\n\
@@ -299,6 +287,16 @@ void main()\n\
outFogAttributes = newFogAttributes;\n\
#endif\n\
#if USE_NDS_DEPTH_CALCULATION || ENABLE_FOG\n\
float depthOffset = (polyDepthOffsetMode == 0) ? 0.0 : ((polyDepthOffsetMode == 1) ? -DEPTH_EQUALS_TEST_TOLERANCE : DEPTH_EQUALS_TEST_TOLERANCE);\n\
\n\
#if ENABLE_W_DEPTH\n\
float newFragDepthValue = clamp( ( (vtxPosition.w * 4096.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#else\n\
float vertW = (vtxPosition.w == 0.0) ? 0.00000001 : vtxPosition.w;\n\
// hack: when using z-depth, drop some LSBs so that the overworld map in Dragon Quest IV shows up correctly\n\
float newFragDepthValue = clamp( ( (floor(((vtxPosition.z/vertW) * 0.5 + 0.5) * 4194303.0) * 4.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#endif\n\
\n\
gl_FragDepth = newFragDepthValue;\n\
#endif\n\
}\n\
@@ -1259,7 +1257,7 @@ Render3DError OpenGLRenderer_3_2::CreateVAOs()
glEnableVertexAttribArray(OGLVertexAttributeID_Color);
glVertexAttribPointer(OGLVertexAttributeID_Position, 4, GL_FLOAT, GL_FALSE, sizeof(VERT), (const GLvoid *)offsetof(VERT, coord));
glVertexAttribPointer(OGLVertexAttributeID_TexCoord0, 2, GL_FLOAT, GL_FALSE, sizeof(VERT), (const GLvoid *)offsetof(VERT, texcoord));
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(VERT), (const GLvoid *)offsetof(VERT, color));
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_FALSE, sizeof(VERT), (const GLvoid *)offsetof(VERT, color));

glBindVertexArray(0);

@@ -244,6 +244,11 @@ Render3D::Render3D()
_textureDeposterizeSrcSurface.Height = _textureDeposterizeDstSurface.Height = 1;
_textureDeposterizeSrcSurface.Pitch = _textureDeposterizeDstSurface.Pitch = 1;

for (size_t i = 0; i < POLYLIST_SIZE; i++)
{
_textureList[i] = NULL;
}

Reset();
}

3 comments on commit c1357c1

@Jules-A

This comment has been minimized.

Copy link
Contributor

Jules-A replied Dec 28, 2018

@rogerman While you say this is a minor performance increase, this commit increased FPS by up to 13% in heavy scenes for me, the largest I've seen in a long time without having to sacrifice accuracy. When dab414c was introduced it only caused a ~1.5% drop.
Thanks for your work on optimizations, it is much appreciated!

@rogerman

This comment has been minimized.

Copy link
Collaborator

rogerman replied Dec 28, 2018

@Jules-A, while YOU are getting much better performance, a lot of other users (like me) are only going to see a minor performance improvement. Why? Graphics driver differences.

That is why I said "minor performance increase", so that I wouldn't unnecessarily get users' hopes up. However, it's not outside the realm of possibility that these changes would help some graphics drivers more than others, and so I am genuinely glad that you got such a big performance increase there!

In reality, this was kind of a lucky commit, as I only saw these things due to a related side project that I'm doing at the moment. It is very possible that you may see more OpenGL renderer tweaks that occur as byproducts of this side project in the near future.

@Jules-A

This comment has been minimized.

Copy link
Contributor

Jules-A replied Dec 28, 2018

while YOU are getting much better performance, a lot of other users (like me) are only going to see a minor performance improvement. Why? Graphics driver differences.

Yeah, I assumed that was the case. Originally I skimmed over the changes, thought "cool, a few % more fps" and forgot about it, so when I did a build with heavier MSVC optimizations I was super surprised.

That is why I said "minor performance increase", so that I wouldn't unnecessarily get users' hopes up.

Good idea, every time you say anything other than minor, there is a Reddit post about the commit that gets heavily upvoted.

In reality, this was kind of a lucky commit, as I only saw these things due to a related side project that I'm doing at the moment. It is very possible that you may see more OpenGL renderer tweaks that occur as byproducts of this side project in the near future.

Sounds awesome.

Please sign in to comment.