Skip to content

Commit

Permalink
Ignore format reinterpretation hack #4089
Browse files Browse the repository at this point in the history
Fixes performance for a bunch of games (including PW:DD and PW:SoJ)
  • Loading branch information
Marocco2 committed Nov 3, 2019
1 parent 4abbad2 commit b899e83
Show file tree
Hide file tree
Showing 8 changed files with 104 additions and 5 deletions.
2 changes: 2 additions & 0 deletions src/citra/config.cpp
Expand Up @@ -124,6 +124,8 @@ void Config::ReadValues() {
Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
Settings::values.frame_limit =
static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
Settings::values.use_format_reinterpret_hack =
sdl2_config->GetBoolean("Renderer", "use_format_reinterpret_hack", true);

Settings::values.render_3d = static_cast<Settings::StereoRenderOption>(
sdl2_config->GetInteger("Renderer", "render_3d", 0));
Expand Down
6 changes: 6 additions & 0 deletions src/citra/default_ini.h
Expand Up @@ -129,6 +129,12 @@ use_frame_limit =
# 1 - 9999: Speed limit as a percentage of target game speed. 100 (default)
frame_limit =
# Advanced option: Ignores flushing surfaces from cpu memory if the surface was created by the gpu
# and has a different format. This can speed up many games and potentially break some, but it is really
# just a hack as a placeholder for gpu texture encoding/decoding
# 0: Off, 1: On (default)
use_format_reinterpret_hack =
# The clear color for the renderer. What shows up on the sides of the bottom screen.
# Must be in range of 0.0-1.0. Defaults to 0.0 for all.
bg_red =
Expand Down
3 changes: 3 additions & 0 deletions src/citra_qt/configuration/config.cpp
Expand Up @@ -167,6 +167,8 @@ void Config::ReadValues() {
Settings::values.vsync_enabled = ReadSetting("vsync_enabled", false).toBool();
Settings::values.use_frame_limit = ReadSetting("use_frame_limit", true).toBool();
Settings::values.frame_limit = ReadSetting("frame_limit", 100).toInt();
Settings::values.use_format_reinterpret_hack =
ReadSetting("use_format_reinterpret_hack", true).toBool();

Settings::values.bg_red = ReadSetting("bg_red", 0.0).toFloat();
Settings::values.bg_green = ReadSetting("bg_green", 0.0).toFloat();
Expand Down Expand Up @@ -463,6 +465,7 @@ void Config::SaveValues() {
WriteSetting("vsync_enabled", Settings::values.vsync_enabled, false);
WriteSetting("use_frame_limit", Settings::values.use_frame_limit, true);
WriteSetting("frame_limit", Settings::values.frame_limit, 100);
WriteSetting("use_format_reinterpret_hack", Settings::values.use_format_reinterpret_hack, true);

// Cast to double because Qt's written float values are not human-readable
WriteSetting("bg_red", (double)Settings::values.bg_red, 0.0);
Expand Down
2 changes: 2 additions & 0 deletions src/core/settings.cpp
Expand Up @@ -27,6 +27,7 @@ void Apply() {
VideoCore::g_shader_jit_enabled = values.use_shader_jit;
VideoCore::g_hw_shader_enabled = values.use_hw_shader;
VideoCore::g_hw_shader_accurate_mul = values.shaders_accurate_mul;
VideoCore::g_use_format_reinterpret_hack = values.use_format_reinterpret_hack;

if (VideoCore::g_renderer) {
VideoCore::g_renderer->UpdateCurrentFramebufferLayout();
Expand Down Expand Up @@ -78,6 +79,7 @@ void LogSettings() {
LogSetting("Renderer_VsyncEnabled", Settings::values.vsync_enabled);
LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
LogSetting("Renderer_FormatReinterpretHack", Settings::values.use_format_reinterpret_hack);
LogSetting("Renderer_PostProcessingShader", Settings::values.pp_shader_name);
LogSetting("Layout_Factor3d", Settings::values.factor_3d);
LogSetting("Layout_LayoutOption", static_cast<int>(Settings::values.layout_option));
Expand Down
1 change: 1 addition & 0 deletions src/core/settings.h
Expand Up @@ -145,6 +145,7 @@ struct Values {
u16 resolution_factor;
bool vsync_enabled;
bool use_frame_limit;
bool use_format_reinterpret_hack;
u16 frame_limit;
u32 core_ticks_hack;

Expand Down
93 changes: 88 additions & 5 deletions src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
Expand Up @@ -25,6 +25,8 @@
#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "core/settings.h"
#include "core/core.h"
#include "common/telemetry.h"
#include "video_core/pica_state.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
Expand Down Expand Up @@ -1604,6 +1606,45 @@ void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface,
}
}

// Maps a SurfaceParams::PixelFormat enumerator to a human-readable name.
// Used only for log/telemetry output; any unlisted enumerator yields a
// sentinel string instead of failing.
static const char* PixelFormatAsString(SurfaceParams::PixelFormat format) {
    using Fmt = SurfaceParams::PixelFormat;
    switch (format) {
    case Fmt::RGBA8:
        return "RGBA8";
    case Fmt::RGB8:
        return "RGB8";
    case Fmt::RGB5A1:
        return "RGB5A1";
    case Fmt::RGB565:
        return "RGB565";
    case Fmt::RGBA4:
        return "RGBA4";
    case Fmt::IA8:
        return "IA8";
    case Fmt::I8:
        return "I8";
    case Fmt::A8:
        return "A8";
    case Fmt::IA4:
        return "IA4";
    case Fmt::I4:
        return "I4";
    case Fmt::A4:
        return "A4";
    case Fmt::ETC1:
        return "ETC1";
    case Fmt::ETC1A4:
        return "ETC1A4";
    case Fmt::D16:
        return "D16";
    case Fmt::D24:
        return "D24";
    case Fmt::D24S8:
        return "D24S8";
    default:
        return "Not a real pixel format";
    }
}

void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u32 size) {
if (size == 0)
return;
Expand All @@ -1616,9 +1657,18 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr,
return;
}

auto validate_regions = surface->invalid_regions & validate_interval;
auto notify_validated = [&](SurfaceInterval interval) {
surface->invalid_regions.erase(interval);
validate_regions.erase(interval);
};

bool flushed_from_cpu = false;

std::set<u32> formats;
while (true) {
const auto it = surface->invalid_regions.find(validate_interval);
if (it == surface->invalid_regions.end())
const auto it = validate_regions.begin();
if (it == validate_regions.end())
break;

const auto interval = *it & validate_interval;
Expand All @@ -1630,7 +1680,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr,
if (copy_surface != nullptr) {
SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface);
CopySurface(copy_surface, surface, copy_interval);
surface->invalid_regions.erase(copy_interval);
notify_validated(copy_interval);
continue;
}

Expand All @@ -1650,19 +1700,52 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr,
ConvertD24S8toABGR(reinterpret_surface->texture.handle, src_rect,
surface->texture.handle, dest_rect);
}
surface->invalid_regions.erase(convert_interval);
notify_validated(convert_interval);
continue;
}
}

// By this point, we've checked to see if there was a valid surface that we could have
// copied from, so now we want to check if the surface was created on the gpu only. If it
// was, and since we already checked if there was a matching surface with the same format,
// this means its requesting a different texture format and we will skip it. If any part
// that we will validate is from the CPU, then we flush it all.
// As this is a HACK, remove this when we get proper hw texture en/decoding support
if (VideoCore::g_use_format_reinterpret_hack) {
bool retry = false;
for (const auto& pair : RangeFromInterval(dirty_regions, interval)) {
// Don't actually validate the region, and instead just skip it for now
validate_regions.erase(pair.first & interval);
formats.insert(static_cast<u32>(pair.second->pixel_format));
retry = true;
}
if (retry)
continue;
}

// Load data from 3DS memory
FlushRegion(params.addr, params.size);
if (!GLES || surface->pixel_format < PixelFormat::D16) {
surface->LoadGLBuffer(params.addr, params.end);
surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle,
draw_framebuffer.handle);
}
surface->invalid_regions.erase(params.GetInterval());
notify_validated(params.GetInterval());
flushed_from_cpu = true;
}

if (!flushed_from_cpu && !formats.empty()) {
std::string s;
for (auto format : formats) {
s += PixelFormatAsString(static_cast<PixelFormat>(format));
s += ", ";
}
LOG_DEBUG(Debug_GPU,
"Validating surface with pixel format {} and found surfaces created on the gpu "
"that have the following pixel formats: {}",
PixelFormatAsString(surface->pixel_format), s);
Core::Telemetry().AddField(Telemetry::FieldType::Session, "VideoCore_FormatReinterpret",
true);
}
}

Expand Down
1 change: 1 addition & 0 deletions src/video_core/video_core.cpp
Expand Up @@ -24,6 +24,7 @@ std::atomic<bool> g_shader_jit_enabled;
std::atomic<bool> g_hw_shader_enabled;
std::atomic<bool> g_hw_shader_accurate_mul;
std::atomic<bool> g_renderer_bg_color_update_requested;
std::atomic<bool> g_use_format_reinterpret_hack;
// Screenshot
std::atomic<bool> g_renderer_screenshot_requested;
std::function<void(u32*, u32, u32, const std::string&)> g_dump_texture_callback;
Expand Down
1 change: 1 addition & 0 deletions src/video_core/video_core.h
Expand Up @@ -33,6 +33,7 @@ extern std::atomic<bool> g_shader_jit_enabled;
extern std::atomic<bool> g_hw_shader_enabled;
extern std::atomic<bool> g_hw_shader_accurate_mul;
extern std::atomic<bool> g_renderer_bg_color_update_requested;
extern std::atomic<bool> g_use_format_reinterpret_hack;
// Screenshot
extern std::atomic<bool> g_renderer_screenshot_requested;
extern std::function<void(u32*, u32, u32, const std::string&)> g_dump_texture_callback;
Expand Down

0 comments on commit b899e83

Please sign in to comment.