RPCS3 · kd-11 · Oct 11, 2022 · Oct 10, 2022 · Oct 10, 2022 · Oct 10, 2022
diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp
@@ -256,9 +256,29 @@ void GLGSRender::update_draw_state()
 		gl_state.enable(rsx::method_registers.poly_offset_line_enabled(), GL_POLYGON_OFFSET_LINE);
 		gl_state.enable(rsx::method_registers.poly_offset_fill_enabled(), GL_POLYGON_OFFSET_FILL);
 
-		//offset_bias is the constant factor, multiplied by the implementation factor R
-		//offset_scale is the slope factor, multiplied by the triangle slope factor M
-		gl_state.polygon_offset(rsx::method_registers.poly_offset_scale(), rsx::method_registers.poly_offset_bias());
+		// offset_bias is the constant factor, multiplied by the implementation factor R
+		// offset_scale is the slope factor, multiplied by the triangle slope factor M
+		const auto poly_offset_scale = rsx::method_registers.poly_offset_scale();
+		auto poly_offset_bias = rsx::method_registers.poly_offset_bias();
+
+		if (auto ds = m_rtts.m_bound_depth_stencil.second;
+			ds && ds->get_internal_format() == gl::texture::internal_format::depth24_stencil8)
+		{
+			// Check details in VKDraw.cpp about behaviour of RSX vs desktop D24X8 implementations
+			// TLDR, RSX expects R = 16,777,215 (2^24 - 1)
+			const auto& caps = gl::get_driver_caps();
+			if (caps.vendor_NVIDIA || caps.vendor_MESA)
+			{
+				// R derived to be 8388607 (2^23 - 1)
+				poly_offset_bias *= 0.5f;
+			}
+			else if (caps.vendor_AMD)
+			{
+				// R derived to be 4194303 (2^22 - 1)
+				poly_offset_bias *= 0.25f;
+			}
+		}
+		gl_state.polygon_offset(poly_offset_scale, poly_offset_bias);
 
 		if (gl_state.enable(rsx::method_registers.cull_face_enabled(), GL_CULL_FACE))
 		{

diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp
@@ -354,7 +354,7 @@ void GLFragmentProgram::Decompile(const RSXFragmentProgram& prog)
 	std::string source;
 	GLFragmentDecompilerThread decompiler(source, parr, prog, size);
 
-	if (!g_cfg.video.disable_native_float16)
+	if (g_cfg.video.shader_precision == gpu_preset_level::low)
 	{
 		const auto driver_caps = gl::get_driver_caps();
 		decompiler.device_props.has_native_half_support = driver_caps.NV_gpu_shader5_supported || driver_caps.AMD_gpu_shader_half_float_supported;

diff --git a/rpcs3/Emu/RSX/GL/glutils/capabilities.hpp b/rpcs3/Emu/RSX/GL/glutils/capabilities.hpp
@@ -30,6 +30,8 @@ namespace gl
 		bool vendor_AMD = false;    // has broken ARB_multidraw
 		bool vendor_NVIDIA = false; // has NaN poisoning issues
 		bool vendor_MESA = false;   // requires CLIENT_STORAGE bit set for streaming buffers
+		bool subvendor_RADEONSI = false;
+		bool subvendor_NOUVEAU = false;
 
 		bool check(const std::string& ext_name, const char* test)
 		{
@@ -174,6 +176,15 @@ namespace gl
 			if (version_string.find("Mesa") != umax || renderer_string.find("Mesa") != umax)
 			{
 				vendor_MESA = true;
+
+				if (vendor_string.find("nouveau") != umax)
+				{
+					subvendor_NOUVEAU = true;
+				}
+				else if (vendor_string.find("AMD") != umax)
+				{
+					subvendor_RADEONSI = true;
+				}
 			}
 
 			// Workaround for intel drivers which have terrible capability reporting

diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp
@@ -5,6 +5,7 @@
 #include "VKAsyncScheduler.h"
 #include "VKGSRender.h"
 #include "vkutils/buffer_object.h"
+#include "vkutils/chip_class.h"
 
 namespace vk
 {
@@ -173,7 +174,22 @@ void VKGSRender::update_draw_state()
 	{
 		// offset_bias is the constant factor, multiplied by the implementation factor R
-		// offst_scale is the slope factor, multiplied by the triangle slope factor M
+		// offset_scale is the slope factor, multiplied by the triangle slope factor M
-		vkCmdSetDepthBias(*m_current_command_buffer, rsx::method_registers.poly_offset_bias(), 0.f, rsx::method_registers.poly_offset_scale());
+		// R is implementation dependent and has to be derived empirically for supported implementations.
+		// Lucky for us, only NVIDIA currently supports fixed-point 24-bit depth buffers.
+
+		const auto polygon_offset_scale = rsx::method_registers.poly_offset_scale();
+		auto polygon_offset_bias = rsx::method_registers.poly_offset_bias();
+
+		if (m_draw_fbo->depth_format() == VK_FORMAT_D24_UNORM_S8_UINT && is_NVIDIA(vk::get_chip_family()))
+		{
+			// Empirically derived to be 0.5 * (2^24 - 1) for fixed type on Pascal. The same seems to apply for other NVIDIA GPUs.
+			// RSX seems to be using 2^24 - 1 instead making the biases twice as large when using fixed type Z-buffer on NVIDIA.
+			// Note, that the formula for floating point is complicated, but actually works out for us.
+			// Since the exponent range for a polygon is around 0, and we have 23 (+1) mantissa bits, R just works out to the same range by chance \o/.
+			polygon_offset_bias *= 0.5f;
+		}
+
+		vkCmdSetDepthBias(*m_current_command_buffer, polygon_offset_bias, 0.f, polygon_offset_scale);
 	}
 	else
 	{

diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp
@@ -419,7 +419,7 @@ void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog)
 	VKFragmentDecompilerThread decompiler(source, parr, prog, size, *this);
 
 	const auto pdev = vk::get_current_renderer();
-	if (!g_cfg.video.disable_native_float16)
+	if (g_cfg.video.shader_precision == gpu_preset_level::low)
 	{
 		decompiler.device_props.has_native_half_support = pdev->get_shader_types_support().allow_float16;
 	}

diff --git a/rpcs3/Emu/RSX/VK/vkutils/chip_class.h b/rpcs3/Emu/RSX/VK/vkutils/chip_class.h
@@ -50,4 +50,6 @@ namespace vk
 
 	chip_class get_chip_family();
 	chip_class get_chip_family(u32 vendor_id, u32 device_id);
+
+	static inline bool is_NVIDIA(chip_class chip) { return chip >= chip_class::NV_generic && chip <= chip_class::NV_ampere; } // Inclusive range test on chip_class enumerator order (NV_generic..NV_ampere). NOTE(review): presumably every NVIDIA class is declared inside that range - confirm against the enum when new classes are added
 }
diff --git a/rpcs3/Emu/RSX/VK/vkutils/framebuffer_object.hpp b/rpcs3/Emu/RSX/VK/vkutils/framebuffer_object.hpp
@@ -63,6 +63,18 @@ namespace vk
 			return attachments[0]->image()->samples();
 		}
 
+		VkFormat format() // Returns the image format of the first attachment
+		{
+			ensure(!attachments.empty()); // attachments[0] is dereferenced below, so at least one attachment is required
+			return attachments[0]->image()->format();
+		}
+
+		VkFormat depth_format() // Returns the image format of the last attachment. NOTE(review): presumably the depth-stencil target is always stored last - confirm; without one this would report a color format
+		{
+			ensure(!attachments.empty()); // back() is dereferenced below, so at least one attachment is required
+			return attachments.back()->image()->format();
+		}
+
 		bool matches(std::vector<vk::image*> fbo_images, u32 width, u32 height)
 		{
 			if (m_width != width || m_height != height)

diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h
@@ -132,6 +132,7 @@ struct cfg_root : cfg::node
 		cfg::_enum<frame_limit_type> frame_limit{ this, "Frame limit", frame_limit_type::_auto, true };
 		cfg::_enum<msaa_level> antialiasing_level{ this, "MSAA", msaa_level::_auto };
 		cfg::_enum<shader_mode> shadermode{ this, "Shader Mode", shader_mode::async_recompiler };
+		cfg::_enum<gpu_preset_level> shader_precision{ this, "Shader Precision", gpu_preset_level::high };
 
 		cfg::_bool write_color_buffers{ this, "Write Color Buffers" };
 		cfg::_bool write_depth_buffer{ this, "Write Depth Buffer" };
@@ -156,11 +157,6 @@ struct cfg_root : cfg::node
 		cfg::_bool disable_vulkan_mem_allocator{ this, "Disable Vulkan Memory Allocator", false };
 		cfg::_bool full_rgb_range_output{ this, "Use full RGB output range", true, true }; // Video out dynamic range
 		cfg::_bool strict_texture_flushing{ this, "Strict Texture Flushing", false };
-#ifdef __APPLE__
-		cfg::_bool disable_native_float16{ this, "Disable native float16 support", true };
-#else
-		cfg::_bool disable_native_float16{ this, "Disable native float16 support", false };
-#endif
 		cfg::_bool multithreaded_rsx{ this, "Multithreaded RSX", false };
 		cfg::_bool relaxed_zcull_sync{ this, "Relaxed ZCULL Sync", false };
 		cfg::_bool enable_3d{ this, "Enable 3D", false };

diff --git a/rpcs3/Emu/system_config_types.cpp b/rpcs3/Emu/system_config_types.cpp
@@ -596,3 +596,18 @@ void fmt_class_string<thread_scheduler_mode>::format(std::string& out, u64 arg)
 		return unknown;
 	});
 }
+
+template <>
+void fmt_class_string<gpu_preset_level>::format(std::string& out, u64 arg) // Hands format_enum a lambda that names each gpu_preset_level value
+{
+	format_enum(out, arg, [](gpu_preset_level value)
+	{
+		switch (value)
+		{
+		case gpu_preset_level::high: return "High";
+		case gpu_preset_level::low: return "Low";
+		}
+
+		return unknown; // Reached for any value without a case above, which includes gpu_preset_level::_auto. NOTE(review): confirm the missing _auto mapping is intentional
+	});
+}
diff --git a/rpcs3/Emu/system_config_types.h b/rpcs3/Emu/system_config_types.h
@@ -288,3 +288,10 @@ enum class zcull_precision_level
 	relaxed,
 	undefined
 };
+
+enum class gpu_preset_level // Value type of the "Shader Precision" video setting
+{
+	high, // Default value of the "Shader Precision" setting
+	low, // The GL and VK fragment decompilers only query native half-float support when this value is selected
+	_auto // NOTE(review): no code path checks this value yet and it has no string in fmt_class_string - confirm intended use
+};
diff --git a/rpcs3/rpcs3qt/emu_settings_type.h b/rpcs3/rpcs3qt/emu_settings_type.h
@@ -74,7 +74,7 @@ enum class emu_settings_type
 	DisableVideoOutput,
 	DisableFIFOReordering,
 	StrictTextureFlushing,
-	DisableNativefloat16,
+	ShaderPrecisionQuality,
 	Enable3D,
 	AnisotropicFilterOverride,
 	ResolutionScale,
@@ -246,14 +246,14 @@ inline static const QMap<emu_settings_type, cfg_location> settings_location =
 	{ emu_settings_type::DisableOcclusionQueries,    { "Video", "Disable ZCull Occlusion Queries"}},
 	{ emu_settings_type::DisableVideoOutput,         { "Video", "Disable Video Output"}},
 	{ emu_settings_type::DisableFIFOReordering,      { "Video", "Disable FIFO Reordering"}},
-	{ emu_settings_type::DisableNativefloat16,       { "Video", "Disable native float16 support"}},
 	{ emu_settings_type::Enable3D,                   { "Video", "Enable 3D"}},
 	{ emu_settings_type::StrictTextureFlushing,      { "Video", "Strict Texture Flushing"}},
 	{ emu_settings_type::ForceCPUBlitEmulation,      { "Video", "Force CPU Blit"}},
 	{ emu_settings_type::DisableOnDiskShaderCache,   { "Video", "Disable On-Disk Shader Cache"}},
 	{ emu_settings_type::DisableVulkanMemAllocator,  { "Video", "Disable Vulkan Memory Allocator"}},
 	{ emu_settings_type::ShaderMode,                 { "Video", "Shader Mode"}},
 	{ emu_settings_type::ShaderCompilerNumThreads,   { "Video", "Shader Compiler Threads"}},
+	{ emu_settings_type::ShaderPrecisionQuality,     { "Video", "Shader Precision"}},
 	{ emu_settings_type::MultithreadedRSX,           { "Video", "Multithreaded RSX"}},
 	{ emu_settings_type::RelaxedZCULL,               { "Video", "Relaxed ZCULL Sync"}},
 	{ emu_settings_type::PreciseZCULL,               { "Video", "Accurate ZCULL stats"}},

diff --git a/rpcs3/rpcs3qt/settings_dialog.cpp b/rpcs3/rpcs3qt/settings_dialog.cpp
@@ -546,6 +546,9 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
 		}
 	}
 
+	m_emu_settings->EnhanceComboBox(ui->shaderPrecision, emu_settings_type::ShaderPrecisionQuality);
+	SubscribeTooltip(ui->shaderPrecision, tooltips.settings.shader_precision);
+
 	// Comboboxes
 	m_emu_settings->EnhanceComboBox(ui->shaderCompilerThreads, emu_settings_type::ShaderCompilerNumThreads, true);
 	SubscribeTooltip(ui->gb_shader_compiler_threads, tooltips.settings.shader_compiler_threads);
@@ -2123,9 +2126,6 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
 	m_emu_settings->EnhanceCheckBox(ui->strictTextureFlushing, emu_settings_type::StrictTextureFlushing);
 	SubscribeTooltip(ui->strictTextureFlushing, tooltips.settings.strict_texture_flushing);
 
-	m_emu_settings->EnhanceCheckBox(ui->disableNativefp16, emu_settings_type::DisableNativefloat16);
-	SubscribeTooltip(ui->disableNativefp16, tooltips.settings.disable_native_fp16);
-
 	m_emu_settings->EnhanceCheckBox(ui->Enable3D, emu_settings_type::Enable3D);
 	SubscribeTooltip(ui->Enable3D, tooltips.settings.enable_3d);
 

diff --git a/rpcs3/rpcs3qt/settings_dialog.ui b/rpcs3/rpcs3qt/settings_dialog.ui
@@ -43,8 +43,8 @@
        <rect>
         <x>0</x>
         <y>0</y>
-        <width>838</width>
-        <height>641</height>
+        <width>821</width>
+        <height>678</height>
        </rect>
       </property>
       <property name="sizePolicy">
@@ -54,7 +54,7 @@
        </sizepolicy>
       </property>
       <property name="currentIndex">
-       <number>0</number>
+       <number>1</number>
       </property>
       <widget class="QWidget" name="coreTab">
        <attribute name="title">
@@ -209,8 +209,8 @@
                 <widget class="QSlider" name="maxPreemptCount">
                  <property name="sizePolicy">
                   <sizepolicy hsizetype="Preferred" vsizetype="Minimum">
-                    <horstretch>0</horstretch>
-                    <verstretch>0</verstretch>
+                   <horstretch>0</horstretch>
+                   <verstretch>0</verstretch>
                   </sizepolicy>
                  </property>
                  <property name="orientation">
@@ -519,7 +519,7 @@
             </item>
             <item>
              <widget class="QWidget" name="widget_gpu_3" native="true">
-              <layout class="QHBoxLayout" name="widget_gpu_3_layout" stretch="1">
+              <layout class="QHBoxLayout" name="widget_gpu_3_layout" stretch="1,1">
                <property name="leftMargin">
                 <number>0</number>
                </property>
@@ -544,6 +544,18 @@
                  </layout>
                 </widget>
                </item>
+               <item>
+                <widget class="QGroupBox" name="gbShaderPrecision">
+                 <property name="title">
+                  <string>Shader Quality</string>
+                 </property>
+                 <layout class="QVBoxLayout" name="gbShaderPrecision_layout">
+                  <item>
+                   <widget class="QComboBox" name="shaderPrecision"/>
+                  </item>
+                 </layout>
+                </widget>
+               </item>
               </layout>
              </widget>
             </item>
@@ -2537,13 +2549,6 @@
                  </property>
                 </widget>
                </item>
-               <item>
-                <widget class="QCheckBox" name="disableNativefp16">
-                 <property name="text">
-                  <string>Disable native float16 support</string>
-                 </property>
-                </widget>
-               </item>
                <item>
                 <widget class="QCheckBox" name="allowHostGPULabels">
                  <property name="text">

diff --git a/rpcs3/rpcs3qt/tooltips.h b/rpcs3/rpcs3qt/tooltips.h
@@ -110,7 +110,6 @@ class Tooltips : public QObject
 		const QString disable_fifo_reordering      = tr("Disables RSX FIFO optimizations completely. Draws are processed as they are received by the DMA puller.");
 		const QString gpu_texture_scaling          = tr("Force all texture transfer, scaling and conversion operations on the GPU.\nMay cause texture corruption in some cases.");
 		const QString strict_texture_flushing      = tr("Forces texture flushing even in situations where it is not necessary/correct. Known to cause visual artifacts, but useful for debugging certain texture cache issues.");
-		const QString disable_native_fp16          = tr("Disables hardware half-float support which is known to cause problems in some rare cases on some GPUs.");
 		const QString enable_3d                    = tr("Enables 3D stereo rendering.\nNote that only anaglyph viewing is supported at the moment.");
 		const QString accurate_ppu_128_loop        = tr("When enabled, PPU atomic operations will operate on entire cache line data, as opposed to a single 64bit block of memory when disabled.\nNumerical values control whether or not to enable the accurate version based on the atomic operation's length.");
 		const QString enable_performance_report    = tr("Measure certain events and print a chart after the emulator is stopped. Don't enable if not asked to.");
@@ -176,6 +175,7 @@ class Tooltips : public QObject
 		const QString async_with_shader_interpreter   = tr("Hybrid rendering mode.\nIf a shader is not found in the cache, the interpreter will be used to render approximated graphics for this shader until it has compiled.");
 		const QString shader_interpreter_only         = tr("All rendering is handled by the interpreter with no attempt to compile native shaders.\nThis mode is very slow and experimental.");
 		const QString shader_compiler_threads         = tr("Number of threads to use for the shader compiler backend.\nOnly has an impact when shader mode is set to one of the asynchronous modes.");
+		const QString shader_precision                = tr("Controls the precision level of generated shaders. Low precision generates much faster code depending on the hardware, but can sometimes generate minor visual glitches or flicker.");
 
 		const QString async_texture_streaming                  = tr("Stream textures to GPU in parallel with 3D rendering using asynchronous compute.\nCan improve performance on more powerful GPUs that have spare headroom.\nOnly works with Vulkan renderer and greatly benefits from having MTRSX enabled if you have a capable CPU.");
 		const QString force_disable_exclusive_fullscreen_mode  = tr("Forces borderless windowed mode for all fullscreen windows. Disables exclusive fullscreen graphics driver optimizations.\nUse when you wish to stream using Vulkan or if your screen goes dim using HDR.\nNote: RPCS3 does not use HDR at all.");