RPCS3 · kd-11 · Nov 10, 2019 · Nov 9, 2019 · Nov 9, 2019 · Nov 9, 2019
diff --git a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h
@@ -10,8 +10,9 @@
  * Space between GET and PUT is used by the GPU ; this structure check that this memory is not overwritten.
  * User has to update the GET pointer when synchronisation happens.
  */
-struct data_heap
+class data_heap
 {
+protected:
 	/**
 	* Does alloc cross get position ?
 	*/
@@ -43,6 +44,13 @@ struct data_heap
 		}
 	}
 
+	// Grow the buffer to hold at least 'size' bytes. Returns false if the heap could not be grown
+	virtual bool grow(size_t size)
+	{
+		// Default implementation never grows; returning false makes alloc() throw its "not big enough" error
+		return false;
+	}
+
 	size_t m_size;
 	size_t m_put_pos; // Start of free space
 	size_t m_min_guard_size; //If an allocation touches the guard region, reset the heap to avoid going over budget
@@ -75,15 +83,15 @@ struct data_heap
 	template<int Alignment>
 	size_t alloc(size_t size)
 	{
-		if (!can_alloc<Alignment>(size))
+		const size_t alloc_size = align(size, Alignment);
+		const size_t aligned_put_pos = align(m_put_pos, Alignment);
+
+		if (!can_alloc<Alignment>(size) && !grow(aligned_put_pos + alloc_size))
 		{
 			fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d allocated=%d requested=%d guard=%d largest_pool=%d" HERE,
 					m_name, m_size, m_current_allocated_size, size, m_min_guard_size, m_largest_allocated_pool);
 		}
 
-		size_t alloc_size = align(size, Alignment);
-		size_t aligned_put_pos = align(m_put_pos, Alignment);
-
 		const size_t block_length = (aligned_put_pos - m_put_pos) + alloc_size;
 		m_current_allocated_size += block_length;
 		m_largest_allocated_pool = std::max(m_largest_allocated_pool, block_length);
@@ -108,7 +116,7 @@ struct data_heap
 		return (m_put_pos > 0) ? m_put_pos - 1 : m_size - 1;
 	}
 
-	bool is_critical() const
+	virtual bool is_critical() const
 	{
 		const size_t guard_length = std::max(m_min_guard_size, m_largest_allocated_pool);
 		return (m_current_allocated_size + guard_length) >= m_size;

diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -854,66 +854,6 @@ namespace rsx
 		return{ ptr + first * type_size, count * type_size };
 	}
 
-	gsl::span<const std::byte> thread::get_raw_vertex_buffer(const rsx::data_array_format_info& vertex_array_info, u32 base_offset, const draw_clause& draw_array_clause) const
-	{
-		u32 offset  = vertex_array_info.offset();
-		u32 address = rsx::get_address(rsx::get_vertex_offset_from_base(base_offset, offset & 0x7fffffff), offset >> 31);
-
-		u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array_info.type(), vertex_array_info.size());
-
-		const u32 first = draw_array_clause.min_index();
-		const u32 count = draw_array_clause.get_elements_count();
-
-		const std::byte* ptr = vm::_ptr<const std::byte>(address);
-		return {ptr + first * vertex_array_info.stride(), count * vertex_array_info.stride() + element_size};
-	}
-
-	std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>>
-	thread::get_vertex_buffers(const rsx::rsx_state& state, const u64 consumed_attrib_mask) const
-	{
-		std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>> result;
-		result.reserve(rsx::limits::vertex_count);
-
-		u32 input_mask = state.vertex_attrib_input_mask();
-		for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
-		{
-			const bool enabled = !!(input_mask & (1 << index));
-			const bool consumed = !!(consumed_attrib_mask & (1ull << index));
-
-			if (!enabled && !consumed)
-				continue;
-
-			if (state.vertex_arrays_info[index].size() > 0)
-			{
-				const rsx::data_array_format_info& info = state.vertex_arrays_info[index];
-				result.emplace_back(vertex_array_buffer{info.type(), info.size(), info.stride(),
-					get_raw_vertex_buffer(info, state.vertex_data_base_offset(), state.current_draw_clause), index, true});
-				continue;
-			}
-
-			if (vertex_push_buffers[index].vertex_count > 1)
-			{
-				const auto& info = vertex_push_buffers[index];
-				const u8 element_size = info.size * sizeof(u32);
-
-				gsl::span<const std::byte> vertex_src = { (const std::byte*)vertex_push_buffers[index].data.data(), vertex_push_buffers[index].vertex_count * element_size };
-				result.emplace_back(vertex_array_buffer{ info.type, info.size, element_size, vertex_src, index, false });
-				continue;
-			}
-
-			if (state.register_vertex_info[index].size > 0)
-			{
-				const rsx::register_vertex_data_info& info = state.register_vertex_info[index];
-				result.emplace_back(vertex_array_register{info.type, info.size, info.data, index});
-				continue;
-			}
-
-			result.emplace_back(empty_vertex_array{index});
-		}
-
-		return result;
-	}
-
 	std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
 	thread::get_draw_command(const rsx::rsx_state& state) const
 	{
@@ -961,33 +901,6 @@ namespace rsx
 		}
 	}
 
-
-	//std::future<void> thread::add_internal_task(std::function<bool()> callback)
-	//{
-	//	std::lock_guard lock(m_mtx_task);
-	//	m_internal_tasks.emplace_back(callback);
-
-	//	return m_internal_tasks.back().promise.get_future();
-	//}
-
-	//void thread::invoke(std::function<bool()> callback)
-	//{
-	//	if (get() == thread_ctrl::get_current())
-	//	{
-	//		while (true)
-	//		{
-	//			if (callback())
-	//			{
-	//				break;
-	//			}
-	//		}
-	//	}
-	//	else
-	//	{
-	//		add_internal_task(callback).wait();
-	//	}
-	//}
-
 	namespace
 	{
 		bool is_int_type(rsx::vertex_base_type type)
@@ -1831,116 +1744,6 @@ namespace rsx
 		}
 	}
 
-	void thread::get_current_fragment_program_legacy(const std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)>& get_surface_info)
-	{
-		auto &result = current_fragment_program = {};
-
-		const u32 shader_program = rsx::method_registers.shader_program_address();
-		const u32 program_location = (shader_program & 0x3) - 1;
-		const u32 program_offset = (shader_program & ~0x3);
-
-		result.addr = vm::base(rsx::get_address(program_offset, program_location));
-		auto program_info = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr);
-
-		result.addr = ((u8*)result.addr + program_info.program_start_offset);
-		result.offset = program_offset + program_info.program_start_offset;
-		result.ucode_length = program_info.program_ucode_length;
-		result.valid = true;
-		result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
-		result.unnormalized_coords = 0;
-		result.two_sided_lighting = rsx::method_registers.two_side_light_en();
-		result.redirected_textures = 0;
-		result.shadow_textures = 0;
-
-		const auto resolution_scale = rsx::get_resolution_scale();
-
-		for (u32 i = 0; i < rsx::limits::fragment_textures_count; ++i)
-		{
-			auto &tex = rsx::method_registers.fragment_textures[i];
-			result.texture_scale[i][0] = 1.f;
-			result.texture_scale[i][1] = 1.f;
-			result.textures_alpha_kill[i] = 0;
-			result.textures_zfunc[i] = 0;
-
-			if (tex.enabled() && (program_info.referenced_textures_mask & (1 << i)))
-			{
-				result.texture_dimensions |= ((u32)tex.get_extended_texture_dimension() << (i << 1));
-
-				if (tex.alpha_kill_enabled())
-				{
-					//alphakill can be ignored unless a valid comparison function is set
-					const auto func = tex.zfunc();
-					if (func < rsx::comparison_function::always && func > rsx::comparison_function::never)
-					{
-						result.textures_alpha_kill[i] = 1;
-						result.textures_zfunc[i] = (u8)func;
-					}
-				}
-
-				const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
-				const u32 raw_format = tex.format();
-
-				if (raw_format & CELL_GCM_TEXTURE_UN)
-					result.unnormalized_coords |= (1 << i);
-
-				bool surface_exists;
-				u16  surface_pitch;
-
-				std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, tex, false);
-
-				if (surface_exists && surface_pitch)
-				{
-					if (raw_format & CELL_GCM_TEXTURE_UN)
-					{
-						result.texture_scale[i][0] = (resolution_scale * (float)surface_pitch) / tex.pitch();
-						result.texture_scale[i][1] = resolution_scale;
-					}
-				}
-				else
-				{
-					std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, tex, true);
-					if (surface_exists)
-					{
-						if (raw_format & CELL_GCM_TEXTURE_UN)
-						{
-							result.texture_scale[i][0] = (resolution_scale * (float)surface_pitch) / tex.pitch();
-							result.texture_scale[i][1] = resolution_scale;
-						}
-
-						const u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
-						switch (format)
-						{
-						case CELL_GCM_TEXTURE_A8R8G8B8:
-						case CELL_GCM_TEXTURE_D8R8G8B8:
-						case CELL_GCM_TEXTURE_A4R4G4B4:
-						case CELL_GCM_TEXTURE_R5G6B5:
-						{
-							u32 remap = tex.remap();
-							result.redirected_textures |= (1 << i);
-							result.texture_scale[i][2] = std::bit_cast<f32>(remap);
-							break;
-						}
-						case CELL_GCM_TEXTURE_DEPTH16:
-						case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
-						case CELL_GCM_TEXTURE_DEPTH24_D8:
-						case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
-						{
-							const auto compare_mode = tex.zfunc();
-							if (result.textures_alpha_kill[i] == 0 &&
-								compare_mode < rsx::comparison_function::always &&
-								compare_mode > rsx::comparison_function::never)
-								result.shadow_textures |= (1 << i);
-							break;
-						}
-						default:
-							LOG_ERROR(RSX, "Depth texture bound to pipeline with unexpected format 0x%X", format);
-						}
-					}
-				}
-			}
-		}
-	}
-
 	void thread::reset()
 	{
 		rsx::method_registers.reset();

diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h
@@ -635,7 +635,6 @@ namespace rsx
 		 * returns whether surface is a render target and surface pitch in native format
 		 */
 		void get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors);
-		void get_current_fragment_program_legacy(const std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)>& get_surface_info);
 
 	public:
 		double fps_limit = 59.94;
@@ -714,11 +713,7 @@ namespace rsx
 		flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional);
 		virtual void sync_hint(FIFO_hint /*hint*/, u64 /*arg*/) {}
 
-		gsl::span<const std::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;
-		gsl::span<const std::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const draw_clause& draw_array_clause) const;
-
-		std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>>
-		get_vertex_buffers(const rsx::rsx_state& state, u64 consumed_attrib_mask) const;
+		gsl::span<const gsl::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;
 
 		std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
 		get_draw_command(const rsx::rsx_state& state) const;

diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@@ -448,11 +448,11 @@ VKGSRender::VKGSRender() : GSRender()
 	semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
 
 	//VRAM allocation
-	m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000);
+	m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000, VK_TRUE);
 	m_fragment_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment env buffer");
 	m_vertex_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex env buffer");
 	m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment texture params buffer");
-	m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer");
+	m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer", 0x10000, VK_TRUE);
 	m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer");
 	m_transform_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer");
 	m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
@@ -528,6 +528,8 @@ VKGSRender::VKGSRender() : GSRender()
 	m_ui_renderer = std::make_unique<vk::ui_overlay_renderer>();
 	m_ui_renderer->create(*m_current_command_buffer, m_texture_upload_buffer_ring_info);
 
+	m_occlusion_query_pool.initialize(*m_current_command_buffer);
+
 	backend_config.supports_multidraw = true;
 
 	// NOTE: We do not actually need multiple sample support for A2C to work
@@ -1090,7 +1092,17 @@ void VKGSRender::emit_geometry(u32 sub_index)
 		// Allocate stream layout memory for this batch
 		m_vertex_layout_stream_info.range = rsx::method_registers.current_draw_clause.pass_count() * 128;
 		m_vertex_layout_stream_info.offset = m_vertex_layout_ring_info.alloc<256>(m_vertex_layout_stream_info.range);
-		// m_vertex_layout_stream_info.buffer = m_vertex_layout_ring_info.heap->value;
+
+		if (vk::test_status_interrupt(vk::heap_changed))
+		{
+			if (m_vertex_layout_storage &&
+				m_vertex_layout_storage->info.buffer != m_vertex_layout_ring_info.heap->value)
+			{
+				m_current_frame->buffer_views_to_clean.push_back(std::move(m_vertex_layout_storage));
+			}
+
+			vk::clear_status_interrupt(vk::heap_changed);
+		}
 	}
 	else if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer)
 	{
@@ -2831,8 +2843,7 @@ void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait
 	// Wait before sync block below
 	rsx::g_dma_manager.sync();
 
-	// TODO: Better check for shadowed memory
-	if (m_attrib_ring_info.shadow)
+	if (vk::test_status_interrupt(vk::heap_dirty))
 	{
 		if (m_attrib_ring_info.dirty() ||
 			m_fragment_env_ring_info.dirty() ||
@@ -2862,6 +2873,8 @@ void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait
 			m_secondary_command_buffer.submit(m_swapchain->get_graphics_queue(),
 				VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
 		}
+
+		vk::clear_status_interrupt(vk::heap_dirty);
 	}
 
 	// End any active renderpasses; the caller should handle reopening

diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h
@@ -36,14 +36,13 @@ namespace vk
 	};
 }
 
-//Heap allocation sizes in MB
-//NOTE: Texture uploads can be huge, up to 16MB for a single texture (4096x4096px)
-#define VK_ATTRIB_RING_BUFFER_SIZE_M 384
-#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256
+// Initial heap allocation values. The heaps are growable and will automatically increase in size to accommodate demand
+#define VK_ATTRIB_RING_BUFFER_SIZE_M 64
+#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 64
 #define VK_UBO_RING_BUFFER_SIZE_M 16
-#define VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M 64
-#define VK_FRAGMENT_CONSTANTS_BUFFER_SIZE_M 64
-#define VK_INDEX_RING_BUFFER_SIZE_M 64
+#define VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M 16
+#define VK_FRAGMENT_CONSTANTS_BUFFER_SIZE_M 16
+#define VK_INDEX_RING_BUFFER_SIZE_M 16
 
 #define VK_MAX_ASYNC_CB_COUNT 64
 #define VK_MAX_ASYNC_FRAMES 2