Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rsx/vk: Implement dynamic sized memory heaps #6972

Merged
merged 7 commits into from
Nov 10, 2019
Merged
20 changes: 14 additions & 6 deletions rpcs3/Emu/RSX/Common/ring_buffer_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
* Space between GET and PUT is used by the GPU; this structure checks that this memory is not overwritten.
* User has to update the GET pointer when synchronisation happens.
*/
struct data_heap
class data_heap
{
protected:
/**
* Does alloc cross get position ?
*/
Expand Down Expand Up @@ -43,6 +44,13 @@ struct data_heap
}
}

// Grow the buffer to hold at least size bytes.
// Returns true if the heap was grown, false if it could not be.
// alloc() falls back to this when can_alloc() fails, passing the aligned put position plus the
// aligned allocation size, and throws if this also returns false.
virtual bool grow(size_t size)
{
// Stub: the base heap never grows. Being virtual, this is intended to be overridden by heaps that can resize.
return false;
}

size_t m_size;
size_t m_put_pos; // Start of free space
size_t m_min_guard_size; //If an allocation touches the guard region, reset the heap to avoid going over budget
Expand Down Expand Up @@ -75,15 +83,15 @@ struct data_heap
template<int Alignment>
size_t alloc(size_t size)
{
if (!can_alloc<Alignment>(size))
const size_t alloc_size = align(size, Alignment);
const size_t aligned_put_pos = align(m_put_pos, Alignment);

if (!can_alloc<Alignment>(size) && !grow(aligned_put_pos + alloc_size))
{
fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d allocated=%d requested=%d guard=%d largest_pool=%d" HERE,
m_name, m_size, m_current_allocated_size, size, m_min_guard_size, m_largest_allocated_pool);
}

size_t alloc_size = align(size, Alignment);
size_t aligned_put_pos = align(m_put_pos, Alignment);

const size_t block_length = (aligned_put_pos - m_put_pos) + alloc_size;
m_current_allocated_size += block_length;
m_largest_allocated_pool = std::max(m_largest_allocated_pool, block_length);
Expand All @@ -108,7 +116,7 @@ struct data_heap
return (m_put_pos > 0) ? m_put_pos - 1 : m_size - 1;
}

bool is_critical() const
virtual bool is_critical() const
{
const size_t guard_length = std::max(m_min_guard_size, m_largest_allocated_pool);
return (m_current_allocated_size + guard_length) >= m_size;
Expand Down
197 changes: 0 additions & 197 deletions rpcs3/Emu/RSX/RSXThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -854,66 +854,6 @@ namespace rsx
return{ ptr + first * type_size, count * type_size };
}

/**
 * Returns a read-only byte view over the memory backing one vertex attribute array
 * for the range of vertices referenced by the given draw clause.
 *
 * The view starts at the clause's minimum index (in units of the array stride) and spans
 * one stride per element, plus the host-side size of a single element.
 */
gsl::span<const std::byte> thread::get_raw_vertex_buffer(const rsx::data_array_format_info& vertex_array_info, u32 base_offset, const draw_clause& draw_array_clause) const
{
	// The top bit of the raw offset is handed to get_address as the location;
	// the low 31 bits are the offset relative to the vertex data base.
	const u32 raw_offset = vertex_array_info.offset();
	const u32 location = raw_offset >> 31;
	const u32 vertex_offset = rsx::get_vertex_offset_from_base(base_offset, raw_offset & 0x7fffffff);
	const u32 source_address = rsx::get_address(vertex_offset, location);

	const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array_info.type(), vertex_array_info.size());

	const u32 first_vertex = draw_array_clause.min_index();
	const u32 vertex_total = draw_array_clause.get_elements_count();

	const std::byte* base = vm::_ptr<const std::byte>(source_address);
	const auto stride = vertex_array_info.stride();

	return { base + first_vertex * stride, vertex_total * stride + element_size };
}

/**
 * Collects the vertex input source for every attribute slot that is either enabled in the
 * state's input mask or flagged in consumed_attrib_mask. Slots that are neither are skipped.
 *
 * For each remaining slot the first matching source is used, in this order:
 *   1. the vertex array, when its size is non-zero
 *   2. the push buffer, when it holds more than one vertex
 *   3. the register-provided attribute, when its size is non-zero
 *   4. otherwise an empty placeholder for the slot
 */
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>>
thread::get_vertex_buffers(const rsx::rsx_state& state, const u64 consumed_attrib_mask) const
{
	using vertex_source = std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>;

	std::vector<vertex_source> sources;
	sources.reserve(rsx::limits::vertex_count);

	const u32 enabled_mask = state.vertex_attrib_input_mask();

	for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
	{
		const bool is_enabled = (enabled_mask & (1 << index)) != 0;
		const bool is_consumed = (consumed_attrib_mask & (1ull << index)) != 0;

		if (!(is_enabled || is_consumed))
		{
			continue;
		}

		const rsx::data_array_format_info& array_info = state.vertex_arrays_info[index];
		const auto& push_buffer = vertex_push_buffers[index];
		const rsx::register_vertex_data_info& register_info = state.register_vertex_info[index];

		if (array_info.size() > 0)
		{
			// Attribute is sourced from a vertex array in memory
			sources.emplace_back(vertex_array_buffer{
				array_info.type(), array_info.size(), array_info.stride(),
				get_raw_vertex_buffer(array_info, state.vertex_data_base_offset(), state.current_draw_clause),
				index, true});
		}
		else if (push_buffer.vertex_count > 1)
		{
			// Attribute is sourced from the push buffer; each component occupies one u32
			const u8 element_size = push_buffer.size * sizeof(u32);

			const gsl::span<const std::byte> push_data = {
				reinterpret_cast<const std::byte*>(push_buffer.data.data()),
				push_buffer.vertex_count * element_size };

			sources.emplace_back(vertex_array_buffer{ push_buffer.type, push_buffer.size, element_size, push_data, index, false });
		}
		else if (register_info.size > 0)
		{
			// Attribute value comes from the register state
			sources.emplace_back(vertex_array_register{register_info.type, register_info.size, register_info.data, index});
		}
		else
		{
			sources.emplace_back(empty_vertex_array{index});
		}
	}

	return sources;
}

std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
thread::get_draw_command(const rsx::rsx_state& state) const
{
Expand Down Expand Up @@ -961,33 +901,6 @@ namespace rsx
}
}


//std::future<void> thread::add_internal_task(std::function<bool()> callback)
//{
// std::lock_guard lock(m_mtx_task);
// m_internal_tasks.emplace_back(callback);

// return m_internal_tasks.back().promise.get_future();
//}

//void thread::invoke(std::function<bool()> callback)
//{
// if (get() == thread_ctrl::get_current())
// {
// while (true)
// {
// if (callback())
// {
// break;
// }
// }
// }
// else
// {
// add_internal_task(callback).wait();
// }
//}

namespace
{
bool is_int_type(rsx::vertex_base_type type)
Expand Down Expand Up @@ -1831,116 +1744,6 @@ namespace rsx
}
}

/**
 * Rebuilds current_fragment_program from the current method register state (legacy path).
 *
 * get_surface_info is invoked as get_surface_info(texture_address, texture, flag) and yields
 * (surface_exists, surface_pitch). It is queried with flag=false first and, if that does not
 * report an existing surface with a non-zero pitch, again with flag=true.
 * NOTE(review): the flag presumably selects depth surfaces (the second path handles depth formats) - confirm against callers.
 */
void thread::get_current_fragment_program_legacy(const std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)>& get_surface_info)
{
// Start from a value-initialized program; the bitmasks below are accumulated with |=
auto &result = current_fragment_program = {};

// The low two bits of the register hold the location biased by one; the remaining bits are the offset
const u32 shader_program = rsx::method_registers.shader_program_address();
const u32 program_location = (shader_program & 0x3) - 1;
const u32 program_offset = (shader_program & ~0x3);

result.addr = vm::base(rsx::get_address(program_offset, program_location));
auto program_info = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr);

// Advance past the region before the program start reported by the analysis
result.addr = ((u8*)result.addr + program_info.program_start_offset);
result.offset = program_offset + program_info.program_start_offset;
result.ucode_length = program_info.program_ucode_length;
result.valid = true;
// Only the 32-bit exports and depth export control bits are carried over
result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
result.unnormalized_coords = 0;
result.two_sided_lighting = rsx::method_registers.two_side_light_en();
result.redirected_textures = 0;
result.shadow_textures = 0;

const auto resolution_scale = rsx::get_resolution_scale();

for (u32 i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
// Per-texture defaults, applied even when the texture ends up being skipped
auto &tex = rsx::method_registers.fragment_textures[i];
result.texture_scale[i][0] = 1.f;
result.texture_scale[i][1] = 1.f;
result.textures_alpha_kill[i] = 0;
result.textures_zfunc[i] = 0;

// Only textures that are enabled and referenced by the analysed program are processed
if (tex.enabled() && (program_info.referenced_textures_mask & (1 << i)))
{
// Two bits of texture_dimensions per texture unit
result.texture_dimensions |= ((u32)tex.get_extended_texture_dimension() << (i << 1));

if (tex.alpha_kill_enabled())
{
//alphakill can be ignored unless a valid comparison function is set
// (valid here means strictly between never and always)
const auto func = tex.zfunc();
if (func < rsx::comparison_function::always && func > rsx::comparison_function::never)
{
result.textures_alpha_kill[i] = 1;
result.textures_zfunc[i] = (u8)func;
}
}

const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
const u32 raw_format = tex.format();

// Record textures whose format carries the UN (unnormalized coordinates) flag
if (raw_format & CELL_GCM_TEXTURE_UN)
result.unnormalized_coords |= (1 << i);

bool surface_exists;
u16 surface_pitch;

// First lookup (flag=false)
std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, tex, false);

if (surface_exists && surface_pitch)
{
// Coordinate scaling is only adjusted for unnormalized coordinates
if (raw_format & CELL_GCM_TEXTURE_UN)
{
result.texture_scale[i][0] = (resolution_scale * (float)surface_pitch) / tex.pitch();
result.texture_scale[i][1] = resolution_scale;
}
}
else
{
// Second lookup (flag=true); note that only surface_exists is checked here, not the pitch
std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, tex, true);
if (surface_exists)
{
if (raw_format & CELL_GCM_TEXTURE_UN)
{
result.texture_scale[i][0] = (resolution_scale * (float)surface_pitch) / tex.pitch();
result.texture_scale[i][1] = resolution_scale;
}

// Strip the LN/UN flag bits to obtain the base format
const u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
switch (format)
{
// Color formats: mark the texture as redirected and pass the remap value along
case CELL_GCM_TEXTURE_A8R8G8B8:
case CELL_GCM_TEXTURE_D8R8G8B8:
case CELL_GCM_TEXTURE_A4R4G4B4:
case CELL_GCM_TEXTURE_R5G6B5:
{
u32 remap = tex.remap();
result.redirected_textures |= (1 << i);
// The remap bits are stored unchanged (bit-for-bit) in the third scale slot
result.texture_scale[i][2] = std::bit_cast<f32>(remap);
break;
}
// Depth formats: flag as a shadow texture when alpha kill is not in effect
// and the comparison function is strictly between never and always
case CELL_GCM_TEXTURE_DEPTH16:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
case CELL_GCM_TEXTURE_DEPTH24_D8:
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
{
const auto compare_mode = tex.zfunc();
if (result.textures_alpha_kill[i] == 0 &&
compare_mode < rsx::comparison_function::always &&
compare_mode > rsx::comparison_function::never)
result.shadow_textures |= (1 << i);
break;
}
// Any other format is only reported; no flags are set for it
default:
LOG_ERROR(RSX, "Depth texture bound to pipeline with unexpected format 0x%X", format);
}
}
}
}
}
}

void thread::reset()
{
rsx::method_registers.reset();
Expand Down
7 changes: 1 addition & 6 deletions rpcs3/Emu/RSX/RSXThread.h
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,6 @@ namespace rsx
* returns whether surface is a render target and surface pitch in native format
*/
void get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors);
void get_current_fragment_program_legacy(const std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)>& get_surface_info);

public:
double fps_limit = 59.94;
Expand Down Expand Up @@ -714,11 +713,7 @@ namespace rsx
flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional);
virtual void sync_hint(FIFO_hint /*hint*/, u64 /*arg*/) {}

gsl::span<const std::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;
gsl::span<const std::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const draw_clause& draw_array_clause) const;

std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>>
get_vertex_buffers(const rsx::rsx_state& state, u64 consumed_attrib_mask) const;
gsl::span<const gsl::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;

std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
get_draw_command(const rsx::rsx_state& state) const;
Expand Down
23 changes: 18 additions & 5 deletions rpcs3/Emu/RSX/VK/VKGSRender.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -448,11 +448,11 @@ VKGSRender::VKGSRender() : GSRender()
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;

//VRAM allocation
m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000);
m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000, VK_TRUE);
m_fragment_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment env buffer");
m_vertex_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex env buffer");
m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment texture params buffer");
m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer");
m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer", 0x10000, VK_TRUE);
m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer");
m_transform_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer");
m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
Expand Down Expand Up @@ -528,6 +528,8 @@ VKGSRender::VKGSRender() : GSRender()
m_ui_renderer = std::make_unique<vk::ui_overlay_renderer>();
m_ui_renderer->create(*m_current_command_buffer, m_texture_upload_buffer_ring_info);

m_occlusion_query_pool.initialize(*m_current_command_buffer);

backend_config.supports_multidraw = true;

// NOTE: We do not actually need multiple sample support for A2C to work
Expand Down Expand Up @@ -1090,7 +1092,17 @@ void VKGSRender::emit_geometry(u32 sub_index)
// Allocate stream layout memory for this batch
m_vertex_layout_stream_info.range = rsx::method_registers.current_draw_clause.pass_count() * 128;
m_vertex_layout_stream_info.offset = m_vertex_layout_ring_info.alloc<256>(m_vertex_layout_stream_info.range);
// m_vertex_layout_stream_info.buffer = m_vertex_layout_ring_info.heap->value;

if (vk::test_status_interrupt(vk::heap_changed))
{
if (m_vertex_layout_storage &&
m_vertex_layout_storage->info.buffer != m_vertex_layout_ring_info.heap->value)
{
m_current_frame->buffer_views_to_clean.push_back(std::move(m_vertex_layout_storage));
}

vk::clear_status_interrupt(vk::heap_changed);
}
}
else if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer)
{
Expand Down Expand Up @@ -2831,8 +2843,7 @@ void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait
// Wait before sync block below
rsx::g_dma_manager.sync();

// TODO: Better check for shadowed memory
if (m_attrib_ring_info.shadow)
if (vk::test_status_interrupt(vk::heap_dirty))
{
if (m_attrib_ring_info.dirty() ||
m_fragment_env_ring_info.dirty() ||
Expand Down Expand Up @@ -2862,6 +2873,8 @@ void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait
m_secondary_command_buffer.submit(m_swapchain->get_graphics_queue(),
VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
}

vk::clear_status_interrupt(vk::heap_dirty);
}

// End any active renderpasses; the caller should handle reopening
Expand Down
13 changes: 6 additions & 7 deletions rpcs3/Emu/RSX/VK/VKGSRender.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,13 @@ namespace vk
};
}

//Heap allocation sizes in MB
//NOTE: Texture uploads can be huge, up to 16MB for a single texture (4096x4096px)
#define VK_ATTRIB_RING_BUFFER_SIZE_M 384
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256
// Initial heap allocation values. The heaps are growable and will automatically increase in size to accommodate demands
#define VK_ATTRIB_RING_BUFFER_SIZE_M 64
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 64
#define VK_UBO_RING_BUFFER_SIZE_M 16
#define VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M 64
#define VK_FRAGMENT_CONSTANTS_BUFFER_SIZE_M 64
#define VK_INDEX_RING_BUFFER_SIZE_M 64
#define VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M 16
#define VK_FRAGMENT_CONSTANTS_BUFFER_SIZE_M 16
#define VK_INDEX_RING_BUFFER_SIZE_M 16

#define VK_MAX_ASYNC_CB_COUNT 64
#define VK_MAX_ASYNC_FRAMES 2
Expand Down
Loading