Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rsx: Tiled memory handling improvements #15160

Merged
merged 10 commits into from
Feb 10, 2024
18 changes: 10 additions & 8 deletions rpcs3/Emu/RSX/Common/texture_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ namespace rsx
rsx::texture_upload_context context, rsx::texture_dimension_extended type, bool swizzled, component_order swizzle_flags, rsx::flags32_t flags) = 0;
virtual section_storage_type* upload_image_from_cpu(commandbuffer_type&, const address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u32 pitch, u32 gcm_format, texture_upload_context context,
const std::vector<rsx::subresource_layout>& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) = 0;
virtual section_storage_type* create_nul_section(commandbuffer_type&, const address_range &rsx_range, const image_section_attributes_t& attrs, bool memory_load) = 0;
virtual section_storage_type* create_nul_section(commandbuffer_type&, const address_range &rsx_range, const image_section_attributes_t& attrs, const GCM_tile_reference& tile, bool memory_load) = 0;
virtual void set_component_order(section_storage_type& section, u32 gcm_format, component_order expected) = 0;
virtual void insert_texture_barrier(commandbuffer_type&, image_storage_type* tex, bool strong_ordering = true) = 0;
virtual image_view_type generate_cubemap_from_images(commandbuffer_type&, u32 gcm_format, u16 size, const std::vector<copy_region_descriptor>& sources, const texture_channel_remap_t& remap_vector) = 0;
Expand Down Expand Up @@ -2551,11 +2551,10 @@ namespace rsx
src_address += (src.width - src_w) * src_bpp;
}

const auto is_tiled_mem = [&](const utils::address_range& range)
const auto get_tiled_region = [&](const utils::address_range& range)
{
auto rsxthr = rsx::get_current_renderer();
auto region = rsxthr->get_tiled_memory_region(range);
return region.tile != nullptr;
return rsxthr->get_tiled_memory_region(range);
};

auto rtt_lookup = [&m_rtts, &cmd, &scale_x, &scale_y, this](u32 address, u32 width, u32 height, u32 pitch, u8 bpp, rsx::flags32_t access, bool allow_clipped) -> typename surface_store_type::surface_overlap_info
Expand Down Expand Up @@ -2662,8 +2661,10 @@ namespace rsx
};

// Check tiled mem
const auto dst_is_tiled = is_tiled_mem(utils::address_range::start_length(dst_address, dst.pitch * dst.clip_height));
const auto src_is_tiled = is_tiled_mem(utils::address_range::start_length(src_address, src.pitch * src.height));
const auto dst_tile = get_tiled_region(utils::address_range::start_length(dst_address, dst.pitch * dst.clip_height));
const auto src_tile = get_tiled_region(utils::address_range::start_length(src_address, src.pitch * src.height));
const auto dst_is_tiled = !!dst_tile;
const auto src_is_tiled = !!src_tile;

// Check if src/dst are parts of render targets
typename surface_store_type::surface_overlap_info dst_subres;
Expand Down Expand Up @@ -3219,9 +3220,10 @@ namespace rsx
{
.pitch = dst.pitch,
.width = static_cast<u16>(dst_dimensions.width),
.height = static_cast<u16>(dst_dimensions.height)
.height = static_cast<u16>(dst_dimensions.height),
.bpp = dst_bpp
};
cached_dest = create_nul_section(cmd, rsx_range, attrs, force_dma_load);
cached_dest = create_nul_section(cmd, rsx_range, attrs, dst_tile, force_dma_load);
}
else
{
Expand Down
12 changes: 7 additions & 5 deletions rpcs3/Emu/RSX/Common/tiled_dma_copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ namespace rsx
uint32_t num_tiles_per_row;
uint32_t tile_base_address;
uint32_t tile_size;
uint32_t tile_offset;
uint32_t tile_address_offset;
uint32_t tile_rw_offset;
uint32_t tile_pitch;
uint32_t tile_bank;
uint32_t image_width;
Expand All @@ -33,7 +34,7 @@ namespace rsx

static inline void tiled_dma_copy(const uint32_t row, const uint32_t col, const detiler_config& conf, char* tiled_data, char* linear_data, int direction)
{
const uint32_t row_offset = (row * conf.tile_pitch) + conf.tile_base_address + conf.tile_offset;
const uint32_t row_offset = (row * conf.tile_pitch) + conf.tile_base_address + conf.tile_address_offset;
const uint32_t this_address = row_offset + (col * conf.image_bpp);

// 1. Calculate row_addr
Expand Down Expand Up @@ -103,8 +104,8 @@ namespace rsx

// Calculate relative addresses and sample
const uint32_t linear_image_offset = (row * conf.image_pitch) + (col * conf.image_bpp);
const uint32_t tile_base_offset = tile_address - conf.tile_base_address; // Distance from tile base address
const uint32_t tile_data_offset = tile_base_offset - conf.tile_offset; // Distance from data base address
const uint32_t tile_base_offset = tile_address - conf.tile_base_address; // Distance from tile base address
const uint32_t tile_data_offset = tile_base_offset - conf.tile_rw_offset; // Distance from data base address

if (tile_base_offset >= conf.tile_size)
{
Expand Down Expand Up @@ -160,7 +161,8 @@ namespace rsx
.num_tiles_per_row = tiles_per_row,
.tile_base_address = base_address,
.tile_size = tile_size,
.tile_offset = base_offset,
.tile_address_offset = base_offset,
.tile_rw_offset = base_offset,
.tile_pitch = row_pitch_in_bytes,
.tile_bank = bank_sense,
.image_width = image_width,
Expand Down
9 changes: 9 additions & 0 deletions rpcs3/Emu/RSX/Core/RSXContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,13 @@ namespace rsx

return {};
}

// Widens `range` so that it starts and ends on a multiple of (64 * tile->pitch) bytes,
// measured relative to this tile's base_address, and returns the widened range.
// NOTE(review): dereferences `tile` without a check - callers presumably only invoke this
// on a valid (non-null) tile reference; confirm.
// NOTE(review): assumes range.start >= base_address; the subtraction below is unsigned.
utils::address_range GCM_tile_reference::tile_align(const utils::address_range& range) const
{
	// Alignment unit: 64 times the tile pitch (presumably 64 rows of the tile, in bytes).
	const auto block_size = 64 * tile->pitch;

	// Work with offsets relative to the tile base so the rounding is anchored to the tile,
	// not to absolute address 0.
	const u32 first_offset = rsx::align_down2(range.start - base_address, block_size);

	// The +1 suggests range.end is an inclusive bound; this converts it to a one-past-the-end
	// offset before it is rounded (presumably upwards) by align2.
	const u32 limit_offset = rsx::align2(range.end - base_address + 1, block_size);

	// Translate back to an absolute start address plus a length.
	const auto aligned_start = first_offset + base_address;
	const auto aligned_length = limit_offset - first_offset;
	return utils::address_range::start_length(aligned_start, aligned_length);
}
}
2 changes: 2 additions & 0 deletions rpcs3/Emu/RSX/Core/RSXContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ namespace rsx
{
return !!tile;
}

utils::address_range tile_align(const rsx::address_range& range) const;
};

struct GCM_context
Expand Down
1 change: 1 addition & 0 deletions rpcs3/Emu/RSX/GL/GLTextureCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,7 @@ namespace gl
gl::command_context& /*cmd*/,
const utils::address_range& rsx_range,
const rsx::image_section_attributes_t& attrs,
const rsx::GCM_tile_reference& /*tile*/,
bool /*memory_load*/) override
{
auto& cached = *find_cached_texture(rsx_range, { .gcm_format = RSX_GCM_FORMAT_IGNORED }, true, false, false);
Expand Down
58 changes: 30 additions & 28 deletions rpcs3/Emu/RSX/Program/GLSLSnippets/RSXMemoryTiling.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -29,35 +29,37 @@ layout(%set, binding=SSBO_LOCATION(1), std430) LINEAR_DATA_MODIFIER restrict buf
#ifdef VULKAN
layout(%push_block) uniform Configuration
{
uint prime;
uint factor;
uint num_tiles_per_row;
uint tile_base_address;
uint tile_size;
uint tile_offset;
uint tile_pitch;
uint tile_bank;
uint image_width;
uint image_height;
uint image_pitch;
uint image_bpp;
uint prime; /* Prime factor derived from the number of tiles per row */
uint factor; /* Counterpart to the prime value. prime * factor = tiles per row. */
uint num_tiles_per_row; /* Pitch / tile-width. Each "tile" is 256 bytes long */
uint tile_base_address; /* Base address for this tile. */
uint tile_size; /* Size of the whole tile. */
uint tile_address_offset; /* Address offset where the texture region sits. */
uint tile_rw_offset; /* Access offset. If we load the entire tile then this is 0, but can be a multiple of pitch if we skip some rows for performance reasons. */
uint tile_pitch; /* Row length in bytes for every line in the tile and consequently the image. */
uint tile_bank; /* Bank sense offset. Acts as a memory-subsystem bias so that different FBOS can make use of different parts of the circuitry */
uint image_width; /* Width of the linear 2D region we're encoding/decoding */
uint image_height; /* Height of the linear 2D region to encode/decode */
uint image_pitch; /* Image pitch. The incoming data may be from a GPU operation with packed pixels which can have a different pitch than the tile we're writing from/to */
uint image_bpp; /* Texel width of the image format. */
};
#else
uniform uint prime;
uniform uint factor;
uniform uint num_tiles_per_row;
uniform uint tile_base_address;
uniform uint tile_size;
uniform uint tile_offset;
uniform uint tile_pitch;
uniform uint tile_bank;
uniform uint image_width;
uniform uint image_height;
uniform uint image_pitch;
uniform uint image_bpp;
uniform uint prime; /* Prime factor derived from the number of tiles per row */
uniform uint factor; /* Counterpart to the prime value. prime * factor = tiles per row. */
uniform uint num_tiles_per_row; /* Pitch / tile-width. Each "tile" is 256 bytes long */
uniform uint tile_base_address; /* Base address for this tile. */
uniform uint tile_size; /* Size of the whole tile. */
uniform uint tile_address_offset; /* Address offset where the texture region sits. */
uniform uint tile_rw_offset; /* Access offset. If we load the entire tile then this is 0, but can be a multiple of pitch if we skip some rows for performance reasons. */
uniform uint tile_pitch; /* Row length in bytes for every line in the tile and consequently the image. */
uniform uint tile_bank; /* Bank sense offset. Acts as a memory-subsystem bias so that different FBOS can make use of different parts of the circuitry */
uniform uint image_width; /* Width of the linear 2D region we're encoding/decoding */
uniform uint image_height; /* Height of the linear 2D region to encode/decode */
uniform uint image_pitch; /* Image pitch. The incoming data may be from a GPU operation with packed pixels which can have a different pitch than the tile we're writing from/to */
uniform uint image_bpp; /* Texel width of the image format. */
#endif

// Constants
// Hard constants, set by hardware
#define RSX_TILE_WIDTH 256
#define RSX_TILE_HEIGHT 64

Expand Down Expand Up @@ -239,7 +241,7 @@ void write_linear(const in uint offset, const in uvec4 value)

void do_memory_op(const in uint row, const in uint col)
{
const uint row_offset = (row * tile_pitch) + tile_base_address + tile_offset;
const uint row_offset = (row * tile_pitch) + tile_base_address + tile_address_offset;
const uint this_address = row_offset + (col * image_bpp);

// 1. Calculate row_addr
Expand Down Expand Up @@ -309,8 +311,8 @@ void do_memory_op(const in uint row, const in uint col)

// Calculate relative addresses and sample
uint linear_image_offset = (row * image_pitch) + (col * image_bpp);
uint tile_base_offset = tile_address - tile_base_address; // Distance from tile base address
uint tile_data_offset = tile_base_offset - tile_offset; // Distance from data base address
uint tile_base_offset = tile_address - tile_base_address; // Distance from tile base address
uint tile_data_offset = tile_base_offset - tile_rw_offset; // Distance from data base address

if (tile_base_offset >= tile_size)
{
Expand Down
17 changes: 10 additions & 7 deletions rpcs3/Emu/RSX/VK/VKCompute.h
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,7 @@ namespace vk
{
u32 tile_base_address;
u32 tile_base_offset;
u32 tile_rw_offset;
u32 tile_size;
u32 tile_pitch;
u32 bank;
Expand All @@ -538,7 +539,8 @@ namespace vk
u32 num_tiles_per_row;
u32 tile_base_address;
u32 tile_size;
u32 tile_offset;
u32 tile_address_offset;
u32 tile_rw_offset;
u32 tile_pitch;
u32 tile_bank;
u32 image_width;
Expand All @@ -559,7 +561,7 @@ namespace vk
{
ssbo_count = 2;
use_push_constants = true;
push_constants_size = 48;
push_constants_size = sizeof(params);

create();

Expand Down Expand Up @@ -599,20 +601,20 @@ namespace vk
this->in_offset = config.src_offset;
this->out_offset = config.dst_offset;

const auto tiled_height = std::min(
const auto tile_aligned_height = std::min(
utils::align<u32>(config.image_height, 64),
utils::aligned_div(config.tile_size - config.tile_base_offset, config.tile_pitch)
);

if constexpr (Op == RSX_detiler_op::decode)
{
this->in_block_length = tiled_height * config.tile_pitch;
this->in_block_length = tile_aligned_height * config.tile_pitch;
this->out_block_length = config.image_height * config.image_pitch;
}
else
{
this->in_block_length = config.image_height * config.image_pitch;
this->out_block_length = tiled_height* config.tile_pitch;
this->out_block_length = tile_aligned_height * config.tile_pitch;
}

auto get_prime_factor = [](u32 pitch) -> std::pair<u32, u32>
Expand Down Expand Up @@ -642,12 +644,13 @@ namespace vk
params.factor = factor;
params.num_tiles_per_row = tiles_per_row;
params.tile_base_address = config.tile_base_address;
params.tile_rw_offset = config.tile_rw_offset;
params.tile_size = config.tile_size;
params.tile_offset = config.tile_base_offset;
params.tile_address_offset = config.tile_base_offset;
params.tile_pitch = config.tile_pitch;
params.tile_bank = config.bank;
params.image_width = config.image_width;
params.image_height = tiled_height;
params.image_height = (Op == RSX_detiler_op::decode) ? tile_aligned_height : config.image_height;
params.image_pitch = config.image_pitch;
params.image_bpp = config.image_bpp;
set_parameters(cmd);
Expand Down
4 changes: 2 additions & 2 deletions rpcs3/Emu/RSX/VK/VKDMA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ namespace vk
// NOTE: Do not unmap. This can be extremely slow on some platforms.
}

std::pair<u32, buffer*> dma_block::get(const utils::address_range& range)
dma_mapping_handle dma_block::get(const utils::address_range& range)
{
if (inheritance_info.parent)
{
Expand Down Expand Up @@ -331,7 +331,7 @@ namespace vk
block->init(*g_render_device, base_address, expected_length);
}

std::pair<u32, vk::buffer*> map_dma(u32 local_address, u32 length)
dma_mapping_handle map_dma(u32 local_address, u32 length)
{
// Not much contention expected here, avoid searching twice
std::lock_guard lock(g_dma_mutex);
Expand Down
4 changes: 3 additions & 1 deletion rpcs3/Emu/RSX/VK/VKDMA.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

namespace vk
{
std::pair<u32, vk::buffer*> map_dma(u32 local_address, u32 length);
using dma_mapping_handle = std::pair<u32, vk::buffer*>;

dma_mapping_handle map_dma(u32 local_address, u32 length);
void load_dma(u32 local_address, u32 length);
void flush_dma(u32 local_address, u32 length);
void unmap_dma(u32 local_address, u32 length);
Expand Down
1 change: 1 addition & 0 deletions rpcs3/Emu/RSX/VK/VKTexture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1274,6 +1274,7 @@ namespace vk
{
.tile_base_address = tiled_region.base_address,
.tile_base_offset = range.start - tiled_region.base_address,
.tile_rw_offset = range.start - tiled_region.base_address, // TODO
.tile_size = tiled_region.tile->size,
.tile_pitch = tiled_region.tile->pitch,
.bank = tiled_region.tile->bank,
Expand Down