Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rsx: Minor surface cache improvements #11335

Merged
merged 4 commits into from
Jan 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
210 changes: 173 additions & 37 deletions rpcs3/Emu/RSX/Common/surface_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,177 @@ namespace rsx
invalidated_resources.push_back(std::move(storage));
}

/**
 * Fast path for duplicate detection based purely on address-range coverage.
 *
 * Walks 'sections' from back to front and accumulates the memory ranges of the visited
 * surfaces into a sorted list of merged ranges. As soon as those ranges have merged into a
 * single range for which range.inside(...) holds, every remaining (earlier) section whose
 * memory range is inside 'range' is passed to invalidate_surface_address().
 *
 * Surfaces whose rsx pitch differs from their native pitch (and that are more than one row
 * tall) are not added to the coverage list because their footprint has gaps between rows.
 *
 * @param sections Overlapping surfaces, iterated in reverse order.
 * @param range    The memory range being checked for full coverage.
 * @return Number of sections that were invalidated. 0 means nothing was removed.
 */
int remove_duplicates_fast_impl(std::vector<surface_overlap_info>& sections, const rsx::address_range& range)
{
	// Sorted list of disjoint, non-adjacent ranges covered by the sections visited so far
	std::list<utils::address_range> m_ranges;
	bool invalidate_sections = false;
	int removed_count = 0;

	// True when 'hi' overlaps 'lo' or starts directly after it. Requires lo.start <= hi.start.
	// Deliberately avoids computing 'end + 1', which would wrap at the top of the address space.
	const auto touches = [](const auto& lo, const auto& hi) -> bool
	{
		return hi.start <= lo.end || (hi.start - lo.end) == 1;
	};

	for (auto it = sections.crbegin(); it != sections.crend(); ++it)
	{
		auto this_range = it->surface->get_memory_range();
		if (invalidate_sections)
		{
			// Coverage is already complete; anything left that sits inside the range is obsolete
			if (this_range.inside(range))
			{
				invalidate_surface_address(it->base_address, it->is_depth);
				removed_count++;
			}
			continue;
		}

		if (it->surface->get_rsx_pitch() != it->surface->get_native_pitch() &&
			it->surface->get_surface_height() != 1)
		{
			// Memory gap in descriptor
			continue;
		}

		// Find the insertion point: the first tracked range that starts after this one
		auto pos = m_ranges.begin();
		while (pos != m_ranges.end() && pos->start <= this_range.start)
		{
			++pos;
		}

		// Absorb the predecessor if it overlaps or abuts this range.
		// Only the immediate predecessor can qualify since tracked ranges are disjoint and non-adjacent.
		if (pos != m_ranges.begin())
		{
			auto prev = pos;
			--prev;

			if (touches(*prev, this_range))
			{
				this_range.start = prev->start;
				this_range.end = std::max(this_range.end, prev->end);
				m_ranges.erase(prev); // std::list erase leaves 'pos' valid
			}
		}

		// Absorb every following range until a gap is found
		while (pos != m_ranges.end() && touches(this_range, *pos))
		{
			// Consume
			this_range.end = std::max(this_range.end, pos->end);
			pos = m_ranges.erase(pos);
		}

		m_ranges.insert(pos, this_range);

		// Test coverage after every insertion, including the very first one
		if (m_ranges.size() == 1 && range.inside(m_ranges.front()))
		{
			invalidate_sections = true;
		}
	}

	return removed_count;
}

/**
 * Slow path for duplicate detection using a per-byte coverage map (painter's algorithm).
 *
 * Allocates one marker byte per byte of 'range' and walks 'sections' from back to front.
 * Each section tags the marker bytes of its rows; a section that finds every one of its
 * bytes already tagged by previously visited sections is logged and passed to
 * invalidate_surface_address(). Sections that extend beyond either end of 'range' are
 * always kept.
 *
 * @param sections Overlapping surfaces, iterated in reverse order. Each must overlap 'range'.
 * @param range    The memory range under test. Must be shorter than 64 MiB.
 */
void remove_duplicates_fallback_impl(std::vector<surface_overlap_info>& sections, const rsx::address_range& range)
{
	// Generic painter's algorithm to detect obsolete sections
	ensure(range.length() < 64 * 0x100000);
	std::vector<u8> marker(range.length(), 0);

	// Tags 'length' marker bytes starting at 'offset' (relative to range.start).
	// Returns true if at least one of those bytes was not tagged before the call.
	// NOTE(review): the wide accesses below are not guaranteed to be naturally aligned since 'offset' is arbitrary.
	auto compare_and_tag_row = [&](u32 offset, u32 length) -> bool
	{
		// Never read or write past the end of the marker buffer
		if (offset >= marker.size())
		{
			return false;
		}

		length = std::min<u32>(length, static_cast<u32>(marker.size() - offset));

		bool valid = false;

		// Loop on the remaining length directly; a separate counter compared against
		// (length / 8) while length shrinks would stop roughly halfway through the row
		for (; length >= 8; offset += 8, length -= 8)
		{
			auto dest = reinterpret_cast<u64*>(marker.data() + offset);
			valid |= (*dest != umax);
			*dest = umax;
		}

		if (length >= 4)
		{
			auto dest = reinterpret_cast<u32*>(marker.data() + offset);
			valid |= (*dest != umax);
			*dest = umax;

			offset += 4;
			length -= 4;
		}

		if (length >= 2)
		{
			auto dest = reinterpret_cast<u16*>(marker.data() + offset);
			valid |= (*dest != umax);
			*dest = umax;

			offset += 2;
			length -= 2;
		}

		if (length)
		{
			auto dest = (marker.data() + offset);
			valid |= (*dest != umax);
			*dest = umax;
		}

		return valid;
	};

	for (auto it = sections.crbegin(); it != sections.crend(); ++it)
	{
		auto this_range = it->surface->get_memory_range();
		ensure(this_range.overlaps(range));

		const auto native_pitch = it->surface->get_surface_width(rsx::surface_metrics::bytes);
		const auto rsx_pitch = it->surface->get_rsx_pitch();
		auto num_rows = it->surface->get_surface_height(rsx::surface_metrics::samples);
		bool valid = false;

		if (this_range.start < range.start)
		{
			// Starts outside bounds
			const auto internal_offset = (range.start - this_range.start);
			const auto row_num = internal_offset / rsx_pitch;
			const auto row_offset = internal_offset % rsx_pitch;

			// This section is unconditionally valid
			valid = true;

			if (row_offset < native_pitch)
			{
				// Tag the tail of the row that straddles range.start
				compare_and_tag_row(0, native_pitch - row_offset);
			}

			// Jump to next row...
			this_range.start = this_range.start + (row_num + 1) * rsx_pitch;
		}

		if (this_range.end > range.end)
		{
			// Unconditionally valid
			valid = true;
			this_range.end = range.end;
		}

		if (valid)
		{
			if (this_range.start >= this_range.end)
			{
				continue;
			}

			// The section was clipped, recompute how many rows remain inside the range
			num_rows = utils::aligned_div(this_range.length(), rsx_pitch);
		}

		// Length of the clipped section measured from range.start, i.e. in the same
		// coordinate space as 'offset'. Used to clamp the final row.
		const u32 section_len = this_range.end - range.start + 1;

		for (u32 row = 0, offset = (this_range.start - range.start); row < num_rows && offset < section_len; ++row, offset += rsx_pitch)
		{
			valid |= compare_and_tag_row(offset, std::min<u32>(native_pitch, section_len - offset));
		}

		if (!valid)
		{
			rsx_log.error("Stale surface at address 0x%x will be deleted", it->base_address);
			invalidate_surface_address(it->base_address, it->is_depth);
		}
	}
}

protected:
/**
* If render target already exists at address, issue state change operation on cmdList.
Expand Down Expand Up @@ -923,44 +1094,9 @@ namespace rsx

void check_for_duplicates(std::vector<surface_overlap_info>& sections, const rsx::address_range& range)
{
// Generic painter's algorithm to detect obsolete sections
ensure(range.length() < 64 * 0x100000);
std::vector<u8> marker(range.length());
std::memset(marker.data(), 0, range.length());

for (auto it = sections.crbegin(); it != sections.crend(); ++it)
if (!remove_duplicates_fast_impl(sections, range))
{
if (!it->surface->get_memory_range().inside(range))
{
continue;
}

const auto true_pitch_in_bytes = it->surface->get_surface_width(rsx::surface_metrics::bytes);
const auto true_height_in_rows = it->surface->get_surface_height(rsx::surface_metrics::samples);

bool valid = false;
auto addr = it->base_address - range.start;
auto data = marker.data();

for (usz row = 0; row < true_height_in_rows; ++row)
{
for (usz col = 0; col < true_pitch_in_bytes; ++col)
{
if (const auto loc = col + addr; !data[loc])
{
valid = true;
data[loc] = 1;
}
}

addr += true_pitch_in_bytes;
}

if (!valid)
{
rsx_log.error("Stale surface at address 0x%x will be deleted", it->base_address);
invalidate_surface_address(it->base_address, it->is_depth);
}
remove_duplicates_fallback_impl(sections, range);
}
}

Expand Down
34 changes: 28 additions & 6 deletions rpcs3/Emu/RSX/Common/texture_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -1875,23 +1875,45 @@ namespace rsx
if (result_is_valid)
{
// Check for possible duplicates
usz max_safe_sections = u32{umax};
usz max_overdraw_ratio = u32{ umax };
usz max_safe_sections = u32{ umax };

switch (result.external_subresource_desc.op)
{
case deferred_request_command::atlas_gather:
max_safe_sections = 8 + attr.mipmaps; break;
max_overdraw_ratio = 150;
max_safe_sections = 8 + 2 * attr.mipmaps;
break;
case deferred_request_command::cubemap_gather:
max_safe_sections = 8 * attr.mipmaps; break;
max_overdraw_ratio = 150;
max_safe_sections = 6 * 2 * attr.mipmaps;
break;
case deferred_request_command::_3d_gather:
max_safe_sections = (attr.depth * attr.mipmaps * 110) / 100; break; // 10% factor of safety
// 3D gather can have very many input sections, try to keep section count low
max_overdraw_ratio = 125;
max_safe_sections = (attr.depth * attr.mipmaps * 110) / 100;
break;
default:
break;
}

if (overlapping_fbos.size() > max_safe_sections)
{
rsx_log.error("[Performance warning] Texture gather routine encountered too many objects!");
m_rtts.check_for_duplicates(overlapping_fbos, memory_range);
// Are we really over-budget?
u32 coverage_size = 0;
for (const auto& section : overlapping_fbos)
{
const auto area = section.surface->get_native_pitch() * section.surface->get_surface_height(rsx::surface_metrics::bytes);
coverage_size += area;
}

if (const auto coverage_ratio = (coverage_size * 100ull) / memory_range.length();
coverage_ratio > max_overdraw_ratio)
{
rsx_log.error("[Performance warning] Texture gather routine encountered too many objects! Operation=%d, Mipmaps=%d, Depth=%d, Sections=%zu, Ratio=%llu%",
static_cast<int>(result.external_subresource_desc.op), attr.mipmaps, attr.depth, overlapping_fbos.size(), coverage_ratio);
m_rtts.check_for_duplicates(overlapping_fbos, memory_range);
}
}

// Optionally disallow caching if resource is being written to as it is being read from
Expand Down