Skip to content

Commit

Permalink
rsx: Temporary workaround for race condition in blit engine
Browse files — browse the repository at this point in the history
  • Loading branch information
kd-11 committed Aug 16, 2019
1 parent: 87bd5b6 · commit: 05caa10
Showing 1 changed file with 53 additions and 32 deletions.
85 changes: 53 additions & 32 deletions rpcs3/Emu/RSX/Common/texture_cache.h
Expand Up @@ -2394,8 +2394,21 @@ namespace rsx
};

// Check if src/dst are parts of render targets
auto dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, dst_bpp, false);
dst_is_render_target = dst_subres.surface != nullptr;
typename surface_store_type::surface_overlap_info dst_subres;
if (dst_address > 0xc0000000)
{
// TODO: HACK
// After writing, it is required to lock the memory range from access!
dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, dst_bpp, false);
dst_is_render_target = dst_subres.surface != nullptr;
}
else
{
// Surface exists in local memory.
// 1. Invalidate surfaces in range
// 2. Proceed as normal, blit into a 'normal' surface and any upload routines should catch it
m_rtts.invalidate_range(utils::address_range::start_length(dst_address, dst.pitch * dst_h));
}

// TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, src_bpp, false);
Expand Down Expand Up @@ -2743,6 +2756,29 @@ namespace rsx
src_area.y2 += scaled_clip_offset_y;
}

// Calculate number of bytes actually modified
u32 mem_base, mem_length;
if (dst_is_render_target)
{
mem_base = dst_address - dst_subres.base_address;
}
else
{
mem_base = dst_address - dst.rsx_address;
}

if (dst.clip_height == 1)
{
mem_length = dst.clip_width * dst_bpp;
}
else
{
const u32 mem_excess = mem_base % dst.pitch;
mem_length = (dst.pitch * dst.clip_height) - mem_excess;
}

const auto modified_range = utils::address_range::start_length(dst_address, mem_length);

if (dest_texture == 0)
{
verify(HERE), !dst_is_render_target;
Expand All @@ -2755,13 +2791,6 @@ namespace rsx
const u32 section_length = std::max(write_end, expected_end) - dst.rsx_address;
dst_dimensions.height = section_length / dst.pitch;

lock.upgrade();

// NOTE: Invalidating for read also flushes framebuffers locked in the range and invalidates them (obj->test() will fail)
const auto rsx_range = address_range::start_length(dst.rsx_address, section_length);
// NOTE: Write flag set to remove all other overlapping regions (e.g shader_read or blit_src)
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::write, std::forward<Args>(extras)...);

// render target data is already in correct swizzle layout
auto channel_order = src_is_render_target ? rsx::texture_create_flags::native_component_order :
dst_is_argb8 ? rsx::texture_create_flags::default_component_order :
Expand All @@ -2773,6 +2802,12 @@ namespace rsx
dst_area.y1 += dst.offset_y;
dst_area.y2 += dst.offset_y;

lock.upgrade();

// NOTE: Write flag set to remove all other overlapping regions (e.g shader_read or blit_src)
const auto rsx_range = address_range::start_length(dst.rsx_address, section_length);
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::write, std::forward<Args>(extras)...);

if (!dst_area.x1 && !dst_area.y1 && dst_area.x2 == dst_dimensions.width && dst_area.y2 == dst_dimensions.height)
{
cached_dest = create_new_texture(cmd, rsx_range, dst_dimensions.width, dst_dimensions.height, 1, 1, dst.pitch,
Expand All @@ -2781,14 +2816,19 @@ namespace rsx
}
else
{
// HACK: workaround for data race with Cell
// Pre-lock the memory range we'll be touching, then load with super_ptr
const auto prot_range = modified_range.to_page_range();
utils::memory_protect(vm::base(prot_range.start), prot_range.length(), utils::protection::no);

const u16 pitch_in_block = dst.pitch / dst_bpp;
std::vector<rsx_subresource_layout> subresource_layout;
rsx_subresource_layout subres = {};
subres.width_in_block = dst_dimensions.width;
subres.height_in_block = dst_dimensions.height;
subres.pitch_in_block = pitch_in_block;
subres.depth = 1;
subres.data = { reinterpret_cast<const gsl::byte*>(vm::base(dst.rsx_address)), dst.pitch * dst_dimensions.height };
subres.data = { reinterpret_cast<const gsl::byte*>(vm::get_super_ptr(dst.rsx_address)), dst.pitch * dst_dimensions.height };
subresource_layout.push_back(subres);

cached_dest = upload_image_from_cpu(cmd, rsx_range, dst_dimensions.width, dst_dimensions.height, 1, 1, dst.pitch,
Expand All @@ -2804,29 +2844,8 @@ namespace rsx

verify(HERE), cached_dest || dst_is_render_target;

// Calculate number of bytes actually modified
u32 mem_base, mem_length;
if (dst_is_render_target)
{
mem_base = dst_address - dst_subres.base_address;
}
else
{
mem_base = dst_address - cached_dest->get_section_base();
}

if (dst.clip_height == 1)
{
mem_length = dst.clip_width * dst_bpp;
}
else
{
const u32 mem_excess = mem_base % dst.pitch;
mem_length = (dst.pitch * dst.clip_height) - mem_excess;
}

// Invalidate any cached subresources in modified range
notify_surface_changed(utils::address_range::start_length(dst_address, mem_length));
notify_surface_changed(modified_range);

if (cached_dest)
{
Expand All @@ -2840,6 +2859,8 @@ namespace rsx
}
else
{
// NOTE: This doesn't work very well in case of Cell access
// Need to lock the affected memory range and actually attach this subres to a locked_region
dst_subres.surface->on_write_copy(rsx::get_shared_tag());
m_rtts.notify_memory_structure_changed();
}
Expand Down

0 comments on commit 05caa10

Please sign in to comment.