diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index e31b094fc5b7..4719d6f1188c 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -896,4 +896,57 @@ u32 get_remap_encoding(const std::pair, std::array>& re encode |= (remap.second[2] << 12); encode |= (remap.second[3] << 14); return encode; +} + +std::pair get_compatible_gcm_format(rsx::surface_color_format format) +{ + switch (format) + { + case rsx::surface_color_format::r5g6b5: + return{ CELL_GCM_TEXTURE_R5G6B5, false }; + + case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: + case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: + case rsx::surface_color_format::a8r8g8b8: + return{ CELL_GCM_TEXTURE_A8R8G8B8, true }; //verified + + case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: + case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: + case rsx::surface_color_format::a8b8g8r8: + return{ CELL_GCM_TEXTURE_A8R8G8B8, false }; + + case rsx::surface_color_format::w16z16y16x16: + return{ CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT, true }; + + case rsx::surface_color_format::w32z32y32x32: + return{ CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT, true }; + + case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: + case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: + return{ CELL_GCM_TEXTURE_A1R5G5B5, false }; + + case rsx::surface_color_format::b8: + return{ CELL_GCM_TEXTURE_B8, false }; + + case rsx::surface_color_format::g8b8: + return{ CELL_GCM_TEXTURE_G8B8, true }; + + case rsx::surface_color_format::x32: + return{ CELL_GCM_TEXTURE_X32_FLOAT, true }; //verified + default: + fmt::throw_exception("Unhandled surface format 0x%x", (u32)format); + } +} + +std::pair get_compatible_gcm_format(rsx::surface_depth_format format) +{ + switch (format) + { + case rsx::surface_depth_format::z16: + return{ CELL_GCM_TEXTURE_DEPTH16, true }; + case rsx::surface_depth_format::z24s8: + return{ CELL_GCM_TEXTURE_DEPTH24_D8, true }; + default: + ASSUME(0); + } } \ No 
newline at end of file diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index a6ca6703b4de..14a9b787913a 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -146,3 +146,9 @@ u32 get_format_packed_pitch(u32 format, u16 width, bool border = false, bool swi * Reverse encoding */ u32 get_remap_encoding(const std::pair, std::array>& remap); + +/** + * Get the gcm texel layout compatible with a surface format. Returns a pair: (gcm format, swap-bytes flag) + */ +std::pair get_compatible_gcm_format(rsx::surface_color_format format); +std::pair get_compatible_gcm_format(rsx::surface_depth_format format); diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 108acf573a19..a6102cebf398 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -311,7 +311,7 @@ namespace rsx if (ignore) continue; - this_address = surface->memory_tag_samples[0].first; + this_address = surface->base_addr; verify(HERE), this_address; } @@ -363,6 +363,7 @@ namespace rsx surface_storage_type new_surface_storage; surface_type old_surface = nullptr; surface_type new_surface = nullptr; + bool do_intersection_test = true; bool store = true; address_range *storage_bounds; @@ -404,10 +405,13 @@ namespace rsx { // Preserve memory outside the area to be inherited if needed split_surface_region(command_list, address, Traits::get(surface), (u16)width, (u16)height, bpp, antialias); + old_surface = Traits::get(surface); } - old_surface = Traits::get(surface); + // This will be unconditionally moved to invalidated list shortly + Traits::notify_surface_invalidated(surface); old_surface_storage = std::move(surface); + primary_storage->erase(It); } } @@ -428,10 +432,9 @@ namespace rsx new_surface_storage = std::move(surface); Traits::notify_surface_reused(new_surface_storage); - if (old_surface) + if (old_surface_storage) { // Exchange this surface with the invalidated one - Traits::notify_surface_invalidated(old_surface_storage); 
surface = std::move(old_surface_storage); } else @@ -449,10 +452,9 @@ namespace rsx } // Check for stale storage - if (old_surface != nullptr && new_surface == nullptr) + if (old_surface_storage) { // This was already determined to be invalid and is excluded from testing above - Traits::notify_surface_invalidated(old_surface_storage); invalidated_resources.push_back(std::move(old_surface_storage)); } @@ -463,36 +465,46 @@ namespace rsx new_surface = Traits::get(new_surface_storage); } - if (!old_surface) + // Remove and preserve if possible any overlapping/replaced surface from the other pool + auto aliased_surface = secondary_storage->find(address); + if (aliased_surface != secondary_storage->end()) { - // Remove and preserve if possible any overlapping/replaced surface from the other pool - auto aliased_surface = secondary_storage->find(address); - if (aliased_surface != secondary_storage->end()) + if (Traits::surface_is_pitch_compatible(aliased_surface->second, pitch)) { - if (Traits::surface_is_pitch_compatible(aliased_surface->second, pitch)) + auto surface = Traits::get(aliased_surface->second); + split_surface_region(command_list, address, surface, (u16)width, (u16)height, bpp, antialias); + + if (!old_surface || old_surface->last_use_tag < surface->last_use_tag) { - old_surface = Traits::get(aliased_surface->second); - split_surface_region(command_list, address, old_surface, (u16)width, (u16)height, bpp, antialias); + // TODO: This can leak data outside inherited bounds + old_surface = surface; } - - Traits::notify_surface_invalidated(aliased_surface->second); - invalidated_resources.push_back(std::move(aliased_surface->second)); - secondary_storage->erase(aliased_surface); } - } - bool do_intersection_test = true; + Traits::notify_surface_invalidated(aliased_surface->second); + invalidated_resources.push_back(std::move(aliased_surface->second)); + secondary_storage->erase(aliased_surface); + } // Check if old_surface is 'new' and hopefully avoid 
intersection - if (old_surface && old_surface->last_use_tag >= write_tag) + if (old_surface) { - const auto new_area = new_surface->get_normalized_memory_area(); - const auto old_area = old_surface->get_normalized_memory_area(); - - if (new_area.x2 <= old_area.x2 && new_area.y2 <= old_area.y2) + if (old_surface->last_use_tag < new_surface->last_use_tag) { - do_intersection_test = false; - new_surface->set_old_contents(old_surface); + // Can happen if aliasing occurs; a probable condition due to memory splitting + // This is highly unlikely but is possible in theory + old_surface = nullptr; + } + else if (old_surface->last_use_tag >= write_tag) + { + const auto new_area = new_surface->get_normalized_memory_area(); + const auto old_area = old_surface->get_normalized_memory_area(); + + if (new_area.x2 <= old_area.x2 && new_area.y2 <= old_area.y2) + { + do_intersection_test = false; + new_surface->set_old_contents(old_surface); + } } } @@ -507,7 +519,7 @@ namespace rsx (*primary_storage)[address] = std::move(new_surface_storage); } - verify(HERE), new_surface->get_spp() == get_format_sample_count(antialias); + verify(HERE), !old_surface_storage, new_surface->get_spp() == get_format_sample_count(antialias); return new_surface; } @@ -704,7 +716,7 @@ namespace rsx } template - std::vector get_merged_texture_memory_region(commandbuffer_type& cmd, u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch, u8 required_bpp) + std::vector get_merged_texture_memory_region(commandbuffer_type& cmd, u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch, u8 required_bpp, rsx::surface_access access) { std::vector result; std::vector> dirty; @@ -727,12 +739,6 @@ namespace rsx if ((this_address + texture_size) <= texaddr) continue; - if (surface->read_barrier(cmd); !surface->test()) - { - dirty.emplace_back(this_address, is_depth); - continue; - } - surface_overlap_info info; info.surface = surface; info.base_address = this_address; @@ -777,6 +783,13 
@@ namespace rsx info.height = std::min(required_height, normalized_surface_height - info.src_y); } + // Delay this as much as possible to avoid side-effects of spamming barrier + if (surface->memory_barrier(cmd, access); !surface->test()) + { + dirty.emplace_back(this_address, is_depth); + continue; + } + info.is_clipped = (info.width < required_width || info.height < required_height); if (auto surface_bpp = surface->get_bpp(); UNLIKELY(surface_bpp != required_bpp)) @@ -831,7 +844,7 @@ namespace rsx return result; } - void on_write(u32 address = 0) + void on_write(bool color, bool z, u32 address = 0) { if (!address) { @@ -839,14 +852,17 @@ namespace rsx { if (m_invalidate_on_write) { - for (int i = m_bound_render_targets_config.first, count = 0; - count < m_bound_render_targets_config.second; - ++i, ++count) + if (color) { - m_bound_render_targets[i].second->on_invalidate_children(); + for (int i = m_bound_render_targets_config.first, count = 0; + count < m_bound_render_targets_config.second; + ++i, ++count) + { + m_bound_render_targets[i].second->on_invalidate_children(); + } } - if (m_bound_depth_stencil.first) + if (z && m_bound_depth_stencil.first) { m_bound_depth_stencil.second->on_invalidate_children(); } @@ -860,33 +876,39 @@ namespace rsx } // Tag all available surfaces - for (int i = m_bound_render_targets_config.first, count = 0; - count < m_bound_render_targets_config.second; - ++i, ++count) + if (color) { - m_bound_render_targets[i].second->on_write(write_tag); + for (int i = m_bound_render_targets_config.first, count = 0; + count < m_bound_render_targets_config.second; + ++i, ++count) + { + m_bound_render_targets[i].second->on_write(write_tag); + } } - if (m_bound_depth_stencil.first) + if (z && m_bound_depth_stencil.first) { m_bound_depth_stencil.second->on_write(write_tag); } } else { - for (int i = m_bound_render_targets_config.first, count = 0; - count < m_bound_render_targets_config.second; - ++i, ++count) + if (color) { - if 
(m_bound_render_targets[i].first != address) + for (int i = m_bound_render_targets_config.first, count = 0; + count < m_bound_render_targets_config.second; + ++i, ++count) { - continue; - } + if (m_bound_render_targets[i].first != address) + { + continue; + } - m_bound_render_targets[i].second->on_write(write_tag); + m_bound_render_targets[i].second->on_write(write_tag); + } } - if (m_bound_depth_stencil.first == address) + if (z && m_bound_depth_stencil.first == address) { m_bound_depth_stencil.second->on_write(write_tag); } @@ -922,5 +944,26 @@ namespace rsx rtt = std::make_pair(0, nullptr); } } + + void invalidate_range(const rsx::address_range& range) + { + for (auto &rtt : m_render_targets_storage) + { + if (range.overlaps(rtt.second->get_memory_range())) + { + rtt.second->clear_rw_barrier(); + rtt.second->state_flags |= rsx::surface_state_flags::erase_bkgnd; + } + } + + for (auto &ds : m_depth_stencil_storage) + { + if (range.overlaps(ds.second->get_memory_range())) + { + ds.second->clear_rw_barrier(); + ds.second->state_flags |= rsx::surface_state_flags::erase_bkgnd; + } + } + } }; } diff --git a/rpcs3/Emu/RSX/Common/surface_utils.h b/rpcs3/Emu/RSX/Common/surface_utils.h index 7960a6155e8e..8bc96c217cc5 100644 --- a/rpcs3/Emu/RSX/Common/surface_utils.h +++ b/rpcs3/Emu/RSX/Common/surface_utils.h @@ -6,6 +6,8 @@ #include "TextureUtils.h" #include "../rsx_utils.h" +#define ENABLE_SURFACE_CACHE_DEBUG 0 + namespace rsx { enum surface_state_flags : u32 @@ -128,7 +130,13 @@ namespace rsx struct render_target_descriptor { u64 last_use_tag = 0; // tag indicating when this block was last confirmed to have been written to - std::array, 5> memory_tag_samples; + u64 base_addr = 0; + +#if (ENABLE_SURFACE_CACHE_DEBUG) + u64 memory_hash = 0; +#else + std::array, 3> memory_tag_samples; +#endif std::vector> old_contents; @@ -286,27 +294,102 @@ namespace rsx return (state_flags != rsx::surface_state_flags::ready) || !old_contents.empty(); } +#if (ENABLE_SURFACE_CACHE_DEBUG) + 
u64 hash_block() const + { + const auto padding = (rsx_pitch - native_pitch) / 8; + const auto row_length = (native_pitch) / 8; + auto num_rows = (surface_height * samples_y); + auto ptr = reinterpret_cast(vm::g_sudo_addr + base_addr); + + auto col = row_length; + u64 result = 0; + + while (num_rows--) + { + while (col--) + { + result ^= *ptr++; + } + + ptr += padding; + col = row_length; + } + + return result; + } + + void queue_tag(u32 address) + { + base_addr = address; + } + + void sync_tag() + { + memory_hash = hash_block(); + } + + void shuffle_tag() + { + memory_hash = ~memory_hash; + } + bool test() const { - if (dirty()) + return hash_block() == memory_hash; + } + +#else + void queue_tag(u32 address) + { + base_addr = address; + + const u32 size_x = (native_pitch > 8)? (native_pitch - 8) : 0u; + const u32 size_y = u32(surface_height * samples_y) - 1u; + const position2u samples[] = + { + // NOTE: Sorted by probability to catch dirty flag + {0, 0}, + {size_x, size_y}, + {size_x / 2, size_y / 2}, + + // Auxiliary, highly unlikely to ever catch anything + // NOTE: Currently unused as length of samples is truncated to 3 + {size_x, 0}, + {0, size_y}, + }; + + for (int n = 0; n < memory_tag_samples.size(); ++n) { - // TODO - // Should RCB or mem-sync (inherit previous mem) to init memory - LOG_TODO(RSX, "Resource used before memory initialization"); + const auto sample_offset = (samples[n].y * rsx_pitch) + samples[n].x; + memory_tag_samples[n].first = (sample_offset + base_addr); } + } - // Tags are tested in an X pattern - for (const auto &tag : memory_tag_samples) + void sync_tag() + { + for (auto &e : memory_tag_samples) { - if (!tag.first) - break; + e.second = *reinterpret_cast(vm::g_sudo_addr + e.first); + } + } - if (tag.second != *reinterpret_cast(vm::g_sudo_addr + tag.first)) + void shuffle_tag() + { + memory_tag_samples[0].second = ~memory_tag_samples[0].second; /* invert the first stored sample so it no longer matches the value captured by sync_tag(); mirrors the debug variant's memory_hash = ~memory_hash */ + } + + bool test() + { + for (auto &e : memory_tag_samples) + { + if (e.second != 
*reinterpret_cast(vm::g_sudo_addr + e.first)) return false; } return true; } +#endif void clear_rw_barrier() { @@ -415,51 +498,6 @@ namespace rsx } } - void queue_tag(u32 address) - { - for (unsigned i = 0; i < memory_tag_samples.size(); ++i) - { - if (LIKELY(i)) - memory_tag_samples[i].first = 0; - else - memory_tag_samples[i].first = address; // Top left - } - - const u32 pitch = get_native_pitch(); - if (UNLIKELY(pitch < 16)) - { - // Not enough area to gather samples if pitch is too small - return; - } - - // Top right corner - memory_tag_samples[1].first = address + pitch - 8; - - if (const u32 h = get_surface_height(); h > 1) - { - // Last row - const u32 pitch2 = get_rsx_pitch(); - const u32 last_row_offset = pitch2 * (h - 1); - memory_tag_samples[2].first = address + last_row_offset; // Bottom left corner - memory_tag_samples[3].first = address + last_row_offset + pitch - 8; // Bottom right corner - - // Centroid - const u32 center_row_offset = pitch2 * (h / 2); - memory_tag_samples[4].first = address + center_row_offset + pitch / 2; - } - } - - void sync_tag() - { - for (auto &tag : memory_tag_samples) - { - if (!tag.first) - break; - - tag.second = *reinterpret_cast(vm::g_sudo_addr + tag.first); - } - } - void on_write(u64 write_tag = 0, rsx::surface_state_flags resolve_flags = surface_state_flags::require_resolve) { if (write_tag) @@ -516,7 +554,7 @@ namespace rsx rsx::address_range get_memory_range() const { const u32 internal_height = get_surface_height(rsx::surface_metrics::samples); - return rsx::address_range::start_length(memory_tag_samples[0].first, internal_height * get_rsx_pitch()); + return rsx::address_range::start_length(base_addr, internal_height * get_rsx_pitch()); } template diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 71e0fa25db4b..782666426ba2 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -1609,8 +1609,6 @@ namespace rsx return; } - 
section.surface->read_barrier(cmd); - // How much of this slice to read? int rebased = int(section.dst_y) - slice_begin; const auto src_x = section.src_x; @@ -2086,7 +2084,7 @@ namespace rsx // NOTE: Compressed formats require a reupload, facilitated by blit synchronization and/or WCB and are not handled here const auto bpp = get_format_block_size_in_bytes(format); - const auto overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, texaddr, tex_width, required_surface_height, tex_pitch, bpp); + const auto overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, texaddr, tex_width, required_surface_height, tex_pitch, bpp, rsx::surface_access::read); if (!overlapping_fbos.empty() || !overlapping_locals.empty()) { @@ -2344,7 +2342,7 @@ namespace rsx auto rtt_lookup = [&m_rtts, &cmd, &scale_x, &scale_y, this](u32 address, u32 width, u32 height, u32 pitch, u8 bpp, bool allow_clipped) -> typename surface_store_type::surface_overlap_info { - const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch, bpp); + const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch, bpp, rsx::surface_access::transfer); if (list.empty()) { return {}; @@ -2396,8 +2394,21 @@ namespace rsx }; // Check if src/dst are parts of render targets - auto dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, dst_bpp, false); - dst_is_render_target = dst_subres.surface != nullptr; + typename surface_store_type::surface_overlap_info dst_subres; + if (dst_address > 0xc0000000) + { + // TODO: HACK + // After writing, it is required to lock the memory range from access! + dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, dst_bpp, false); + dst_is_render_target = dst_subres.surface != nullptr; + } + else + { + // Surface exists in local memory. + // 1. Invalidate surfaces in range + // 2. 
Proceed as normal, blit into a 'normal' surface and any upload routines should catch it + m_rtts.invalidate_range(utils::address_range::start_length(dst_address, dst.pitch * dst_h)); + } // TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, src_bpp, false); @@ -2444,8 +2455,6 @@ namespace rsx if (src_is_render_target) { - src_subres.surface->read_barrier(cmd); - const auto surf = src_subres.surface; const auto bpp = surf->get_bpp(); if (bpp != src_bpp) @@ -2460,9 +2469,6 @@ namespace rsx if (dst_is_render_target) { - // Full barrier is required in case of partial transfers - dst_subres.surface->read_barrier(cmd); - auto bpp = dst_subres.surface->get_bpp(); if (bpp != dst_bpp) { @@ -2750,6 +2756,29 @@ namespace rsx src_area.y2 += scaled_clip_offset_y; } + // Calculate number of bytes actually modified + u32 mem_base, mem_length; + if (dst_is_render_target) + { + mem_base = dst_address - dst_subres.base_address; + } + else + { + mem_base = dst_address - dst.rsx_address; + } + + if (dst.clip_height == 1) + { + mem_length = dst.clip_width * dst_bpp; + } + else + { + const u32 mem_excess = mem_base % dst.pitch; + mem_length = (dst.pitch * dst.clip_height) - mem_excess; + } + + const auto modified_range = utils::address_range::start_length(dst_address, mem_length); + if (dest_texture == 0) { verify(HERE), !dst_is_render_target; @@ -2762,13 +2791,6 @@ namespace rsx const u32 section_length = std::max(write_end, expected_end) - dst.rsx_address; dst_dimensions.height = section_length / dst.pitch; - lock.upgrade(); - - // NOTE: Invalidating for read also flushes framebuffers locked in the range and invalidates them (obj->test() will fail) - const auto rsx_range = address_range::start_length(dst.rsx_address, section_length); - // NOTE: Write flag set to remove all other overlapping regions (e.g shader_read or blit_src) - 
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::write, std::forward(extras)...); - // render target data is already in correct swizzle layout auto channel_order = src_is_render_target ? rsx::texture_create_flags::native_component_order : dst_is_argb8 ? rsx::texture_create_flags::default_component_order : @@ -2780,6 +2802,12 @@ namespace rsx dst_area.y1 += dst.offset_y; dst_area.y2 += dst.offset_y; + lock.upgrade(); + + // NOTE: Write flag set to remove all other overlapping regions (e.g shader_read or blit_src) + const auto rsx_range = address_range::start_length(dst.rsx_address, section_length); + invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::write, std::forward(extras)...); + if (!dst_area.x1 && !dst_area.y1 && dst_area.x2 == dst_dimensions.width && dst_area.y2 == dst_dimensions.height) { cached_dest = create_new_texture(cmd, rsx_range, dst_dimensions.width, dst_dimensions.height, 1, 1, dst.pitch, @@ -2788,6 +2816,11 @@ namespace rsx } else { + // HACK: workaround for data race with Cell + // Pre-lock the memory range we'll be touching, then load with super_ptr + const auto prot_range = modified_range.to_page_range(); + utils::memory_protect(vm::base(prot_range.start), prot_range.length(), utils::protection::no); + const u16 pitch_in_block = dst.pitch / dst_bpp; std::vector subresource_layout; rsx_subresource_layout subres = {}; @@ -2795,7 +2828,7 @@ namespace rsx subres.height_in_block = dst_dimensions.height; subres.pitch_in_block = pitch_in_block; subres.depth = 1; - subres.data = { reinterpret_cast(vm::base(dst.rsx_address)), dst.pitch * dst_dimensions.height }; + subres.data = { reinterpret_cast(vm::get_super_ptr(dst.rsx_address)), dst.pitch * dst_dimensions.height }; subresource_layout.push_back(subres); cached_dest = upload_image_from_cpu(cmd, rsx_range, dst_dimensions.width, dst_dimensions.height, 1, 1, dst.pitch, @@ -2811,29 +2844,8 @@ namespace rsx verify(HERE), cached_dest || dst_is_render_target; - // Calculate number 
of bytes actually modified - u32 mem_base, mem_length; - if (dst_is_render_target) - { - mem_base = dst_address - dst_subres.base_address; - } - else - { - mem_base = dst_address - cached_dest->get_section_base(); - } - - if (dst.clip_height == 1) - { - mem_length = dst.clip_width * dst_bpp; - } - else - { - const u32 mem_excess = mem_base % dst.pitch; - mem_length = (dst.pitch * dst.clip_height) - mem_excess; - } - // Invalidate any cached subresources in modified range - notify_surface_changed(utils::address_range::start_length(dst_address, mem_length)); + notify_surface_changed(modified_range); if (cached_dest) { @@ -2847,6 +2859,8 @@ namespace rsx } else { + // NOTE: This doesn't work very well in case of Cell access + // Need to lock the affected memory range and actually attach this subres to a locked_region dst_subres.surface->on_write_copy(rsx::get_shared_tag()); m_rtts.notify_memory_structure_changed(); } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 53bc6fbe797b..c55164d06b43 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -626,7 +626,7 @@ void GLGSRender::end() } } while (rsx::method_registers.current_draw_clause.next()); - m_rtts.on_write(); + m_rtts.on_write(rsx::method_registers.color_write_enabled(), rsx::method_registers.depth_write_enabled()); m_attrib_ring_buffer->notify(); m_index_ring_buffer->notify(); @@ -1152,7 +1152,7 @@ void GLGSRender::clear_surface(u32 arg) if (require_mem_load) ds->write_barrier(cmd); // Memory has been initialized - m_rtts.on_write(std::get<0>(m_rtts.m_bound_depth_stencil)); + m_rtts.on_write(false, true); } } @@ -1189,7 +1189,7 @@ void GLGSRender::clear_surface(u32 arg) if (const auto address = rtt.first) { if (require_mem_load) rtt.second->write_barrier(cmd); - m_rtts.on_write(address); + m_rtts.on_write(true, false, address); } } @@ -1629,7 +1629,7 @@ void GLGSRender::flip(int buffer, bool emu_flip) else { gl::command_context cmd = { gl_state 
}; - const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp()); + const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp(), rsx::surface_access::read); if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture) { diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index 22a3306cdcc8..7df1454894a6 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -223,8 +223,6 @@ struct gl_render_target_traits prev.target = sink.get(); - sink->sync_tag(); - if (!sink->old_contents.empty()) { // Deal with this, likely only needs to clear diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 16509a9d78da..be391dd13bcc 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -122,51 +122,6 @@ namespace vk } } - std::pair get_compatible_gcm_format(rsx::surface_color_format color_format) - { - switch (color_format) - { - case rsx::surface_color_format::r5g6b5: - return{ CELL_GCM_TEXTURE_R5G6B5, false }; - - case rsx::surface_color_format::a8r8g8b8: - return{ CELL_GCM_TEXTURE_A8R8G8B8, true }; //verified - - case rsx::surface_color_format::a8b8g8r8: - return{ CELL_GCM_TEXTURE_A8R8G8B8, false }; - - case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: - case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: - return{ CELL_GCM_TEXTURE_A8R8G8B8, true }; - - case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: - case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: - return{ CELL_GCM_TEXTURE_A8R8G8B8, false }; - - case rsx::surface_color_format::w16z16y16x16: - return{ CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT, true }; - - case rsx::surface_color_format::w32z32y32x32: - return{ CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT, true }; - - case 
rsx::surface_color_format::x1r5g5b5_o1r5g5b5: - case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: - return{ CELL_GCM_TEXTURE_A1R5G5B5, false }; - - case rsx::surface_color_format::b8: - return{ CELL_GCM_TEXTURE_B8, false }; - - case rsx::surface_color_format::g8b8: - return{ CELL_GCM_TEXTURE_G8B8, true }; - - case rsx::surface_color_format::x32: - return{ CELL_GCM_TEXTURE_X32_FLOAT, true }; //verified - - default: - return{ CELL_GCM_TEXTURE_A8R8G8B8, false }; - } - } - VkLogicOp get_logic_op(rsx::logic_op op) { switch (op) @@ -1778,7 +1733,7 @@ void VKGSRender::end() close_render_pass(); vk::leave_uninterruptible(); - m_rtts.on_write(); + m_rtts.on_write(rsx::method_registers.color_write_enabled(), rsx::method_registers.depth_write_enabled()); rsx::thread::end(); } @@ -2079,7 +2034,7 @@ void VKGSRender::clear_surface(u32 mask) if (const auto address = rtt.first) { if (require_mem_load) rtt.second->write_barrier(*m_current_command_buffer); - m_rtts.on_write(address); + m_rtts.on_write(true, false, address); } } } @@ -2088,10 +2043,10 @@ void VKGSRender::clear_surface(u32 mask) if (depth_stencil_mask) { - if (const auto address = m_rtts.m_bound_depth_stencil.first) + if (m_rtts.m_bound_depth_stencil.first) { if (require_mem_load) m_rtts.m_bound_depth_stencil.second->write_barrier(*m_current_command_buffer); - m_rtts.on_write(address); + m_rtts.on_write(false, true); clear_descriptors.push_back({ (VkImageAspectFlags)depth_stencil_mask, 0, depth_stencil_clear_values }); } } @@ -2762,22 +2717,9 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_ m_vertex_layout_ring_info.unmap(); } -void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool skip_reading) +void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool) { prepare_rtts(context); - - if (!skip_reading) - { - read_buffers(); - } -} - -void VKGSRender::read_buffers() -{ -} - -void VKGSRender::write_buffers() -{ } void 
VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkPipelineStageFlags pipeline_stage_flags) @@ -2948,7 +2890,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) flush_command_queue(); } - const auto color_fmt_info = vk::get_compatible_gcm_format(layout.color_format); + const auto color_fmt_info = get_compatible_gcm_format(layout.color_format); for (u8 index : m_draw_buffers) { if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue; @@ -3003,7 +2945,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) { if (!g_cfg.video.write_color_buffers) continue; - auto info = vk::get_compatible_gcm_format(surface->get_surface_color_format()); + auto info = get_compatible_gcm_format(surface->get_surface_color_format()); gcm_format = info.first; swap_bytes = info.second; } @@ -3282,7 +3224,7 @@ void VKGSRender::flip(int buffer, bool emu_flip) } else { - const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp()); + const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp(), rsx::surface_access::read); if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture) { // Confirmed to be the newest data source in that range diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 328c1ff27f9c..501d919b5eb6 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -458,8 +458,6 @@ class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control public: void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false); - void read_buffers(); - void write_buffers(); void set_viewport(); void 
set_scissor(bool clip_viewport); void bind_viewport(); diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index b975dd780e2a..9d577770e717 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -19,6 +19,10 @@ namespace vk std::unordered_map> g_typeless_textures; std::unordered_map> g_compute_tasks; + // General purpose upload heap + // TODO: Clean this up and integrate cleanly with VKGSRender + data_heap g_upload_heap; + // Garbage collection std::vector> g_deleted_typeless_textures; @@ -219,6 +223,16 @@ namespace vk return g_scratch_buffer.get(); } + data_heap* get_upload_heap() + { + if (!g_upload_heap.heap) + { + g_upload_heap.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 64 * 0x100000, "auxilliary upload heap"); + } + + return &g_upload_heap; + } + void acquire_global_submit_lock() { g_submit_mutex.lock(); @@ -241,6 +255,8 @@ namespace vk { vk::reset_compute_tasks(); vk::reset_resolve_resources(); + + g_upload_heap.reset_allocation_stats(); } void destroy_global_resources() @@ -254,6 +270,7 @@ namespace vk g_null_texture.reset(); g_null_image_view.reset(); g_scratch_buffer.reset(); + g_upload_heap.destroy(); g_typeless_textures.clear(); g_deleted_typeless_textures.clear(); diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index ceac8c6a53e3..caeaaec3c716 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -118,6 +118,7 @@ namespace vk image_view* null_image_view(vk::command_buffer&); image* get_typeless_helper(VkFormat format, u32 requested_width, u32 requested_height); buffer* get_scratch_buffer(); + data_heap* get_upload_heap(); memory_type_mapping get_memory_mapping(const physical_device& dev); gpu_formats_support get_optimal_tiling_supported_formats(const physical_device& dev); @@ -140,7 +141,7 @@ namespace vk */ void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image, const std::vector& subresource_layout, int format, bool 
is_swizzled, u16 mipmap_count, - VkImageAspectFlags flags, vk::data_heap &upload_heap); + VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align = 256); //Other texture management helpers void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range); diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index c5b18d65caf8..744f1db558fe 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -13,59 +13,42 @@ namespace vk void resolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src); void unresolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src); - struct render_target : public viewable_image, public rsx::ref_counted, public rsx::render_target_descriptor + class render_target : public viewable_image, public rsx::ref_counted, public rsx::render_target_descriptor { - u64 frame_tag = 0; // frame id when invalidated, 0 if not invalid - - using viewable_image::viewable_image; - - vk::viewable_image* get_surface(rsx::surface_access access_type) override + // Get the linear resolve target bound to this surface. Initialize if none exists + vk::viewable_image* get_resolve_target_safe(vk::command_buffer& cmd) { - if (samples() == 1 || access_type == rsx::surface_access::write) + if (!resolve_surface) { - return this; - } - - // A read barrier should have been called before this! 
- verify("Read access without explicit barrier" HERE), resolve_surface, !(msaa_flags & rsx::surface_state_flags::require_resolve); - return resolve_surface.get(); - } + // Create a resolve surface + auto pdev = vk::get_current_renderer(); + const auto resolve_w = width() * samples_x; + const auto resolve_h = height() * samples_y; - bool is_depth_surface() const override - { - return !!(aspect() & VK_IMAGE_ASPECT_DEPTH_BIT); - } - - void release_ref(vk::viewable_image* t) const override - { - static_cast(t)->release(); - } + VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + usage |= (this->info.usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); - bool matches_dimensions(u16 _width, u16 _height) const - { - //Use forward scaling to account for rounding and clamping errors - return (rsx::apply_resolution_scale(_width, true) == width()) && (rsx::apply_resolution_scale(_height, true) == height()); - } + resolve_surface.reset(new vk::viewable_image( + *pdev, + pdev->get_memory_mapping().device_local, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + VK_IMAGE_TYPE_2D, + format(), + resolve_w, resolve_h, 1, 1, 1, + VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_TILING_OPTIMAL, + usage, + 0)); - image_view* get_view(u32 remap_encoding, const std::pair, std::array>& remap, - VkImageAspectFlags mask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) override - { - if (remap_encoding != 0xDEADBEEF && resolve_surface) - { - return resolve_surface->get_view(remap_encoding, remap, mask); + resolve_surface->native_component_map = native_component_map; + resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); } - else - { - if (remap_encoding == 0xDEADBEEF) - { - // Special encoding to skip the resolve target fetch - remap_encoding = 0xAAE4; - } - return vk::viewable_image::get_view(remap_encoding, remap, mask); - } + return resolve_surface.get(); } + // 
Resolve the planar MSAA data into a linear block void resolve(vk::command_buffer& cmd) { VkImageSubresourceRange range = { aspect(), 0, 1, 0, 1 }; @@ -140,6 +123,7 @@ namespace vk msaa_flags &= ~(rsx::surface_state_flags::require_resolve); } + // Unresolve the linear data into planar MSAA data void unresolve(vk::command_buffer& cmd) { verify(HERE), !(msaa_flags & rsx::surface_state_flags::require_resolve); @@ -212,94 +196,229 @@ namespace vk msaa_flags &= ~(rsx::surface_state_flags::require_unresolve); } - void memory_barrier(vk::command_buffer& cmd, rsx::surface_access access) + // Default-initialize memory without loading + void clear_memory(vk::command_buffer& cmd, vk::image *surface) + { + const auto optimal_layout = (surface->current_layout == VK_IMAGE_LAYOUT_GENERAL) ? + VK_IMAGE_LAYOUT_GENERAL : + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + surface->push_layout(cmd, optimal_layout); + + VkImageSubresourceRange range{ surface->aspect(), 0, 1, 0, 1 }; + if (surface->aspect() & VK_IMAGE_ASPECT_COLOR_BIT) + { + VkClearColorValue color = { 0.f, 0.f, 0.f, 1.f }; + vkCmdClearColorImage(cmd, surface->value, surface->current_layout, &color, 1, &range); + } + else + { + VkClearDepthStencilValue clear{ 1.f, 255 }; + vkCmdClearDepthStencilImage(cmd, surface->value, surface->current_layout, &clear, 1, &range); + } + + surface->pop_layout(cmd); + + if (surface == this) + { + state_flags &= ~rsx::surface_state_flags::erase_bkgnd; + } + } + + // Load memory from cell and use to initialize the surface + void load_memory(vk::command_buffer& cmd) { - // Helper to optionally clear/initialize memory contents depending on barrier type - auto clear_surface_impl = [&cmd, this](vk::image* surface) + auto& upload_heap = *vk::get_upload_heap(); + + u32 gcm_format; + if (is_depth_surface()) + { + gcm_format = get_compatible_gcm_format(format_info.gcm_depth_format).first; + } + else { - const auto optimal_layout = (surface->current_layout == VK_IMAGE_LAYOUT_GENERAL) ? 
- VK_IMAGE_LAYOUT_GENERAL : - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + auto fmt = get_compatible_gcm_format(format_info.gcm_color_format); + if (fmt.second) + { + switch (fmt.first) + { + case CELL_GCM_TEXTURE_A8R8G8B8: + case CELL_GCM_TEXTURE_D8R8G8B8: + //Hack + gcm_format = CELL_GCM_TEXTURE_X32_FLOAT; + break; + default: + gcm_format = fmt.first; + break; + } + } + else + { + gcm_format = fmt.first; + } + } - surface->push_layout(cmd, optimal_layout); + rsx_subresource_layout subres{}; + subres.width_in_block = surface_width * samples_x; + subres.height_in_block = surface_height * samples_y; + subres.pitch_in_block = rsx_pitch / get_bpp(); + subres.depth = 1; + subres.data = { (const gsl::byte*)vm::get_super_ptr(base_addr), s32(rsx_pitch * surface_height * samples_y) }; + + if (LIKELY(g_cfg.video.resolution_scale_percent == 100 && samples() == 1)) + { + push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + vk::copy_mipmaped_image_using_buffer(cmd, this, { subres }, gcm_format, false, 1, aspect(), upload_heap, rsx_pitch); + pop_layout(cmd); + } + else + { + vk::image* content = nullptr; + vk::image* final_dst = (samples() > 1) ? 
get_resolve_target_safe(cmd) : this; - VkImageSubresourceRange range{ surface->aspect(), 0, 1, 0, 1 }; - if (surface->aspect() & VK_IMAGE_ASPECT_COLOR_BIT) + if (LIKELY(g_cfg.video.resolution_scale_percent == 100)) { - VkClearColorValue color = { 0.f, 0.f, 0.f, 1.f }; - vkCmdClearColorImage(cmd, surface->value, surface->current_layout, &color, 1, &range); + verify(HERE), samples() > 1; + content = get_resolve_target_safe(cmd); } else { - VkClearDepthStencilValue clear{ 1.f, 255 }; - vkCmdClearDepthStencilImage(cmd, surface->value, surface->current_layout, &clear, 1, &range); + content = vk::get_typeless_helper(format(), subres.width_in_block, subres.height_in_block); + content->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); } - surface->pop_layout(cmd); + // Load Cell data into temp buffer + vk::copy_mipmaped_image_using_buffer(cmd, content, { subres }, gcm_format, false, 1, aspect(), upload_heap, rsx_pitch); + + // Write into final image + if (content != final_dst) + { + vk::copy_scaled_image(cmd, content->value, final_dst->value, content->current_layout, final_dst->current_layout, + { 0, 0, subres.width_in_block, subres.height_in_block }, { 0, 0, (s32)final_dst->width(), (s32)final_dst->height() }, + 1, aspect(), true, aspect() == VK_IMAGE_ASPECT_COLOR_BIT ? VK_FILTER_LINEAR : VK_FILTER_NEAREST, + format(), format()); + } + + if (samples() > 1) + { + // Trigger unresolve + msaa_flags = rsx::surface_state_flags::require_unresolve; + } + } + + state_flags &= ~rsx::surface_state_flags::erase_bkgnd; + } + + void initialize_memory(vk::command_buffer& cmd, bool read_access) + { + const bool memory_load = is_depth_surface() ? 
+ !!g_cfg.video.read_depth_buffer : + !!g_cfg.video.read_color_buffers; - if (surface == this) + if (!memory_load) + { + clear_memory(cmd, this); + + if (read_access && samples() > 1) { - state_flags &= ~rsx::surface_state_flags::erase_bkgnd; + // Only clear the resolve surface if reading from it, otherwise it's a waste + clear_memory(cmd, get_resolve_target_safe(cmd)); } - }; - auto get_resolve_target = [&]() + msaa_flags = rsx::surface_state_flags::ready; + } + else + { + load_memory(cmd); + } + } + + public: + u64 frame_tag = 0; // frame id when invalidated, 0 if not invalid + using viewable_image::viewable_image; + + vk::viewable_image* get_surface(rsx::surface_access access_type) override + { + if (samples() == 1 || access_type == rsx::surface_access::write) + { + return this; + } + + // A read barrier should have been called before this! + verify("Read access without explicit barrier" HERE), resolve_surface, !(msaa_flags & rsx::surface_state_flags::require_resolve); + return resolve_surface.get(); + } + + bool is_depth_surface() const override + { + return !!(aspect() & VK_IMAGE_ASPECT_DEPTH_BIT); + } + + void release_ref(vk::viewable_image* t) const override + { + static_cast(t)->release(); + } + + bool matches_dimensions(u16 _width, u16 _height) const + { + //Use forward scaling to account for rounding and clamping errors + return (rsx::apply_resolution_scale(_width, true) == width()) && (rsx::apply_resolution_scale(_height, true) == height()); + } + + image_view* get_view(u32 remap_encoding, const std::pair, std::array>& remap, + VkImageAspectFlags mask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) override + { + if (remap_encoding != 0xDEADBEEF && resolve_surface) + { + return resolve_surface->get_view(remap_encoding, remap, mask); + } + else { - if (!resolve_surface) + if (remap_encoding == 0xDEADBEEF) { - // Create a resolve surface - auto pdev = vk::get_current_renderer(); - const auto resolve_w = width() * samples_x; - const auto resolve_h 
= height() * samples_y; - - VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; - usage |= (this->info.usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); - - resolve_surface.reset(new vk::viewable_image( - *pdev, - pdev->get_memory_mapping().device_local, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - VK_IMAGE_TYPE_2D, - format(), - resolve_w, resolve_h, 1, 1, 1, - VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_TILING_OPTIMAL, - usage, - 0)); - - resolve_surface->native_component_map = native_component_map; - resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); + // Special encoding to skip the resolve target fetch + remap_encoding = 0xAAE4; } - return resolve_surface.get(); - }; + return vk::viewable_image::get_view(remap_encoding, remap, mask); + } + } + void memory_barrier(vk::command_buffer& cmd, rsx::surface_access access) + { const bool read_access = (access != rsx::surface_access::write); - if (samples() > 1 && read_access) + const bool is_depth = is_depth_surface(); + + if ((g_cfg.video.read_color_buffers && !is_depth) || + (g_cfg.video.read_depth_buffer && is_depth)) { - get_resolve_target(); + // TODO: Decide what to do when memory loads are disabled but the underlying has memory changed + // NOTE: Assume test() is expensive when in a pinch + if (last_use_tag && state_flags == rsx::surface_state_flags::ready && !test()) + { + // TODO: Figure out why merely returning and failing the test does not work when reading (TLoU) + // The result should have been the same either way + state_flags |= rsx::surface_state_flags::erase_bkgnd; + } } if (LIKELY(old_contents.empty())) { if (state_flags & rsx::surface_state_flags::erase_bkgnd) { - clear_surface_impl(this); + // NOTE: This step CAN introduce MSAA flags! 
+ initialize_memory(cmd, read_access); - if (resolve_surface && read_access) - { - // Only clear the resolve surface if reading from it, otherwise it's a waste - clear_surface_impl(resolve_surface.get()); - } - - on_write(rsx::get_shared_tag(), rsx::surface_state_flags::ready); + verify(HERE), state_flags == rsx::surface_state_flags::ready; + on_write(rsx::get_shared_tag(), static_cast(msaa_flags)); } - else if (msaa_flags & rsx::surface_state_flags::require_resolve) + + if (msaa_flags & rsx::surface_state_flags::require_resolve) { if (read_access) { // Only do this step when read access is required + get_resolve_target_safe(cmd); resolve(cmd); } } @@ -308,6 +427,7 @@ namespace vk if (!read_access) { // Only do this step when it is needed to start rendering + verify(HERE), resolve_surface; unresolve(cmd); } } @@ -316,11 +436,13 @@ namespace vk } // Memory transfers - vk::image *target_image = (samples() > 1) ? get_resolve_target() : this; + vk::image *target_image = (samples() > 1) ? get_resolve_target_safe(cmd) : this; vk::blitter hw_blitter; - bool optimize_copy = true; const auto dst_bpp = get_bpp(); + unsigned first = prepare_rw_barrier_for_transfer(this); + bool optimize_copy = true; + bool any_valid_writes = false; for (auto i = first; i < old_contents.size(); ++i) { @@ -328,10 +450,19 @@ namespace vk auto src_texture = static_cast(section.source); src_texture->read_barrier(cmd); + if (LIKELY(src_texture->test())) + { + any_valid_writes = true; + } + else + { + continue; + } + const auto src_bpp = src_texture->get_bpp(); rsx::typeless_xfer typeless_info{}; - if (src_texture->info.format == info.format) + if (LIKELY(src_texture->info.format == info.format)) { verify(HERE), src_bpp == dst_bpp; } @@ -343,7 +474,7 @@ namespace vk typeless_info.src_is_typeless = true; typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage; typeless_info.src_native_format_override = (u32)info.format; - typeless_info.src_is_depth = 
src_texture->is_depth_surface(); + typeless_info.src_is_depth = is_depth; typeless_info.src_scaling_hint = f32(src_bpp) / dst_bpp; } } @@ -371,12 +502,12 @@ namespace vk } else if (state_flags & rsx::surface_state_flags::erase_bkgnd) { - clear_surface_impl(target_image); - - state_flags &= ~(rsx::surface_state_flags::erase_bkgnd); - msaa_flags = rsx::surface_state_flags::ready; + // Might introduce MSAA flags + initialize_memory(cmd, false); + verify(HERE), state_flags == rsx::surface_state_flags::ready; } - else if (msaa_flags & rsx::surface_state_flags::require_resolve) + + if (msaa_flags & rsx::surface_state_flags::require_resolve) { // Need to forward resolve this resolve(cmd); @@ -393,6 +524,24 @@ namespace vk optimize_copy = optimize_copy && !memory_load; } + if (UNLIKELY(!any_valid_writes)) + { + LOG_WARNING(RSX, "Surface at 0x%x inherited stale references", base_addr); + + clear_rw_barrier(); + shuffle_tag(); + + if (!read_access) + { + // This will be modified either way + state_flags |= rsx::surface_state_flags::erase_bkgnd; + memory_barrier(cmd, access); + } + + return; + } + + // NOTE: Optimize flag relates to stencil resolve/unresolve for NVIDIA. 
on_write_copy(0, optimize_copy); if (!read_access && samples() > 1) @@ -592,8 +741,6 @@ namespace rsx prev.target = sink.get(); sink->rsx_pitch = ref->get_rsx_pitch(); - sink->sync_tag(); - if (!sink->old_contents.empty()) { // Deal with this, likely only needs to clear diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index a4bfcd223916..87ab340facc7 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -510,7 +510,7 @@ namespace vk void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image, const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, - VkImageAspectFlags flags, vk::data_heap &upload_heap) + VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align) { u32 mipmap_level = 0; u32 block_in_pixel = get_format_block_size_in_texel(format); @@ -518,7 +518,8 @@ namespace vk for (const rsx_subresource_layout &layout : subresource_layout) { - u32 row_pitch = align(layout.width_in_block * block_size_in_bytes, 256); + u32 row_pitch = (((layout.width_in_block * block_size_in_bytes) + heap_align - 1) / heap_align) * heap_align; + if (heap_align != 256) verify(HERE), row_pitch == heap_align; u32 image_linear_size = row_pitch * layout.height_in_block * layout.depth; //Map with extra padding bytes in case of realignment @@ -527,7 +528,7 @@ namespace vk VkBuffer buffer_handle = upload_heap.heap->value; gsl::span mapped{ (gsl::byte*)mapped_buffer, ::narrow(image_linear_size) }; - upload_texture_subresource(mapped, layout, format, is_swizzled, false, 256); + upload_texture_subresource(mapped, layout, format, is_swizzled, false, heap_align); upload_heap.unmap(); VkBufferImageCopy copy_info = {};