diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 7c8efe110054..eddfd8d29ffc 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -167,7 +167,7 @@ namespace current_subresource_layout.depth = depth; // src_pitch in texture can uses 20 bits so fits on 32 bits int. u32 src_pitch_in_block = padded_row ? suggested_pitch_in_bytes / block_size_in_bytes : miplevel_width_in_block; - current_subresource_layout.pitch_in_bytes = src_pitch_in_block; + current_subresource_layout.pitch_in_block = src_pitch_in_block; current_subresource_layout.data = gsl::span(texture_data_pointer + offset_in_src, src_pitch_in_block * block_size_in_bytes * miplevel_height_in_block * depth); @@ -277,7 +277,7 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre u16 w = src_layout.width_in_block; u16 h = src_layout.height_in_block; u16 depth = src_layout.depth; - u32 pitch = src_layout.pitch_in_bytes; + u32 pitch = src_layout.pitch_in_block; // Ignore when texture width > pitch if (w > pitch) @@ -290,7 +290,7 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre if (is_swizzled) copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; } @@ -312,34 +312,42 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre if (is_swizzled) copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), 
gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; } - case CELL_GCM_TEXTURE_DEPTH24_D8: // Untested + case CELL_GCM_TEXTURE_DEPTH24_D8: case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: // Untested + { + if (is_swizzled) + copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); + else + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); + break; + } + case CELL_GCM_TEXTURE_A8R8G8B8: case CELL_GCM_TEXTURE_D8R8G8B8: { if (is_swizzled) copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; } case CELL_GCM_TEXTURE_Y16_X16: case CELL_GCM_TEXTURE_Y16_X16_FLOAT: case CELL_GCM_TEXTURE_X32_FLOAT: - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, 
get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; case CELL_GCM_TEXTURE_COMPRESSED_DXT1: @@ -348,11 +356,11 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre // PS3 uses the Nvidia VTC memory layout for compressed 3D textures. // This is only supported using Nvidia OpenGL. // Remove the VTC tiling to support ATI and Vulkan. 
- copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); } else { - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); } break; @@ -363,11 +371,11 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre // PS3 uses the Nvidia VTC memory layout for compressed 3D textures. // This is only supported using Nvidia OpenGL. // Remove the VTC tiling to support ATI and Vulkan. 
- copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); } else { - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); } break; diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index cf48823b75dd..704f544473af 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -38,7 +38,7 @@ struct rsx_subresource_layout u16 width_in_block; u16 height_in_block; u16 depth; - u32 pitch_in_bytes; + u32 pitch_in_block; }; /** diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 3659cb3dc59b..aa1165ceeb19 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -849,7 +849,7 @@ namespace rsx for (auto &tex_info : data) { auto this_address = std::get<0>(tex_info); - if (this_address > limit) + if (this_address >= limit) continue; auto surface = std::get<1>(tex_info).get(); diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index a490a0df2747..bb5298d7034f 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -29,6 +29,34 @@ namespace rsx flush_once = 1 }; + struct typeless_xfer + { + bool src_is_typeless = false; + bool dst_is_typeless = false; + bool 
src_is_depth = false; + bool dst_is_depth = false; + u32 src_gcm_format = 0; + u32 dst_gcm_format = 0; + f32 src_scaling_hint = 1.f; + f32 dst_scaling_hint = 1.f; + + void analyse() + { + if (src_is_typeless && dst_is_typeless) + { + if (src_scaling_hint == dst_scaling_hint && + src_scaling_hint != 1.f) + { + if (src_is_depth == dst_is_depth) + { + src_is_typeless = dst_is_typeless = false; + src_scaling_hint = dst_scaling_hint = 1.f; + } + } + } + } + }; + struct cached_texture_section : public rsx::buffered_section { u16 width; @@ -76,7 +104,7 @@ namespace rsx if (depth && depth != this->depth) return false; - if (mipmaps && mipmaps != this->mipmaps) + if (mipmaps && mipmaps > this->mipmaps) return false; return true; @@ -882,6 +910,7 @@ namespace rsx } section_storage_type tmp; + update_cache_tag(); m_cache[block_address].add(tmp, rsx_address, rsx_size); return m_cache[block_address].data.back(); } @@ -933,10 +962,44 @@ namespace rsx region.set_context(texture_upload_context::framebuffer_storage); region.set_sampler_status(rsx::texture_sampler_status::status_uninitialized); region.set_image_type(rsx::texture_dimension_extended::texture_dimension_2d); - update_cache_tag(); - region.set_memory_read_flags(memory_read_flags::flush_always); + m_flush_always_cache[memory_address] = memory_size; + + //Test for invalidated sections from surface cache occupying same address range + const auto &overlapped = find_texture_from_range(memory_address, memory_size); + if (overlapped.size() > 1) + { + for (auto surface : overlapped) + { + if (surface == ®ion) + continue; + + if (surface->get_context() != rsx::texture_upload_context::framebuffer_storage) + { + m_unreleased_texture_objects++; + } + else + { + if (surface->get_section_base() != memory_address) + //HACK: preserve other overlapped sections despite overlap unless new section is superseding + //TODO: write memory to cell or redesign sections to preserve the data + continue; + } + + //Memory is shared with another 
surface + //Discard it - the backend should ensure memory contents are preserved if needed + surface->set_dirty(true); + + if (surface->is_locked()) + { + surface->unprotect(); + m_cache[get_block_address(surface->get_section_base())].remove_one(); + } + } + } + + update_cache_tag(); } void set_memory_read_flags(u32 memory_address, u32 memory_size, memory_read_flags flags) @@ -1717,7 +1780,8 @@ namespace rsx { for (const auto &surface : overlapping_surfaces) { - if (surface->get_context() != rsx::texture_upload_context::blit_engine_dst) + if (surface->get_context() != rsx::texture_upload_context::blit_engine_dst || + !surface->is_locked()) continue; if (surface->get_width() >= tex_width && surface->get_height() >= tex_height) @@ -1786,25 +1850,28 @@ namespace rsx { //Since we will have dst in vram, we can 'safely' ignore the swizzle flag //TODO: Verify correct behavior - bool is_depth_blit = false; bool src_is_render_target = false; bool dst_is_render_target = false; bool dst_is_argb8 = (dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8); bool src_is_argb8 = (src.format == rsx::blit_engine::transfer_source_format::a8r8g8b8); + typeless_xfer typeless_info = {}; image_resource_type vram_texture = 0; image_resource_type dest_texture = 0; const u32 src_address = (u32)((u64)src.pixels - (u64)vm::base(0)); const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0)); - float scale_x = dst.scale_x; - float scale_y = dst.scale_y; + f32 scale_x = dst.scale_x; + f32 scale_y = dst.scale_y; //Offset in x and y for src is 0 (it is already accounted for when getting pixels_src) //Reproject final clip onto source... 
- const u16 src_w = (const u16)((f32)dst.clip_width / scale_x); - const u16 src_h = (const u16)((f32)dst.clip_height / scale_y); + u16 src_w = (u16)((f32)dst.clip_width / scale_x); + u16 src_h = (u16)((f32)dst.clip_height / scale_y); + + u16 dst_w = dst.clip_width; + u16 dst_h = dst.clip_height; //Check if src/dst are parts of render targets auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, false, false); @@ -1848,6 +1915,51 @@ namespace rsx if (!g_cfg.video.use_gpu_texture_scaling && !(src_is_render_target || dst_is_render_target)) return false; + if (src_is_render_target) + { + const auto surf = src_subres.surface; + auto src_bpp = surf->get_native_pitch() / surf->get_surface_width(); + auto expected_bpp = src_is_argb8 ? 4 : 2; + if (src_bpp != expected_bpp) + { + //Enable type scaling in src + typeless_info.src_is_typeless = true; + typeless_info.src_is_depth = src_subres.is_depth_surface; + typeless_info.src_scaling_hint = (f32)src_bpp / expected_bpp; + typeless_info.src_gcm_format = src_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5; + + src_w = (u16)(src_w / typeless_info.src_scaling_hint); + if (!src_subres.is_clipped) + src_subres.w = (u16)(src_subres.w / typeless_info.src_scaling_hint); + else + src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src_w, src_h, src.pitch, true, false, false); + + verify(HERE), src_subres.surface != nullptr; + } + } + + if (dst_is_render_target) + { + auto dst_bpp = dst_subres.surface->get_native_pitch() / dst_subres.surface->get_surface_width(); + auto expected_bpp = dst_is_argb8 ? 4 : 2; + if (dst_bpp != expected_bpp) + { + //Enable type scaling in dst + typeless_info.dst_is_typeless = true; + typeless_info.dst_is_depth = dst_subres.is_depth_surface; + typeless_info.dst_scaling_hint = (f32)dst_bpp / expected_bpp; + typeless_info.dst_gcm_format = dst_is_argb8 ? 
CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5; + + dst_w = (u16)(dst_w / typeless_info.dst_scaling_hint); + if (!dst_subres.is_clipped) + dst_subres.w = (u16)(dst_subres.w / typeless_info.dst_scaling_hint); + else + dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst_w, dst_h, dst.pitch, true, false, false); + + verify(HERE), dst_subres.surface != nullptr; + } + } + reader_lock lock(m_cache_mutex); //Check if trivial memcpy can perform the same task @@ -1871,7 +1983,7 @@ namespace rsx u16 max_dst_width = dst.width; u16 max_dst_height = dst.height; areai src_area = { 0, 0, src_w, src_h }; - areai dst_area = { 0, 0, dst.clip_width, dst.clip_height }; + areai dst_area = { 0, 0, dst_w, dst_h }; //1024 height is a hack (for ~720p buffers) //It is possible to have a large buffer that goes up to around 4kx4k but anything above 1280x720 is rare @@ -1949,7 +2061,7 @@ namespace rsx dest_texture = dst_subres.surface->get_surface(); - max_dst_width = dst_subres.surface->get_surface_width(); + max_dst_width = (u16)(dst_subres.surface->get_surface_width() * typeless_info.dst_scaling_hint); max_dst_height = dst_subres.surface->get_surface_height(); } @@ -2000,7 +2112,7 @@ namespace rsx rsx_subresource_layout subres = {}; subres.width_in_block = src.width; subres.height_in_block = src.slice_h; - subres.pitch_in_bytes = pitch_in_block; + subres.pitch_in_block = pitch_in_block; subres.depth = 1; subres.data = { (const gsl::byte*)src.pixels, src.pitch * src.slice_h }; subresource_layout.push_back(subres); @@ -2020,7 +2132,7 @@ namespace rsx u16 src_subres_h = src_subres.h; get_rsx_dimensions(src_subres_w, src_subres_h, src_subres.surface); - const int dst_width = (int)(src_subres_w * scale_x); + const int dst_width = (int)(src_subres_w * scale_x * typeless_info.src_scaling_hint); const int dst_height = (int)(src_subres_h * scale_y); dst_area.x2 = dst_area.x1 + dst_width; @@ -2038,34 +2150,55 @@ namespace rsx vram_texture = 
src_subres.surface->get_surface(); } - bool format_mismatch = false; + const bool src_is_depth = src_subres.is_depth_surface; + const bool dst_is_depth = dst_is_render_target? dst_subres.is_depth_surface : + dest_texture ? cached_dest->is_depth_texture() : src_is_depth; + + //Type of blit decided by the source, destination use should adapt on the fly + const bool is_depth_blit = src_is_depth; - if (src_subres.is_depth_surface) + bool format_mismatch = (src_is_depth != dst_is_depth); + if (format_mismatch) { - if (dest_texture) + if (dst_is_render_target) { - if (dst_is_render_target) - { - if (!dst_subres.is_depth_surface) - { - LOG_ERROR(RSX, "Depth->RGBA blit requested but not supported"); - return true; - } - } - else - { - if (!cached_dest->has_compatible_format(src_subres.surface)) - format_mismatch = true; - } + LOG_ERROR(RSX, "Depth<->RGBA blit on a framebuffer requested but not supported"); + return false; + } + } + else if (src_is_render_target && cached_dest) + { + switch (cached_dest->get_gcm_format()) + { + case CELL_GCM_TEXTURE_A8R8G8B8: + case CELL_GCM_TEXTURE_DEPTH24_D8: + format_mismatch = !dst_is_argb8; + break; + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_DEPTH16: + format_mismatch = dst_is_argb8; + break; + default: + format_mismatch = true; + break; } - - is_depth_blit = true; } //TODO: Check for other types of format mismatch if (format_mismatch) { lock.upgrade(); + + //Mark for removal as the memory is not reusable now + if (cached_dest->is_locked()) + { + cached_dest->unprotect(); + m_cache[get_block_address(cached_dest->get_section_base())].remove_one(); + } + + cached_dest->set_dirty(true); + m_unreleased_texture_objects++; + invalidate_range_impl_base(cached_dest->get_section_base(), cached_dest->get_section_size(), true, false, false, true, std::forward(extras)...); dest_texture = 0; @@ -2100,7 +2233,7 @@ namespace rsx //Reproject clip offsets onto source to simplify blit if (dst.clip_x || dst.clip_y) { - const u16 
scaled_clip_offset_x = (const u16)((f32)dst.clip_x / scale_x); + const u16 scaled_clip_offset_x = (const u16)((f32)dst.clip_x / (scale_x * typeless_info.src_scaling_hint)); const u16 scaled_clip_offset_y = (const u16)((f32)dst.clip_y / scale_y); src_area.x1 += scaled_clip_offset_x; @@ -2143,14 +2276,42 @@ namespace rsx cached_dest->touch(); } - const f32 scale = rsx::get_resolution_scale(); - if (src_is_render_target) - src_area = src_area * scale; + if (rsx::get_resolution_scale_percent() != 100) + { + const f32 resolution_scale = rsx::get_resolution_scale(); + if (src_is_render_target) + { + if (src_subres.surface->get_surface_width() > g_cfg.video.min_scalable_dimension) + { + src_area.x1 = (u16)(src_area.x1 * resolution_scale); + src_area.x2 = (u16)(src_area.x2 * resolution_scale); + } - if (dst_is_render_target) - dst_area = dst_area * scale; + if (src_subres.surface->get_surface_height() > g_cfg.video.min_scalable_dimension) + { + src_area.y1 = (u16)(src_area.y1 * resolution_scale); + src_area.y2 = (u16)(src_area.y2 * resolution_scale); + } + } + + if (dst_is_render_target) + { + if (dst_subres.surface->get_surface_width() > g_cfg.video.min_scalable_dimension) + { + dst_area.x1 = (u16)(dst_area.x1 * resolution_scale); + dst_area.x2 = (u16)(dst_area.x2 * resolution_scale); + } + + if (dst_subres.surface->get_surface_height() > g_cfg.video.min_scalable_dimension) + { + dst_area.y1 = (u16)(dst_area.y1 * resolution_scale); + dst_area.y2 = (u16)(dst_area.y2 * resolution_scale); + } + } + } - blitter.scale_image(vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit); + typeless_info.analyse(); + blitter.scale_image(vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit, typeless_info); notify_surface_changed(dst.rsx_address); blit_op_result result = true; @@ -2177,18 +2338,20 @@ namespace rsx if (m_cache_update_tag.load(std::memory_order_consume) != m_flush_always_update_timestamp) { writer_lock lock(m_cache_mutex); + bool 
update_tag = false; for (const auto &It : m_flush_always_cache) { auto& section = find_cached_texture(It.first, It.second); if (section.get_protection() != utils::protection::no) { - auto &range = m_cache[get_block_address(It.first)]; + //NOTE: find_cached_texture will increment block ctr section.reprotect(utils::protection::no); - range.notify(); + update_tag = true; } } + if (update_tag) update_cache_tag(); m_flush_always_update_timestamp = m_cache_update_tag.load(std::memory_order_consume); } } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index d0d2ef9cb78f..fb4fa39a2071 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -209,7 +209,7 @@ void GLGSRender::end() //If ds is not initialized clear it; it seems new depth textures should have depth cleared auto copy_rtt_contents = [](gl::render_target *surface) { - if (surface->get_compatible_internal_format() == surface->old_contents->get_compatible_internal_format()) + if (surface->get_internal_format() == surface->old_contents->get_internal_format()) { //Copy data from old contents onto this one //1. 
Clip a rectangular region defning the data @@ -259,7 +259,7 @@ void GLGSRender::end() if (clear_depth) { gl_state.depth_mask(GL_TRUE); - gl_state.clear_depth(1.0); + gl_state.clear_depth(1.f); gl_state.clear_stencil(255); mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; } @@ -284,7 +284,7 @@ void GLGSRender::end() } if (ds && ds->old_contents != nullptr && ds->get_rsx_pitch() == ds->old_contents->get_rsx_pitch() && - ds->old_contents->get_compatible_internal_format() == gl::texture::internal_format::rgba8) + ds->old_contents->get_internal_format() == gl::texture::internal_format::rgba8) { m_depth_converter.run(ds->width(), ds->height(), ds->id(), ds->old_contents->id()); ds->old_contents = nullptr; @@ -401,6 +401,8 @@ void GLGSRender::end() //Bind textures and resolve external copy operations std::chrono::time_point textures_start = steady_clock::now(); int unused_location; + void *unused = nullptr; + gl::texture_view* tmp_view; for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) { @@ -413,18 +415,18 @@ void GLGSRender::end() if (tex.enabled()) { - GLenum target = gl::get_target(sampler_state->image_type); if (sampler_state->image_handle) { - glBindTexture(target, sampler_state->image_handle); + sampler_state->image_handle->bind(); } - else if (sampler_state->external_subresource_desc.external_handle) + else if (sampler_state->external_subresource_desc.external_handle && + (tmp_view = m_gl_texture_cache.create_temporary_subresource(unused, sampler_state->external_subresource_desc))) { - void *unused = nullptr; - glBindTexture(target, m_gl_texture_cache.create_temporary_subresource(unused, sampler_state->external_subresource_desc)); + tmp_view->bind(); } else { + auto target = gl::get_target(sampler_state->image_type); glBindTexture(target, m_null_textures[target]->id()); } } @@ -447,12 +449,12 @@ void GLGSRender::end() if (sampler_state->image_handle) { - glBindTexture(GL_TEXTURE_2D, sampler_state->image_handle); + 
sampler_state->image_handle->bind(); } else if (sampler_state->external_subresource_desc.external_handle) { void *unused = nullptr; - glBindTexture(GL_TEXTURE_2D, m_gl_texture_cache.create_temporary_subresource(unused, sampler_state->external_subresource_desc)); + m_gl_texture_cache.create_temporary_subresource(unused, sampler_state->external_subresource_desc)->bind(); } else { @@ -660,20 +662,16 @@ void GLGSRender::on_init_thread() //Array stream buffer { - auto &tex = m_gl_persistent_stream_buffer; - tex.create(); - tex.set_target(gl::texture::target::textureBuffer); + m_gl_persistent_stream_buffer = std::make_unique(GL_TEXTURE_BUFFER, 0, 0, 0, 0, GL_R8UI); glActiveTexture(GL_TEXTURE0 + texture_index_offset); - tex.bind(); + glBindTexture(GL_TEXTURE_BUFFER, m_gl_persistent_stream_buffer->id()); } //Register stream buffer { - auto &tex = m_gl_volatile_stream_buffer; - tex.create(); - tex.set_target(gl::texture::target::textureBuffer); + m_gl_volatile_stream_buffer = std::make_unique(GL_TEXTURE_BUFFER, 0, 0, 0, 0, GL_R8UI); glActiveTexture(GL_TEXTURE0 + texture_index_offset + 1); - tex.bind(); + glBindTexture(GL_TEXTURE_BUFFER, m_gl_volatile_stream_buffer->id()); } //Fallback null texture instead of relying on texture0 @@ -681,28 +679,20 @@ void GLGSRender::on_init_thread() std::vector pixeldata = {0, 0, 0, 0}; //1D - auto tex1D = std::make_unique(); - tex1D->create(); - tex1D->set_target(gl::texture::target::texture1D); - tex1D->config().width(1).min_lod(0.f).max_lod(0.f).pixels(pixeldata.data()).apply(); + auto tex1D = std::make_unique(GL_TEXTURE_1D, 1, 1, 1, 1, GL_RGBA8); + tex1D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8); //2D - auto tex2D = std::make_unique(); - tex2D->create(); - tex2D->set_target(gl::texture::target::texture2D); - tex2D->config().width(1).height(1).min_lod(0.f).max_lod(0.f).pixels(pixeldata.data()).apply(); + auto tex2D = std::make_unique(GL_TEXTURE_2D, 1, 1, 1, 1, GL_RGBA8); + 
tex2D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8); //3D - auto tex3D = std::make_unique(); - tex3D->create(); - tex3D->set_target(gl::texture::target::texture3D); - tex3D->config().width(1).height(1).depth(1).min_lod(0.f).max_lod(0.f).pixels(pixeldata.data()).apply(); + auto tex3D = std::make_unique(GL_TEXTURE_3D, 1, 1, 1, 1, GL_RGBA8); + tex3D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8); //CUBE - auto texCUBE = std::make_unique(); - texCUBE->create(); - texCUBE->set_target(gl::texture::target::textureCUBE); - texCUBE->config().width(1).height(1).depth(1).min_lod(0.f).max_lod(0.f).pixels(pixeldata.data()).apply(); + auto texCUBE = std::make_unique(GL_TEXTURE_CUBE_MAP, 1, 1, 1, 1, GL_RGBA8); + texCUBE->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8); m_null_textures[GL_TEXTURE_1D] = std::move(tex1D); m_null_textures[GL_TEXTURE_2D] = std::move(tex2D); @@ -745,8 +735,8 @@ void GLGSRender::on_init_thread() m_persistent_stream_view.update(m_attrib_ring_buffer.get(), 0, std::min((u32)m_attrib_ring_buffer->size(), m_max_texbuffer_size)); m_volatile_stream_view.update(m_attrib_ring_buffer.get(), 0, std::min((u32)m_attrib_ring_buffer->size(), m_max_texbuffer_size)); - m_gl_persistent_stream_buffer.copy_from(m_persistent_stream_view); - m_gl_volatile_stream_buffer.copy_from(m_volatile_stream_view); + m_gl_persistent_stream_buffer->copy_from(m_persistent_stream_view); + m_gl_volatile_stream_buffer->copy_from(m_volatile_stream_view); m_vao.element_array_buffer = *m_index_ring_buffer; @@ -871,7 +861,7 @@ void GLGSRender::on_exit() if (m_flip_tex_color) { - m_flip_tex_color.remove(); + m_flip_tex_color.reset(); } if (m_vao) @@ -879,19 +869,14 @@ void GLGSRender::on_exit() m_vao.remove(); } - m_gl_persistent_stream_buffer.remove(); - m_gl_volatile_stream_buffer.remove(); + m_gl_persistent_stream_buffer.reset(); + m_gl_volatile_stream_buffer.reset(); 
for (auto &sampler : m_gl_sampler_states) { sampler.remove(); } - for (auto &tex : m_null_textures) - { - tex.second->remove(); - } - if (m_attrib_ring_buffer) { m_attrib_ring_buffer->remove(); @@ -917,6 +902,7 @@ void GLGSRender::on_exit() m_index_ring_buffer->remove(); } + m_null_textures.clear(); m_text_printer.close(); m_gl_texture_cache.destroy(); m_depth_converter.destroy(); @@ -976,25 +962,45 @@ void GLGSRender::clear_surface(u32 arg) mask |= GLenum(gl::buffers::stencil); } - if (arg & 0xf0) + if (auto colormask = (arg & 0xf0)) { - u8 clear_a = rsx::method_registers.clear_color_a(); - u8 clear_r = rsx::method_registers.clear_color_r(); - u8 clear_g = rsx::method_registers.clear_color_g(); - u8 clear_b = rsx::method_registers.clear_color_b(); + switch (rsx::method_registers.surface_color()) + { + case rsx::surface_color_format::x32: + case rsx::surface_color_format::w16z16y16x16: + case rsx::surface_color_format::w32z32y32x32: + { + //Nop + break; + } + case rsx::surface_color_format::g8b8: + { + colormask = rsx::get_g8b8_r8g8_colormask(colormask); + // Fall through + } + default: + { + u8 clear_a = rsx::method_registers.clear_color_a(); + u8 clear_r = rsx::method_registers.clear_color_r(); + u8 clear_g = rsx::method_registers.clear_color_g(); + u8 clear_b = rsx::method_registers.clear_color_b(); - gl_state.color_mask(arg & 0xf0); - gl_state.clear_color(clear_r, clear_g, clear_b, clear_a); + gl_state.color_mask(colormask); + gl_state.clear_color(clear_r, clear_g, clear_b, clear_a); - mask |= GLenum(gl::buffers::color); + mask |= GLenum(gl::buffers::color); - for (auto &rtt : m_rtts.m_bound_render_targets) - { - if (std::get<0>(rtt) != 0) + for (auto &rtt : m_rtts.m_bound_render_targets) { - std::get<1>(rtt)->set_cleared(true); - std::get<1>(rtt)->old_contents = nullptr; + if (std::get<0>(rtt) != 0) + { + std::get<1>(rtt)->set_cleared(true); + std::get<1>(rtt)->old_contents = nullptr; + } } + + break; + } } } @@ -1153,6 +1159,12 @@ void 
GLGSRender::update_draw_state() bool color_mask_r = rsx::method_registers.color_mask_r(); bool color_mask_a = rsx::method_registers.color_mask_a(); + if (rsx::method_registers.surface_color() == rsx::surface_color_format::g8b8) + { + //Map GB components onto RG + rsx::get_g8b8_r8g8_colormask(color_mask_r, color_mask_g, color_mask_b, color_mask_a); + } + gl_state.color_mask(color_mask_r, color_mask_g, color_mask_b, color_mask_a); gl_state.depth_mask(rsx::method_registers.depth_write_enabled()); gl_state.stencil_mask(rsx::method_registers.stencil_mask()); @@ -1306,7 +1318,7 @@ void GLGSRender::flip(int buffer) u32 buffer_height = display_buffers[buffer].height; u32 buffer_pitch = display_buffers[buffer].pitch; - if ((u32)buffer < display_buffers_count && buffer_width && buffer_height) + if ((u32)buffer < display_buffers_count && buffer_width && buffer_height && buffer_pitch) { // Calculate blit coordinates coordi aspect_ratio; @@ -1344,49 +1356,39 @@ void GLGSRender::flip(int buffer) buffer_width = render_target_texture->width(); buffer_height = render_target_texture->height(); - image = render_target_texture->get_view(); + image = render_target_texture->raw_handle(); } else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address)) { //Hack - this should be the first location to check for output //The render might have been done offscreen or in software and a blit used to display - image = surface->get_raw_view(); - - //Reset color swizzle - glBindTexture(GL_TEXTURE_2D, image); - const GLenum rgba_shuffle[] = { GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA }; - glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, (GLint*)rgba_shuffle); - surface->set_sampler_status(rsx::texture_sampler_status::status_uninitialized); + image = surface->get_raw_texture()->id(); } else { LOG_WARNING(RSX, "Flip texture was not found in cache. 
Uploading surface from CPU"); if (!buffer_pitch) buffer_pitch = buffer_width * 4; - if (!m_flip_tex_color || m_flip_tex_color.size() != sizei{ (int)buffer_width, (int)buffer_height }) - { - m_flip_tex_color.recreate(gl::texture::target::texture2D); + gl::pixel_unpack_settings unpack_settings; + unpack_settings.aligment(1).row_length(buffer_pitch / 4); - m_flip_tex_color.config() - .size({ (int)buffer_width, (int)buffer_height }) - .type(gl::texture::type::uint_8_8_8_8) - .format(gl::texture::format::bgra); - - m_flip_tex_color.pixel_unpack_settings().aligment(1).row_length(buffer_pitch / 4); + if (!m_flip_tex_color || m_flip_tex_color->size2D() != sizei{ (int)buffer_width, (int)buffer_height }) + { + m_flip_tex_color.reset(new gl::texture(GL_TEXTURE_2D, buffer_width, buffer_height, 1, 1, GL_RGBA8)); } if (buffer_region.tile) { std::unique_ptr temp(new u8[buffer_height * buffer_pitch]); buffer_region.read(temp.get(), buffer_width, buffer_height, buffer_pitch); - m_flip_tex_color.copy_from(temp.get(), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8); + m_flip_tex_color->copy_from(temp.get(), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8, unpack_settings); } else { - m_flip_tex_color.copy_from(buffer_region.ptr, gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8); + m_flip_tex_color->copy_from(buffer_region.ptr, gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8, unpack_settings); } - image = m_flip_tex_color.id(); + image = m_flip_tex_color->id(); } areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height }); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 4eec4eb81aeb..7d00c75511d4 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -158,7 +158,7 @@ struct driver_state { if (!test_property(GL_COLOR_WRITEMASK, mask)) { - glColorMask(((mask & 0x20) ? 1 : 0), ((mask & 0x40) ? 1 : 0), ((mask & 0x80) ? 1 : 0), ((mask & 0x10) ? 
1 : 0)); + glColorMask(((mask & 0x10) ? 1 : 0), ((mask & 0x20) ? 1 : 0), ((mask & 0x40) ? 1 : 0), ((mask & 0x80) ? 1 : 0)); properties[GL_COLOR_WRITEMASK] = mask; } } @@ -166,10 +166,10 @@ struct driver_state void color_mask(bool r, bool g, bool b, bool a) { u32 mask = 0; - if (r) mask |= 0x20; - if (g) mask |= 0x40; - if (b) mask |= 0x80; - if (a) mask |= 0x10; + if (r) mask |= 0x10; + if (g) mask |= 0x20; + if (b) mask |= 0x40; + if (a) mask |= 0x80; color_mask(mask); } @@ -281,8 +281,8 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control gl::buffer_view m_persistent_stream_view; gl::buffer_view m_volatile_stream_view; - gl::texture m_gl_persistent_stream_buffer; - gl::texture m_gl_volatile_stream_buffer; + std::unique_ptr m_gl_persistent_stream_buffer; + std::unique_ptr m_gl_volatile_stream_buffer; std::unique_ptr m_attrib_ring_buffer; std::unique_ptr m_fragment_constants_buffer; @@ -323,7 +323,7 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control //buffer gl::fbo draw_fbo; gl::fbo m_flip_fbo; - gl::texture m_flip_tex_color; + std::unique_ptr m_flip_tex_color; //vaos are mandatory for core profile gl::vao m_vao; diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.cpp b/rpcs3/Emu/RSX/GL/GLHelpers.cpp index 588aee149274..31ca4a8ce56c 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.cpp +++ b/rpcs3/Emu/RSX/GL/GLHelpers.cpp @@ -306,279 +306,6 @@ namespace gl return m_size; } - void texture::settings::apply(const texture &texture) const - { - save_binding_state save(texture); - - texture.pixel_unpack_settings().apply(); - - if (compressed_format(m_internal_format)) - { - int compressed_image_size = m_compressed_image_size; - if (!compressed_image_size) - { - switch (m_internal_format) - { - case texture::internal_format::compressed_rgb_s3tc_dxt1: - compressed_image_size = ((m_width + 2) / 3) * ((m_height + 2) / 3) * 6; - break; - - case texture::internal_format::compressed_rgba_s3tc_dxt1: - compressed_image_size = ((m_width + 3) / 4) * 
((m_height + 3) / 4) * 8; - break; - - case texture::internal_format::compressed_rgba_s3tc_dxt3: - case texture::internal_format::compressed_rgba_s3tc_dxt5: - compressed_image_size = ((m_width + 3) / 4) * ((m_height + 3) / 4) * 16; - break; - default: - fmt::throw_exception("Tried to load unimplemented internal_format type." HERE); - break; - } - } - - if (m_parent->get_target() != gl::texture::target::texture2D) - fmt::throw_exception("Mutable compressed texture of non-2D type is unimplemented" HERE); - - glCompressedTexImage2D((GLenum)m_parent->get_target(), m_level, (GLint)m_internal_format, m_width, m_height, 0, compressed_image_size, m_pixels); - } - else - { - switch ((GLenum)m_parent->get_target()) - { - case GL_TEXTURE_1D: - { - glTexImage1D(GL_TEXTURE_1D, m_level, (GLint)m_internal_format, m_width, 0, (GLint)m_format, (GLint)m_type, m_pixels); - break; - } - case GL_TEXTURE_2D: - { - glTexImage2D(GL_TEXTURE_2D, m_level, (GLint)m_internal_format, m_width, m_height, 0, (GLint)m_format, (GLint)m_type, m_pixels); - break; - } - case GL_TEXTURE_3D: - { - glTexImage3D(GL_TEXTURE_3D, m_level, (GLint)m_internal_format, m_width, m_height, m_depth, 0, (GLint)m_format, (GLint)m_type, m_pixels); - break; - } - case GL_TEXTURE_CUBE_MAP: - { - for (int face = 0; face < 6; ++face) - glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face, m_level, (GLint)m_internal_format, m_width, m_height, 0, (GLint)m_format, (GLint)m_type, m_pixels); - break; - } - } - } - - glTexParameteri((GLenum)m_parent->get_target(), GL_TEXTURE_MAX_LEVEL, m_max_level); - - if (m_pixels && m_generate_mipmap) - { - glGenerateMipmap((GLenum)m_parent->get_target()); - } - - glTexParameteri((GLenum)m_parent->get_target(), GL_TEXTURE_WRAP_S, (GLint)m_wrap_s); - glTexParameteri((GLenum)m_parent->get_target(), GL_TEXTURE_WRAP_T, (GLint)m_wrap_t); - glTexParameteri((GLenum)m_parent->get_target(), GL_TEXTURE_WRAP_R, (GLint)m_wrap_r); - - glTexParameteri((GLenum)m_parent->get_target(), GL_TEXTURE_COMPARE_MODE, 
(GLint)m_compare_mode); - glTexParameteri((GLenum)m_parent->get_target(), GL_TEXTURE_COMPARE_FUNC, (GLint)m_compare_func); - - glTexParameterf((GLenum)m_parent->get_target(), GL_TEXTURE_MIN_LOD, m_max_lod); - glTexParameterf((GLenum)m_parent->get_target(), GL_TEXTURE_MAX_LOD, m_min_lod); - glTexParameterf((GLenum)m_parent->get_target(), GL_TEXTURE_LOD_BIAS, m_lod); - - glTexParameterfv((GLenum)m_parent->get_target(), GL_TEXTURE_BORDER_COLOR, m_border_color.rgba); - - glTexParameteri((GLenum)m_parent->get_target(), GL_TEXTURE_MIN_FILTER, (GLint)m_min_filter); - glTexParameteri((GLenum)m_parent->get_target(), GL_TEXTURE_MAG_FILTER, (GLint)m_mag_filter); - - glTexParameteri((GLenum)m_parent->get_target(), GL_TEXTURE_SWIZZLE_R, (GLint)m_swizzle_r); - glTexParameteri((GLenum)m_parent->get_target(), GL_TEXTURE_SWIZZLE_G, (GLint)m_swizzle_g); - glTexParameteri((GLenum)m_parent->get_target(), GL_TEXTURE_SWIZZLE_B, (GLint)m_swizzle_b); - glTexParameteri((GLenum)m_parent->get_target(), GL_TEXTURE_SWIZZLE_A, (GLint)m_swizzle_a); - - glTexParameterf((GLenum)m_parent->get_target(), GL_TEXTURE_MAX_ANISOTROPY_EXT, m_aniso); - } - - void texture::settings::apply() - { - if (m_parent) - { - apply(*m_parent); - m_parent = nullptr; - } - } - - texture::settings& texture::settings::swizzle(texture::channel r, texture::channel g, texture::channel b, texture::channel a) - { - m_swizzle_r = r; - m_swizzle_g = g; - m_swizzle_b = b; - m_swizzle_a = a; - - return *this; - } - - texture::settings& texture::settings::format(texture::format format) - { - m_format = format; - return *this; - } - - texture::settings& texture::settings::type(texture::type type) - { - m_type = type; - return *this; - } - - texture::settings& texture::settings::internal_format(texture::internal_format format) - { - m_internal_format = format; - return *this; - } - - texture::settings& texture::settings::filter(min_filter min_filter, gl::filter mag_filter) - { - m_min_filter = min_filter; - m_mag_filter = 
mag_filter; - - return *this; - } - - texture::settings& texture::settings::width(uint width) - { - m_width = width; - return *this; - } - - texture::settings& texture::settings::height(uint height) - { - m_height = height; - return *this; - } - - texture::settings& texture::settings::depth(uint depth) - { - m_depth = depth; - return *this; - } - - texture::settings& texture::settings::size(sizei size) - { - return width(size.width).height(size.height); - } - - texture::settings& texture::settings::level(int value) - { - m_level = value; - return *this; - } - - texture::settings& texture::settings::compressed_image_size(int size) - { - m_compressed_image_size = size; - return *this; - } - - texture::settings& texture::settings::pixels(const void* pixels) - { - m_pixels = pixels; - return *this; - } - - texture::settings& texture::settings::aniso(float value) - { - m_aniso = value; - return *this; - } - - texture::settings& texture::settings::compare_mode(texture::compare_mode value) - { - m_compare_mode = value; - return *this; - } - texture::settings& texture::settings::compare_func(texture::compare_func value) - { - m_compare_func = value; - return *this; - } - texture::settings& texture::settings::compare(texture::compare_func func, texture::compare_mode mode) - { - return compare_func(func).compare_mode(mode); - } - - texture::settings& texture::settings::wrap_s(texture::wrap value) - { - m_wrap_s = value; - return *this; - } - texture::settings& texture::settings::wrap_t(texture::wrap value) - { - m_wrap_t = value; - return *this; - } - texture::settings& texture::settings::wrap_r(texture::wrap value) - { - m_wrap_r = value; - return *this; - } - texture::settings& texture::settings::wrap(texture::wrap s, texture::wrap t, texture::wrap r) - { - return wrap_s(s).wrap_t(t).wrap_r(r); - } - - texture::settings& texture::settings::max_lod(float value) - { - m_max_lod = value; - return *this; - } - texture::settings& texture::settings::min_lod(float value) - { - 
m_min_lod = value; - return *this; - } - texture::settings& texture::settings::lod(float value) - { - m_lod = value; - return *this; - } - texture::settings& texture::settings::max_level(int value) - { - m_max_level = value; - return *this; - } - texture::settings& texture::settings::generate_mipmap(bool value) - { - m_generate_mipmap = value; - return *this; - } - texture::settings& texture::settings::mipmap(int level, int max_level, float lod, float min_lod, float max_lod, bool generate) - { - return this->level(level).max_level(max_level).lod(lod).min_lod(min_lod).max_lod(max_lod).generate_mipmap(generate); - } - - texture::settings& texture::settings::border_color(color4f value) - { - m_border_color = value; - return *this; - } - - texture_view texture::with_level(int level) - { - return{ get_target(), id() }; - } - - texture::settings texture::config() - { - return{ this }; - } - - void texture::config(const settings& settings_) - { - settings_.apply(*this); - } - bool is_primitive_native(rsx::primitive_type in) { switch (in) diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 2c66ef4a5471..bae8dea9ee79 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -53,6 +53,13 @@ namespace gl #define __glcheck #endif + //Function call wrapped in ARB_DSA vs EXT_DSA compat check +#define DSA_CALL(func, texture_name, target, ...)\ + if (::gl::get_driver_caps().ARB_dsa_supported)\ + gl##func(texture_name, __VA_ARGS__);\ + else\ + gl##func##EXT(texture_name, target, __VA_ARGS__); + class capabilities; class blitter; @@ -730,24 +737,24 @@ namespace gl glGenBuffers(1, &m_id); } - void create(GLsizeiptr size, const void* data_ = nullptr) + void create(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW) { create(); - data(size, data_); + data(size, data_, usage); } - void create(target target_, GLsizeiptr size, const void* data_ = nullptr) + void create(target target_, GLsizeiptr size, const void* 
data_ = nullptr, GLenum usage = GL_STREAM_DRAW) { create(); m_target = target_; - data(size, data_); + data(size, data_, usage); } - void data(GLsizeiptr size, const void* data_ = nullptr) + void data(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW) { target target_ = current_target(); save_binding_state save(target_, *this); - glBufferData((GLenum)target_, size, data_, GL_STREAM_DRAW); + glBufferData((GLenum)target_, size, data_, usage); m_size = size; } @@ -874,7 +881,7 @@ namespace gl buffer::create(); GLbitfield buffer_storage_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; - if (get_driver_caps().vendor_MESA) buffer_storage_flags |= GL_CLIENT_STORAGE_BIT; + if (gl::get_driver_caps().vendor_MESA) buffer_storage_flags |= GL_CLIENT_STORAGE_BIT; glBindBuffer((GLenum)m_target, m_id); glBufferStorage((GLenum)m_target, size, data, buffer_storage_flags); @@ -1306,14 +1313,8 @@ namespace gl } }; - class texture_view; class texture { - GLuint m_id = 0; - GLuint m_level = 0; - class pixel_pack_settings m_pixel_pack_settings; - class pixel_unpack_settings m_pixel_unpack_settings; - public: enum class type { @@ -1462,52 +1463,119 @@ namespace gl depth = GL_TEXTURE_DEPTH_TYPE }; + private: + GLuint m_id = 0; + GLuint m_width = 0; + GLuint m_height = 0; + GLuint m_depth = 0; + GLuint m_mipmaps = 0; + + target m_target = target::texture2D; + internal_format m_internal_format = internal_format::rgba8; + std::array m_component_layout; + + private: class save_binding_state { - GLint m_last_binding; - GLenum m_target; + GLenum target = GL_NONE; + GLuint old_binding = GL_NONE; public: - save_binding_state(const texture& new_binding) noexcept + save_binding_state(GLenum target) { - GLenum pname; - switch (new_binding.get_target()) + this->target = target; + switch (target) { - case target::texture1D: pname = GL_TEXTURE_BINDING_1D; break; - case target::texture2D: pname = GL_TEXTURE_BINDING_2D; break; - case target::texture3D: 
pname = GL_TEXTURE_BINDING_3D; break; - case target::textureCUBE: pname = GL_TEXTURE_BINDING_CUBE_MAP; break; - case target::textureBuffer: pname = GL_TEXTURE_BINDING_BUFFER; break; - default: - fmt::throw_exception("Unknown target 0x%X" HERE, (u32)new_binding.get_target()); + case GL_TEXTURE_1D: + glGetIntegerv(GL_TEXTURE_BINDING_1D, (GLint*)&old_binding); + break; + case GL_TEXTURE_2D: + glGetIntegerv(GL_TEXTURE_BINDING_2D, (GLint*)&old_binding); + break; + case GL_TEXTURE_3D: + glGetIntegerv(GL_TEXTURE_BINDING_3D, (GLint*)&old_binding); + break; + case GL_TEXTURE_CUBE_MAP: + glGetIntegerv(GL_TEXTURE_BINDING_CUBE_MAP, (GLint*)&old_binding); + break; + case GL_TEXTURE_BUFFER: + glGetIntegerv(GL_TEXTURE_BINDING_BUFFER, (GLint*)&old_binding); + break; } - - glGetIntegerv(pname, &m_last_binding); - - new_binding.bind(); - m_target = (GLenum)new_binding.get_target(); } - ~save_binding_state() noexcept + ~save_binding_state() { - glBindTexture(m_target, m_last_binding); + glBindTexture(target, old_binding); } }; + public: + texture(const texture&) = delete; + texture(texture&& texture_) = delete; - class settings; + texture(GLenum target, GLuint width, GLuint height, GLuint depth, GLuint mipmaps, GLenum sized_format) + { + save_binding_state save(target); + glGenTextures(1, &m_id); + glBindTexture(target, m_id); //Must bind to initialize the new texture - private: - target m_target = target::texture2D; + switch (target) + { + default: + fmt::throw_exception("Invalid image target 0x%X" HERE, target); + case GL_TEXTURE_1D: + glTexStorage1D(target, mipmaps, sized_format, width); + height = depth = 1; + break; + case GL_TEXTURE_2D: + case GL_TEXTURE_CUBE_MAP: + glTexStorage2D(target, mipmaps, sized_format, width, height); + depth = 1; + break; + case GL_TEXTURE_3D: + glTexStorage3D(target, mipmaps, sized_format, width, height, depth); + break; + case GL_TEXTURE_BUFFER: + break; + } + + if (target != GL_TEXTURE_BUFFER) + { + glTexParameteri(target, GL_TEXTURE_MIN_FILTER, 
GL_LINEAR); + glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(target, GL_TEXTURE_WRAP_R, GL_REPEAT); + glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, mipmaps - 1); + + m_width = width; + m_height = height; + m_depth = depth; + m_mipmaps = mipmaps; + } + + m_target = static_cast(target); + m_internal_format = static_cast(sized_format); + m_component_layout = { GL_ALPHA, GL_RED, GL_GREEN, GL_BLUE }; + } - public: - target get_target() const noexcept + ~texture() { - return m_target; + glDeleteTextures(1, &m_id); } - void set_target(target target) noexcept + void set_native_component_layout(const std::array& layout) { - m_target = target; + m_component_layout[0] = layout[0]; + m_component_layout[1] = layout[1]; + m_component_layout[2] = layout[2]; + m_component_layout[3] = layout[3]; + } + + target get_target() const noexcept + { + return m_target; } static bool compressed_format(internal_format format_) noexcept @@ -1529,303 +1597,240 @@ namespace gl return m_id; } - uint level() const noexcept - { - return m_level; - } - - void recreate() noexcept - { - if (created()) - remove(); - - create(); - } - - void recreate(target target_) noexcept - { - if (created()) - remove(); - - create(target_); - } - - void create() noexcept - { - glGenTextures(1, &m_id); - } - - void create(target target_) noexcept - { - set_target(target_); - create(); - } - - bool created() const noexcept - { - return m_id != 0; - } - - void remove() noexcept - { - glDeleteTextures(1, &m_id); - m_id = 0; - } - - void set_id(GLuint id) noexcept + explicit operator bool() const noexcept { - m_id = id; + return (m_id != 0); } - void set_level(int level) noexcept + GLuint width() const { - m_level = level; + return m_width; } - texture_view with_level(int level); - - explicit operator bool() const noexcept + 
GLuint height() const { - return created(); + return m_height; } - void bind() const noexcept + GLuint depth() const { - glBindTexture((GLenum)get_target(), id()); + return m_depth; } - settings config(); - - void config(const settings& settings_); - - class pixel_pack_settings& pixel_pack_settings() + GLuint levels() const { - return m_pixel_pack_settings; + return m_mipmaps; } - const class pixel_pack_settings& pixel_pack_settings() const + sizei size2D() const { - return m_pixel_pack_settings; + return{ (int)m_width, (int)m_height }; } - class pixel_unpack_settings& pixel_unpack_settings() + texture::internal_format get_internal_format() const { - return m_pixel_unpack_settings; + return m_internal_format; } - const class pixel_unpack_settings& pixel_unpack_settings() const + std::array get_native_component_layout() const { - return m_pixel_unpack_settings; + return m_component_layout; } - int width() const + void copy_from(const void* src, texture::format format, texture::type type, class pixel_unpack_settings pixel_settings) { - save_binding_state save(*this); - GLint result; - glGetTexLevelParameteriv((GLenum)get_target(), level(), GL_TEXTURE_WIDTH, &result); - return (int)result; - } + pixel_settings.apply(); - int height() const - { - save_binding_state save(*this); - GLint result; - glGetTexLevelParameteriv((GLenum)get_target(), level(), GL_TEXTURE_HEIGHT, &result); - return (int)result; + switch ((GLenum)m_target) + { + case GL_TEXTURE_1D: + { + DSA_CALL(TextureSubImage1D, m_id, GL_TEXTURE_1D, 0, 0, m_width, (GLenum)format, (GLenum)type, src); + break; + } + case GL_TEXTURE_2D: + { + DSA_CALL(TextureSubImage2D, m_id, GL_TEXTURE_2D, 0, 0, 0, m_width, m_height, (GLenum)format, (GLenum)type, src); + break; + } + case GL_TEXTURE_3D: + { + DSA_CALL(TextureSubImage3D, m_id, GL_TEXTURE_3D, 0, 0, 0, 0, m_width, m_height, m_depth, (GLenum)format, (GLenum)type, src); + break; + } + case GL_TEXTURE_CUBE_MAP: + { + if (::gl::get_driver_caps().ARB_dsa_supported) + { + 
glTextureSubImage3D(m_id, 0, 0, 0, 0, m_width, m_height, 6, (GLenum)format, (GLenum)type, src); + } + else + { + LOG_WARNING(RSX, "Cubemap upload via texture::copy_from is halfplemented!"); + u8* ptr = (u8*)src; + for (int face = 0; face < 6; ++face) + { + glTextureSubImage2DEXT(m_id, GL_TEXTURE_CUBE_MAP_POSITIVE_X + face, 0, 0, 0, m_width, m_height, (GLenum)format, (GLenum)type, ptr); + ptr += (m_width * m_height * 4); //TODO + } + } + break; + } + } } - int depth() const + void copy_from(buffer &buf, u32 gl_format_type, u32 offset, u32 length) { - save_binding_state save(*this); - GLint result; - glGetTexLevelParameteriv((GLenum)get_target(), level(), GL_TEXTURE_DEPTH, &result); - return (int)result; - } + if (get_target() != target::textureBuffer) + fmt::throw_exception("OpenGL error: texture cannot copy from buffer" HERE); - sizei size() const - { - return{ width(), height() }; + DSA_CALL(TextureBufferRange, m_id, GL_TEXTURE_BUFFER, gl_format_type, buf.id(), offset, length); } - size3i size3d() const + void copy_from(buffer_view &view) { - return{ width(), height(), depth() }; + copy_from(*view.value(), view.format(), view.offset(), view.range()); } - texture::format get_internal_format() const + void copy_from(const buffer& buf, texture::format format, texture::type type, class pixel_unpack_settings pixel_settings) { - save_binding_state save(*this); - GLint result; - glGetTexLevelParameteriv((GLenum)get_target(), level(), GL_TEXTURE_INTERNAL_FORMAT, &result); - return (texture::format)result; + buffer::save_binding_state save_buffer(buffer::target::pixel_unpack, buf); + copy_from(nullptr, format, type, pixel_settings); } - virtual texture::internal_format get_compatible_internal_format() const + void copy_from(void* src, texture::format format, texture::type type) { - return (texture::internal_format)get_internal_format(); + copy_from(src, format, type, pixel_unpack_settings()); } - texture::channel_type get_channel_type(texture::channel_name channel) const + 
void copy_from(const buffer& buf, texture::format format, texture::type type) { - save_binding_state save(*this); - GLint result; - glGetTexLevelParameteriv((GLenum)get_target(), level(), (GLenum)channel, &result); - return (texture::channel_type)result; + copy_from(buf, format, type, pixel_unpack_settings()); } - int get_channel_count() const + void copy_to(void* dst, texture::format format, texture::type type, class pixel_pack_settings pixel_settings) const { - int result = 0; - - if (get_channel_type(channel_name::red) != channel_type::none) - result++; - if (get_channel_type(channel_name::green) != channel_type::none) - result++; - if (get_channel_type(channel_name::blue) != channel_type::none) - result++; - if (get_channel_type(channel_name::alpha) != channel_type::none) - result++; - if (get_channel_type(channel_name::depth) != channel_type::none) - result++; - - return result; + pixel_settings.apply(); + if (gl::get_driver_caps().ARB_dsa_supported) + glGetTextureImage(m_id, 0, (GLenum)format, (GLenum)type, m_width * m_height * 16, dst); + else + glGetTextureImageEXT(m_id, (GLenum)m_target, 0, (GLenum)format, (GLenum)type, dst); } - bool compressed() const + void copy_to(const buffer& buf, texture::format format, texture::type type, class pixel_pack_settings pixel_settings) const { - save_binding_state save(*this); - GLint result; - glGetTexLevelParameteriv((GLenum)get_target(), level(), GL_TEXTURE_COMPRESSED, &result); - return result != 0; + buffer::save_binding_state save_buffer(buffer::target::pixel_pack, buf); + copy_to(nullptr, format, type, pixel_settings); } - int compressed_size() const + void copy_to(void* dst, texture::format format, texture::type type) const { - save_binding_state save(*this); - GLint result; - glGetTexLevelParameteriv((GLenum)get_target(), level(), GL_TEXTURE_COMPRESSED_IMAGE_SIZE, &result); - return (int)result; + copy_to(dst, format, type, pixel_pack_settings()); } - texture() = default; - texture(texture&) = delete; - - 
texture(texture&& texture_) - { - swap(texture_); - } - texture(target target_, GLuint id = 0) + void copy_to(const buffer& buf, texture::format format, texture::type type) const { - m_target = target_; - set_id(id); + copy_to(buf, format, type, pixel_pack_settings()); } + }; - ~texture() - { - if (created()) - remove(); - } + class texture_view + { + GLuint m_id = 0; + GLenum m_target = 0; + GLenum m_format = 0; + texture *m_image_data = nullptr; - void swap(texture& texture_) - { - auto my_old_id = id(); - auto my_old_target = get_target(); - set_id(texture_.id()); - set_target(texture_.get_target()); - texture_.set_id(my_old_id); - texture_.set_target(my_old_target); - } + GLenum component_swizzle[4]; - texture& operator = (const texture& rhs) = delete; - texture& operator = (texture&& rhs) + void create(texture* data, GLenum target, GLenum sized_format, const GLenum* argb_swizzle = nullptr) { - swap(rhs); - return *this; - } + m_target = target; + m_format = sized_format; + m_image_data = data; - void copy_from(const void* src, texture::format format, texture::type type, class pixel_unpack_settings pixel_settings) - { - save_binding_state save(*this); - pixel_settings.apply(); - __glcheck glTexSubImage2D((GLenum)get_target(), level(), 0, 0, width(), height(), (GLenum)format, (GLenum)type, src); - } + const auto num_levels = data->levels(); + const auto num_layers = (target != GL_TEXTURE_CUBE_MAP) ? 
1 : 6; - void copy_from(buffer &buf, u32 gl_format_type, u32 offset, u32 length) - { - if (get_target() != target::textureBuffer) - fmt::throw_exception("OpenGL error: texture cannot copy from buffer" HERE); + glGenTextures(1, &m_id); + glTextureView(m_id, target, data->id(), sized_format, 0, num_levels, 0, num_layers); - auto caps = get_driver_caps(); + if (argb_swizzle) + { + component_swizzle[0] = argb_swizzle[1]; + component_swizzle[1] = argb_swizzle[2]; + component_swizzle[2] = argb_swizzle[3]; + component_swizzle[3] = argb_swizzle[0]; - if (caps.EXT_dsa_supported) - __glcheck glTextureBufferRangeEXT(id(), (GLenum)target::textureBuffer, gl_format_type, buf.id(), offset, length); + glBindTexture(m_target, m_id); + glTexParameteriv(m_target, GL_TEXTURE_SWIZZLE_RGBA, (GLint*)component_swizzle); + } else - __glcheck glTextureBufferRange(id(), gl_format_type, buf.id(), offset, length); - } - - void copy_from(buffer_view &view) - { - copy_from(*view.value(), view.format(), view.offset(), view.range()); + { + component_swizzle[0] = GL_RED; + component_swizzle[1] = GL_GREEN; + component_swizzle[2] = GL_BLUE; + component_swizzle[3] = GL_ALPHA; + } } - void copy_from(const buffer& buf, texture::format format, texture::type type, class pixel_unpack_settings pixel_settings) - { - buffer::save_binding_state save_buffer(buffer::target::pixel_unpack, buf); - copy_from(nullptr, format, type, pixel_settings); - } + public: + texture_view(const texture_view&) = delete; + texture_view(texture_view&&) = delete; - void copy_from(void* src, texture::format format, texture::type type) + texture_view(texture* data, GLenum target, GLenum sized_format, const GLenum* argb_swizzle = nullptr) { - copy_from(src, format, type, pixel_unpack_settings()); + create(data, target, sized_format, argb_swizzle); } - void copy_from(const buffer& buf, texture::format format, texture::type type) + texture_view(texture* data, const GLenum* argb_swizzle = nullptr) { - copy_from(buf, format, type, 
pixel_unpack_settings()); + GLenum target = (GLenum)data->get_target(); + GLenum sized_format = (GLenum)data->get_internal_format(); + create(data, target, sized_format, argb_swizzle); } - void copy_to(void* dst, texture::format format, texture::type type, class pixel_pack_settings pixel_settings) const + ~texture_view() { - save_binding_state save(*this); - pixel_settings.apply(); - __glcheck glGetTexImage((GLenum)get_target(), level(), (GLenum)format, (GLenum)type, dst); + glDeleteTextures(1, &m_id); } - void copy_to(void* dst, texture::type type, class pixel_pack_settings pixel_settings) const + GLuint id() const { - copy_to(dst, get_internal_format(), type, pixel_settings); + return m_id; } - void copy_to(const buffer& buf, texture::format format, texture::type type, class pixel_pack_settings pixel_settings) const + GLenum target() const { - buffer::save_binding_state save_buffer(buffer::target::pixel_pack, buf); - copy_to(nullptr, format, type, pixel_settings); + return m_target; } - void copy_to(const buffer& buf, texture::type type, class pixel_pack_settings pixel_settings) const + GLenum internal_format() const { - buffer::save_binding_state save_buffer(buffer::target::pixel_pack, buf); - copy_to(nullptr, get_internal_format(), type, pixel_settings); + return m_format; } - void copy_to(void* dst, texture::format format, texture::type type) const + bool compare_swizzle(GLenum* argb_swizzle) const { - copy_to(dst, format, type, pixel_pack_settings()); + return (argb_swizzle[0] == component_swizzle[3] && + argb_swizzle[1] == component_swizzle[0] && + argb_swizzle[2] == component_swizzle[1] && + argb_swizzle[3] == component_swizzle[2]); } - void copy_to(void* dst, texture::type type) const + void bind() const { - copy_to(dst, get_internal_format(), type, pixel_pack_settings()); + glBindTexture(m_target, m_id); } - void copy_to(const buffer& buf, texture::format format, texture::type type) const + texture* image() const { - copy_to(buf, format, type, 
pixel_pack_settings()); + return m_image_data; } - void copy_to(const buffer& buf, texture::type type) const + std::array component_mapping() const { - copy_to(buf, get_internal_format(), type, pixel_pack_settings()); + return{ component_swizzle[3], component_swizzle[0], component_swizzle[1], component_swizzle[2] }; } }; @@ -1929,96 +1934,6 @@ namespace gl } }; - class texture::settings - { - texture *m_parent; - - texture::channel m_swizzle_r = texture::channel::r; - texture::channel m_swizzle_g = texture::channel::g; - texture::channel m_swizzle_b = texture::channel::b; - texture::channel m_swizzle_a = texture::channel::a; - - texture::format m_format = texture::format::rgba; - texture::internal_format m_internal_format = texture::internal_format::rgba; - texture::type m_type = texture::type::ubyte; - - gl::min_filter m_min_filter = gl::min_filter::nearest; - gl::filter m_mag_filter = gl::filter::nearest; - - uint m_width = 0; - uint m_height = 0; - uint m_depth = 1; - int m_level = 0; - - int m_compressed_image_size = 0; - - const void* m_pixels = nullptr; - float m_aniso = 1.f; - texture::compare_mode m_compare_mode = texture::compare_mode::none; - texture::compare_func m_compare_func = texture::compare_func::greater; - - texture::wrap m_wrap_s = texture::wrap::repeat; - texture::wrap m_wrap_t = texture::wrap::repeat; - texture::wrap m_wrap_r = texture::wrap::repeat; - - float m_max_lod = 1000.f; - float m_min_lod = -1000.f; - float m_lod = 0.f; - int m_max_level = 1000; - bool m_generate_mipmap = false; - - color4f m_border_color; - - public: - settings(texture *parent = nullptr) : m_parent(parent) - { - } - - ~settings() - { - apply(); - } - - void apply(const texture &texture) const; - void apply(); - - settings& swizzle( - texture::channel r = texture::channel::r, - texture::channel g = texture::channel::g, - texture::channel b = texture::channel::b, - texture::channel a = texture::channel::a); - - settings& format(texture::format format); - settings& 
type(texture::type type); - settings& internal_format(texture::internal_format format); - settings& filter(min_filter min_filter, filter mag_filter); - settings& width(uint width); - settings& height(uint height); - settings& depth(uint depth); - settings& size(sizei size); - settings& level(int value); - settings& compressed_image_size(int size); - settings& pixels(const void* pixels); - settings& aniso(float value); - settings& compare_mode(texture::compare_mode value); - settings& compare_func(texture::compare_func value); - settings& compare(texture::compare_func func, texture::compare_mode mode); - - settings& wrap_s(texture::wrap value); - settings& wrap_t(texture::wrap value); - settings& wrap_r(texture::wrap value); - settings& wrap(texture::wrap s, texture::wrap t, texture::wrap r); - - settings& max_lod(float value); - settings& min_lod(float value); - settings& lod(float value); - settings& max_level(int value); - settings& generate_mipmap(bool value); - settings& mipmap(int level, int max_level, float lod, float min_lod, float max_lod, bool generate); - - settings& border_color(color4f value); - }; - enum class indices_type { ubyte = GL_UNSIGNED_BYTE, @@ -2110,14 +2025,8 @@ namespace gl { save_binding_state save(m_parent); - switch (rhs.get_target()) - { - case texture::target::texture1D: glFramebufferTexture1D(GL_FRAMEBUFFER, m_id, GL_TEXTURE_1D, rhs.id(), rhs.level()); break; - case texture::target::texture2D: glFramebufferTexture2D(GL_FRAMEBUFFER, m_id, GL_TEXTURE_2D, rhs.id(), rhs.level()); break; - case texture::target::texture3D: glFramebufferTexture3D(GL_FRAMEBUFFER, m_id, GL_TEXTURE_3D, rhs.id(), rhs.level(), 0); break; - case texture::target::textureBuffer: - fmt::throw_exception("Tried to assign unsupported texture of type textureBuffer to fbo." 
HERE); - } + verify(HERE), rhs.get_target() == texture::target::texture2D; + glFramebufferTexture2D(GL_FRAMEBUFFER, m_id, GL_TEXTURE_2D, rhs.id(), 0); } void operator = (const GLuint rhs) @@ -2479,7 +2388,7 @@ namespace gl return result; } - int texture(GLint location, int active_texture, const gl::texture& texture) + int texture(GLint location, int active_texture, const gl::texture_view& texture) { glActiveTexture(GL_TEXTURE0 + active_texture); texture.bind(); @@ -2488,12 +2397,12 @@ namespace gl return active_texture; } - int texture(const std::string &name, int active_texture, const gl::texture& texture_) + int texture(const std::string &name, int active_texture, const gl::texture_view& texture_) { return texture(location(name), active_texture, texture_); } - int texture(const std::string &name, const gl::texture& texture_) + int texture(const std::string &name, const gl::texture_view& texture_) { int atex; auto finded = locations.find(name); @@ -2813,43 +2722,4 @@ namespace gl } }; } - - class texture_view : public texture - { - public: - texture_view(texture::target target_, GLuint id) : texture(target_, id) - { - } - - ~texture_view() - { - set_id(0); - } - }; - - class fbo_view : public fbo - { - public: - fbo_view(GLuint id) : fbo(id) - { - } - - ~fbo_view() - { - set_id(0); - } - }; - - class rbo_view : public rbo - { - public: - rbo_view(GLuint id) : rbo(id) - { - } - - ~rbo_view() - { - set_id(0); - } - }; } diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.h b/rpcs3/Emu/RSX/GL/GLOverlays.h index bec9fa694b72..29e0015ddd7f 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.h +++ b/rpcs3/Emu/RSX/GL/GLOverlays.h @@ -328,7 +328,9 @@ namespace gl u32 num_elements = 0; std::vector> resources; std::unordered_map> temp_image_cache; + std::unordered_map> temp_view_cache; std::unordered_map> font_cache; + std::unordered_map> view_cache; bool is_font_draw = false; ui_overlay_renderer() @@ -390,31 +392,28 @@ namespace gl }; } - gl::texture* 
load_simple_image(rsx::overlays::image_info* desc, bool temp_resource) + gl::texture_view* load_simple_image(rsx::overlays::image_info* desc, bool temp_resource) { - auto tex = std::make_unique(gl::texture::target::texture2D); - tex->create(); - tex->config() - .size({ desc->w, desc->h }) - .format(gl::texture::format::rgba) - .type(gl::texture::type::uint_8_8_8_8) - .wrap(gl::texture::wrap::clamp_to_border, gl::texture::wrap::clamp_to_border, gl::texture::wrap::clamp_to_border) - .swizzle(gl::texture::channel::a, gl::texture::channel::b, gl::texture::channel::g, gl::texture::channel::r) - .filter(gl::min_filter::linear, gl::filter::linear) - .apply(); + auto tex = std::make_unique(GL_TEXTURE_2D, desc->w, desc->h, 1, 1, GL_RGBA8); tex->copy_from(desc->data, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8); + GLenum remap[] = { GL_RED, GL_ALPHA, GL_BLUE, GL_GREEN }; + auto view = std::make_unique(tex.get(), remap); + + auto result = view.get(); if (!temp_resource) { resources.push_back(std::move(tex)); + view_cache[view_cache.size()] = std::move(view); } else { u64 key = (u64)desc; temp_image_cache[key] = std::move(tex); + temp_view_cache[key] = std::move(view); } - return resources.back().get(); + return result; } void create() @@ -445,38 +444,32 @@ namespace gl temp_image_cache.clear(); } - gl::texture* find_font(rsx::overlays::font *font) + gl::texture_view* find_font(rsx::overlays::font *font) { u64 key = (u64)font; - auto found = font_cache.find(key); - if (found != font_cache.end()) + auto found = view_cache.find(key); + if (found != view_cache.end()) return found->second.get(); //Create font file - auto tex = std::make_unique(gl::texture::target::texture2D); - tex->create(); - tex->config() - .size({ (int)font->width, (int)font->height }) - .format(gl::texture::format::r) - .type(gl::texture::type::ubyte) - .internal_format(gl::texture::internal_format::r8) - .wrap(gl::texture::wrap::clamp_to_border, gl::texture::wrap::clamp_to_border, 
gl::texture::wrap::clamp_to_border) - .swizzle(gl::texture::channel::r, gl::texture::channel::r, gl::texture::channel::r, gl::texture::channel::r) - .filter(gl::min_filter::linear, gl::filter::linear) - .apply(); + auto tex = std::make_unique(GL_TEXTURE_2D, (int)font->width, (int)font->height, 1, 1, GL_R8); tex->copy_from(font->glyph_data.data(), gl::texture::format::r, gl::texture::type::ubyte); - auto result = tex.get(); + GLenum remap[] = { GL_RED, GL_RED, GL_RED, GL_RED }; + auto view = std::make_unique(tex.get(), remap); + + auto result = view.get(); font_cache[key] = std::move(tex); + view_cache[key] = std::move(view); return result; } - gl::texture* find_temp_image(rsx::overlays::image_info *desc) + gl::texture_view* find_temp_image(rsx::overlays::image_info *desc) { auto key = (u64)desc; - auto cached = temp_image_cache.find(key); - if (cached != temp_image_cache.end()) + auto cached = temp_view_cache.find(key); + if (cached != temp_view_cache.end()) { return cached->second.get(); } @@ -553,7 +546,7 @@ namespace gl } default: { - glBindTexture(GL_TEXTURE_2D, resources[cmd.first.texture_ref - 1]->id()); + glBindTexture(GL_TEXTURE_2D, view_cache[cmd.first.texture_ref - 1]->id()); break; } } diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index 890a8b4a026e..788c698c8992 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -37,6 +37,7 @@ OPENGL_PROC(PFNGLBINDVERTEXARRAYPROC, BindVertexArray); OPENGL_PROC(PFNGLDELETEVERTEXARRAYSPROC, DeleteVertexArrays); OPENGL_PROC(PFNGLDEPTHRANGEFPROC, DepthRangef); OPENGL_PROC(PFNGLTEXIMAGE3DPROC, TexImage3D); +OPENGL_PROC(PFNGLTEXSUBIMAGE3DPROC, TexSubImage3D); OPENGL_PROC(PFNGLVERTEXATTRIB1FPROC, VertexAttrib1f); OPENGL_PROC(PFNGLVERTEXATTRIB1DPROC, VertexAttrib1d); @@ -175,6 +176,12 @@ OPENGL_PROC(PFNGLMULTIDRAWARRAYSPROC, MultiDrawArrays); OPENGL_PROC(PFNGLGETTEXTUREIMAGEEXTPROC, GetTextureImageEXT); OPENGL_PROC(PFNGLGETTEXTUREIMAGEPROC, GetTextureImage); 
+OPENGL_PROC(PFNGLTEXTURESUBIMAGE1DEXTPROC, TextureSubImage1DEXT); +OPENGL_PROC(PFNGLTEXTURESUBIMAGE1DPROC, TextureSubImage1D); +OPENGL_PROC(PFNGLTEXTURESUBIMAGE2DEXTPROC, TextureSubImage2DEXT); +OPENGL_PROC(PFNGLTEXTURESUBIMAGE2DPROC, TextureSubImage2D); +OPENGL_PROC(PFNGLTEXTURESUBIMAGE3DEXTPROC, TextureSubImage3DEXT); +OPENGL_PROC(PFNGLTEXTURESUBIMAGE3DPROC, TextureSubImage3D); OPENGL_PROC(PFNGLCLEARBUFFERFVPROC, ClearBufferfv); @@ -212,10 +219,13 @@ OPENGL_PROC(PFNGLDELETESYNCPROC, DeleteSync); //KHR_debug OPENGL_PROC(PFNGLDEBUGMESSAGECALLBACKPROC, DebugMessageCallback); +//Immutable textures OPENGL_PROC(PFNGLTEXSTORAGE1DPROC, TexStorage1D); OPENGL_PROC(PFNGLTEXSTORAGE2DPROC, TexStorage2D); OPENGL_PROC(PFNGLTEXSTORAGE3DPROC, TexStorage3D); -OPENGL_PROC(PFNGLTEXSUBIMAGE3DPROC, TexSubImage3D); + +//Texture_View +OPENGL_PROC(PFNGLTEXTUREVIEWPROC, TextureView); //Texture_Barrier OPENGL_PROC(PFNGLTEXTUREBARRIERPROC, TextureBarrier); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 73c4b6a909f0..42bd5396e4f0 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -52,7 +52,8 @@ color_format rsx::internals::surface_color_format_to_gl(rsx::surface_color_forma { ::gl::texture::channel::one, ::gl::texture::channel::r, ::gl::texture::channel::r, ::gl::texture::channel::r } }; case rsx::surface_color_format::g8b8: - return{ ::gl::texture::type::ubyte, ::gl::texture::format::rg, false, 2, 1 }; + return{ ::gl::texture::type::ubyte, ::gl::texture::format::rg, false, 2, 1, + { ::gl::texture::channel::g, ::gl::texture::channel::r, ::gl::texture::channel::g, ::gl::texture::channel::r } }; case rsx::surface_color_format::x32: return{ ::gl::texture::type::f32, ::gl::texture::format::r, true, 1, 4 }; @@ -79,7 +80,7 @@ depth_format rsx::internals::surface_depth_format_to_gl(rsx::surface_depth_forma LOG_ERROR(RSX, "Surface depth buffer: Unsupported surface depth format (0x%x)", (u32)depth_format); 
case rsx::surface_depth_format::z24s8: if (g_cfg.video.force_high_precision_z_buffer && ::gl::get_driver_caps().ARB_depth_buffer_float_supported) - return{ ::gl::texture::type::float32_uint8, ::gl::texture::format::depth_stencil, ::gl::texture::internal_format::depth32f_stencil8 }; + return{ ::gl::texture::type::uint_24_8, ::gl::texture::format::depth_stencil, ::gl::texture::internal_format::depth32f_stencil8 }; else return{ ::gl::texture::type::uint_24_8, ::gl::texture::format::depth_stencil, ::gl::texture::internal_format::depth24_stencil8 }; } diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index 720ad7ed0d7b..4e7f98e706c4 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -49,7 +49,7 @@ namespace rsx namespace gl { - class render_target : public texture, public rsx::ref_counted, public rsx::render_target_descriptor + class render_target : public texture, public rsx::ref_counted, public rsx::render_target_descriptor { bool is_cleared = false; @@ -62,14 +62,14 @@ namespace gl u16 surface_width = 0; u16 surface_pixel_size = 0; - texture::internal_format compatible_internal_format = texture::internal_format::rgba8; - std::array native_component_mapping; - u32 current_remap_encoding = 0; + std::unordered_map> views; public: render_target *old_contents = nullptr; - render_target() {} + render_target(GLuint width, GLuint height, GLenum sized_format) + :texture(GL_TEXTURE_2D, width, height, 1, 1, sized_format) + {} void set_cleared(bool clear=true) { @@ -113,59 +113,37 @@ namespace gl return surface_height; } - u32 get_surface() override + texture* get_surface() override { - return get_view(0xAAE4, rsx::default_remap_vector); + return (gl::texture*)this; } - u32 get_view(u32 remap_encoding, const std::pair, std::array>& remap) + texture_view* get_view(u32 remap_encoding, const std::pair, std::array>& remap) { - if (remap_encoding != current_remap_encoding) + auto found = 
views.find(remap_encoding); + if (found != views.end()) { - current_remap_encoding = remap_encoding; - - bind(); - apply_swizzle_remap(GL_TEXTURE_2D, native_component_mapping, remap); + return found->second.get(); } - return id(); + auto mapping = gl::apply_swizzle_remap(get_native_component_layout(), remap); + auto view = std::make_unique(this, mapping.data()); + auto result = view.get(); + views[remap_encoding] = std::move(view); + return result; } - u32 get_view() + u32 raw_handle() const { - //Get view with components in true native layout - //TODO: Implement real image views - const GLenum rgba_remap[4] = { GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA }; - glBindTexture(GL_TEXTURE_2D, id()); - glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, (GLint*)rgba_remap); - - //Reset view encoding - current_remap_encoding = 0; return id(); } - void set_compatible_format(texture::internal_format format) - { - compatible_internal_format = format; - } - - texture::internal_format get_compatible_internal_format() const override - { - return compatible_internal_format; - } - void update_surface() { internal_width = width(); internal_height = height(); surface_width = rsx::apply_inverse_resolution_scale(internal_width, true); surface_height = rsx::apply_inverse_resolution_scale(internal_height, true); - - bind(); - glGetTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_A, (GLint*)&native_component_mapping[0]); - glGetTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_R, (GLint*)&native_component_mapping[1]); - glGetTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_G, (GLint*)&native_component_mapping[2]); - glGetTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_B, (GLint*)&native_component_mapping[3]); } bool matches_dimensions(u16 _width, u16 _height) const @@ -192,27 +170,15 @@ struct gl_render_target_traits gl::render_target* old_surface ) { - std::unique_ptr result(new gl::render_target()); - auto format = rsx::internals::surface_color_format_to_gl(surface_color_format); auto 
internal_fmt = rsx::internals::sized_internal_format(surface_color_format); - result->recreate(gl::texture::target::texture2D); + std::unique_ptr result(new gl::render_target(rsx::apply_resolution_scale((u16)width, true), + rsx::apply_resolution_scale((u16)height, true), (GLenum)internal_fmt)); result->set_native_pitch((u16)width * format.channel_count * format.channel_size); - result->set_compatible_format(internal_fmt); - - __glcheck result->config() - .size({ (int)rsx::apply_resolution_scale((u16)width, true), (int)rsx::apply_resolution_scale((u16)height, true) }) - .type(format.type) - .format(format.format) - .internal_format(internal_fmt) - .swizzle(format.swizzle.r, format.swizzle.g, format.swizzle.b, format.swizzle.a) - .wrap(gl::texture::wrap::clamp_to_border, gl::texture::wrap::clamp_to_border, gl::texture::wrap::clamp_to_border) - .apply(); - - __glcheck result->pixel_pack_settings().swap_bytes(format.swap_bytes).aligment(1); - __glcheck result->pixel_unpack_settings().swap_bytes(format.swap_bytes).aligment(1); + std::array native_layout = { (GLenum)format.swizzle.a, (GLenum)format.swizzle.r, (GLenum)format.swizzle.g, (GLenum)format.swizzle.b }; + result->set_native_component_layout(native_layout); result->old_contents = old_surface; result->set_cleared(); @@ -229,32 +195,17 @@ struct gl_render_target_traits gl::render_target* old_surface ) { - std::unique_ptr result(new gl::render_target()); - auto format = rsx::internals::surface_depth_format_to_gl(surface_depth_format); - result->recreate(gl::texture::target::texture2D); - - const auto scale = rsx::get_resolution_scale(); - - __glcheck result->config() - .size({ (int)rsx::apply_resolution_scale((u16)width, true), (int)rsx::apply_resolution_scale((u16)height, true) }) - .type(format.type) - .format(format.format) - .internal_format(format.internal_format) - .swizzle(gl::texture::channel::r, gl::texture::channel::r, gl::texture::channel::r, gl::texture::channel::r) - 
.wrap(gl::texture::wrap::clamp_to_border, gl::texture::wrap::clamp_to_border, gl::texture::wrap::clamp_to_border) - .apply(); - - __glcheck result->pixel_pack_settings().aligment(1); - __glcheck result->pixel_unpack_settings().aligment(1); + std::unique_ptr result(new gl::render_target(rsx::apply_resolution_scale((u16)width, true), + rsx::apply_resolution_scale((u16)height, true), (GLenum)format.internal_format)); u16 native_pitch = (u16)width * 2; if (surface_depth_format == rsx::surface_depth_format::z24s8) native_pitch *= 2; + std::array native_layout = { GL_RED, GL_RED, GL_RED, GL_RED }; result->set_native_pitch(native_pitch); - result->set_compatible_format(format.internal_format); - + result->set_native_component_layout(native_layout); result->old_contents = old_surface; result->update_surface(); @@ -291,7 +242,7 @@ struct gl_render_target_traits return false; auto internal_fmt = rsx::internals::sized_internal_format(format); - return rtt->get_compatible_internal_format() == internal_fmt && rtt->matches_dimensions((u16)width, (u16)height); + return rtt->get_internal_format() == internal_fmt && rtt->matches_dimensions((u16)width, (u16)height); } static @@ -309,7 +260,7 @@ struct gl_render_target_traits { auto pixel_format = rsx::internals::surface_color_format_to_gl(color_format); std::vector result(width * height * pixel_format.channel_count * pixel_format.channel_size); - color_buffer->bind(); + glBindTexture(GL_TEXTURE_2D, color_buffer->id()); glGetTexImage(GL_TEXTURE_2D, 0, (GLenum)pixel_format.format, (GLenum)pixel_format.type, result.data()); return result; } @@ -319,7 +270,7 @@ struct gl_render_target_traits std::vector result(width * height * 4); auto pixel_format = rsx::internals::surface_depth_format_to_gl(depth_format); - depth_stencil_buffer->bind(); + glBindTexture(GL_TEXTURE_2D, depth_stencil_buffer->id()); glGetTexImage(GL_TEXTURE_2D, 0, (GLenum)pixel_format.format, (GLenum)pixel_format.type, result.data()); return result; } @@ -354,10 +305,7 @@ 
struct gl_render_targets : public rsx::surface_store invalidated_resources.remove_if([&](auto &rtt) { if (rtt->deref_count >= 2) - { - rtt->remove(); return true; - } rtt->deref_count++; return false; diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 604c32124697..ff34bc215686 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -1,6 +1,5 @@ #include "stdafx.h" #include "GLTexture.h" -#include "GLHelpers.h" #include "../GCM.h" #include "../RSXThread.h" #include "../RSXTexture.h" @@ -8,6 +7,8 @@ namespace gl { + static buffer g_typeless_transfer_buffer; + GLenum get_target(rsx::texture_dimension_extended type) { switch (type) @@ -66,7 +67,7 @@ namespace gl case CELL_GCM_TEXTURE_A8R8G8B8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8); case CELL_GCM_TEXTURE_G8B8: return std::make_tuple(GL_RG, GL_UNSIGNED_BYTE); case CELL_GCM_TEXTURE_R6G5B5: return std::make_tuple(GL_RGB, GL_UNSIGNED_SHORT_5_6_5); - case CELL_GCM_TEXTURE_DEPTH24_D8: return std::make_tuple(GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE); + case CELL_GCM_TEXTURE_DEPTH24_D8: return std::make_tuple(GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8); case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return std::make_tuple(GL_DEPTH_COMPONENT, GL_FLOAT); case CELL_GCM_TEXTURE_DEPTH16: return std::make_tuple(GL_DEPTH_COMPONENT, GL_SHORT); case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return std::make_tuple(GL_DEPTH_COMPONENT, GL_HALF_FLOAT); @@ -90,6 +91,38 @@ namespace gl fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format); } + std::tuple get_format_type(texture::internal_format format) + { + switch (format) + { + case texture::internal_format::compressed_rgba_s3tc_dxt1: + case texture::internal_format::compressed_rgba_s3tc_dxt3: + case texture::internal_format::compressed_rgba_s3tc_dxt5: + return std::make_tuple(GL_RGBA, GL_UNSIGNED_BYTE, false); + case texture::internal_format::r8: + return std::make_tuple(GL_R, GL_UNSIGNED_BYTE, false); 
+ case texture::internal_format::r32f: + return std::make_tuple(GL_R, GL_FLOAT, true); + case texture::internal_format::r5g6b5: + return std::make_tuple(GL_RGB, GL_UNSIGNED_SHORT_5_6_5, true); + case texture::internal_format::rg8: + return std::make_tuple(GL_RG, GL_UNSIGNED_BYTE, false); + case texture::internal_format::rgba8: + return std::make_tuple(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, false); + case texture::internal_format::rgba16f: + return std::make_tuple(GL_RGBA, GL_HALF_FLOAT, true); + case texture::internal_format::rgba32f: + return std::make_tuple(GL_RGBA, GL_FLOAT, true); + case texture::internal_format::depth16: + return std::make_tuple(GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, true); + case texture::internal_format::depth24_stencil8: + case texture::internal_format::depth32f_stencil8: + return std::make_tuple(GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, true); + default: + fmt::throw_exception("Unexpected internal format 0x%X" HERE, (u32)format); + } + } + GLenum get_srgb_format(GLenum in_format) { switch (in_format) @@ -173,13 +206,12 @@ namespace gl //Apply sampler state settings void sampler_state::apply(rsx::fragment_texture& tex, const rsx::sampled_image_descriptor_base* sampled_image) { - const f32 border_color = (f32)tex.border_color() / 255; - const f32 border_color_array[] = { border_color, border_color, border_color, border_color }; + const color4f border_color = rsx::decode_border_color(tex.border_color()); glSamplerParameteri(samplerHandle, GL_TEXTURE_WRAP_S, wrap_mode(tex.wrap_s())); glSamplerParameteri(samplerHandle, GL_TEXTURE_WRAP_T, wrap_mode(tex.wrap_t())); glSamplerParameteri(samplerHandle, GL_TEXTURE_WRAP_R, wrap_mode(tex.wrap_r())); - glSamplerParameterfv(samplerHandle, GL_TEXTURE_BORDER_COLOR, border_color_array); + glSamplerParameterfv(samplerHandle, GL_TEXTURE_BORDER_COLOR, border_color.rgba); if (sampled_image->upload_context != rsx::texture_upload_context::shader_read || tex.get_exact_mipmap_count() <= 1) @@ -221,7 +253,8 @@ namespace gl 
glSamplerParameteri(samplerHandle, GL_TEXTURE_MAG_FILTER, tex_mag_filter(tex.mag_filter())); const u32 texture_format = tex.format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); - if (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8) + if (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8 || + texture_format == CELL_GCM_TEXTURE_DEPTH16_FLOAT || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT) { //NOTE: The stored texture function is reversed wrt the textureProj compare function GLenum compare_mode = (GLenum)tex.zfunc() | GL_NEVER; @@ -288,10 +321,6 @@ namespace gl case CELL_GCM_TEXTURE_R6G5B5: case CELL_GCM_TEXTURE_R5G6B5: case CELL_GCM_TEXTURE_A8R8G8B8: // TODO - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: case CELL_GCM_TEXTURE_COMPRESSED_DXT1: case CELL_GCM_TEXTURE_COMPRESSED_DXT23: case CELL_GCM_TEXTURE_COMPRESSED_DXT45: @@ -299,6 +328,12 @@ namespace gl case ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return{ GL_ALPHA, GL_RED, GL_GREEN, GL_BLUE }; + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + return{ GL_RED, GL_RED, GL_RED, GL_RED }; + case CELL_GCM_TEXTURE_A4R4G4B4: return{ GL_BLUE, GL_GREEN, GL_RED, GL_ALPHA }; @@ -336,7 +371,7 @@ namespace gl fmt::throw_exception("Unknown format 0x%x" HERE, texture_format); } - GLuint create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, + gl::texture* create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, rsx::texture_dimension_extended type, rsx::texture_colorspace colorspace) { if (is_compressed_format(gcm_format)) @@ -347,43 +382,29 @@ namespace gl height = align(height, 4); } - GLuint id = 0; GLenum target; GLenum internal_format = 
get_sized_internal_format(gcm_format); if (colorspace != rsx::texture_colorspace::rgb_linear) internal_format = get_srgb_format(internal_format); - glGenTextures(1, &id); - switch (type) { case rsx::texture_dimension_extended::texture_dimension_1d: target = GL_TEXTURE_1D; - glBindTexture(GL_TEXTURE_1D, id); - glTexStorage1D(GL_TEXTURE_1D, mipmaps, internal_format, width); break; case rsx::texture_dimension_extended::texture_dimension_2d: target = GL_TEXTURE_2D; - glBindTexture(GL_TEXTURE_2D, id); - glTexStorage2D(GL_TEXTURE_2D, mipmaps, internal_format, width, height); break; case rsx::texture_dimension_extended::texture_dimension_3d: target = GL_TEXTURE_3D; - glBindTexture(GL_TEXTURE_3D, id); - glTexStorage3D(GL_TEXTURE_3D, mipmaps, internal_format, width, height, depth); break; case rsx::texture_dimension_extended::texture_dimension_cubemap: target = GL_TEXTURE_CUBE_MAP; - glBindTexture(GL_TEXTURE_CUBE_MAP, id); - glTexStorage2D(GL_TEXTURE_CUBE_MAP, mipmaps, internal_format, width, height); break; } - glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - - return id; + return new gl::texture(target, width, height, depth, mipmaps, internal_format); } void fill_texture(rsx::texture_dimension_extended dim, u16 mipmap_count, int format, u16 width, u16 height, u16 depth, @@ -494,7 +515,7 @@ namespace gl } } - void apply_swizzle_remap(GLenum target, const std::array& swizzle_remap, const std::pair, std::array>& decoded_remap) + std::array apply_swizzle_remap(const std::array& swizzle_remap, const std::pair, std::array>& decoded_remap) { //Remapping tables; format is A-R-G-B //Remap input table. Contains channel index to read color from @@ -503,7 +524,7 @@ namespace gl //Remap control table. 
Controls whether the remap value is used, or force either 0 or 1 const auto remap_lookup = decoded_remap.second; - GLenum remap_values[4]; + std::array remap_values; for (u8 channel = 0; channel < 4; ++channel) { @@ -523,10 +544,7 @@ namespace gl } } - glTexParameteri(target, GL_TEXTURE_SWIZZLE_A, remap_values[0]); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_R, remap_values[1]); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_G, remap_values[2]); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_B, remap_values[3]); + return remap_values; } void upload_texture(GLuint id, u32 texaddr, u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, bool is_swizzled, rsx::texture_dimension_extended type, @@ -538,10 +556,7 @@ namespace gl size_t texture_data_sz = get_placed_texture_storage_size(width, height, depth, gcm_format, mipmaps, is_cubemap, 256, 512); std::vector data_upload_buf(texture_data_sz); - const std::array& glRemap = get_swizzle_remap(gcm_format); - GLenum target; - switch (type) { case rsx::texture_dimension_extended::texture_dimension_1d: @@ -561,18 +576,13 @@ namespace gl glBindTexture(target, id); glPixelStorei(GL_UNPACK_ALIGNMENT, 4); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE); glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0); glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, mipmaps - 1); if (static_state) { //Usually for vertex textures - - glTexParameteri(target, GL_TEXTURE_SWIZZLE_A, glRemap[0]); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_R, glRemap[1]); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_G, glRemap[2]); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_B, glRemap[3]); - glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_REPEAT); glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_REPEAT); glTexParameteri(target, GL_TEXTURE_WRAP_R, GL_REPEAT); @@ -581,10 +591,6 @@ namespace gl glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameterf(target, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.f); } - else - { - 
apply_swizzle_remap(target, glRemap, decoded_remap); - } //The rest of sampler state is now handled by sampler state objects const auto format_type = get_format_type(gcm_format); @@ -592,4 +598,32 @@ namespace gl const GLenum gl_type = std::get<1>(format_type); fill_texture(type, mipmaps, gcm_format, width, height, depth, subresources_layout, is_swizzled, gl_format, gl_type, data_upload_buf); } + + void copy_typeless(texture * dst, const texture * src) + { + GLsizeiptr src_mem = src->width() * src->height(); + GLsizeiptr dst_mem = dst->width() * dst->height(); + + auto max_mem = std::max(src_mem, dst_mem) * 16; + if (!g_typeless_transfer_buffer || max_mem > g_typeless_transfer_buffer.size()) + { + if (g_typeless_transfer_buffer) g_typeless_transfer_buffer.remove(); + g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem, nullptr, GL_STATIC_COPY); + } + + auto format_type = get_format_type(src->get_internal_format()); + pixel_pack_settings pack_settings{}; + pack_settings.swap_bytes(std::get<2>(format_type)); + g_typeless_transfer_buffer.bind(buffer::target::pixel_pack); + src->copy_to(nullptr, (texture::format)std::get<0>(format_type), (texture::type)std::get<1>(format_type), pack_settings); + + format_type = get_format_type(dst->get_internal_format()); + pixel_unpack_settings unpack_settings{}; + unpack_settings.swap_bytes(std::get<2>(format_type)); + g_typeless_transfer_buffer.bind(buffer::target::pixel_unpack); + dst->copy_from(nullptr, (texture::format)std::get<0>(format_type), (texture::type)std::get<1>(format_type), unpack_settings); + + glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE); + } } diff --git a/rpcs3/Emu/RSX/GL/GLTexture.h b/rpcs3/Emu/RSX/GL/GLTexture.h index 3c685e4a8063..066da41ce6fd 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.h +++ b/rpcs3/Emu/RSX/GL/GLTexture.h @@ -1,6 +1,7 @@ #include "OpenGL.h" #include "../GCM.h" #include "../Common/TextureUtils.h" +#include "GLHelpers.h" namespace rsx 
{ @@ -13,12 +14,14 @@ namespace gl GLenum get_target(rsx::texture_dimension_extended type); GLenum get_sized_internal_format(u32 gcm_format); std::tuple get_format_type(u32 texture_format); + std::tuple get_format_type(texture::internal_format format); GLenum wrap_mode(rsx::texture_wrap_mode wrap); float max_aniso(rsx::texture_max_anisotropy aniso); std::array get_swizzle_remap(u32 texture_format); - GLuint create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, rsx::texture_dimension_extended type, rsx::texture_colorspace colorspace); + texture* create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, rsx::texture_dimension_extended type, rsx::texture_colorspace colorspace); + void copy_typeless(texture* dst, const texture* src); /** * is_swizzled - determines whether input bytes are in morton order * subresources_layout - descriptor of the mipmap levels in memory @@ -31,7 +34,7 @@ namespace gl const std::vector& subresources_layout, const std::pair, std::array>& decoded_remap, bool static_state, rsx::texture_colorspace colorspace); - void apply_swizzle_remap(GLenum target, const std::array& swizzle_remap, const std::pair, std::array>& decoded_remap); + std::array apply_swizzle_remap(const std::array& swizzle_remap, const std::pair, std::array>& decoded_remap); class sampler_state { diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 2c2e49fe1335..41a487188887 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -27,6 +27,7 @@ namespace gl class blitter; extern GLenum get_sized_internal_format(u32); + extern void copy_typeless(texture*, const texture*); extern blitter *g_hw_blitter; class blitter @@ -48,22 +49,50 @@ namespace gl blit_src.remove(); } - u32 scale_image(u32 src, u32 dst, areai src_rect, areai dst_rect, bool linear_interpolation, bool is_depth_copy) + void scale_image(const texture* src, texture* dst, areai src_rect, areai dst_rect, 
bool linear_interpolation, + bool is_depth_copy, const rsx::typeless_xfer& xfer_info) { + std::unique_ptr typeless_src; + std::unique_ptr typeless_dst; + u32 src_id = src->id(); + u32 dst_id = dst->id(); + + if (xfer_info.src_is_typeless) + { + const auto internal_width = (u16)(src->width() * xfer_info.src_scaling_hint); + const auto internal_fmt = get_sized_internal_format(xfer_info.src_gcm_format); + typeless_src = std::make_unique(GL_TEXTURE_2D, internal_width, src->height(), 1, 1, internal_fmt); + copy_typeless(typeless_src.get(), src); + + src_id = typeless_src->id(); + src_rect.x1 = (u16)(src_rect.x1 * xfer_info.src_scaling_hint); + src_rect.x2 = (u16)(src_rect.x2 * xfer_info.src_scaling_hint); + } + + if (xfer_info.dst_is_typeless) + { + const auto internal_width = (u16)(dst->width() * xfer_info.dst_scaling_hint); + const auto internal_fmt = get_sized_internal_format(xfer_info.dst_gcm_format); + typeless_dst = std::make_unique(GL_TEXTURE_2D, internal_width, dst->height(), 1, 1, internal_fmt); + copy_typeless(typeless_dst.get(), dst); + + dst_id = typeless_dst->id(); + dst_rect.x1 = (u16)(dst_rect.x1 * xfer_info.dst_scaling_hint); + dst_rect.x2 = (u16)(dst_rect.x2 * xfer_info.dst_scaling_hint); + } + s32 old_fbo = 0; glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo); - u32 dst_tex = dst; filter interp = linear_interpolation ? filter::linear : filter::nearest; - GLenum attachment = is_depth_copy ? 
GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0; blit_src.bind(); - glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, src, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, src_id, 0); blit_src.check(); blit_dst.bind(); - glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, dst_tex, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, dst_id, 0); blit_dst.check(); GLboolean scissor_test_enabled = glIsEnabled(GL_SCISSOR_TEST); @@ -72,6 +101,12 @@ namespace gl blit_src.blit(blit_dst, src_rect, dst_rect, is_depth_copy ? buffers::depth : buffers::color, interp); + if (xfer_info.dst_is_typeless) + { + //Transfer contents from typeless dst back to original dst + copy_typeless(dst, typeless_dst.get()); + } + blit_src.bind(); glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, GL_NONE, 0); @@ -82,7 +117,6 @@ namespace gl glEnable(GL_SCISSOR_TEST); glBindFramebuffer(GL_FRAMEBUFFER, old_fbo); - return dst_tex; } }; @@ -93,8 +127,11 @@ namespace gl u32 pbo_id = 0; u32 pbo_size = 0; - u32 vram_texture = 0; - u32 scaled_texture = 0; + gl::texture* vram_texture = nullptr; + + std::unique_ptr view; + std::unique_ptr managed_texture; + std::unique_ptr scaled_texture; bool is_depth = false; @@ -188,6 +225,7 @@ namespace gl glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); glBufferStorage(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_MAP_READ_BIT); + glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE); pbo_size = buffer_size; } @@ -203,19 +241,27 @@ namespace gl synchronized = false; is_depth = false; - vram_texture = 0; + vram_texture = nullptr; + managed_texture.reset(); + view.reset(); } - void create(u16 w, u16 h, u16 depth, u16 mipmaps, void*, + void create(u16 w, u16 h, u16 depth, u16 mipmaps, gl::texture_view* _view, gl::texture* image, u32 rsx_pitch, bool read_only, gl::texture::format gl_format, gl::texture::type gl_type, bool swap_bytes) { if (read_only) { + managed_texture.reset(image); + view.reset(_view); 
+ aa_mode = rsx::surface_antialiasing::center_1_sample; } else { + view.reset(); + managed_texture.reset(); + if (pbo_id == 0) init_buffer(); @@ -232,18 +278,21 @@ namespace gl this->depth = depth; this->mipmaps = mipmaps; - vram_texture = image->id(); + vram_texture = image; set_format(gl_format, gl_type, swap_bytes); } - void create_read_only(u32 id, u32 width, u32 height, u32 depth, u32 mipmaps) + void create_read_only(gl::texture* image, gl::texture_view* _view, u32 width, u32 height, u32 depth, u32 mipmaps) { //Only to be used for ro memory, we dont care about most members, just dimensions and the vram texture handle this->width = width; this->height = height; this->depth = depth; this->mipmaps = mipmaps; - vram_texture = id; + + managed_texture.reset(image); + view.reset(_view); + vram_texture = image; rsx_pitch = 0; real_pitch = 0; @@ -292,23 +341,17 @@ namespace gl void set_source(gl::texture &source) { - vram_texture = source.id(); + vram_texture = &source; } void copy_texture(bool=false) { - if (!glIsTexture(vram_texture)) - { - LOG_ERROR(RSX, "Attempted to download rtt texture, but texture handle was invalid! 
(0x%X)", vram_texture); - return; - } - if (!pbo_id) { init_buffer(); } - u32 target_texture = vram_texture; + gl::texture* target_texture = vram_texture; if ((rsx::get_resolution_scale_percent() != 100 && context == rsx::texture_upload_context::framebuffer_storage) || (real_pitch != rsx_pitch)) { @@ -331,83 +374,63 @@ namespace gl areai src_area = { 0, 0, 0, 0 }; const areai dst_area = { 0, 0, (s32)real_width, (s32)real_height }; - GLenum ifmt = 0; - glBindTexture(GL_TEXTURE_2D, vram_texture); - glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_INTERNAL_FORMAT, (GLint*)&ifmt); - glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &src_area.x2); - glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &src_area.y2); + auto ifmt = vram_texture->get_internal_format(); + src_area.x2 = vram_texture->width(); + src_area.y2 = vram_texture->height(); if (src_area.x2 != dst_area.x2 || src_area.y2 != dst_area.y2) { - if (scaled_texture != 0) + if (scaled_texture) { - int sw, sh, fmt; - glBindTexture(GL_TEXTURE_2D, scaled_texture); - glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &sw); - glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &sh); - glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt); - - if ((u32)sw != real_width || (u32)sh != real_height || (GLenum)fmt != ifmt) + auto sfmt = scaled_texture->get_internal_format(); + if (scaled_texture->width() != real_width || + scaled_texture->height() != real_height || + sfmt != ifmt) { - glDeleteTextures(1, &scaled_texture); - scaled_texture = 0; + //Discard current scaled texture + scaled_texture.reset(); } } - if (scaled_texture == 0) + if (!scaled_texture) { - glGenTextures(1, &scaled_texture); - glBindTexture(GL_TEXTURE_2D, scaled_texture); - glTexStorage2D(GL_TEXTURE_2D, 1, ifmt, real_width, real_height); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - 
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + scaled_texture = std::make_unique(GL_TEXTURE_2D, real_width, real_height, 1, 1, (GLenum)ifmt); } bool linear_interp = false; //TODO: Make optional or detect full sized sources - g_hw_blitter->scale_image(vram_texture, scaled_texture, src_area, dst_area, linear_interp, is_depth); - target_texture = scaled_texture; + g_hw_blitter->scale_image(vram_texture, scaled_texture.get(), src_area, dst_area, linear_interp, is_depth, {}); + target_texture = scaled_texture.get(); } } - glPixelStorei(GL_PACK_SWAP_BYTES, pack_unpack_swap_bytes); - glPixelStorei(GL_PACK_ALIGNMENT, 1); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); + glGetError(); glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); - glGetError(); + pixel_pack_settings pack_settings; + pack_settings.aligment(1); - if (get_driver_caps().EXT_dsa_supported) - glGetTextureImageEXT(target_texture, GL_TEXTURE_2D, 0, (GLenum)format, (GLenum)type, nullptr); - else - glGetTextureImage(target_texture, 0, (GLenum)format, (GLenum)type, pbo_size, nullptr); + //NOTE: AMD properietary driver bug - disable swap bytes + if (!::gl::get_driver_caps().vendor_AMD) + pack_settings.swap_bytes(pack_unpack_swap_bytes); - if (GLenum err = glGetError()) + target_texture->copy_to(nullptr, format, type, pack_settings); + + if (auto error = glGetError()) { - bool recovered = false; - if (target_texture == scaled_texture) + if (error == GL_OUT_OF_MEMORY && ::gl::get_driver_caps().vendor_AMD) { - if (get_driver_caps().EXT_dsa_supported) - glGetTextureImageEXT(vram_texture, GL_TEXTURE_2D, 0, (GLenum)format, (GLenum)type, nullptr); - else - glGetTextureImage(vram_texture, 0, (GLenum)format, (GLenum)type, pbo_size, nullptr); - - if (!glGetError()) - { - recovered = true; - const u32 min_dimension = cpu_address_range / rsx_pitch; - LOG_WARNING(RSX, "Failed to read back a scaled image, but the original texture can be read back. 
Consider setting min scalable dimension below or equal to %d", min_dimension); - } + //AMD driver bug + //Pixel transfer fails with GL_OUT_OF_MEMORY. Usually happens with float textures + //Failed operations also leak a large amount of memory + LOG_ERROR(RSX, "Memory transfer failure (AMD bug). Format=0x%x, Type=0x%x", (u32)format, (u32)type); } - - if (!recovered && rsx::get_resolution_scale_percent() != 100 && context == rsx::texture_upload_context::framebuffer_storage) + else { - LOG_ERROR(RSX, "Texture readback failed. Disable resolution scaling to get the 'Write Color Buffers' option to work properly"); + LOG_ERROR(RSX, "Memory transfer failed with error 0x%x. Format=0x%x, Type=0x%x", error, (u32)format, (u32)type); } } - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE); m_fence.reset(); synchronized = true; @@ -424,11 +447,11 @@ namespace gl u32 min_width = std::min((u16)tex->width(), width); u32 min_height = std::min((u16)tex->height(), height); - tex->bind(); + glBindTexture(GL_TEXTURE_2D, tex->id()); glPixelStorei(GL_UNPACK_SWAP_BYTES, pack_unpack_swap_bytes); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_id); - glTexSubImage2D((GLenum)tex->get_target(), 0, 0, 0, min_width, min_height, (GLenum)format, (GLenum)type, nullptr); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, min_width, min_height, (GLenum)format, (GLenum)type, nullptr); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE); } bool flush() @@ -485,9 +508,60 @@ namespace gl //byte swapping does not work on byte types, use uint_8_8_8_8 for rgba8 instead to avoid penalty rsx::shuffle_texel_data_wzyx(dst, rsx_pitch, width, height); } + else if (pack_unpack_swap_bytes && ::gl::get_driver_caps().vendor_AMD) + { + //AMD driver bug - cannot use pack_swap_bytes + //Manually byteswap texel data + switch (type) + { + case texture::type::f16: + case texture::type::sshort: + case texture::type::ushort: + case texture::type::ushort_5_6_5: + case 
texture::type::ushort_4_4_4_4: + case texture::type::ushort_1_5_5_5_rev: + case texture::type::ushort_5_5_5_1: + { + const u32 num_reps = cpu_address_range / 2; + be_t* in = (be_t*)(dst); + u16* out = (u16*)dst; + + for (u32 n = 0; n < num_reps; ++n) + { + out[n] = in[n]; + } + + break; + } + case texture::type::f32: + case texture::type::sint: + case texture::type::uint: + case texture::type::uint_10_10_10_2: + case texture::type::uint_24_8: + case texture::type::uint_2_10_10_10_rev: + case texture::type::uint_8_8_8_8: + { + u32 num_reps = cpu_address_range / 4; + be_t* in = (be_t*)(dst); + u32* out = (u32*)dst; + + for (u32 n = 0; n < num_reps; ++n) + { + out[n] = in[n]; + } + + break; + } + default: + { + LOG_ERROR(RSX, "Texture type 0x%x is not implemented " HERE, (u32)type); + break; + } + } + } glUnmapBuffer(GL_PIXEL_PACK_BUFFER); - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE); reset_write_statistics(); @@ -513,19 +587,17 @@ namespace gl if (pbo_id == 0) { //Read-only texture, destroy texture memory - glDeleteTextures(1, &vram_texture); + managed_texture.reset(); + view.reset(); } else { //Destroy pbo cache since vram texture is managed elsewhere glDeleteBuffers(1, &pbo_id); - - if (scaled_texture) - glDeleteTextures(1, &scaled_texture); + scaled_texture.reset(); } - vram_texture = 0; - scaled_texture = 0; + vram_texture = nullptr; pbo_id = 0; pbo_size = 0; @@ -568,14 +640,19 @@ namespace gl return vram_texture == 0; } - u32 get_raw_view() const + gl::texture_view* get_raw_view() const { - return vram_texture; + return view.get(); } - u32 get_raw_texture() const + gl::texture* get_raw_texture() const { - return vram_texture; + return managed_texture.get(); + } + + std::unique_ptr& get_view() + { + return view; } bool is_depth_texture() const @@ -585,44 +662,50 @@ namespace gl bool has_compatible_format(gl::texture* tex) const { - GLenum fmt; - glBindTexture(GL_TEXTURE_2D, vram_texture); - 
glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_INTERNAL_FORMAT, (GLint*)&fmt); + //TODO + return (tex->get_internal_format() == vram_texture->get_internal_format()); + } + }; + + class texture_cache : public rsx::texture_cache + { + private: + + struct discardable_storage + { + std::unique_ptr image; + std::unique_ptr view; + + discardable_storage() + {} - if (auto as_rtt = dynamic_cast(tex)) + discardable_storage(std::unique_ptr& tex) { - const auto rtt_internal_fmt = (GLenum)as_rtt->get_compatible_internal_format(); - if (rtt_internal_fmt != fmt) - { - //When high precision Z is enabled, RTT depth surfaces use DEPTH32F instead of DEPTH24 - if (rtt_internal_fmt == GL_DEPTH32F_STENCIL8 && fmt == GL_DEPTH24_STENCIL8) - return true; + image = std::move(tex); + } - return false; - } - else - { - //Match - return true; - } + discardable_storage(std::unique_ptr& _view) + { + view = std::move(_view); } - return (gl::texture::format)fmt == tex->get_internal_format(); - } - }; + discardable_storage(std::unique_ptr& tex, std::unique_ptr& _view) + { + image = std::move(tex); + view = std::move(_view); + } + }; - class texture_cache : public rsx::texture_cache - { private: blitter m_hw_blitter; - std::vector m_temporary_surfaces; + std::vector m_temporary_surfaces; - cached_texture_section& create_texture(u32 id, u32 texaddr, u32 texsize, u32 w, u32 h, u32 depth, u32 mipmaps) + cached_texture_section& create_texture(gl::texture* image, gl::texture_view* view, u32 texaddr, u32 texsize, u32 w, u32 h, u32 depth, u32 mipmaps) { cached_texture_section& tex = find_cached_texture(texaddr, texsize, true, w, h, depth); tex.reset(texaddr, texsize, false); - tex.create_read_only(id, w, h, depth, mipmaps); + tex.create_read_only(image, view, w, h, depth, mipmaps); read_only_range = tex.get_min_max(read_only_range); return tex; } @@ -646,110 +729,86 @@ namespace gl void clear_temporary_subresources() { - for (u32 &id : m_temporary_surfaces) - { - glDeleteTextures(1, &id); - } - 
m_temporary_surfaces.resize(0); } - u32 create_temporary_subresource_impl(u32 src_id, GLenum sized_internal_fmt, GLenum dst_type, u32 gcm_format, + gl::texture_view* create_temporary_subresource_impl(gl::texture* src, GLenum sized_internal_fmt, GLenum dst_type, u32 gcm_format, u16 x, u16 y, u16 width, u16 height, const texture_channel_remap_t& remap, bool copy) { - u32 dst_id = 0; - if (sized_internal_fmt == GL_NONE) sized_internal_fmt = gl::get_sized_internal_format(gcm_format); - GLenum ifmt; - glBindTexture(GL_TEXTURE_2D, src_id); - glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_INTERNAL_FORMAT, (GLint*)&ifmt); - - switch (ifmt) + gl::texture::internal_format ifmt = static_cast(sized_internal_fmt); + if (src) { - case GL_DEPTH_COMPONENT16: - case GL_DEPTH_COMPONENT24: - case GL_DEPTH24_STENCIL8: - sized_internal_fmt = ifmt; - break; + ifmt = src->get_internal_format(); + switch (ifmt) + { + case gl::texture::internal_format::depth16: + case gl::texture::internal_format::depth24_stencil8: + case gl::texture::internal_format::depth32f_stencil8: + //HACK! 
Should use typeless transfer instead + sized_internal_fmt = (GLenum)ifmt; + break; + } } - glGenTextures(1, &dst_id); - glBindTexture(dst_type, dst_id); - - if (dst_type == GL_TEXTURE_2D) - glTexStorage2D(GL_TEXTURE_2D, 1, sized_internal_fmt, width, height); - else if (dst_type == GL_TEXTURE_1D) - glTexStorage1D(GL_TEXTURE_1D, 1, sized_internal_fmt, width); - - glTexParameteri(dst_type, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(dst_type, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(dst_type, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(dst_type, GL_TEXTURE_MAX_LEVEL, 0); - - m_temporary_surfaces.push_back(dst_id); + auto dst = std::make_unique(dst_type, width, height, 1, 1, sized_internal_fmt); if (copy) { //Empty GL_ERROR glGetError(); - glCopyImageSubData(src_id, GL_TEXTURE_2D, 0, x, y, 0, - dst_id, dst_type, 0, 0, 0, 0, width, height, 1); + glCopyImageSubData(src->id(), GL_TEXTURE_2D, 0, x, y, 0, + dst->id(), dst_type, 0, 0, 0, 0, width, height, 1); //Check for error if (GLenum err = glGetError()) { LOG_WARNING(RSX, "Failed to copy image subresource with GL error 0x%X", err); - return 0; + return nullptr; } } - //TODO: Native texture views are needed here. It works because this routine is only called with rendertarget data - if (ifmt != sized_internal_fmt) + std::array swizzle; + if (!src || (GLenum)ifmt != sized_internal_fmt) { - err_once("GL format mismatch (data cast?). Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, ifmt); + if (src) + { + //Format mismatch + err_once("GL format mismatch (data cast?). Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, (GLenum)ifmt); + } + //Apply base component map onto the new texture if a data cast has been done - apply_component_mapping_flags(dst_type, gcm_format, rsx::texture_create_flags::default_component_order); + swizzle = get_component_mapping(gcm_format, rsx::texture_create_flags::default_component_order); } else { - //Inherit the parent's default mapping. 
The caller should ensure the native order is set beforehand - GLint src_remap[4]; - glBindTexture(GL_TEXTURE_2D, src_id); - glGetTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_R, (GLint*)&src_remap[0]); - glGetTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_G, (GLint*)&src_remap[1]); - glGetTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_B, (GLint*)&src_remap[2]); - glGetTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_A, (GLint*)&src_remap[3]); - - glBindTexture(dst_type, dst_id); - glTexParameteriv(dst_type, GL_TEXTURE_SWIZZLE_RGBA, src_remap); + swizzle = src->get_native_component_layout(); } if (memcmp(remap.first.data(), rsx::default_remap_vector.first.data(), 4) || memcmp(remap.second.data(), rsx::default_remap_vector.second.data(), 4)) - set_up_remap_vector(dst_id, dst_type, remap); + swizzle = apply_swizzle_remap(swizzle, remap); + + auto view = std::make_unique(dst.get(), dst_type, sized_internal_fmt, swizzle.data()); + auto result = view.get(); - return dst_id; + m_temporary_surfaces.push_back({ dst, view }); + return result; } - void apply_component_mapping_flags(GLenum target, u32 gcm_format, rsx::texture_create_flags flags) + std::array get_component_mapping(u32 gcm_format, rsx::texture_create_flags flags) { - //NOTE: Depth textures should always read RRRR switch (gcm_format) { case CELL_GCM_TEXTURE_DEPTH24_D8: case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: case CELL_GCM_TEXTURE_DEPTH16: case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - { - glTexParameteri(target, GL_TEXTURE_SWIZZLE_R, GL_RED); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_G, GL_RED); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_B, GL_RED); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_A, GL_RED); - return; - } + //Dont bother letting this propagate + return{ GL_RED, GL_RED, GL_RED, GL_RED }; default: break; } @@ -758,44 +817,21 @@ namespace gl { case rsx::texture_create_flags::default_component_order: { - auto remap = gl::get_swizzle_remap(gcm_format); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_R, 
remap[1]); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_G, remap[2]); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_B, remap[3]); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_A, remap[0]); - break; + return gl::get_swizzle_remap(gcm_format); } case rsx::texture_create_flags::native_component_order: { - glTexParameteri(target, GL_TEXTURE_SWIZZLE_R, GL_RED); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_G, GL_GREEN); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_B, GL_BLUE); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_A, GL_ALPHA); - break; + return{ GL_ALPHA, GL_RED, GL_GREEN, GL_BLUE }; } case rsx::texture_create_flags::swapped_native_component_order: { - glTexParameteri(target, GL_TEXTURE_SWIZZLE_R, GL_ALPHA); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_G, GL_RED); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_B, GL_GREEN); - glTexParameteri(target, GL_TEXTURE_SWIZZLE_A, GL_BLUE); - break; + return{ GL_BLUE, GL_ALPHA, GL_RED, GL_GREEN }; } + default: + fmt::throw_exception("Unknown texture create flags" HERE); } } - void set_up_remap_vector(u32 texture_id, GLenum texture_type, const texture_channel_remap_t& remap_vector) - { - std::array swizzle_remap; - glBindTexture(texture_type, texture_id); - glGetTexParameteriv(texture_type, GL_TEXTURE_SWIZZLE_A, (GLint*)&swizzle_remap[0]); - glGetTexParameteriv(texture_type, GL_TEXTURE_SWIZZLE_R, (GLint*)&swizzle_remap[1]); - glGetTexParameteriv(texture_type, GL_TEXTURE_SWIZZLE_G, (GLint*)&swizzle_remap[2]); - glGetTexParameteriv(texture_type, GL_TEXTURE_SWIZZLE_B, (GLint*)&swizzle_remap[3]); - - apply_swizzle_remap(texture_type, swizzle_remap, remap_vector); - } - protected: void free_texture_section(cached_texture_section& tex) override @@ -803,40 +839,24 @@ namespace gl tex.destroy(); } - u32 create_temporary_subresource_view(void*&, u32* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, + gl::texture_view* create_temporary_subresource_view(void*&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const 
texture_channel_remap_t& remap_vector) override { return create_temporary_subresource_impl(*src, GL_NONE, GL_TEXTURE_2D, gcm_format, x, y, w, h, remap_vector, true); } - u32 create_temporary_subresource_view(void*&, gl::texture* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, + gl::texture_view* create_temporary_subresource_view(void*&, gl::texture* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector) override { - if (auto as_rtt = dynamic_cast(src)) - { - return create_temporary_subresource_impl(src->id(), (GLenum)as_rtt->get_compatible_internal_format(), - GL_TEXTURE_2D, gcm_format, x, y, w, h, remap_vector, true); - } - else - { - return create_temporary_subresource_impl(src->id(), GL_NONE, GL_TEXTURE_2D, gcm_format, x, y, w, h, - remap_vector, true); - } + return create_temporary_subresource_impl(src, (GLenum)src->get_internal_format(), + GL_TEXTURE_2D, gcm_format, x, y, w, h, remap_vector, true); } - u32 generate_cubemap_from_images(void*&, u32 gcm_format, u16 size, const std::vector& sources, const texture_channel_remap_t& /*remap_vector*/) override + gl::texture_view* generate_cubemap_from_images(void*&, u32 gcm_format, u16 size, const std::vector& sources, const texture_channel_remap_t& /*remap_vector*/) override { const GLenum ifmt = gl::get_sized_internal_format(gcm_format); - GLuint dst_id = 0; - - glGenTextures(1, &dst_id); - glBindTexture(GL_TEXTURE_CUBE_MAP, dst_id); - glTexStorage2D(GL_TEXTURE_CUBE_MAP, 1, ifmt, size, size); - - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAX_LEVEL, 0); + auto dst_image = std::make_unique(GL_TEXTURE_CUBE_MAP, size, size, 1, 1, ifmt); + auto view = std::make_unique(dst_image.get(), GL_TEXTURE_CUBE_MAP, ifmt); //Empty GL_ERROR glGetError(); @@ -845,35 +865,27 @@ 
namespace gl { if (slice.src) { - glCopyImageSubData(slice.src, GL_TEXTURE_2D, 0, slice.src_x, slice.src_y, 0, - dst_id, GL_TEXTURE_CUBE_MAP, 0, slice.dst_x, slice.dst_y, slice.dst_z, slice.w, slice.h, 1); + glCopyImageSubData(slice.src->id(), GL_TEXTURE_2D, 0, slice.src_x, slice.src_y, 0, + dst_image->id(), GL_TEXTURE_CUBE_MAP, 0, slice.dst_x, slice.dst_y, slice.dst_z, slice.w, slice.h, 1); } } - m_temporary_surfaces.push_back(dst_id); - if (GLenum err = glGetError()) { LOG_WARNING(RSX, "Failed to copy image subresource with GL error 0x%X", err); - return 0; + return nullptr; } - return dst_id; + auto result = view.get(); + m_temporary_surfaces.push_back({ dst_image, view }); + return result; } - u32 generate_3d_from_2d_images(void*&, u32 gcm_format, u16 width, u16 height, u16 depth, const std::vector& sources, const texture_channel_remap_t& /*remap_vector*/) override + gl::texture_view* generate_3d_from_2d_images(void*&, u32 gcm_format, u16 width, u16 height, u16 depth, const std::vector& sources, const texture_channel_remap_t& /*remap_vector*/) override { const GLenum ifmt = gl::get_sized_internal_format(gcm_format); - GLuint dst_id = 0; - - glGenTextures(1, &dst_id); - glBindTexture(GL_TEXTURE_3D, dst_id); - glTexStorage3D(GL_TEXTURE_3D, 1, ifmt, width, height, depth); - - glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAX_LEVEL, 0); + auto dst_image = std::make_unique(GL_TEXTURE_3D, width, height, depth, 1, ifmt); + auto view = std::make_unique(dst_image.get(), GL_TEXTURE_3D, ifmt); //Empty GL_ERROR glGetError(); @@ -882,49 +894,47 @@ namespace gl { if (slice.src) { - glCopyImageSubData(slice.src, GL_TEXTURE_2D, 0, slice.src_x, slice.src_y, 0, - dst_id, GL_TEXTURE_3D, 0, slice.dst_x, slice.dst_y, slice.dst_z, slice.w, slice.h, 1); + glCopyImageSubData(slice.src->id(), 
GL_TEXTURE_2D, 0, slice.src_x, slice.src_y, 0, + dst_image->id(), GL_TEXTURE_3D, 0, slice.dst_x, slice.dst_y, slice.dst_z, slice.w, slice.h, 1); } } - m_temporary_surfaces.push_back(dst_id); - if (GLenum err = glGetError()) { LOG_WARNING(RSX, "Failed to copy image subresource with GL error 0x%X", err); - return 0; + return nullptr; } - return dst_id; + auto result = view.get(); + m_temporary_surfaces.push_back({ dst_image, view }); + return result; } - u32 generate_atlas_from_images(void*&, u32 gcm_format, u16 width, u16 height, const std::vector& sections_to_copy, + gl::texture_view* generate_atlas_from_images(void*&, u32 gcm_format, u16 width, u16 height, const std::vector& sections_to_copy, const texture_channel_remap_t& remap_vector) override { - auto result = create_temporary_subresource_impl(sections_to_copy.front().src, GL_NONE, GL_TEXTURE_2D, gcm_format, 0, 0, width, height, remap_vector, false); + auto result = create_temporary_subresource_impl(nullptr, GL_NONE, GL_TEXTURE_2D, gcm_format, 0, 0, width, height, remap_vector, false); for (const auto ®ion : sections_to_copy) { - glCopyImageSubData(region.src, GL_TEXTURE_2D, 0, region.src_x, region.src_y, 0, - result, GL_TEXTURE_2D, 0, region.dst_x, region.dst_y, 0, region.w, region.h, 1); + glCopyImageSubData(region.src->id(), GL_TEXTURE_2D, 0, region.src_x, region.src_y, 0, + result->image()->id(), GL_TEXTURE_2D, 0, region.dst_x, region.dst_y, 0, region.w, region.h, 1); } return result; } - void update_image_contents(void*&, u32 dst, u32 src, u16 width, u16 height) override + void update_image_contents(void*&, gl::texture_view* dst, gl::texture* src, u16 width, u16 height) override { - glCopyImageSubData(src, GL_TEXTURE_2D, 0, 0, 0, 0, - dst, GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1); + glCopyImageSubData(src->id(), GL_TEXTURE_2D, 0, 0, 0, 0, + dst->image()->id(), GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1); } cached_texture_section* create_new_texture(void*&, u32 rsx_address, u32 rsx_size, u16 width, u16 
height, u16 depth, u16 mipmaps, u32 gcm_format, rsx::texture_upload_context context, rsx::texture_dimension_extended type, rsx::texture_create_flags flags, - rsx::texture_colorspace colorspace, const texture_channel_remap_t& /*remap_vector*/) override + rsx::texture_colorspace colorspace, const texture_channel_remap_t& remap_vector) override { - u32 vram_texture = gl::create_texture(gcm_format, width, height, depth, mipmaps, type, colorspace); bool depth_flag = false; - switch (gcm_format) { case CELL_GCM_TEXTURE_DEPTH24_D8: @@ -933,15 +943,17 @@ namespace gl break; } - auto target = gl::get_target(type); - glBindTexture(target, vram_texture); - apply_component_mapping_flags(target, gcm_format, flags); + auto image = gl::create_texture(gcm_format, width, height, depth, mipmaps, type, colorspace); + auto swizzle = get_component_mapping(gcm_format, flags); + swizzle = gl::apply_swizzle_remap(swizzle, remap_vector); + auto view = new gl::texture_view(image, swizzle.data()); - auto& cached = create_texture(vram_texture, rsx_address, rsx_size, width, height, depth, mipmaps); + auto& cached = create_texture(image, view, rsx_address, rsx_size, width, height, depth, mipmaps); cached.set_dirty(false); cached.set_depth_flag(depth_flag); cached.set_view_flags(flags); cached.set_context(context); + cached.set_gcm_format(gcm_format); cached.set_sampler_status(rsx::texture_sampler_status::status_uninitialized); cached.set_image_type(type); @@ -1011,7 +1023,7 @@ namespace gl section->set_sampler_status(rsx::texture_sampler_status::status_ready); } - gl::upload_texture(section->get_raw_texture(), rsx_address, gcm_format, width, height, depth, mipmaps, + gl::upload_texture(section->get_raw_texture()->id(), rsx_address, gcm_format, width, height, depth, mipmaps, input_swizzled, type, subresource_layout, remap_vector, false, colorspace); return section; } @@ -1021,8 +1033,13 @@ namespace gl if (flags == section.get_view_flags()) return; - glBindTexture(GL_TEXTURE_2D, 
section.get_raw_texture()); - apply_component_mapping_flags(GL_TEXTURE_2D, gcm_format, flags); + auto swizzle = get_component_mapping(gcm_format, flags); + auto& view = section.get_view(); + + if (!view->compare_swizzle(swizzle.data())) + { + view.reset(new gl::texture_view(view->image(), swizzle.data())); + } section.set_view_flags(flags); section.set_sampler_status(rsx::texture_sampler_status::status_uninitialized); @@ -1030,7 +1047,15 @@ namespace gl void set_up_remap_vector(cached_texture_section& section, const texture_channel_remap_t& remap_vector) override { - set_up_remap_vector(section.get_raw_texture(), GL_TEXTURE_2D, remap_vector); + auto& view = section.get_view(); + auto swizzle = view->component_mapping(); + + swizzle = apply_swizzle_remap(swizzle, remap_vector); + if (!view->compare_swizzle(swizzle.data())) + { + view.reset(new gl::texture_view(view->image(), swizzle.data())); + } + section.set_sampler_status(rsx::texture_sampler_status::status_ready); } @@ -1046,44 +1071,39 @@ namespace gl bool render_target_format_is_compatible(gl::texture* tex, u32 gcm_format) override { - if (auto as_rtt = dynamic_cast(tex)) + auto ifmt = tex->get_internal_format(); + switch (gcm_format) { - auto ifmt = as_rtt->get_compatible_internal_format(); - switch (gcm_format) - { - default: - //TODO - err_once("Format incompatibility detected, reporting failure to force data copy (GL_INTERNAL_FORMAT=0x%X, GCM_FORMAT=0x%X)", (u32)ifmt, gcm_format); - return false; - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - return (ifmt == gl::texture::internal_format::rgba16f); - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - return (ifmt == gl::texture::internal_format::rgba32f); - case CELL_GCM_TEXTURE_X32_FLOAT: - return (ifmt == gl::texture::internal_format::r32f); - case CELL_GCM_TEXTURE_R5G6B5: - return (ifmt == gl::texture::internal_format::r5g6b5); - case CELL_GCM_TEXTURE_A8R8G8B8: - return (ifmt == gl::texture::internal_format::rgba8 || - ifmt == 
gl::texture::internal_format::depth24_stencil8 || - ifmt == gl::texture::internal_format::depth32f_stencil8); - case CELL_GCM_TEXTURE_B8: - return (ifmt == gl::texture::internal_format::r8); - case CELL_GCM_TEXTURE_G8B8: - return (ifmt == gl::texture::internal_format::rg8); - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - return (ifmt == gl::texture::internal_format::depth24_stencil8 || - ifmt == gl::texture::internal_format::depth32f_stencil8 || - ifmt == gl::texture::internal_format::depth_stencil); - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - return (ifmt == gl::texture::internal_format::depth16 || - ifmt == gl::texture::internal_format::depth); - } + default: + //TODO + err_once("Format incompatibility detected, reporting failure to force data copy (GL_INTERNAL_FORMAT=0x%X, GCM_FORMAT=0x%X)", (u32)ifmt, gcm_format); + return false; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + return (ifmt == gl::texture::internal_format::rgba16f); + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + return (ifmt == gl::texture::internal_format::rgba32f); + case CELL_GCM_TEXTURE_X32_FLOAT: + return (ifmt == gl::texture::internal_format::r32f); + case CELL_GCM_TEXTURE_R5G6B5: + return (ifmt == gl::texture::internal_format::r5g6b5); + case CELL_GCM_TEXTURE_A8R8G8B8: + return (ifmt == gl::texture::internal_format::rgba8 || + ifmt == gl::texture::internal_format::depth24_stencil8 || + ifmt == gl::texture::internal_format::depth32f_stencil8); + case CELL_GCM_TEXTURE_B8: + return (ifmt == gl::texture::internal_format::r8); + case CELL_GCM_TEXTURE_G8B8: + return (ifmt == gl::texture::internal_format::rg8); + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + return (ifmt == gl::texture::internal_format::depth24_stencil8 || + ifmt == gl::texture::internal_format::depth32f_stencil8 || + ifmt == gl::texture::internal_format::depth_stencil); + case CELL_GCM_TEXTURE_DEPTH16: + case 
CELL_GCM_TEXTURE_DEPTH16_FLOAT: + return (ifmt == gl::texture::internal_format::depth16 || + ifmt == gl::texture::internal_format::depth); } - - fmt::throw_exception("Format comparison for non-rendertargets is not implemented" HERE); } public: diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp index 23664a126c98..916ce3f108ae 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp @@ -241,7 +241,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer() (m_attrib_ring_buffer->size() - upload_info.persistent_mapping_offset) : m_max_texbuffer_size; m_persistent_stream_view.update(m_attrib_ring_buffer.get(), upload_info.persistent_mapping_offset, (u32)view_size); - m_gl_persistent_stream_buffer.copy_from(m_persistent_stream_view); + m_gl_persistent_stream_buffer->copy_from(m_persistent_stream_view); upload_info.persistent_mapping_offset = 0; } } @@ -258,7 +258,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer() (m_attrib_ring_buffer->size() - upload_info.volatile_mapping_offset) : m_max_texbuffer_size; m_volatile_stream_view.update(m_attrib_ring_buffer.get(), upload_info.volatile_mapping_offset, (u32)view_size); - m_gl_volatile_stream_buffer.copy_from(m_volatile_stream_view); + m_gl_volatile_stream_buffer->copy_from(m_volatile_stream_view); upload_info.volatile_mapping_offset = 0; } } diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 39c82d478b85..d643bff0f10e 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -500,7 +500,7 @@ namespace rsx } //Execute backend-local tasks first - do_local_task(ctrl->put.load() == internal_get.load()); + do_local_task(performance_counters.FIFO_is_idle); //Update sub-units zcull_ctrl->update(this); @@ -520,6 +520,18 @@ namespace rsx sync_point_request = false; } + else if (performance_counters.FIFO_is_idle) + { + //Registers not updated, do housekeeping since queue is idle + if (has_deferred_call) + { 
+ flush_command_queue(); + } + else + { + do_internal_task(); + } + } //Now load the FIFO ctrl registers ctrl->get.store(internal_get.load()); @@ -527,30 +539,15 @@ namespace rsx if (put == internal_get || !Emu.IsRunning()) { - if (has_deferred_call) - { - flush_command_queue(); - } - else if (!performance_counters.FIFO_is_idle) + if (!performance_counters.FIFO_is_idle) { performance_counters.FIFO_idle_timestamp = get_system_time(); performance_counters.FIFO_is_idle = true; } - else - { - do_internal_task(); - } continue; } - if (performance_counters.FIFO_is_idle) - { - //Update performance counters with time spent in idle mode - performance_counters.FIFO_is_idle = false; - performance_counters.idle_time += (get_system_time() - performance_counters.FIFO_idle_timestamp); - } - //Validate put and get registers //TODO: Who should handle graphics exceptions?? const u32 get_address = RSXIOMem.RealAddr(internal_get); @@ -580,6 +577,16 @@ namespace rsx if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD) { u32 offs = cmd & 0x1ffffffc; + if (offs == internal_get.load()) + { + //Jump to self + if (!performance_counters.FIFO_is_idle) + { + performance_counters.FIFO_idle_timestamp = get_system_time(); + performance_counters.FIFO_is_idle = true; + } + } + //LOG_WARNING(RSX, "rsx jump(0x%x) #addr=0x%x, cmd=0x%x, get=0x%x, put=0x%x", offs, m_ioAddress + get, cmd, get, put); internal_get = offs; continue; @@ -587,6 +594,16 @@ namespace rsx if ((cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD) { u32 offs = cmd & 0xfffffffc; + if (offs == internal_get.load()) + { + //Jump to self + if (!performance_counters.FIFO_is_idle) + { + performance_counters.FIFO_idle_timestamp = get_system_time(); + performance_counters.FIFO_is_idle = true; + } + } + //LOG_WARNING(RSX, "rsx jump(0x%x) #addr=0x%x, cmd=0x%x, get=0x%x, put=0x%x", offs, m_ioAddress + get, cmd, get, put); internal_get = offs; continue; @@ -616,6 +633,12 @@ namespace rsx } if (cmd == 0) //nop { + if 
(!performance_counters.FIFO_is_idle) + { + performance_counters.FIFO_idle_timestamp = get_system_time(); + performance_counters.FIFO_is_idle = true; + } + internal_get += 4; continue; } @@ -662,6 +685,13 @@ namespace rsx if (internal_get < put && ((internal_get + (count + 1) * 4) > put)) LOG_ERROR(RSX, "Get pointer jumping over put pointer! This is bad!"); + if (performance_counters.FIFO_is_idle) + { + //Update performance counters with time spent in idle mode + performance_counters.FIFO_is_idle = false; + performance_counters.idle_time += (get_system_time() - performance_counters.FIFO_idle_timestamp); + } + for (u32 i = 0; i < count; i++) { u32 reg = ((cmd & RSX_METHOD_NON_INCREMENT_CMD_MASK) == RSX_METHOD_NON_INCREMENT_CMD) ? first_cmd : first_cmd + i; @@ -1564,8 +1594,8 @@ namespace rsx { case CELL_GCM_TEXTURE_A8R8G8B8: case CELL_GCM_TEXTURE_D8R8G8B8: - case CELL_GCM_TEXTURE_A4R4G4B4: - case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_A4R4G4B4: //TODO + case CELL_GCM_TEXTURE_R5G6B5: //TODO { u32 remap = tex.remap(); result.redirected_textures |= (1 << i); @@ -1573,8 +1603,9 @@ namespace rsx break; } case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH24_D8: case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: { const auto compare_mode = (rsx::comparison_function)tex.zfunc(); if (result.textures_alpha_kill[i] == 0 && @@ -1731,8 +1762,9 @@ namespace rsx break; } case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH24_D8: case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: { const auto compare_mode = (rsx::comparison_function)tex.zfunc(); if (result.textures_alpha_kill[i] == 0 && diff --git a/rpcs3/Emu/RSX/VK/VKFormats.cpp b/rpcs3/Emu/RSX/VK/VKFormats.cpp index c15181c65545..f96193cb8f75 100644 --- a/rpcs3/Emu/RSX/VK/VKFormats.cpp +++ b/rpcs3/Emu/RSX/VK/VKFormats.cpp @@ -3,185 +3,369 @@ namespace vk { + gpu_formats_support 
get_optimal_tiling_supported_formats(const physical_device& dev) + { + gpu_formats_support result = {}; -gpu_formats_support get_optimal_tiling_supported_formats(VkPhysicalDevice physical_device) -{ - gpu_formats_support result = {}; - - VkFormatProperties props; - vkGetPhysicalDeviceFormatProperties(physical_device, VK_FORMAT_D24_UNORM_S8_UINT, &props); + VkFormatProperties props; + vkGetPhysicalDeviceFormatProperties(dev, VK_FORMAT_D24_UNORM_S8_UINT, &props); - result.d24_unorm_s8 = !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) - && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) - && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT) - && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT); + result.d24_unorm_s8 = !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) + && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) + && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT) + && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT); - vkGetPhysicalDeviceFormatProperties(physical_device, VK_FORMAT_D32_SFLOAT_S8_UINT, &props); - result.d32_sfloat_s8 = !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) - && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) - && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT); + vkGetPhysicalDeviceFormatProperties(dev, VK_FORMAT_D32_SFLOAT_S8_UINT, &props); + result.d32_sfloat_s8 = !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) + && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) + && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT); - //Hide d24_s8 if force high precision z buffer is enabled - if (g_cfg.video.force_high_precision_z_buffer && result.d32_sfloat_s8) - result.d24_unorm_s8 = false; + //Hide 
d24_s8 if force high precision z buffer is enabled + if (g_cfg.video.force_high_precision_z_buffer && result.d32_sfloat_s8) + result.d24_unorm_s8 = false; - return result; -} + return result; + } -VkFormat get_compatible_depth_surface_format(const gpu_formats_support &support, rsx::surface_depth_format format) -{ - switch (format) - { - case rsx::surface_depth_format::z16: return VK_FORMAT_D16_UNORM; - case rsx::surface_depth_format::z24s8: + VkFormat get_compatible_depth_surface_format(const gpu_formats_support &support, rsx::surface_depth_format format) { - if (support.d24_unorm_s8) return VK_FORMAT_D24_UNORM_S8_UINT; - if (support.d32_sfloat_s8) return VK_FORMAT_D32_SFLOAT_S8_UINT; - fmt::throw_exception("No hardware support for z24s8" HERE); + switch (format) + { + case rsx::surface_depth_format::z16: return VK_FORMAT_D16_UNORM; + case rsx::surface_depth_format::z24s8: + { + if (support.d24_unorm_s8) return VK_FORMAT_D24_UNORM_S8_UINT; + if (support.d32_sfloat_s8) return VK_FORMAT_D32_SFLOAT_S8_UINT; + fmt::throw_exception("No hardware support for z24s8" HERE); + } + } + fmt::throw_exception("Invalid format (0x%x)" HERE, (u32)format); } - } - fmt::throw_exception("Invalid format (0x%x)" HERE, (u32)format); -} -std::tuple get_min_filter_and_mip(rsx::texture_minify_filter min_filter) -{ - switch (min_filter) + std::tuple get_min_filter_and_mip(rsx::texture_minify_filter min_filter) { - case rsx::texture_minify_filter::nearest: return std::make_tuple(VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST); - case rsx::texture_minify_filter::linear: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST); - case rsx::texture_minify_filter::nearest_nearest: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST); - case rsx::texture_minify_filter::linear_nearest: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST); - case rsx::texture_minify_filter::nearest_linear: return std::make_tuple(VK_FILTER_NEAREST, 
VK_SAMPLER_MIPMAP_MODE_LINEAR); - case rsx::texture_minify_filter::linear_linear: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_LINEAR); - case rsx::texture_minify_filter::convolution_min: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_LINEAR); + switch (min_filter) + { + case rsx::texture_minify_filter::nearest: return std::make_tuple(VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST); + case rsx::texture_minify_filter::linear: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST); + case rsx::texture_minify_filter::nearest_nearest: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST); + case rsx::texture_minify_filter::linear_nearest: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST); + case rsx::texture_minify_filter::nearest_linear: return std::make_tuple(VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_LINEAR); + case rsx::texture_minify_filter::linear_linear: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_LINEAR); + case rsx::texture_minify_filter::convolution_min: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_LINEAR); + } + fmt::throw_exception("Invalid max filter" HERE); } - fmt::throw_exception("Invalid max filter" HERE); -} -VkFilter get_mag_filter(rsx::texture_magnify_filter mag_filter) -{ - switch (mag_filter) + VkFilter get_mag_filter(rsx::texture_magnify_filter mag_filter) { - case rsx::texture_magnify_filter::nearest: return VK_FILTER_NEAREST; - case rsx::texture_magnify_filter::linear: return VK_FILTER_LINEAR; - case rsx::texture_magnify_filter::convolution_mag: return VK_FILTER_LINEAR; + switch (mag_filter) + { + case rsx::texture_magnify_filter::nearest: return VK_FILTER_NEAREST; + case rsx::texture_magnify_filter::linear: return VK_FILTER_LINEAR; + case rsx::texture_magnify_filter::convolution_mag: return VK_FILTER_LINEAR; + } + fmt::throw_exception("Invalid mag filter (0x%x)" HERE, (u32)mag_filter); } - 
fmt::throw_exception("Invalid mag filter (0x%x)" HERE, (u32)mag_filter); -} -VkBorderColor get_border_color(u8 color) -{ - // TODO: Handle simulated alpha tests and modify texture operations accordingly - if ((color / 0x10) >= 0x8) - return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; - else - return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; -} - -VkSamplerAddressMode vk_wrap_mode(rsx::texture_wrap_mode gcm_wrap) -{ - switch (gcm_wrap) + VkBorderColor get_border_color(u32 color) { - case rsx::texture_wrap_mode::wrap: return VK_SAMPLER_ADDRESS_MODE_REPEAT; - case rsx::texture_wrap_mode::mirror: return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; - case rsx::texture_wrap_mode::clamp_to_edge: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - case rsx::texture_wrap_mode::border: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - case rsx::texture_wrap_mode::clamp: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - case rsx::texture_wrap_mode::mirror_once_clamp_to_edge: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; - case rsx::texture_wrap_mode::mirror_once_border: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; - case rsx::texture_wrap_mode::mirror_once_clamp: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + switch (color) + { + case 0x00000000: + { + return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + } + case 0xFFFFFFFF: + { + return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + } + case 0xFF000000: + { + return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; + } + default: + { + auto color4 = rsx::decode_border_color(color); + if ((color4.r + color4.g + color4.b) > 1.35f) + { + //If color elements are brighter than roughly 0.5 average, use white border + return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + } + else + { + if (color4.a > 0.5f) + return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; + else + return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + } + } + } } - fmt::throw_exception("unhandled texture clamp mode" HERE); -} -float max_aniso(rsx::texture_max_anisotropy gcm_aniso) -{ - switch (gcm_aniso) 
+ VkSamplerAddressMode vk_wrap_mode(rsx::texture_wrap_mode gcm_wrap) { - case rsx::texture_max_anisotropy::x1: return 1.0f; - case rsx::texture_max_anisotropy::x2: return 2.0f; - case rsx::texture_max_anisotropy::x4: return 4.0f; - case rsx::texture_max_anisotropy::x6: return 6.0f; - case rsx::texture_max_anisotropy::x8: return 8.0f; - case rsx::texture_max_anisotropy::x10: return 10.0f; - case rsx::texture_max_anisotropy::x12: return 12.0f; - case rsx::texture_max_anisotropy::x16: return 16.0f; + switch (gcm_wrap) + { + case rsx::texture_wrap_mode::wrap: return VK_SAMPLER_ADDRESS_MODE_REPEAT; + case rsx::texture_wrap_mode::mirror: return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + case rsx::texture_wrap_mode::clamp_to_edge: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + case rsx::texture_wrap_mode::border: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + case rsx::texture_wrap_mode::clamp: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + case rsx::texture_wrap_mode::mirror_once_clamp_to_edge: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + case rsx::texture_wrap_mode::mirror_once_border: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + case rsx::texture_wrap_mode::mirror_once_clamp: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + } + fmt::throw_exception("unhandled texture clamp mode" HERE); } - fmt::throw_exception("Texture anisotropy error: bad max aniso (%d)" HERE, (u32)gcm_aniso); -} + float max_aniso(rsx::texture_max_anisotropy gcm_aniso) + { + switch (gcm_aniso) + { + case rsx::texture_max_anisotropy::x1: return 1.0f; + case rsx::texture_max_anisotropy::x2: return 2.0f; + case rsx::texture_max_anisotropy::x4: return 4.0f; + case rsx::texture_max_anisotropy::x6: return 6.0f; + case rsx::texture_max_anisotropy::x8: return 8.0f; + case rsx::texture_max_anisotropy::x10: return 10.0f; + case rsx::texture_max_anisotropy::x12: return 12.0f; + case rsx::texture_max_anisotropy::x16: return 16.0f; + } + fmt::throw_exception("Texture anisotropy 
error: bad max aniso (%d)" HERE, (u32)gcm_aniso); + } -std::array get_component_mapping(u32 format) -{ - //Component map in ARGB format - std::array mapping = {}; - switch (format) + std::array get_component_mapping(u32 format) { - case CELL_GCM_TEXTURE_A1R5G5B5: - case CELL_GCM_TEXTURE_R5G5B5A1: - case CELL_GCM_TEXTURE_R6G5B5: - case CELL_GCM_TEXTURE_R5G6B5: - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - mapping = { VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B }; break; - - case CELL_GCM_TEXTURE_A4R4G4B4: - mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; break; - - case CELL_GCM_TEXTURE_G8B8: - mapping = { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }; break; - - case CELL_GCM_TEXTURE_B8: - mapping = { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; - - case CELL_GCM_TEXTURE_X16: - //Blue component is also R (Mass Effect 3) - mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; - - case CELL_GCM_TEXTURE_X32_FLOAT: - mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; - - case CELL_GCM_TEXTURE_Y16_X16: - mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }; break; - - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - mapping = { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }; break; - - case 
CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G }; break; + //Component map in ARGB format + std::array mapping = {}; + + switch (format) + { + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_R5G5B5A1: + case CELL_GCM_TEXTURE_R6G5B5: + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + mapping = { VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B }; break; + + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; + + case CELL_GCM_TEXTURE_A4R4G4B4: + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; break; + + case CELL_GCM_TEXTURE_G8B8: + mapping = { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }; break; + + case CELL_GCM_TEXTURE_B8: + mapping = { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; + + case CELL_GCM_TEXTURE_X16: + //Blue component is also R (Mass Effect 3) + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; + + case CELL_GCM_TEXTURE_X32_FLOAT: + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; + + case CELL_GCM_TEXTURE_Y16_X16: + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }; break; + + 
case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + mapping = { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }; break; + + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G }; break; - case CELL_GCM_TEXTURE_D8R8G8B8: - mapping = { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A }; break; + case CELL_GCM_TEXTURE_D8R8G8B8: + mapping = { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A }; break; - case CELL_GCM_TEXTURE_D1R5G5B5: - mapping = { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B }; break; + case CELL_GCM_TEXTURE_D1R5G5B5: + mapping = { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B }; break; - case CELL_GCM_TEXTURE_COMPRESSED_HILO8: - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: - mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }; break; + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }; break; - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - mapping = { VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_R }; break; + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + mapping = { VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, 
VK_COMPONENT_SWIZZLE_R }; break; - case CELL_GCM_TEXTURE_A8R8G8B8: - mapping = { VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A }; break; + case CELL_GCM_TEXTURE_A8R8G8B8: + mapping = { VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A }; break; - default: - fmt::throw_exception("Invalid or unsupported component mapping for texture format (0x%x)" HERE, format); - } + default: + fmt::throw_exception("Invalid or unsupported component mapping for texture format (0x%x)" HERE, format); + } - return mapping; -} + return mapping; + } + VkFormat get_compatible_sampler_format(const gpu_formats_support &support, u32 format) + { + switch (format) + { + case CELL_GCM_TEXTURE_B8: return VK_FORMAT_R8_UNORM; + case CELL_GCM_TEXTURE_A1R5G5B5: return VK_FORMAT_A1R5G5B5_UNORM_PACK16; + case CELL_GCM_TEXTURE_A4R4G4B4: return VK_FORMAT_R4G4B4A4_UNORM_PACK16; + case CELL_GCM_TEXTURE_R5G6B5: return VK_FORMAT_R5G6B5_UNORM_PACK16; + case CELL_GCM_TEXTURE_A8R8G8B8: return VK_FORMAT_B8G8R8A8_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return VK_FORMAT_BC1_RGBA_UNORM_BLOCK; + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return VK_FORMAT_BC2_UNORM_BLOCK; + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return VK_FORMAT_BC3_UNORM_BLOCK; + case CELL_GCM_TEXTURE_G8B8: return VK_FORMAT_R8G8_UNORM; + case CELL_GCM_TEXTURE_R6G5B5: return VK_FORMAT_R5G6B5_UNORM_PACK16; // Expand, discard high bit? + case CELL_GCM_TEXTURE_DEPTH24_D8: return support.d24_unorm_s8? VK_FORMAT_D24_UNORM_S8_UINT : VK_FORMAT_D32_SFLOAT_S8_UINT; + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return support.d24_unorm_s8 ? 
VK_FORMAT_D24_UNORM_S8_UINT : VK_FORMAT_D32_SFLOAT_S8_UINT; + case CELL_GCM_TEXTURE_DEPTH16: return VK_FORMAT_D16_UNORM; + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return VK_FORMAT_D16_UNORM; + case CELL_GCM_TEXTURE_X16: return VK_FORMAT_R16_UNORM; + case CELL_GCM_TEXTURE_Y16_X16: return VK_FORMAT_R16G16_UNORM; + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return VK_FORMAT_R16G16_SFLOAT; + case CELL_GCM_TEXTURE_R5G5B5A1: return VK_FORMAT_R5G5B5A1_UNORM_PACK16; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: return VK_FORMAT_R16G16B16A16_SFLOAT; + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: return VK_FORMAT_R32G32B32A32_SFLOAT; + case CELL_GCM_TEXTURE_X32_FLOAT: return VK_FORMAT_R32_SFLOAT; + case CELL_GCM_TEXTURE_D1R5G5B5: return VK_FORMAT_A1R5G5B5_UNORM_PACK16; + case CELL_GCM_TEXTURE_D8R8G8B8: return VK_FORMAT_B8G8R8A8_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return VK_FORMAT_A8B8G8R8_UNORM_PACK32; // Expand + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return VK_FORMAT_R8G8B8A8_UNORM; // Expand + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: return VK_FORMAT_R8G8_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return VK_FORMAT_R8G8_SNORM; + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return VK_FORMAT_R8G8_UNORM; // Not right + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return VK_FORMAT_R8G8_UNORM; // Not right + } + fmt::throw_exception("Invalid or unsupported sampler format for texture format (0x%x)" HERE, format); + } + + VkFormat get_compatible_srgb_format(VkFormat rgb_format) + { + switch (rgb_format) + { + case VK_FORMAT_B8G8R8A8_UNORM: + return VK_FORMAT_B8G8R8A8_SRGB; + case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: + return VK_FORMAT_BC1_RGBA_SRGB_BLOCK; + case VK_FORMAT_BC2_UNORM_BLOCK: + return VK_FORMAT_BC2_SRGB_BLOCK; + case VK_FORMAT_BC3_UNORM_BLOCK: + return VK_FORMAT_BC3_SRGB_BLOCK; + default: + return rgb_format; + } + } + + u8 get_format_texel_width(VkFormat 
format) + { + switch (format) + { + case VK_FORMAT_R8_UNORM: + return 1; + case VK_FORMAT_R16_UINT: + case VK_FORMAT_R16_SFLOAT: + case VK_FORMAT_R16_UNORM: + case VK_FORMAT_R8G8_UNORM: + case VK_FORMAT_R8G8_SNORM: + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + case VK_FORMAT_R4G4B4A4_UNORM_PACK16: + case VK_FORMAT_R5G6B5_UNORM_PACK16: + case VK_FORMAT_R5G5B5A1_UNORM_PACK16: + return 2; + case VK_FORMAT_R32_UINT: + case VK_FORMAT_R32_SFLOAT: + case VK_FORMAT_R16G16_UNORM: + case VK_FORMAT_R16G16_SFLOAT: + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_B8G8R8A8_SRGB: + case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: + case VK_FORMAT_BC2_UNORM_BLOCK: + case VK_FORMAT_BC3_UNORM_BLOCK: + case VK_FORMAT_BC1_RGBA_SRGB_BLOCK: + case VK_FORMAT_BC2_SRGB_BLOCK: + case VK_FORMAT_BC3_SRGB_BLOCK: + return 4; + case VK_FORMAT_R16G16B16A16_SFLOAT: + return 8; + case VK_FORMAT_R32G32B32A32_SFLOAT: + return 16; + case VK_FORMAT_D16_UNORM: + return 2; + case VK_FORMAT_D32_SFLOAT_S8_UINT: //TODO: Translate to D24S8 + case VK_FORMAT_D24_UNORM_S8_UINT: + return 4; + } + + fmt::throw_exception("Unexpected vkFormat 0x%X", (u32)format); + } + + std::pair get_format_element_size(VkFormat format) + { + // Return value is {ELEMENT_SIZE, NUM_ELEMENTS_PER_TEXEL} + // NOTE: Due to endianness issues, coalesced larger types are preferred + // e.g UINT1 to hold 4x1 bytes instead of UBYTE4 to hold 4x1 + + switch (format) + { + //8-bit + case VK_FORMAT_R8_UNORM: + return{ 1, 1 }; + case VK_FORMAT_R8G8_UNORM: + case VK_FORMAT_R8G8_SNORM: + return{ 2, 1 }; //UNSIGNED_SHORT_8_8 + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_B8G8R8A8_SRGB: + return{ 4, 1 }; //UNSIGNED_INT_8_8_8_8 + //16-bit + case VK_FORMAT_R16_UINT: + case VK_FORMAT_R16_SFLOAT: + case VK_FORMAT_R16_UNORM: + return{ 2, 1 }; //UNSIGNED_SHORT and HALF_FLOAT + case VK_FORMAT_R16G16_UNORM: + case 
VK_FORMAT_R16G16_SFLOAT: + return{ 2, 2 }; //HALF_FLOAT + case VK_FORMAT_R16G16B16A16_SFLOAT: + return{ 2, 4 }; //HALF_FLOAT + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + case VK_FORMAT_R4G4B4A4_UNORM_PACK16: + case VK_FORMAT_R5G6B5_UNORM_PACK16: + case VK_FORMAT_R5G5B5A1_UNORM_PACK16: + return{ 2, 1 }; //UNSIGNED_SHORT_X_Y_Z_W + //32-bit + case VK_FORMAT_R32_UINT: + case VK_FORMAT_R32_SFLOAT: + return{ 4, 1 }; //FLOAT + case VK_FORMAT_R32G32B32A32_SFLOAT: + return{ 4, 4 }; //FLOAT + //DXT + case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: + case VK_FORMAT_BC2_UNORM_BLOCK: + case VK_FORMAT_BC3_UNORM_BLOCK: + case VK_FORMAT_BC1_RGBA_SRGB_BLOCK: + case VK_FORMAT_BC2_SRGB_BLOCK: + case VK_FORMAT_BC3_SRGB_BLOCK: + return{ 4, 1 }; + //Depth + case VK_FORMAT_D16_UNORM: + return{ 2, 1 }; + case VK_FORMAT_D32_SFLOAT_S8_UINT: + case VK_FORMAT_D24_UNORM_S8_UINT: + return{ 4, 1 }; + } + + fmt::throw_exception("Unexpected vkFormat 0x%X", (u32)format); + } } diff --git a/rpcs3/Emu/RSX/VK/VKFormats.h b/rpcs3/Emu/RSX/VK/VKFormats.h index 63bad8a6106c..18f729ad97ef 100644 --- a/rpcs3/Emu/RSX/VK/VKFormats.h +++ b/rpcs3/Emu/RSX/VK/VKFormats.h @@ -4,19 +4,13 @@ namespace vk { - struct gpu_formats_support - { - bool d24_unorm_s8 : 1; - bool d32_sfloat_s8 : 1; - }; + VkBorderColor get_border_color(u32 color); - gpu_formats_support get_optimal_tiling_supported_formats(VkPhysicalDevice physical_device); VkFormat get_compatible_depth_surface_format(const gpu_formats_support &support, rsx::surface_depth_format format); - VkStencilOp get_stencil_op(rsx::stencil_op op); - VkLogicOp get_logic_op(rsx::logic_op op); - VkFrontFace get_front_face_ccw(rsx::front_face ffv); - VkCullModeFlags get_cull_face(u32 cfv); - VkBorderColor get_border_color(u8 color); + VkFormat get_compatible_sampler_format(const gpu_formats_support &support, u32 format); + VkFormat get_compatible_srgb_format(VkFormat rgb_format); + u8 get_format_texel_width(VkFormat format); + std::pair get_format_element_size(VkFormat format); std::tuple 
get_min_filter_and_mip(rsx::texture_minify_filter min_filter); VkFilter get_mag_filter(rsx::texture_magnify_filter mag_filter); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index d72bdb415c72..5c84b1bc3f1d 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -100,7 +100,10 @@ namespace vk } case rsx::surface_color_format::g8b8: - return std::make_pair(VK_FORMAT_R8G8_UNORM, vk::default_component_map()); + { + VkComponentMapping gb_rg = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }; + return std::make_pair(VK_FORMAT_R8G8_UNORM, gb_rg); + } case rsx::surface_color_format::x32: return std::make_pair(VK_FORMAT_R32_SFLOAT, vk::default_component_map()); @@ -541,10 +544,6 @@ VKGSRender::VKGSRender() : GSRender() m_device = (vk::render_device*)(&m_swapchain->get_device()); - m_memory_type_mapping = m_device->get_memory_mapping(); - - m_optimal_tiling_supported_formats = vk::get_optimal_tiling_supported_formats(m_device->gpu()); - vk::set_current_thread_ctx(m_thread_context); vk::set_current_renderer(m_swapchain->get_device()); @@ -570,7 +569,7 @@ VKGSRender::VKGSRender() : GSRender() m_secondary_command_buffer.access_hint = vk::command_buffer::access_type_hint::all; //Precalculated stuff - m_render_passes = get_precomputed_render_passes(*m_device, m_optimal_tiling_supported_formats); + m_render_passes = get_precomputed_render_passes(*m_device, m_device->get_formats_support()); std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device); //Occlusion @@ -589,14 +588,15 @@ VKGSRender::VKGSRender() : GSRender() semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; //VRAM allocation + const auto& memory_map = m_device->get_memory_mapping(); m_attrib_ring_info.init(VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000); - m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, VK_ATTRIB_RING_BUFFER_SIZE_M * 
0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0)); + m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0)); m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "uniform buffer"); - m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0)); + m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0)); m_index_buffer_ring_info.init(VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer"); - m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0)); + m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0)); m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000); - m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0)); + m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0)); for (auto &ctx : frame_context_storage) { @@ -604,7 +604,7 @@ VKGSRender::VKGSRender() : GSRender() ctx.descriptor_pool.create(*m_device, sizes.data(), static_cast(sizes.size())); } - null_buffer = std::make_unique(*m_device, 32, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0); + null_buffer = std::make_unique(*m_device, 32, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0); null_buffer_view = std::make_unique(*m_device, null_buffer->value, VK_FORMAT_R8_UINT, 0, 32); vk::initialize_compiler_context(); @@ -613,7 +613,7 @@ VKGSRender::VKGSRender() : GSRender() { size_t idx = vk::get_render_pass_location( m_swapchain->get_surface_format(), VK_FORMAT_UNDEFINED, 1); m_text_writer.reset(new vk::text_writer()); - m_text_writer->init(*m_device, m_memory_type_mapping, m_render_passes[idx]); + m_text_writer->init(*m_device, m_render_passes[idx]); } m_depth_converter.reset(new vk::depth_convert_pass()); @@ -622,6 +622,9 @@ VKGSRender::VKGSRender() : GSRender() m_depth_scaler.reset(new vk::depth_scaling_pass()); m_depth_scaler->create(*m_device); + m_attachment_clear_pass.reset(new vk::attachment_clear_pass()); + m_attachment_clear_pass->create(*m_device); + m_prog_buffer.reset(new VKProgramBuffer(m_render_passes.data())); if (g_cfg.video.disable_vertex_cache) @@ -629,7 +632,7 @@ VKGSRender::VKGSRender() : GSRender() else m_vertex_cache.reset(new vk::weak_vertex_cache()); - m_shaders_cache.reset(new 
vk::shader_cache(*m_prog_buffer.get(), "vulkan", "v1.2")); + m_shaders_cache.reset(new vk::shader_cache(*m_prog_buffer.get(), "vulkan", "v1.25")); open_command_buffer(); @@ -648,11 +651,11 @@ VKGSRender::VKGSRender() : GSRender() m_current_frame = &frame_context_storage[0]; - m_texture_cache.initialize((*m_device), m_memory_type_mapping, m_optimal_tiling_supported_formats, m_swapchain->get_graphics_queue(), + m_texture_cache.initialize((*m_device), m_swapchain->get_graphics_queue(), m_texture_upload_buffer_ring_info); m_ui_renderer.reset(new vk::ui_overlay_renderer()); - m_ui_renderer->create(*m_current_command_buffer, m_memory_type_mapping, m_texture_upload_buffer_ring_info); + m_ui_renderer->create(*m_current_command_buffer, m_texture_upload_buffer_ring_info); supports_multidraw = !g_cfg.video.strict_rendering_mode; supports_native_ui = (bool)g_cfg.misc.use_native_interface; @@ -749,6 +752,10 @@ VKGSRender::~VKGSRender() m_depth_scaler->destroy(); m_depth_scaler.reset(); + //Attachment clear helper + m_attachment_clear_pass->destroy(); + m_attachment_clear_pass.reset(); + //Pipeline descriptors vkDestroyPipelineLayout(*m_device, pipeline_layout, nullptr); vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr); @@ -780,7 +787,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) vk::texture_cache::thrashed_set result; { std::lock_guard lock(m_secondary_cb_guard); - result = std::move(m_texture_cache.invalidate_address(address, is_writing, false, m_secondary_command_buffer, m_memory_type_mapping, m_swapchain->get_graphics_queue())); + result = std::move(m_texture_cache.invalidate_address(address, is_writing, false, m_secondary_command_buffer, m_swapchain->get_graphics_queue())); } if (!result.violation_handled) @@ -842,7 +849,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) m_flush_requests.producer_wait(); } - m_texture_cache.flush_all(result, m_secondary_command_buffer, m_memory_type_mapping, 
m_swapchain->get_graphics_queue()); + m_texture_cache.flush_all(result, m_secondary_command_buffer, m_swapchain->get_graphics_queue()); if (has_queue_ref) { @@ -858,7 +865,7 @@ void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size) { std::lock_guard lock(m_secondary_cb_guard); if (m_texture_cache.invalidate_range(address_base, size, true, true, false, - m_secondary_command_buffer, m_memory_type_mapping, m_swapchain->get_graphics_queue()).violation_handled) + m_secondary_command_buffer, m_swapchain->get_graphics_queue()).violation_handled) { m_texture_cache.purge_dirty(); { @@ -1019,6 +1026,10 @@ void VKGSRender::update_draw_state() //Update depth bounds min/max vkCmdSetDepthBounds(*m_current_command_buffer, rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max()); } + else + { + vkCmdSetDepthBounds(*m_current_command_buffer, 0.f, 1.f); + } set_viewport(); @@ -1073,8 +1084,44 @@ void VKGSRender::end() std::chrono::time_point textures_start = vertex_end; - auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); + //Clear any 'dirty' surfaces - possible if a recycled cache surface is used + std::vector buffers_to_clear; + buffers_to_clear.reserve(4); + const auto targets = rsx::utility::get_rtt_indexes(rsx::method_registers.surface_color_target()); + + //Check for memory clears + if (ds && ds->dirty) + { + //Clear this surface before drawing on it + VkClearValue clear_value = {}; + clear_value.depthStencil = { 1.f, 255 }; + buffers_to_clear.push_back({ vk::get_aspect_flags(ds->info.format), 0, clear_value }); + ds->dirty = false; + } + + for (u32 index = 0; index < targets.size(); ++index) + { + if (auto rtt = std::get<1>(m_rtts.m_bound_render_targets[index])) + { + if (rtt->dirty) + { + buffers_to_clear.push_back({ VK_IMAGE_ASPECT_COLOR_BIT, index, {} }); + rtt->dirty = false; + } + } + } + + if (buffers_to_clear.size() > 0) + { + begin_render_pass(); + + VkClearRect rect = { {{0, 0}, {m_draw_fbo->width(),
m_draw_fbo->height()}}, 0, 1 }; + vkCmdClearAttachments(*m_current_command_buffer, (u32)buffers_to_clear.size(), + buffers_to_clear.data(), 1, &rect); + + close_render_pass(); + } //Check for data casts if (ds && ds->old_contents) @@ -1086,7 +1133,6 @@ void VKGSRender::end() m_depth_converter->run(*m_current_command_buffer, ds->width(), ds->height(), ds, ds->old_contents->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean); ds->old_contents = nullptr; - ds->dirty = false; } else if (!g_cfg.video.strict_rendering_mode) { @@ -1174,7 +1220,8 @@ void VKGSRender::end() *sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts); const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); - const VkBool32 compare_enabled = (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8); + const VkBool32 compare_enabled = (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8 || + texture_format == CELL_GCM_TEXTURE_DEPTH16_FLOAT || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT); VkCompareOp depth_compare_mode = compare_enabled ? 
vk::get_compare_func((rsx::comparison_function)rsx::method_registers.fragment_textures[i].zfunc(), true) : VK_COMPARE_OP_NEVER; bool replace = !fs_sampler_handles[i]; @@ -1389,41 +1436,6 @@ void VKGSRender::end() begin_render_pass(); - //Clear any 'dirty' surfaces - possible is a recycled cache surface is used - std::vector buffers_to_clear; - buffers_to_clear.reserve(4); - const auto targets = rsx::utility::get_rtt_indexes(rsx::method_registers.surface_color_target()); - - if (ds && ds->dirty) - { - //Clear this surface before drawing on it - VkClearValue depth_clear_value; - depth_clear_value.depthStencil.depth = 1.f; - depth_clear_value.depthStencil.stencil = 255; - - VkClearAttachment clear_desc = { ds->attachment_aspect_flag, 0, depth_clear_value }; - buffers_to_clear.push_back(clear_desc); - - ds->dirty = false; - } - - for (int index = 0; index < targets.size(); ++index) - { - if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0 && std::get<1>(m_rtts.m_bound_render_targets[index])->dirty) - { - const u32 real_index = (index == 1 && targets.size() == 1) ? 
0 : static_cast(index); - buffers_to_clear.push_back({ VK_IMAGE_ASPECT_COLOR_BIT, real_index, {} }); - - std::get<1>(m_rtts.m_bound_render_targets[index])->dirty = false; - } - } - - if (buffers_to_clear.size() > 0) - { - VkClearRect clear_rect = { 0, 0, m_draw_fbo->width(), m_draw_fbo->height(), 0, 1 }; - vkCmdClearAttachments(*m_current_command_buffer, static_cast(buffers_to_clear.size()), buffers_to_clear.data(), 1, &clear_rect); - } - bool primitive_emulated = false; vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated); @@ -1675,41 +1687,100 @@ void VKGSRender::clear_surface(u32 mask) if (mask & 0x2) { - if (surface_depth_format == rsx::surface_depth_format::z24s8) + if (surface_depth_format == rsx::surface_depth_format::z24s8 && + rsx::method_registers.stencil_mask() != 0) { u8 clear_stencil = rsx::method_registers.stencil_clear_value(); - depth_stencil_clear_values.depthStencil.stencil = clear_stencil; depth_stencil_mask |= VK_IMAGE_ASPECT_STENCIL_BIT; } } - if (mask & 0xF0) + if (auto colormask = (mask & 0xF0)) { if (m_draw_buffers_count > 0) { - u8 clear_a = rsx::method_registers.clear_color_a(); - u8 clear_r = rsx::method_registers.clear_color_r(); - u8 clear_g = rsx::method_registers.clear_color_g(); - u8 clear_b = rsx::method_registers.clear_color_b(); - - color_clear_values.color.float32[0] = (float)clear_r / 255; - color_clear_values.color.float32[1] = (float)clear_g / 255; - color_clear_values.color.float32[2] = (float)clear_b / 255; - color_clear_values.color.float32[3] = (float)clear_a / 255; - - for (u32 index = 0; index < m_draw_buffers_count; ++index) + bool use_fast_clear = false; + bool ignore_clear = false; + switch (rsx::method_registers.surface_color()) { - clear_descriptors.push_back({ VK_IMAGE_ASPECT_COLOR_BIT, index, color_clear_values }); - } + case rsx::surface_color_format::x32: + case rsx::surface_color_format::w16z16y16x16: + case rsx::surface_color_format::w32z32y32x32: + //NOP + 
ignore_clear = true; + break; + case rsx::surface_color_format::g8b8: + colormask = rsx::get_g8b8_r8g8_colormask(colormask); + use_fast_clear = (colormask == (0x10 | 0x20)); + ignore_clear = (colormask == 0); + colormask |= (0x40 | 0x80); + break; + default: + use_fast_clear = (colormask == (0x10 | 0x20 | 0x40 | 0x80)); + break; + }; - for (auto &rtt : m_rtts.m_bound_render_targets) + if (!ignore_clear) { - if (std::get<0>(rtt) != 0) + u8 clear_a = rsx::method_registers.clear_color_a(); + u8 clear_r = rsx::method_registers.clear_color_r(); + u8 clear_g = rsx::method_registers.clear_color_g(); + u8 clear_b = rsx::method_registers.clear_color_b(); + + color_clear_values.color.float32[0] = (float)clear_r / 255; + color_clear_values.color.float32[1] = (float)clear_g / 255; + color_clear_values.color.float32[2] = (float)clear_b / 255; + color_clear_values.color.float32[3] = (float)clear_a / 255; + + if (use_fast_clear) { - std::get<1>(rtt)->dirty = false; - std::get<1>(rtt)->old_contents = nullptr; + for (u32 index = 0; index < m_draw_buffers_count; ++index) + { + clear_descriptors.push_back({ VK_IMAGE_ASPECT_COLOR_BIT, index, color_clear_values }); + } + } + else + { + color4f clear_color = + { + color_clear_values.color.float32[0], + color_clear_values.color.float32[1], + color_clear_values.color.float32[2], + color_clear_values.color.float32[3] + }; + + const auto fbo_format = vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first; + const auto rp_index = vk::get_render_pass_location(fbo_format, VK_FORMAT_UNDEFINED, 1); + const auto renderpass = m_render_passes[rp_index]; + + m_attachment_clear_pass->update_config(colormask, clear_color); + + for (u32 index = 0; index < m_draw_buffers_count; ++index) + { + if (auto rtt = std::get<1>(m_rtts.m_bound_render_targets[index])) + { + vk::insert_texture_barrier(*m_current_command_buffer, rtt); + m_attachment_clear_pass->run(*m_current_command_buffer, rtt, + region.rect, renderpass, 
m_framebuffers_to_clean); + } + else + fmt::throw_exception("Unreachable" HERE); + } + + //Flush unconditionally - parameters might not persist + //TODO: Better parameter management for overlay passes + flush_command_queue(); + } + + for (auto &rtt : m_rtts.m_bound_render_targets) + { + if (std::get<0>(rtt) != 0) + { + std::get<1>(rtt)->dirty = false; + std::get<1>(rtt)->old_contents = nullptr; + } } } } @@ -1728,12 +1799,9 @@ void VKGSRender::clear_surface(u32 mask) if (clear_descriptors.size() > 0) { - vk::enter_uninterruptible(); begin_render_pass(); vkCmdClearAttachments(*m_current_command_buffer, (u32)clear_descriptors.size(), clear_descriptors.data(), 1, &region); - close_render_pass(); - vk::leave_uninterruptible(); } } @@ -1764,7 +1832,7 @@ void VKGSRender::copy_render_targets_to_dma_location() continue; m_texture_cache.flush_memory_to_cache(m_surface_info[index].address, m_surface_info[index].pitch * m_surface_info[index].height, true, - *m_current_command_buffer, m_memory_type_mapping, m_swapchain->get_graphics_queue()); + *m_current_command_buffer, m_swapchain->get_graphics_queue()); } } @@ -1773,7 +1841,7 @@ void VKGSRender::copy_render_targets_to_dma_location() if (m_depth_surface_info.pitch) { m_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, true, - *m_current_command_buffer, m_memory_type_mapping, m_swapchain->get_graphics_queue()); + *m_current_command_buffer, m_swapchain->get_graphics_queue()); } } @@ -1962,6 +2030,7 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources) m_text_writer->reset_descriptors(); } + m_attachment_clear_pass->free_resources(); m_depth_converter->free_resources(); m_depth_scaler->free_resources(); m_ui_renderer->free_resources(); @@ -2150,33 +2219,43 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info) vk::pipeline_props properties = {}; + // Input assembly bool emulated_primitive_type; - -
properties.ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - properties.ia.topology = vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, emulated_primitive_type); + properties.state.set_primitive_type(vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, emulated_primitive_type)); const bool restarts_valid = rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed && !emulated_primitive_type && !rsx::method_registers.current_draw_clause.is_disjoint_primitive; if (rsx::method_registers.restart_index_enabled() && !vk::emulate_primitive_restart() && restarts_valid) - properties.ia.primitiveRestartEnable = VK_TRUE; - else - properties.ia.primitiveRestartEnable = VK_FALSE; + properties.state.enable_primitive_restart(); + + // Rasterizer state + properties.state.set_attachment_count(m_draw_buffers_count); + properties.state.set_front_face(vk::get_front_face(rsx::method_registers.front_face_mode())); + properties.state.enable_depth_clamp(rsx::method_registers.depth_clamp_enabled() || !rsx::method_registers.depth_clip_enabled()); + properties.state.enable_depth_bias(true); + properties.state.enable_depth_bounds_test(true); - for (int i = 0; i < 4; ++i) + if (rsx::method_registers.depth_test_enabled()) { - properties.att_state[i].colorWriteMask = 0xf; - properties.att_state[i].blendEnable = VK_FALSE; + //NOTE: Like stencil, depth write is meaningless without depth test + properties.state.set_depth_mask(rsx::method_registers.depth_write_enabled()); + properties.state.enable_depth_test(vk::get_compare_func(rsx::method_registers.depth_func())); } - VkColorComponentFlags mask = 0; - if (rsx::method_registers.color_mask_a()) mask |= VK_COLOR_COMPONENT_A_BIT; - if (rsx::method_registers.color_mask_b()) mask |= VK_COLOR_COMPONENT_B_BIT; - if (rsx::method_registers.color_mask_g()) mask |= VK_COLOR_COMPONENT_G_BIT; - if (rsx::method_registers.color_mask_r()) mask |= 
VK_COLOR_COMPONENT_R_BIT; + if (rsx::method_registers.logic_op_enabled()) + properties.state.enable_logic_op(vk::get_logic_op(rsx::method_registers.logic_operation())); - for (u8 idx = 0; idx < m_draw_buffers_count; ++idx) - { - properties.att_state[idx].colorWriteMask = mask; - } + if (rsx::method_registers.cull_face_enabled()) + properties.state.enable_cull_face(vk::get_cull_face(rsx::method_registers.cull_face_mode())); + + bool color_mask_b = rsx::method_registers.color_mask_b(); + bool color_mask_g = rsx::method_registers.color_mask_g(); + bool color_mask_r = rsx::method_registers.color_mask_r(); + bool color_mask_a = rsx::method_registers.color_mask_a(); + + if (rsx::method_registers.surface_color() == rsx::surface_color_format::g8b8) + rsx::get_g8b8_r8g8_colormask(color_mask_r, color_mask_g, color_mask_b, color_mask_a); + + properties.state.set_color_mask(color_mask_r, color_mask_g, color_mask_b, color_mask_a); bool mrt_blend_enabled[] = { @@ -2223,84 +2302,42 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info) { if (mrt_blend_enabled[idx]) { - properties.att_state[idx].blendEnable = VK_TRUE; - properties.att_state[idx].srcColorBlendFactor = sfactor_rgb; - properties.att_state[idx].dstColorBlendFactor = dfactor_rgb; - properties.att_state[idx].srcAlphaBlendFactor = sfactor_a; - properties.att_state[idx].dstAlphaBlendFactor = dfactor_a; - properties.att_state[idx].colorBlendOp = equation_rgb; - properties.att_state[idx].alphaBlendOp = equation_a; + properties.state.enable_blend(idx, sfactor_rgb, sfactor_a, dfactor_rgb, dfactor_a, equation_rgb, equation_a); } } } - properties.cs.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - properties.cs.attachmentCount = m_draw_buffers_count; - properties.cs.pAttachments = properties.att_state; - - if (rsx::method_registers.logic_op_enabled()) - { - properties.cs.logicOpEnable = true; - properties.cs.logicOp = vk::get_logic_op(rsx::method_registers.logic_operation()); - } - - 
properties.ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; - properties.ds.depthWriteEnable = rsx::method_registers.depth_write_enabled() ? VK_TRUE : VK_FALSE; - - if (rsx::method_registers.depth_bounds_test_enabled()) - { - properties.ds.depthBoundsTestEnable = VK_TRUE; - } - else - properties.ds.depthBoundsTestEnable = VK_FALSE; - if (rsx::method_registers.stencil_test_enabled()) { - properties.ds.stencilTestEnable = VK_TRUE; - properties.ds.front.failOp = vk::get_stencil_op(rsx::method_registers.stencil_op_fail()); - properties.ds.front.passOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zpass()); - properties.ds.front.depthFailOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zfail()); - properties.ds.front.compareOp = vk::get_compare_func(rsx::method_registers.stencil_func()); - - if (rsx::method_registers.two_sided_stencil_test_enabled()) + if (!rsx::method_registers.two_sided_stencil_test_enabled()) { - properties.ds.back.failOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_fail()); - properties.ds.back.passOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zpass()); - properties.ds.back.depthFailOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zfail()); - properties.ds.back.compareOp = vk::get_compare_func(rsx::method_registers.back_stencil_func()); + properties.state.enable_stencil_test( + vk::get_stencil_op(rsx::method_registers.stencil_op_fail()), + vk::get_stencil_op(rsx::method_registers.stencil_op_zfail()), + vk::get_stencil_op(rsx::method_registers.stencil_op_zpass()), + vk::get_compare_func(rsx::method_registers.stencil_func()), + 0xFF, 0xFF); //write mask, func_mask, ref are dynamic } else - properties.ds.back = properties.ds.front; - } - else - properties.ds.stencilTestEnable = VK_FALSE; + { + properties.state.enable_stencil_test_separate(0, + vk::get_stencil_op(rsx::method_registers.stencil_op_fail()), + vk::get_stencil_op(rsx::method_registers.stencil_op_zfail()), + 
vk::get_stencil_op(rsx::method_registers.stencil_op_zpass()), + vk::get_compare_func(rsx::method_registers.stencil_func()), + 0xFF, 0xFF); //write mask, func_mask, ref are dynamic - if (rsx::method_registers.depth_test_enabled()) - { - properties.ds.depthTestEnable = VK_TRUE; - properties.ds.depthCompareOp = vk::get_compare_func(rsx::method_registers.depth_func()); + properties.state.enable_stencil_test_separate(1, + vk::get_stencil_op(rsx::method_registers.back_stencil_op_fail()), + vk::get_stencil_op(rsx::method_registers.back_stencil_op_zfail()), + vk::get_stencil_op(rsx::method_registers.back_stencil_op_zpass()), + vk::get_compare_func(rsx::method_registers.back_stencil_func()), + 0xFF, 0xFF); //write mask, func_mask, ref are dynamic + } } - else - properties.ds.depthTestEnable = VK_FALSE; - - properties.rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - properties.rs.polygonMode = VK_POLYGON_MODE_FILL; - properties.rs.depthClampEnable = rsx::method_registers.depth_clamp_enabled() || !rsx::method_registers.depth_clip_enabled(); - properties.rs.rasterizerDiscardEnable = VK_FALSE; - - //Disabled by setting factors to 0 as needed - properties.rs.depthBiasEnable = VK_TRUE; - - if (rsx::method_registers.cull_face_enabled()) - properties.rs.cullMode = vk::get_cull_face(rsx::method_registers.cull_face_mode()); - else - properties.rs.cullMode = VK_CULL_MODE_NONE; - - properties.rs.frontFace = vk::get_front_face(rsx::method_registers.front_face_mode()); properties.render_pass = m_render_passes[m_current_renderpass_id]; properties.render_pass_location = (int)m_current_renderpass_id; - properties.num_targets = m_draw_buffers_count; vk::enter_uninterruptible(); @@ -2665,7 +2702,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) clip_width, clip_height, target, surface_addresses, zeta_address, - (*m_device), &*m_current_command_buffer, m_optimal_tiling_supported_formats, m_memory_type_mapping); + (*m_device), 
&*m_current_command_buffer); //Reset framebuffer information VkFormat old_format = VK_FORMAT_UNDEFINED; @@ -2679,7 +2716,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) m_texture_cache.set_memory_read_flags(m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height, rsx::memory_read_flags::flush_once); m_texture_cache.flush_if_cache_miss_likely(old_format, m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height, - *m_current_command_buffer, m_memory_type_mapping, m_swapchain->get_graphics_queue()); + *m_current_command_buffer, m_swapchain->get_graphics_queue()); } m_surface_info[i].address = m_surface_info[i].pitch = 0; @@ -2692,10 +2729,10 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) { if (m_depth_surface_info.pitch && g_cfg.video.write_depth_buffer) { - auto old_format = vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, m_depth_surface_info.depth_format); + auto old_format = vk::get_compatible_depth_surface_format(m_device->get_formats_support(), m_depth_surface_info.depth_format); m_texture_cache.set_memory_read_flags(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, rsx::memory_read_flags::flush_once); m_texture_cache.flush_if_cache_miss_likely(old_format, m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, - *m_current_command_buffer, m_memory_type_mapping, m_swapchain->get_graphics_queue()); + *m_current_command_buffer, m_swapchain->get_graphics_queue()); } m_depth_surface_info.address = m_depth_surface_info.pitch = 0; @@ -2776,7 +2813,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) } } - auto vk_depth_format = (zeta_address == 0) ? VK_FORMAT_UNDEFINED : vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, depth_fmt); + auto vk_depth_format = (zeta_address == 0) ? 
VK_FORMAT_UNDEFINED : vk::get_compatible_depth_surface_format(m_device->get_formats_support(), depth_fmt); m_current_renderpass_id = vk::get_render_pass_location(vk::get_compatible_surface_format(color_fmt).first, vk_depth_format, m_draw_buffers_count); //Search old framebuffers for this same configuration @@ -3158,7 +3195,7 @@ void VKGSRender::flip(int buffer) if (m_custom_ui) { - m_ui_renderer->run(*m_current_command_buffer, direct_fbo->width(), direct_fbo->height(), direct_fbo.get(), single_target_pass, m_memory_type_mapping, m_texture_upload_buffer_ring_info, *m_custom_ui); + m_ui_renderer->run(*m_current_command_buffer, direct_fbo->width(), direct_fbo->height(), direct_fbo.get(), single_target_pass, m_texture_upload_buffer_ring_info, *m_custom_ui); } if (g_cfg.video.overlay) @@ -3248,7 +3285,7 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst if (result.dst_image) { if (m_texture_cache.flush_if_cache_miss_likely(result.dst_image->info.format, result.real_dst_address, result.real_dst_size, - *m_current_command_buffer, m_memory_type_mapping, m_swapchain->get_graphics_queue())) + *m_current_command_buffer, m_swapchain->get_graphics_queue())) require_flush = true; } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index a335e1ad2efb..6f6dfee12a14 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -254,9 +254,6 @@ class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control vk::texture_cache m_texture_cache; rsx::vk_render_targets m_rtts; - vk::gpu_formats_support m_optimal_tiling_supported_formats; - vk::memory_type_mapping m_memory_type_mapping; - std::unique_ptr null_buffer; std::unique_ptr null_buffer_view; @@ -264,6 +261,7 @@ class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control std::unique_ptr m_depth_converter; std::unique_ptr m_depth_scaler; std::unique_ptr m_ui_renderer; + std::unique_ptr m_attachment_clear_pass; shared_mutex 
m_sampler_mutex; u64 surface_store_tag = 0; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 84d7aa2a8b4a..07a46e106f2a 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -108,105 +108,6 @@ namespace vk return result; } - VkFormat get_compatible_sampler_format(u32 format) - { - switch (format) - { - case CELL_GCM_TEXTURE_B8: return VK_FORMAT_R8_UNORM; - case CELL_GCM_TEXTURE_A1R5G5B5: return VK_FORMAT_A1R5G5B5_UNORM_PACK16; - case CELL_GCM_TEXTURE_A4R4G4B4: return VK_FORMAT_R4G4B4A4_UNORM_PACK16; - case CELL_GCM_TEXTURE_R5G6B5: return VK_FORMAT_R5G6B5_UNORM_PACK16; - case CELL_GCM_TEXTURE_A8R8G8B8: return VK_FORMAT_B8G8R8A8_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return VK_FORMAT_BC1_RGBA_UNORM_BLOCK; - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return VK_FORMAT_BC2_UNORM_BLOCK; - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return VK_FORMAT_BC3_UNORM_BLOCK; - case CELL_GCM_TEXTURE_G8B8: return VK_FORMAT_R8G8_UNORM; - case CELL_GCM_TEXTURE_R6G5B5: return VK_FORMAT_R5G6B5_UNORM_PACK16; // Expand, discard high bit? 
- case CELL_GCM_TEXTURE_DEPTH24_D8: return VK_FORMAT_D24_UNORM_S8_UINT; //TODO - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return VK_FORMAT_D24_UNORM_S8_UINT; //TODO - case CELL_GCM_TEXTURE_DEPTH16: return VK_FORMAT_D16_UNORM; - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return VK_FORMAT_D16_UNORM; - case CELL_GCM_TEXTURE_X16: return VK_FORMAT_R16_UNORM; - case CELL_GCM_TEXTURE_Y16_X16: return VK_FORMAT_R16G16_UNORM; - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return VK_FORMAT_R16G16_SFLOAT; - case CELL_GCM_TEXTURE_R5G5B5A1: return VK_FORMAT_R5G5B5A1_UNORM_PACK16; - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: return VK_FORMAT_R16G16B16A16_SFLOAT; - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: return VK_FORMAT_R32G32B32A32_SFLOAT; - case CELL_GCM_TEXTURE_X32_FLOAT: return VK_FORMAT_R32_SFLOAT; - case CELL_GCM_TEXTURE_D1R5G5B5: return VK_FORMAT_A1R5G5B5_UNORM_PACK16; - case CELL_GCM_TEXTURE_D8R8G8B8: return VK_FORMAT_B8G8R8A8_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return VK_FORMAT_A8B8G8R8_UNORM_PACK32; // Expand - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return VK_FORMAT_R8G8B8A8_UNORM; // Expand - case CELL_GCM_TEXTURE_COMPRESSED_HILO8: return VK_FORMAT_R8G8_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return VK_FORMAT_R8G8_SNORM; - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return VK_FORMAT_R8G8_UNORM; // Not right - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return VK_FORMAT_R8G8_UNORM; // Not right - } - fmt::throw_exception("Invalid or unsupported sampler format for texture format (0x%x)" HERE, format); - } - - VkFormat get_compatible_srgb_format(VkFormat rgb_format) - { - switch (rgb_format) - { - case VK_FORMAT_B8G8R8A8_UNORM: - return VK_FORMAT_B8G8R8A8_SRGB; - case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: - return VK_FORMAT_BC1_RGBA_SRGB_BLOCK; - case VK_FORMAT_BC2_UNORM_BLOCK: - return VK_FORMAT_BC2_SRGB_BLOCK; - case VK_FORMAT_BC3_UNORM_BLOCK: - return 
VK_FORMAT_BC3_SRGB_BLOCK; - default: - return rgb_format; - } - } - - u8 get_format_texel_width(const VkFormat format) - { - switch (format) - { - case VK_FORMAT_R8_UNORM: - return 1; - case VK_FORMAT_R16_UINT: - case VK_FORMAT_R16_SFLOAT: - case VK_FORMAT_R16_UNORM: - case VK_FORMAT_R8G8_UNORM: - case VK_FORMAT_R8G8_SNORM: - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - case VK_FORMAT_R4G4B4A4_UNORM_PACK16: - case VK_FORMAT_R5G6B5_UNORM_PACK16: - case VK_FORMAT_R5G5B5A1_UNORM_PACK16: - return 2; - case VK_FORMAT_R32_UINT: - case VK_FORMAT_R32_SFLOAT: - case VK_FORMAT_R16G16_UNORM: - case VK_FORMAT_R16G16_SFLOAT: - case VK_FORMAT_A8B8G8R8_UNORM_PACK32: - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_B8G8R8A8_UNORM: - case VK_FORMAT_B8G8R8A8_SRGB: - case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: - case VK_FORMAT_BC2_UNORM_BLOCK: - case VK_FORMAT_BC3_UNORM_BLOCK: - case VK_FORMAT_BC1_RGBA_SRGB_BLOCK: - case VK_FORMAT_BC2_SRGB_BLOCK: - case VK_FORMAT_BC3_SRGB_BLOCK: - return 4; - case VK_FORMAT_R16G16B16A16_SFLOAT: - return 8; - case VK_FORMAT_R32G32B32A32_SFLOAT: - return 16; - case VK_FORMAT_D16_UNORM: - return 2; - case VK_FORMAT_D32_SFLOAT_S8_UINT: //TODO: Translate to D24S8 - case VK_FORMAT_D24_UNORM_S8_UINT: - return 4; - } - - fmt::throw_exception("Unexpected vkFormat 0x%X", (u32)format); - } - VkAllocationCallbacks default_callbacks() { VkAllocationCallbacks callbacks; @@ -449,18 +350,7 @@ namespace vk { if (image->current_layout == new_layout) return; - VkImageAspectFlags flags = VK_IMAGE_ASPECT_COLOR_BIT; - switch (image->info.format) - { - case VK_FORMAT_D16_UNORM: - flags = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - flags = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - } - + VkImageAspectFlags flags = get_aspect_flags(image->info.format); change_image_layout(cmd, image->value, image->current_layout, new_layout, { flags, 0, 1, 0, 1 }); image->current_layout = new_layout; } diff 
--git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index cecac23e68aa..4c988dcb67e9 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -62,6 +62,7 @@ namespace vk class physical_device; class command_buffer; struct image; + struct vk_data_heap; vk::context *get_current_thread_ctx(); void set_current_thread_ctx(const vk::context &ctx); @@ -79,6 +80,7 @@ namespace vk VkComponentMapping apply_swizzle_remap(const std::array& base_remap, const std::pair, std::array>& remap_vector); VkImageSubresource default_image_subresource(); VkImageSubresourceRange get_image_subresource_range(uint32_t base_layer, uint32_t base_mip, uint32_t layer_count, uint32_t level_count, VkImageAspectFlags aspect); + VkImageAspectFlags get_aspect_flags(VkFormat format); VkSampler null_sampler(); VkImageView null_image_view(vk::command_buffer&); @@ -89,15 +91,22 @@ namespace vk void destroy_global_resources(); + /** + * Allocate enough space in upload_buffer and write all mipmap/layer data into the subbuffer. + * Then copy all layers into dst_image. + * dst_image must be in TRANSFER_DST_OPTIMAL layout and upload_buffer have TRANSFER_SRC_BIT usage flag. 
+ */ + void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image, + const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, + VkImageAspectFlags flags, vk::vk_data_heap &upload_heap); + + //Other texture management helpers void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range); void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, VkImageSubresourceRange range); void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout); void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect); void copy_scaled_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height, u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height, u32 mipmaps, VkImageAspectFlagBits aspect, bool compatible_formats); - VkFormat get_compatible_sampler_format(u32 format); - VkFormat get_compatible_srgb_format(VkFormat rgb_format); - u8 get_format_texel_width(const VkFormat format); std::pair get_compatible_surface_format(rsx::surface_color_format color_format); size_t get_render_pass_location(VkFormat color_surface_format, VkFormat depth_stencil_format, u8 color_surface_count); @@ -105,6 +114,7 @@ namespace vk void insert_texture_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout layout, VkImageSubresourceRange range); void insert_texture_barrier(VkCommandBuffer cmd, vk::image *image); + //Manage 'uininterruptible' state where secondary operations (e.g violation handlers) will have to wait void enter_uninterruptible(); void leave_uninterruptible(); bool is_uninterruptible(); @@ -125,7 +135,14 @@ namespace vk uint32_t device_local; }; + struct 
gpu_formats_support + { + bool d24_unorm_s8; + bool d32_sfloat_s8; + }; + memory_type_mapping get_memory_mapping(const physical_device& dev); + gpu_formats_support get_optimal_tiling_supported_formats(const physical_device& dev); class physical_device { @@ -192,6 +209,7 @@ namespace vk { physical_device *pgpu = nullptr; memory_type_mapping memory_map{}; + gpu_formats_support m_formats_support{}; VkDevice dev = VK_NULL_HANDLE; public: @@ -243,7 +261,9 @@ namespace vk device.pEnabledFeatures = &available_features; CHECK_RESULT(vkCreateDevice(*pgpu, &device, nullptr, &dev)); + memory_map = vk::get_memory_mapping(pdev); + m_formats_support = vk::get_optimal_tiling_supported_formats(pdev); } ~render_device() @@ -290,6 +310,11 @@ namespace vk return memory_map; } + const gpu_formats_support& get_formats_support() const + { + return m_formats_support; + } + operator VkDevice&() { return dev; @@ -2098,6 +2123,164 @@ namespace vk } }; + class graphics_pipeline_state + { + public: + VkPipelineInputAssemblyStateCreateInfo ia; + VkPipelineDepthStencilStateCreateInfo ds; + VkPipelineColorBlendAttachmentState att_state[4]; + VkPipelineColorBlendStateCreateInfo cs; + VkPipelineRasterizationStateCreateInfo rs; + + graphics_pipeline_state() + { + ia = { VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO }; + cs = { VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO }; + ds = { VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO }; + rs = { VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO }; + + for (int i = 0; i < 4; ++i) + { + att_state[i] = {}; + } + + rs.polygonMode = VK_POLYGON_MODE_FILL; + rs.cullMode = VK_CULL_MODE_NONE; + rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rs.lineWidth = 1.f; + } + + ~graphics_pipeline_state() + {} + + void set_primitive_type(VkPrimitiveTopology type) + { + ia.topology = type; + } + + void enable_primitive_restart(bool enable = true) + { + ia.primitiveRestartEnable = enable? 
VK_TRUE : VK_FALSE; + } + + void set_color_mask(bool r, bool g, bool b, bool a) + { + VkColorComponentFlags mask = 0; + if (a) mask |= VK_COLOR_COMPONENT_A_BIT; + if (b) mask |= VK_COLOR_COMPONENT_B_BIT; + if (g) mask |= VK_COLOR_COMPONENT_G_BIT; + if (r) mask |= VK_COLOR_COMPONENT_R_BIT; + + att_state[0].colorWriteMask = mask; + att_state[1].colorWriteMask = mask; + att_state[2].colorWriteMask = mask; + att_state[3].colorWriteMask = mask; + } + + void set_depth_mask(bool enable) + { + ds.depthWriteEnable = enable ? VK_TRUE : VK_FALSE; + } + + void set_stencil_mask(u32 mask) + { + ds.front.writeMask = mask; + ds.back.writeMask = mask; + } + + void set_stencil_mask_separate(int face, u32 mask) + { + if (!face) + ds.front.writeMask = mask; + else + ds.back.writeMask = mask; + } + + void enable_depth_test(VkCompareOp op) + { + ds.depthTestEnable = VK_TRUE; + ds.depthCompareOp = op; + } + + void enable_depth_clamp(bool enable = true) + { + rs.depthClampEnable = enable ? VK_TRUE : VK_FALSE; + } + + void enable_depth_bias(bool enable = true) + { + rs.depthBiasEnable = enable ? VK_TRUE : VK_FALSE; + } + + void enable_depth_bounds_test(bool enable = true) + { + ds.depthBoundsTestEnable = enable? 
VK_TRUE : VK_FALSE; + } + + void enable_blend(int mrt_index, VkBlendFactor src_factor_rgb, VkBlendFactor src_factor_a, + VkBlendFactor dst_factor_rgb, VkBlendFactor dst_factor_a, + VkBlendOp equation_rgb, VkBlendOp equation_a) + { + att_state[mrt_index].srcColorBlendFactor = src_factor_rgb; + att_state[mrt_index].srcAlphaBlendFactor = src_factor_a; + att_state[mrt_index].dstColorBlendFactor = dst_factor_rgb; + att_state[mrt_index].dstAlphaBlendFactor = dst_factor_a; + att_state[mrt_index].colorBlendOp = equation_rgb; + att_state[mrt_index].alphaBlendOp = equation_a; + att_state[mrt_index].blendEnable = VK_TRUE; + } + + void enable_stencil_test(VkStencilOp fail, VkStencilOp zfail, VkStencilOp pass, + VkCompareOp func, u32 func_mask, u32 ref) + { + ds.front.failOp = fail; + ds.front.passOp = pass; + ds.front.depthFailOp = zfail; + ds.front.compareOp = func; + ds.front.compareMask = func_mask; + ds.front.reference = ref; + ds.back = ds.front; + + ds.stencilTestEnable = VK_TRUE; + } + + void enable_stencil_test_separate(int face, VkStencilOp fail, VkStencilOp zfail, VkStencilOp pass, + VkCompareOp func, u32 func_mask, u32 ref) + { + auto& face_props = (face ? ds.back : ds.front); + face_props.failOp = fail; + face_props.passOp = pass; + face_props.depthFailOp = zfail; + face_props.compareOp = func; + face_props.compareMask = func_mask; + face_props.reference = ref; + + ds.stencilTestEnable = VK_TRUE; + } + + void enable_logic_op(VkLogicOp op) + { + cs.logicOpEnable = VK_TRUE; + cs.logicOp = op; + } + + void enable_cull_face(VkCullModeFlags cull_mode) + { + rs.cullMode = cull_mode; + } + + void set_front_face(VkFrontFace face) + { + rs.frontFace = face; + } + + void set_attachment_count(u32 count) + { + cs.attachmentCount = count; + cs.pAttachments = att_state; + } + }; + namespace glsl { enum program_input_type @@ -2176,13 +2359,4 @@ namespace vk heap->unmap(); } }; - - /** - * Allocate enough space in upload_buffer and write all mipmap/layer data into the subbuffer. 
- * Then copy all layers into dst_image. - * dst_image must be in TRANSFER_DST_OPTIMAL layout and upload_buffer have TRANSFER_SRC_BIT usage flag. - */ - void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, VkImage dst_image, - const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, - VkImageAspectFlags flags, vk::vk_data_heap &upload_heap); } diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index 196bd4a433d6..b523e22a413c 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -30,15 +30,7 @@ namespace vk std::string vs_src; std::string fs_src; - struct - { - int color_attachments = 0; - bool write_color = true; - bool write_depth = true; - bool no_depth_test = true; - bool enable_blend = false; - } - renderpass_config; + graphics_pipeline_state renderpass_config; bool initialized = false; bool compiled = false; @@ -46,6 +38,15 @@ namespace vk u32 num_drawable_elements = 4; u32 first_vertex = 0; + overlay_pass() + { + //Override-able defaults + renderpass_config.set_primitive_type(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP); + } + + ~overlay_pass() + {} + void init_descriptors() { VkDescriptorPoolSize descriptor_pool_sizes[2] = @@ -173,51 +174,16 @@ namespace vk ms.pSampleMask = NULL; ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - VkPipelineInputAssemblyStateCreateInfo ia = {}; - ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; - - VkPipelineRasterizationStateCreateInfo rs = {}; - rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - rs.lineWidth = 1.f; - rs.polygonMode = VK_POLYGON_MODE_FILL; - - VkPipelineColorBlendAttachmentState att = {}; - if (renderpass_config.write_color) - { - att.colorWriteMask = 0xf; - - if (renderpass_config.enable_blend) - { - att.blendEnable = VK_TRUE; - att.alphaBlendOp = VK_BLEND_OP_ADD; - att.colorBlendOp = VK_BLEND_OP_ADD; - att.dstAlphaBlendFactor = 
att.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - att.srcAlphaBlendFactor = att.srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; - } - } - - VkPipelineColorBlendStateCreateInfo cs = {}; - cs.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - cs.attachmentCount = renderpass_config.color_attachments; - cs.pAttachments = &att; - - VkPipelineDepthStencilStateCreateInfo ds = {}; - ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; - ds.depthWriteEnable = renderpass_config.write_depth? VK_TRUE: VK_FALSE; - ds.depthTestEnable = VK_TRUE; - ds.depthCompareOp = VK_COMPARE_OP_ALWAYS; - VkPipeline pipeline; VkGraphicsPipelineCreateInfo info = {}; info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; info.pVertexInputState = &vi; - info.pInputAssemblyState = &ia; - info.pRasterizationState = &rs; - info.pColorBlendState = &cs; + info.pInputAssemblyState = &renderpass_config.ia; + info.pRasterizationState = &renderpass_config.rs; + info.pColorBlendState = &renderpass_config.cs; info.pMultisampleState = &ms; info.pViewportState = &vp; - info.pDepthStencilState = &ds; + info.pDepthStencilState = &renderpass_config.ds; info.stageCount = 2; info.pStages = shader_stages; info.pDynamicState = &dynamic_state_info; @@ -353,19 +319,23 @@ namespace vk vkCmdDraw(cmd, num_drawable_elements, 1, first_vertex, 0); } - void run(vk::command_buffer &cmd, u16 w, u16 h, vk::framebuffer* fbo, VkImageView src, VkRenderPass render_pass) + virtual void set_up_viewport(vk::command_buffer &cmd, u16 max_w, u16 max_h) { - load_program(cmd, render_pass, src); - VkViewport vp{}; - vp.width = (f32)w; - vp.height = (f32)h; + vp.width = (f32)max_w; + vp.height = (f32)max_h; vp.minDepth = 0.f; vp.maxDepth = 1.f; vkCmdSetViewport(cmd, 0, 1, &vp); - VkRect2D vs = { { 0, 0 },{ 0u + w, 0u + h } }; + VkRect2D vs = { { 0, 0 }, { 0u + max_w, 0u + max_h } }; vkCmdSetScissor(cmd, 0, 1, &vs); + } + + void run(vk::command_buffer &cmd, u16 w, u16 h, 
vk::framebuffer* fbo, VkImageView src, VkRenderPass render_pass) + { + load_program(cmd, render_pass, src); + set_up_viewport(cmd, w, h); VkRenderPassBeginInfo rp_begin = {}; rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; @@ -426,7 +396,8 @@ namespace vk "}\n" }; - renderpass_config.write_color = false; + renderpass_config.set_depth_mask(true); + renderpass_config.enable_depth_test(VK_COMPARE_OP_ALWAYS); m_vertex_shader.id = 100002; m_fragment_shader.id = 100003; @@ -507,16 +478,19 @@ namespace vk "}\n" }; - renderpass_config.color_attachments = 1; - renderpass_config.write_color = true; - renderpass_config.write_depth = false; - renderpass_config.enable_blend = true; + renderpass_config.set_attachment_count(1); + renderpass_config.set_color_mask(true, true, true, true); + renderpass_config.set_depth_mask(false); + renderpass_config.enable_blend(0, + VK_BLEND_FACTOR_SRC_ALPHA, VK_BLEND_FACTOR_SRC_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + VK_BLEND_OP_ADD, VK_BLEND_OP_ADD); m_vertex_shader.id = 100004; m_fragment_shader.id = 100005; } - vk::image_view* upload_simple_texture(vk::render_device &dev, vk::command_buffer &cmd, vk::memory_type_mapping &memory_types, + vk::image_view* upload_simple_texture(vk::render_device &dev, vk::command_buffer &cmd, vk::vk_data_heap& upload_heap, u64 key, int w, int h, bool font, bool temp, void *pixel_src) { const VkFormat format = (font) ? 
VK_FORMAT_R8_UNORM : VK_FORMAT_B8G8R8A8_UNORM; @@ -531,7 +505,7 @@ namespace vk const VkImageSubresourceRange range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; - auto tex = std::make_unique(dev, memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + auto tex = std::make_unique(dev, dev.get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_IMAGE_TYPE_2D, format, std::max(w, 1), std::max(h, 1), 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0); @@ -574,7 +548,7 @@ namespace vk return result; } - void create(vk::command_buffer &cmd, vk::memory_type_mapping &memory_types, vk::vk_data_heap &upload_heap) + void create(vk::command_buffer &cmd, vk::vk_data_heap &upload_heap) { auto& dev = cmd.get_command_pool().get_owner(); overlay_pass::create(dev); @@ -585,7 +559,7 @@ namespace vk u64 storage_key = 1; for (const auto &res : configuration.texture_raw_data) { - upload_simple_texture(dev, cmd, memory_types, upload_heap, storage_key++, res->w, res->h, false, false, res->data); + upload_simple_texture(dev, cmd, upload_heap, storage_key++, res->w, res->h, false, false, res->data); } configuration.free_resources(); @@ -609,7 +583,7 @@ namespace vk temp_view_cache.clear(); } - vk::image_view* find_font(rsx::overlays::font *font, vk::command_buffer &cmd, vk::memory_type_mapping &memory_types, vk::vk_data_heap &upload_heap) + vk::image_view* find_font(rsx::overlays::font *font, vk::command_buffer &cmd, vk::vk_data_heap &upload_heap) { u64 key = (u64)font; auto found = view_cache.find(key); @@ -617,17 +591,17 @@ namespace vk return found->second.get(); //Create font file - return upload_simple_texture(cmd.get_command_pool().get_owner(), cmd, memory_types, upload_heap, key, font->width, font->height, true, false, font->glyph_data.data()); + return upload_simple_texture(cmd.get_command_pool().get_owner(), cmd, 
upload_heap, key, font->width, font->height, true, false, font->glyph_data.data()); } - vk::image_view* find_temp_image(rsx::overlays::image_info *desc, vk::command_buffer &cmd, vk::memory_type_mapping &memory_types, vk::vk_data_heap &upload_heap) + vk::image_view* find_temp_image(rsx::overlays::image_info *desc, vk::command_buffer &cmd, vk::vk_data_heap &upload_heap) { u64 key = (u64)desc; auto found = temp_view_cache.find(key); if (found != temp_view_cache.end()) return found->second.get(); - return upload_simple_texture(cmd.get_command_pool().get_owner(), cmd, memory_types, upload_heap, key, desc->w, desc->h, false, true, desc->data); + return upload_simple_texture(cmd.get_command_pool().get_owner(), cmd, upload_heap, key, desc->w, desc->h, false, true, desc->data); } void update_uniforms(vk::glsl::program* /*program*/) override @@ -665,7 +639,7 @@ namespace vk } } - void run(vk::command_buffer &cmd, u16 w, u16 h, vk::framebuffer* target, VkRenderPass render_pass, vk::memory_type_mapping &memory_types, + void run(vk::command_buffer &cmd, u16 w, u16 h, vk::framebuffer* target, VkRenderPass render_pass, vk::vk_data_heap &upload_heap, rsx::overlays::user_interface &ui) { m_scale_offset = color4f((f32)ui.virtual_width, (f32)ui.virtual_height, 1.f, 1.f); @@ -697,10 +671,10 @@ namespace vk m_skip_texture_read = true; break; case rsx::overlays::image_resource_id::font_file: - src = find_font(command.first.font_ref, cmd, memory_types, upload_heap)->value; + src = find_font(command.first.font_ref, cmd, upload_heap)->value; break; case rsx::overlays::image_resource_id::raw_image: - src = find_temp_image((rsx::overlays::image_info*)command.first.external_data_ref, cmd, memory_types, upload_heap)->value; + src = find_temp_image((rsx::overlays::image_info*)command.first.external_data_ref, cmd, upload_heap)->value; break; default: src = view_cache[command.first.texture_ref]->value; @@ -749,9 +723,118 @@ namespace vk "}\n" }; - renderpass_config.write_color = false; + 
renderpass_config.set_depth_mask(true); + renderpass_config.enable_depth_test(VK_COMPARE_OP_ALWAYS); + + m_vertex_shader.id = 100006; + m_fragment_shader.id = 100007; + } + }; + + struct attachment_clear_pass : public overlay_pass + { + color4f clear_color = { 0.f, 0.f, 0.f, 0.f }; + color4f colormask = { 1.f, 1.f, 1.f, 1.f }; + VkRect2D region = {}; + + attachment_clear_pass() + { + vs_src = + { + "#version 450\n" + "#extension GL_ARB_separate_shader_objects : enable\n" + "layout(std140, set=0, binding=1) uniform static_data{ vec4 regs[8]; };\n" + "layout(location=0) out vec2 tc0;\n" + "layout(location=1) out vec4 color;\n" + "layout(location=2) out vec4 mask;\n" + "\n" + "void main()\n" + "{\n" + " vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n" + " vec2 coords[] = {vec2(0., 0.), vec2(1., 0.), vec2(0., 1.), vec2(1., 1.)};\n" + " tc0 = coords[gl_VertexIndex % 4];\n" + " color = regs[0];\n" + " mask = regs[1];\n" + " gl_Position = vec4(positions[gl_VertexIndex % 4], 0., 1.);\n" + "}\n" + }; + + fs_src = + { + "#version 420\n" + "#extension GL_ARB_separate_shader_objects : enable\n" + "layout(set=0, binding=0) uniform sampler2D fs0;\n" + "layout(location=0) in vec2 tc0;\n" + "layout(location=1) in vec4 color;\n" + "layout(location=2) in vec4 mask;\n" + "layout(location=0) out vec4 out_color;\n" + "\n" + "void main()\n" + "{\n" + " vec4 original_color = texture(fs0, tc0);\n" + " out_color = mix(original_color, color, bvec4(mask));\n" + "}\n" + }; + + renderpass_config.set_depth_mask(false); + renderpass_config.set_color_mask(true, true, true, true); + renderpass_config.set_attachment_count(1); + m_vertex_shader.id = 100006; m_fragment_shader.id = 100007; } + + void update_uniforms(vk::glsl::program* /*program*/) override + { + auto dst = (f32*)m_ubo->map(0, 128); + dst[0] = clear_color.r; + dst[1] = clear_color.g; + dst[2] = clear_color.b; + dst[3] = clear_color.a; + dst[4] = colormask.r; + dst[5] = colormask.g; + dst[6] = 
colormask.b; + dst[7] = colormask.a; + m_ubo->unmap(); + } + + void set_up_viewport(vk::command_buffer &cmd, u16 max_w, u16 max_h) override + { + VkViewport vp{}; + vp.width = (f32)max_w; + vp.height = (f32)max_h; + vp.minDepth = 0.f; + vp.maxDepth = 1.f; + vkCmdSetViewport(cmd, 0, 1, &vp); + + vkCmdSetScissor(cmd, 0, 1, &region); + } + + bool update_config(u32 clearmask, color4f color) + { + color4f mask = { 0.f, 0.f, 0.f, 0.f }; + if (clearmask & 0x10) mask.r = 1.f; + if (clearmask & 0x20) mask.g = 1.f; + if (clearmask & 0x40) mask.b = 1.f; + if (clearmask & 0x80) mask.a = 1.f; + + if (mask != colormask || color != clear_color) + { + colormask = mask; + clear_color = color; + return true; + } + + return false; + } + + void run(vk::command_buffer &cmd, vk::render_target* target, VkRect2D rect, VkRenderPass render_pass, std::list>& framebuffer_resources) + { + region = rect; + + overlay_pass::run(cmd, target->width(), target->height(), target, + target->get_view(0xAAE4, rsx::default_remap_vector)->value, + render_pass, framebuffer_resources); + } }; } diff --git a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h index 8049084e4b95..7ee0cab0f56f 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h +++ b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h @@ -3,45 +3,41 @@ #include "VKFragmentProgram.h" #include "../Common/ProgramStateCache.h" #include "Utilities/hash.h" +#include "VKHelpers.h" namespace vk { struct pipeline_props { - VkPipelineInputAssemblyStateCreateInfo ia; - VkPipelineDepthStencilStateCreateInfo ds; - VkPipelineColorBlendAttachmentState att_state[4]; - VkPipelineColorBlendStateCreateInfo cs; - VkPipelineRasterizationStateCreateInfo rs; - + graphics_pipeline_state state; VkRenderPass render_pass; int num_targets; int render_pass_location; bool operator==(const pipeline_props& other) const { - if (memcmp(&att_state[0], &other.att_state[0], sizeof(VkPipelineColorBlendAttachmentState))) + if (memcmp(&state.att_state[0], &other.state.att_state[0], 
sizeof(VkPipelineColorBlendAttachmentState))) return false; if (render_pass_location != other.render_pass_location) return false; - if (memcmp(&rs, &other.rs, sizeof(VkPipelineRasterizationStateCreateInfo))) + if (memcmp(&state.rs, &other.state.rs, sizeof(VkPipelineRasterizationStateCreateInfo))) return false; //Cannot memcmp cs due to pAttachments being a pointer to memory - if (cs.attachmentCount != other.cs.attachmentCount || - cs.flags != other.cs.flags || - cs.logicOp != other.cs.logicOp || - cs.logicOpEnable != other.cs.logicOpEnable || - cs.sType != other.cs.sType || - memcmp(cs.blendConstants, other.cs.blendConstants, 4 * sizeof(f32))) + if (state.cs.attachmentCount != other.state.cs.attachmentCount || + state.cs.flags != other.state.cs.flags || + state.cs.logicOp != other.state.cs.logicOp || + state.cs.logicOpEnable != other.state.cs.logicOpEnable || + state.cs.sType != other.state.cs.sType || + memcmp(state.cs.blendConstants, other.state.cs.blendConstants, 4 * sizeof(f32))) return false; - if (memcmp(&ia, &other.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo))) + if (memcmp(&state.ia, &other.state.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo))) return false; - if (memcmp(&ds, &other.ds, sizeof(VkPipelineDepthStencilStateCreateInfo))) + if (memcmp(&state.ds, &other.state.ds, sizeof(VkPipelineDepthStencilStateCreateInfo))) return false; if (num_targets != other.num_targets) @@ -58,16 +54,16 @@ namespace rpcs3 size_t hash_struct(const vk::pipeline_props &pipelineProperties) { size_t seed = hash_base(pipelineProperties.num_targets); - seed ^= hash_struct(pipelineProperties.ia); - seed ^= hash_struct(pipelineProperties.ds); - seed ^= hash_struct(pipelineProperties.rs); + seed ^= hash_struct(pipelineProperties.state.ia); + seed ^= hash_struct(pipelineProperties.state.ds); + seed ^= hash_struct(pipelineProperties.state.rs); //Do not compare pointers to memory! 
- auto tmp = pipelineProperties.cs; + auto tmp = pipelineProperties.state.cs; tmp.pAttachments = nullptr; seed ^= hash_struct(tmp); - seed ^= hash_struct(pipelineProperties.att_state[0]); + seed ^= hash_struct(pipelineProperties.state.att_state[0]); return hash_base(seed); } } @@ -99,17 +95,16 @@ struct VKTraits void validate_pipeline_properties(const VKVertexProgram&, const VKFragmentProgram &fp, vk::pipeline_props& properties) { //Explicitly disable writing to undefined registers - properties.att_state[0].colorWriteMask &= fp.output_color_masks[0]; - properties.att_state[1].colorWriteMask &= fp.output_color_masks[1]; - properties.att_state[2].colorWriteMask &= fp.output_color_masks[2]; - properties.att_state[3].colorWriteMask &= fp.output_color_masks[3]; + properties.state.att_state[0].colorWriteMask &= fp.output_color_masks[0]; + properties.state.att_state[1].colorWriteMask &= fp.output_color_masks[1]; + properties.state.att_state[2].colorWriteMask &= fp.output_color_masks[2]; + properties.state.att_state[3].colorWriteMask &= fp.output_color_masks[3]; } static pipeline_storage_type build_pipeline(const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const vk::pipeline_props &pipelineProperties, VkDevice dev, VkPipelineLayout common_pipeline_layout) { - VkPipelineShaderStageCreateInfo shader_stages[2] = {}; shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; @@ -151,12 +146,12 @@ struct VKTraits VkGraphicsPipelineCreateInfo info = {}; info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; info.pVertexInputState = &vi; - info.pInputAssemblyState = &pipelineProperties.ia; - info.pRasterizationState = &pipelineProperties.rs; - info.pColorBlendState = &pipelineProperties.cs; + info.pInputAssemblyState = &pipelineProperties.state.ia; + info.pRasterizationState = &pipelineProperties.state.rs; + info.pColorBlendState = 
&pipelineProperties.state.cs; info.pMultisampleState = &ms; info.pViewportState = &vp; - info.pDepthStencilState = &pipelineProperties.ds; + info.pDepthStencilState = &pipelineProperties.state.ds; info.stageCount = 2; info.pStages = shader_stages; info.pDynamicState = &dynamic_state_info; @@ -208,7 +203,7 @@ class VKProgramBuffer : public program_state_cache { //Extract pointers from pipeline props props.render_pass = m_render_pass_data[props.render_pass_location]; - props.cs.pAttachments = props.att_state; + props.state.cs.pAttachments = props.state.att_state; vp.skip_vertex_input_check = true; getGraphicPipelineState(vp, fp, props, std::forward(args)...); } diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 0d667f280be4..2d388e538633 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -122,13 +122,13 @@ namespace rsx surface_color_format format, size_t width, size_t height, vk::render_target* old_surface, - vk::render_device &device, vk::command_buffer *cmd, const vk::gpu_formats_support &, const vk::memory_type_mapping &mem_mapping) + vk::render_device &device, vk::command_buffer *cmd) { auto fmt = vk::get_compatible_surface_format(format); VkFormat requested_format = fmt.first; std::unique_ptr rtt; - rtt.reset(new vk::render_target(device, mem_mapping.device_local, + rtt.reset(new vk::render_target(device, device.get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_IMAGE_TYPE_2D, requested_format, @@ -138,29 +138,17 @@ namespace rsx VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, 0)); - change_image_layout(*cmd, rtt.get(), VK_IMAGE_LAYOUT_GENERAL, vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); - //Clear new surface - VkClearColorValue clear_color; - VkImageSubresourceRange range = vk::get_image_subresource_range(0,0, 1, 1, 
VK_IMAGE_ASPECT_COLOR_BIT); - clear_color.float32[0] = 0.f; - clear_color.float32[1] = 0.f; - clear_color.float32[2] = 0.f; - clear_color.float32[3] = 0.f; - - vkCmdClearColorImage(*cmd, rtt->value, VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range); change_image_layout(*cmd, rtt.get(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); rtt->native_component_map = fmt.second; rtt->native_pitch = (u16)width * get_format_block_size_in_bytes(format); rtt->surface_width = (u16)width; rtt->surface_height = (u16)height; + rtt->dirty = true; if (old_surface != nullptr && old_surface->info.format == requested_format) - { rtt->old_contents = old_surface; - rtt->dirty = true; - } return rtt; } @@ -170,9 +158,9 @@ namespace rsx surface_depth_format format, size_t width, size_t height, vk::render_target* old_surface, - vk::render_device &device, vk::command_buffer *cmd, const vk::gpu_formats_support &support, const vk::memory_type_mapping &mem_mapping) + vk::render_device &device, vk::command_buffer *cmd) { - VkFormat requested_format = vk::get_compatible_depth_surface_format(support, format); + VkFormat requested_format = vk::get_compatible_depth_surface_format(device.get_formats_support(), format); VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_DEPTH_BIT); if (requested_format != VK_FORMAT_D16_UNORM) @@ -181,7 +169,7 @@ namespace rsx const auto scale = rsx::get_resolution_scale(); std::unique_ptr ds; - ds.reset(new vk::render_target(device, mem_mapping.device_local, + ds.reset(new vk::render_target(device, device.get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_IMAGE_TYPE_2D, requested_format, @@ -193,15 +181,6 @@ namespace rsx 0)); ds->native_component_map = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; - change_image_layout(*cmd, ds.get(), VK_IMAGE_LAYOUT_GENERAL, range); - - //Clear new 
surface.. - VkClearDepthStencilValue clear_depth = {}; - - clear_depth.depth = 1.f; - clear_depth.stencil = 255; - - vkCmdClearDepthStencilImage(*cmd, ds->value, VK_IMAGE_LAYOUT_GENERAL, &clear_depth, 1, &range); change_image_layout(*cmd, ds.get(), VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range); ds->native_pitch = (u16)width * 2; @@ -211,12 +190,10 @@ namespace rsx ds->attachment_aspect_flag = range.aspectMask; ds->surface_width = (u16)width; ds->surface_height = (u16)height; + ds->dirty = true; if (old_surface != nullptr && old_surface->info.format == requested_format) - { ds->old_contents = old_surface; - ds->dirty = true; - } return ds; } diff --git a/rpcs3/Emu/RSX/VK/VKTextOut.h b/rpcs3/Emu/RSX/VK/VKTextOut.h index eb8bee4188d7..d40a8ba07208 100644 --- a/rpcs3/Emu/RSX/VK/VKTextOut.h +++ b/rpcs3/Emu/RSX/VK/VKTextOut.h @@ -252,12 +252,12 @@ namespace vk } } - void init(vk::render_device &dev, vk::memory_type_mapping &memory_types, VkRenderPass &render_pass) + void init(vk::render_device &dev, VkRenderPass &render_pass) { //At worst case, 1 char = 16*16*8 bytes (average about 24*8), so ~256K for 128 chars. Allocating 512k for verts //uniform params are 8k in size, allocating for 120 lines (max lines at 4k, one column per row. 
Can be expanded - m_vertex_buffer.reset( new vk::buffer(dev, 524288, memory_types.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 0)); - m_uniforms_buffer.reset(new vk::buffer(dev, 983040, memory_types.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0)); + m_vertex_buffer.reset( new vk::buffer(dev, 524288, dev.get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 0)); + m_uniforms_buffer.reset(new vk::buffer(dev, 983040, dev.get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0)); m_render_pass = render_pass; m_uniform_buffer_size = 983040; diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index a83f0c9203e1..a695ed96e491 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -41,6 +41,20 @@ namespace vk return subres; } + VkImageAspectFlags get_aspect_flags(VkFormat format) + { + switch (format) + { + default: + return VK_IMAGE_ASPECT_COLOR_BIT; + case VK_FORMAT_D16_UNORM: + return VK_IMAGE_ASPECT_DEPTH_BIT; + case VK_FORMAT_D24_UNORM_S8_UINT: + case VK_FORMAT_D32_SFLOAT_S8_UINT: + return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + } + } + void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect) { VkImageSubresourceLayers a_src = {}, a_dst = {}; @@ -147,22 +161,69 @@ namespace vk change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); } - void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, VkImage dst_image, + void 
copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image, const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, VkImageAspectFlags flags, vk::vk_data_heap &upload_heap) { u32 mipmap_level = 0; u32 block_in_pixel = get_format_block_size_in_texel(format); u8 block_size_in_bytes = get_format_block_size_in_bytes(format); + std::vector staging_buffer; + + //TODO: Depth and stencil transfer together + flags &= ~(VK_IMAGE_ASPECT_STENCIL_BIT); + for (const rsx_subresource_layout &layout : subresource_layout) { u32 row_pitch = align(layout.width_in_block * block_size_in_bytes, 256); u32 image_linear_size = row_pitch * layout.height_in_block * layout.depth; - size_t offset_in_buffer = upload_heap.alloc<512>(image_linear_size); - void *mapped_buffer = upload_heap.map(offset_in_buffer, image_linear_size); - gsl::span mapped{ (gsl::byte*)mapped_buffer, ::narrow(image_linear_size) }; + //Map with extra padding bytes in case of realignment + size_t offset_in_buffer = upload_heap.alloc<512>(image_linear_size + 8); + void *mapped_buffer = upload_heap.map(offset_in_buffer, image_linear_size + 8); + void *dst = mapped_buffer; + + bool use_staging = false; + if (dst_image->info.format == VK_FORMAT_D24_UNORM_S8_UINT || + dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT) + { + //Misalign intentionally to skip the first stencil byte in D24S8 data + //Ensures the real depth data is dword aligned + + if (dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT) + { + //Emulate D24x8 passthrough to D32 format + //Reads from GPU managed memory are slow at best and at worst unreliable + use_staging = true; + staging_buffer.resize(image_linear_size + 8); + dst = staging_buffer.data() + 4 - 1; + } + else + { + //Skip leading dword when writing to texture + offset_in_buffer += 4; + dst = (char*)(mapped_buffer) + 4 - 1; + } + } + + gsl::span mapped{ (gsl::byte*)dst, ::narrow(image_linear_size) }; upload_texture_subresource(mapped, layout, 
format, is_swizzled, false, 256); + + if (use_staging) + { + if (dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT) + { + //Map depth component from D24x8 to a f32 depth value + //NOTE: One byte (contains first S8 value) is skipped + rsx::convert_le_d24x8_to_le_f32(mapped_buffer, (char*)dst + 1, image_linear_size >> 2, 1); + } + else //unused + { + //Copy emulated data back to the target buffer + memcpy(mapped_buffer, dst, image_linear_size); + } + } + upload_heap.unmap(); VkBufferImageCopy copy_info = {}; @@ -176,7 +237,7 @@ namespace vk copy_info.imageSubresource.mipLevel = mipmap_level % mipmap_count; copy_info.bufferRowLength = block_in_pixel * row_pitch / block_size_in_bytes; - vkCmdCopyBufferToImage(cmd, upload_heap.heap->value, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©_info); + vkCmdCopyBufferToImage(cmd, upload_heap.heap->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©_info); mipmap_level++; } } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 6207d3602cb1..e01d3756aa24 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -141,7 +141,7 @@ namespace vk return (protection == utils::protection::rw && uploaded_image_view.get() == nullptr && managed_texture.get() == nullptr); } - void copy_texture(bool manage_cb_lifetime, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue) + void copy_texture(bool manage_cb_lifetime, vk::command_buffer& cmd, VkQueue submit_queue) { if (m_device == nullptr) { @@ -157,7 +157,8 @@ namespace vk if (dma_buffer.get() == nullptr) { - dma_buffer.reset(new vk::buffer(*m_device, align(cpu_address_range, 256), memory_types.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0)); + auto memory_type = m_device->get_memory_mapping().host_visible_coherent; + dma_buffer.reset(new vk::buffer(*m_device, align(cpu_address_range, 256), memory_type, 
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0)); } if (manage_cb_lifetime) @@ -167,18 +168,7 @@ namespace vk const u16 internal_width = (context != rsx::texture_upload_context::framebuffer_storage? width : std::min(width, rsx::apply_resolution_scale(width, true))); const u16 internal_height = (context != rsx::texture_upload_context::framebuffer_storage? height : std::min(height, rsx::apply_resolution_scale(height, true))); - - VkImageAspectFlags aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT; - switch (vram_texture->info.format) - { - case VK_FORMAT_D16_UNORM: - aspect_flag = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - aspect_flag = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - } + VkImageAspectFlags aspect_flag = vk::get_aspect_flags(vram_texture->info.format); //TODO: Read back stencil values (is this really necessary?) VkBufferImageCopy copyRegion = {}; @@ -215,13 +205,13 @@ namespace vk } template - void do_memory_transfer(void *pixels_dst, const void *pixels_src) + void do_memory_transfer(void *pixels_dst, const void *pixels_src, u32 channels_count) { if (sizeof(T) == 1) memcpy(pixels_dst, pixels_src, cpu_address_range); else { - const u32 block_size = width * height; + const u32 block_size = width * height * channels_count; if (swapped) { @@ -238,7 +228,7 @@ namespace vk } } - bool flush(vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue) + bool flush(vk::command_buffer& cmd, VkQueue submit_queue) { if (flushed) return true; @@ -253,7 +243,7 @@ namespace vk if (!synchronized) { LOG_WARNING(RSX, "Cache miss at address 0x%X. 
This is gonna hurt...", cpu_address_base); - copy_texture(true, cmd, memory_types, submit_queue); + copy_texture(true, cmd, submit_queue); result = false; } @@ -262,7 +252,9 @@ namespace vk void* pixels_src = dma_buffer->map(0, cpu_address_range); void* pixels_dst = vm::base(cpu_address_base); - const u8 bpp = real_pitch / width; + const auto texel_layout = vk::get_format_element_size(vram_texture->info.format); + const auto elem_size = texel_layout.first; + const auto channel_count = texel_layout.second; //We have to do our own byte swapping since the driver doesnt do it for us if (real_pitch == rsx_pitch) @@ -283,36 +275,24 @@ namespace vk if (!is_depth_format) { - switch (bpp) + switch (elem_size) { default: - LOG_ERROR(RSX, "Invalid bpp %d", bpp); + LOG_ERROR(RSX, "Invalid element width %d", elem_size); case 1: - do_memory_transfer(pixels_dst, pixels_src); + do_memory_transfer(pixels_dst, pixels_src, channel_count); break; case 2: if (pack_unpack_swap_bytes) - do_memory_transfer(pixels_dst, pixels_src); + do_memory_transfer(pixels_dst, pixels_src, channel_count); else - do_memory_transfer(pixels_dst, pixels_src); + do_memory_transfer(pixels_dst, pixels_src, channel_count); break; case 4: if (pack_unpack_swap_bytes) - do_memory_transfer(pixels_dst, pixels_src); - else - do_memory_transfer(pixels_dst, pixels_src); - break; - case 8: - if (pack_unpack_swap_bytes) - do_memory_transfer(pixels_dst, pixels_src); - else - do_memory_transfer(pixels_dst, pixels_src); - break; - case 16: - if (pack_unpack_swap_bytes) - do_memory_transfer(pixels_dst, pixels_src); + do_memory_transfer(pixels_dst, pixels_src, channel_count); else - do_memory_transfer(pixels_dst, pixels_src); + do_memory_transfer(pixels_dst, pixels_src, channel_count); break; } } @@ -334,7 +314,8 @@ namespace vk break; } - rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, rsx_pitch, real_pitch, bpp, samples_u, samples_v, pack_unpack_swap_bytes); + u16 row_length = u16(width * channel_count); + 
rsx::scale_image_nearest(pixels_dst, pixels_src, row_length, height, rsx_pitch, real_pitch, elem_size, samples_u, samples_v, pack_unpack_swap_bytes); switch (vram_texture->info.format) { @@ -491,16 +472,14 @@ namespace vk VkComponentMapping apply_component_mapping_flags(u32 gcm_format, rsx::texture_create_flags flags, const texture_channel_remap_t& remap_vector) { - //NOTE: Depth textures should always read RRRR switch (gcm_format) { case CELL_GCM_TEXTURE_DEPTH24_D8: case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: case CELL_GCM_TEXTURE_DEPTH16: case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - { + //Dont bother letting this propagate return{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; - } default: break; } @@ -542,41 +521,41 @@ namespace vk vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector, bool copy) { - VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT; + std::unique_ptr image; + std::unique_ptr view; + + VkImageAspectFlags aspect; + VkImageCreateFlags image_flags; + VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format); - switch (source->info.format) + if (source) { - case VK_FORMAT_D16_UNORM: - aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - } + aspect = vk::get_aspect_flags(source->info.format); + if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT || + vk::get_format_texel_width(dst_format) != vk::get_format_texel_width(source->info.format)) + { + //HACK! 
Should use typeless transfer + dst_format = source->info.format; + } - VkFormat dst_format = vk::get_compatible_sampler_format(gcm_format); - if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT || - vk::get_format_texel_width(dst_format) != vk::get_format_texel_width(source->info.format)) + image_flags = source->info.flags; + } + else { - dst_format = source->info.format; + aspect = vk::get_aspect_flags(dst_format); + image_flags = (view_type == VK_IMAGE_VIEW_TYPE_CUBE)? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0; } - VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 }; - - std::unique_ptr image; - std::unique_ptr view; - image.reset(new vk::image(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, image_type, dst_format, w, h, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, source->info.flags)); + VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, image_flags)); //This method is almost exclusively used to work on framebuffer resources //Keep the original swizzle layout unless there is data format conversion - VkComponentMapping view_swizzle = source->native_component_map; - if (dst_format != source->info.format) + VkComponentMapping view_swizzle; + if (!source || dst_format != source->info.format) { //This is a data cast operation //Use native mapping for the new type @@ -584,6 +563,10 @@ namespace vk const auto remap = get_component_mapping(gcm_format); view_swizzle = { remap[1], remap[2], remap[3], remap[0] }; } + else + { + view_swizzle = source->native_component_map; + } if (memcmp(remap_vector.first.data(), rsx::default_remap_vector.first.data(), 4) || memcmp(remap_vector.second.data(), rsx::default_remap_vector.second.data(), 4)) @@ -594,6 +577,7 @@ namespace vk if (copy) { + VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 }; VkImageLayout old_src_layout = 
source->current_layout; vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); @@ -638,8 +622,8 @@ namespace vk std::unique_ptr image; std::unique_ptr view; - VkImageAspectFlags dst_aspect; - VkFormat dst_format = vk::get_compatible_sampler_format(gcm_format); + VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format); + VkImageAspectFlags dst_aspect = vk::get_aspect_flags(dst_format); image.reset(new vk::image(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_IMAGE_TYPE_2D, @@ -647,19 +631,6 @@ namespace vk size, size, 1, 1, 6, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)); - switch (gcm_format) - { - case CELL_GCM_TEXTURE_DEPTH16: - dst_aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case CELL_GCM_TEXTURE_DEPTH24_D8: - dst_aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - default: - dst_aspect = VK_IMAGE_ASPECT_COLOR_BIT; - break; - } - VkImageSubresourceRange view_range = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 1, 0, 6 }; view.reset(new vk::image_view(*vk::get_current_renderer(), image->value, VK_IMAGE_VIEW_TYPE_CUBE, image->info.format, image->native_component_map, view_range)); @@ -681,21 +652,7 @@ namespace vk { if (section.src) { - VkImageAspectFlags src_aspect; - switch (section.src->info.format) - { - case VK_FORMAT_D16_UNORM: - src_aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - src_aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - default: - src_aspect = VK_IMAGE_ASPECT_COLOR_BIT; - break; - } - + VkImageAspectFlags src_aspect = vk::get_aspect_flags(section.src->info.format); VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 }; VkImageLayout old_src_layout = 
section.src->current_layout; vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); @@ -728,28 +685,15 @@ namespace vk std::unique_ptr image; std::unique_ptr view; - VkImageAspectFlags dst_aspect; - VkFormat dst_format = vk::get_compatible_sampler_format(gcm_format); + VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format); + VkImageAspectFlags dst_aspect = vk::get_aspect_flags(dst_format); image.reset(new vk::image(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_IMAGE_TYPE_3D, - vk::get_compatible_sampler_format(gcm_format), + dst_format, width, height, depth, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0)); - switch (gcm_format) - { - case CELL_GCM_TEXTURE_DEPTH16: - dst_aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case CELL_GCM_TEXTURE_DEPTH24_D8: - dst_aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - default: - dst_aspect = VK_IMAGE_ASPECT_COLOR_BIT; - break; - } - VkImageSubresourceRange view_range = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 1, 0, 1 }; view.reset(new vk::image_view(*vk::get_current_renderer(), image->value, VK_IMAGE_VIEW_TYPE_3D, image->info.format, image->native_component_map, view_range)); @@ -771,21 +715,7 @@ namespace vk { if (section.src) { - VkImageAspectFlags src_aspect; - switch (section.src->info.format) - { - case VK_FORMAT_D16_UNORM: - src_aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - src_aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - default: - src_aspect = VK_IMAGE_ASPECT_COLOR_BIT; - break; - } - + VkImageAspectFlags src_aspect = vk::get_aspect_flags(section.src->info.format); VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 }; VkImageLayout old_src_layout = 
section.src->current_layout; vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); @@ -815,64 +745,42 @@ namespace vk vk::image_view* generate_atlas_from_images(vk::command_buffer& cmd, u32 gcm_format, u16 width, u16 height, const std::vector& sections_to_copy, const texture_channel_remap_t& remap_vector) override { - auto result = create_temporary_subresource_view_impl(cmd, sections_to_copy.front().src, VK_IMAGE_TYPE_2D, + auto result = create_temporary_subresource_view_impl(cmd, nullptr, VK_IMAGE_TYPE_2D, VK_IMAGE_VIEW_TYPE_2D, gcm_format, 0, 0, width, height, remap_vector, false); VkImage dst = result->info.image; - VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT; - - switch (sections_to_copy.front().src->info.format) - { - case VK_FORMAT_D16_UNORM: - aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - } - - VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 }; - vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); + VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format); + VkImageSubresourceRange dst_range = { dst_aspect, 0, 1, 0, 1 }; + vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range); for (const auto ®ion : sections_to_copy) { + VkImageAspectFlags src_aspect = vk::get_aspect_flags(region.src->info.format); + VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 }; VkImageLayout old_src_layout = region.src->current_layout; - vk::change_image_layout(cmd, region.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range); + vk::change_image_layout(cmd, region.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); VkImageCopy copy_rgn; copy_rgn.srcOffset = { region.src_x, region.src_y, 0 }; copy_rgn.dstOffset = 
{ region.dst_x, region.dst_y, 0 }; - copy_rgn.dstSubresource = { aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; - copy_rgn.srcSubresource = { aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; + copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; + copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; copy_rgn.extent = { region.w, region.h, 1 }; vkCmdCopyImage(cmd, region.src->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©_rgn); - vk::change_image_layout(cmd, region.src, old_src_layout, subresource_range); + vk::change_image_layout(cmd, region.src, old_src_layout, src_range); } - vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range); + vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); return result; } void update_image_contents(vk::command_buffer& cmd, vk::image_view* dst_view, vk::image* src, u16 width, u16 height) override { VkImage dst = dst_view->info.image; - VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT; - - switch (src->info.format) - { - case VK_FORMAT_D16_UNORM: - aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - } - + VkImageAspectFlags aspect = vk::get_aspect_flags(src->info.format); VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 }; vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); @@ -937,18 +845,20 @@ namespace vk switch (gcm_format) { case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: aspect_flags = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; usage_flags |= 
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; vk_format = m_formats_support.d24_unorm_s8? VK_FORMAT_D24_UNORM_S8_UINT : VK_FORMAT_D32_SFLOAT_S8_UINT; break; case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: aspect_flags = VK_IMAGE_ASPECT_DEPTH_BIT; usage_flags |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; vk_format = VK_FORMAT_D16_UNORM; break; default: aspect_flags = VK_IMAGE_ASPECT_COLOR_BIT; - vk_format = get_compatible_sampler_format(gcm_format); + vk_format = get_compatible_sampler_format(m_formats_support, gcm_format); if (colorspace != rsx::texture_colorspace::rgb_linear) vk_format = get_compatible_srgb_format(vk_format); @@ -973,6 +883,7 @@ namespace vk region.create(width, height, section_depth, mipmaps, view, image, 0, true, gcm_format); region.set_dirty(false); region.set_context(context); + region.set_gcm_format(gcm_format); region.set_sampler_status(rsx::texture_sampler_status::status_uninitialized); region.set_image_type(type); @@ -1029,7 +940,7 @@ namespace vk section->set_sampler_status(rsx::texture_sampler_status::status_ready); } - vk::copy_mipmaped_image_using_buffer(cmd, image->value, subresource_layout, gcm_format, input_swizzled, mipmaps, subres_range.aspectMask, + vk::copy_mipmaped_image_using_buffer(cmd, image, subresource_layout, gcm_format, input_swizzled, mipmaps, subres_range.aspectMask, *m_texture_upload_heap); vk::leave_uninterruptible(); @@ -1136,12 +1047,11 @@ namespace vk public: - void initialize(vk::render_device& device, vk::memory_type_mapping& memory_types, vk::gpu_formats_support& formats_support, - VkQueue submit_queue, vk::vk_data_heap& upload_heap) + void initialize(vk::render_device& device, VkQueue submit_queue, vk::vk_data_heap& upload_heap) { - m_memory_types = memory_types; - m_formats_support = formats_support; m_device = &device; + m_memory_types = device.get_memory_mapping(); + m_formats_support = device.get_formats_support(); m_submit_queue = submit_queue; m_texture_upload_heap = &upload_heap; 
} @@ -1214,7 +1124,7 @@ namespace vk template sampled_image_descriptor _upload_texture(vk::command_buffer& cmd, RsxTextureType& tex, rsx::vk_render_targets& m_rtts) { - return upload_texture(cmd, tex, m_rtts, cmd, m_memory_types, const_cast(m_submit_queue)); + return upload_texture(cmd, tex, m_rtts, cmd, const_cast(m_submit_queue)); } vk::image *upload_image_simple(vk::command_buffer& /*cmd*/, u32 address, u32 width, u32 height) @@ -1275,7 +1185,7 @@ namespace vk vk::image* deferred_op_src = nullptr; vk::image* deferred_op_dst = nullptr; - void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool /*interpolate*/, bool is_depth) + void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool /*interpolate*/, bool is_depth, const rsx::typeless_xfer& /*typeless*/) { VkImageAspectFlagBits aspect = VK_IMAGE_ASPECT_COLOR_BIT; if (is_depth) aspect = (VkImageAspectFlagBits)(src->info.format == VK_FORMAT_D16_UNORM ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); @@ -1327,7 +1237,7 @@ namespace vk } helper(&cmd); - auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, m_memory_types, const_cast(m_submit_queue)); + auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, const_cast(m_submit_queue)); vk_blit_op_result result = reply.succeeded; result.real_dst_address = reply.real_dst_address; diff --git a/rpcs3/Emu/RSX/overlays.h b/rpcs3/Emu/RSX/overlays.h index 5ed6f66cdcc7..602b9cb3132b 100644 --- a/rpcs3/Emu/RSX/overlays.h +++ b/rpcs3/Emu/RSX/overlays.h @@ -291,6 +291,8 @@ namespace rsx static_cast(m_time_thingy.get())->auto_resize(); m_dim_background->back_color.a = 0.8f; + m_description->back_color.a = 0.f; + m_time_thingy->back_color.a = 0.f; return_code = selection_code::canceled; } diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index a6fddafdc772..00cd01cede84 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ 
b/rpcs3/Emu/RSX/rsx_cache.h @@ -9,47 +9,6 @@ namespace rsx { - struct blit_src_info - { - blit_engine::transfer_source_format format; - blit_engine::transfer_origin origin; - u16 offset_x; - u16 offset_y; - u16 width; - u16 height; - u16 slice_h; - u16 pitch; - void *pixels; - - bool compressed_x; - bool compressed_y; - u32 rsx_address; - }; - - struct blit_dst_info - { - blit_engine::transfer_destination_format format; - u16 offset_x; - u16 offset_y; - u16 width; - u16 height; - u16 pitch; - u16 clip_x; - u16 clip_y; - u16 clip_width; - u16 clip_height; - u16 max_tile_h; - f32 scale_x; - f32 scale_y; - - bool swizzled; - void *pixels; - - bool compressed_x; - bool compressed_y; - u32 rsx_address; - }; - enum protection_policy { protect_policy_one_page, //Only guard one page, preferrably one where this section 'wholly' fits diff --git a/rpcs3/Emu/RSX/rsx_utils.cpp b/rpcs3/Emu/RSX/rsx_utils.cpp index c358c5420baf..92aff462f0e1 100644 --- a/rpcs3/Emu/RSX/rsx_utils.cpp +++ b/rpcs3/Emu/RSX/rsx_utils.cpp @@ -82,7 +82,7 @@ namespace rsx * N - Sample count */ template - void scale_image_fallback_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v) + void scale_image_fallback_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 element_size, u8 samples_u, u8 samples_v) { u32 dst_offset = 0; u32 src_offset = 0; @@ -112,51 +112,39 @@ namespace rsx } } - void scale_image_fallback(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v) + void scale_image_fallback(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 element_size, u8 samples_u, u8 samples_v) { - switch (pixel_size) + switch (element_size) { case 1: - scale_image_fallback_impl((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); + 
scale_image_fallback_impl((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, samples_v); break; case 2: - scale_image_fallback_impl((u16*)dst, (const u16*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); + scale_image_fallback_impl((u16*)dst, (const u16*)src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, samples_v); break; case 4: - scale_image_fallback_impl((u32*)dst, (const u32*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); - break; - case 8: - scale_image_fallback_impl((u64*)dst, (const u64*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); - break; - case 16: - scale_image_fallback_impl((u128*)dst, (const u128*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); + scale_image_fallback_impl((u32*)dst, (const u32*)src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, samples_v); break; default: - fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size); + fmt::throw_exception("unsupported element size %d" HERE, element_size); } } - void scale_image_fallback_with_byte_swap(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v) + void scale_image_fallback_with_byte_swap(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 element_size, u8 samples_u, u8 samples_v) { - switch (pixel_size) + switch (element_size) { case 1: - scale_image_fallback_impl((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); + scale_image_fallback_impl((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, samples_v); break; case 2: - scale_image_fallback_impl>((u16*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, 
samples_u, samples_v); + scale_image_fallback_impl>((u16*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, samples_v); break; case 4: - scale_image_fallback_impl>((u32*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); - break; - case 8: - scale_image_fallback_impl>((u64*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); - break; - case 16: - scale_image_fallback_impl>((u128*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); + scale_image_fallback_impl>((u32*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, samples_v); break; default: - fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size); + fmt::throw_exception("unsupported element size %d" HERE, element_size); } } @@ -185,9 +173,9 @@ namespace rsx } template - void scale_image_fast(void *dst, const void *src, u8 pixel_size, u16 src_width, u16 src_height, u16 padding) + void scale_image_fast(void *dst, const void *src, u8 element_size, u16 src_width, u16 src_height, u16 padding) { - switch (pixel_size) + switch (element_size) { case 1: scale_image_impl((u8*)dst, (const u8*)src, src_width, src_height, padding); @@ -202,14 +190,14 @@ namespace rsx scale_image_impl((u64*)dst, (const u64*)src, src_width, src_height, padding); break; default: - fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size); + fmt::throw_exception("unsupported pixel size %d" HERE, element_size); } } template - void scale_image_fast_with_byte_swap(void *dst, const void *src, u8 pixel_size, u16 src_width, u16 src_height, u16 padding) + void scale_image_fast_with_byte_swap(void *dst, const void *src, u8 element_size, u16 src_width, u16 src_height, u16 padding) { - switch (pixel_size) + switch (element_size) { case 1: scale_image_impl((u8*)dst, (const u8*)src, src_width, 
src_height, padding); @@ -224,17 +212,17 @@ namespace rsx scale_image_impl, N>((u64*)dst, (const be_t*)src, src_width, src_height, padding); break; default: - fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size); + fmt::throw_exception("unsupported pixel size %d" HERE, element_size); } } - void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v, bool swap_bytes) + void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 element_size, u8 samples_u, u8 samples_v, bool swap_bytes) { //Scale this image by repeating pixel data n times //n = expected_pitch / real_pitch //Use of fixed argument templates for performance reasons - const u16 dst_width = dst_pitch / pixel_size; + const u16 dst_width = dst_pitch / element_size; const u16 padding = dst_width - (src_width * samples_u); if (!swap_bytes) @@ -244,30 +232,30 @@ namespace rsx switch (samples_u) { case 1: - scale_image_fast<1>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast<1>(dst, src, element_size, src_width, src_height, padding); break; case 2: - scale_image_fast<2>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast<2>(dst, src, element_size, src_width, src_height, padding); break; case 3: - scale_image_fast<3>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast<3>(dst, src, element_size, src_width, src_height, padding); break; case 4: - scale_image_fast<4>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast<4>(dst, src, element_size, src_width, src_height, padding); break; case 8: - scale_image_fast<8>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast<8>(dst, src, element_size, src_width, src_height, padding); break; case 16: - scale_image_fast<16>(dst, src, pixel_size, src_width, src_height, padding); + 
scale_image_fast<16>(dst, src, element_size, src_width, src_height, padding); break; default: - scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, 1); + scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, 1); } } else { - scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); + scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, samples_v); } } else @@ -277,30 +265,30 @@ namespace rsx switch (samples_u) { case 1: - scale_image_fast_with_byte_swap<1>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast_with_byte_swap<1>(dst, src, element_size, src_width, src_height, padding); break; case 2: - scale_image_fast_with_byte_swap<2>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast_with_byte_swap<2>(dst, src, element_size, src_width, src_height, padding); break; case 3: - scale_image_fast_with_byte_swap<3>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast_with_byte_swap<3>(dst, src, element_size, src_width, src_height, padding); break; case 4: - scale_image_fast_with_byte_swap<4>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast_with_byte_swap<4>(dst, src, element_size, src_width, src_height, padding); break; case 8: - scale_image_fast_with_byte_swap<8>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast_with_byte_swap<8>(dst, src, element_size, src_width, src_height, padding); break; case 16: - scale_image_fast_with_byte_swap<16>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast_with_byte_swap<16>(dst, src, element_size, src_width, src_height, padding); break; default: - scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, 1); + scale_image_fallback_with_byte_swap(dst, src, 
src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, 1); } } else { - scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); + scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, samples_v); } } } @@ -413,4 +401,26 @@ namespace rsx ++src_ptr; } } + + void convert_le_d24x8_to_le_f32(void *dst, void *src, u32 row_length_in_texels, u32 num_rows) + { + const u32 num_pixels = row_length_in_texels * num_rows; + verify(HERE), (num_pixels & 3) == 0; + + const auto num_iterations = (num_pixels >> 2); + + __m128i* dst_ptr = (__m128i*)dst; + __m128i* src_ptr = (__m128i*)src; + + const __m128 scale_vector = _mm_set1_ps(1.f / 16777214.f); + const __m128i mask = _mm_set1_epi32(0x00FFFFFF); + for (u32 n = 0; n < num_iterations; ++n) + { + const __m128 src_vector = _mm_cvtepi32_ps(_mm_and_si128(mask, _mm_loadu_si128(src_ptr))); + const __m128 normalized_vector = _mm_mul_ps(src_vector, scale_vector); + _mm_stream_si128(dst_ptr, (__m128i&)normalized_vector); + ++dst_ptr; + ++src_ptr; + } + } } diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index a34cf5ae54e5..37064e9da98b 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -1,6 +1,7 @@ #pragma once #include "../System.h" +#include "Utilities/geometry.h" #include "gcm_enums.h" #include @@ -60,6 +61,47 @@ namespace rsx f32 gamma = 1.f; //NO GAMMA CORRECTION }; + struct blit_src_info + { + blit_engine::transfer_source_format format; + blit_engine::transfer_origin origin; + u16 offset_x; + u16 offset_y; + u16 width; + u16 height; + u16 slice_h; + u16 pitch; + void *pixels; + + bool compressed_x; + bool compressed_y; + u32 rsx_address; + }; + + struct blit_dst_info + { + blit_engine::transfer_destination_format format; + u16 offset_x; + u16 offset_y; + u16 width; + u16 height; + u16 pitch; + u16 clip_x; + u16 clip_y; + u16 clip_width; + u16 
clip_height; + u16 max_tile_h; + f32 scale_x; + f32 scale_y; + + bool swizzled; + void *pixels; + + bool compressed_x; + bool compressed_y; + u32 rsx_address; + }; + static const std::pair, std::array> default_remap_vector = { { CELL_GCM_TEXTURE_REMAP_FROM_A, CELL_GCM_TEXTURE_REMAP_FROM_R, CELL_GCM_TEXTURE_REMAP_FROM_G, CELL_GCM_TEXTURE_REMAP_FROM_B }, @@ -237,6 +279,7 @@ namespace rsx void convert_le_f32_to_be_d24(void *dst, void *src, u32 row_length_in_texels, u32 num_rows); void convert_le_d24x8_to_be_d24x8(void *dst, void *src, u32 row_length_in_texels, u32 num_rows); + void convert_le_d24x8_to_le_f32(void *dst, void *src, u32 row_length_in_texels, u32 num_rows); void fill_scale_offset_matrix(void *dest_, bool transpose, float offset_x, float offset_y, float offset_z, @@ -411,4 +454,31 @@ namespace rsx return ((u64)index + index_base) & 0x000FFFFF; } + // Convert color write mask for G8B8 to R8G8 + static inline u32 get_g8b8_r8g8_colormask(u32 mask) + { + u32 result = 0; + if (mask & 0x20) result |= 0x20; + if (mask & 0x40) result |= 0x10; + + return result; + } + + static inline void get_g8b8_r8g8_colormask(bool &red, bool &green, bool &blue, bool &alpha) + { + red = blue; + green = green; + blue = false; + alpha = false; + } + + static inline color4f decode_border_color(u32 colorref) + { + color4f result; + result.b = (colorref & 0xFF) / 255.f; + result.g = ((colorref >> 8) & 0xFF) / 255.f; + result.r = ((colorref >> 16) & 0xFF) / 255.f; + result.a = ((colorref >> 24) & 0xFF) / 255.f; + return result; + } }