Skip to content

Commit

Permalink
rsx: Texture cache fixes and improvements
Browse files Browse the repository at this point in the history
rsx: Conditional lock hack removed
vulkan - Fixes
- Remove unused texture class
- Fix native pitch calculation (WCB)
rsx: Catch hanging begin/end pairs when flushing deferred draw calls
vulkan: Register DXT compressed formats
vulkan: Register depth formats
gl: Workaround for 'texture stitching' when gathering flip surface
- TODO: Add a proper flip hack option
rsx: Fix texture memory size calculation
- DXT textures don't have a real pitch. Since pitch is used to calculate memory size, make sure it always evaluates to rsx_size
rsx: Fix cpu copy detection
rsx: Validate blit dst surface and don't make assumptions about region blit order
- Also relax restrictions on memory owned by the blit engine if strict rendering is not enabled
rsx: Fix depth texture detection
rsx: Do not manually offset into dst. The overlapped range check does so automatically
rsx: Minor optimizations
rsx: Minor fixes
- Fix to detect incompatible formats when using GPU texture scaling and show message
- Better 'is_depth_texture' algorithm to eliminate false positives
  • Loading branch information
kd-11 committed Sep 21, 2017
1 parent 9108460 commit d6ed2d9
Show file tree
Hide file tree
Showing 13 changed files with 262 additions and 575 deletions.
202 changes: 118 additions & 84 deletions rpcs3/Emu/RSX/Common/texture_cache.h

Large diffs are not rendered by default.

22 changes: 10 additions & 12 deletions rpcs3/Emu/RSX/GL/GLGSRender.cpp
Expand Up @@ -939,7 +939,7 @@ bool GLGSRender::check_program_state()
{
surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);

if (!surface && m_gl_texture_cache.is_depth_texture(texaddr))
if (!surface && m_gl_texture_cache.is_depth_texture(texaddr, (u32)get_texture_size(tex)))
return std::make_tuple(true, 0);
}

Expand Down Expand Up @@ -1094,24 +1094,22 @@ void GLGSRender::flip(int buffer)
m_flip_fbo.recreate();
m_flip_fbo.bind();

//The render might have been done offscreen and a blit used to display
//Check the texture cache for a blitted copy
const u32 size = buffer_pitch * buffer_height;
auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address);

if (surface != nullptr)
{
m_flip_fbo.color = surface->get_raw_view();
m_flip_fbo.read_buffer(m_flip_fbo.color);
}
else if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
{
buffer_width = render_target_texture->width();
buffer_height = render_target_texture->height();

m_flip_fbo.color = *render_target_texture;
m_flip_fbo.read_buffer(m_flip_fbo.color);
}
else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address))
{
//Hack - this should be the first location to check for output
//The render might have been done offscreen or in software and a blit used to display
m_flip_fbo.color = surface->get_raw_view();
m_flip_fbo.read_buffer(m_flip_fbo.color);
}
else
{
LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU");
Expand Down Expand Up @@ -1248,7 +1246,7 @@ void GLGSRender::do_local_task()
//Check if the suggested section is valid
if (!q.section_to_flush->is_flushed())
{
q.section_to_flush->flush();
m_gl_texture_cache.flush_address(q.address_to_flush);
q.result = true;
}
else
Expand Down
4 changes: 2 additions & 2 deletions rpcs3/Emu/RSX/GL/GLTexture.cpp
Expand Up @@ -19,7 +19,7 @@ namespace gl
case CELL_GCM_TEXTURE_A8R8G8B8: return GL_RGBA8;
case CELL_GCM_TEXTURE_G8B8: return GL_RG8;
case CELL_GCM_TEXTURE_R6G5B5: return GL_RGB565;
case CELL_GCM_TEXTURE_DEPTH24_D8: return GL_DEPTH_COMPONENT24;
case CELL_GCM_TEXTURE_DEPTH24_D8: return GL_DEPTH24_STENCIL8;
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return GL_DEPTH_COMPONENT24;
case CELL_GCM_TEXTURE_DEPTH16: return GL_DEPTH_COMPONENT16;
case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return GL_DEPTH_COMPONENT16;
Expand Down Expand Up @@ -442,7 +442,7 @@ namespace gl
}
}

void upload_texture(const GLuint id, const u32 texaddr, const u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, bool is_swizzled, rsx::texture_dimension_extended type,
void upload_texture(const GLuint id, const u32 texaddr, const u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, bool is_swizzled, rsx::texture_dimension_extended type,
std::vector<rsx_subresource_layout>& subresources_layout, std::pair<std::array<u8, 4>, std::array<u8, 4>>& decoded_remap, bool static_state)
{
const bool is_cubemap = type == rsx::texture_dimension_extended::texture_dimension_cubemap;
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/RSX/GL/GLTexture.h
Expand Up @@ -25,7 +25,7 @@ namespace gl
* - second vector contains overrides to force the value to either 0 or 1 instead of reading from texture
* static_state - set up the texture without consideration for sampler state (useful for vertex textures which have no real sampler state on RSX)
*/
void upload_texture(const GLuint id, const u32 texaddr, const u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, bool is_swizzled, rsx::texture_dimension_extended type,
void upload_texture(const GLuint id, const u32 texaddr, const u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, bool is_swizzled, rsx::texture_dimension_extended type,
std::vector<rsx_subresource_layout>& subresources_layout, std::pair<std::array<u8, 4>, std::array<u8, 4>>& decoded_remap, bool static_state);

class sampler_state
Expand Down
32 changes: 26 additions & 6 deletions rpcs3/Emu/RSX/GL/GLTextureCache.h
Expand Up @@ -476,6 +476,19 @@ namespace gl
{
u32 dst_id = 0;

GLenum ifmt;
glBindTexture(GL_TEXTURE_2D, src_id);
glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_INTERNAL_FORMAT, (GLint*)&ifmt);

switch (ifmt)
{
case GL_DEPTH_COMPONENT16:
case GL_DEPTH_COMPONENT24:
case GL_DEPTH24_STENCIL8:
sized_internal_fmt = ifmt;
break;
}

glGenTextures(1, &dst_id);
glBindTexture(GL_TEXTURE_2D, dst_id);

Expand Down Expand Up @@ -552,12 +565,15 @@ namespace gl
}

auto& cached = create_texture(vram_texture, rsx_address, rsx_size, width, height);
cached.protect(utils::protection::ro);
cached.set_dirty(false);
cached.set_depth_flag(depth_flag);
cached.set_view_flags(flags);
cached.set_context(context);

//Its not necessary to lock blit dst textures as they are just reused as necessary
if (context != rsx::texture_upload_context::blit_engine_dst || g_cfg.video.strict_rendering_mode)
cached.protect(utils::protection::ro);

return &cached;
}

Expand All @@ -572,7 +588,7 @@ namespace gl
//Swizzling is ignored for blit engine copy and emulated using remapping
bool input_swizzled = (context == rsx::texture_upload_context::blit_engine_src)? false : swizzled;

gl::upload_texture(section->get_raw_texture(), rsx_address, gcm_format, width, height, depth, mipmaps, pitch, input_swizzled, type, subresource_layout, remap_vector, false);
gl::upload_texture(section->get_raw_texture(), rsx_address, gcm_format, width, height, depth, mipmaps, input_swizzled, type, subresource_layout, remap_vector, false);
return section;
}

Expand Down Expand Up @@ -620,11 +636,11 @@ namespace gl
m_hw_blitter.destroy();
}

bool is_depth_texture(const u32 rsx_address) override
bool is_depth_texture(const u32 rsx_address, const u32 rsx_size) override
{
reader_lock lock(m_cache_mutex);

/* auto found = m_cache.find(rsx_address);
auto found = m_cache.find(get_block_address(rsx_address));
if (found == m_cache.end())
return false;

Expand All @@ -636,8 +652,12 @@ namespace gl
if (tex.is_dirty())
continue;

return tex.is_depth_texture();
}*/
if (!tex.overlaps(rsx_address, true))
continue;

if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size())
return tex.is_depth_texture();
}

return false;
}
Expand Down
13 changes: 11 additions & 2 deletions rpcs3/Emu/RSX/RSXThread.cpp
Expand Up @@ -394,6 +394,7 @@ namespace rsx
// Deferred calls are used to batch draws together
u32 deferred_primitive_type = 0;
u32 deferred_call_size = 0;
s32 deferred_begin_end = 0;
std::vector<u32> deferred_stack;
bool has_deferred_call = false;

Expand Down Expand Up @@ -453,6 +454,10 @@ namespace rsx
if (emit_end)
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0);

if (deferred_begin_end > 0) //Hanging draw call (useful for immediate rendering where the begin call needs to be noted)
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, deferred_primitive_type);

deferred_begin_end = 0;
deferred_primitive_type = 0;
deferred_call_size = 0;
has_deferred_call = false;
Expand Down Expand Up @@ -569,6 +574,11 @@ namespace rsx
case NV4097_SET_BEGIN_END:
{
// Hook; Allows begin to go through, but ignores end
if (value)
deferred_begin_end++;
else
deferred_begin_end--;

if (value && value != deferred_primitive_type)
deferred_primitive_type = value;
else
Expand Down Expand Up @@ -1065,8 +1075,7 @@ namespace rsx
current_vertex_program.skip_vertex_input_check = false;

current_vertex_program.rsx_vertex_inputs.resize(0);
current_vertex_program.data.resize(512 * 4);
current_vertex_program.rsx_vertex_inputs.reserve(rsx::limits::vertex_count);
current_vertex_program.data.resize((512 - transform_program_start) * 4);

u32* ucode_src = rsx::method_registers.transform_program.data() + (transform_program_start * 4);
u32* ucode_dst = current_vertex_program.data.data();
Expand Down
4 changes: 2 additions & 2 deletions rpcs3/Emu/RSX/VK/VKGSRender.cpp
Expand Up @@ -1696,7 +1696,7 @@ bool VKGSRender::do_method(u32 cmd, u32 arg)

bool VKGSRender::check_program_status()
{
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple<bool, u16>
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture &tex, bool is_depth) -> std::tuple<bool, u16>
{
vk::render_target *surface = nullptr;

Expand All @@ -1706,7 +1706,7 @@ bool VKGSRender::check_program_status()
{
surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);

if (!surface && m_texture_cache.is_depth_texture(texaddr))
if (!surface && m_texture_cache.is_depth_texture(texaddr, (u32)get_texture_size(tex)))
return std::make_tuple(true, 0);
}

Expand Down
63 changes: 55 additions & 8 deletions rpcs3/Emu/RSX/VK/VKHelpers.cpp
Expand Up @@ -6,10 +6,10 @@ namespace vk
context* g_current_vulkan_ctx = nullptr;
render_device g_current_renderer;

texture g_null_texture;
std::unique_ptr<image> g_null_texture;
std::unique_ptr<image_view> g_null_image_view;

VkSampler g_null_sampler = nullptr;
VkImageView g_null_image_view = nullptr;

bool g_cb_no_interrupt_flag = false;

Expand Down Expand Up @@ -131,6 +131,47 @@ namespace vk
fmt::throw_exception("Invalid or unsupported sampler format for texture format (0x%x)" HERE, format);
}

// Maps a Vulkan format to the per-texel size value used by this renderer.
//
// format - the VkFormat to look up.
// Returns 1, 2, 4, 8 or 16 depending on the format.
// Throws (via fmt::throw_exception) for any format that is not listed below.
//
// NOTE(review): the values match the byte size of one texel for the
// uncompressed formats. The BC1/BC2/BC3 block-compressed formats are reported
// as 4 even though they have no true per-texel byte size - confirm how callers
// use the value for those formats.
u8 get_format_texel_width(const VkFormat format)
{
	switch (format)
	{
	// Single 8-bit channel
	case VK_FORMAT_R8_UNORM:
		return 1;

	// 16-bit color formats and 16-bit depth
	case VK_FORMAT_D16_UNORM:
	case VK_FORMAT_R16_UINT:
	case VK_FORMAT_R16_SFLOAT:
	case VK_FORMAT_R16_UNORM:
	case VK_FORMAT_R8G8_UNORM:
	case VK_FORMAT_R8G8_SNORM:
	case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
	case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
	case VK_FORMAT_R5G6B5_UNORM_PACK16:
	case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
		return 2;

	// 32-bit color formats
	case VK_FORMAT_R32_UINT:
	case VK_FORMAT_R32_SFLOAT:
	case VK_FORMAT_R16G16_UNORM:
	case VK_FORMAT_R16G16_SFLOAT:
	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
	case VK_FORMAT_R8G8B8A8_UNORM:
	case VK_FORMAT_B8G8R8A8_UNORM:
	// Block-compressed formats are reported with the same value
	case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
	case VK_FORMAT_BC2_UNORM_BLOCK:
	case VK_FORMAT_BC3_UNORM_BLOCK:
	// Combined depth-stencil formats
	case VK_FORMAT_D24_UNORM_S8_UINT:
	case VK_FORMAT_D32_SFLOAT_S8_UINT: //TODO: Translate to D24S8
		return 4;

	// Four 16-bit float channels
	case VK_FORMAT_R16G16B16A16_SFLOAT:
		return 8;

	// Four 32-bit float channels
	case VK_FORMAT_R32G32B32A32_SFLOAT:
		return 16;

	// Anything else is rejected below
	default:
		break;
	}

	fmt::throw_exception("Unexpected vkFormat 0x%X", static_cast<u32>(format));
}

VkAllocationCallbacks default_callbacks()
{
VkAllocationCallbacks callbacks;
Expand Down Expand Up @@ -170,22 +211,28 @@ namespace vk
VkImageView null_image_view()
{
if (g_null_image_view)
return g_null_image_view;
return g_null_image_view->value;

g_null_texture.reset(new image(g_current_renderer, get_memory_mapping(g_current_renderer.gpu()).device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_IMAGE_TYPE_2D, VK_FORMAT_B8G8R8A8_UNORM, 4, 4, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT, 0));

g_null_image_view.reset(new image_view(g_current_renderer, g_null_texture->value, VK_IMAGE_VIEW_TYPE_2D,
VK_FORMAT_B8G8R8A8_UNORM, {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A},
{VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}));

g_null_texture.create(g_current_renderer, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_USAGE_SAMPLED_BIT, 4, 4);
g_null_image_view = g_null_texture;
return g_null_image_view;
return g_null_image_view->value;
}

void destroy_global_resources()
{
g_null_texture.destroy();
g_null_texture.reset();
g_null_image_view .reset();

if (g_null_sampler)
vkDestroySampler(g_current_renderer, g_null_sampler, nullptr);

g_null_sampler = nullptr;
g_null_image_view = nullptr;
}

void set_current_thread_ctx(const vk::context &ctx)
Expand Down
57 changes: 1 addition & 56 deletions rpcs3/Emu/RSX/VK/VKHelpers.h
Expand Up @@ -81,6 +81,7 @@ namespace vk
void copy_scaled_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height, u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height, u32 mipmaps, VkImageAspectFlagBits aspect);

VkFormat get_compatible_sampler_format(u32 format);
u8 get_format_texel_width(const VkFormat format);
std::pair<VkFormat, VkComponentMapping> get_compatible_surface_format(rsx::surface_color_format color_format);
size_t get_render_pass_location(VkFormat color_surface_format, VkFormat depth_stencil_format, u8 color_surface_count);

Expand Down Expand Up @@ -475,57 +476,6 @@ namespace vk
VkDevice m_device;
};

// Wrapper that bundles a Vulkan image handle, its image view and the metadata
// kept alongside them (format, usage, aspect, layout, tiling and dimensions).
// Only the declaration is visible here; member function bodies live elsewhere.
// NOTE(review): the commit message accompanying this hunk lists
// "Remove unused texture class" - this appears to be that class; confirm
// before relying on it.
class texture
{
// Vulkan handles and creation-time properties of the image
VkImageView m_view = nullptr;
VkImage m_image_contents = nullptr;
VkMemoryRequirements m_memory_layout;
VkFormat m_internal_format;
VkImageUsageFlags m_flags;
VkImageAspectFlagBits m_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED;
VkImageViewType m_view_type = VK_IMAGE_VIEW_TYPE_2D;
VkImageUsageFlags m_usage = VK_IMAGE_USAGE_SAMPLED_BIT;
VkImageTiling m_tiling = VK_IMAGE_TILING_LINEAR;

// Backing memory block and the device this texture was created on
// (presumably set by create() - TODO confirm against the implementation)
vk::memory_block_deprecated vram_allocation;
vk::render_device *owner = nullptr;

// Dimensions and mip count; no default initializers, so they are
// indeterminate until assigned
u32 m_width;
u32 m_height;
u32 m_mipmaps;

// Optional companion texture and a readiness flag
// NOTE(review): ownership of staging_texture is not visible from this declaration
vk::texture *staging_texture = nullptr;
bool ready = false;

public:
// Construct from a swap chain image, or default-construct an empty texture
texture(vk::swap_chain_image &img);
texture() {}
// The destructor body is empty; an explicit destroy() is declared below
~texture() {}

// create() overloads, from most to least explicit; the last one supplies defaults
// for mipmaps, gpu_only and swizzle
void create(vk::render_device &device, VkFormat format, VkImageType image_type, VkImageViewType view_type, VkImageCreateFlags image_flags, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping swizzle);
void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, VkImageTiling tiling, u32 width, u32 height, u32 mipmaps, bool gpu_only, VkComponentMapping swizzle);
void create(vk::render_device &device, VkFormat format, VkImageUsageFlags usage, u32 width, u32 height, u32 mipmaps = 1, bool gpu_only = false, VkComponentMapping swizzle = default_component_map());
void destroy();

// Operations that take a command buffer (presumably they record commands into it -
// confirm against the implementation)
void init(rsx::fragment_texture &tex, vk::command_buffer &cmd, bool ignore_checks = false);
void flush(vk::command_buffer & cmd);

//Fill with debug color 0xFF
void init_debug();

// Layout transition request and layout query
void change_layout(vk::command_buffer &cmd, VkImageLayout new_layout);
VkImageLayout get_layout();

// Accessors for the stored dimensions, mip count and format
const u32 width();
const u32 height();
const u16 mipmaps();
const VkFormat get_format();

// Implicit conversions to the raw Vulkan handle types
operator VkImageView();
operator VkImage();
};

struct buffer
{
VkBuffer value;
Expand Down Expand Up @@ -779,11 +729,6 @@ namespace vk
{
return view;
}

operator vk::texture()
{
return vk::texture(*this);
}
};

class swap_chain
Expand Down

0 comments on commit d6ed2d9

Please sign in to comment.