Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rsx/vk: Fix some spec violations #10900

Merged
merged 6 commits into from
Sep 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 4 additions & 3 deletions rpcs3/Emu/RSX/Common/TextureUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ namespace

for (unsigned layer = 0; layer < layer_count; layer++)
{
u16 miplevel_width_in_texel = width_in_texel, miplevel_height_in_texel = height_in_texel;
u16 miplevel_width_in_texel = width_in_texel, miplevel_height_in_texel = height_in_texel, miplevel_depth = depth;
for (unsigned mip_level = 0; mip_level < mipmap_count; mip_level++)
{
result.push_back({});
Expand All @@ -446,7 +446,7 @@ namespace
current_subresource_layout.height_in_texel = miplevel_height_in_texel;
current_subresource_layout.level = mip_level;
current_subresource_layout.layer = layer;
current_subresource_layout.depth = depth;
current_subresource_layout.depth = miplevel_depth;
current_subresource_layout.border = border_size;

if constexpr (block_edge_in_texel == 1)
Expand Down Expand Up @@ -482,13 +482,14 @@ namespace
full_height_in_block = rsx::next_pow2(current_subresource_layout.height_in_block + border_size + border_size);
}

const u32 slice_sz = src_pitch_in_block * block_size_in_bytes * full_height_in_block * depth;
const u32 slice_sz = src_pitch_in_block * block_size_in_bytes * full_height_in_block * miplevel_depth;
current_subresource_layout.pitch_in_block = src_pitch_in_block;
current_subresource_layout.data = std::span<const std::byte>(texture_data_pointer + offset_in_src, slice_sz);

offset_in_src += slice_sz;
miplevel_width_in_texel = std::max(miplevel_width_in_texel / 2, 1);
miplevel_height_in_texel = std::max(miplevel_height_in_texel / 2, 1);
miplevel_depth = std::max(miplevel_depth / 2, 1);
}

offset_in_src = utils::align(offset_in_src, 128);
Expand Down
21 changes: 11 additions & 10 deletions rpcs3/Emu/RSX/GL/GLGSRender.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,14 +496,14 @@ void GLGSRender::clear_surface(u32 arg)
if (skip_current_frame) return;

// If stencil write mask is disabled, remove clear_stencil bit
if (!rsx::method_registers.stencil_mask()) arg &= ~0x2u;
if (!rsx::method_registers.stencil_mask()) arg &= ~RSX_GCM_CLEAR_STENCIL_BIT;

// Ignore invalid clear flags
if ((arg & 0xf3) == 0) return;
if ((arg & RSX_GCM_CLEAR_ANY_MASK) == 0) return;

u8 ctx = rsx::framebuffer_creation_context::context_draw;
if (arg & 0xF0) ctx |= rsx::framebuffer_creation_context::context_clear_color;
if (arg & 0x3) ctx |= rsx::framebuffer_creation_context::context_clear_depth;
if (arg & RSX_GCM_CLEAR_COLOR_MASK) ctx |= rsx::framebuffer_creation_context::context_clear_color;
if (arg & RSX_GCM_CLEAR_DEPTH_STENCIL_MASK) ctx |= rsx::framebuffer_creation_context::context_clear_depth;

init_buffers(static_cast<rsx::framebuffer_creation_context>(ctx), true);

Expand All @@ -521,9 +521,9 @@ void GLGSRender::clear_surface(u32 arg)
bool update_color = false, update_z = false;
rsx::surface_depth_format2 surface_depth_format = rsx::method_registers.surface_depth_fmt();

if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); arg & 0x3)
if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); arg & RSX_GCM_CLEAR_DEPTH_STENCIL_MASK)
{
if (arg & 0x1)
if (arg & RSX_GCM_CLEAR_DEPTH_BIT)
{
u32 max_depth_value = get_max_depth_value(surface_depth_format);
u32 clear_depth = rsx::method_registers.z_clear_value(is_depth_stencil_format(surface_depth_format));
Expand All @@ -535,7 +535,7 @@ void GLGSRender::clear_surface(u32 arg)

if (is_depth_stencil_format(surface_depth_format))
{
if (arg & 0x2)
if (arg & RSX_GCM_CLEAR_STENCIL_BIT)
{
u8 clear_stencil = rsx::method_registers.stencil_clear_value();

Expand All @@ -544,22 +544,23 @@ void GLGSRender::clear_surface(u32 arg)
mask |= GLenum(gl::buffers::stencil);
}

if ((arg & 0x3) != 0x3 || !full_frame)
if (const auto ds_mask = (arg & RSX_GCM_CLEAR_DEPTH_STENCIL_MASK);
ds_mask != RSX_GCM_CLEAR_DEPTH_STENCIL_MASK || !full_frame)
{
ensure(mask);

if (ds->state_flags & rsx::surface_state_flags::erase_bkgnd && // Needs initialization
ds->old_contents.empty() && !g_cfg.video.read_depth_buffer) // No way to load data from memory, so no initialization given
{
// Only one aspect was cleared. Make sure to memory initialize the other before removing dirty flag
if (arg == 1)
if (ds_mask == RSX_GCM_CLEAR_DEPTH_BIT)
{
// Depth was cleared, initialize stencil
gl_state.stencil_mask(0xFF);
gl_state.clear_stencil(0xFF);
mask |= GLenum(gl::buffers::stencil);
}
else
else if (ds_mask == RSX_GCM_CLEAR_STENCIL_BIT)
{
// Stencil was cleared, initialize depth
gl_state.depth_mask(GL_TRUE);
Expand Down
3 changes: 2 additions & 1 deletion rpcs3/Emu/RSX/RSXThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1894,6 +1894,8 @@ namespace rsx
if (!(textures_ref & 1)) continue;

auto &tex = rsx::method_registers.fragment_textures[i];
current_fp_texture_state.clear(i);

if (tex.enabled())
{
current_fragment_program.texture_params[i].scale[0] = sampler_descriptors[i]->scale_x;
Expand All @@ -1905,7 +1907,6 @@ namespace rsx
m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty;

u32 texture_control = 0;
current_fp_texture_state.clear(i);
current_fp_texture_state.set_dimension(sampler_descriptors[i]->image_type, i);

if (tex.alpha_kill_enabled())
Expand Down
25 changes: 13 additions & 12 deletions rpcs3/Emu/RSX/VK/VKGSRender.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1197,14 +1197,14 @@ void VKGSRender::clear_surface(u32 mask)
if (skip_current_frame || swapchain_unavailable) return;

// If stencil write mask is disabled, remove clear_stencil bit
if (!rsx::method_registers.stencil_mask()) mask &= ~0x2u;
if (!rsx::method_registers.stencil_mask()) mask &= ~RSX_GCM_CLEAR_STENCIL_BIT;

// Ignore invalid clear flags
if (!(mask & 0xF3)) return;
if (!(mask & RSX_GCM_CLEAR_ANY_MASK)) return;

u8 ctx = rsx::framebuffer_creation_context::context_draw;
if (mask & 0xF0) ctx |= rsx::framebuffer_creation_context::context_clear_color;
if (mask & 0x3) ctx |= rsx::framebuffer_creation_context::context_clear_depth;
if (mask & RSX_GCM_CLEAR_COLOR_MASK) ctx |= rsx::framebuffer_creation_context::context_clear_color;
if (mask & RSX_GCM_CLEAR_DEPTH_STENCIL_MASK) ctx |= rsx::framebuffer_creation_context::context_clear_depth;
init_buffers(rsx::framebuffer_creation_context{ctx});

if (!framebuffer_status_valid) return;
Expand Down Expand Up @@ -1232,9 +1232,9 @@ void VKGSRender::clear_surface(u32 mask)
bool update_color = false, update_z = false;
auto surface_depth_format = rsx::method_registers.surface_depth_fmt();

if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); mask & 0x3)
if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); mask & RSX_GCM_CLEAR_DEPTH_STENCIL_MASK)
{
if (mask & 0x1)
if (mask & RSX_GCM_CLEAR_DEPTH_BIT)
{
u32 max_depth_value = get_max_depth_value(surface_depth_format);

Expand All @@ -1249,7 +1249,7 @@ void VKGSRender::clear_surface(u32 mask)

if (is_depth_stencil_format(surface_depth_format))
{
if (mask & 0x2)
if (mask & RSX_GCM_CLEAR_STENCIL_BIT)
{
u8 clear_stencil = rsx::method_registers.stencil_clear_value();
depth_stencil_clear_values.depthStencil.stencil = clear_stencil;
Expand All @@ -1273,13 +1273,14 @@ void VKGSRender::clear_surface(u32 mask)
ds->old_contents.empty() && !g_cfg.video.read_depth_buffer) // No way to load data from memory, so no initialization given
{
// Only one aspect was cleared. Make sure to memory initialize the other before removing dirty flag
if (mask == 1)
const auto ds_mask = (mask & RSX_GCM_CLEAR_DEPTH_STENCIL_MASK);
if (ds_mask == RSX_GCM_CLEAR_DEPTH_BIT && (ds->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT))
{
// Depth was cleared, initialize stencil
depth_stencil_clear_values.depthStencil.stencil = 0xFF;
depth_stencil_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
else
else if (ds_mask == RSX_GCM_CLEAR_STENCIL_BIT)
{
// Stencil was cleared, initialize depth
depth_stencil_clear_values.depthStencil.depth = 1.f;
Expand All @@ -1294,7 +1295,7 @@ void VKGSRender::clear_surface(u32 mask)
}
}

if (auto colormask = (mask & 0xF0))
if (auto colormask = (mask & RSX_GCM_CLEAR_COLOR_MASK))
{
if (!m_draw_buffers.empty())
{
Expand All @@ -1318,7 +1319,7 @@ void VKGSRender::clear_surface(u32 mask)
{
rsx::get_g8b8_clear_color(clear_r, clear_g, clear_b, clear_a);
colormask = rsx::get_g8b8_r8g8_colormask(colormask);
use_fast_clear = (colormask == (0x10 | 0x20));
use_fast_clear = (colormask == (RSX_GCM_CLEAR_RED_BIT | RSX_GCM_CLEAR_GREEN_BIT));
break;
}
case rsx::surface_color_format::a8b8g8r8:
Expand All @@ -1331,7 +1332,7 @@ void VKGSRender::clear_surface(u32 mask)
}
default:
{
use_fast_clear = (colormask == (0x10 | 0x20 | 0x40 | 0x80));
use_fast_clear = (colormask == RSX_GCM_CLEAR_COLOR_MASK);
break;
}
}
Expand Down
128 changes: 108 additions & 20 deletions rpcs3/Emu/RSX/VK/vkutils/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ namespace vk
stencil_export_support = device_extensions.is_supported(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
conditional_render_support = device_extensions.is_supported(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
external_memory_host_support = device_extensions.is_supported(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
sampler_mirror_clamped_support = device_extensions.is_supported(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME);
unrestricted_depth_range_support = device_extensions.is_supported(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
debug_utils_support = instance_extensions.is_supported(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
surface_capabilities_2_support = instance_extensions.is_supported(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
Expand Down Expand Up @@ -312,6 +313,16 @@ namespace vk
requested_extensions.push_back(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
}

if (pgpu->stencil_export_support)
{
requested_extensions.push_back(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
}

if (pgpu->sampler_mirror_clamped_support)
{
requested_extensions.push_back(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME);
}

enabled_features.robustBufferAccess = VK_TRUE;
enabled_features.fullDrawIndexUint32 = VK_TRUE;
enabled_features.independentBlend = VK_TRUE;
Expand Down Expand Up @@ -674,40 +685,117 @@ namespace vk
memory_type_mapping result;
result.device_local_total_bytes = 0;
result.host_visible_total_bytes = 0;
bool host_visible_cached = false;
result.device_bar_total_bytes = 0;

// Sort the confusingly laid out heap-type map into something easier to scan.
// Not performance-critical, this method is called once at initialization.
// Flattened description of a single memory type, used while scanning the heap/type map.
struct memory_type
{
// Index of this type within memory_properties.memoryTypes
u32 type_index;
// The propertyFlags of the memory type
VkFlags flags;
// Size in bytes. Stored as 0 while building the heap map; search results carry the owning heap's size here
VkDeviceSize size;
};

// Pairs one memory heap with every memory type that refers to it.
struct heap_type_map_entry
{
// Copy of the corresponding entry in memory_properties.memoryHeaps
VkMemoryHeap heap;
// Memory types whose heapIndex points at this heap, in memoryTypes order
std::vector<memory_type> types;
};

std::vector<heap_type_map_entry> memory_heap_map;
for (u32 i = 0; i < memory_properties.memoryHeapCount; ++i)
{
memory_heap_map.push_back(
{
.heap = memory_properties.memoryHeaps[i],
.types = {}
});
}

for (u32 i = 0; i < memory_properties.memoryTypeCount; i++)
{
VkMemoryHeap& heap = memory_properties.memoryHeaps[memory_properties.memoryTypes[i].heapIndex];
auto& type_info = memory_properties.memoryTypes[i];
memory_heap_map[type_info.heapIndex].types.push_back({ i, type_info.propertyFlags, 0 });
}

bool is_device_local = !!(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
if (is_device_local)
auto find_memory_type_with_property = [&memory_heap_map](VkFlags desired_flags, VkFlags excluded_flags)
{
std::vector<memory_type> results;

for (auto& heap : memory_heap_map)
{
// Allow multiple device_local heaps
result.device_local.push(i, heap.size);
result.device_local_total_bytes += heap.size;
for (auto &type : heap.types)
{
if (((type.flags & desired_flags) == desired_flags) && !(type.flags & excluded_flags))
{
// Match, only once allowed per heap!
results.push_back({ type.type_index, type.flags, heap.heap.size });
break;
}
}
}

bool is_host_visible = !!(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
bool is_host_coherent = !!(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
bool is_cached = !!(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
return results;
};

auto device_local_types = find_memory_type_with_property(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD));
auto host_coherent_types = find_memory_type_with_property((VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), 0);
auto bar_memory_types = find_memory_type_with_property((VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), 0);

ensure(!device_local_types.empty());
ensure(!host_coherent_types.empty());

// BAR heap, currently parked for future use, I have some plans for it (kd-11)
for (auto& type : bar_memory_types)
{
result.device_bar.push(type.type_index, type.size);
result.device_bar_total_bytes += type.size;
}

if (is_host_coherent && is_host_visible)
// Generic VRAM access, requires some minor prioritization based on flags
// Most devices have a 'PURE' device local type, pin that as the first priority
// Internally, there will be some reshuffling based on memory load later, but this is rare
if (device_local_types.size() > 1)
{
std::sort(device_local_types.begin(), device_local_types.end(), [](const auto& a, const auto& b)
{
if ((is_cached && !host_visible_cached) || (result.host_visible_total_bytes < heap.size))
if (a.flags == b.flags)
{
// Allow only a single host_visible heap. It makes no sense to have multiple of these otherwise
result.host_visible_coherent = { i, heap.size };
result.host_visible_total_bytes = heap.size;
host_visible_cached = is_cached;
return a.size > b.size;
}

return (a.flags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) || (b.flags != VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT && a.size > b.size);
});
}

for (auto& type : device_local_types)
{
result.device_local.push(type.type_index, type.size);
result.device_local_total_bytes += type.size;
}

// Some prioritization is needed for host-visible memory. Unlike the others, we only need to pick one block.
// Use host-cached memory if available, but this is not really required.
bool is_host_cached = false;
for (auto& type : host_coherent_types)
{
if (!is_host_cached && type.flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT)
{
is_host_cached = true;
result.host_visible_coherent = { type.type_index, type.size };
result.host_visible_total_bytes = type.size;
}
else if (result.host_visible_total_bytes < type.size)
{
result.host_visible_coherent = { type.type_index, type.size };
result.host_visible_total_bytes = type.size;
}
}

if (!result.device_local)
fmt::throw_exception("GPU doesn't support device local memory");
if (!result.host_visible_coherent)
fmt::throw_exception("GPU doesn't support host coherent device local memory");
rsx_log.notice("Detected %llu MB of device local memory", result.device_local_total_bytes / (0x100000));
rsx_log.notice("Detected %llu MB of host coherent memory", result.host_visible_total_bytes / (0x100000));
rsx_log.notice("Detected %llu MB of BAR memory", result.device_bar_total_bytes / (0x100000));

return result;
}

Expand Down
3 changes: 3 additions & 0 deletions rpcs3/Emu/RSX/VK/vkutils/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@ namespace vk
{
memory_type_info host_visible_coherent;
memory_type_info device_local;
memory_type_info device_bar;

u64 device_local_total_bytes;
u64 host_visible_total_bytes;
u64 device_bar_total_bytes;

PFN_vkGetMemoryHostPointerPropertiesEXT _vkGetMemoryHostPointerPropertiesEXT;
};
Expand All @@ -56,6 +58,7 @@ namespace vk
bool unrestricted_depth_range_support = false;
bool surface_capabilities_2_support = false;
bool debug_utils_support = false;
bool sampler_mirror_clamped_support = false;

friend class render_device;
private:
Expand Down