Skip to content

Commit

Permalink
Merge pull request #2391 from kd-11/rsx_surface_tests
Browse files Browse the repository at this point in the history
gl/vk/rsx: Improve texture cache
  • Loading branch information
kd-11 committed Mar 1, 2017
2 parents bd85f23 + 85c0219 commit 4ab9a2a
Show file tree
Hide file tree
Showing 19 changed files with 1,660 additions and 854 deletions.
56 changes: 28 additions & 28 deletions rpcs3/Emu/RSX/Common/TextureUtils.cpp
Expand Up @@ -363,34 +363,34 @@ u8 get_format_block_size_in_texel(int format)
LOG_ERROR(RSX, "Unimplemented block size in texels for texture format: 0x%x", format);
return 1;
}
}

/**
 * Maps a color surface format to the block size, in bytes, that the switch
 * below assigns to it (1, 2, 4, 8 or 16).
 *
 * Any format without a case label is reported through fmt::throw_exception
 * together with its numeric value.
 */
u8 get_format_block_size_in_bytes(rsx::surface_color_format format)
{
	using color_format = rsx::surface_color_format;

	switch (format)
	{
	//16 bytes
	case color_format::w32z32y32x32:
		return 16;

	//8 bytes
	case color_format::w16z16y16x16:
		return 8;

	//4 bytes
	case color_format::x32:
	case color_format::a8r8g8b8:
	case color_format::a8b8g8r8:
	case color_format::x8r8g8b8_z8r8g8b8:
	case color_format::x8r8g8b8_o8r8g8b8:
	case color_format::x8b8g8r8_z8b8g8r8:
	case color_format::x8b8g8r8_o8b8g8r8:
		return 4;

	//2 bytes
	case color_format::r5g6b5:
	case color_format::g8b8:
	case color_format::x1r5g5b5_z1r5g5b5:
	case color_format::x1r5g5b5_o1r5g5b5:
		return 2;

	//1 byte
	case color_format::b8:
		return 1;

	default:
		break;
	}

	//Reached only for formats that have no case label above
	fmt::throw_exception("Invalid color format 0x%x" HERE, static_cast<u32>(format));
}

/**
 * Maps a color surface format to the block size, in bytes, that the switch
 * below assigns to it (1, 2, 4, 8 or 16).
 *
 * Any format without a case label is reported through fmt::throw_exception
 * together with its numeric value.
 */
u8 get_format_block_size_in_bytes(rsx::surface_color_format format)
{
	using color_format = rsx::surface_color_format;

	switch (format)
	{
	//16 bytes
	case color_format::w32z32y32x32:
		return 16;

	//8 bytes
	case color_format::w16z16y16x16:
		return 8;

	//4 bytes
	case color_format::x32:
	case color_format::a8r8g8b8:
	case color_format::a8b8g8r8:
	case color_format::x8r8g8b8_z8r8g8b8:
	case color_format::x8r8g8b8_o8r8g8b8:
	case color_format::x8b8g8r8_z8b8g8r8:
	case color_format::x8b8g8r8_o8b8g8r8:
		return 4;

	//2 bytes
	case color_format::r5g6b5:
	case color_format::g8b8:
	case color_format::x1r5g5b5_z1r5g5b5:
	case color_format::x1r5g5b5_o1r5g5b5:
		return 2;

	//1 byte
	case color_format::b8:
		return 1;

	default:
		break;
	}

	//Reached only for formats that have no case label above
	fmt::throw_exception("Invalid color format 0x%x" HERE, static_cast<u32>(format));
}

static size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignement, size_t mipmap_alignment)
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp
Expand Up @@ -41,7 +41,7 @@ void Shader::Compile(const std::string &code, SHADER_TYPE st)

void D3D12GSRender::load_program()
{
auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple<bool, u16>
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple<bool, u16>
{
ID3D12Resource *surface = nullptr;
if (!is_depth)
Expand Down
162 changes: 95 additions & 67 deletions rpcs3/Emu/RSX/GL/GLGSRender.cpp
Expand Up @@ -332,7 +332,6 @@ void GLGSRender::begin()

std::chrono::time_point<steady_clock> now = steady_clock::now();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
m_draw_calls++;
}

namespace
Expand Down Expand Up @@ -381,8 +380,6 @@ void GLGSRender::end()
m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
}

draw_fbo.bind();

//Check if depth buffer is bound and valid
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
Expand Down Expand Up @@ -473,10 +470,17 @@ void GLGSRender::end()
draw_fbo.draw_arrays(rsx::method_registers.current_draw_clause.primitive, vertex_draw_count);
}

m_attrib_ring_buffer->notify();
m_index_ring_buffer->notify();
m_uniform_ring_buffer->notify();

std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();

write_buffers();
m_draw_calls++;

//LOG_WARNING(RSX, "Finished draw call, EID=%d", m_draw_calls);
synchronize_buffers();

rsx::thread::end();
}
Expand Down Expand Up @@ -546,10 +550,11 @@ void GLGSRender::on_init_thread()
m_index_ring_buffer->create(gl::buffer::target::element_array, 16 * 0x100000);

m_vao.element_array_buffer = *m_index_ring_buffer;
m_gl_texture_cache.initialize_rtt_cache();

if (g_cfg_rsx_overlay)
m_text_printer.init();

m_gl_texture_cache.initialize(this);
}

void GLGSRender::on_exit()
Expand Down Expand Up @@ -588,11 +593,12 @@ void GLGSRender::on_exit()
m_index_ring_buffer->remove();

m_text_printer.close();
m_gl_texture_cache.close();

return GSRender::on_exit();
}

void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
void GLGSRender::clear_surface(u32 arg)
{
if (rsx::method_registers.surface_color_target() == rsx::surface_target::none) return;

Expand All @@ -602,9 +608,6 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
return;
}

renderer->init_buffers(true);
renderer->draw_fbo.bind();

GLbitfield mask = 0;

rsx::surface_depth_format surface_depth_format = rsx::method_registers.surface_depth_fmt();
Expand All @@ -618,6 +621,10 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
glDepthMask(GL_TRUE);
glClearDepth(double(clear_depth) / max_depth_value);
mask |= GLenum(gl::buffers::depth);

gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
if (ds && !ds->cleared())
ds->set_cleared();
}

if (surface_depth_format == rsx::surface_depth_format::z24s8 && (arg & 0x2))
Expand All @@ -644,61 +651,53 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
}

glClear(mask);
renderer->write_buffers();
}

//Function-pointer type for a GL method handler: takes a u32 and the GLGSRender instance
using rsx_method_impl_t = void(*)(u32, GLGSRender*);

//Table keyed by method id, mapping to its handler; NV4097_CLEAR_SURFACE is the only entry
static const std::unordered_map<u32, rsx_method_impl_t> g_gl_method_tbl =
{
{ NV4097_CLEAR_SURFACE, nv4097_clear_surface }
};

bool GLGSRender::do_method(u32 cmd, u32 arg)
{
auto found = g_gl_method_tbl.find(cmd);

if (found == g_gl_method_tbl.end())
{
return false;
}

found->second(arg, this);

switch (cmd)
{
case NV4097_CLEAR_SURFACE:
{
if (arg & 0x1)
{
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
if (ds && !ds->cleared())
ds->set_cleared();
}
init_buffers(true);
synchronize_buffers();
clear_surface(arg);
return true;
}
case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE:
case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE:
flush_draw_buffers = true;
return true;
}

return true;
return false;
}

bool GLGSRender::load_program()
{
auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple<bool, u16>
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture &tex, bool is_depth) -> std::tuple<bool, u16>
{
gl::render_target *surface = nullptr;
if (!is_depth)
surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr);
else
surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);

if (!surface) return std::make_tuple(false, 0);
if (!surface)
{
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch());
if (!rsc.surface || rsc.is_depth_surface != is_depth)
return std::make_tuple(false, 0);

surface = rsc.surface;
}

return std::make_tuple(true, surface->get_native_pitch());
};

RSXVertexProgram vertex_program = get_current_vertex_program();
RSXFragmentProgram fragment_program = get_current_fragment_program(rtt_lookup_func);

std::array<float, 16> rtt_scaling;
u32 unnormalized_rtts = 0;

for (auto &vtx : vertex_program.rsx_vertex_inputs)
Expand Down Expand Up @@ -819,17 +818,8 @@ void GLGSRender::flip(int buffer)
rsx::tiled_region buffer_region = get_tiled_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
u32 absolute_address = buffer_region.address + buffer_region.base;

if (0)
{
LOG_NOTICE(RSX, "flip(%d) -> 0x%x [0x%x]", buffer, absolute_address, rsx::get_address(gcm_buffers[1 - buffer].offset, CELL_GCM_LOCATION_LOCAL));
}

gl::texture *render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address);

/**
* Calling read_buffers will overwrite cached content
*/

__glcheck m_flip_fbo.recreate();
m_flip_fbo.bind();

Expand Down Expand Up @@ -877,33 +867,27 @@ void GLGSRender::flip(int buffer)
areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height });

coordi aspect_ratio;
if (1) //enable aspect ratio
{
sizei csize(m_frame->client_width(), m_frame->client_height());
sizei new_size = csize;

const double aq = (double)buffer_width / buffer_height;
const double rq = (double)new_size.width / new_size.height;
const double q = aq / rq;
sizei csize(m_frame->client_width(), m_frame->client_height());
sizei new_size = csize;

if (q > 1.0)
{
new_size.height = int(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = int(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
const double aq = (double)buffer_width / buffer_height;
const double rq = (double)new_size.width / new_size.height;
const double q = aq / rq;

aspect_ratio.size = new_size;
if (q > 1.0)
{
new_size.height = int(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else
else if (q < 1.0)
{
aspect_ratio.size = { m_frame->client_width(), m_frame->client_height() };
new_size.width = int(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}

aspect_ratio.size = new_size;

gl::screen.clear(gl::buffers::color_depth_stencil);

__glcheck flip_fbo->blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical());
Expand All @@ -928,6 +912,8 @@ void GLGSRender::flip(int buffer)
m_vertex_upload_time = 0;
m_textures_upload_time = 0;

m_gl_texture_cache.clear_temporary_surfaces();

for (auto &tex : m_rtts.invalidated_resources)
{
tex->remove();
Expand All @@ -946,6 +932,48 @@ u64 GLGSRender::timestamp() const

bool GLGSRender::on_access_violation(u32 address, bool is_writing)
{
if (is_writing) return m_gl_texture_cache.mark_as_dirty(address);
return false;
if (is_writing)
return m_gl_texture_cache.mark_as_dirty(address);
else
return m_gl_texture_cache.flush_section(address);
}

/**
 * Services the flush requests queued by post_flush_request().
 *
 * queue_guard is held for the whole pass. Entries flagged `received` are
 * removed first. Each remaining entry that has not been processed yet is
 * flushed through the texture cache, has its result stored, and its
 * condition variable is notified.
 *
 * Entries that are already processed but not yet flagged `received` are left
 * untouched: flushing them a second time would overwrite `result` with the
 * outcome of a redundant flush before the requester has read it.
 */
void GLGSRender::do_local_task()
{
	std::lock_guard<std::mutex> queue_lock(queue_guard);

	//Drop requests that have been flagged as received
	work_queue.remove_if([](work_item &q) { return q.received; });

	for (work_item& q : work_queue)
	{
		std::unique_lock<std::mutex> item_lock(q.guard_mutex);

		//Already serviced on an earlier pass; keep the stored result intact
		//NOTE(review): assumes work_item::processed starts out false - confirm against its declaration
		if (q.processed)
			continue;

		//Process this address
		q.result = m_gl_texture_cache.flush_section(q.address_to_flush);
		q.processed = true;

		//Notify thread waiting on this; release the item mutex first so the
		//woken thread does not immediately block on it
		item_lock.unlock();
		q.cv.notify_one();
	}
}

/**
 * Appends a new work item to work_queue while holding queue_guard, records
 * `address` as its address_to_flush and returns a reference to the queued item.
 */
work_item& GLGSRender::post_flush_request(u32 address)
{
	std::lock_guard<std::mutex> queue_lock(queue_guard);

	//Construct the entry in place at the tail, then fill in the target address
	work_queue.emplace_back();

	work_item &request = work_queue.back();
	request.address_to_flush = address;

	return request;
}

/**
 * Calls write_buffers() when flush_draw_buffers is set, then clears the flag.
 * Does nothing when the flag is not set.
 */
void GLGSRender::synchronize_buffers()
{
	//No flush has been requested
	if (!flush_draw_buffers)
		return;

	//LOG_WARNING(RSX, "Flushing RTT buffers EID=%d", m_draw_calls);
	write_buffers();
	flush_draw_buffers = false;
}

0 comments on commit 4ab9a2a

Please sign in to comment.