Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rsx: PS3 Native frame limiter improvements, add Infinite frame limiter #12052

Merged
merged 2 commits into from
Jun 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions rpcs3/Emu/Cell/Modules/cellVideoOut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "Emu/Cell/PPUModule.h"
#include "Emu/IdManager.h"
#include "Emu/RSX/rsx_utils.h"
#include "Emu/RSX/RSXThread.h"

#include "cellVideoOut.h"

Expand Down Expand Up @@ -211,6 +212,8 @@ error_code cellVideoOutConfigure(u32 videoOut, vm::ptr<CellVideoOutConfiguration

cellSysutil.notice("Selected video configuration: resolutionId=0x%x, aspect=0x%x=>0x%x, format=0x%x", config->resolutionId, config->aspect, conf.aspect, config->format);

// This function resets VSYNC to be enabled
rsx::get_current_renderer()->requested_vsync = true;
return CELL_OK;
}

Expand Down
34 changes: 27 additions & 7 deletions rpcs3/Emu/Cell/lv2/sys_rsx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,11 @@ error_code sys_rsx_context_attribute(u32 context_id, u32 package_id, u64 a3, u64

render->on_frame_end(static_cast<u32>(a4));
render->send_event(0, SYS_RSX_EVENT_QUEUE_BASE << a3, 0);

if (g_cfg.video.frame_limit == frame_limit_type::infinite)
{
render->post_vblank_event(get_system_time());
}
}
break;

Expand Down Expand Up @@ -536,12 +541,24 @@ error_code sys_rsx_context_attribute(u32 context_id, u32 package_id, u64 a3, u64
case 0x106: // ? (Used by cellGcmInitPerfMon)
break;

case 0x108: // cellGcmSetSecondVFrequency
case 0x108: // cellGcmSetVBlankFrequency, cellGcmSetSecondVFrequency
// a4 == 3, CELL_GCM_DISPLAY_FREQUENCY_59_94HZ
// a4 == 2, CELL_GCM_DISPLAY_FREQUENCY_SCANOUT
// a4 == 4, CELL_GCM_DISPLAY_FREQUENCY_DISABLE
// Note: Scanout/59_94 is ignored currently as we report refresh rate of 59_94hz as it is, so the difference doesnt matter
render->enable_second_vhandler.store(a4 != 4);

if (a5 == 1u)
{
// This function resets vsync state to enabled
render->requested_vsync = true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is the counterpart to disable this? We know PS3 had engines that had tearing when fps dropped below a threshold (e.g cryengine 2)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see sometimes in logs that the games reset it afterward and after cellVideoOutConfigure, I will add to it as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, it is unlikely that they are calling videoOutConfigure during gameplay as that is a heavy operation. It is more likely to be toggled using this method. Is it known what happens if a5 is 0?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm assuming that games also react to the unused CellVideoOutCallback. (maybe it's called when the display is changed)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's controlled by CellGcmSetFlipMode. You can choose 3 modes there, 1 for vsync, 1 for scanline sync and the last mode for all out anarchy. We only need to set vsync to engage when the first mode is chosen.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm assuming that games also react to the unused CellVideoOutCallback. (maybe it's called when the display is changed)

I've checked and no game uses cellVideoOutRegisterCallback.


// TODO: Set vblank frequency
}
else if (ensure(a5 == 2u))
{
// TODO: Implement its frequency as well
render->enable_second_vhandler.store(a4 != 4);
}

break;

case 0x10a: // ? Involved in managing flip status through cellGcmResetFlipStatus
Expand Down Expand Up @@ -743,22 +760,25 @@ error_code sys_rsx_context_attribute(u32 context_id, u32 package_id, u64 a3, u64

case 0xFED: // hack: vblank command
{
if (get_current_cpu_thread())
if (cpu_thread::get_current<ppu_thread>())
{
// VBLANK thread only
// VBLANK/RSX thread only
return CELL_EINVAL;
}

// NOTE: There currently seem to only be 2 active heads on PS3
ensure(a3 < 2);

// todo: this is wrong and should be 'second' vblank handler and freq, but since currently everything is reported as being 59.94, this should be fine
vm::_ref<u32>(render->device_addr + 0x30) = 1;
driverInfo.head[a3].lastSecondVTime.atomic_op([&](be_t<u64>& time)
{
a4 = std::max<u64>(a4, time + 1);
time = a4;
});

// Time point is supplied in argument 4 (todo: convert it to MFTB rate and use it)
const u64 current_time = rsxTimeStamp();

driverInfo.head[a3].lastSecondVTime = current_time;

// Note: not atomic
driverInfo.head[a3].lastVTimeLow = static_cast<u32>(current_time);
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/lv2/sys_rsx.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ struct RsxDriverInfo
be_t<u32> lastQueuedBufferId; // 0x14 todo: this is definately not this variable but its 'unused' so im using it for queueId to pass to flip handler
be_t<u32> unk3; // 0x18
be_t<u32> lastVTimeLow; // 0x1C last time for first vhandler freq (low 32-bits)
be_t<u64> lastSecondVTime; // 0x20 last time for second vhandler freq
atomic_be_t<u64> lastSecondVTime; // 0x20 last time for second vhandler freq
be_t<u64> unk4; // 0x28
atomic_be_t<u64> vBlankCount; // 0x30
be_t<u32> unk; // 0x38 possible u32, 'flip field', top/bottom for interlaced
Expand Down
103 changes: 62 additions & 41 deletions rpcs3/Emu/RSX/RSXThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,31 @@ namespace rsx
}
}

void thread::post_vblank_event(u64 post_event_time)
{
vblank_count++;

if (isHLE)
{
if (auto ptr = vblank_handler)
{
intr_thread->cmd_list
({
{ ppu_cmd::set_args, 1 }, u64{1},
{ ppu_cmd::lle_call, ptr },
{ ppu_cmd::sleep, 0 }
});

intr_thread->cmd_notify++;
intr_thread->cmd_notify.notify_one();
}
}
else
{
sys_rsx_context_attribute(0x55555555, 0xFED, 1, get_guest_system_time(post_event_time), 0, 0);
}
}

void thread::on_task()
{
g_tls_log_prefix = []
Expand Down Expand Up @@ -686,7 +711,6 @@ namespace rsx
{
{
local_vblank_count++;
vblank_count++;

if (local_vblank_count == vblank_rate)
{
Expand All @@ -701,25 +725,7 @@ namespace rsx
vblank_period = 1'000'000 + u64{g_cfg.video.vblank_ntsc.get()} * 1000;
}

if (isHLE)
{
if (auto ptr = vblank_handler)
{
intr_thread->cmd_list
({
{ ppu_cmd::set_args, 1 }, u64{1},
{ ppu_cmd::lle_call, ptr },
{ ppu_cmd::sleep, 0 }
});

intr_thread->cmd_notify++;
intr_thread->cmd_notify.notify_one();
}
}
else
{
sys_rsx_context_attribute(0x55555555, 0xFED, 1, get_guest_system_time(post_event_time), 0, 0);
}
post_vblank_event(post_event_time);
}
}
else if (wait_sleep)
Expand Down Expand Up @@ -3203,15 +3209,17 @@ namespace rsx
}

double limit = 0.;
switch (g_disable_frame_limit ? frame_limit_type::none : g_cfg.video.frame_limit)
const auto frame_limit = g_disable_frame_limit ? frame_limit_type::none : g_cfg.video.frame_limit;

switch (frame_limit)
{
case frame_limit_type::none: limit = 0.; break;
case frame_limit_type::_59_94: limit = 59.94; break;
case frame_limit_type::_50: limit = 50.; break;
case frame_limit_type::_60: limit = 60.; break;
case frame_limit_type::_30: limit = 30.; break;
case frame_limit_type::_auto: limit = static_cast<double>(g_cfg.video.vblank_rate); break;
case frame_limit_type::_ps3: limit = 0.; break;
case frame_limit_type::infinite: limit = 0.; break;
default:
break;
}
Expand Down Expand Up @@ -3246,22 +3254,32 @@ namespace rsx
performance_counters.idle_time += delay_us;
}
}

flip_notification_count = 1;
}
else if (wait_for_flip_sema)
else if (frame_limit == frame_limit_type::_ps3)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I'm reading this correctly the logic here is essentially "do not flip until a fresh vblank signal is received". You have the vblank signal as the flip release and this is an acquire operation here. Two problems:

  1. There is still the correct event sent to us which you are now short-circuiting in semaphore-acquire. I'm not sure why this approach is to be considered better as it is essentially a hackier approach. Note that this "PS3 native" sync is to be used with games that have trouble with the normal PC-friendly framelimiter system, so extra bells and whistles are unneeded for this path. Is it that it doesn't work properly with HLE? I'm scratching my head on why I should prefer this version over the existing one which more closely follows what the PS3 does.
  2. This whole set of variables and checks is not correctly labeled to match how display systems are architected with clients acquiring the display and the server releasing it in a cycle. Not a blocker by itself, but its not ideal and I would end up having to change it.

Copy link
Contributor Author

@elad335 elad335 Jun 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm quite confident DEVICE 0x30 is something to do with the queue command and not vblank, as it's inserted by gcm for all display sync options and occurs before semaphore wait by cellGcmSetFlipWithWaitLabel.
This function exists specifically to allow accurate timing of flipping and this is a usage example:
image
Here is label 0 is used, notice that it resides after DEVICE 0x30 waiting
Furthermore, flips are executed along with the vblank signal, not after, I guess that's why TLoU regressed by the original pr.

Copy link
Contributor

@kd-11 kd-11 Jun 25, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the trace above, we can see there are 2 signals:
The only difference is that there are 2 cond vars that I see, one to ack the enqueue and another to wait for vertical sync.
Server:

	label_0x30.wait_signal();
	handle_enqueued_head();
	label_0x30.reset()

Client:

	enqueue();
	label_0x30.signal();
	label_0x30.wait_ack();
	call(); <- CB reset??
	label_0x0.wait_signal();
	flip();

My guess here is that 0x0 is signaled by the hardware vblank system, 0x30 just acknowledges that a head has been enqueued. So, your PR addresses this by having the system use vblank instead of enqueue signal.
While this is fine, the second critique still holds. The implementation is convoluted and not clear at all.
My suggestion to solve this once and for all is to have a special kind of cond var with waiting for known values. Simply replace my rushed solution with the wait_for_flip_sema with proper condvar.wait() semantics to make the flow clearer.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For simplicity I have omitted the 0x10 logic, I have no idea where that signal is for.
For 0x30 handling, we have the frame enqueue logic in sys_rsx and rsx_thread::end_frame() for that.
For 0x0 we can signal from vblank thread.

Copy link
Contributor Author

@elad335 elad335 Jun 25, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No I have HLEd the game this is a cellGcmSetFlipWithWaitLabel call with 0 address. It doesn't appear in other games which uses other flip functions.

Copy link
Contributor Author

@elad335 elad335 Jun 25, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which is named _cellGcmSetFlipCommandWithWaitLabel on firmware.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is an LLE flip without waiting on label.
image

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So for "normal" flip the 0x0 var is absent? That simplifies the design then and makes the 0x30_ack a wait for vblank.
Again, my biggest problem with the way it is now is that the "documentation is the code" philosophy doesn't hold up because the design is all over the place. This isn't a new problem, but if it is now well understood, we have to do it properly and this PR already modifies the whole thing anyway. I only understand what is going on because I'm familiar with most of the code there.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

0x30 has nothing to do with vblank, it appears in all sync modes even with VSYNC turned off and besides the whole purpose of _cellGcmSetFlipCommandWithWaitLabel is to time the flip very closely to what CELL decides. So if 0x30 is a vblank semaphore it defies all logic as to why the function exists in the first place.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems my point is getting lost in translation, that doesn't matter it's just an ack, whether its from vblank or not. I'm not happy with the implementation, but if we're being honest, you're unlikely to restructure it, so I'll merge and change it later when I have time.

{
const auto& value = vm::_ref<RsxSemaphore>(device_addr + 0x30).val;
if (value != flip_sema_wait_val)
bool exit = false;

if (vblank_at_flip == umax)
{
vblank_at_flip = +vblank_count;
flip_notification_count = 1;
exit = true;
}

if (requested_vsync && (exit || vblank_at_flip == vblank_count))
{
// Not yet signaled, handle it later
async_flip_requested |= flip_request::emu_requested;
async_flip_buffer = buffer;
return;
}

wait_for_flip_sema = false;
vblank_at_flip = umax;
}

int_flip_index++;
int_flip_index += flip_notification_count;

current_display_buffer = buffer;
m_queued_flip.emu_flip = true;
Expand All @@ -3274,23 +3292,26 @@ namespace rsx
flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_DONE;
m_queued_flip.in_progress = false;

if (!isHLE)
while (flip_notification_count--)
{
sys_rsx_context_attribute(0x55555555, 0xFEC, buffer, 0, 0, 0);
return;
}
if (!isHLE)
{
sys_rsx_context_attribute(0x55555555, 0xFEC, buffer, 0, 0, 0);
continue;
}

if (auto ptr = flip_handler)
{
intr_thread->cmd_list
({
{ ppu_cmd::set_args, 1 }, u64{ 1 },
{ ppu_cmd::lle_call, ptr },
{ ppu_cmd::sleep, 0 }
});
if (auto ptr = flip_handler)
{
intr_thread->cmd_list
({
{ ppu_cmd::set_args, 1 }, u64{ 1 },
{ ppu_cmd::lle_call, ptr },
{ ppu_cmd::sleep, 0 }
});

intr_thread->cmd_notify++;
intr_thread->cmd_notify.notify_one();
intr_thread->cmd_notify++;
intr_thread->cmd_notify.notify_one();
}
}
}
}
7 changes: 4 additions & 3 deletions rpcs3/Emu/RSX/RSXThread.h
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,7 @@ namespace rsx
u32 flip_status;
int debug_level;

atomic_t<bool> requested_vsync{false};
atomic_t<bool> requested_vsync{true};
atomic_t<bool> enable_second_vhandler{false};

RsxDisplayInfo display_buffers[8];
Expand Down Expand Up @@ -645,8 +645,9 @@ namespace rsx
atomic_t<u64> vblank_count{0};
bool capture_current_frame = false;

bool wait_for_flip_sema = false;
u32 flip_sema_wait_val = 0;
u64 vblank_at_flip = umax;
u64 flip_notification_count = 0;
void post_vblank_event(u64 post_event_time);

public:
atomic_t<bool> sync_point_request = false;
Expand Down
5 changes: 2 additions & 3 deletions rpcs3/Emu/RSX/rsx_decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -1874,11 +1874,10 @@ struct registers_decoder<NV406E_SEMAPHORE_RELEASE>

static std::string dump(const decoded_type& decoded)
{
return fmt::format("NV409E semaphore: release: 0x%x", decoded.value);
return fmt::format("NV406E semaphore: release: 0x%x", decoded.value);
}
};


template <>
struct registers_decoder<NV406E_SEMAPHORE_ACQUIRE>
{
Expand All @@ -1891,7 +1890,7 @@ struct registers_decoder<NV406E_SEMAPHORE_ACQUIRE>

static std::string dump(const decoded_type& decoded)
{
return fmt::format("NV409E semaphore: acquire: 0x%x", decoded.value);
return fmt::format("NV406E semaphore: acquire: 0x%x", decoded.value);
}
};

Expand Down
34 changes: 19 additions & 15 deletions rpcs3/Emu/RSX/rsx_methods.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ namespace rsx
template<bool FlushDMA, bool FlushPipe>
void write_gcm_label(thread* rsx, u32 address, u32 data)
{
const bool is_flip_sema = (address == (rsx->label_addr + 0x10) || address == (rsx->label_addr + 0x30));
const bool is_flip_sema = (address == (rsx->label_addr + 0x10) || address == (rsx->device_addr + 0x30));
if (!is_flip_sema)
{
// First, queue the GPU work. If it flushes the queue for us, the following routines will be faster.
Expand Down Expand Up @@ -108,18 +108,6 @@ namespace rsx
rsx->flush_fifo();
}

if (addr == rsx->device_addr + 0x30)
{
if (g_cfg.video.frame_limit == frame_limit_type::_ps3 && rsx->requested_vsync)
{
// Enables PS3-compliant vblank behavior
rsx->flip_sema_wait_val = arg;
rsx->wait_for_flip_sema = (sema != arg);
}

return;
}

u64 start = rsx::uclock();
u64 last_check_val = start;

Expand Down Expand Up @@ -189,6 +177,12 @@ namespace rsx
return;
}

if (addr == rsx->device_addr + 0x30 && !arg)
{
// HW flip synchronization related, 1 is not written without display queue command (TODO: make it behave as real hw)
arg = 1;
}

write_gcm_label<false, true>(rsx, addr, arg);
}
}
Expand Down Expand Up @@ -1776,6 +1770,11 @@ namespace rsx
{
ensure(rsx->isHLE);

if (rsx->vblank_at_flip != umax)
{
rsx->flip_notification_count++;
}

if (auto ptr = rsx->queue_handler)
{
rsx->intr_thread->cmd_list
Expand Down Expand Up @@ -1824,7 +1823,7 @@ namespace rsx
template<u32 index>
struct driver_flip
{
static void impl(thread*, u32 /*reg*/, u32 arg)
static void impl(thread* rsx, u32 /*reg*/, u32 arg)
{
sys_rsx_context_attribute(0x55555555, 0x102, index, arg, 0, 0);
}
Expand All @@ -1833,8 +1832,13 @@ namespace rsx
template<u32 index>
struct queue_flip
{
static void impl(thread*, u32 /*reg*/, u32 arg)
static void impl(thread* rsx, u32 /*reg*/, u32 arg)
{
if (rsx->vblank_at_flip != umax)
{
rsx->flip_notification_count++;
}

sys_rsx_context_attribute(0x55555555, 0x103, index, arg, 0, 0);
}
};
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/system_config_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,12 +307,12 @@ void fmt_class_string<frame_limit_type>::format(std::string& out, u64 arg)
switch (value)
{
case frame_limit_type::none: return "Off";
case frame_limit_type::_59_94: return "59.94";
case frame_limit_type::_50: return "50";
case frame_limit_type::_60: return "60";
case frame_limit_type::_30: return "30";
case frame_limit_type::_auto: return "Auto";
case frame_limit_type::_ps3: return "PS3 Native";
case frame_limit_type::infinite: return "Infinite";
}

return unknown;
Expand Down
Loading