Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prefetch MFC list elements #5345

Merged
merged 6 commits into from Jul 23, 2019
Merged
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
94 changes: 70 additions & 24 deletions rpcs3/Emu/Cell/SPUThread.cpp
Expand Up @@ -1500,55 +1500,101 @@ bool spu_thread::do_dma_check(const spu_mfc_cmd& args)

bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
{
struct list_element
// Amount of elements to fetch in one go
constexpr u32 fetch_size = 6;

struct alignas(8) list_element
{
be_t<u16> sb; // Stall-and-Notify bit (0x8000)
be_t<u16> ts; // List Transfer Size
be_t<u32> ea; // External Address Low
} item{};
};

while (args.size)
union
{
if (UNLIKELY(item.sb & 0x8000))
{
ch_stall_mask |= utils::rol32(1, args.tag);
list_element items[fetch_size];
alignas(v128) char bufitems[sizeof(items)];
};

if (!ch_stall_stat.get_count())
spu_mfc_cmd transfer;
transfer.eah = 0;
transfer.tag = args.tag;
transfer.cmd = MFC(args.cmd & ~MFC_LIST_MASK);

args.lsa &= 0x3fff0;

u32 index = fetch_size;

// Assume called with size greater than 0
while (true)
{
// Check if fetching is needed
if (index == fetch_size)
{
const auto src = _ptr<void>(args.eal & 0x3fff8);
if ((uptr)src % alignof(v128))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can the union be replaced with a simple `alignas(16) list_element items[fetch_size];`, and the whole alignment check, with its two paths, be replaced with a single memcpy?

{
ch_event_stat |= SPU_EVENT_SN;
// Unaligned
((u64*)+bufitems)[0] = ((u64*)src)[0];
((u64*)+bufitems)[1] = ((u64*)src)[1];
((u64*)+bufitems)[2] = ((u64*)src)[2];
((u64*)+bufitems)[3] = ((u64*)src)[3];
((u64*)+bufitems)[4] = ((u64*)src)[4];
((u64*)+bufitems)[5] = ((u64*)src)[5];
}
else
{
((v128*)+bufitems)[0] = ((v128*)src)[0];
((v128*)+bufitems)[1] = ((v128*)src)[1];
((v128*)+bufitems)[2] = ((v128*)src)[2];
}

ch_stall_stat.set_value(utils::rol32(1, args.tag) | ch_stall_stat.get_value());

args.tag |= 0x80; // Set stalled status
return false;
// Reset to elements array head
index = 0;
}

args.lsa &= 0x3fff0;
item = _ref<list_element>(args.eal & 0x3fff8);
const u32 size = items[index].ts & 0x7fff;
const u32 addr = items[index].ea;

const u32 size = item.ts & 0x7fff;
const u32 addr = item.ea;

LOG_TRACE(SPU, "LIST: addr=0x%x, size=0x%x, lsa=0x%05x, sb=0x%x", addr, size, args.lsa | (addr & 0xf), item.sb);
LOG_TRACE(SPU, "LIST: addr=0x%x, size=0x%x, lsa=0x%05x, sb=0x%x", addr, size, args.lsa | (addr & 0xf), items[index].sb);

if (size)
{
spu_mfc_cmd transfer;
transfer.eal = addr;
transfer.eah = 0;
transfer.lsa = args.lsa | (addr & 0xf);
transfer.tag = args.tag;
transfer.cmd = MFC(args.cmd & ~MFC_LIST_MASK);
transfer.size = size;

do_dma_transfer(transfer);
const u32 add_size = std::max<u32>(size, 16);
args.lsa += add_size;
}

args.eal += 8;
args.size -= 8;

if (!args.size)
{
// No more elements
break;
}

args.eal += 8;

if (UNLIKELY(items[index].sb & 0x8000))
{
ch_stall_mask |= utils::rol32(1, args.tag);

if (!ch_stall_stat.get_count())
{
ch_event_stat |= SPU_EVENT_SN;
}

ch_stall_stat.set_value(utils::rol32(1, args.tag) | ch_stall_stat.get_value());

args.tag |= 0x80; // Set stalled status
return false;
}

index++;
}

return true;
Expand Down Expand Up @@ -2040,7 +2086,7 @@ bool spu_thread::process_mfc_cmd()

if (LIKELY(do_dma_check(cmd)))
{
if (LIKELY(do_list_transfer(cmd)))
if (LIKELY(!cmd.size || do_list_transfer(cmd)))
{
return true;
}
Expand Down