Skip to content

Commit

Permalink
rsx: Write the ibo tail in the correct order
Browse files Browse the repository at this point in the history
Fixes MGS4’s graveyard scene.
  • Loading branch information
linkmauve committed Oct 29, 2019
1 parent 066286c commit b4327a8
Showing 1 changed file with 21 additions and 28 deletions.
49 changes: 21 additions & 28 deletions rpcs3/Emu/RSX/Common/BufferUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,7 @@ namespace
{
AVX2_FUNC
static
std::tuple<u16, u16, u32> upload_u16_swapped_avx2(const void *src, void *dst, u32 count, u16 restart_index)
std::tuple<u16, u16> upload_u16_swapped_avx2(const void *src, void *dst, u32 iterations, u16 restart_index)
{
const __m256i shuffle_mask = _mm256_set_epi8(
0xE, 0xF, 0xC, 0xD,
Expand All @@ -754,7 +754,6 @@ namespace
__m256i min = _mm256_set1_epi16(0xffff);
__m256i max = _mm256_set1_epi16(0);

const auto iterations = count / 16;
for (unsigned n = 0; n < iterations; ++n)
{
const __m256i raw = _mm256_loadu_si256(src_stream++);
Expand Down Expand Up @@ -802,12 +801,12 @@ namespace
const u16 min_index = u16(_mm_cvtsi128_si32(min2) & 0xFFFF);
const u16 max_index = u16(_mm_cvtsi128_si32(max2) & 0xFFFF);

return std::make_tuple(min_index, max_index, count);
return std::make_tuple(min_index, max_index);
}

SSE4_1_FUNC
static
std::tuple<u16, u16, u32> upload_u16_swapped_sse4_1(const void *src, void *dst, u32 count, u16 restart_index)
std::tuple<u16, u16> upload_u16_swapped_sse4_1(const void *src, void *dst, u32 iterations, u16 restart_index)
{
const __m128i shuffle_mask = _mm_set_epi8(
0xE, 0xF, 0xC, 0xD,
Expand All @@ -822,7 +821,6 @@ namespace
__m128i min = _mm_set1_epi16(0xffff);
__m128i max = _mm_set1_epi16(0);

const auto iterations = count / 8;
for (unsigned n = 0; n < iterations; ++n)
{
const __m128i raw = _mm_loadu_si128(src_stream++);
Expand Down Expand Up @@ -864,12 +862,12 @@ namespace
const u16 min_index = u16(_mm_cvtsi128_si32(min) & 0xFFFF);
const u16 max_index = u16(_mm_cvtsi128_si32(max) & 0xFFFF);

return std::make_tuple(min_index, max_index, count);
return std::make_tuple(min_index, max_index);
}

SSE4_1_FUNC
static
std::tuple<u32, u32, u32> upload_u32_swapped_sse4_1(const void *src, void *dst, u32 count, u32 restart_index)
std::tuple<u32, u32> upload_u32_swapped_sse4_1(const void *src, void *dst, u32 iterations, u32 restart_index)
{
const __m128i shuffle_mask = _mm_set_epi8(
0xC, 0xD, 0xE, 0xF,
Expand All @@ -884,7 +882,6 @@ namespace
__m128i min = _mm_set1_epi32(0xffffffff);
__m128i max = _mm_set1_epi32(0);

const auto iterations = count / 4;
for (unsigned n = 0; n < iterations; ++n)
{
const __m128i raw = _mm_loadu_si128(src_stream++);
Expand Down Expand Up @@ -918,57 +915,53 @@ namespace
const u32 min_index = u32(_mm_cvtsi128_si32(min));
const u32 max_index = u32(_mm_cvtsi128_si32(max));

return std::make_tuple(min_index, max_index, count);
return std::make_tuple(min_index, max_index);
}

template<typename T>
static
std::tuple<T, T, u32> upload_untouched(gsl::span<to_be_t<const T>> src, gsl::span<T> dst, T restart_index, bool skip_restart)
{
T min_index, max_index;
T min_index = index_limit<T>();
T max_index = 0;
u32 written = 0;
u32 remaining = src.size();
u32 length = src.size();

if (remaining >= 32 && !skip_restart)
if (length >= 32 && !skip_restart)
{
if constexpr (std::is_same<T, u16>::value)
{
if (s_use_avx2)
{
const auto count = (remaining & ~0xf);
std::tie(min_index, max_index, written) = upload_u16_swapped_avx2(src.data(), dst.data(), count, restart_index);
u32 iterations = length >> 4;
written = length & 0xf;
std::tie(min_index, max_index) = upload_u16_swapped_avx2(src.data(), dst.data(), iterations, restart_index);
}
else if (s_use_sse4_1)
{
const auto count = (remaining & ~0x7);
std::tie(min_index, max_index, written) = upload_u16_swapped_sse4_1(src.data(), dst.data(), count, restart_index);
u32 iterations = length >> 3;
written = length & 0x7;
std::tie(min_index, max_index) = upload_u16_swapped_sse4_1(src.data(), dst.data(), iterations, restart_index);
}
}
else if constexpr (std::is_same<T, u32>::value)
{
if (s_use_sse4_1)
{
const auto count = (remaining & ~0x3);
std::tie(min_index, max_index, written) = upload_u32_swapped_sse4_1(src.data(), dst.data(), count, restart_index);
u32 iterations = length >> 2;
written = length & 0x3;
std::tie(min_index, max_index) = upload_u32_swapped_sse4_1(src.data(), dst.data(), iterations, restart_index);
}
}
else
{
fmt::throw_exception("Unreachable" HERE);
}

remaining -= written;
}
else
{
min_index = index_limit<T>();
max_index = 0;
written = 0;
}

while (remaining--)
for (u32 i = written; i < length; ++i)
{
T index = src[written];
T index = src[i];
if (index == restart_index)
{
if (!skip_restart)
Expand Down

0 comments on commit b4327a8

Please sign in to comment.