Skip to content

Commit

Permalink
GS: Add double operations to GSVector/GSNewCodeGenerator
Browse files Browse the repository at this point in the history
  • Loading branch information
TellowKrinkle committed May 26, 2022
1 parent 82de13d commit b6c1b3f
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 0 deletions.
2 changes: 2 additions & 0 deletions pcsx2/GS/GSVector.cpp
Expand Up @@ -67,6 +67,7 @@ CONSTINIT const GSVector4 GSVector4::m_two = cxpr(2.0f);
CONSTINIT const GSVector4 GSVector4::m_four = cxpr(4.0f);
CONSTINIT const GSVector4 GSVector4::m_x4b000000 = cxpr(0x4b000000);
CONSTINIT const GSVector4 GSVector4::m_x4f800000 = cxpr(0x4f800000);
CONSTINIT const GSVector4 GSVector4::m_xc1e00000000fffff = cxpr64(0xc1e00000000fffffull);
CONSTINIT const GSVector4 GSVector4::m_max = cxpr(FLT_MAX);
CONSTINIT const GSVector4 GSVector4::m_min = cxpr(FLT_MIN);

Expand All @@ -78,6 +79,7 @@ CONSTINIT const GSVector8 GSVector8::m_x7fffffff = cxpr(0x7fffffff);
CONSTINIT const GSVector8 GSVector8::m_x80000000 = cxpr(0x80000000);
CONSTINIT const GSVector8 GSVector8::m_x4b000000 = cxpr(0x4b000000);
CONSTINIT const GSVector8 GSVector8::m_x4f800000 = cxpr(0x4f800000);
CONSTINIT const GSVector8 GSVector8::m_xc1e00000000fffff = cxpr64(0xc1e00000000fffffull);
CONSTINIT const GSVector8 GSVector8::m_max = cxpr(FLT_MAX);
CONSTINIT const GSVector8 GSVector8::m_min = cxpr(FLT_MAX);

Expand Down
62 changes: 62 additions & 0 deletions pcsx2/GS/GSVector4.h
Expand Up @@ -28,6 +28,11 @@ class alignas(16) GSVector4
{
}

constexpr GSVector4(cxpr_init_tag, u64 x, u64 y)
: U64{x, y}
{
}

public:
union
{
Expand All @@ -36,6 +41,7 @@ class alignas(16) GSVector4
struct { float left, top, right, bottom; };
float v[4];
float F32[4];
double F64[2];
s8 I8[16];
s16 I16[8];
s32 I32[4];
Expand All @@ -55,6 +61,7 @@ class alignas(16) GSVector4
static const GSVector4 m_four;
static const GSVector4 m_x4b000000;
static const GSVector4 m_x4f800000;
static const GSVector4 m_xc1e00000000fffff;
static const GSVector4 m_max;
static const GSVector4 m_min;

Expand Down Expand Up @@ -82,6 +89,16 @@ class alignas(16) GSVector4
return GSVector4(cxpr_init, x, x, x, x);
}

constexpr static GSVector4 cxpr64(u64 x, u64 y)
{
return GSVector4(cxpr_init, x, y);
}

constexpr static GSVector4 cxpr64(u64 x)
{
return GSVector4(cxpr_init, x, x);
}

__forceinline GSVector4(float x, float y, float z, float w)
{
m = _mm_set_ps(w, z, y, x);
Expand Down Expand Up @@ -119,6 +136,11 @@ class alignas(16) GSVector4
{
}

__forceinline explicit GSVector4(__m128d m)
: m(_mm_castpd_ps(m))
{
}

__forceinline explicit GSVector4(float f)
{
*this = f;
Expand Down Expand Up @@ -162,6 +184,11 @@ class alignas(16) GSVector4

#endif

__forceinline static GSVector4 f64(double x, double y)
{
return GSVector4(_mm_castpd_ps(_mm_set_pd(y, x)));
}

__forceinline void operator=(const GSVector4& v)
{
m = v.m;
Expand Down Expand Up @@ -858,6 +885,36 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
return GSVector4(_mm_cmple_ps(v1, v2));
}

__forceinline GSVector4 mul64(const GSVector4& v) const
{
return GSVector4(_mm_mul_pd(_mm_castps_pd(m), _mm_castps_pd(v.m)));
}

__forceinline GSVector4 add64(const GSVector4& v) const
{
return GSVector4(_mm_add_pd(_mm_castps_pd(m), _mm_castps_pd(v.m)));
}

__forceinline GSVector4 sub64(const GSVector4& v) const
{
return GSVector4(_mm_sub_pd(_mm_castps_pd(m), _mm_castps_pd(v.m)));
}

__forceinline static GSVector4 f32to64(const GSVector4& v)
{
return GSVector4(_mm_cvtps_pd(v.m));
}

__forceinline static GSVector4 f32to64(const void* p)
{
return GSVector4(_mm_cvtps_pd(_mm_castpd_ps(_mm_load_sd(static_cast<const double*>(p)))));
}

__forceinline GSVector4i f64toi32(bool truncate = true) const
{
return GSVector4i(truncate ? _mm_cvttpd_epi32(_mm_castps_pd(m)) : _mm_cvtpd_epi32(_mm_castps_pd(m)));
}

// clang-format off

#define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
Expand Down Expand Up @@ -907,4 +964,9 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
}

#endif

__forceinline static GSVector4 broadcast64(const void* d)
{
return GSVector4(_mm_loaddup_pd(static_cast<const double*>(d)));
}
};
57 changes: 57 additions & 0 deletions pcsx2/GS/GSVector8.h
Expand Up @@ -32,13 +32,19 @@ class alignas(32) GSVector8
{
}

constexpr GSVector8(cxpr_init_tag, u64 x, u64 y, u64 z, u64 w)
: U64{x, y, z, w}
{
}

public:
union
{
struct { float x0, y0, z0, w0, x1, y1, z1, w1; };
struct { float r0, g0, b0, a0, r1, g1, b1, a1; };
float v[8];
float F32[8];
double F64[4];
s8 I8[32];
s16 I16[16];
s32 I32[8];
Expand All @@ -57,6 +63,7 @@ class alignas(32) GSVector8
static const GSVector8 m_x80000000;
static const GSVector8 m_x4b000000;
static const GSVector8 m_x4f800000;
static const GSVector8 m_xc1e00000000fffff;
static const GSVector8 m_max;
static const GSVector8 m_min;

Expand Down Expand Up @@ -87,6 +94,16 @@ class alignas(32) GSVector8
return cxpr(static_cast<int>(x));
}

constexpr static GSVector8 cxpr64(u64 x, u64 y, u64 z, u64 w)
{
return GSVector8(cxpr_init, x, y, z, w);
}

constexpr static GSVector8 cxpr64(u64 x)
{
return GSVector8(cxpr_init, x, x, x, x);
}

__forceinline GSVector8(float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1)
{
m = _mm256_set_ps(w1, z1, y1, x1, w0, z0, y0, x0);
Expand Down Expand Up @@ -142,6 +159,11 @@ class alignas(32) GSVector8
{
}

__forceinline explicit GSVector8(__m256d m)
: m(_mm256_castpd_ps(m))
{
}

#if _M_SSE >= 0x501

__forceinline explicit GSVector8(const GSVector8i& v);
Expand Down Expand Up @@ -773,6 +795,36 @@ class alignas(32) GSVector8
return GSVector8(_mm256_cmp_ps(v1, v2, _CMP_LE_OQ));
}

__forceinline GSVector8 mul64(const GSVector8& v) const
{
return GSVector8(_mm256_mul_pd(_mm256_castps_pd(m), _mm256_castps_pd(v.m)));
}

__forceinline GSVector8 add64(const GSVector8& v) const
{
return GSVector8(_mm256_add_pd(_mm256_castps_pd(m), _mm256_castps_pd(v.m)));
}

__forceinline GSVector8 sub64(const GSVector8& v) const
{
return GSVector8(_mm256_sub_pd(_mm256_castps_pd(m), _mm256_castps_pd(v.m)));
}

__forceinline static GSVector8 f32to64(const GSVector4& v)
{
return GSVector8(_mm256_cvtps_pd(v.m));
}

__forceinline static GSVector8 f32to64(const void* p)
{
return GSVector8(_mm256_cvtps_pd(_mm_load_ps(static_cast<const float*>(p))));
}

__forceinline GSVector4i f64toi32(bool truncate = true) const
{
return GSVector4i(truncate ? _mm256_cvttpd_epi32(_mm256_castps_pd(m)) : _mm256_cvtpd_epi32(_mm256_castps_pd(m)));
}

// clang-format off

// x = v[31:0] / v[159:128]
Expand Down Expand Up @@ -888,6 +940,11 @@ class alignas(32) GSVector8
return GSVector8(_mm256_broadcastss_ps(_mm_load_ss((const float*)f)));
}

__forceinline static GSVector8 broadcast64(const void* d)
{
return GSVector8(_mm256_broadcast_sd(static_cast<const double*>(d)));
}

// TODO: v.(x0|y0|z0|w0|x1|y1|z1|w1) // broadcast element

#endif
Expand Down
20 changes: 20 additions & 0 deletions pcsx2/GS/Renderers/SW/GSNewCodeGenerator.h
Expand Up @@ -312,9 +312,18 @@ class GSNewCodeGenerator
FORWARD_JUMP(jmp)

AFORWARD(2, addps, ARGS_XO)
AFORWARD(2, addpd, ARGS_XO)
SFORWARD(2, cvtdq2ps, ARGS_XO)
SFORWARD(2, cvtpd2dq, ARGS_XO)
SFORWARD(2, cvtpd2ps, ARGS_XO)
SFORWARD(2, cvttpd2dq, ARGS_XO)
SFORWARD(2, cvtps2dq, ARGS_XO)
SFORWARD(2, cvtps2pd, ARGS_XO)
SFORWARD(2, cvtsd2si, const AddressReg&, const Operand&);
AFORWARD(2, cvtsd2ss, ARGS_XO)
AFORWARD(2, cvtss2sd, ARGS_XO)
SFORWARD(2, cvttps2dq, ARGS_XO)
SFORWARD(2, cvttsd2si, const AddressReg&, const Operand&);
SFORWARD(3, extractps, const Operand&, const Xmm&, u8)
AFORWARD(2, maxps, ARGS_XO)
AFORWARD(2, minps, ARGS_XO)
Expand All @@ -324,13 +333,21 @@ class GSNewCodeGenerator
SFORWARD(2, movd, const Reg32&, const Xmm&)
SFORWARD(2, movd, const Xmm&, const Address&)
SFORWARD(2, movd, const Xmm&, const Reg32&)
SFORWARD(2, movddup, ARGS_XO);
SFORWARD(2, movdqa, ARGS_XO)
SFORWARD(2, movdqa, const Address&, const Xmm&)
SFORWARD(2, movhps, ARGS_XO)
SFORWARD(2, movhps, const Address&, const Xmm&)
SFORWARD(2, movq, const Address&, const Xmm&)
SFORWARD(2, movq, const Xmm&, const Address&)
SFORWARD(2, movsd, const Address&, const Xmm&)
SFORWARD(2, movsd, const Xmm&, const Address&)
SFORWARD(2, movss, const Address&, const Xmm&)
SFORWARD(2, movss, const Xmm&, const Address&)
AFORWARD(2, mulpd, ARGS_XO)
AFORWARD(2, mulps, ARGS_XO)
AFORWARD(2, mulsd, ARGS_XO)
AFORWARD(2, mulss, ARGS_XO)
AFORWARD(2, orps, ARGS_XO)
AFORWARD(2, packssdw, ARGS_XO)
AFORWARD(2, packusdw, ARGS_XO)
Expand Down Expand Up @@ -382,11 +399,14 @@ class GSNewCodeGenerator
SFORWARD(2, rcpps, ARGS_XO)
AFORWARD(3, shufps, ARGS_XOI)
AFORWARD(2, subps, ARGS_XO)
AFORWARD(2, unpcklps, ARGS_XO)
AFORWARD(2, unpcklpd, ARGS_XO)
AFORWARD(2, xorps, ARGS_XO)

FORWARD_SSE_XMM0(pblendvb)

FORWARD(2, AVX, vbroadcastss, ARGS_XO)
FORWARD(2, AVX, vbroadcastsd, const Ymm&, const Address&)
FORWARD(2, AVX2, vbroadcasti128, const Ymm&, const Address&)
FORWARD(2, AVX, vbroadcastf128, const Ymm&, const Address&)
FORWARD(3, FMA, vfmadd213ps, ARGS_XXO)
Expand Down

0 comments on commit b6c1b3f

Please sign in to comment.