Skip to content

Commit

Permalink
Arm: Add NEON and MVE RTL patterns for Complex Addition.
Browse files Browse the repository at this point in the history
This adds implementation for the optabs for complex additions.  With this the
following C code:

  void f90 (float complex a[restrict N], float complex b[restrict N],
	    float complex c[restrict N])
  {
    for (int i=0; i < N; i++)
      c[i] = a[i] + (b[i] * I);
  }

generates

  f90:
	  add     r3, r2, #1600
  .L2:
	  vld1.32 {q8}, [r0]!
	  vld1.32 {q9}, [r1]!
	  vcadd.f32       q8, q8, q9, #90
	  vst1.32 {q8}, [r2]!
	  cmp     r3, r2
	  bne     .L2
	  bx      lr

instead of

  f90:
	  add     r3, r2, #1600
  .L2:
	  vld2.32 {d24-d27}, [r0]!
	  vld2.32 {d20-d23}, [r1]!
	  vsub.f32	q8, q12, q11
	  vadd.f32	q9, q13, q10
	  vst2.32 {d16-d19}, [r2]!
	  cmp     r3, r2
	  bne     .L2
	  bx      lr

gcc/ChangeLog:

	* config/arm/arm_mve.h (__arm_vcaddq_rot90_u8, __arm_vcaddq_rot270_u8,
	__arm_vcaddq_rot90_s8, __arm_vcaddq_rot270_s8,
	__arm_vcaddq_rot90_u16, __arm_vcaddq_rot270_u16,
	__arm_vcaddq_rot90_s16, __arm_vcaddq_rot270_s16,
	__arm_vcaddq_rot90_u32, __arm_vcaddq_rot270_u32,
	__arm_vcaddq_rot90_s32, __arm_vcaddq_rot270_s32,
	__arm_vcaddq_rot90_f16, __arm_vcaddq_rot270_f16,
	__arm_vcaddq_rot90_f32, __arm_vcaddq_rot270_f32):  Update builtin calls.
	* config/arm/arm_mve_builtins.def (vcaddq_rot90_u, vcaddq_rot270_u,
	vcaddq_rot90_s, vcaddq_rot270_s, vcaddq_rot90_f, vcaddq_rot270_f):
	Removed.
	(vcaddq_rot90, vcaddq_rot270): New.
	* config/arm/constraints.md (Dz): Include MVE.
	* config/arm/iterators.md (mve_rot): New.
	(supf): Remove VCADDQ_ROT270_S, VCADDQ_ROT270_U, VCADDQ_ROT90_S,
	VCADDQ_ROT90_U.
	(VCADDQ_ROT270, VCADDQ_ROT90): Removed.
	* config/arm/mve.md (mve_vcaddq_rot270_<supf><mode,
	mve_vcaddq_rot90_<supf><mode>, mve_vcaddq_rot270_f<mode>,
	mve_vcaddq_rot90_f<mode>): Removed.
	(mve_vcaddq<mve_rot><mode>, mve_vcaddq<mve_rot><mode>): New.
	* config/arm/unspecs.md (VCADDQ_ROT270_S, VCADDQ_ROT90_S,
	VCADDQ_ROT270_U, VCADDQ_ROT90_U, VCADDQ_ROT270_F,
	VCADDQ_ROT90_F): Removed.
	* config/arm/vec-common.md (cadd<rot><mode>3): New.
  • Loading branch information
TamarChristinaArm committed Dec 16, 2020
1 parent 84747ac commit 9732dc8
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 72 deletions.
38 changes: 22 additions & 16 deletions gcc/config/arm/arm_mve.h
Expand Up @@ -3981,14 +3981,16 @@ __extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_u8 (uint8x16_t __a, uint8x16_t __b)
{
return __builtin_mve_vcaddq_rot90_uv16qi (__a, __b);
return (uint8x16_t)
__builtin_mve_vcaddq_rot90v16qi ((int8x16_t)__a, (int8x16_t)__b);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_u8 (uint8x16_t __a, uint8x16_t __b)
{
return __builtin_mve_vcaddq_rot270_uv16qi (__a, __b);
return (uint8x16_t)
__builtin_mve_vcaddq_rot270v16qi ((int8x16_t)__a, (int8x16_t)__b);
}

__extension__ extern __inline uint8x16_t
Expand Down Expand Up @@ -4520,14 +4522,14 @@ __extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_s8 (int8x16_t __a, int8x16_t __b)
{
return __builtin_mve_vcaddq_rot90_sv16qi (__a, __b);
return __builtin_mve_vcaddq_rot90v16qi (__a, __b);
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_s8 (int8x16_t __a, int8x16_t __b)
{
return __builtin_mve_vcaddq_rot270_sv16qi (__a, __b);
return __builtin_mve_vcaddq_rot270v16qi (__a, __b);
}

__extension__ extern __inline int8x16_t
Expand Down Expand Up @@ -4821,14 +4823,16 @@ __extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_u16 (uint16x8_t __a, uint16x8_t __b)
{
return __builtin_mve_vcaddq_rot90_uv8hi (__a, __b);
return (uint16x8_t)
__builtin_mve_vcaddq_rot90v8hi ((int16x8_t)__a, (int16x8_t)__b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_u16 (uint16x8_t __a, uint16x8_t __b)
{
return __builtin_mve_vcaddq_rot270_uv8hi (__a, __b);
return (uint16x8_t)
__builtin_mve_vcaddq_rot270v8hi ((int16x8_t)__a, (int16x8_t)__b);
}

__extension__ extern __inline uint16x8_t
Expand Down Expand Up @@ -5360,14 +5364,14 @@ __extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_s16 (int16x8_t __a, int16x8_t __b)
{
return __builtin_mve_vcaddq_rot90_sv8hi (__a, __b);
return __builtin_mve_vcaddq_rot90v8hi (__a, __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_s16 (int16x8_t __a, int16x8_t __b)
{
return __builtin_mve_vcaddq_rot270_sv8hi (__a, __b);
return __builtin_mve_vcaddq_rot270v8hi (__a, __b);
}

__extension__ extern __inline int16x8_t
Expand Down Expand Up @@ -5661,14 +5665,16 @@ __extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_u32 (uint32x4_t __a, uint32x4_t __b)
{
return __builtin_mve_vcaddq_rot90_uv4si (__a, __b);
return (uint32x4_t)
__builtin_mve_vcaddq_rot90v4si ((int32x4_t)__a, (int32x4_t)__b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_u32 (uint32x4_t __a, uint32x4_t __b)
{
return __builtin_mve_vcaddq_rot270_uv4si (__a, __b);
return (uint32x4_t)
__builtin_mve_vcaddq_rot270v4si ((int32x4_t)__a, (int32x4_t)__b);
}

__extension__ extern __inline uint32x4_t
Expand Down Expand Up @@ -6200,14 +6206,14 @@ __extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_s32 (int32x4_t __a, int32x4_t __b)
{
return __builtin_mve_vcaddq_rot90_sv4si (__a, __b);
return __builtin_mve_vcaddq_rot90v4si (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_s32 (int32x4_t __a, int32x4_t __b)
{
return __builtin_mve_vcaddq_rot270_sv4si (__a, __b);
return __builtin_mve_vcaddq_rot270v4si (__a, __b);
}

__extension__ extern __inline int32x4_t
Expand Down Expand Up @@ -17370,14 +17376,14 @@ __extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b)
{
return __builtin_mve_vcaddq_rot90_fv8hf (__a, __b);
return __builtin_mve_vcaddq_rot90v8hf (__a, __b);
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b)
{
return __builtin_mve_vcaddq_rot270_fv8hf (__a, __b);
return __builtin_mve_vcaddq_rot270v8hf (__a, __b);
}

__extension__ extern __inline float16x8_t
Expand Down Expand Up @@ -17622,14 +17628,14 @@ __extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b)
{
return __builtin_mve_vcaddq_rot90_fv4sf (__a, __b);
return __builtin_mve_vcaddq_rot90v4sf (__a, __b);
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b)
{
return __builtin_mve_vcaddq_rot270_fv4sf (__a, __b);
return __builtin_mve_vcaddq_rot270v4sf (__a, __b);
}

__extension__ extern __inline float32x4_t
Expand Down
10 changes: 4 additions & 6 deletions gcc/config/arm/arm_mve_builtins.def
Expand Up @@ -125,8 +125,6 @@ VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_n_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot90_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot270_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vaddvq_p_u, v16qi, v8hi, v4si)
Expand Down Expand Up @@ -202,8 +200,6 @@ VAR3 (BINOP_NONE_NONE_NONE, vhcaddq_rot270_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vhaddq_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vhaddq_n_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, veorq_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot90_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot270_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vbrsrq_n_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vbicq_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vandq_s, v16qi, v8hi, v4si)
Expand Down Expand Up @@ -268,8 +264,6 @@ VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot90_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot270_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot180_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vcmulq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vcaddq_rot90_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vcaddq_rot270_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vbicq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vandq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vaddq_n_f, v8hf, v4sf)
Expand Down Expand Up @@ -892,3 +886,7 @@ VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_vec_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_carry_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_vec_u, v16qi, v8hi, v4si)
VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_carry_u, v16qi, v8hi, v4si)

/* optabs without any suffixes. */
VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot90, v16qi, v8hi, v4si, v8hf, v4sf)
VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot270, v16qi, v8hi, v4si, v8hf, v4sf)
2 changes: 1 addition & 1 deletion gcc/config/arm/constraints.md
Expand Up @@ -310,7 +310,7 @@
"@internal
In ARM/Thumb-2 state a vector of constant zeros."
(and (match_code "const_vector")
(match_test "TARGET_NEON && op == CONST0_RTX (mode)")))
(match_test "(TARGET_NEON || TARGET_HAVE_MVE) && op == CONST0_RTX (mode)")))

(define_constraint "Da"
"@internal
Expand Down
11 changes: 5 additions & 6 deletions gcc/config/arm/iterators.md
Expand Up @@ -1182,6 +1182,9 @@
(UNSPEC_VCMLA180 "180")
(UNSPEC_VCMLA270 "270")])

(define_int_attr mve_rot [(UNSPEC_VCADD90 "_rot90")
(UNSPEC_VCADD270 "_rot270")])

(define_int_attr simd32_op [(UNSPEC_QADD8 "qadd8") (UNSPEC_QSUB8 "qsub8")
(UNSPEC_SHADD8 "shadd8") (UNSPEC_SHSUB8 "shsub8")
(UNSPEC_UHADD8 "uhadd8") (UNSPEC_UHSUB8 "uhsub8")
Expand Down Expand Up @@ -1232,10 +1235,8 @@
(VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s")
(VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
(VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
(VADDVQ_P_S "s") (VADDVQ_P_U "u")
(VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s")
(VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
(VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u")
(VADDVQ_P_S "s") (VADDVQ_P_U "u") (VBRSRQ_N_S "s")
(VBRSRQ_N_U "u") (VCMPEQQ_S "s") (VCMPEQQ_U "u")
(VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u") (VCMPNEQ_N_S "s")
(VCMPNEQ_N_U "u")
(VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s")
Expand Down Expand Up @@ -1500,8 +1501,6 @@
(define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U])
(define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
(define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
(define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U])
(define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S])
(define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S])
(define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U])
(define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S])
Expand Down
53 changes: 16 additions & 37 deletions gcc/config/arm/mve.md
Expand Up @@ -962,34 +962,28 @@
])

;;
;; [vcaddq_rot270_s, vcaddq_rot270_u])
;; [vcaddq, vcaddq_rot90, vcadd_rot180, vcadd_rot270])
;;
(define_insn "mve_vcaddq_rot270_<supf><mode>"
(define_insn "mve_vcaddq<mve_rot><mode>"
[
(set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>")
(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
(match_operand:MVE_2 2 "s_register_operand" "w")]
VCADDQ_ROT270))
VCADD))
]
"TARGET_HAVE_MVE"
"vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #270"
"vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #<rot>"
[(set_attr "type" "mve_move")
])

;;
;; [vcaddq_rot90_u, vcaddq_rot90_s])
;;
(define_insn "mve_vcaddq_rot90_<supf><mode>"
[
(set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>")
(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
(match_operand:MVE_2 2 "s_register_operand" "w")]
VCADDQ_ROT90))
]
"TARGET_HAVE_MVE"
"vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #90"
[(set_attr "type" "mve_move")
])
;; Auto vectorizer pattern for int vcadd
(define_expand "cadd<rot><mode>3"
[(set (match_operand:MVE_2 0 "register_operand")
(unspec:MVE_2 [(match_operand:MVE_2 1 "register_operand")
(match_operand:MVE_2 2 "register_operand")]
VCADD))]
"TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN"
)

;;
;; [vcmpcsq_n_u])
Expand Down Expand Up @@ -2102,32 +2096,17 @@
])

;;
;; [vcaddq_rot270_f])
;;
(define_insn "mve_vcaddq_rot270_f<mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
(match_operand:MVE_0 2 "s_register_operand" "w")]
VCADDQ_ROT270_F))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #270"
[(set_attr "type" "mve_move")
])

;;
;; [vcaddq_rot90_f])
;; [vcaddq, vcaddq_rot90, vcadd_rot180, vcadd_rot270])
;;
(define_insn "mve_vcaddq_rot90_f<mode>"
(define_insn "mve_vcaddq<mve_rot><mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
(match_operand:MVE_0 2 "s_register_operand" "w")]
VCADDQ_ROT90_F))
VCADD))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #90"
"vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #<rot>"
[(set_attr "type" "mve_move")
])

Expand Down
6 changes: 0 additions & 6 deletions gcc/config/arm/unspecs.md
Expand Up @@ -598,8 +598,6 @@
VADDVAQ_S
VADDVQ_P_S
VBRSRQ_N_S
VCADDQ_ROT270_S
VCADDQ_ROT90_S
VCMPEQQ_S
VCMPEQQ_N_S
VCMPNEQ_N_S
Expand Down Expand Up @@ -641,8 +639,6 @@
VADDVAQ_U
VADDVQ_P_U
VBRSRQ_N_U
VCADDQ_ROT270_U
VCADDQ_ROT90_U
VCMPEQQ_U
VCMPEQQ_N_U
VCMPNEQ_N_U
Expand Down Expand Up @@ -709,8 +705,6 @@
VABDQ_M_U
VABDQ_F
VADDQ_N_F
VCADDQ_ROT270_F
VCADDQ_ROT90_F
VCMPEQQ_F
VCMPEQQ_N_F
VCMPGEQ_F
Expand Down
10 changes: 10 additions & 0 deletions gcc/config/arm/vec-common.md
Expand Up @@ -205,3 +205,13 @@
(neg:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))]
"ARM_HAVE_<MODE>_ARITH"
)

(define_expand "cadd<rot><mode>3"
[(set (match_operand:VF 0 "register_operand")
(unspec:VF [(match_operand:VF 1 "register_operand")
(match_operand:VF 2 "register_operand")]
VCADD))]
"(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
&& ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
)

0 comments on commit 9732dc8

Please sign in to comment.