@@ -12,18 +12,272 @@ define <2 x half> @v_test_cvt_v2f32_v2f16(<2 x float> %src) {
12
12
ret <2 x half > %res
13
13
}
14
14
15
- define half @fptrunc_v2f32_v2f16_then_extract (< 2 x float > %src ) {
16
- ; GFX950-LABEL: fptrunc_v2f32_v2f16_then_extract :
15
; Test: fptrunc of a 3-element float vector to a 3-element half vector, returned as a vector.
; The GFX950 checks expect a scalar v_cvt_f16_f32_e32 on v2, a packed
; v_cvt_pk_f16_f32 on v0/v1, and a v_mov_b32_e32 copying v2 into v1 before return.
+ define < 3 x half > @v_test_cvt_v3f32_v3f16 (< 3 x float > %src ) {
16
+ ; GFX950-LABEL: v_test_cvt_v3f32_v3f16 :
17
17
; GFX950: ; %bb.0:
18
18
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19
+ ; GFX950-NEXT: v_cvt_f16_f32_e32 v2, v2
19
20
; GFX950-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
20
- ; GFX950-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
21
+ ; GFX950-NEXT: v_mov_b32_e32 v1, v2
22
+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
23
+ %res = fptrunc <3 x float > %src to <3 x half >
24
+ ret <3 x half > %res
25
+ }
26
+
27
; Test: fptrunc of a 4-element float vector to a 4-element half vector, returned as a vector.
; The GFX950 checks expect exactly two packed v_cvt_pk_f16_f32 instructions
; (v0/v1 into v0, v2/v3 into v1) and no scalar conversions.
+ define <4 x half > @v_test_cvt_v4f32_v4f16 (<4 x float > %src ) {
28
+ ; GFX950-LABEL: v_test_cvt_v4f32_v4f16:
29
+ ; GFX950: ; %bb.0:
30
+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31
+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
32
+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v1, v2, v3
33
+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
34
+ %res = fptrunc <4 x float > %src to <4 x half >
35
+ ret <4 x half > %res
36
+ }
37
+
38
; Test: fptrunc of an 8-element float vector to an 8-element half vector, returned as a vector.
; The GFX950 checks expect four packed v_cvt_pk_f16_f32 instructions writing v0..v3.
; NOTE(review): the function name ends in "v2f16" although the result type is an
; 8-element half vector; the sibling tests are named vNf32_vNf16. Looks like a
; typo for "v8f32_v8f16" - confirm, and rename together with the LABEL line.
+ define <8 x half > @v_test_cvt_v8f32_v2f16 (<8 x float > %src ) {
39
+ ; GFX950-LABEL: v_test_cvt_v8f32_v2f16:
40
+ ; GFX950: ; %bb.0:
41
+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42
+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
43
+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v1, v2, v3
44
+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v2, v4, v5
45
+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v3, v6, v7
46
+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
47
+ %res = fptrunc <8 x float > %src to <8 x half >
48
+ ret <8 x half > %res
49
+ }
50
+
51
; Test: fptrunc of a 16-element float vector to a 16-element half vector, returned as a vector.
; The GFX950 checks expect eight packed v_cvt_pk_f16_f32 instructions, each
; converting one adjacent register pair (v0/v1 .. v14/v15) into v0..v7.
+ define <16 x half > @v_test_cvt_v16f32_v16f16 (<16 x float > %src ) {
52
+ ; GFX950-LABEL: v_test_cvt_v16f32_v16f16:
53
+ ; GFX950: ; %bb.0:
54
+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55
+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
56
+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v1, v2, v3
57
+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v2, v4, v5
58
+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v3, v6, v7
59
+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v4, v8, v9
60
+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v5, v10, v11
61
+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v6, v12, v13
62
+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v7, v14, v15
63
+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
64
+ %res = fptrunc <16 x float > %src to <16 x half >
65
+ ret <16 x half > %res
66
+ }
67
+
68
; Test: truncate a 2-element float vector to half, extract both elements, and
; return their sum (element 0 + element 1).
; The GFX950 checks expect a single packed v_cvt_pk_f16_f32 followed by one SDWA
; add that reads src0 as DWORD and src1 as WORD_1 of the same register.
+ define half @fptrunc_v2f32_v2f16_extract_uses (<2 x float > %src ) {
69
+ ; GFX950-LABEL: fptrunc_v2f32_v2f16_extract_uses:
70
+ ; GFX950: ; %bb.0:
71
+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72
+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
73
+ ; GFX950-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
21
74
; GFX950-NEXT: s_setpc_b64 s[30:31]
22
75
%vec_half = fptrunc <2 x float > %src to <2 x half >
23
- %first = extractelement <2 x half > %vec_half , i64 1
24
- %second = extractelement <2 x half > %vec_half , i64 0
25
- %res = fadd half %first , %second
26
- ret half %res
76
+ %f0 = extractelement <2 x half > %vec_half , i64 0
77
+ %f1 = extractelement <2 x half > %vec_half , i64 1
78
+ %rslt = fadd half %f0 , %f1
79
+ ret half %rslt
80
+ }
81
+
82
; Test: truncate a 3-element float vector to half, extract all three elements,
; and return f2 + (f0 + f1).
; The two check prefixes expect different code:
;   GFX950-SDAG  - three scalar v_cvt_f16_f32_e32 and two plain v_add_f16_e32.
;   GFX950-GISEL - one scalar convert for v2, one packed v_cvt_pk_f16_f32 for
;                  v0/v1, an SDWA add of the packed halves, then a plain add.
+ define half @fptrunc_v3f32_v3f16_extract_uses (<3 x float > %vec_float ) {
83
+ ; GFX950-SDAG-LABEL: fptrunc_v3f32_v3f16_extract_uses:
84
+ ; GFX950-SDAG: ; %bb.0:
85
+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86
+ ; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
87
+ ; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
88
+ ; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
89
+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
90
+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v2, v0
91
+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
92
+ ;
93
+ ; GFX950-GISEL-LABEL: fptrunc_v3f32_v3f16_extract_uses:
94
+ ; GFX950-GISEL: ; %bb.0:
95
+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96
+ ; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
97
+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
98
+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
99
+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v0, v2, v0
100
+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
101
+ %vec_half = fptrunc <3 x float > %vec_float to <3 x half >
102
+ %f0 = extractelement <3 x half > %vec_half , i64 0
103
+ %f1 = extractelement <3 x half > %vec_half , i64 1
104
+ %f2 = extractelement <3 x half > %vec_half , i64 2
105
+ %sum0 = fadd half %f0 , %f1
106
+ %rslt = fadd half %f2 , %sum0
107
+ ret half %rslt
108
+ }
109
+
110
; Test: truncate a 4-element float vector to half, extract all four elements,
; and return (f0 + f1) + (f2 + f3).
; Both check prefixes expect two packed v_cvt_pk_f16_f32, two SDWA adds (one per
; packed register) and one final plain add. They differ only in the order of
; the converts and in which registers hold the intermediate values.
+ define half @fptrunc_v4f32_v4f16_extract_uses (<4 x float > %vec_float ) {
111
+ ; GFX950-SDAG-LABEL: fptrunc_v4f32_v4f16_extract_uses:
112
+ ; GFX950-SDAG: ; %bb.0:
113
+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114
+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v2, v2, v3
115
+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
116
+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
117
+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v1, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
118
+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
119
+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
120
+ ;
121
+ ; GFX950-GISEL-LABEL: fptrunc_v4f32_v4f16_extract_uses:
122
+ ; GFX950-GISEL: ; %bb.0:
123
+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124
+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
125
+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v1, v2, v3
126
+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
127
+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
128
+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
129
+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
130
+ %vec_half = fptrunc <4 x float > %vec_float to <4 x half >
131
+ %f0 = extractelement <4 x half > %vec_half , i64 0
132
+ %f1 = extractelement <4 x half > %vec_half , i64 1
133
+ %f2 = extractelement <4 x half > %vec_half , i64 2
134
+ %f3 = extractelement <4 x half > %vec_half , i64 3
135
+ %sum0 = fadd half %f0 , %f1
136
+ %sum1 = fadd half %f2 , %f3
137
+ %rslt = fadd half %sum0 , %sum1
138
+ ret half %rslt
139
+ }
140
+
141
; Test: truncate an 8-element float vector to half, extract all eight elements,
; and sum them as a pairwise tree: adjacent pairs first, then pairs of partial
; sums, then the final add.
; Both check prefixes expect four packed v_cvt_pk_f16_f32, four SDWA adds (one
; per packed register) and three plain v_add_f16_e32. They differ in convert
; order and in register assignment.
+ define half @fptrunc_v8f32_v8f16_extract_uses (<8 x float > %vec_float ) {
142
+ ; GFX950-SDAG-LABEL: fptrunc_v8f32_v8f16_extract_uses:
143
+ ; GFX950-SDAG: ; %bb.0:
144
+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145
+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v6, v6, v7
146
+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v4, v4, v5
147
+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v2, v2, v3
148
+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
149
+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
150
+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v1, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
151
+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v2, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
152
+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v3, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
153
+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
154
+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v1, v2, v3
155
+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
156
+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
157
+ ;
158
+ ; GFX950-GISEL-LABEL: fptrunc_v8f32_v8f16_extract_uses:
159
+ ; GFX950-GISEL: ; %bb.0:
160
+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161
+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
162
+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v1, v2, v3
163
+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v2, v4, v5
164
+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v3, v6, v7
165
+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
166
+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
167
+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
168
+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
169
+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
170
+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v1, v2, v3
171
+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
172
+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
173
+ %vec_half = fptrunc <8 x float > %vec_float to <8 x half >
174
+ %f0 = extractelement <8 x half > %vec_half , i64 0
175
+ %f1 = extractelement <8 x half > %vec_half , i64 1
176
+ %f2 = extractelement <8 x half > %vec_half , i64 2
177
+ %f3 = extractelement <8 x half > %vec_half , i64 3
178
+ %f4 = extractelement <8 x half > %vec_half , i64 4
179
+ %f5 = extractelement <8 x half > %vec_half , i64 5
180
+ %f6 = extractelement <8 x half > %vec_half , i64 6
181
+ %f7 = extractelement <8 x half > %vec_half , i64 7
182
+ %sum0 = fadd half %f0 , %f1
183
+ %sum1 = fadd half %f2 , %f3
184
+ %sum2 = fadd half %f4 , %f5
185
+ %sum3 = fadd half %f6 , %f7
186
+ %sum4 = fadd half %sum0 , %sum1
187
+ %sum5 = fadd half %sum2 , %sum3
188
+ %rslt = fadd half %sum4 , %sum5
189
+ ret half %rslt
190
+ }
191
+
192
; Test: truncate a 16-element float vector to half, extract all sixteen
; elements, and sum them as a pairwise tree: adjacent pairs first (sum0..sum7),
; then pairs of partial sums (sum8..sum13), then the final add.
; Both check prefixes expect eight packed v_cvt_pk_f16_f32, eight SDWA adds (one
; per packed register) and seven plain v_add_f16_e32. They differ in convert
; order and in register assignment.
+ define half @fptrunc_v16f32_v16f16_extract_uses (<16 x float > %vec_float ) {
193
+ ; GFX950-SDAG-LABEL: fptrunc_v16f32_v16f16_extract_uses:
194
+ ; GFX950-SDAG: ; %bb.0:
195
+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196
+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v14, v14, v15
197
+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v12, v12, v13
198
+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v10, v10, v11
199
+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v8, v8, v9
200
+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v6, v6, v7
201
+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v4, v4, v5
202
+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v2, v2, v3
203
+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
204
+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
205
+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v1, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
206
+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v2, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
207
+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v3, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
208
+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v4, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
209
+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v5, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
210
+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v6, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
211
+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v7, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
212
+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
213
+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v1, v2, v3
214
+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v2, v4, v5
215
+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v3, v6, v7
216
+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
217
+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v1, v2, v3
218
+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
219
+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
220
+ ;
221
+ ; GFX950-GISEL-LABEL: fptrunc_v16f32_v16f16_extract_uses:
222
+ ; GFX950-GISEL: ; %bb.0:
223
+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224
+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
225
+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v1, v2, v3
226
+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v2, v4, v5
227
+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v3, v6, v7
228
+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v4, v8, v9
229
+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v5, v10, v11
230
+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v6, v12, v13
231
+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v7, v14, v15
232
+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
233
+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
234
+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
235
+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
236
+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
237
+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
238
+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
239
+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
240
+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
241
+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v1, v2, v3
242
+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v2, v4, v5
243
+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v3, v6, v7
244
+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
245
+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v1, v2, v3
246
+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
247
+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
248
+ %vec_half = fptrunc <16 x float > %vec_float to <16 x half >
249
+ %f0 = extractelement <16 x half > %vec_half , i64 0
250
+ %f1 = extractelement <16 x half > %vec_half , i64 1
251
+ %f2 = extractelement <16 x half > %vec_half , i64 2
252
+ %f3 = extractelement <16 x half > %vec_half , i64 3
253
+ %f4 = extractelement <16 x half > %vec_half , i64 4
254
+ %f5 = extractelement <16 x half > %vec_half , i64 5
255
+ %f6 = extractelement <16 x half > %vec_half , i64 6
256
+ %f7 = extractelement <16 x half > %vec_half , i64 7
257
+ %f8 = extractelement <16 x half > %vec_half , i64 8
258
+ %f9 = extractelement <16 x half > %vec_half , i64 9
259
+ %f10 = extractelement <16 x half > %vec_half , i64 10
260
+ %f11 = extractelement <16 x half > %vec_half , i64 11
261
+ %f12 = extractelement <16 x half > %vec_half , i64 12
262
+ %f13 = extractelement <16 x half > %vec_half , i64 13
263
+ %f14 = extractelement <16 x half > %vec_half , i64 14
264
+ %f15 = extractelement <16 x half > %vec_half , i64 15
265
+ %sum0 = fadd half %f0 , %f1
266
+ %sum1 = fadd half %f2 , %f3
267
+ %sum2 = fadd half %f4 , %f5
268
+ %sum3 = fadd half %f6 , %f7
269
+ %sum4 = fadd half %f8 , %f9
270
+ %sum5 = fadd half %f10 , %f11
271
+ %sum6 = fadd half %f12 , %f13
272
+ %sum7 = fadd half %f14 , %f15
273
+ %sum8 = fadd half %sum0 , %sum1
274
+ %sum9 = fadd half %sum2 , %sum3
275
+ %sum10 = fadd half %sum4 , %sum5
276
+ %sum11 = fadd half %sum6 , %sum7
277
+ %sum12 = fadd half %sum8 , %sum9
278
+ %sum13 = fadd half %sum10 , %sum11
279
+ %rslt = fadd half %sum12 , %sum13
280
+ ret half %rslt
27
281
}
28
282
29
283
define <2 x half > @v_test_cvt_v2f64_v2f16 (<2 x double > %src ) {
0 commit comments