{
"llvm_prefix": "llvm.x86.avx2.",
"intrinsics": [
{
"intrinsic": "256_abs_{0.data_type}",
"width": [256],
"llvm": "pabs.{0.data_type_short}",
"ret": "s(8-32)",
"args": ["0"]
},
{
"intrinsic": "256_adds_{0.data_type}",
"width": [256],
"llvm": "padd{0.kind_short}s.{0.data_type_short}",
"ret": "i(8-16)",
"args": ["0", "0"]
},
{
"intrinsic": "256_avg_{0.data_type}",
"width": [256],
"llvm": "pavg.{0.data_type_short}",
"ret": "u(8-16)",
"args": ["0", "0"]
},
{
"intrinsic": "256_hadd_{0.data_type}",
"width": [256],
"llvm": "phadd.{0.data_type_short}",
"ret": "s(16-32)",
"args": ["0", "0"]
},
{
"intrinsic": "256_hadds_epi16",
"width": [256],
"llvm": "phadd.sw",
"ret": "s16",
"args": ["0", "0"]
},
{
"intrinsic": "256_hsub_{0.data_type}",
"width": [256],
"llvm": "phsub.{0.data_type_short}",
"ret": "s(16-32)",
"args": ["0", "0"]
},
{
"intrinsic": "256_hsubs_epi16",
"width": [256],
"llvm": "phsub.sw",
"ret": "s16",
"args": ["0", "0"]
},
{
"intrinsic": "256_madd_epi16",
"width": [256],
"llvm": "pmadd.wd",
"ret": "s32",
"args": ["s16", "s16"]
},
{
"intrinsic": "256_maddubs_epi16",
"width": [256],
"llvm": "pmadd.ub.sw",
"ret": "s16",
"args": ["s8", "s8"]
},
{
"intrinsic": "{0.width_mm}_mask_i32gather_{0.data_type}",
"width": [128, 256],
"llvm": "gather.d.{0.data_type_short}{0.width_suffix}",
"ret": ["s32", "f32"],
"args": ["0", "0SPc/S8", "s32", "0s->0", "S32/8"]
},
{
"intrinsic": "{0.width_mm}_mask_i32gather_{0.data_type}",
"width": [128, 256],
"llvm": "gather.d.{0.data_type_short}{0.width_suffix}",
"ret": ["s64", "f64"],
"args": ["0", "0SPc/S8", "s32x128", "0s->0", "S32/8"]
},
{
"intrinsic": "{3.width_mm}_mask_i64gather_{0.data_type}",
"width": [128, 256],
"llvm": "gather.q.{0.data_type_short}{0.width_suffix}",
"ret": ["s32x128", "f32x128"],
"args": ["0", "0SPc/S8", "s64", "0s->0", "S32/8"]
},
{
"intrinsic": "{0.width_mm}_mask_i64gather_{0.data_type}",
"width": [128, 256],
"llvm": "gather.q.{0.data_type_short}{0.width_suffix}",
"ret": ["s64", "f64"],
"args": ["0", "0SPc/S8", "s64", "0s->0", "S32/8"]
},
{
"intrinsic": "{0.width_mm}_maskload_{0.data_type}",
"width": [128, 256],
"llvm": "maskload.{0.data_type_short}{0.width_suffix}",
"ret": ["s(32-64)"],
"args": ["0Pc/S8", "0"]
},
{
"intrinsic": "{2.width_mm}_maskstore_{2.data_type}",
"width": [128, 256],
"llvm": "maskstore.{2.data_type_short}{2.width_suffix}",
"ret": "V",
"args": ["S(32-64)Pm/S8", "1Dv", "2"]
},
{
"intrinsic": "256_max_{0.data_type}",
"width": [256],
"llvm": "pmax{0.kind}.{0.data_type_short}",
"ret": "i(8-32)",
"args": ["0", "0"]
},
{
"intrinsic": "256_min_{0.data_type}",
"width": [256],
"llvm": "pmin{0.kind}.{0.data_type_short}",
"ret": "i(8-32)",
"args": ["0", "0"]
},
{
"intrinsic": "256_movemask_epi8",
"width": [256],
"llvm": "pmovmskb",
"ret": "S32",
"args": ["s8"]
},
{
"intrinsic": "256_mpsadbw_epu8",
"width": [256],
"llvm": "mpsadbw",
"ret": "u16",
"args": ["u8", "u8", "S32/8"]
},
{
"intrinsic": "256_mul_{1.data_type}",
"width": [256],
"llvm": "pmul{0.kind_short}.dq",
"ret": "i64",
"args": ["0dn", "0dn"]
},
{
"intrinsic": "256_mulhi_{0.data_type}",
"width": [256],
"llvm": "pmulh{0.kind_short}.w",
"ret": "i16",
"args": ["0", "0"]
},
{
"intrinsic": "256_mulhrs_epi16",
"width": [256],
"llvm": "pmul.hr.sw",
"ret": "s16",
"args": ["0", "0"]
},
{
"intrinsic": "256_pack{0.kind_short}s_{1.data_type}",
"width": [256],
"llvm": "pack{0.kind}s{1.data_type_short}{0.data_type_short}",
"ret": "i(8-16)",
"args": ["0hws", "0hws"]
},
{
"intrinsic": "256_permutevar8x32_{0.data_type}",
"width": [256],
"llvm": "perm{0.data_type_short}",
"ret": ["s32", "f32"],
"args": ["0", "0s"]
},
{
"intrinsic": "256_sad_epu8",
"width": [256],
"llvm": "psad.bw",
"ret": "u64",
"args": ["u8", "u8"]
},
{
"intrinsic": "256_shuffle_epi8",
"width": [256],
"llvm": "pshuf.b",
"ret": "s8",
"args": ["0", "0"]
},
{
"intrinsic": "256_sign_{0.data_type}",
"width": [256],
"llvm": "psign.{0.data_type_short}",
"ret": "s(8-32)",
"args": ["0", "0"]
},
{
"intrinsic": "256_subs_{0.data_type}",
"width": [256],
"llvm": "psub{0.kind_short}s.{0.data_type_short}",
"ret": "i(8-16)",
"args": ["0", "0"]
}
]
}