@@ -1222,20 +1222,20 @@ def BFNEG16x2 : FNEG_BF16_F16X2<"neg.bf16x2", v2bf16, Int32Regs, True>;
1222
1222
// F64 division
1223
1223
//
1224
1224
def FRCP64r : // f64 reciprocal: selects (fdiv f64imm_1, $b) to rcp.rn.f64; this hunk swaps NVPTXInst (full operand string) for BasicNVPTXInst (mnemonic only)
1225
- NVPTXInst <(outs Float64Regs:$dst),
1226
- (ins Float64Regs:$b),
1227
- "rcp.rn.f64 \t$dst, $b; ",
1228
- [(set f64:$dst, (fdiv f64imm_1, f64:$b))]>;
1225
+ BasicNVPTXInst <(outs Float64Regs:$dst),
1226
+ (ins Float64Regs:$b),
1227
+ "rcp.rn.f64",
1228
+ [(set f64:$dst, (fdiv f64imm_1, f64:$b))]>;
1229
1229
def FDIV64rr : // f64 register/register division: selects (fdiv $a, $b) to div.rn.f64; selection pattern is unchanged, only the base class and asm string differ
1230
- NVPTXInst <(outs Float64Regs:$dst),
1231
- (ins Float64Regs:$a, Float64Regs:$b),
1232
- "div.rn.f64 \t$dst, $a, $b; ",
1233
- [(set f64:$dst, (fdiv f64:$a, f64:$b))]>;
1230
+ BasicNVPTXInst <(outs Float64Regs:$dst),
1231
+ (ins Float64Regs:$a, Float64Regs:$b),
1232
+ "div.rn.f64",
1233
+ [(set f64:$dst, (fdiv f64:$a, f64:$b))]>;
1234
1234
def FDIV64ri : // f64 register/immediate division: divisor is an f64imm operand matched through fpimm; emits div.rn.f64
1235
- NVPTXInst <(outs Float64Regs:$dst),
1236
- (ins Float64Regs:$a, f64imm:$b),
1237
- "div.rn.f64 \t$dst, $a, $b; ",
1238
- [(set f64:$dst, (fdiv f64:$a, fpimm:$b))]>;
1235
+ BasicNVPTXInst <(outs Float64Regs:$dst),
1236
+ (ins Float64Regs:$a, f64imm:$b),
1237
+ "div.rn.f64",
1238
+ [(set f64:$dst, (fdiv f64:$a, fpimm:$b))]>;
1239
1239
1240
1240
// fdiv will be converted to rcp
1241
1241
// fneg (fdiv 1.0, X) => fneg (rcp.rn X)
@@ -1253,42 +1253,42 @@ def fdiv_approx : PatFrag<(ops node:$a, node:$b),
1253
1253
1254
1254
1255
1255
def FRCP32_approx_r_ftz : // f32 approximate reciprocal, FTZ form: matches (fdiv_approx f32imm_1, $b) and emits rcp.approx.ftz.f32; gated by the doF32FTZ predicate
1256
- NVPTXInst <(outs Float32Regs:$dst),
1257
- (ins Float32Regs:$b),
1258
- "rcp.approx.ftz.f32 \t$dst, $b; ",
1259
- [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>,
1260
- Requires<[doF32FTZ]>;
1256
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1257
+ (ins Float32Regs:$b),
1258
+ "rcp.approx.ftz.f32",
1259
+ [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>,
1260
+ Requires<[doF32FTZ]>;
1261
1261
def FRCP32_approx_r : // f32 approximate reciprocal without a Requires<> gate: same (fdiv_approx f32imm_1, $b) pattern as the _ftz def, emits rcp.approx.f32
1262
- NVPTXInst <(outs Float32Regs:$dst),
1263
- (ins Float32Regs:$b),
1264
- "rcp.approx.f32 \t$dst, $b; ",
1265
- [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>;
1262
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1263
+ (ins Float32Regs:$b),
1264
+ "rcp.approx.f32",
1265
+ [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>;
1266
1266
1267
1267
//
1268
1268
// F32 Approximate division
1269
1269
//
1270
1270
def FDIV32approxrr_ftz : // f32 approximate division, register/register, FTZ form: matches (fdiv_approx $a, $b), emits div.approx.ftz.f32, requires doF32FTZ
1271
- NVPTXInst <(outs Float32Regs:$dst),
1272
- (ins Float32Regs:$a, Float32Regs:$b),
1273
- "div.approx.ftz.f32 \t$dst, $a, $b; ",
1274
- [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>,
1275
- Requires<[doF32FTZ]>;
1271
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1272
+ (ins Float32Regs:$a, Float32Regs:$b),
1273
+ "div.approx.ftz.f32",
1274
+ [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>,
1275
+ Requires<[doF32FTZ]>;
1276
1276
def FDIV32approxri_ftz : // f32 approximate division, register/immediate, FTZ form: divisor is an f32imm matched through fpimm; emits div.approx.ftz.f32, requires doF32FTZ
1277
- NVPTXInst <(outs Float32Regs:$dst),
1278
- (ins Float32Regs:$a, f32imm:$b),
1279
- "div.approx.ftz.f32 \t$dst, $a, $b; ",
1280
- [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>,
1281
- Requires<[doF32FTZ]>;
1277
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1278
+ (ins Float32Regs:$a, f32imm:$b),
1279
+ "div.approx.ftz.f32",
1280
+ [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>,
1281
+ Requires<[doF32FTZ]>;
1282
1282
def FDIV32approxrr : // f32 approximate division, register/register, no Requires<> gate: matches (fdiv_approx $a, $b), emits div.approx.f32
1283
- NVPTXInst <(outs Float32Regs:$dst),
1284
- (ins Float32Regs:$a, Float32Regs:$b),
1285
- "div.approx.f32 \t$dst, $a, $b; ",
1286
- [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
1283
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1284
+ (ins Float32Regs:$a, Float32Regs:$b),
1285
+ "div.approx.f32",
1286
+ [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
1287
1287
def FDIV32approxri : // f32 approximate division, register/immediate, no Requires<> gate: divisor is an f32imm matched through fpimm; emits div.approx.f32
1288
- NVPTXInst <(outs Float32Regs:$dst),
1289
- (ins Float32Regs:$a, f32imm:$b),
1290
- "div.approx.f32 \t$dst, $a, $b; ",
1291
- [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>;
1288
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1289
+ (ins Float32Regs:$a, f32imm:$b),
1290
+ "div.approx.f32",
1291
+ [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>;
1292
1292
//
1293
1293
// F32 Semi-accurate reciprocal
1294
1294
//
@@ -1312,66 +1312,72 @@ def : Pat<(fdiv_full f32imm_1, f32:$b),
1312
1312
// F32 Semi-accurate division
1313
1313
//
1314
1314
def FDIV32rr_ftz : // f32 "full" (semi-accurate) division, register/register, FTZ form: matches (fdiv_full $a, $b), emits div.full.ftz.f32, requires doF32FTZ
1315
- NVPTXInst <(outs Float32Regs:$dst),
1316
- (ins Float32Regs:$a, Float32Regs:$b),
1317
- "div.full.ftz.f32 \t$dst, $a, $b; ",
1318
- [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>,
1319
- Requires<[doF32FTZ]>;
1315
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1316
+ (ins Float32Regs:$a, Float32Regs:$b),
1317
+ "div.full.ftz.f32",
1318
+ [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>,
1319
+ Requires<[doF32FTZ]>;
1320
1320
def FDIV32ri_ftz : // f32 "full" division, register/immediate, FTZ form: divisor is an f32imm matched through fpimm; emits div.full.ftz.f32, requires doF32FTZ
1321
- NVPTXInst <(outs Float32Regs:$dst),
1322
- (ins Float32Regs:$a, f32imm:$b),
1323
- "div.full.ftz.f32 \t$dst, $a, $b; ",
1324
- [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>,
1325
- Requires<[doF32FTZ]>;
1321
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1322
+ (ins Float32Regs:$a, f32imm:$b),
1323
+ "div.full.ftz.f32",
1324
+ [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>,
1325
+ Requires<[doF32FTZ]>;
1326
1326
def FDIV32rr : // f32 "full" division, register/register, no Requires<> gate: matches (fdiv_full $a, $b), emits div.full.f32
1327
- NVPTXInst <(outs Float32Regs:$dst),
1328
- (ins Float32Regs:$a, Float32Regs:$b),
1329
- "div.full.f32 \t$dst, $a, $b; ",
1330
- [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>;
1327
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1328
+ (ins Float32Regs:$a, Float32Regs:$b),
1329
+ "div.full.f32",
1330
+ [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>;
1331
1331
def FDIV32ri : // f32 "full" division, register/immediate, no Requires<> gate: divisor is an f32imm matched through fpimm; emits div.full.f32
1332
- NVPTXInst <(outs Float32Regs:$dst),
1333
- (ins Float32Regs:$a, f32imm:$b),
1334
- "div.full.f32 \t$dst, $a, $b; ",
1335
- [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>;
1332
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1333
+ (ins Float32Regs:$a, f32imm:$b),
1334
+ "div.full.f32",
1335
+ [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>;
1336
1336
//
1337
1337
// F32 Accurate reciprocal
1338
1338
//
1339
+
1340
+ def fdiv_ftz : PatFrag<(ops node:$a, node:$b), // predicate-guarded wrapper around fdiv, used by the rcp.rn.ftz.f32 / div.rn.ftz.f32 patterns
1341
+ (fdiv node:$a, node:$b), [{
1342
+ return getDivF32Level(N) == NVPTX::DivPrecisionLevel::IEEE754; // matches only when the node's division level is exactly IEEE754. NOTE(review): the name says "ftz" but the check is on precision level; presumably a separate no-FTZ level routes to the non-ftz defs — confirm against DivPrecisionLevel
1343
+ }]>;
1344
+
1339
1345
def FRCP32r_prec_ftz : // f32 accurate reciprocal, FTZ form: emits rcp.rn.ftz.f32 under doF32FTZ; besides the base-class swap, this hunk narrows the pattern from plain fdiv to fdiv_ftz
1340
- NVPTXInst <(outs Float32Regs:$dst),
1341
- (ins Float32Regs:$b),
1342
- "rcp.rn.ftz.f32 \t$dst, $b; ",
1343
- [(set f32:$dst, (fdiv f32imm_1, f32:$b))]>,
1344
- Requires<[doF32FTZ]>;
1346
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1347
+ (ins Float32Regs:$b),
1348
+ "rcp.rn.ftz.f32",
1349
+ [(set f32:$dst, (fdiv_ftz f32imm_1, f32:$b))]>,
1350
+ Requires<[doF32FTZ]>;
1345
1351
def FRCP32r_prec : // f32 accurate reciprocal, no Requires<> gate: still matches plain (fdiv f32imm_1, $b) and emits rcp.rn.f32; only the base class and asm string change
1346
- NVPTXInst <(outs Float32Regs:$dst),
1347
- (ins Float32Regs:$b),
1348
- "rcp.rn.f32 \t$dst, $b; ",
1349
- [(set f32:$dst, (fdiv f32imm_1, f32:$b))]>;
1352
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1353
+ (ins Float32Regs:$b),
1354
+ "rcp.rn.f32",
1355
+ [(set f32:$dst, (fdiv f32imm_1, f32:$b))]>;
1350
1356
//
1351
1357
// F32 Accurate division
1352
1358
//
1353
1359
def FDIV32rr_prec_ftz : // f32 accurate division, register/register, FTZ form: emits div.rn.ftz.f32 under doF32FTZ; pattern is narrowed from plain fdiv to fdiv_ftz in this hunk
1354
- NVPTXInst <(outs Float32Regs:$dst),
1355
- (ins Float32Regs:$a, Float32Regs:$b),
1356
- "div.rn.ftz.f32 \t$dst, $a, $b; ",
1357
- [(set f32:$dst, (fdiv f32:$a, f32:$b))]>,
1358
- Requires<[doF32FTZ]>;
1360
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1361
+ (ins Float32Regs:$a, Float32Regs:$b),
1362
+ "div.rn.ftz.f32",
1363
+ [(set f32:$dst, (fdiv_ftz f32:$a, f32:$b))]>,
1364
+ Requires<[doF32FTZ]>;
1359
1365
def FDIV32ri_prec_ftz : // f32 accurate division, register/immediate, FTZ form: f32imm divisor matched through fpimm; emits div.rn.ftz.f32 under doF32FTZ; pattern narrowed from fdiv to fdiv_ftz
1360
- NVPTXInst <(outs Float32Regs:$dst),
1361
- (ins Float32Regs:$a, f32imm:$b),
1362
- "div.rn.ftz.f32 \t$dst, $a, $b; ",
1363
- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>,
1364
- Requires<[doF32FTZ]>;
1366
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1367
+ (ins Float32Regs:$a, f32imm:$b),
1368
+ "div.rn.ftz.f32",
1369
+ [(set f32:$dst, (fdiv_ftz f32:$a, fpimm:$b))]>,
1370
+ Requires<[doF32FTZ]>;
1365
1371
def FDIV32rr_prec : // f32 accurate division, register/register, no Requires<> gate: still matches plain (fdiv $a, $b) and emits div.rn.f32
1366
- NVPTXInst <(outs Float32Regs:$dst),
1367
- (ins Float32Regs:$a, Float32Regs:$b),
1368
- "div.rn.f32 \t$dst, $a, $b; ",
1369
- [(set f32:$dst, (fdiv f32:$a, f32:$b))]>;
1372
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1373
+ (ins Float32Regs:$a, Float32Regs:$b),
1374
+ "div.rn.f32",
1375
+ [(set f32:$dst, (fdiv f32:$a, f32:$b))]>;
1370
1376
def FDIV32ri_prec : // f32 accurate division, register/immediate, no Requires<> gate: f32imm divisor matched through fpimm; still matches plain fdiv and emits div.rn.f32
1371
- NVPTXInst <(outs Float32Regs:$dst),
1372
- (ins Float32Regs:$a, f32imm:$b),
1373
- "div.rn.f32 \t$dst, $a, $b; ",
1374
- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>;
1377
+ BasicNVPTXInst <(outs Float32Regs:$dst),
1378
+ (ins Float32Regs:$a, f32imm:$b),
1379
+ "div.rn.f32",
1380
+ [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>;
1375
1381
1376
1382
//
1377
1383
// FMA
0 commit comments