-
Notifications
You must be signed in to change notification settings - Fork 4
Description
Motivation
With CoreDSL 2, instruction names cannot include dots (.), while there are many extensions using names with a dot (see first & second example). While this might be negligible for HLS and ISS, it affects further integrations, such as disassembler generation.
Therefore, I propose adding an optional mnemonic: field to the CoreDSL 2 syntax, which can be used to provide the actual instruction name if the name used in CoreDSL does not match the real one.
In addition, I often run into situations where I would like to combine multiple instructions (with minor differences in the encoding/behavior) into a single one (see second & third example below), which of course ends up with an invalid name for those instructions.
To deal with this sort of problem, I would like to be able to use similar formatting options as already allowed for the assembly: field. For dealing with more complex types of instructions (having non-trivial mappings between encoding/operands and names), we would need to come up with a more powerful variant of this feature (see third example).
Examples
Mnemonics with a dot: Custom Multiply-Accumulate (Pulp/CoreV)
Usage: cv.mac rd, rs1, rs2
Before:
CV_MAC {
encoding: 7'b1001000 :: rs2[4:0] :: rs1[4:0] :: 3'b011 :: rd[4:0] :: 7'b0101011;
assembly:"{name(rd)}, {name(rs1)}, {name(rs2)}";
behavior: {
signed<65> result = (signed)X[rs1] * (signed)X[rs2] + (signed)X[rd];
if(rd != 0) X[rd] = result[31:0];
}
}
Problems:
- wrong mnemonic used in (dis)assembly (underscore instead of dot)
After:
CV_MAC {
mnemonic: "cv.mac";
encoding: 7'b1001000 :: rs2[4:0] :: rs1[4:0] :: 3'b011 :: rd[4:0] :: 7'b0101011;
assembly:"{name(rd)}, {name(rs1)}, {name(rs2)}";
behavior: {
signed<65> result = (signed)X[rs1] * (signed)X[rs2] + (signed)X[rd];
if(rd != 0) X[rd] = result[31:0];
}
}
Potential problems:
- None as long as the mnemonic field is optional
- Upper case for instruction names vs. lower case for the mnemonic?
Trivial mnemonic formatting: Vector Strided Segment Loads (RVV)
Usage: vlsseg<nfields>e<eew>.v vd, (rs1), rs2, vm
Details:
- nfields=1...8
- eew=8,16,32,64
- Results in 32 Instructions
- Only define 1 (or 4) times with nfields (or nfields+eew) taken from encoding?
- Many similar examples in RVV!
For now let's only consider nfields. (eew has non-trivial encoding)
Before (combined):
VLSSEGE64_V {
encoding: nf[2:0] :: 1'b0 :: 2'b10 :: vm[0:0] :: rs2[4:0] :: rs1[4:0] :: 3'b111 :: vd[4:0] :: 7'b0000111;
assembly:"{name(vd)}, {name(rs1)}, {name(vm)}";
behavior: {
unsigned<4> nfields = nf + 1;
... // call to external softvector lib
}
}
Problems:
- Wrong mnemonic used in (dis)assembly
- Can not distinguish between the 8 variants
Before (separate):
VLSSEG1E64_V {
encoding: 2'b00 :: 1'b0 :: 2'b10 :: vm[0:0] :: rs2[4:0] :: rs1[4:0] :: 3'b111 :: vd[4:0] :: 7'b0000111;
assembly:"{name(vd)}, {name(rs1)}, {name(vm)}";
behavior: {
unsigned<4> nfields = 1; // nf + 1
... // call to external softvector lib
}
}
VLSSEG2E64_V { ... }
VLSSEG3E64_V { ... }
VLSSEG4E64_V { ... }
VLSSEG5E64_V { ... }
VLSSEG6E64_V { ... }
VLSSEG7E64_V { ... }
VLSSEG8E64_V { ... }
Problems:
- much redundant code
- wrong mnemonic (underscore instead of dot)
After (combined):
VLSSEGE64_V {
mnemonic: "vlsseg{nf+1}e64.v";
encoding: nf[2:0] :: 1'b0 :: 2'b10 :: vm[0:0] :: rs2[4:0] :: rs1[4:0] :: 3'b111 :: vd[4:0] :: 7'b0000111;
assembly:"{name(vd)}, {name(rs1)}, {name(vm)}";
behavior: {
unsigned<4> nfields = nf + 1;
... // call to external softvector lib
}
}
Potential problems:
- allow access to operands during formatting?
- allow {imm:#08x} style formatting similarly to the assembly definition?
Non-trivial mnemonic formatting : Byte Unpacking (RVP)
Usage:
- SUNPKD810 rd, rs1 (Signed Unpacking Bytes 1 & 0)
- SUNPKD820 rd, rs1 (Signed Unpacking Bytes 2 & 0)
- SUNPKD830 rd, rs1 (Signed Unpacking Bytes 3 & 0)
- SUNPKD831 rd, rs1 (Signed Unpacking Bytes 3 & 1)
- SUNPKD832 rd, rs1 (Signed Unpacking Bytes 3 & 2)
Details:
- There are many further instructions in RVP
- i.e. having suffix: BB(Bottom/Bottom), TT(Top/Top), BT(Bottom/Top), TB(Top/Bottom)
Before (combined):
SUNPKD8 { // or SUNPKD8XY
encoding: 7'b1010110 :: code[4:0] :: rs1[4:0] :: 3'b000 :: rd[4:0] :: 7'b1110111;
assembly:"{name(rs1)}, {name(rd)}";
behavior: {
if(rd != 0) {
unsigned<32> rs1_val = X[rs1];
if(code == 5'b01000) { // SUNPKD810
signed<8> rs1_val_hi = rs1_val[15:8];
signed<8> rs1_val_lo = rs1_val[7:0];
} else if (code == 5'b01001) { // SUNPKD820
signed<8> rs1_val_hi = rs1_val[23:16];
signed<8> rs1_val_lo = rs1_val[7:0];
} else if (code == 5'b01010) { // SUNPKD830
signed<8> rs1_val_hi = rs1_val[31:24];
signed<8> rs1_val_lo = rs1_val[7:0];
} else if (code == 5'b01011) { // SUNPKD831
signed<8> rs1_val_hi = rs1_val[31:24];
signed<8> rs1_val_lo = rs1_val[15:8];
} else if (code == 5'b10011) { // SUNPKD832
signed<8> rs1_val_hi = rs1_val[31:24];
signed<8> rs1_val_lo = rs1_val[23:16];
} else {
raise(0, 2); // Invalid instruction
}
X[rd] = (signed<16>)rs1_val_hi :: (unsigned<16>)(signed<16>)rs1_val_lo;
}
}
}
Problems:
- Wrong mnemonic used in (dis)assembly
- can not distinguish between the 5 variants
Before (separate):
SUNPKD810 {
encoding: 7'b1010110 :: 5'b01000 :: rs1[4:0] :: 3'b000 :: rd[4:0] :: 7'b1110111;
assembly:"{name(rs1)}, {name(rd)}";
behavior: {
if(rd != 0) {
unsigned<32> rs1_val = X[rs1];
signed<8> rs1_val_hi = rs1_val[15:8];
signed<8> rs1_val_lo = rs1_val[7:0];
X[rd] = (signed<16>)rs1_val_hi :: (unsigned<16>)(signed<16>)rs1_val_lo;
}
}
}
SUNPKD820 { ... }
SUNPKD830 { ... }
SUNPKD831 { ... }
SUNPKD832 { ... }
Problems:
- Too much redundant code
Before (separate + helper function):
unsigned<32> sunpkd8_helper(unsigned<32> data, unsigned<5> code) {
if(code == 5'b01000) { // SUNPKD810
signed<8> rs1_val_hi = rs1_val[15:8];
signed<8> rs1_val_lo = rs1_val[7:0];
} else if (code == 5'b01001) { // SUNPKD820
signed<8> rs1_val_hi = rs1_val[23:16];
signed<8> rs1_val_lo = rs1_val[7:0];
} else if (code == 5'b01010) { // SUNPKD830
signed<8> rs1_val_hi = rs1_val[31:24];
signed<8> rs1_val_lo = rs1_val[7:0];
} else if (code == 5'b01011) { // SUNPKD831
signed<8> rs1_val_hi = rs1_val[31:24];
signed<8> rs1_val_lo = rs1_val[15:8];
} else if (code == 5'b10011) { // SUNPKD832
signed<8> rs1_val_hi = rs1_val[31:24];
signed<8> rs1_val_lo = rs1_val[23:16];
} else {
raise(0, 2); // Invalid instruction
}
return (signed<16>)rs1_val_hi :: (unsigned<16>)(signed<16>)rs1_val_lo;
}
SUNPKD810 {
encoding: 7'b1010110 :: 5'b01000 :: rs1[4:0] :: 3'b000 :: rd[4:0] :: 7'b1110111;
assembly:"{name(rs1)}, {name(rd)}";
behavior: {
if(rd != 0) {
X[rd] = sunpkd8_helper(X[rs1], 5'b01000)
}
}
}
SUNPKD820 { ... }
SUNPKD830 { ... }
SUNPKD831 { ... }
SUNPKD832 { ... }
Problem:
- Less intuitive
- Encoding etc. still redundant
After (combined only):
string decode_xy(unsigend<5> code) {
if(code == 5'b01000) { // SUNPKD810
return "10";
} else if (code == 5'b01001) { // SUNPKD820
return "20";
} else if (code == 5'b01010) { // SUNPKD830
return "30";
} else if (code == 5'b01011) { // SUNPKD831
return "31";
} else if (code == 5'b10011) { // SUNPKD832
return "32";
} else {
return "";
}
}
SUNPKD8 { // or SUNPKD8XY
mnemonic: "sunpkd8{decode_xy(code)}"
encoding: 7'b1010110 :: code[4:0] :: rs1[4:0] :: 3'b000 :: rd[4:0] :: 7'b1110111;
assembly:"{name(rs1)}, {name(rd)}";
behavior: {
if(rd != 0) {
unsigned<32> rs1_val = X[rs1];
if(code == 5'b01000) { // SUNPKD810
signed<8> rs1_val_hi = rs1_val[15:8];
signed<8> rs1_val_lo = rs1_val[7:0];
} else if (code == 5'b01001) { // SUNPKD820
signed<8> rs1_val_hi = rs1_val[23:16];
signed<8> rs1_val_lo = rs1_val[7:0];
} else if (code == 5'b01010) { // SUNPKD830
signed<8> rs1_val_hi = rs1_val[31:24];
signed<8> rs1_val_lo = rs1_val[7:0];
} else if (code == 5'b01011) { // SUNPKD831
signed<8> rs1_val_hi = rs1_val[31:24];
signed<8> rs1_val_lo = rs1_val[15:8];
} else if (code == 5'b10011) { // SUNPKD832
signed<8> rs1_val_hi = rs1_val[31:24];
signed<8> rs1_val_lo = rs1_val[23:16];
} else {
raise(0, 2); // Invalid instruction
}
X[rd] = (signed<16>)rs1_val_hi :: (unsigned<16>)(signed<16>)rs1_val_lo;
}
}
}
Potential problems:
- See previous example
- needs string type?
- allow calling helper functions during formatting?
- BTW: {name(...)} is also allowed and backend-implementation specific, which is a bit unintuitive. With the proposed change, this could be implemented as an (external) function instead.
- BTW: