Skip to content

Commit

Permalink
rename/roq/dispatch1: support EnableIntMoveElim=false
Browse files Browse the repository at this point in the history
(finish refactoring) [TODO] remove useless code
  • Loading branch information
YikeZhou committed Aug 23, 2021
1 parent 39d3280 commit 5eb4af5
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 69 deletions.
24 changes: 19 additions & 5 deletions src/main/scala/xiangshan/backend/dispatch/Dispatch1.scala
Expand Up @@ -155,7 +155,11 @@ class Dispatch1(implicit p: Parameters) extends XSModule with HasExceptionNO {
updatedUop(i).psrc(1) := updatedPsrc2(i)
updatedUop(i).psrc(2) := updatedPsrc3(i)
updatedUop(i).old_pdest := updatedOldPdest(i)
updatedUop(i).debugInfo.eliminatedMove := io.fromRename(i).bits.eliminatedMove
if (EnableIntMoveElim) {
updatedUop(i).debugInfo.eliminatedMove := io.fromRename(i).bits.eliminatedMove
} else {
updatedUop(i).debugInfo.eliminatedMove := DontCare
}
// update commitType
updatedUop(i).ctrl.commitType := updatedCommitType(i)
// update roqIdx, lqIdx, sqIdx
Expand Down Expand Up @@ -264,10 +268,16 @@ class Dispatch1(implicit p: Parameters) extends XSModule with HasExceptionNO {
// send uops to dispatch queues
// Note that if one of their previous instructions cannot enqueue, they should not enter dispatch queue.
// We use notBlockedByPrevious here.
io.toIntDq.needAlloc(i) := io.fromRename(i).valid && isInt(i) && !io.fromRename(i).bits.eliminatedMove
if (EnableIntMoveElim) {
io.toIntDq.needAlloc(i) := io.fromRename(i).valid && isInt(i) && !io.fromRename(i).bits.eliminatedMove
io.toIntDq.req(i).valid := io.fromRename(i).valid && !hasException(i) && isInt(i) && thisCanActualOut(i) &&
io.enqLsq.canAccept && io.enqRoq.canAccept && io.toFpDq.canAccept && io.toLsDq.canAccept && !io.fromRename(i).bits.eliminatedMove
} else {
io.toIntDq.needAlloc(i) := io.fromRename(i).valid && isInt(i)
io.toIntDq.req(i).valid := io.fromRename(i).valid && !hasException(i) && isInt(i) && thisCanActualOut(i) &&
io.enqLsq.canAccept && io.enqRoq.canAccept && io.toFpDq.canAccept && io.toLsDq.canAccept
}
io.toIntDq.req(i).bits := updatedUop(i)
io.toIntDq.req(i).valid := io.fromRename(i).valid && !hasException(i) && isInt(i) && thisCanActualOut(i) &&
io.enqLsq.canAccept && io.enqRoq.canAccept && io.toFpDq.canAccept && io.toLsDq.canAccept && !io.fromRename(i).bits.eliminatedMove

io.toFpDq.needAlloc(i) := io.fromRename(i).valid && isFp(i)
io.toFpDq.req(i).bits := updatedUop(i)
Expand Down Expand Up @@ -298,7 +308,11 @@ class Dispatch1(implicit p: Parameters) extends XSModule with HasExceptionNO {
p"roq ${updatedUop(i).roqIdx}, lq ${updatedUop(i).lqIdx}, sq ${updatedUop(i).sqIdx})\n"
)

io.allocPregs(i).isInt := io.fromRename(i).valid && io.fromRename(i).bits.ctrl.rfWen && (io.fromRename(i).bits.ctrl.ldest =/= 0.U) && !io.fromRename(i).bits.eliminatedMove
if (EnableIntMoveElim) {
io.allocPregs(i).isInt := io.fromRename(i).valid && io.fromRename(i).bits.ctrl.rfWen && (io.fromRename(i).bits.ctrl.ldest =/= 0.U) && !io.fromRename(i).bits.eliminatedMove
} else {
io.allocPregs(i).isInt := io.fromRename(i).valid && io.fromRename(i).bits.ctrl.rfWen && (io.fromRename(i).bits.ctrl.ldest =/= 0.U)
}
io.allocPregs(i).isFp := io.fromRename(i).valid && io.fromRename(i).bits.ctrl.fpWen
io.allocPregs(i).preg := io.fromRename(i).bits.pdest
}
Expand Down
128 changes: 71 additions & 57 deletions src/main/scala/xiangshan/backend/rename/Rename.scala
Expand Up @@ -61,14 +61,22 @@ class Rename(implicit p: Parameters) extends XSModule {
rat.io.walkWen := io.roqCommits.isWalk
}

// decide if given instruction needs allocating a new physical register (CfCtrl: from decode; RoqCommitInfo: from roq)
def needDestReg[T <: CfCtrl](fp: Boolean, x: T): Bool = {
{if(fp) x.ctrl.fpWen else x.ctrl.rfWen && (x.ctrl.ldest =/= 0.U)}
}
def needDestRegCommit[T <: RoqCommitInfo](fp: Boolean, x: T): Bool = {
{if(fp) x.fpWen else x.rfWen && (x.ldest =/= 0.U)}
}

// connect [flush + redirect + walk] ports for __float point__ & __integer__ free list
Seq(fpFreeList, intFreeList).foreach{ fl =>
Seq((fpFreeList, true), (intFreeList, false)).foreach{ case (fl, isFp) =>
fl.flush := io.flush
fl.redirect := io.redirect.valid
fl.walk := io.roqCommits.isWalk
// when isWalk, use stepBack to restore head pointer of free list
// (if ME enabled, stepBack of intFreeList should be useless thus optimized out)
fl.stepBack := PopCount(io.roqCommits.valid.zip(io.roqCommits.info).map{case (v, i) => v && needDestRegCommit(true, i)})
fl.stepBack := PopCount(io.roqCommits.valid.zip(io.roqCommits.info).map{case (v, i) => v && needDestRegCommit(isFp, i)})
// walk has higher priority than allocation and thus we don't use isWalk here
// only when both fp and int free list and dispatch1 has enough space can we do allocation
fl.doAllocate := fl.canAllocate && io.out(0).ready
Expand All @@ -77,14 +85,6 @@ class Rename(implicit p: Parameters) extends XSModule {
// dispatch1 ready ++ float point free list ready ++ int free list ready ++ not walk
val canOut = io.out(0).ready && fpFreeList.canAllocate && intFreeList.canAllocate && !io.roqCommits.isWalk

// decide if given instruction needs allocating a new physical register (CfCtrl: from decode; RoqCommitInfo: from roq)
def needDestReg[T <: CfCtrl](fp: Boolean, x: T): Bool = {
{if(fp) x.ctrl.fpWen else x.ctrl.rfWen && (x.ctrl.ldest =/= 0.U)}
}
def needDestRegCommit[T <: RoqCommitInfo](fp: Boolean, x: T): Bool = {
{if(fp) x.fpWen else x.rfWen && (x.ldest =/= 0.U)}
}


// speculatively assign the instruction with an roqIdx
val validCount = PopCount(io.in.map(_.valid)) // number of instructions waiting to enter roq (from decode)
Expand Down Expand Up @@ -181,37 +181,47 @@ class Rename(implicit p: Parameters) extends XSModule {
uops(i).psrc(2) := fpPhySrcVec(2)
uops(i).old_pdest := Mux(uops(i).ctrl.rfWen, intOldPdest, fpOldPdest)

if (i == 0) {
// calculate meEnable
meEnable(i) := isMove(i) && !isMax.get(uops(i).psrc(0))
} else {
// compare psrc0
psrc_cmp(i-1) := Cat((0 until i).map(j => {
uops(i).psrc(0) === uops(j).psrc(0) && io.in(i).bits.ctrl.isMove && io.in(j).bits.ctrl.isMove
}) /* reverse is not necessary here */)
if (EnableIntMoveElim) {

// calculate meEnable
meEnable(i) := isMove(i) && !(io.renameBypass.lsrc1_bypass(i-1).orR | psrc_cmp(i-1).orR | isMax.get(uops(i).psrc(0)))
}
uops(i).eliminatedMove := meEnable(i)

// send psrc of eliminated move instructions to free list and label them as eliminated
when (meEnable(i)) {
intFreeList.asInstanceOf[freelist.MEFreeList].psrcOfMove(i).valid := true.B
intFreeList.asInstanceOf[freelist.MEFreeList].psrcOfMove(i).bits := uops(i).psrc(0)
XSInfo(io.in(i).valid && io.out(i).valid, p"Move instruction ${Hexadecimal(io.in(i).bits.cf.pc)} eliminated successfully! psrc:${uops(i).psrc(0)}\n")
} .otherwise {
intFreeList.asInstanceOf[freelist.MEFreeList].psrcOfMove(i).valid := false.B
intFreeList.asInstanceOf[freelist.MEFreeList].psrcOfMove(i).bits := DontCare
XSInfo(io.in(i).valid && io.out(i).valid && isMove(i), p"Move instruction ${Hexadecimal(io.in(i).bits.cf.pc)} failed to be eliminated! psrc:${uops(i).psrc(0)}\n")
if (i == 0) {
// calculate meEnable
meEnable(i) := isMove(i) && !isMax.get(uops(i).psrc(0))
} else {
// compare psrc0
psrc_cmp(i-1) := Cat((0 until i).map(j => {
uops(i).psrc(0) === uops(j).psrc(0) && io.in(i).bits.ctrl.isMove && io.in(j).bits.ctrl.isMove
}) /* reverse is not necessary here */)

// calculate meEnable
meEnable(i) := isMove(i) && !(io.renameBypass.lsrc1_bypass(i-1).orR | psrc_cmp(i-1).orR | isMax.get(uops(i).psrc(0)))
}
uops(i).eliminatedMove := meEnable(i)

// send psrc of eliminated move instructions to free list and label them as eliminated
when (meEnable(i)) {
intFreeList.asInstanceOf[freelist.MEFreeList].psrcOfMove(i).valid := true.B
intFreeList.asInstanceOf[freelist.MEFreeList].psrcOfMove(i).bits := uops(i).psrc(0)
XSInfo(io.in(i).valid && io.out(i).valid, p"Move instruction ${Hexadecimal(io.in(i).bits.cf.pc)} eliminated successfully! psrc:${uops(i).psrc(0)}\n")
} .otherwise {
intFreeList.asInstanceOf[freelist.MEFreeList].psrcOfMove(i).valid := false.B
intFreeList.asInstanceOf[freelist.MEFreeList].psrcOfMove(i).bits := DontCare
XSInfo(io.in(i).valid && io.out(i).valid && isMove(i), p"Move instruction ${Hexadecimal(io.in(i).bits.cf.pc)} failed to be eliminated! psrc:${uops(i).psrc(0)}\n")
}

// update pdest
uops(i).pdest := Mux(meEnable(i), uops(i).psrc(0), // move eliminated
Mux(needIntDest(i), intFreeList.allocatePhyReg(i), // normal int inst
Mux(uops(i).ctrl.ldest===0.U && uops(i).ctrl.rfWen, 0.U // int inst with dst=r0
/* default */, fpFreeList.allocatePhyReg(i)))) // normal fp inst
} else {
uops(i).eliminatedMove := DontCare
psrc_cmp.foreach(_ := DontCare)
// update pdest
uops(i).pdest := Mux(needIntDest(i), intFreeList.allocatePhyReg(i), // normal int inst
Mux(uops(i).ctrl.ldest===0.U && uops(i).ctrl.rfWen, 0.U // int inst with dst=r0
/* default */, fpFreeList.allocatePhyReg(i))) // normal fp inst
}

// update pdest
uops(i).pdest := Mux(meEnable(i), uops(i).psrc(0), // move eliminated
Mux(needIntDest(i), intFreeList.allocatePhyReg(i), // normal int inst
Mux(uops(i).ctrl.ldest===0.U && uops(i).ctrl.rfWen, 0.U // int inst with dst=r0
/* default */, fpFreeList.allocatePhyReg(i)))) // normal fp inst

// write speculative rename table
// we update rat later inside commit code
intSpecWen(i) := intFreeList.allocateReq(i) && intFreeList.canAllocate && intFreeList.doAllocate && !io.roqCommits.isWalk
Expand Down Expand Up @@ -367,13 +377,12 @@ class Rename(implicit p: Parameters) extends XSModule {
for (i <- 0 until CommitWidth) {
val info = io.roqCommits.info(i)
XSDebug(io.roqCommits.isWalk && io.roqCommits.valid(i), p"[#$i walk info] pc:${Hexadecimal(info.pc)} " +
p"ldest:${info.ldest} rfWen:${info.rfWen} fpWen:${info.fpWen} eliminatedMove:${info.eliminatedMove} " +
p"ldest:${info.ldest} rfWen:${info.rfWen} fpWen:${info.fpWen} " + { if (EnableIntMoveElim) p"eliminatedMove:${info.eliminatedMove} " else p"" } +
p"pdest:${info.pdest} old_pdest:${info.old_pdest}\n")
}

XSDebug(p"inValidVec: ${Binary(Cat(io.in.map(_.valid)))}\n")
XSInfo(!canOut, p"stall at rename, hasValid:${hasValid}, fpCanAlloc:${fpFreeList.canAllocate}, intCanAlloc:${intFreeList.canAllocate} dispatch1ready:${io.out(0).ready}, isWalk:${io.roqCommits.isWalk}\n")
XSInfo(meEnable.asUInt().orR(), p"meEnableVec:${Binary(meEnable.asUInt)}\n")

intRat.io.debug_rdata <> io.debug_int_rat
fpRat.io.debug_rdata <> io.debug_fp_rat
Expand All @@ -387,22 +396,27 @@ class Rename(implicit p: Parameters) extends XSModule {
XSPerfAccumulate("stall_cycle_fp", hasValid && io.out(0).ready && !fpFreeList.canAllocate && intFreeList.canAllocate && !io.roqCommits.isWalk)
XSPerfAccumulate("stall_cycle_int", hasValid && io.out(0).ready && fpFreeList.canAllocate && !intFreeList.canAllocate && !io.roqCommits.isWalk)
XSPerfAccumulate("stall_cycle_walk", hasValid && io.out(0).ready && fpFreeList.canAllocate && intFreeList.canAllocate && io.roqCommits.isWalk)
XSPerfAccumulate("move_instr_count", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove)))
XSPerfAccumulate("move_elim_enabled", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && meEnable(i))))
XSPerfAccumulate("move_elim_cancelled", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i))))
XSPerfAccumulate("move_elim_cancelled_psrc_bypass", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else io.renameBypass.lsrc1_bypass(i-1).orR })))
XSPerfAccumulate("move_elim_cancelled_cnt_limit", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && isMax.get(io.out(i).bits.psrc(0)))))
XSPerfAccumulate("move_elim_cancelled_inc_more_than_one", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else psrc_cmp(i-1).orR })))

// to make sure meEnable functions as expected
for (i <- 0 until RenameWidth) {
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && isMax.get(io.out(i).bits.psrc(0)),
p"ME_CANCELLED: ref counter hits max value (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else io.renameBypass.lsrc1_bypass(i-1).orR },
p"ME_CANCELLED: RAW dependency (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else psrc_cmp(i-1).orR },
p"ME_CANCELLED: psrc duplicates with former instruction (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")


if (EnableIntMoveElim) {
XSPerfAccumulate("move_instr_count", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove)))
XSPerfAccumulate("move_elim_enabled", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && meEnable(i))))
XSPerfAccumulate("move_elim_cancelled", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i))))
XSPerfAccumulate("move_elim_cancelled_psrc_bypass", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else io.renameBypass.lsrc1_bypass(i-1).orR })))
XSPerfAccumulate("move_elim_cancelled_cnt_limit", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && isMax.get(io.out(i).bits.psrc(0)))))
XSPerfAccumulate("move_elim_cancelled_inc_more_than_one", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else psrc_cmp(i-1).orR })))

// to make sure meEnable functions as expected
for (i <- 0 until RenameWidth) {
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && isMax.get(io.out(i).bits.psrc(0)),
p"ME_CANCELLED: ref counter hits max value (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else io.renameBypass.lsrc1_bypass(i-1).orR },
p"ME_CANCELLED: RAW dependency (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else psrc_cmp(i-1).orR },
p"ME_CANCELLED: psrc duplicates with former instruction (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
}
XSDebug(VecInit(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i))).asUInt().orR,
p"ME_CANCELLED: pc group [ " + (0 until RenameWidth).map(i => p"fire:${io.out(i).fire()},pc:0x${Hexadecimal(io.in(i).bits.cf.pc)} ").reduceLeft(_ + _) + p"]\n")
XSInfo(meEnable.asUInt().orR(), p"meEnableVec:${Binary(meEnable.asUInt)}\n")
}
XSDebug(VecInit(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i))).asUInt().orR,
p"ME_CANCELLED: pc group [ " + (0 until RenameWidth).map(i => p"fire:${io.out(i).fire()},pc:0x${Hexadecimal(io.in(i).bits.cf.pc)} ").reduceLeft(_ + _) + p"]\n")
}
28 changes: 21 additions & 7 deletions src/main/scala/xiangshan/backend/roq/Roq.scala
Expand Up @@ -680,8 +680,12 @@ class Roq(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc
// enqueue logic set 6 writebacked to false
for (i <- 0 until RenameWidth) {
when (canEnqueue(i)) {
eliminatedMove(enqPtrVec(i).value) := io.enq.req(i).bits.eliminatedMove
writebacked(enqPtrVec(i).value) := io.enq.req(i).bits.eliminatedMove
if (EnableIntMoveElim) {
eliminatedMove(enqPtrVec(i).value) := io.enq.req(i).bits.eliminatedMove
writebacked(enqPtrVec(i).value) := io.enq.req(i).bits.eliminatedMove
} else {
writebacked(enqPtrVec(i).value) := false.B
}
val isStu = io.enq.req(i).bits.ctrl.fuType === FuType.stu
store_data_writebacked(enqPtrVec(i).value) := !isStu
}
Expand Down Expand Up @@ -730,7 +734,11 @@ class Roq(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc
wdata.fpWen := req.ctrl.fpWen
wdata.wflags := req.ctrl.fpu.wflags
wdata.commitType := req.ctrl.commitType
wdata.eliminatedMove := req.eliminatedMove
if (EnableIntMoveElim) {
wdata.eliminatedMove := req.eliminatedMove
} else {
wdata.eliminatedMove := DontCare
}
wdata.pdest := req.pdest
wdata.old_pdest := req.old_pdest
wdata.ftqIdx := req.cf.ftqPtr
Expand Down Expand Up @@ -820,8 +828,10 @@ class Roq(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc
XSPerfAccumulate("commitInstr", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid)))
val commitIsMove = deqPtrVec.map(_.value).map(ptr => debug_microOp(ptr).ctrl.isMove)
XSPerfAccumulate("commitInstrMove", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid.zip(commitIsMove).map{ case (v, m) => v && m })))
val commitMoveElim = deqPtrVec.map(_.value).map(ptr => debug_microOp(ptr).debugInfo.eliminatedMove)
XSPerfAccumulate("commitInstrMoveElim", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid zip commitMoveElim map { case (v, e) => v && e })))
if (EnableIntMoveElim) {
val commitMoveElim = deqPtrVec.map(_.value).map(ptr => debug_microOp(ptr).debugInfo.eliminatedMove)
XSPerfAccumulate("commitInstrMoveElim", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid zip commitMoveElim map { case (v, e) => v && e })))
}
val commitIsLoad = io.commits.info.map(_.commitType).map(_ === CommitType.LOAD)
val commitLoadValid = io.commits.valid.zip(commitIsLoad).map{ case (v, t) => v && t }
XSPerfAccumulate("commitInstrLoad", Mux(io.commits.isWalk, 0.U, PopCount(commitLoadValid)))
Expand Down Expand Up @@ -883,8 +893,12 @@ class Roq(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc
difftest.io.valid := RegNext(io.commits.valid(i) && !io.commits.isWalk)
difftest.io.pc := RegNext(SignExt(uop.cf.pc, XLEN))
difftest.io.instr := RegNext(uop.cf.instr)
// when committing an eliminated move instruction, we must make sure that skip is properly set to false (output from EXU is random value)
difftest.io.skip := RegNext(Mux(uop.eliminatedMove, false.B, exuOut.isMMIO || exuOut.isPerfCnt))
if (EnableIntMoveElim) {
// when committing an eliminated move instruction, we must make sure that skip is properly set to false (output from EXU is random value)
difftest.io.skip := RegNext(Mux(uop.eliminatedMove, false.B, exuOut.isMMIO || exuOut.isPerfCnt))
} else {
difftest.io.skip := RegNext(exuOut.isMMIO || exuOut.isPerfCnt)
}
difftest.io.isRVC := RegNext(uop.cf.pd.isRVC)
difftest.io.scFailed := RegNext(!uop.diffTestDebugLrScValid &&
uop.ctrl.fuType === FuType.mou &&
Expand Down

0 comments on commit 5eb4af5

Please sign in to comment.