Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix timing of memblock #3062

Merged
merged 17 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/main/scala/top/Configs.scala
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ class MinimalConfig(n: Int = 1) extends Config(
VlMergeBufferSize = 8,
VsMergeBufferSize = 8,
UopWritebackWidth = 2,
SplitBufferSize = 8,
// ==============================
RobSize = 48,
RabSize = 96,
Expand Down
4 changes: 1 addition & 3 deletions src/main/scala/xiangshan/Parameters.scala
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,6 @@ case class XSCoreParameters
UopWritebackWidth: Int = 2,
VLUopWritebackWidth: Int = 2,
VSUopWritebackWidth: Int = 1,
SplitBufferSize: Int = 8,
VSegmentBufferSize: Int = 8,
// ==============================
UncacheBufferSize: Int = 4,
Expand All @@ -232,7 +231,7 @@ case class XSCoreParameters
EnableLdVioCheckAfterReset: Boolean = true,
EnableSoftPrefetchAfterReset: Boolean = true,
EnableCacheErrorAfterReset: Boolean = true,
EnableAccurateLoadError: Boolean = true,
EnableAccurateLoadError: Boolean = false,
EnableUncacheWriteOutstanding: Boolean = false,
EnableStorePrefetchAtIssue: Boolean = false,
EnableStorePrefetchAtCommit: Boolean = false,
Expand Down Expand Up @@ -731,7 +730,6 @@ trait HasXSParameter {
def UopWritebackWidth = coreParams.UopWritebackWidth
def VLUopWritebackWidth = coreParams.VLUopWritebackWidth
def VSUopWritebackWidth = coreParams.VSUopWritebackWidth
def SplitBufferSize = coreParams.SplitBufferSize
def VSegmentBufferSize = coreParams.VSegmentBufferSize
def UncacheBufferSize = coreParams.UncacheBufferSize
def EnableLoadToLoadForward = coreParams.EnableLoadToLoadForward
Expand Down
28 changes: 21 additions & 7 deletions src/main/scala/xiangshan/backend/MemBlock.scala
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import xiangshan.mem._
import xiangshan.mem.mdp._
import xiangshan.frontend.HasInstrMMIOConst
import xiangshan.mem.prefetch.{BasePrefecher, L1Prefetcher, SMSParams, SMSPrefetcher}
import xiangshan.backend.datapath.NewPipelineConnect

trait HasMemBlockParameters extends HasXSParameter {
// number of memory units
Expand Down Expand Up @@ -394,7 +395,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
}
// load prefetch to l1 Dcache
l1PrefetcherOpt match {
case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = true, name = Some("pf_queue_to_ldu_reg"))
case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = false, name = Some("pf_queue_to_ldu_reg"))
case None =>
l1_pf_req.valid := false.B
l1_pf_req.bits := DontCare
Expand Down Expand Up @@ -651,7 +652,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val pmp = Module(new PMP())
pmp.io.distribute_csr <> csrCtrl.distribute_csr

val pmp_check = VecInit(Seq.fill(DTlbSize)(Module(new PMPChecker(4)).io))
val pmp_check = VecInit(Seq.fill(DTlbSize)(Module(new PMPChecker(4, leaveHitMux = true)).io))
for ((p,d) <- pmp_check zip dtlb_pmps) {
p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
require(p.req.bits.size.getWidth == d.bits.size.getWidth)
Expand Down Expand Up @@ -738,11 +739,12 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// dtlb
loadUnits(i).io.tlb <> dtlb_reqs.take(LduCnt)(i)
if(i == 0 ){ // port 0 assign to vsegmentUnit
dtlb_reqs.take(LduCnt)(i).req.valid := loadUnits(i).io.tlb.req.valid || vSegmentUnit.io.dtlb.req.valid
val vsegmentDtlbReqValid = vSegmentUnit.io.dtlb.req.valid // segment TLB request needs to be delayed by 1 cycle
dtlb_reqs.take(LduCnt)(i).req.valid := loadUnits(i).io.tlb.req.valid || RegNext(vsegmentDtlbReqValid)
vSegmentUnit.io.dtlb.req.ready := dtlb_reqs.take(LduCnt)(i).req.ready
dtlb_reqs.take(LduCnt)(i).req.bits := Mux1H(Seq(
vSegmentUnit.io.dtlb.req.valid -> vSegmentUnit.io.dtlb.req.bits,
loadUnits(i).io.tlb.req.valid -> loadUnits(i).io.tlb.req.bits
RegNext(vsegmentDtlbReqValid) -> RegEnable(vSegmentUnit.io.dtlb.req.bits, vsegmentDtlbReqValid),
loadUnits(i).io.tlb.req.valid -> loadUnits(i).io.tlb.req.bits
))
}
// pmp
Expand Down Expand Up @@ -1336,7 +1338,13 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
vsSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid && LSUOpType.isVecSt(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType) &&
vLsuCanaccept(i) && !isSegment
vsSplit(i).io.toMergeBuffer <> vsMergeBuffer(i).io.fromSplit.head
vsSplit(i).io.out <> storeUnits(i).io.vecstin // Todo: May be some balance mechanism is needed
// Connect the vector-store split output to the store unit's vecstin port through
// NewPipelineConnect (instance label "VsSplitConnectStu").
// Flush condition: when the split output fires this cycle, the incoming uop's robIdx
// is checked against io.redirect; otherwise the uop currently presented on vecstin is checked.
// NOTE(review): the argument roles (left, right, right-side fire, flush, name) are inferred
// from this call site only - confirm against the NewPipelineConnect definition.
NewPipelineConnect(
vsSplit(i).io.out, storeUnits(i).io.vecstin, storeUnits(i).io.vecstin.fire,
Mux(vsSplit(i).io.out.fire,
vsSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect),
storeUnits(i).io.vecstin.bits.uop.robIdx.needFlush(io.redirect)),
Option("VsSplitConnectStu")
)
vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data

}
Expand All @@ -1346,7 +1354,13 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
vlSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid && LSUOpType.isVecLd(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType) &&
vLsuCanaccept(i) && !isSegment
vlSplit(i).io.toMergeBuffer <> vlMergeBuffer.io.fromSplit(i)
vlSplit(i).io.out <> loadUnits(i).io.vecldin // Todo: May be some balance mechanism is needed
// Connect the vector-load split output to the load unit's vecldin port through
// NewPipelineConnect (instance label "VlSplitConnectLdu").
// Flush condition: when the split output fires this cycle, the incoming uop's robIdx
// is checked against io.redirect; otherwise the uop currently presented on vecldin is checked.
// NOTE(review): the argument roles (left, right, right-side fire, flush, name) are inferred
// from this call site only - confirm against the NewPipelineConnect definition.
NewPipelineConnect(
vlSplit(i).io.out, loadUnits(i).io.vecldin, loadUnits(i).io.vecldin.fire,
Mux(vlSplit(i).io.out.fire,
vlSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect),
loadUnits(i).io.vecldin.bits.uop.robIdx.needFlush(io.redirect)),
Option("VlSplitConnectLdu")
)

}
(0 until LduCnt).foreach{i=>
Expand Down
8 changes: 6 additions & 2 deletions src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ case class DCacheParameters
blockBytes: Int = 64,
nMaxPrefetchEntry: Int = 1,
alwaysReleaseData: Boolean = false,
isKeywordBitsOpt: Option[Boolean] = Some(true)
isKeywordBitsOpt: Option[Boolean] = Some(true),
enableDataEcc: Boolean = false,
enableTagEcc: Boolean = false
) extends L1CacheParameters {
// if sets * blockBytes > 4KB(page size),
// cache alias will happen,
Expand Down Expand Up @@ -128,6 +130,8 @@ trait HasDCacheParameters extends HasL1CacheParameters with HasL1PrefetchSourceP
require(cfg.nMissEntries < cfg.nReleaseEntries)
val nEntries = cfg.nMissEntries + cfg.nReleaseEntries
val releaseIdBase = cfg.nMissEntries
val EnableDataEcc = cacheParams.enableDataEcc
val EnableTagEcc = cacheParams.enableTagEcc

// banked dcache support
val DCacheSetDiv = 1
Expand Down Expand Up @@ -363,7 +367,7 @@ class DCacheWordReq(implicit p: Parameters) extends DCacheBundle
val isFirstIssue = Bool()
val replayCarry = new ReplayCarry(nWays)
val lqIdx = new LqPtr

val debug_robIdx = UInt(log2Ceil(RobSize).W)
def dump() = {
XSDebug("DCacheWordReq: cmd: %x vaddr: %x data: %x mask: %x id: %d\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,6 @@ case object HasDataEccParam
// -----------------------------------------------------------------
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule
{
val EnableDataEcc = false
val DataEccParam = if(EnableDataEcc) Some(HasDataEccParam) else None
val ReadlinePortErrorIndex = LoadPipelineWidth
val io = IO(new DCacheBundle {
Expand Down
10 changes: 8 additions & 2 deletions src/main/scala/xiangshan/cache/dcache/loadpipe/LoadPipe.scala
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPer
val s1_will_send_miss_req = s1_valid && !s1_nack && !s1_hit

// data read
io.banked_data_read.valid := s1_fire && !s1_nack && !io.lsu.s1_kill && !s1_is_prefetch && s1_hit
io.banked_data_read.valid := s1_fire && !s1_nack && !io.lsu.s1_kill && !s1_is_prefetch
io.banked_data_read.bits.addr := s1_vaddr
io.banked_data_read.bits.way_en := s1_pred_tag_match_way_dup_dc
io.banked_data_read.bits.bankMask := s1_bank_oh
Expand Down Expand Up @@ -350,7 +350,7 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPer

val s2_instrtype = s2_req.instrtype

val s2_tag_error = dcacheParameters.tagCode.decode(s2_encTag).error // error reported by tag ecc check
val s2_tag_error = WireInit(false.B)
val s2_flag_error = RegEnable(s1_flag_error, s1_fire)

val s2_hit_prefetch = RegEnable(s1_hit_prefetch, s1_fire)
Expand All @@ -366,6 +366,12 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPer

val s2_can_send_miss_req = RegEnable(s1_will_send_miss_req, s1_fire)

// The tag ECC decoder is only elaborated when EnableTagEcc is set;
// otherwise the tag error is tied to false.
s2_tag_error := (
  if (EnableTagEcc) dcacheParameters.tagCode.decode(s2_encTag).error // error reported by tag ecc check
  else false.B
)

// send load miss to miss queue
io.miss_req.valid := s2_valid && s2_can_send_miss_req
io.miss_req.bits := DontCare
Expand Down
12 changes: 9 additions & 3 deletions src/main/scala/xiangshan/cache/dcache/mainpipe/MainPipe.scala
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ class MainPipe(implicit p: Parameters) extends DCacheModule with HasPerfEvents w
val s2_coh = RegEnable(s1_coh, s1_fire)
val s2_banked_store_wmask = RegEnable(s1_banked_store_wmask, s1_fire)
val s2_flag_error = RegEnable(s1_flag_error, s1_fire)
val s2_tag_error = dcacheParameters.tagCode.decode(s2_encTag).error && s2_need_tag
val s2_tag_error = WireInit(false.B)
val s2_l2_error = io.refill_info.bits.error
val s2_error = s2_flag_error || s2_tag_error || s2_l2_error // data_error not included

Expand All @@ -408,6 +408,12 @@ class MainPipe(implicit p: Parameters) extends DCacheModule with HasPerfEvents w
val s2_amo_hit = s2_hit && !s2_req.probe && !s2_req.miss && s2_req.isAMO
val s2_store_hit = s2_hit && !s2_req.probe && !s2_req.miss && s2_req.isStore

// The tag ECC decoder is only elaborated when EnableTagEcc is set, and its result
// is qualified by s2_need_tag; otherwise the tag error is tied to false.
s2_tag_error := (
  if (EnableTagEcc) dcacheParameters.tagCode.decode(s2_encTag).error && s2_need_tag
  else false.B
)

s2_s0_set_conlict := s2_valid_dup(0) && s0_idx === s2_idx
s2_s0_set_conlict_store := s2_valid_dup(1) && store_idx === s2_idx

Expand Down Expand Up @@ -718,7 +724,7 @@ class MainPipe(implicit p: Parameters) extends DCacheModule with HasPerfEvents w
val amo_update_meta_dup_for_meta_w_valid = s3_req_source_dup_for_meta_w_valid === AMO_SOURCE.U &&
!s3_req_probe_dup_for_meta_w_valid &&
s3_hit_coh_dup_for_meta_w_valid =/= s3_new_hit_coh_dup_for_meta_w_valid
val update_meta_dup_for_meta_w_valid =
val update_meta_dup_for_meta_w_valid =
miss_update_meta_dup_for_meta_w_valid ||
probe_update_meta_dup_for_meta_w_valid ||
store_update_meta_dup_for_meta_w_valid ||
Expand Down Expand Up @@ -1481,7 +1487,7 @@ class MainPipe(implicit p: Parameters) extends DCacheModule with HasPerfEvents w
val atomic_hit_resp_valid = s3_valid_dup(10) && (s3_amo_can_go || s3_miss_can_go && (s3_req.isAMO || s3_req.miss))

io.atomic_resp.valid := atomic_replay_resp_valid || atomic_hit_resp_valid
io.atomic_resp.bits := Mux(atomic_replay_resp_valid, atomic_replay_resp, atomic_hit_resp)
io.atomic_resp.bits := Mux(atomic_replay_resp_valid, atomic_replay_resp, atomic_hit_resp)

// io.replace_resp.valid := s3_fire && s3_req_replace_dup(3)
// io.replace_resp.bits := s3_req.miss_id
Expand Down
55 changes: 40 additions & 15 deletions src/main/scala/xiangshan/cache/dcache/meta/TagArray.scala
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,13 @@ class TagEccWriteReq(implicit p: Parameters) extends TagReadReq {
val ecc = UInt(eccTagBits.W)
}

class TagArray(implicit p: Parameters) extends DCacheModule {
case object HasTagEccParam

/** Shared base class for the dcache tag arrays.
  *
  * Provides `TagEccParam`: `Some(HasTagEccParam)` when `EnableTagEcc` is true,
  * `None` otherwise.
  */
abstract class AbstractTagArray(implicit p: Parameters) extends DCacheModule {
  val TagEccParam = Some(HasTagEccParam).filter(_ => EnableTagEcc)
}

class TagArray(implicit p: Parameters) extends AbstractTagArray {
val io = IO(new Bundle() {
val read = Flipped(DecoupledIO(new TagReadReq))
val resp = Output(Vec(nWays, UInt(tagBits.W)))
Expand All @@ -62,8 +68,12 @@ class TagArray(implicit p: Parameters) extends DCacheModule {
val tag_array = Module(new SRAMTemplate(UInt(tagBits.W), set = nSets, way = nWays,
shouldReset = false, holdRead = false, singlePort = true))

val ecc_array = Module(new SRAMTemplate(UInt(eccTagBits.W), set = nSets, way = nWays,
shouldReset = false, holdRead = false, singlePort = true))
// Optional ECC SRAM: one SRAMTemplate of eccTagBits-wide entries (nSets x nWays)
// is instantiated only when TagEccParam is defined; otherwise ecc_array is None.
val ecc_array = TagEccParam.map {
case _ =>
val ecc = Module(new SRAMTemplate(UInt(eccTagBits.W), set = nSets, way = nWays,
shouldReset = false, holdRead = false, singlePort = true))
ecc
}

val wen = rst || io.write.valid
tag_array.io.w.req.valid := wen
Expand All @@ -77,12 +87,16 @@ class TagArray(implicit p: Parameters) extends DCacheModule {
val ecc_waddr = Mux(rst, rst_cnt, io.ecc_write.bits.idx)
val ecc_wdata = Mux(rst, rstVal, io.ecc_write.bits.ecc)
val ecc_wmask = Mux(rst || (nWays == 1).B, (-1).asSInt, io.ecc_write.bits.way_en.asSInt).asBools
ecc_array.io.w.req.valid := ecc_wen
ecc_array.io.w.req.bits.apply(
setIdx = ecc_waddr,
data = ecc_wdata,
waymask = VecInit(ecc_wmask).asUInt
)
// ECC write port: driven only when the ECC SRAM exists; nothing to do otherwise.
ecc_array.foreach { eccSram =>
  eccSram.io.w.req.valid := ecc_wen
  eccSram.io.w.req.bits.apply(
    setIdx = ecc_waddr,
    data = ecc_wdata,
    waymask = VecInit(ecc_wmask).asUInt
  )
}

// tag read
val ren = io.read.fire
Expand All @@ -93,17 +107,28 @@ class TagArray(implicit p: Parameters) extends DCacheModule {
XSPerfAccumulate("part_tag_read_counter", tag_array.io.r.req.valid)

val ecc_ren = io.ecc_read.fire
ecc_array.io.r.req.valid := ecc_ren
ecc_array.io.r.req.bits.apply(setIdx = io.ecc_read.bits.idx)
io.ecc_resp := ecc_array.io.r.resp.data
// ECC read port: without an ECC SRAM the response is tied to all zeros;
// with one, issue the read and forward the SRAM response data.
ecc_array.fold {
  io.ecc_resp := 0.U.asTypeOf(io.ecc_resp)
} { eccSram =>
  eccSram.io.r.req.valid := ecc_ren
  eccSram.io.r.req.bits.apply(setIdx = io.ecc_read.bits.idx)
  io.ecc_resp := eccSram.io.r.resp.data
}

io.write.ready := !rst
io.read.ready := !wen
io.ecc_write.ready := !rst
io.ecc_read.ready := !ecc_wen
// ECC port readiness: with an ECC SRAM, writes wait for reset to finish and reads
// yield to ECC writes; without one, both ports are always ready.
io.ecc_write.ready := (if (ecc_array.isDefined) !rst else true.B)
io.ecc_read.ready := (if (ecc_array.isDefined) !ecc_wen else true.B)
}

class DuplicatedTagArray(readPorts: Int)(implicit p: Parameters) extends DCacheModule {
class DuplicatedTagArray(readPorts: Int)(implicit p: Parameters) extends AbstractTagArray {
val io = IO(new Bundle() {
val read = Vec(readPorts, Flipped(DecoupledIO(new TagReadReq)))
val resp = Output(Vec(readPorts, Vec(nWays, UInt(encTagBits.W))))
Expand Down
27 changes: 2 additions & 25 deletions src/main/scala/xiangshan/mem/lsqueue/LoadQueueRAW.scala
Original file line number Diff line number Diff line change
Expand Up @@ -267,31 +267,8 @@ class LoadQueueRAW(implicit p: Parameters) extends XSModule
val numSelectGroups = scala.math.ceil(valid.length.toFloat / SelectGroupSize).toInt

// group info
val selectValidGroups =
if (valid.length <= SelectGroupSize) {
Seq(valid)
} else {
(0 until numSelectGroups).map(g => {
if (valid.length < (g + 1) * SelectGroupSize) {
valid.takeRight(valid.length - g * SelectGroupSize)
} else {
(0 until SelectGroupSize).map(j => valid(g * SelectGroupSize + j))
}
})
}
val selectBitsGroups =
if (bits.length <= SelectGroupSize) {
Seq(bits)
} else {
(0 until numSelectGroups).map(g => {
if (bits.length < (g + 1) * SelectGroupSize) {
bits.takeRight(bits.length - g * SelectGroupSize)
} else {
(0 until SelectGroupSize).map(j => bits(g * SelectGroupSize + j))
}
})
}

val selectValidGroups = valid.grouped(SelectGroupSize).toList
val selectBitsGroups = bits.grouped(SelectGroupSize).toList
// select logic
if (valid.length <= SelectGroupSize) {
val (selValid, selBits) = selectPartialOldest(valid, bits)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,7 @@ class LoadQueueReplay(implicit p: Parameters) extends XSModule
needEnqueue(i) && !io.enq(i).bits.isLoadReplay
})

val canAcceptCount = PopCount(freeList.io.canAllocate)
for ((enq, w) <- io.enq.zipWithIndex) {
vaddrModule.io.wen(w) := false.B
freeList.io.doAllocate(w) := false.B
Expand All @@ -611,7 +612,7 @@ class LoadQueueReplay(implicit p: Parameters) extends XSModule

// Allocated ready
val offset = PopCount(newEnqueue.take(w))
val canAccept = freeList.io.canAllocate(offset)
val canAccept = canAcceptCount >= (w+1).U
val enqIndex = Mux(enq.bits.isLoadReplay, enq.bits.schedIndex, freeList.io.allocateSlot(offset))
enqIndexOH(w) := UIntToOH(enqIndex)
enq.ready := Mux(enq.bits.isLoadReplay, true.B, canAccept)
Expand Down
4 changes: 3 additions & 1 deletion src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala
Original file line number Diff line number Diff line change
Expand Up @@ -736,10 +736,12 @@ class StoreQueue(implicit p: Parameters) extends XSModule
// TODO: CAN NOT deal with vector mmio now!
val s_idle :: s_req :: s_resp :: s_wb :: s_wait :: Nil = Enum(5)
val uncacheState = RegInit(s_idle)
val uncacheUop = Reg(new DynInst)
switch(uncacheState) {
is(s_idle) {
when(RegNext(io.rob.pendingst && uop(deqPtr).robIdx === io.rob.pendingPtr && pending(deqPtr) && allocated(deqPtr) && datavalid(deqPtr) && addrvalid(deqPtr))) {
uncacheState := s_req
uncacheUop := uop(deqPtr)
}
}
is(s_req) {
Expand Down Expand Up @@ -811,7 +813,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule

// (4) scalar store: writeback to ROB (and other units): mark as writebacked
io.mmioStout.valid := uncacheState === s_wb && !isVec(deqPtr)
io.mmioStout.bits.uop := uop(deqPtr)
io.mmioStout.bits.uop := uncacheUop
io.mmioStout.bits.uop.sqIdx := deqPtrExt(0)
io.mmioStout.bits.data := shiftDataToLow(paddrModule.io.rdata(0), dataModule.io.rdata(0).data) // dataModule.io.rdata.read(deqPtr)
io.mmioStout.bits.debug.isMMIO := true.B
Expand Down
15 changes: 7 additions & 8 deletions src/main/scala/xiangshan/mem/lsqueue/StoreQueueData.scala
Original file line number Diff line number Diff line change
Expand Up @@ -236,14 +236,13 @@ class SQData8Module(numEntries: Int, numRead: Int, numWrite: Int, numForward: In
val needCheck1 = io.needForward(i)(1)(j)
val needCheck0Reg = RegNext(needCheck0)
val needCheck1Reg = RegNext(needCheck1)
(0 until XLEN / 8).foreach(k => {
matchResultVec(j).validFast := needCheck0 && data(j).valid
matchResultVec(j).valid := needCheck0Reg && data(j).valid
matchResultVec(j).data := data(j).data
matchResultVec(numEntries + j).validFast := needCheck1 && data(j).valid
matchResultVec(numEntries + j).valid := needCheck1Reg && data(j).valid
matchResultVec(numEntries + j).data := data(j).data
})

matchResultVec(j).validFast := needCheck0 && data(j).valid
matchResultVec(j).valid := needCheck0Reg && data(j).valid
matchResultVec(j).data := data(j).data
matchResultVec(numEntries + j).validFast := needCheck1 && data(j).valid
matchResultVec(numEntries + j).valid := needCheck1Reg && data(j).valid
matchResultVec(numEntries + j).data := data(j).data
}

val parallelFwdResult = parallelFwd(matchResultVec).asTypeOf(new FwdEntry)
Expand Down
Loading
Loading