hw/npu2.c: Add memory coherence directory programming

The memory coherence directory (MCD) needs to know which system memory addresses belong to the GPU. This amounts to setting a BAR and a size in the MCD to cover the addresses assigned to each of the GPUs. To ease assignment, we assume GPUs are assigned memory in a contiguous block per chip.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
open-power · Jun 6, 2017 · b5d8537 · b5d8537
1 parent bdea201
commit b5d8537
Show file tree

Hide file tree

Showing 2 changed files with 43 additions and 1 deletion.
diff --git a/hw/npu2.c b/hw/npu2.c
@@ -728,13 +728,50 @@ static int64_t npu2_ioda_reset(struct phb *phb, bool purge)
 
 static void npu2_hw_init(struct npu2 *p)
 {
-	uint64_t val;
+	int i;
+	uint64_t val, size, addr, gpu_min_addr, gpu_max_addr, total_size;
+	struct proc_chip *chip = get_chip(p->chip_id);
 
 	npu2_ioda_reset(&p->phb, false);
 
 	/* Enable XTS retry mode */
 	val = npu2_read(p, NPU2_XTS_CFG);
 	npu2_write(p, NPU2_XTS_CFG, val | NPU2_XTS_CFG_MMIOSD | NPU2_XTS_CFG_TRY_ATR_RO);
+
+	/* Init memory coherence directory (MCD) registers. The region start is read from the last link index and its end from index 0 plus that entry's size. */
+	phys_map_get(chip, GPU_MEM, NPU2_LINKS_PER_CHIP - 1, &gpu_min_addr, NULL);
+	phys_map_get(chip, GPU_MEM, 0, &gpu_max_addr, &size);
+	gpu_max_addr += size;
+
+	/* We assume GPU memory is contiguous from the first possible GPU to the
+	 * last and that every GPU has the same size, so check each entry lies within the region and has that size. */
+	for (i = 0; i < NPU2_LINKS_PER_CHIP; i++) {
+		uint64_t tmp;
+		phys_map_get(chip, GPU_MEM, i, &addr, &tmp);
+		assert((addr >= gpu_min_addr) && (addr + tmp <= gpu_max_addr));
+		assert(tmp == size);
+	}
+
+	/* We have two MCDs, so if necessary we can split the region covered
+	 * across both if total_size is not a power of two. MCD0 is given a power-of-two sized chunk starting at gpu_min_addr. */
+	total_size = gpu_max_addr - gpu_min_addr;
+	size = 1ull << ilog2(total_size);
+	val = PPC_BIT(0); /* presumably the enable/valid bit - TODO confirm against the MCD register spec */
+	val = SETFIELD(PPC_BITMASK(13, 29), val, (size >> 25) - 1); /* chunk size in 32MB (1 << 25) units, minus one */
+	val = SETFIELD(PPC_BITMASK(33, 63), val, gpu_min_addr >> 25); /* chunk base address in 32MB units */
+	xscom_write(p->chip_id, MCD0_BANK0_CN3, val);
+	total_size -= size;
+	if (total_size) {
+	/* total_size was not a power of two, but the remainder should
+	 * be if all GPUs were assigned the same size. MCD1 is programmed to cover that remainder. */
+		assert(is_pow2(total_size));
+		addr += size; /* NOTE(review): addr is whatever the final loop iteration read (index NPU2_LINKS_PER_CHIP - 1, the same index gpu_min_addr came from) */
+		size = 1ull << ilog2(total_size);
+		val = PPC_BIT(0);
+		val = SETFIELD(PPC_BITMASK(13, 29), val, (size >> 25) - 1);
+		val = SETFIELD(PPC_BITMASK(33, 63), val, addr >> 25);
+		xscom_write(p->chip_id, MCD1_BANK0_CN3, val);
+	}
 }
 
 static int64_t npu2_map_pe_dma_window_real(struct phb *phb,

diff --git a/include/npu2-regs.h b/include/npu2-regs.h
@@ -24,6 +24,11 @@ uint64_t npu2_read(struct npu2 *p, uint64_t reg);
 void npu2_write(struct npu2 *p, uint64_t reg, uint64_t val);
 void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask);
 
+/* Memory coherence directory (MCD) registers, written via xscom_write() in npu2_hw_init(). These aren't really NPU specific registers but we initialise them in NPU
+ * code */
+#define MCD0_BANK0_CN3 0x301100d
+#define MCD1_BANK0_CN3 0x301140d
+
 #define NPU2_REG_OFFSET(stack, block, offset) \
 	(((stack) << 20) | ((block) << 16) | (offset))