# ChiffonCore bring-up - Ultra96-V2 / PYNQ

1. Load the bitstream (Overlay).
2. Write the program image (`.hex`) into DRAM (a CMA buffer).
3. Set `dram_base` / `entry_pc` and issue the START signal via regbus.

In [None]:
from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
import time
import numpy as np

from pynq import Overlay, allocate, MMIO
from pynq.lib.video import *

## 0) Config

In [None]:
# Bitstream handed to Overlay().
BIT_PATH = "design_1.bit"

# Program image to load into the CPU RAM image.
IMAGE_TYPE = "hex"  # "hex" or "elf" (elf requires pyelftools)
IMAGE_PATH = "kernel.bin.hex"
BYTESWAP32 = False  # forwarded to the hex parser as its byteswap32 flag

# ---- CPU memory map (virtual) ----
# Fixed structures are expressed as offsets from the start of CPU RAM.
CPU_RAM_BASE      = 0x8000_0000
BOOTINFO_CPU_ADDR = CPU_RAM_BASE + 0x1000  # optional (versioned boot info)
MAILBOX_CPU_ADDR  = CPU_RAM_BASE + 0x2000  # u64[2] : fb0_phys, fb1_phys

# ---- allocate CPU RAM backing store (CMA contiguous) ----
# bring-up: 16-32 MiB
RAM_BYTES = 16 << 20

# ---- simple filesystem (initramfs) ----
ENABLE_FS = True
FS_TYPE = "tar"  # 'tar' or 'none'
FS_PATH = "rootfs.tar"
FS_ALIGN = 0x1000  # alignment applied to the filesystem base address

# ----  DisplayPort + graphics ----
ENABLE_DISPLAYPORT = True
ENABLE_GRAPHICS    = True  # allocate fb0/fb1 + write mailbox (+ bootinfo)

PIXEL_WIDTH  = 640
PIXEL_HEIGHT = 480
PIXEL_CH     = 4  # RGBA bytes

# Both structures are written with 64-bit stores, so they must be 8-byte aligned.
assert not (MAILBOX_CPU_ADDR & 0x7)
assert not (BOOTINFO_CPU_ADDR & 0x7)

## 1) Load Overlay & find regbus

In [None]:
# Load the bitstream named by BIT_PATH; `ol` is used below to look up IP blocks.
ol = Overlay(BIT_PATH)
# Bare expression: the notebook renders the overlay object as the cell output.
ol

In [None]:
# Locate the regbus AXI-Lite IP by a case-insensitive substring match on the
# names listed in ol.ip_dict.
cands = [ip_name for ip_name in ol.ip_dict if "regbus" in ip_name.lower()]
print("regbus candidates:", cands)
assert cands, "regbus IP not found. Check ol.ip_dict / block design."
# When several names match, the first one is used.
regbus = getattr(ol, cands[0])
regbus

## 2) Configure DisplayPort output

In [None]:
import time
from pynq import MMIO
from pynq.lib.video import *

class LiveDisplayPort(DisplayPort):
    """``DisplayPort`` subclass that issues extra register writes after ``configure()``.

    NOTE(review): the class and register names suggest the writes switch the
    DisplayPort output to a live video source - confirm against the
    Zynq UltraScale+ DisplayPort register reference.
    """
    # DisplayPort register parameters (Ultra96-V2)
    # The first two values are the base address and length passed to MMIO();
    # the __RA_*__ values are the offsets passed to MMIO.write().
    # Names ending in two underscores are not name-mangled by Python.
    __DP_REG_BASE_ADDR__                    = 0xfd4a0000
    __DP_REG_ADDR_RANGE__                   = 0x0000cc20
    __RA_DP_MAIN_STREAM_ENABLE__            = 0x00000084
    # NOTE(review): "GLOVAL" is presumably a typo for "GLOBAL"; the name is kept as-is.
    __RA_V_BLEND_SET_GLOVAL_ALPHA_REG__     = 0x0000a00c
    __RA_AV_BUF_OUTPUT_AUDIO_VIDEO_SELECT__ = 0x0000b070
    __RA_AV_BUF_AUD_VID_CLK_SOURCE__        = 0x0000b120
    __RA_AV_BUF_SRST_REG__                  = 0x0000b124

    def __init__(self, event_loop=None):
        """Forward ``event_loop`` unchanged to the base-class constructor."""
        super().__init__(event_loop)

    def configure(self, mode, pixelformat):
        """Run the base-class ``configure`` and then the extra register sequence.

        Args:
            mode: passed through to ``DisplayPort.configure``.
            pixelformat: passed through to ``DisplayPort.configure``.
        """
        super().configure(mode, pixelformat)
        self.__setup_live_video__()

    def __setup_live_video__(self):
        """Write the fixed register sequence through a fresh MMIO window.

        The call blocks for about two seconds because of the two one-second
        sleeps around the SRST register writes.
        """
        dpreg = MMIO(self.__DP_REG_BASE_ADDR__, self.__DP_REG_ADDR_RANGE__)
        # MAIN_STREAM_ENABLE is written 0 here and 1 as the very last step.
        dpreg.write(self.__RA_DP_MAIN_STREAM_ENABLE__, 0x00000000)
        dpreg.write(self.__RA_V_BLEND_SET_GLOVAL_ALPHA_REG__, 0x00000000)
        # NOTE(review): the meaning of 0x50 and 0x2 below is not visible here -
        # confirm the bit fields against the register reference.
        dpreg.write(self.__RA_AV_BUF_OUTPUT_AUDIO_VIDEO_SELECT__, 0x00000050)
        dpreg.write(self.__RA_AV_BUF_AUD_VID_CLK_SOURCE__, 0x00000002)
        # SRST is written 0x2, held for one second, then written back to 0.
        dpreg.write(self.__RA_AV_BUF_SRST_REG__, 0x00000002)
        time.sleep(1)
        dpreg.write(self.__RA_AV_BUF_SRST_REG__, 0x00000000)
        time.sleep(1)
        dpreg.write(self.__RA_DP_MAIN_STREAM_ENABLE__, 0x00000001)

if not ENABLE_DISPLAYPORT:
    print("Skip DisplayPort config (headless)")
else:
    # The video mode is requested with 24 bits per pixel and the PIXEL_RGB format.
    lvdp = LiveDisplayPort()
    lvdp.configure(
        VideoMode(PIXEL_WIDTH, PIXEL_HEIGHT, 24),
        PIXEL_RGB,
    )
    print("DisplayPort configured:", PIXEL_WIDTH, PIXEL_HEIGHT)


## 3) Boot utilities (RAM image loader, mailbox/bootinfo, bootctrl)

In [None]:
# ---- bootctrl_regbus map (bootctrl_regbus.veryl) ----
# Four registers spaced 4 bytes apart, starting at BOOT_BASE:
#   STATUS   - read by show_status(): bit0 = hold_reset, bit1 = run
#   CTRL     - 0x1 / 0x0 written by hold_reset(), 0x2 written by start_cpu()
#   DRAMBASE - written by set_ram_mapping()
#   ENTRYPC  - written by set_ram_mapping()
BOOT_BASE = 0x1000
RA_STATUS, RA_CTRL, RA_DRAMBASE, RA_ENTRYPC = (
    BOOT_BASE + 4 * index for index in range(4)
)

def show_status():
    """Read the STATUS register, print it with its two decoded bits, and return it.

    Returns:
        The STATUS register value as an int.
    """
    status = int(regbus.read(RA_STATUS))
    run_bit = (status >> 1) & 1
    reset_bit = status & 1
    print(f"STATUS=0x{status:08x}  run={run_bit} hold_reset={reset_bit}")
    return status

def hold_reset(on: bool):
    """Write 0x1 to the CTRL register when *on* is truthy, otherwise 0x0."""
    if on:
        regbus.write(RA_CTRL, 0x1)
    else:
        regbus.write(RA_CTRL, 0x0)

def set_ram_mapping(*, dram_base: int, ram_phys_base: int):
    """Program the DRAMBASE and ENTRYPC registers with the low 32 bits of each value.

    Args:
        dram_base: value written to the DRAMBASE register.
        ram_phys_base: value written to the ENTRYPC register.

    NOTE(review): a parameter named ``ram_phys_base`` going into ENTRYPC looks
    surprising - confirm against the register semantics in bootctrl_regbus.veryl.
    """
    low32 = 0xFFFF_FFFF
    # DRAMBASE is written first, then ENTRYPC.
    for reg_offset, reg_value in ((RA_DRAMBASE, dram_base), (RA_ENTRYPC, ram_phys_base)):
        regbus.write(reg_offset, reg_value & low32)

def start_cpu():
    """Write the START value (0x2) to the CTRL register."""
    # W1P START (presumably "write-1-to-pulse" - TODO confirm in the RTL)
    start_value = 0x2
    regbus.write(RA_CTRL, start_value)

def parse_hex_words(path: str, byteswap32: bool = False) -> list[int]:
    """Parse a text file of hexadecimal words into a list of 32-bit integers.

    Blank lines are skipped. ``#`` and ``//`` start a comment that runs to the
    end of the line, whether the comment is the whole line or trails a word.
    A line may hold several whitespace-separated words, each with an optional
    ``0x`` / ``0X`` prefix. Every value is truncated to its low 32 bits.

    Args:
        path: path of the text file to read.
        byteswap32: when true, reverse the byte order of every 32-bit word.

    Returns:
        The words in file order, each in the range ``0 .. 0xFFFF_FFFF``.

    Raises:
        ValueError: if a token is not valid hexadecimal; the message names the
            file, the 1-based line number and the offending token.
    """
    words: list[int] = []
    for lineno, raw in enumerate(Path(path).read_text().splitlines(), start=1):
        # Cut comments first so that "deadbeef  // note" still yields a word.
        text = raw.split("#", 1)[0].split("//", 1)[0]
        for token in text.split():
            digits = token[2:] if token.startswith(("0x", "0X")) else token
            try:
                value = int(digits, 16) & 0xFFFF_FFFF
            except ValueError as err:
                raise ValueError(
                    f"{path}:{lineno}: invalid hex word {token!r}"
                ) from err
            if byteswap32:
                value = int.from_bytes(value.to_bytes(4, "big"), "little")
            words.append(value)
    return words

def ensure_pyelftools() -> bool:
    """Report whether ``ELFFile`` can be imported from pyelftools.

    Returns:
        True when the import succeeds; False (after printing the error and an
        install hint) when it raises any ``Exception``.
    """
    try:
        from elftools.elf.elffile import ELFFile  # noqa
    except Exception as exc:
        print("pyelftools not available:", exc)
        print("Install: pip3 install pyelftools")
        return False
    else:
        return True

@dataclass
class CpuRamImage:
    """Host-side image of the CPU's RAM, stored as an array of 64-bit words.

    CPU addresses are translated to byte offsets by subtracting ``cpu_base``;
    every write helper checks that the offset falls inside ``ram_bytes``.
    """

    # CPU address that corresponds to byte offset 0 of the buffer.
    cpu_base: int
    # Backing store; phys_base and flush() additionally need the buffer to
    # provide ``device_address`` and ``flush()``.
    buf_u64: np.ndarray
    # Usable size of the image in bytes (upper bound for all offsets).
    ram_bytes: int

    @property
    def phys_base(self) -> int:
        """Return the backing buffer's ``device_address`` as a plain int."""
        address = self.buf_u64.device_address
        return int(address)

    def flush(self):
        """Call ``flush()`` on the backing buffer."""
        self.buf_u64.flush()

    def _off(self, cpu_addr: int) -> int:
        """Translate a CPU address into a byte offset inside the image.

        Raises:
            AssertionError: if the address lies below ``cpu_base`` or at/after
                ``cpu_base + ram_bytes``.
        """
        byte_off = cpu_addr - self.cpu_base
        assert byte_off >= 0, f"cpu_addr 0x{cpu_addr:x} < CPU_RAM_BASE 0x{self.cpu_base:x}"
        assert byte_off < self.ram_bytes, f"cpu_addr 0x{cpu_addr:x} beyond RAM image (size=0x{self.ram_bytes:x})"
        return byte_off

    def write_u64(self, cpu_addr: int, value: int):
        """Store the low 64 bits of *value* at an 8-byte-aligned CPU address."""
        assert (cpu_addr % 8) == 0
        word_index = self._off(cpu_addr) // 8
        truncated = value & 0xFFFF_FFFF_FFFF_FFFF
        self.buf_u64[word_index] = np.uint64(truncated)

    def write_u32(self, cpu_addr: int, value: int):
        """Store the low 32 bits of *value* at a 4-byte-aligned CPU address.

        The containing 64-bit word is read, the addressed half is replaced and
        the word is written back; the other half is left untouched.
        """
        assert (cpu_addr % 4) == 0
        word_index, byte_in_word = divmod(self._off(cpu_addr), 8)
        shift = byte_in_word * 8
        field_mask = 0xFFFF_FFFF << shift
        previous = int(self.buf_u64[word_index])
        updated = (previous & ~field_mask) | ((value & 0xFFFF_FFFF) << shift)
        self.buf_u64[word_index] = np.uint64(updated)

    def write_bytes(self, cpu_addr: int, data: bytes):
        """Copy *data* into the image starting at *cpu_addr* (no alignment required)."""
        start = self._off(cpu_addr)
        stop = start + len(data)
        assert stop <= self.ram_bytes
        # Byte-granular access goes through a uint8 view of the same memory.
        byte_view = self.buf_u64.view(np.uint8)
        byte_view[start:stop] = np.frombuffer(data, dtype=np.uint8)

def alloc_cpu_ram_image(cpu_base: int, ram_bytes: int) -> CpuRamImage:
    """Allocate a zero-filled, non-cacheable buffer and wrap it in a CpuRamImage.

    Args:
        cpu_base: CPU address that maps to the start of the buffer.
        ram_bytes: requested size in bytes; the buffer is rounded up to a
            whole number of 64-bit words.

    Returns:
        The new CpuRamImage.

    Raises:
        AssertionError: if the buffer's physical base does not fit in 32 bits.
    """
    qword_count = -(-ram_bytes // 8)  # ceiling division
    backing = allocate(shape=(qword_count,), dtype=np.uint64, cacheable=False)
    backing[:] = 0
    backing.flush()
    image = CpuRamImage(cpu_base=cpu_base, buf_u64=backing, ram_bytes=ram_bytes)
    # Message (Japanese): with AXI_ADDR_WIDTH=32, addresses above 4 GB are not allowed.
    assert image.phys_base <= 0xFFFF_FFFF, "AXI_ADDR_WIDTH=32 の場合 4GB 超は不可"
    return image

def load_hex_into(img: CpuRamImage, hex_path: str, load_addr: int):
    """Load a hex word file into *img* as consecutive 64-bit words.

    Pairs of 32-bit words are packed low word first; an odd trailing word is
    paired with zero. The global BYTESWAP32 flag is forwarded to the parser.

    Args:
        img: destination RAM image.
        hex_path: path of the hex file.
        load_addr: CPU address of the first 64-bit word.

    Returns:
        ``(load_addr, end_addr)`` where ``end_addr`` is the address just past
        the last 64-bit word written.
    """
    words32 = parse_hex_words(hex_path, byteswap32=BYTESWAP32)
    print("hex 32-bit words:", len(words32))
    # Pad to an even count so the words split cleanly into (low, high) pairs.
    padded = words32 + [0] if len(words32) % 2 else words32
    low_halves = padded[0::2]
    high_halves = padded[1::2]
    for slot, (low, high) in enumerate(zip(low_halves, high_halves)):
        img.write_u64(load_addr + slot * 8, (high << 32) | low)
    img.flush()
    return load_addr, load_addr + len(low_halves) * 8

def load_elf_into(img: CpuRamImage, elf_path: str):
    """Copy every PT_LOAD segment of an ELF file into *img*.

    Each segment's file data is written at its ``p_vaddr``; when ``p_memsz``
    exceeds ``p_filesz`` the remainder is filled with zero bytes.

    Args:
        img: destination RAM image.
        elf_path: path of the ELF file.

    Returns:
        ``(entry, max_end)``: the ELF entry address and the highest
        ``p_vaddr + p_memsz`` over all loaded segments (0 if there are none).

    Raises:
        RuntimeError: if pyelftools cannot be imported.
    """
    if not ensure_pyelftools():
        raise RuntimeError("pyelftools required for ELF loading")

    from elftools.elf.elffile import ELFFile

    highest_end = 0
    with open(elf_path, "rb") as elf_file:
        elf = ELFFile(elf_file)
        entry = int(elf.header["e_entry"])
        print(f"ELF entry: 0x{entry:016x}")
        loadable = (s for s in elf.iter_segments() if s.header.p_type == "PT_LOAD")
        for segment in loadable:
            hdr = segment.header
            vaddr = int(hdr.p_vaddr)
            memsz = int(hdr.p_memsz)
            filesz = int(hdr.p_filesz)
            payload = segment.data()
            highest_end = max(highest_end, vaddr + memsz)
            print(f"  LOAD vaddr=0x{vaddr:016x} filesz=0x{filesz:x} memsz=0x{memsz:x}")
            img.write_bytes(vaddr, payload)
            zero_fill = memsz - filesz
            if zero_fill > 0:
                # Zero the part of the segment that has no file data.
                img.write_bytes(vaddr + filesz, bytes(zero_fill))
    img.flush()
    return entry, highest_end

def hexdump_cpu(img: CpuRamImage, cpu_addr: int, nbytes: int = 64):
    """Print a hex dump of the RAM image, 16 bytes per line.

    Each line is prefixed with the CPU address of its first byte. The dump
    stops early if it reaches the end of the backing buffer.

    Args:
        img: RAM image to read from.
        cpu_addr: CPU address of the first byte to dump.
        nbytes: number of bytes to dump; zero or negative prints nothing.

    Raises:
        ValueError: if *cpu_addr* lies outside the image. Without this check a
            negative offset would index from the end of the buffer and print
            unrelated bytes under the requested address labels.
    """
    off = cpu_addr - img.cpu_base
    if not 0 <= off < img.ram_bytes:
        raise ValueError(
            f"cpu_addr 0x{cpu_addr:x} outside RAM image "
            f"[0x{img.cpu_base:x}, 0x{img.cpu_base + img.ram_bytes:x})"
        )
    u8 = img.buf_u64.view(np.uint8)
    # Clamp the length so a negative nbytes cannot wrap the slice end.
    data = bytes(u8[off:off + max(nbytes, 0)])
    for i in range(0, len(data), 16):
        chunk = data[i:i + 16]
        hx = " ".join(f"{b:02x}" for b in chunk)
        print(f"0x{cpu_addr+i:08x}: {hx}")


def align_up(x: int, align: int) -> int:
    """Round *x* up using a bit mask derived from *align*.

    The result is a true multiple of *align* only when *align* is a power of two.
    """
    # -align has the same bit pattern as ~(align - 1) for Python ints.
    return (x + align - 1) & -align

def align_down(x: int, align: int) -> int:
    """Round *x* down using a bit mask derived from *align*.

    The result is a true multiple of *align* only when *align* is a power of two.
    """
    # -align has the same bit pattern as ~(align - 1) for Python ints.
    return x & -align

# ---- BootInfo ----
# Magic is the ASCII tag 'CBOT' read as a big-endian 32-bit value (0x43424F54).
BOOTINFO_MAGIC = int.from_bytes(b"CBOT", "big")
BOOTINFO_VER   = 2
# Filesystem type id stored in BootInfo for a tar image.
FS_TYPE_TAR    = 1

def write_bootinfo(img: CpuRamImage, *, fb0_phys: int, fb1_phys: int, width: int, height: int, stride_bytes: int,
                   fs_base: int = 0, fs_size: int = 0, fs_type: int = 0):
    """Write the BootInfo structure at BOOTINFO_CPU_ADDR and flush the image.

    Args:
        img: RAM image to write into.
        fb0_phys: address stored for framebuffer 0.
        fb1_phys: address stored for framebuffer 1.
        width: stored at offset 0x18.
        height: stored at offset 0x1C.
        stride_bytes: stored at offset 0x20.
        fs_base: filesystem base address (0 when unused).
        fs_size: filesystem size in bytes (0 when unused).
        fs_type: filesystem type id (0 when unused).
    """
    # (offset, field width in bits, value), listed in the order they are written.
    layout = (
        (0x00, 32, BOOTINFO_MAGIC),
        (0x04, 32, BOOTINFO_VER),
        (0x08, 64, fb0_phys),
        (0x10, 64, fb1_phys),
        (0x18, 32, width),
        (0x1C, 32, height),
        (0x20, 32, stride_bytes),
        (0x24, 32, 0),                 # pixel_format: 0=RGBA8888
        (0x28, 64, 0x1001_0000),       # regbus_base
        (0x30, 64, MAILBOX_CPU_ADDR),
        (0x38, 64, fs_base),
        (0x40, 64, fs_size),
        (0x48, 32, fs_type),
        (0x4C, 32, 0),                 # zeroed; looks like padding - TODO confirm
    )
    store = {32: img.write_u32, 64: img.write_u64}
    for offset, bits, value in layout:
        store[bits](BOOTINFO_CPU_ADDR + offset, value)
    img.flush()


## 4) Allocate CPU RAM image + load program

In [None]:
# Allocate the RAM image and load the program into it.
img = alloc_cpu_ram_image(CPU_RAM_BASE, RAM_BYTES)
print(f"CPU RAM image phys base = 0x{img.phys_base:08x}  (RAM_BYTES={RAM_BYTES})")

if IMAGE_TYPE == "hex":
    entry_va, end_va = load_hex_into(img, IMAGE_PATH, CPU_RAM_BASE)
elif IMAGE_TYPE == "elf":
    entry_va, end_va = load_elf_into(img, IMAGE_PATH)
else:
    raise ValueError("IMAGE_TYPE must be 'hex' or 'elf'")

print(f"entry virtual addr (CPU) = 0x{entry_va:016x}")

# Filesystem fields default to "absent"; they are passed to write_bootinfo later.
fs_base = 0
fs_size = 0
fs_type_id = 0

if ENABLE_FS and FS_TYPE != "none":
    # The filesystem blob is placed at the top of RAM, aligned down to FS_ALIGN.
    fs_bytes = Path(FS_PATH).read_bytes()
    fs_size = len(fs_bytes)
    fs_end = CPU_RAM_BASE + RAM_BYTES
    fs_base = align_down(fs_end - fs_size, FS_ALIGN)
    assert fs_base + fs_size <= fs_end
    assert fs_base >= CPU_RAM_BASE + 0x0000_4000
    # The blob is written after the program, so any overlap would silently
    # overwrite the tail of the program image.
    assert end_va is None or fs_base >= end_va, (
        f"filesystem (base=0x{fs_base:08x}) overlaps program image "
        f"(end=0x{end_va:08x}); increase RAM_BYTES or shrink the image"
    )
    img.write_bytes(fs_base, fs_bytes)
    img.flush()
    if FS_TYPE == "tar":
        fs_type_id = FS_TYPE_TAR
    print(f"fs loaded: type={FS_TYPE} base=0x{fs_base:08x} size=0x{fs_size:x}")
else:
    print("Skip filesystem init")

# BootInfo sits below the mailbox and is also written after the program load,
# so it gets the same overlap warning as the mailbox.
if end_va is not None and end_va > BOOTINFO_CPU_ADDR:
    print("WARNING: program image overlaps bootinfo region in CPU address space.")
    print("         (bootinfo will be written AFTER load, but kernel should reserve the range.)")
    print(f"         program end: 0x{end_va:08x}, bootinfo: 0x{BOOTINFO_CPU_ADDR:08x}")

if end_va is not None and end_va > MAILBOX_CPU_ADDR:
    print("WARNING: program image overlaps mailbox region in CPU address space.")
    print("         (mailbox will be written AFTER load, but kernel should reserve the range.)")
    print(f"         program end: 0x{end_va:08x}, mailbox: 0x{MAILBOX_CPU_ADDR:08x}")

## 5) Allocate fb0/fb1 and write mailbox / BootInfo

In [None]:
if not ENABLE_GRAPHICS:
    # NOTE(review): BootInfo (including the fs_* fields) is only written in the
    # graphics branch below - confirm a headless boot does not need it.
    print("Skip graphics (headless boot)")
else:
    # Two identically shaped, non-cacheable framebuffers: fb0 first, then fb1.
    fb0, fb1 = (
        allocate(shape=(PIXEL_HEIGHT, PIXEL_WIDTH, PIXEL_CH), dtype=np.uint8, cacheable=False)
        for _ in range(2)
    )

    # Clear both buffers before their addresses are published.
    fb0[:] = 0
    fb1[:] = 0
    fb0.flush()
    fb1.flush()

    fb0_phys, fb1_phys = int(fb0.device_address), int(fb1.device_address)

    print(f"fb0 phys = 0x{fb0_phys:08x}  ({PIXEL_WIDTH}x{PIXEL_HEIGHT}x{PIXEL_CH} bytes)")
    print(f"fb1 phys = 0x{fb1_phys:08x}  ({PIXEL_WIDTH}x{PIXEL_HEIGHT}x{PIXEL_CH} bytes)")
    # Both addresses must fit in 32 bits.
    assert fb0_phys < (1 << 32)
    assert fb1_phys < (1 << 32)

    # mailbox (legacy): two consecutive u64 slots, fb0 then fb1
    img.write_u64(MAILBOX_CPU_ADDR, fb0_phys)
    img.write_u64(MAILBOX_CPU_ADDR + 8, fb1_phys)
    img.flush()

    # BootInfo (optional)
    write_bootinfo(
        img,
        fb0_phys=fb0_phys,
        fb1_phys=fb1_phys,
        width=PIXEL_WIDTH,
        height=PIXEL_HEIGHT,
        stride_bytes=PIXEL_WIDTH * PIXEL_CH,
        fs_base=fs_base,
        fs_size=fs_size,
        fs_type=fs_type_id,
    )

    print(f"mailbox @ 0x{MAILBOX_CPU_ADDR:08x} written (fb0/fb1)")
    print(f"bootinfo @ 0x{BOOTINFO_CPU_ADDR:08x} written (magic/version)")

    print("Mailbox dump:")
    hexdump_cpu(img, MAILBOX_CPU_ADDR, 32)

## 6) Start CPU (bootctrl_regbus)

In [None]:
# 1) Hold reset (writes 0x1 to CTRL), wait 10 ms, then print STATUS
hold_reset(True)
time.sleep(0.01)
show_status()

# 2) Configure mapping (dram_base is additional offset; usually 0)
# NOTE(review): set_ram_mapping writes ram_phys_base to the ENTRYPC register, and
# entry_va returned by the image loader is not used here - confirm this is intended.
set_ram_mapping(dram_base=0x0000_0000, ram_phys_base=img.phys_base)
time.sleep(0.01)

# 3) START (release reset + run): writes 0x2 to CTRL, then prints STATUS again
start_cpu()
time.sleep(0.01)
show_status()


## 7) Quick sanity checks

In [None]:
# Dump the BootInfo structure (80 bytes) and the first 64 bytes of the image.
for _title, _addr, _length in (
    ("BootInfo dump:", BOOTINFO_CPU_ADDR, 80),
    ("First 64 bytes of image:", CPU_RAM_BASE, 64),
):
    print(_title)
    hexdump_cpu(img, _addr, _length)