In [None]:
import numpy as np

In [None]:
# Placeholder configuration dictionary (all values are hard-coded samples).
fake_config = {
    "mode": "memory",
    "technology": "65",
    "device_type": "SRAM",
    # 1 GHz
    "frequency": 1e9,
    # Device variation mean
    "precision_mu": 6,
    # Device variation stddev
    "precision_sigma": 2,
    "dataset": "CIFAR-10",
    "model": "ResNet18",
    "distribution_file": "../../../DATA/customized_gaussian_current.csv",
}

In [None]:
# Module-level constants shared by the simple INV / NAND / NOR gate models.
VDD = 1.0  # Supply voltage (V)
C_UNIT = 0.2e-12  # Unit load capacitance (F)
TECH_FEATURE_SIZE = 45e-9  # Technology node; the original note says "nm", but 45e-9 is 45 nm only if the unit is metres — TODO confirm
GATE_DELAY_UNIT = 10e-12  # Unit gate delay (s)


class INVDecoder:
    """Simple inverter model exposing area, delay and dynamic-energy estimates.

    Args:
        widthNmos: Width of the NMOS transistor (units are not fixed by this
            class — presumably multiples of the minimum width; TODO confirm).
        widthPmos: Width of the PMOS transistor, same unit as ``widthNmos``.
    """

    def __init__(self, widthNmos=1, widthPmos=2):
        self.widthNmos = widthNmos
        self.widthPmos = widthPmos

    def calculate_area(self):
        """Return the area estimate: gate height times total transistor width.

        The gate height is taken as ``2 * TECH_FEATURE_SIZE``. The previous
        implementation read ``self.width``, an attribute that is never set,
        so every call raised ``AttributeError``; the combined NMOS + PMOS
        width is used instead.
        """
        gate_height = 2 * TECH_FEATURE_SIZE
        return gate_height * (self.widthNmos + self.widthPmos)

    def calculate_delay(self):
        """Return the delay of one gate stage (``GATE_DELAY_UNIT``)."""
        return 1 * GATE_DELAY_UNIT

    def calculate_dynamic_power(self):
        """Return the switching energy 0.5 * C * V^2, scaled by 1e12 (pJ)."""
        return 0.5 * C_UNIT * VDD**2 * 1e12  # pJ


class NANDDecoder:
    """Simple N-input NAND gate model (area, delay, dynamic energy).

    Args:
        num_inputs: Number of gate inputs.
    """

    # Delay penalty applied on top of the per-input unit delay.
    _DELAY_FACTOR = 1.2

    def __init__(self, num_inputs):
        self.num_inputs = num_inputs

    def calculate_area(self):
        """Return the area estimate from the transistor count."""
        # One NMOS per input plus one PMOS per input (PMOS in parallel).
        transistor_count = self.num_inputs + self.num_inputs
        footprint = transistor_count * TECH_FEATURE_SIZE
        return footprint * 2

    def calculate_delay(self):
        """Return the delay, growing linearly with the number of inputs."""
        unscaled = self.num_inputs * GATE_DELAY_UNIT
        return unscaled * self._DELAY_FACTOR

    def calculate_dynamic_power(self):
        """Return the switching energy, scaled by 1e12."""
        half_inputs = 0.5 * self.num_inputs
        energy = half_inputs * C_UNIT * VDD ** 2
        return energy * 1e12


class NORDecoder:
    """Simple N-input NOR gate model (area, delay, dynamic energy).

    Args:
        num_inputs: Number of gate inputs.
    """

    # Larger penalty than the NAND model: the PMOS devices are in series.
    _DELAY_FACTOR = 1.5

    def __init__(self, num_inputs):
        self.num_inputs = num_inputs

    def calculate_area(self):
        """Return the area estimate from the transistor count."""
        transistor_count = self.num_inputs + self.num_inputs
        footprint = transistor_count * TECH_FEATURE_SIZE
        return footprint * 2

    def calculate_delay(self):
        """Return the delay, growing linearly with the number of inputs."""
        unscaled = self.num_inputs * GATE_DELAY_UNIT
        return unscaled * self._DELAY_FACTOR

    def calculate_dynamic_power(self):
        """Return the switching energy, scaled by 1e12."""
        half_inputs = 0.5 * self.num_inputs
        energy = half_inputs * C_UNIT * VDD ** 2
        return energy * 1e12


In [None]:
class RowDecoder:
    """Analytical area / latency / power estimate for a row address decoder.

    Gate counts and transistor widths are set by :meth:`initialize`; every
    ``calculate_*`` method raises ``RuntimeError`` until it has been called.

    Args:
        tech: Process model object; stored but not otherwise used here.
        feature_size: Minimum feature size (e.g. ``22e-9`` for 22 nm).
        vdd: Supply voltage (V).
        temperature: Temperature (K); stored as ``temp``, not otherwise used.
        pn_size_ratio: PMOS / NMOS width ratio.
    """

    def __init__(self, tech, feature_size, vdd, temperature, pn_size_ratio=2):
        self.tech = tech
        self.feature_size = feature_size
        self.vdd = vdd
        self.temp = temperature
        self.pn_ratio = pn_size_ratio

        # Flipped to True at the end of initialize().
        self.initialized = False

    def _require_initialized(self):
        """Raise ``RuntimeError`` unless :meth:`initialize` has been called."""
        if not self.initialized:
            raise RuntimeError("RowDecoder not initialized.")

    def initialize(self, num_addr_row, use_mux=False, parallel=False):
        """Set the gate counts and transistor widths for the decoder.

        Args:
            num_addr_row: Number of row address bits.
            use_mux: Stored on the instance; not used by the calculations.
            parallel: When true, every transistor width is scaled by 8.
        """
        self.num_addr_row = num_addr_row
        self.use_mux = use_mux
        self.parallel = parallel

        # One inverter per address bit, four NAND gates per pair of bits.
        self.num_inv = num_addr_row
        self.num_nand = 4 * int(np.floor(num_addr_row / 2))
        # NOR stage and metal connections exist only above two address bits.
        self.num_nor = int(2 ** num_addr_row) if num_addr_row > 2 else 0
        self.num_metal = self.num_nand + ((num_addr_row % 2) * 2) if num_addr_row > 2 else 0

        multiplier = 8 if parallel else 1
        # NOTE(review): the original comment gives this in metres, yet it is
        # multiplied by feature_size (also a length), so the resulting
        # "widths" are not plain lengths — confirm the intended scale.
        min_size = 1e-6

        self.width_inv_n = multiplier * min_size * self.feature_size
        self.width_inv_p = self.pn_ratio * self.width_inv_n

        self.width_nand_n = multiplier * 2 * min_size * self.feature_size
        self.width_nand_p = self.pn_ratio * self.width_nand_n / 2

        self.width_nor_n = multiplier * min_size * self.feature_size
        self.width_nor_p = multiplier * self.pn_ratio * min_size * self.feature_size * int(np.ceil(num_addr_row / 2))

        self.width_drv_inv_n = multiplier * 3 * min_size * self.feature_size
        self.width_drv_inv_p = self.pn_ratio * self.width_drv_inv_n / 3

        self.initialized = True
        print(f"[INFO] RowDecoder initialized: {num_addr_row}-bit decoder")

    def calculate_area(self):
        """Return the total gate area, scaled by 1e12 (m^2 -> um^2).

        Raises:
            RuntimeError: If :meth:`initialize` has not been called.
        """
        self._require_initialized()

        gate_height = 2 * self.feature_size  # two feature sizes tall
        area_inv = gate_height * (self.width_inv_n + self.width_inv_p)
        area_nand = gate_height * (self.width_nand_n + self.width_nand_p)
        area_nor = gate_height * (self.width_nor_n + self.width_nor_p)
        area_drv_inv = gate_height * (self.width_drv_inv_n + self.width_drv_inv_p)

        total_area = (
            area_inv * self.num_inv +
            area_nand * self.num_nand +
            area_nor * self.num_nor +
            area_drv_inv * self.num_nor * 2  # two driver inverters per NOR output
        )

        return total_area * 1e12

    def calculate_latency(self, cap_load=2e-15, ramp_input=1e-10):
        """Return the decoder delay in ns.

        Args:
            cap_load: External load capacitance (F).
            ramp_input: Input ramp time (s).

        Raises:
            RuntimeError: If :meth:`initialize` has not been called.
        """
        self._require_initialized()

        R_nand = 1e3  # assumed gate resistance: 1 kOhm
        C_nand = cap_load

        tr = R_nand * C_nand
        beta = 0.4
        # Simplified Horowitz-style delay expression.
        delay = tr * np.sqrt(np.log(1 + ramp_input / (beta * tr)))

        # One stage always, plus one each when NAND / NOR gates are present.
        total_delay = delay * (1 + int(self.num_nand > 0) + int(self.num_nor > 0))
        return total_delay * 1e9

    def calculate_power(self, activity=0.1, freq=1e9):
        """Return dynamic energy, dynamic power and leakage power.

        Args:
            activity: Switching activity factor.
            freq: Clock frequency (Hz).

        Returns:
            Dict with keys ``dynamic_energy_pJ``, ``dynamic_power_mW`` and
            ``leakage_power_uW``.

        Raises:
            RuntimeError: If :meth:`initialize` has not been called. This
                method used to skip the check and fail with
                ``AttributeError`` on the missing gate counts instead.
        """
        self._require_initialized()

        # Fixed per-gate capacitances: 2 fF (INV), 3 fF (NAND), 4 fF (NOR).
        C_total = self.num_inv * 2e-15 + self.num_nand * 3e-15 + self.num_nor * 4e-15
        E_dynamic = 0.5 * C_total * self.vdd ** 2
        P_dynamic = E_dynamic * freq * activity
        # Assumed per-gate static power: 1 nW / 1.5 nW / 2 nW.
        P_leakage = self.num_inv * 1e-9 + self.num_nand * 1.5e-9 + self.num_nor * 2e-9

        return {
            "dynamic_energy_pJ": E_dynamic * 1e12,
            "dynamic_power_mW": P_dynamic * 1e3,
            "leakage_power_uW": P_leakage * 1e6
        }




In [None]:
class LogicGateDecoder:
    """Simplified logic-gate decoder model.

    Args:
        input_bits: Number of address bits fed into the decoder.
        output_lines: Number of decoded output lines.
    """

    def __init__(self, input_bits, output_lines):
        self.input_bits, self.output_lines = input_bits, output_lines

    def calculate_area(self):
        """Return the area, assuming each gate takes ``input_bits * 2`` um^2."""
        bits_times_lines = self.output_lines * self.input_bits
        return bits_times_lines * 2

    def calculate_latency(self):
        """Return the latency of the simplified model.

        The model is linear in the input bit count plus a term proportional
        to log2 of the number of output lines.
        """
        input_term = self.input_bits * 0.1
        fanout_term = np.log2(self.output_lines) * 0.05
        return input_term + fanout_term

    def calculate_power(self):
        """Return the dynamic energy in pJ.

        Assumes 0.2 pF of switched capacitance per output line at 1 V.
        """
        line_capacitance = 0.2e-12
        per_line_energy = 0.5 * line_capacitance * 1 ** 2
        energy_joules = per_line_energy * self.output_lines
        return energy_joules * 1e12


In [None]:
class WLDriver:
    """Simplified driver model covering ``num_lines`` lines.

    Args:
        num_lines: Number of lines driven.
    """

    def __init__(self, num_lines):
        self.num_lines = num_lines

    def calculate_area(self):
        """Return the area, assuming 4 um^2 per driver."""
        area_per_driver = 4
        return self.num_lines * area_per_driver

    def calculate_latency(self):
        """Return a fixed 0.2 ns, the assumed delay of one standard buffer."""
        buffer_delay_ns = 0.2
        return buffer_delay_ns

    def calculate_power(self):
        """Return the dynamic energy in pJ.

        Assumes each line charges 0.4 pF at 1 V.
        """
        line_capacitance = 0.4e-12
        per_line_energy = 0.5 * line_capacitance * 1 ** 2
        energy_joules = per_line_energy * self.num_lines
        return energy_joules * 1e12


In [None]:
class HierarchicalDecoder:
    """Two-level decoder: one pre-decoder plus per-block local decoders.

    The address bits are split in half: the lower half (rounded down) selects
    a block through the pre-decoder and the remaining bits are decoded inside
    each block. Every block gets its own ``LogicGateDecoder`` and ``WLDriver``.

    Args:
        address_bits: Total number of address bits.
    """

    def __init__(self, address_bits):
        self.address_bits = address_bits
        self.total_lines = 2 ** address_bits

        # Split the address between the pre-decoder and the local decoders.
        self.pre_bits = address_bits // 2
        self.local_bits = address_bits - self.pre_bits

        self.num_blocks = 2 ** self.pre_bits
        self.block_lines = 2 ** self.local_bits

        # Build the submodules: one pre-decoder, then one local decoder and
        # one driver bank per block.
        self.predecoder = LogicGateDecoder(self.pre_bits, self.num_blocks)
        self.local_decoders = []
        self.drivers = []
        for _ in range(self.num_blocks):
            self.local_decoders.append(
                LogicGateDecoder(self.local_bits, self.block_lines)
            )
            self.drivers.append(WLDriver(self.block_lines))

    def _blocks(self):
        """Pair every local decoder with the driver bank of the same block."""
        return zip(self.local_decoders, self.drivers)

    def calculate_area(self):
        """Return the pre-decoder area plus the area of every block."""
        blocks_area = sum(
            decoder.calculate_area() + driver.calculate_area()
            for decoder, driver in self._blocks()
        )
        return self.predecoder.calculate_area() + blocks_area

    def calculate_latency(self):
        """Return the pre-decoder latency plus the slowest block's latency."""
        slowest_block = max(
            decoder.calculate_latency() + driver.calculate_latency()
            for decoder, driver in self._blocks()
        )
        return self.predecoder.calculate_latency() + slowest_block

    def calculate_power(self):
        """Return the pre-decoder value plus the sum over every block."""
        blocks_power = sum(
            decoder.calculate_power() + driver.calculate_power()
            for decoder, driver in self._blocks()
        )
        return self.predecoder.calculate_power() + blocks_power
