In [1]:
import numpy as np
import random
import json

class RandomAllocation:
    def __init__(self, bits, layersizes, R, original_bit):
        self.bits = bits
        self.layersizes = layersizes
        self.R = R
        self.original_bit = original_bit
        self.target_bit = self._calculate_target_bit()

    def _calculate_target_bit(self):
        return self.original_bit * self.R

    def _validate_allocation(self, allocation):
        total_size = sum(self.layersizes)
        weighted_sum = sum(layer_size * bit for layer_size, bit in zip(self.layersizes, allocation))
        average_bit = weighted_sum / total_size
        return average_bit <= self.target_bit

    def generate_allocation(self):
        while True:
            allocation = [random.choice(self.bits) for _ in range(len(self.layersizes))]
            if self._validate_allocation(allocation):
                return allocation

In [None]:
import time

def load_json(file):
    with open(file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)
    data = []
    for id, block in enumerate(json_data):
        for key, value in json_data[block].items():
            data.append(value)
    return np.array(data)

if __name__ == "__main__":
    bits = [3,4,8]
    layersizes = load_json('/root/autodl-tmp/methods/mix_quantize/model_info/llama2-7b/LayersParams.json')
    R = 0.25
    original_bit = 16
    
    random_allocator = RandomAllocation(bits, layersizes, R, original_bit)

    start_time = time.time()

    # 生成随机分配
    allocation = random_allocator.generate_allocation()
    print("随机分配结果:", allocation)

    print("总耗时：",time.time() - start_time)

# 压缩率计划 + 组内随机分配
* 压缩率计划的目标是生成一个满足压缩率要求的位宽分配计划，例如：
{size0: {16: 2, 4: 8}, size1: {8: 6, 4: 2}}。
    * 当前这部分的随机不好操作，使用线性规划可以求解，但是求解的结果通常是固定的

* 在这个计划的基础上，组内随机分配会对每个目标大小的层进行随机位宽分配，确保每个大小级别的层严格遵循位宽分配计划，同时增加分配的随机性。

In [1]:
import random
from collections import defaultdict

class RandomAllocation:
    def __init__(self, bits, layersizes, R, original_bit, seed=None):
        self.bits = bits
        self.layersizes = layersizes
        self.R = R
        self.original_bit = original_bit
        self.seed = seed
        if self.seed is not None:
            random.seed(self.seed)
        else:
            self.seed = random.randint(0,1000)
            print(self.seed)
            random.seed(self.seed) 
        self.target_bit = self._calculate_target_bit()
        self.size_groups = self._group_layers_by_size()

    def _calculate_target_bit(self):
        return self.original_bit * self.R

    def _group_layers_by_size(self):
        size_groups = defaultdict(int)
        for size in self.layersizes:
            size_groups[size] += 1
        return size_groups

    def _allocate_for_size_group(self, size, num_layers):
        allocation = {bit: 0 for bit in self.bits}
        remaining_layers = num_layers
        remaining_budget = size * num_layers * self.target_bit
    
        # 随机生成初始分配方案
        while remaining_layers > 0 and remaining_budget > 0:
            # 随机选择一个位宽
            bit = random.choice(self.bits)
            # 计算该位宽可以分配的最大层数
            max_possible = min(remaining_layers, remaining_budget // (size * bit))
            if max_possible > 0:
                # 随机分配一定数量的层
                assigned = random.randint(1, max_possible)
                allocation[bit] += assigned
                remaining_layers -= assigned
                remaining_budget -= size * bit * assigned

        # 在初始方案基础上进行随机调整
        max_iterations = 100  # 防止无限循环
        for _ in range(max_iterations):
            # 随机选择两个位宽进行调整
            bit1, bit2 = random.sample(self.bits, 2)
            if allocation[bit1] > 0:
                # 计算调整后的预算变化
                delta = size * (bit2 - bit1)
                if remaining_budget + delta >= 0:
                    # 调整位宽分配
                    allocation[bit1] -= 1
                    allocation[bit2] += 1
                    remaining_budget += delta
    
        return allocation

    def _generate_size_group_allocation(self):
        size_group_allocation = {}
        for size, num_layers in self.size_groups.items():
            size_group_allocation[size] = self._allocate_for_size_group(size, num_layers)
        return size_group_allocation

    def _generate_final_allocation(self, size_group_allocation):
        final_allocation = []
        for size in self.layersizes:
            # 过滤掉分配数为 0 的位宽
            valid_bits = [bit for bit, num in size_group_allocation[size].items() if num > 0]
            valid_weights = [size_group_allocation[size][bit] for bit in valid_bits]
            # print(valid_bits,valid_weights)
            # 随机选择一个位宽
            bit = random.choices(valid_bits, weights=valid_weights)[0]
            final_allocation.append(bit)
            # 更新分配数目
            size_group_allocation[size][bit] -= 1
            if size_group_allocation[size][bit] == 0:
                del size_group_allocation[size][bit]
        return final_allocation

    def generate_allocation(self):
        # 第一阶段：按大小级别分配位宽数目
        size_group_allocation = self._generate_size_group_allocation()
        print(size_group_allocation)
        # 第二阶段：生成最终的位宽分配
        final_allocation = self._generate_final_allocation(size_group_allocation)
        return final_allocation

In [2]:
import time
import numpy as np
import json

def load_json(file):
    with open(file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)
    data = []
    for id, block in enumerate(json_data):
        for key, value in json_data[block].items():
            data.append(value)
    return np.array(data)

if __name__ == "__main__":
    bits = [3,4,8]
    layersizes = load_json('/root/autodl-tmp/methods/mix_quantize/model_info/llama2-7b/LayersParams.json')
    R = 0.25
    original_bit = 16
    
    random_allocator = RandomAllocation(bits, layersizes, R, original_bit)

    start_time = time.time()

    # 生成随机分配
    allocation = random_allocator.generate_allocation()
    print("随机分配结果:", allocation)

    print("总耗时：",time.time() - start_time)

3
{3: 38, 4: 86, 8: 4}
201326592.0
{3: 82, 4: 5, 8: 9}
1848639488.0
{16777216: {3: 36, 4: 91, 8: 1}, 45088768: {3: 71, 4: 20, 8: 5}}
随机分配结果: [4, 4, 4, 4, 3, 3, 3, 3, 4, 4, 3, 3, 8, 3, 4, 4, 4, 4, 3, 3, 3, 4, 4, 3, 4, 3, 4, 3, 8, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 3, 8, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 4, 3, 3, 4, 3, 4, 4, 4, 3, 3, 3, 4, 3, 4, 4, 3, 3, 3, 3, 4, 3, 4, 4, 3, 4, 4, 4, 4, 3, 3, 3, 4, 3, 4, 4, 4, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 4, 4, 3, 4, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3, 4, 4, 3, 4, 3, 4, 3, 3, 4, 4, 4, 3, 4, 3, 3, 3, 3, 4, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 4, 4, 3, 3, 4, 3, 4, 4, 4, 4, 3, 3, 3, 4, 4, 3, 4, 3, 3, 4, 3, 4, 4, 4, 3, 3, 3, 4, 3, 4, 4, 3, 3, 8, 4, 3, 4, 4, 3, 3, 3, 3, 4, 4, 3, 8, 4, 3, 4, 4, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 8, 4, 3]
总耗时： 0.003949880599975586


In [25]:
import time

def load_json(file):
    with open(file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)
    data = []
    for id, block in enumerate(json_data):
        for key, value in json_data[block].items():
            data.append(value)
    return np.array(data)

if __name__ == "__main__":
    bits = [3,4,8]
    layersizes = load_json('/root/autodl-tmp/methods/mix_quantize/model_info/llama2-7b/LayersParams.json')
    R = 0.25
    original_bit = 16
    
    random_allocator = RandomAllocation(bits, layersizes, R, original_bit,0)

    start_time = time.time()

    # 生成随机分配
    allocation = random_allocator.generate_allocation()
    print("随机分配结果:", allocation)

    print("总耗时：",time.time() - start_time)

随机分配结果: [8, 8, 3, 3, 8, 3, 8, 3, 3, 8, 8, 3, 3, 8, 8, 3, 8, 8, 8, 8, 3, 8, 8, 8, 3, 3, 3, 8, 8, 8, 3, 8, 3, 8, 8, 3, 8, 3, 8, 8, 3, 3, 8, 3, 3, 8, 3, 8, 3, 8, 8, 3, 3, 3, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 8, 3, 8, 3, 8, 3, 3, 3, 8, 8, 3, 3, 8, 8, 8, 8, 8, 8, 8, 3, 8, 3, 8, 8, 8, 8, 8, 3, 3, 8, 8, 8, 8, 3, 8, 3, 8, 8, 3, 8, 3, 3, 8, 3, 8, 3, 3, 8, 3, 3, 8, 3, 8, 3, 8, 3, 3, 3, 3, 8, 3, 3, 8, 3, 3, 3, 8, 8, 3, 3, 3, 3, 3, 8, 3, 3, 3, 3, 3, 8, 3, 3, 8, 8, 8, 3, 8, 8, 3, 8, 8, 3, 3, 8, 3, 3, 3, 3, 3, 3, 3, 3, 8, 3, 8, 3, 8, 3, 3, 3, 3, 8, 3, 3, 3, 8, 8, 8, 8, 3, 3, 8, 3, 3, 3, 3, 8, 3, 8, 8, 3, 8, 8, 3, 8, 3, 8, 3, 3, 8, 3, 8, 8, 8, 8, 3, 3, 8, 8, 8, 3, 8, 8, 3, 3, 3, 3, 8, 3, 8]
总耗时： 0.0008046627044677734


# 优化组间位宽分配计划函数

In [16]:
class RandomAllocation:
    def __init__(self, bits, layersizes, R, original_bit, seed=None):
        self.bits = sorted(bits, reverse=True)  # 按高位宽优先排序
        self.layersizes = layersizes
        self.R = R
        self.original_bit = original_bit
        self.seed = seed
        if self.seed is not None:
            random.seed(self.seed)
        
        # 计算目标约束
        self.target_bit = original_bit * R
        self.size_groups = self._group_layers()

    def _group_layers(self):
        """将层按大小分组，返回格式 {size: count}"""
        groups = defaultdict(int)
        for size in self.layersizes:
            groups[size] += 1
        return dict(groups)

    def _allocate_group(self, size, num_layers):
        max_iterations = 1000
        best_allocation = None
        min_violation = float('inf')
        target_budget = size * num_layers * self.target_bit


        for _ in range(max_iterations):
            allocation = defaultdict(int)
            remaining_layers = num_layers
            remaining_budget = target_budget

            # 阶段1：基于权重随机分配
            candidate_bits = self.bits.copy()
            while remaining_layers > 0 and len(candidate_bits) > 0:
                # 动态调整权重：高位宽概率更高但保留随机性
                weights = [bit**2 for bit in candidate_bits]  # 高位宽权重更高
                chosen_bit = random.choices(candidate_bits, weights=weights, k=1)[0]

                # 计算最大可分配层数
                max_possible = min(
                    remaining_layers,
                    remaining_budget // (size * chosen_bit)
                )
                if max_possible <= 0:
                    candidate_bits.remove(chosen_bit)
                    continue

                # 随机分配层数
                assigned = random.randint(1, max_possible)
                allocation[chosen_bit] += assigned
                remaining_layers -= assigned
                remaining_budget -= size * chosen_bit * assigned

            # 阶段2：强制修正
            if remaining_layers > 0:
                # 尝试用最低位宽填充剩余层数
                min_bit = min(self.bits)
                max_possible = remaining_budget // (size * min_bit)
                if max_possible >= remaining_layers:
                    allocation[min_bit] += remaining_layers
                    remaining_budget -= size * min_bit * remaining_layers
                    remaining_layers = 0
            

            # 记录最优解
            if remaining_layers == 0 and remaining_budget >= 0:
                return dict(allocation)
            else:
                violation = abs(remaining_budget) + 1000 * remaining_layers
                if violation < min_violation:
                    best_allocation = dict(allocation)
                    min_violation = violation

        if best_allocation is not None:
            return best_allocation
        else:
            return {min(self.bits): num_layers}

    def generate_allocation(self):
        """生成最终分配方案"""
        # 第一阶段：按大小分组分配
        group_allocation = {}
        for size, count in self.size_groups.items():
            allocation = self._allocate_group(size, count)
            group_allocation[f"size{size}"] = allocation  # 使用size值作为key标识
        print(group_allocation)

        # 第二阶段：生成最终分配列表（保持原顺序）
        final_allocation = []
        for size in self.layersizes:
            alloc = group_allocation[f"size{size}"]
            available_bits = [k for k, v in alloc.items() if v > 0]
            weights = [alloc[bit] for bit in available_bits]
            chosen_bit = random.choices(available_bits, weights=weights, k=1)[0]
            final_allocation.append(chosen_bit)
            alloc[chosen_bit] -= 1  # 更新分配数

        return final_allocation

In [17]:
import time

def load_json(file):
    with open(file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)
    data = []
    for id, block in enumerate(json_data):
        for key, value in json_data[block].items():
            data.append(value)
    return np.array(data)

if __name__ == "__main__":
    bits = [3,4,8]
    layersizes = load_json('/root/autodl-tmp/methods/mix_quantize/model_info/llama2-7b/LayersParams.json')
    R = 0.25
    original_bit = 16
    seed = 0

    start_time = time.time()
    allocator = RandomAllocation(bits, layersizes, R, original_bit, seed)
    final_alloc = allocator.generate_allocation()
    
    # 打印最终分配列表
    print("\n最终位宽分配列表:", final_alloc)
    print("总耗时：",time.time() - start_time)

0.0
16777216.0
0.0
16777216.0
0.0
16777216.0
0.0
0.0
33554432.0
0.0
0.0
16777216.0
0.0
0.0
0.0
33554432.0
16777216.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
405798912.0
{'size16777216': {8: 21, 4: 22, 3: 85}, 'size45088768': {4: 17, 3: 65, 8: 14}}

最终位宽分配列表: [4, 8, 4, 3, 3, 4, 8, 3, 8, 3, 3, 8, 3, 8, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 3, 8, 4, 8, 4, 3, 3, 8, 8, 3, 3, 3, 8, 3, 3, 3, 3, 3, 8, 3, 8, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 4, 8, 3, 3, 3, 3, 3, 8, 3, 8, 4, 3, 4, 4, 3, 3, 3, 3, 3, 3, 8, 3, 3, 3, 8, 3, 3, 4, 3, 3, 8, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 8, 8, 3, 8, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 8, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 8, 4, 3, 4, 8, 8, 3, 8, 8, 4, 4, 3, 4, 3, 3, 8, 3, 3, 3, 3, 3, 8, 8, 4, 3, 3, 3, 4, 8, 4, 3, 3, 8, 3, 4, 4, 3, 3, 3, 3, 8, 8, 3, 4, 3, 3, 3, 3, 4, 3, 4, 4, 3, 3, 3, 8, 4, 3, 3, 3, 3, 3, 3, 4, 4, 3, 3, 3, 3]
总耗时： 0.008603572845458984
