In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import os
import json
import pandas as pd

dir_path = 'autogptq_eval_result/deepseek-moe-16b-base'  # Directory containing the files

def extract_bits(filename):
    """Extract the bits part from the filename."""
    try:
        return filename.split("_w_bit_")[1].split("_pile")[0]
    except IndexError:
        return None

# Dictionary to hold all data, with bits as keys and metric dictionaries as values
all_data = {}

for filename in os.listdir(dir_path):
    if filename.startswith("eval_result_deepseek-moe-16b-base-gptq_w_bit_") and filename.endswith("_pile"):
        bits = extract_bits(filename)
        if bits:  # Ensure bits part was successfully extracted
            file_path = os.path.join(dir_path, filename)
            try:
                with open(file_path, 'r') as file:
                    content = json.load(file)
                    all_data[bits] = content
            except json.JSONDecodeError:
                # If JSON is invalid, skip this file
                continue

# Convert the collected data into a DataFrame
# This approach ensures all keys across files are considered
df = pd.DataFrame.from_dict(all_data, orient='index')

# Optionally, sort the DataFrame by index (bits) if required
# df.sort_index(inplace=True)

df.index.name = 'Bits'

# Save the DataFrame to a CSV file
csv_path = os.path.join(dir_path, 'evaluation_results_all.csv')
df.to_csv(csv_path)

print(f"CSV file has been saved to {csv_path}")


CSV file has been saved to autogptq_eval_result/deepseek-moe-16b-base/evaluation_results_all.csv


In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from argparse import ArgumentParser
import csv
import torch
from auto_gptq import moe_quantize_config

class parse_args:
    def __init__(self, bits):
        self.bits = bits
model = AutoModelForCausalLM.from_pretrained('deepseek-ai/deepseek-moe-16b-base', torch_dtype=torch.float16, trust_remote_code=True)

log_data = []

def extract_bits(filename):
    """Extract the bits part from the filename."""
    try:
        return filename.split("_w_bit_")[1].split("_pile")[0]
    except IndexError:
        return None

eval_bits = []

for filename in os.listdir('autogptq_eval_result/deepseek-moe-16b-base'):
    if filename.startswith("eval_result_deepseek-moe-16b-base-gptq_w_bit_") and filename.endswith("_pile"):
        bits = extract_bits(filename)
        eval_bits.append(bits)

print(f"len(eval_bits): {len(eval_bits)}")


for bits in eval_bits:
    args = parse_args(bits)
    
    deeepseek_bit = moe_quantize_config(args)
    
    total_bits_moe = 0
    total_params_moe = 0
    total_bits_self_attn = 0
    total_params_self_attn = 0
    total_bits = 0
    total_params = 0
    
    for name, module in model.named_modules():
        if hasattr(module, 'weight'):
            weight = module.weight.data
            num_params = weight.numel()  # Total number of parameters in the module
            
            if name in deeepseek_bit:
                bit = deeepseek_bit[name]
                total_bits += num_params * bit  # Accumulate total bits for all specified modules
                total_params += num_params
                
            if ('experts' in name or 'shared_experts' in name) and name in deeepseek_bit:
                bit = deeepseek_bit[name]
                total_bits_moe += num_params * bit
                total_params_moe += num_params
                
                # print(f'name {name} | bit {bit}')
                # print(f'total_bits_moe {total_bits_moe} | num_params {num_params} | bit {bit}')
            elif 'self_attn' in name and name in deeepseek_bit:
                bit = deeepseek_bit[name]
                total_bits_self_attn += num_params * bit
                total_params_self_attn += num_params
        
    
    average_bit_moe = total_bits_moe / total_params_moe if total_params_moe > 0 else 0
    average_bit_self_attn = total_bits_self_attn / total_params_self_attn if total_params_self_attn > 0 else 0
    average_bit = total_bits / total_params if total_params > 0 else 0
    print(f"Bits: {bits}")
    print(f"MoE Average Bit: {average_bit_moe}")
    print(f"Self-Attention Average Bit: {average_bit_self_attn}")
    print(f"Average Bit: {average_bit}")
    print('=========================')
    
    data = {
        "Bits": bits,
        "MoE Average Bit": average_bit_moe,
        "Self-Attention Average Bit": average_bit_self_attn,
        "Average Bit": average_bit
    }
    
    # Add the data to the list
    log_data.append(data)

fieldnames = ["Bits", "MoE Average Bit", "Self-Attention Average Bit", "Average Bit"]

# Open a CSV file to write the data
save_path = 'autogptq_eval_result/deepseek_bits_data.csv'
with open(save_path, 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Write the header
    writer.writeheader()
    
    # Write the log data
    writer.writerows(log_data)

print(f"Log data has been saved to {save_path}.")


CUDA extension not installed.
CUDA extension not installed.


Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

len(eval_bits): 44
Bits: moe.shared_2.other.4+other_block_4
MoE Average Bit: 3.9393939393939394
Self-Attention Average Bit: 4.0
Average Bit: 3.941434076362471
Bits: moe.shared_4.top30_8.other_2+other_block.8
MoE Average Bit: 4.787878787878788
Self-Attention Average Bit: 8.0
Average Bit: 4.89600604721097
Bits: moe.shared_4.top15_4.other_2+other_block.8
MoE Average Bit: 2.515151515151515
Self-Attention Average Bit: 8.0
Average Bit: 2.6997839108036383
Bits: moe.shared_8.top30_4.other_2+other_block.4
MoE Average Bit: 3.090909090909091
Self-Attention Average Bit: 4.0
Average Bit: 3.121511145437067
Bits: moe.shared_4.top2_4.other_2+other_block.4
MoE Average Bit: 2.121212121212121
Self-Attention Average Bit: 4.0
Average Bit: 2.1844563672366055
Bits: moe.shared_8.top2_8.other_2+other_block.4
MoE Average Bit: 2.3636363636363638
Self-Attention Average Bit: 4.0
Average Bit: 2.4187200617867206
Bits: moe.shared_8.top5_4.other_2+other_block.4
MoE Average Bit: 2.3333333333333335
Self-Attention Averag

61

Log data has been saved to autogptq_eval_result/deepseek_bits_data.csv.


In [10]:
import pandas as pd

df1 = pd.read_csv('/home/LeiFeng/xiaolong/moe_quantize/autogptq_eval_result/deepseek-moe-16b-base/deepseek_bits_data.csv')
df2 = pd.read_csv('/home/LeiFeng/xiaolong/moe_quantize/autogptq_eval_result/deepseek-moe-16b-base/evaluation_results_all.csv')

merged_df = pd.merge(df1, df2, on='Bits', how='outer')

merged_df.to_csv('/home/LeiFeng/xiaolong/moe_quantize/autogptq_eval_result/deepseek-moe-16b-base/evaluation_results_final.csv', index=False)


In [31]:
import torch
qweight = torch.tensor(
[[-4],[-6]]
, dtype=torch.int32)  


bits = 2
shifts = torch.arange(0, 32, bits)
iweights = torch.bitwise_right_shift(qweight[:, :, None], shifts[None, None, :]).to(
    torch.int8  # smallest dtype available
)
iweights = iweights.view(iweights.shape[0], -1)
iweights = torch.bitwise_and(iweights, (2**bits) - 1)
    
print(iweights)
    


tensor([[0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
        [2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]], dtype=torch.int8)


In [30]:
# intweight = torch.randint(low=0, high=256, size=shape, dtype=torch.int32)
# (intweight.shape[0], intweight.shape[1] // 32 * w_bit),

# intweight = torch.tensor([[168,  77,  94,  72],
#         [234, 254, 191, 123]], dtype=torch.int32)

import torch

shape = (2, 4)
w_bit = 2

intweight = torch.tensor([[168,  77,  94,  72] * (8//w_bit),
        [234, 254, 191, 123]*(8//w_bit)], dtype=torch.int32)


intweight.shape, intweight.max(), intweight.min()


qweight = torch.zeros(
    (intweight.shape[0], intweight.shape[1] * w_bit// 32 ),
    dtype=torch.int32,
    device=intweight.device,
)
qweight.shape
pack_num = 32 // w_bit

for col in range(intweight.shape[1] // pack_num):
    if w_bit == 4:
        order_map = [0, 2, 4, 6, 1, 3, 5, 7]
    elif w_bit == 8:
        order_map = list(range(4))  # Order map for 8-bit quantization
        print(f'order_map: {order_map}')
    elif w_bit == 2:
        order_map = list(range(pack_num))  # Order map for 2-bit quantization
    else:
        raise NotImplementedError("Only 2-bit and 4-bit quantizations are supported.")

    for i in range(pack_num):
        idx = col * pack_num + order_map[i]
        if idx < intweight.shape[1]:  # Check to avoid 'index out of range'
            qweight_col = intweight[:, idx]
            qweight[:, col] |= qweight_col << (i * w_bit)
print('qweight: ', qweight)       
    

(torch.Size([2, 16]),
 tensor(254, dtype=torch.int32),
 tensor(72, dtype=torch.int32))

torch.Size([2, 1])

qweight:  tensor([[-4],
        [-6]], dtype=torch.int32)


In [16]:
import torch

w_bit = 2

intweight = torch.tensor([[168,  77,  94,  72] * (8//w_bit),
        [234, 254, 191, 123]*(8//w_bit)], dtype=torch.int32)



intweight.shape, intweight.max(), intweight.min()


qweight = torch.zeros(
    (intweight.shape[0], intweight.shape[1] * w_bit// 32 ),
    dtype=torch.int32,
    device=intweight.device,
)
qweight.shape
pack_num = 32 // w_bit

for col in range(intweight.shape[1] // pack_num):
    if w_bit == 4:
        order_map = [0, 2, 4, 6, 1, 3, 5, 7]
    elif w_bit == 8:
        order_map = list(range(4))  # Order map for 8-bit quantization
        print(f'order_map: {order_map}')
    elif w_bit == 2:
        order_map = list(range(pack_num))  # Order map for 2-bit quantization
    else:
        raise NotImplementedError("Only 2-bit and 4-bit quantizations are supported.")

    for i in range(pack_num):
        idx = col * pack_num + order_map[i]
        if idx < intweight.shape[1]:  # Check to avoid 'index out of range'
            qweight_col = intweight[:, idx]
            qweight[:, col] |= qweight_col << (i * w_bit)
print('qweight: ', qweight)    
print('intweight: ', intweight)

shifts = torch.arange(0, 32, w_bit)

iweights = torch.bitwise_right_shift(qweight[:, :, None], shifts[None, None, :]).to(
    torch.uint8  
)
iweights = iweights.view(iweights.shape[0], -1)
print('iweights: ', iweights)

qweight:  tensor([[-4],
        [-6]], dtype=torch.int32)
intweight:  tensor([[168,  77,  94,  72, 168,  77,  94,  72, 168,  77,  94,  72, 168,  77,
          94,  72],
        [234, 254, 191, 123, 234, 254, 191, 123, 234, 254, 191, 123, 234, 254,
         191, 123]], dtype=torch.int32)
iweights:  tensor([[252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
         255, 255],
        [250, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
         255, 255]], dtype=torch.uint8)


In [None]:
shifts = torch.arange(0, 32, bits, device=qweight.device)
masks = torch.tensor([0xFF << shift for shift in shifts], device=qweight.device)

# Unpack weights
iweights = torch.bitwise_and(qweight[:, :, None], masks[None, None, :])
iweights = torch.bitwise_right_shift(iweights, shifts[None, None, :])
iweights = iweights.to(torch.int8)
iweights = iweights.view(iweights.shape[0], -1)


In [None]:
# Adjust the data type to avoid overflow
qweight = torch.tensor([
[[1214139816],
        [2076180202]]
], dtype=torch.int64)  # Using 64-bit integer to prevent overflow

from bitstring import Bits

def to_2bit_binary(num):
    # Ensure the number is within the valid range for 2-bit representation
    if num < 0 or num > 3:
        raise ValueError("Number must be between 0 and 3 for 2-bit representation")
    
    # Convert the number to a 2-bit binary string
    binary_str = Bits(uint=num, length=2).bin
    
    return binary_str


# Convert each element in the tensor to binary strings
qweight_binary = [[bin(val.item()) for val in row] for row in qweight]

qweight_binary

In [14]:
import torch

# Original intweight tensor
intweight = torch.tensor([[168,  77,  94,  72],
                          [234, 254, 191, 123]], dtype=torch.int32)

# Packed qweight tensor
qweight = torch.tensor([[1214139816],
                        [2076180202]], dtype=torch.int32)

# Unpacking logic correction
w_bit = 8
shifts = torch.arange(0, 32, w_bit)
mask = (1 << w_bit) - 1  # Create a mask to isolate w_bit bits

# Unpack qweight
iweights_corrected = torch.bitwise_and(torch.bitwise_right_shift(qweight[:, :, None], shifts[None, None, :]), mask).to(torch.int8)

# Interpret the values as signed int8
iweights_corrected = iweights_corrected.view(iweights_corrected.shape[0], -1)

iweights_corrected, intweight

(tensor([[-88,  77,  94,  72],
         [-22,  -2, -65, 123]], dtype=torch.int8),
 tensor([[168,  77,  94,  72],
         [234, 254, 191, 123]], dtype=torch.int32))

In [32]:
import torch

w_bit = 2

intweight = torch.tensor([[1,2,3,0] * (8//w_bit),
        [0,1,2,3]*(8//w_bit)], dtype=torch.int32)

intweight.shape, intweight.max(), intweight.min()


qweight = torch.zeros(
    (intweight.shape[0], intweight.shape[1] * w_bit// 32 ),
    dtype=torch.int32,
    device=intweight.device,
)
qweight.shape
pack_num = 32 // w_bit

for col in range(intweight.shape[1] // pack_num):
    if w_bit == 4:
        order_map = [0, 2, 4, 6, 1, 3, 5, 7]
    elif w_bit == 8:
        order_map = list(range(4))  # Order map for 8-bit quantization
        print(f'order_map: {order_map}')
    elif w_bit == 2:
        order_map = list(range(pack_num))  # Order map for 2-bit quantization
    else:
        raise NotImplementedError("Only 2-bit and 4-bit quantizations are supported.")

    for i in range(pack_num):
        idx = col * pack_num + order_map[i]
        if idx < intweight.shape[1]:  # Check to avoid 'index out of range'
            qweight_col = intweight[:, idx]
            qweight[:, col] |= qweight_col << (i * w_bit)
print('qweight: ', qweight)    
print('intweight: ', intweight)


qweight_shape = qweight.shape  

intweight_unpacked = torch.zeros((qweight_shape[0], qweight_shape[1] * pack_num), dtype=torch.int32)

for col in range(qweight_shape[1]):
    for i in range(pack_num):
        # Calculate the mask to isolate the bits for the current value
        mask = (1 << w_bit) - 1
        mask = mask << (i * w_bit)
        
        # Extract the bits for the current value
        extracted_bits = (qweight[:, col] & mask) >> (i * w_bit)
        
        # Place the extracted bits in the correct position in intweight_unpacked
        intweight_unpacked[:, col * pack_num + i] = extracted_bits

print('intweight_unpacked: ', intweight_unpacked)

qweight:  tensor([[ 960051513],
        [-454761244]], dtype=torch.int32)
intweight:  tensor([[1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0],
        [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]], dtype=torch.int32)
intweight_unpacked:  tensor([[ 1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  0],
        [ 0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2, -1]],
       dtype=torch.int32)


In [29]:
import torch

def split_int32_to_int2(num):
    int2_nums = torch.zeros(16, dtype=torch.int8)
    for i in range(16):
        int2_num = (num >> (30 - i * 2)) & 0b11
        int2_nums[i] = int2_num
    return int2_nums

def split_tensor_to_int2(tensor):
    # Preallocate the tensor to hold the results
    # tensor.shape[0] is the number of elements, 16 is the new size for each element
    results_shape = tensor.shape + (16,)  # Adjust according to your specific shape
    int2_tensor = torch.zeros(results_shape, dtype=torch.int8)
    
    # Iterate through each element to apply the function
    for i in range(tensor.size(0)):
        for j in range(tensor.size(1)):  # Assuming 2D tensor for simplicity; adjust as needed
            int2_tensor[i, j] = split_int32_to_int2(tensor[i, j])
    
    return int2_tensor

# Define the tensor
qweight = torch.tensor([[960051513], [-454761244]], dtype=torch.int32)

# Apply the function to the tensor
int2_qweight = split_tensor_to_int2(qweight)

print(int2_qweight)


tensor([[[0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1]],

        [[3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0]]], dtype=torch.int8)


In [31]:
import torch

def split_int32_to_int2(tensor):
    # Create a tensor of shift values
    shifts = torch.arange(30, -2, -2, dtype=torch.int32).reshape(1, -1)
    # Create the mask by shifting 0b11 to the right positions
    mask = 3 << shifts  # Broadcasted shift operation
    
    # Apply the mask and shift the results to the right
    int2_tensor = (tensor.unsqueeze(-1) & mask) >> shifts
    
    return int2_tensor

# Example usage
intweight = torch.tensor([[1, 2, 3, 0] * (8//2),
                          [0, 1, 2, 3] * (8//2)], dtype=torch.int32)
print
# Convert to 16 int2 numbers
int2_intweight = split_int32_to_int2(intweight)

# int2_intweight will have a shape of (original_shape, 16) representing the 2-bit values
print('Original int32 tensor:', intweight)
print('Original int32 tensor:', intweight.shape)
print('Converted to 16 int2 values:', int2_intweight)
print('Shape of converted tensor:', int2_intweight.shape)


Original int32 tensor: tensor([[1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0],
        [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]], dtype=torch.int32)
Original int32 tensor: torch.Size([2, 16])
Converted to 16 int2 values: tensor([[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
  

In [4]:
import os

def list_directories(path):
    try:
        # 使用 os.listdir 获取目录内容
        directory_contents = os.listdir(path)
        # 使用 os.path.isdir 判断哪些是目录
        directories = {item for item in directory_contents if os.path.isdir(os.path.join(path, item))}
        return set(directories)
    except Exception as e:
        print(f"Error: {e}")
        return set()

# 替换此路径为你的目标目录路径
path = '/home/LeiFeng/xiaolong/moe_quantize/autogptq_deepseek-ai'
directories = list_directories(path)
print(directories)


{'deepseek-moe-16b-base-gptq_w_bit_moe.shared_8.top25_4.other_2+other_block.4+endlayer_25', 'deepseek-moe-16b-base-gptq_w_bit_moe.shared_8.top25_4.other_2+other_block.4+startlayer_25', 'deepseek-moe-16b-base-gptq_w_bit_moe.shared_8.top25_4.other_2+other_block.4+startlayer_10', 'deepseek-moe-16b-base-gptq_w_bit_moe.shared_8.top25_4.other_2+other_block.4+endlayer_20', 'deepseek-moe-16b-base-gptq_w_bit_moe.shared_8.top25_4.other_2+other_block.4+startlayer_15', 'deepseek-moe-16b-base-gptq_w_bit_moe.shared_8.top25_4.other_2+other_block.4+endlayer_27', 'deepseek-moe-16b-base-gptq_w_bit_moe.shared_8.top25_4.other_2+other_block.4+endlayer_5', 'deepseek-moe-16b-base-gptq_w_bit_moe.shared_8.top25_4.other_2+other_block.4+endlayer_10', 'deepseek-moe-16b-base-gptq_w_bit_moe.shared_8.top25_4.other_2+other_block.4+startlayer_27', 'deepseek-moe-16b-base-gptq_w_bit_moe.shared_8.top25_4.other_2+other_block.4+endlayer_15', 'deepseek-moe-16b-base-gptq_w_bit_moe.shared_8.top25_4.other_2+other_block.4+start