In [6]:
# region for 模型指纹保存流程：
## 1. 计算第0层 o_proj（自注意力输出投影）的 LoRA B*A 矩阵（从PT文件中读取出B、A），记结果为O矩阵
## 2. uuid映射O矩阵
## 3. 状态机筛选
## 4. 映射三维坐标点 + DBSCAN聚类
## 5. 保存模型指纹
# endregion

In [2]:
# 第一步：计算第0层 o_proj 的 LoRA B*A 矩阵
import torch
from prettytable import PrettyTable

def calculate_O_matrix(lora_path, layer=0, module="self_attn.o_proj",
                       adapter="default", save_path='O_matrix.pt'):
    """Compute the LoRA update matrix ``B @ A`` for one projection of one layer.

    Args:
        lora_path: Path of a ``torch.save``-d mapping from parameter names to
            tensors (a LoRA state dict).
        layer: Index of the transformer layer whose adapter is read.
        module: Dotted sub-module path inside the layer.
        adapter: Adapter name embedded in the parameter keys.
        save_path: Where the product is written with ``torch.save``; pass
            ``None`` to skip writing. The default keeps the historical
            behaviour of writing ``O_matrix.pt`` into the working directory.

    Returns:
        The tensor ``lora_B @ lora_A``.

    Raises:
        KeyError: If either expected key is absent from the checkpoint.
    """
    # weights_only=True restricts unpickling to tensors and plain containers,
    # so a tampered checkpoint cannot execute arbitrary code on load.
    lora_data = torch.load(lora_path, map_location=torch.device('cpu'),
                           weights_only=True)

    prefix = f"model.layers.{layer}.{module}"
    key_A = f"{prefix}.lora_A.{adapter}.weight"
    key_B = f"{prefix}.lora_B.{adapter}.weight"
    for key in (key_A, key_B):
        if key not in lora_data:
            raise KeyError(f"{key!r} not found in {lora_path!r}")

    lora_A = lora_data[key_A]
    lora_B = lora_data[key_B]

    # Report the factor shapes so a rank or dimension mismatch is easy to spot.
    print("A矩阵形状:", lora_A.shape)
    print("B矩阵形状:", lora_B.shape)

    # O = B @ A
    O_matrix = torch.matmul(lora_B, lora_A)
    print("O矩阵形状:", O_matrix.shape)

    if save_path is not None:
        torch.save(O_matrix, save_path)

    return O_matrix

# Location of the LoRA checkpoint; point this at the real weights file.
lora_weights_path = "./lora_weights.pt"
O_matrix = calculate_O_matrix(lora_weights_path)
print(O_matrix)

# Tabulate the top-left 512 x 512 corner of the product for visual inspection.
O_matrix_subset = O_matrix[0:512, 0:512]
table_O = PrettyTable()
# One header per column of the slice.
table_O.field_names = [f"Col {i}" for i in range(O_matrix_subset.shape[1])]
# One table row per tensor row.
for row in O_matrix_subset:
    table_O.add_row(row.numpy())
print(table_O)

  lora_data = torch.load(lora_path, map_location=torch.device('cpu'))


A矩阵形状: torch.Size([8, 2048])
B矩阵形状: torch.Size([2048, 8])
O矩阵形状: torch.Size([2048, 2048])
tensor([[ 4.4091e-05,  7.6450e-05, -1.3066e-04,  ...,  8.9990e-05,
         -6.9886e-05,  1.3933e-04],
        [ 9.5208e-05, -3.9194e-05, -2.5941e-04,  ..., -1.2387e-06,
         -1.1460e-04,  7.1963e-05],
        [-4.8367e-05, -3.9669e-05,  1.5975e-04,  ..., -2.8503e-05,
         -1.2687e-04,  2.1239e-04],
        ...,
        [-3.6304e-05,  8.5102e-05, -4.1491e-05,  ..., -6.3448e-05,
         -1.0415e-04,  1.3462e-04],
        [ 1.7444e-05, -4.5880e-05, -2.5448e-04,  ..., -4.9901e-05,
          4.4638e-06,  1.6786e-05],
        [-1.0263e-04, -4.6868e-05,  1.7759e-04,  ..., -7.3710e-05,
          1.3802e-05, -3.8382e-05]])
+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-------

In [3]:
# 第二步：uuid映射O矩阵
import numpy as np

def split_O_matrix(O_matrix):
    """Cut a matrix into four quadrants.

    The split point is ``O_matrix.shape[0] // 2`` and is applied to both axes
    (for a 2048x2048 input each quadrant is 1024x1024).

    Returns:
        ``(top_left, top_right, bottom_left, bottom_right)``.
    """
    half = O_matrix.shape[0] // 2
    upper, lower = O_matrix[:half], O_matrix[half:]
    return upper[:, :half], upper[:, half:], lower[:, :half], lower[:, half:]

def uuid_to_binary(uuid_str):
    """Convert a hexadecimal UUID string into four 32-bit binary strings.

    Both the compact form (32 hex digits) and the canonical dashed form
    (8-4-4-4-12) are accepted. Inputs shorter than 32 hex digits are
    left-padded with zero bits.

    Args:
        uuid_str: UUID written in hexadecimal, with or without dashes.

    Returns:
        A list of four strings of 32 ``'0'``/``'1'`` characters, most
        significant segment first.

    Raises:
        ValueError: If the text is not valid hexadecimal or encodes more
            than 128 bits.
    """
    # Dashes are only separators; drop them before parsing the hex digits.
    hex_digits = uuid_str.replace('-', '')
    uuid_int = int(hex_digits, 16)
    # A wider value would yield a bit string longer than 128 characters, and the
    # slicing below would silently drop its low-order bits.
    if uuid_int >> 128:
        raise ValueError(f"UUID value does not fit in 128 bits: {uuid_str!r}")
    uuid_bin = format(uuid_int, '0128b')
    # Four consecutive 32-bit segments.
    return [uuid_bin[start:start + 32] for start in range(0, 128, 32)]

def extract_vectors(O_part, uuid_part):
    """Select three rows or three columns of ``O_part`` as directed by ``uuid_part``.

    Bits 0-9, 10-19 and 20-29 of ``uuid_part`` are each read as an unsigned
    integer and reduced by one to form an index. A field of all zeros therefore
    yields index -1, which selects the last row/column through negative
    indexing. Bit 30 chooses the direction: 1 picks columns, 0 picks rows.

    Returns:
        A list of three tensors, each reshaped to ``(1, -1)``.
    """
    positions = [int(uuid_part[start:start + 10], 2) - 1 for start in (0, 10, 20)]
    take_columns = int(uuid_part[30]) != 0

    return [
        (O_part[:, position] if take_columns else O_part[position, :]).reshape(1, -1)
        for position in positions
    ]

def process_matrix(O_matrix, uuid):
    """Derive the x, y and z matrices from ``O_matrix`` under control of ``uuid``.

    The matrix is split into four quadrants and the UUID into four 32-bit
    groups. Each quadrant/group pair contributes three vectors (see
    ``extract_vectors``). The first vector of every quadrant is stacked into
    ``x``, the second into ``y`` and the third into ``z``.

    Returns:
        ``(x, y, z)``, each the row-wise concatenation of four vectors.
    """
    quadrants = split_O_matrix(O_matrix)
    bit_groups = uuid_to_binary(uuid)

    # Twelve vectors in quadrant order: q1v1, q1v2, q1v3, q2v1, ...
    all_vectors = []
    for quadrant, bits in zip(quadrants, bit_groups):
        all_vectors += extract_vectors(quadrant, bits)

    # Every third vector, starting at offset 0 / 1 / 2, belongs to x / y / z.
    x, y, z = (torch.cat(all_vectors[axis::3], dim=0) for axis in range(3))
    return x, y, z

# Usage example
# NOTE(review): the name `uuid` would shadow the standard-library module of the
# same name if that module were ever imported in this notebook.
uuid = "c1d0f855d0f841d00c1d0000000000c1"
# O_matrix comes from the step-1 cell, which must have been executed first.
x, y, z = process_matrix(O_matrix, uuid)

# Print the shapes of the results
print("x形状:", x.shape)
print("y形状:", y.shape)
print("z形状:", z.shape)

# # Save the results
# torch.save({
#     'x': x,
#     'y': y,
#     'z': z
# }, 'xyz_vectors.pt')
print(x)

x形状: torch.Size([4, 1024])
y形状: torch.Size([4, 1024])
z形状: torch.Size([4, 1024])
tensor([[-7.6536e-05,  2.4611e-05,  1.4360e-04,  ...,  1.0015e-04,
         -1.1621e-05, -4.8980e-05],
        [ 5.7914e-05, -4.7265e-06,  1.4002e-04,  ..., -1.4388e-04,
          3.8581e-05, -1.3787e-04],
        [-1.5803e-04,  4.0164e-05,  2.6462e-04,  ...,  2.0340e-04,
          2.3261e-05, -2.1709e-05],
        [-1.7949e-04, -4.2026e-05, -1.1179e-04,  ..., -7.3710e-05,
          1.3802e-05, -3.8382e-05]])


In [10]:
# 第三步：状态机筛选
import random

## 获取状态机初始状态
def get_remaining_bits(uuid_str):
    """Concatenate the final bit of each 32-bit UUID segment.

    The UUID is split with ``uuid_to_binary``; index 31 (the last character)
    of each of the four segments is taken, in segment order.

    Returns:
        A string of four ``'0'``/``'1'`` characters.
    """
    last_bits = [segment[31] for segment in uuid_to_binary(uuid_str)]
    return ''.join(last_bits)

# Seed bits for the state machine: the last bit of each 32-bit UUID segment.
# `uuid` is the hex string assigned in the step-2 cell, so that cell must run first.
four_bits = get_remaining_bits(uuid)
# print(f"剩余的4位二进制: {four_bits}")

## 定义混沌状态机
class NonlinearFSM:
    """Deterministic float-state iterator that blends four nonlinear maps."""

    def __init__(self):
        # Weights applied to the four map outputs in next_state.
        self.phi = (1 + 5 ** 0.5) / 2
        self.e = 2.718281828459045
        self.pi = 3.141592653589793
        self.sqrt2 = 2 ** 0.5

    def next_state(self, current_state):
        """Return the successor of ``current_state`` (both are plain fractions).

        A logistic, a tent, a sine and a cubic term are each weighted, summed
        and reduced modulo 1; the result is then perturbed by
        ``current_state * phi`` and reduced modulo 1 again.
        """
        x = current_state
        logistic_term = (4 * x * (1 - x)) * self.phi
        tent_term = min(2 * x, 2 * (1 - x)) * self.e
        sine_term = abs(np.sin(self.pi * x)) * self.pi
        cubic_term = abs((3 * x ** 3 - 2 * x) % 1) * self.sqrt2
        # Summation order is kept fixed so the floating-point result is reproducible.
        blended = (logistic_term + tent_term + sine_term + cubic_term) % 1
        return (blended + x * self.phi) % 1

    def iterate_and_find_period(self, initial_state, max_iterations=2048):
        """Iterate from ``initial_state`` until a state repeats or the budget is spent.

        Returns:
            ``(states, period)``. ``states`` lists every successor produced, not
            including ``initial_state``. ``period`` is the distance between the
            two visits of the first repeated state, or ``None`` when no state
            repeated within ``max_iterations`` steps. On a repeat the method
            returns immediately, so ``states`` may be shorter than
            ``max_iterations``.
        """
        first_seen = {}
        trajectory = []
        state = initial_state
        for step in range(max_iterations):
            # Exact float equality is the repeat criterion.
            earlier_step = first_seen.get(state)
            if earlier_step is not None:
                return trajectory, step - earlier_step
            first_seen[state] = step
            state = self.next_state(state)
            trajectory.append(state)
        return trajectory, None

# Interpret four_bits (a 4-character binary string) as a fraction k/16 in [0, 1).
decimal_value = int(four_bits, 2) / 16  # 4-bit integer scaled into [0, 1)
fsm = NonlinearFSM()
initial_state = decimal_value  # the fraction is used directly as the FSM seed
# NOTE(review): iterate_and_find_period returns early when a state repeats, so
# results_float can hold fewer than 2048 values; period_length is None when no
# repeat was found.
results_float, period_length = fsm.iterate_and_find_period(initial_state, 2048)
def threshold_results(results):
    """Binarise a sequence: 1 where the value is strictly greater than 0.5, else 0."""
    bits = []
    for value in results:
        bits.append(1 if value > 0.5 else 0)
    return bits
# Binarise the FSM trajectory (1 where the state is > 0.5, else 0).
thresholded_results = threshold_results(results_float)
def convert_to_decimal(thresholded_results):
    """Fold a bit sequence in half and combine the halves into 2-bit values.

    With ``half = len(thresholded_results) // 2``, element ``i`` of the first
    half supplies the high bit and element ``i + half`` the low bit, so the
    pairs are half a sequence apart, not adjacent. For an odd-length input the
    final element is unused.

    :param thresholded_results: sequence whose elements are 0 or 1
    :return: list of ``half`` integers, each in the range 0-3
    """
    half = len(thresholded_results) // 2
    high_bits = thresholded_results[:half]
    low_bits = thresholded_results[half:]
    # zip stops after `half` pairs, which drops the spare element of an odd input.
    return [high * 2 + low for high, low in zip(high_bits, low_bits)]

# Example: fold the bit sequence into values 0-3 (see convert_to_decimal).
decimal_results = convert_to_decimal(thresholded_results)
def accept(accept_set=None, original_list=None):
    """Translate each element of ``original_list`` by indexing ``accept_set`` with it.

    Returns:
        A list with ``accept_set[element]`` for every element, in order.
    """
    mapped = []
    for element in original_list:
        mapped.append(accept_set[element])
    return mapped
# Map each 2-bit value 0..3 onto the code 1 / 2 / 4 / 8 by using it as a list index.
results = accept([1, 2, 4, 8], decimal_results)
print(results)  
print("--------------------------------")

def convert_to_binary_matrix(numbers, num_rows=4):
    """Expand a list of codes into a matrix with at most one 1 per column.

    Column ``j`` receives a 1 in row 3, 2, 1 or 0 when ``numbers[j]`` equals
    1, 2, 4 or 8 respectively (the bit pattern 0001 / 0010 / 0100 / 1000 read
    top to bottom). Any other value leaves the column all zeros.

    numbers: list of codes, e.g. [1, 8, 4, 2, ...]
    num_rows: number of rows in the output matrix (default 4)
    """
    # (code, target row) pairs, tried in order.
    row_for_code = ((1, 3), (2, 2), (4, 1), (8, 0))

    binary_matrix = torch.zeros((num_rows, len(numbers)))
    for col, num in enumerate(numbers):
        for code, row in row_for_code:
            if num == code:
                binary_matrix[row, col] = 1
                break

    return binary_matrix

# Selection matrix: column j carries a single 1 in the row chosen by results[j].
binary_matrix = convert_to_binary_matrix(results)
# # 打印筛选矩阵
# from prettytable import PrettyTable
# results_subset = binary_matrix[:, :128]
# print(results_subset.shape)
# table_results = PrettyTable()

# # 添加列
# table_results.field_names = [f"Col {i}" for i in range(results_subset.size(1))]
# # 添加行
# for row in results_subset:
#     table_results.add_row(row.numpy())
# # 打印表格
# print(table_results)
# print("++++++++++++++++++++++++++++++++++++")

## 正式筛选
def filter_matrix(input_matrix, binary_matrix):
    """Pick one entry per column of ``input_matrix`` as marked by ``binary_matrix``.

    input_matrix: matrix to select from (4 x 1024 in this notebook)
    binary_matrix: selection matrix of the same layout; each column must contain
        exactly one entry equal to 1, otherwise the ``.item()`` call fails
    return: tensor of shape (1, number of columns) holding the selected entries
    """
    n_cols = input_matrix.shape[1]
    selected = torch.zeros(1, n_cols)

    for j in range(n_cols):
        # Row index of the single 1 in column j of the selection matrix.
        (hit_rows,) = torch.where(binary_matrix[:, j] == 1)
        selected[0, j] = input_matrix[hit_rows.item(), j]

    return selected

def _tensor_to_table(matrix):
    """Build a PrettyTable with one "Col i" header per column and one row per tensor row."""
    table = PrettyTable()
    table.field_names = [f"Col {i}" for i in range(matrix.size(1))]
    for values in matrix:
        table.add_row(values.numpy())
    return table


# Apply the same per-column row selection to each of x, y and z.
new_x = filter_matrix(x, binary_matrix)
new_y = filter_matrix(y, binary_matrix)
new_z = filter_matrix(z, binary_matrix)
print(new_x)

# Show the first 128 columns of each filtered vector as a text table.
x_subset = new_x[:, :128]
table_x = _tensor_to_table(x_subset)
print(table_x)

y_subset = new_y[:, :128]
table_y = _tensor_to_table(y_subset)
print(table_y)

z_subset = new_z[:, :128]
table_z = _tensor_to_table(z_subset)
print(table_z)

# Shapes of the filtered results.
print("筛选后x形状:", new_x.shape)
print("筛选后y形状:", new_y.shape)
print("筛选后z形状:", new_z.shape)

# A few leading values as a sanity check.
print("\n筛选后x的前几个元素:")
print(new_x[0, :10])

[4, 8, 8, 2, 1, 8, 2, 1, 8, 2, 1, 1, 2, 1, 2, 1, 4, 4, 4, 2, 1, 1, 8, 8, 2, 8, 8, 8, 8, 8, 1, 2, 1, 1, 8, 8, 4, 1, 2, 8, 1, 4, 2, 2, 4, 1, 2, 4, 1, 2, 8, 2, 4, 8, 4, 1, 2, 4, 8, 1, 8, 1, 8, 1, 4, 4, 1, 8, 1, 2, 4, 1, 4, 8, 1, 8, 8, 8, 8, 2, 8, 2, 2, 8, 2, 8, 2, 1, 1, 8, 8, 1, 8, 1, 4, 2, 2, 8, 8, 4, 2, 4, 2, 4, 1, 2, 4, 8, 4, 4, 2, 4, 4, 8, 2, 8, 4, 4, 4, 8, 4, 4, 1, 1, 4, 8, 8, 2, 1, 8, 1, 1, 8, 1, 4, 4, 2, 1, 4, 8, 4, 1, 4, 1, 8, 8, 8, 8, 2, 1, 1, 2, 8, 4, 8, 4, 4, 8, 8, 4, 8, 1, 4, 1, 2, 4, 1, 4, 1, 2, 4, 1, 4, 2, 1, 4, 8, 2, 2, 4, 2, 2, 2, 8, 1, 2, 2, 1, 1, 8, 8, 8, 1, 2, 2, 4, 4, 2, 4, 8, 2, 2, 1, 8, 4, 1, 2, 8, 2, 1, 2, 8, 1, 8, 2, 2, 2, 1, 4, 2, 8, 8, 1, 4, 1, 2, 1, 2, 2, 2, 2, 8, 8, 2, 4, 4, 4, 4, 1, 1, 1, 8, 4, 8, 4, 2, 4, 4, 4, 4, 2, 4, 8, 8, 2, 8, 1, 4, 4, 4, 8, 2, 8, 1, 4, 2, 8, 1, 4, 4, 1, 4, 2, 4, 4, 2, 1, 1, 2, 2, 2, 8, 8, 8, 4, 1, 2, 2, 2, 1, 4, 1, 8, 1, 4, 8, 2, 4, 8, 4, 1, 8, 8, 1, 8, 8, 8, 4, 8, 2, 8, 2, 8, 8, 1, 4, 1, 1, 1, 2, 8, 4, 4, 2, 8, 4, 2, 2, 2, 8, 2, 4, 8, 

In [1]:
# 第四步：映射到三维空间 + DBSCAN聚类
import numpy as np
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import torch

def cluster_and_visualize(x, y, z, scale=10000, eps=1.0, min_samples=5,
                          fig_path='clustering_result.png',
                          result_path='clustering_result.npy',
                          show=True):
    """Cluster 3-D points with DBSCAN, print per-cluster statistics and plot them.

    Args:
        x, y, z: Coordinate vectors of equal length (1 x 1024 tensors in this
            notebook). Tensors are detached and moved to the CPU; other
            array-likes are accepted too.
        scale: Factor every coordinate is multiplied by before clustering.
        eps: DBSCAN neighbourhood radius, in scaled units.
        min_samples: DBSCAN minimum number of samples for a core point.
        fig_path: File the figure is saved to; ``None`` skips saving.
        result_path: File the result dict is saved to with ``np.save``; ``None``
            skips saving. The dict is stored as a pickled object array, so it
            must be read back with ``np.load(path, allow_pickle=True).item()``.
        show: Whether to call ``plt.show()``; when false the figure is closed.

    Returns:
        A dict with keys ``points`` (N x 3 scaled array), ``labels``,
        ``n_clusters`` (noise excluded), ``cluster_info`` (statistics keyed by
        ``'cluster_<label>'`` or ``'noise'``) and ``params``.
    """
    def _scaled_coords(values):
        # detach()/cpu() keep this working for tensors that track gradients or
        # live on an accelerator.
        if isinstance(values, torch.Tensor):
            values = values.detach().cpu().numpy()
        return np.asarray(values).flatten() * scale

    # Organise the data as one point per row: (N, 3).
    points = np.vstack((_scaled_coords(x), _scaled_coords(y), _scaled_coords(z))).T

    clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(points)
    labels = clustering.labels_

    # Sorted labels give a deterministic reporting and plotting order;
    # DBSCAN marks noise with the label -1.
    unique_labels = [int(label) for label in np.unique(labels)]
    cluster_labels = [label for label in unique_labels if label != -1]
    n_clusters = len(cluster_labels)
    print(f'聚类数量: {n_clusters}')

    # Per-cluster statistics (the noise group is summarised as well).
    cluster_info = {}
    for label in unique_labels:
        cluster_points = points[labels == label]
        lower = np.min(cluster_points, axis=0)
        upper = np.max(cluster_points, axis=0)

        info = {
            'size': len(cluster_points),
            'center': np.mean(cluster_points, axis=0),
            'std': np.std(cluster_points, axis=0),
            'min': lower,
            'max': upper,
            # Points per unit of bounding-box volume; the small constant avoids
            # division by zero for a degenerate box.
            'density': len(cluster_points) / (np.prod(upper - lower) + 1e-10),
            'points': cluster_points,
        }

        distances = np.linalg.norm(cluster_points - info['center'], axis=1)
        info['avg_distance_to_center'] = np.mean(distances)
        info['max_distance_to_center'] = np.max(distances)

        cluster_name = 'noise' if label == -1 else f'cluster_{label}'
        cluster_info[cluster_name] = info

        print(f"\n{cluster_name.upper()} 信息:")
        print(f"点数: {info['size']}")
        print(f"中心坐标: ({info['center'][0]:.2f}, {info['center'][1]:.2f}, {info['center'][2]:.2f})")
        print(f"标准差: ({info['std'][0]:.2f}, {info['std'][1]:.2f}, {info['std'][2]:.2f})")
        print(f"平均到中心距离: {info['avg_distance_to_center']:.2f}")
        print(f"最大到中心距离: {info['max_distance_to_center']:.2f}")
        print(f"密度: {info['density']:.2e}")

    fig = plt.figure(figsize=(12, 12))
    ax = fig.add_subplot(111, projection='3d')

    # One colour per real cluster, keyed by label. Indexing the palette by the
    # enumerate position (which also counts the noise label) could run past the
    # end of the palette or assign the wrong colour.
    palette = plt.cm.rainbow(np.linspace(0, 1, max(n_clusters, 1)))
    color_of = dict(zip(cluster_labels, palette))

    for label in unique_labels:
        mask = labels == label
        if label == -1:
            # Noise points
            ax.scatter(points[mask, 0], points[mask, 1], points[mask, 2],
                       c='black', marker='x', label='Noise', alpha=0.5)
            continue

        cluster = points[mask]
        color = color_of[label]
        ax.scatter(cluster[:, 0], cluster[:, 1], cluster[:, 2],
                   c=[color], label=f'Cluster {label}', alpha=0.6)

        # Cluster centre
        center = cluster_info[f'cluster_{label}']['center']
        ax.scatter(center[0], center[1], center[2],
                   c=[color], marker='*', s=300,
                   label=f'Center {label}', edgecolor='black')

    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    ax.set_title('DBSCAN Clustering Results')
    ax.legend()

    if fig_path is not None:
        fig.savefig(fig_path, dpi=300, bbox_inches='tight')
    if show:
        plt.show()
    else:
        plt.close(fig)

    clustering_result = {
        'points': points,
        'labels': labels,
        'n_clusters': n_clusters,
        'cluster_info': cluster_info,
        'params': {
            'scale': scale,
            'eps': eps,
            'min_samples': min_samples
        }
    }
    if result_path is not None:
        np.save(result_path, clustering_result)

    return clustering_result

# Run the clustering on the filtered vectors.
# new_x / new_y / new_z are produced by the step-3 cell, which must run first.
clustering_result = cluster_and_visualize(new_x, new_y, new_z, 
                                        scale=10000,  # multiply coordinates by 10000
                                        eps=0.3,      # tune relative to the scaled coordinates
                                        min_samples=5)

KeyboardInterrupt: 

In [None]:
# 第五步：保存模型指纹
# 直接保存第四步返回的 clustering_result
## TODO：目前聚类结果只有一簇，是否需要换用其他UUID再试？