In [3]:
import numpy as np

# Load the label array from the .npy file.
file_path = "/home/wuwujian/LXY/sensor_process/seedLLM/npydata/con1con1Sensor/con1con1Sensor_test_y.npy"
try:
    labels = np.load(file_path)
except FileNotFoundError:
    print(f"错误：未找到文件 {file_path}")
    # Raise SystemExit directly instead of calling exit(): the bare `exit`
    # name is a helper the `site` module adds for interactive sessions and
    # is not meant to be relied on inside programs.
    raise SystemExit(1)
except Exception as e:
    # Top-level boundary: report any other load failure and stop with a
    # non-zero status, since nothing below can run without `labels`.
    print(f"读取文件出错：{e}")
    raise SystemExit(1)

# Class labels are expected to be a flat vector; collapse anything else.
if labels.ndim != 1:
    print(f"警告：标签数组维度为 {labels.ndim}，将尝试展平为一维")
    labels = labels.flatten()

# Summary statistics: the distinct class values and the number of samples.
unique_classes = np.unique(labels)
total_samples = labels.shape[0]  # labels is one-dimensional at this point
print(
    f"总样本数：{total_samples}",
    f"唯一类别：{unique_classes}\n",
    sep="\n",
)

# Split the label sequence into maximal runs ("blocks") of identical labels
# and decide whether the data is grouped by class, i.e. whether every class
# occupies exactly one contiguous block.
blocks = []  # one (class, start index, end index) tuple per block; end is inclusive
is_block_sorted = True  # vacuously true when there are no labels

# Guard the empty case: indexing labels[0] on an empty array raises IndexError.
if total_samples > 0:
    current_class = labels[0]
    block_start = 0

    for i in range(1, total_samples):
        if labels[i] != current_class:
            # The label changed, so the block that began at block_start
            # ends on the previous index.
            blocks.append((current_class, block_start, i - 1))
            current_class = labels[i]
            block_start = i

    # The final block always runs to the end of the array.
    blocks.append((current_class, block_start, total_samples - 1))

    # A class that shows up in more than one block is not contiguous.
    # Comparing the number of distinct block classes with the number of
    # blocks replaces a rescan of the remaining labels at every boundary,
    # which is quadratic when the labels are shuffled.
    is_block_sorted = len({cls for cls, _, _ in blocks}) == len(blocks)

# Report every block (1-based numbering), then the overall verdict.
print("类别块信息（类别, 起始索引, 结束索引）：")
for block_no, (label, first, last) in enumerate(blocks, start=1):
    sample_count = last - first + 1  # both bounds are inclusive
    print(f"块 {block_no}: 类别={label}, 范围=[{first}, {last}], 样本数={sample_count}")

verdict = "是" if is_block_sorted else "否"
print(f"\n是否按类别分块排序：{verdict}")

# Extra hint when at least one class is spread over several blocks.
if not is_block_sorted:
    print("提示：存在类别在多个块中出现（非连续分块）")

总样本数：480
唯一类别：[1 2 3 4]

类别块信息（类别, 起始索引, 结束索引）：
块 1: 类别=4, 范围=[0, 0], 样本数=1
块 2: 类别=2, 范围=[1, 2], 样本数=2
块 3: 类别=4, 范围=[3, 4], 样本数=2
块 4: 类别=2, 范围=[5, 5], 样本数=1
块 5: 类别=1, 范围=[6, 6], 样本数=1
块 6: 类别=2, 范围=[7, 7], 样本数=1
块 7: 类别=1, 范围=[8, 8], 样本数=1
块 8: 类别=4, 范围=[9, 9], 样本数=1
块 9: 类别=3, 范围=[10, 10], 样本数=1
块 10: 类别=1, 范围=[11, 12], 样本数=2
块 11: 类别=4, 范围=[13, 13], 样本数=1
块 12: 类别=3, 范围=[14, 15], 样本数=2
块 13: 类别=1, 范围=[16, 16], 样本数=1
块 14: 类别=2, 范围=[17, 17], 样本数=1
块 15: 类别=4, 范围=[18, 20], 样本数=3
块 16: 类别=1, 范围=[21, 21], 样本数=1
块 17: 类别=4, 范围=[22, 22], 样本数=1
块 18: 类别=2, 范围=[23, 23], 样本数=1
块 19: 类别=4, 范围=[24, 24], 样本数=1
块 20: 类别=1, 范围=[25, 26], 样本数=2
块 21: 类别=3, 范围=[27, 27], 样本数=1
块 22: 类别=4, 范围=[28, 28], 样本数=1
块 23: 类别=1, 范围=[29, 29], 样本数=1
块 24: 类别=3, 范围=[30, 30], 样本数=1
块 25: 类别=1, 范围=[31, 31], 样本数=1
块 26: 类别=3, 范围=[32, 32], 样本数=1
块 27: 类别=1, 范围=[33, 33], 样本数=1
块 28: 类别=4, 范围=[34, 34], 样本数=1
块 29: 类别=3, 范围=[35, 35], 样本数=1
块 30: 类别=2, 范围=[36, 36], 样本数=1
块 31: 类别=1, 范围=[37, 38], 样本数=2
块 32: 类别=4, 范围=[