In [5]:
import pandas as pd
import numpy as np
import os

# ================= Configuration =================
# Directory that holds the input CSVs; normalised copies are written here too.
DATA_DIR = "../../data/"

# File names (resolved against DATA_DIR) of the matrices to normalise.
FILES = [
    "4-1-overlapping_cluster_heatmap_Intensity.csv",
    "4-1-overlapping_cluster_heatmap_Breadth.csv",
]

# ================= 逻辑处理 =================

def process_csv_normalization(filename):
    """Log-transform and min-max scale one CSV matrix, saving a ``_norm`` copy.

    The file ``DATA_DIR/filename`` is read with its first column as the row
    index, every cell is passed through ``log1p``, and the result is rescaled
    to [0, 1] using the minimum and maximum of the whole matrix (not per row
    or per column), so relative magnitudes between cells are preserved.  The
    result is written next to the input with ``_norm`` inserted before the
    file extension, keeping the original row index and column names.

    Args:
        filename: Name of the CSV file, relative to ``DATA_DIR``.

    Returns:
        None.  If the input file does not exist a warning is printed and
        nothing is written.
    """
    input_path = os.path.join(DATA_DIR, filename)
    # Insert the suffix before the extension only.  A plain
    # str.replace(".csv", ...) rewrites every ".csv" occurrence and, for a
    # name without ".csv", yields the input path itself (overwriting the
    # source data).
    stem, extension = os.path.splitext(filename)
    output_path = os.path.join(DATA_DIR, f"{stem}_norm{extension}")

    if not os.path.exists(input_path):
        print(f"警告: 找不到文件 {input_path}")
        return

    # 1. Load the matrix; the first column holds the row labels.
    df = pd.read_csv(input_path, index_col=0)

    # 2. log1p(x) = log(1 + x): defined at x == 0, unlike a plain log.
    df_log = np.log1p(df)

    # 3. Global min-max scaling.  The range is taken from finite cells only:
    #    one NaN (missing cell) would otherwise make both min and max NaN and
    #    turn the entire output into NaN.  Non-finite cells are not filled in;
    #    they simply do not influence the scale.
    cell_values = df_log.to_numpy(dtype=float)
    finite_values = cell_values[np.isfinite(cell_values)]
    if finite_values.size:
        matrix_min = finite_values.min()
        matrix_max = finite_values.max()
    else:
        # Empty or all-missing matrix: nothing to scale against.
        matrix_min = matrix_max = 0.0

    value_range = matrix_max - matrix_min
    if value_range > 0:
        df_norm = (df_log - matrix_min) / value_range
    else:
        # Constant matrix: avoid dividing by zero, map everything to 0.
        df_norm = df_log * 0.0

    # 4. Save; to_csv keeps the row index and column headers.
    df_norm.to_csv(output_path)
    print(f"已生成归一化文件: {output_path}")

# ================= Entry point =================
# Normalise every configured CSV when this file is run as a script.
if __name__ == "__main__":
    for csv_name in FILES:
        process_csv_normalization(csv_name)

已生成归一化文件: ../../data/4-1-overlapping_cluster_heatmap_Intensity_norm.csv
已生成归一化文件: ../../data/4-1-overlapping_cluster_heatmap_Breadth_norm.csv
