# Post-hoc Batch Analysis
合并多个数据集的结果，分析神经元编码变量的相关性

In [None]:
import sys
sys.path.append(r"D:\data analysis\code\WBI_analysis")  # 例如 r"C:\Users\YourName\Project"
import AnalysisMethod as am
import pandas as pd
import os
import matplotlib.cm as cm
import matplotlib as mpl
import numpy as np
import WBIFunctions as WBI
from datetime import datetime
from matplotlib_venn import venn2, venn3
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import shutil
from sklearn.cluster import AgglomerativeClustering
from upsetplot import UpSet, from_contents
from matplotlib.colors import ListedColormap, BoundaryNorm
from matplotlib.gridspec import GridSpec

## 韦恩图

In [None]:
def plot_eventnote_neuron_sets(df, neuron_col="Neuron", event_col="EventNote",
                                title='',event_order = None,
                                color_dict = None,save_path=None):
    """
    Plot the overlap between the neuron sets of the different event labels.

    Rows of ``df`` are grouped by ``event_col`` and the ``neuron_col`` values
    of each group are collected into a set. Exactly two or exactly three
    labels are drawn as a Venn diagram; any other number of labels is drawn
    as an UpSet plot.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding at least the columns ``neuron_col`` and ``event_col``.
    neuron_col, event_col : str
        Names of the neuron-identifier column and the event-label column.
    title : str
        Figure title; also used as the file name (``<title>.png``) when saving.
    event_order : sequence of str, optional
        UpSet branch only: labels to include, in this order. Labels that do
        not occur in ``df`` are ignored. ``None`` or empty keeps every label.
    color_dict : dict, optional
        Venn branch only: label -> colour. When non-empty it must contain
        every plotted label (a missing label raises ``KeyError``).
    save_path : str, optional
        Directory to save the figure in. When omitted the figure is shown.

    Returns
    -------
    None
    """
    event_order = list(event_order) if event_order is not None else []
    color_dict = color_dict if color_dict is not None else {}

    # Group by event label and collect the neurons of each label into a set.
    event_groups = {} if df.empty else df.groupby(event_col)[neuron_col].apply(set).to_dict()
    event_notes = list(event_groups.keys())

    print("=== 统计信息 ===")
    total_neurons = len(set().union(*event_groups.values()))
    print(f"总神经元数: {total_neurons}")
    for ev, s in event_groups.items():
        print(f"{ev}: {len(s)} ({len(s)/total_neurons:.1%})")

    # Nothing to draw: return instead of failing inside the plotting library.
    if not event_groups:
        print(f"{title}: no event groups to plot, skipped")
        return

    if len(event_notes) in (2, 3):
        draw_venn = venn2 if len(event_notes) == 2 else venn3
        neuron_sets = [event_groups[ev] for ev in event_notes]
        plt.figure(figsize=(10,8))
        if color_dict:
            draw_venn(neuron_sets, set_labels=event_notes,
                      set_colors=[color_dict[ev] for ev in event_notes])
        else:
            draw_venn(neuron_sets, set_labels=event_notes)
        plt.title(title)

    else:
        if event_order:
            event_groups_ordered = {k: event_groups[k] for k in event_order if k in event_groups}
            print(event_groups_ordered)
            if not event_groups_ordered:
                print(f"{title}: none of the labels in event_order occur in the data, skipped")
                return
            upset_data = from_contents(event_groups_ordered)
        else:
            upset_data = from_contents(event_groups)

        # Build and draw the UpSet plot; plot() returns its axes keyed by role.
        up = UpSet(upset_data, show_counts=True, show_percentages=False,
                   sort_by='degree',
                   orientation='horizontal')
        axes = up.plot()

        # The denominator is the number of distinct neurons over ALL labels,
        # including labels that event_order filtered out of the plot.
        total = total_neurons

        # Rewrite the per-set count labels as "count (percent%)".
        for txt in axes['totals'].texts:
            # Strip a thousands separator in case the count was formatted with one.
            count = int(txt.get_text().replace(',', ''))
            perc = 100 * count / total
            txt.set_text(f"{count} ({perc:.1f}%)")
            txt.set_fontsize(13)
            txt.set_color("black")

        # Label the intersection-size bar chart.
        axes['intersections'].set_ylabel("# of Neuron", fontsize=15, labelpad=0)

        # The tick labels could also be restyled here, e.g.
        # plt.setp(axes['intersections'].get_xticklabels(), rotation=45, ha="right", fontsize=9, color="blue")
        plt.title(title+f'(n={total})', fontsize=20, pad=15)
    if save_path:
        print(save_path)
        plt.savefig(os.path.join(save_path, title+'.png'),bbox_inches='tight', facecolor='white')
    else:
        plt.show()

In [None]:
def plot_eventnote_neuron_sets_2(df, neuron_col="Neuron", event_col="EventNote",
                                title='',event_order = None,
                                color_dict = None,save_path=None):
    """
    Draw an UpSet plot of the neuron sets of the different event labels.

    Rows of ``df`` are grouped by ``event_col`` and the ``neuron_col`` values
    of each group are collected into a set. Unlike
    ``plot_eventnote_neuron_sets`` this variant always draws an UpSet plot,
    whatever the number of labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding at least the columns ``neuron_col`` and ``event_col``.
    neuron_col, event_col : str
        Names of the neuron-identifier column and the event-label column.
    title : str
        Figure title; also used as the file name (``<title>.png``) when saving.
    event_order : sequence of str, optional
        Labels to include, in this order. Labels that do not occur in ``df``
        are ignored. ``None`` or empty keeps every label.
    color_dict : dict, optional
        Accepted for signature compatibility; not used by this function.
    save_path : str, optional
        Directory to save the figure in. When omitted the figure is shown.

    Returns
    -------
    None
    """
    event_order = list(event_order) if event_order is not None else []

    # Group by event label and collect the neurons of each label into a set.
    event_groups = {} if df.empty else df.groupby(event_col)[neuron_col].apply(set).to_dict()

    print("=== 统计信息 ===")
    total_neurons = len(set().union(*event_groups.values()))
    print(f"总神经元数: {total_neurons}")
    for ev, s in event_groups.items():
        print(f"{ev}: {len(s)} ({len(s)/total_neurons:.1%})")

    # Nothing to draw: return instead of failing inside the plotting library.
    if not event_groups:
        print(f"{title}: no event groups to plot, skipped")
        return

    if event_order:
        event_groups_ordered = {k: event_groups[k] for k in event_order if k in event_groups}
        print(event_groups_ordered)
        if not event_groups_ordered:
            print(f"{title}: none of the labels in event_order occur in the data, skipped")
            return
        upset_data = from_contents(event_groups_ordered)
    else:
        upset_data = from_contents(event_groups)

    # Build and draw the UpSet plot; plot() returns its axes keyed by role.
    up = UpSet(upset_data, show_counts=True, show_percentages=False,
               sort_by='degree',
               orientation='horizontal')
    axes = up.plot()

    # The denominator is the number of distinct neurons over ALL labels,
    # including labels that event_order filtered out of the plot.
    total = total_neurons

    # Rewrite the per-set count labels as "count (percent%)".
    for txt in axes['totals'].texts:
        # Strip a thousands separator in case the count was formatted with one.
        count = int(txt.get_text().replace(',', ''))
        perc = 100 * count / total
        txt.set_text(f"{count} ({perc:.1f}%)")
        txt.set_fontsize(13)
        txt.set_color("black")

    # Label the intersection-size bar chart.
    axes['intersections'].set_ylabel("# of Neuron", fontsize=15, labelpad=0)

    # The tick labels could also be restyled here, e.g.
    # plt.setp(axes['intersections'].get_xticklabels(), rotation=45, ha="right", fontsize=9, color="blue")
    plt.title(title+f'(n={total})', fontsize=20, pad=15)
    if save_path:
        print(save_path)
        plt.savefig(os.path.join(save_path, title+'.png'),bbox_inches='tight', facecolor='white')
    else:
        plt.show()

In [None]:
# Parent folders that hold one sub-folder per dataset.
p_fs = [r'Y:\\SZX\\2025_wbi_analysis\\good_WBI\\done',r'Y:\\SZX\\2025_wbi_analysis\\good_WBI\\to_do_q']
# Full path of every dataset sub-folder, parents visited in the order given above.
sub_folder_path = []
for p_f_all in p_fs:
    sub_folder_path.extend(
        os.path.join(p_f_all, entry)
        for entry in os.listdir(p_f_all)
        if os.path.isdir(p_f_all+'\\'+entry)
    )

In [None]:
# Order in which the event labels are passed to the UpSet plots below.
event_order = ['Velocity', 'Speed','CTX','Angular Velocity','Curvature','Forw-Rev','RevStart','RevEnd', 'Turn', 'TurnStart','TurnEnd']

In [None]:
# Load the correlation table of a single example dataset.
df_p_cr = pd.read_csv(r'Y:\\SZX\\2025_wbi_analysis\\good_WBI\\done\\20250115_4.5g-24d-ov_08\\2025-10-23-16_AnalysisFigs\\CorrAnalysis\\20250115_4.5g-24d-ov_08_corr_p_cor.csv')

# df_p_cr['EventNote'] = df_p_cr['EventNote'].fillna(df_p_cr['Event'])
# A row is flagged (sign = 1) when p_cor <= 0.05 and either |true_r| >= 0.3
# or true_r is missing.
df_p_cr.loc[(df_p_cr['true_r'].abs()>=0.3) & (df_p_cr['p_cor']<=0.05), 'sign']  = 1
df_p_cr.loc[(df_p_cr['true_r'].isna()) & (df_p_cr['p_cor']<=0.05), 'sign']  = 1
# .copy() so that the relabelling below edits an independent table and not a
# slice of df_p_cr (avoids pandas' SettingWithCopyWarning).
df_p_cr_sign = df_p_cr[df_p_cr['sign']==1].copy()
# Map the raw variable names onto the display names used in the figures.
df_p_cr_sign['EventNote'] = df_p_cr_sign['EventNote'].replace({
    'curvature': 'Curvature',
    'turn_pc': 'Turn',
    'turn_cor': 'Turn',
    'forward': 'Forw-Rev',
    'CoilingStart': 'TurnStart',
    'CoilingEnd': 'TurnEnd',
    'sm_velocity': 'Velocity',
    'sm_speed': 'Speed',
    'sm_ang': 'Angular Velocity',
    'sm_CTX': 'CTX',
    'OmegaStart': 'TurnStart',
    'OmegaEnd': 'TurnEnd',
})

In [None]:
# Notebook display of the flagged rows.
df_p_cr_sign

In [None]:
s_path = r'Y:\\SZX\\2025_wbi_analysis\\good_WBI'+'\\251026vennplot'
os.makedirs(s_path, exist_ok=True)
# Draw one UpSet plot per dataset from its flagged correlation rows.
# df_p_sign_all = []
for f_path in sub_folder_path:
    f = os.path.basename(f_path)

    # Step 1: analysis output folders of this dataset.
    folders = [
        d for d in os.listdir(f_path)
        if 'AnalysisFigs' in d and os.path.isdir(os.path.join(f_path, d))
    ]
    if not folders:
        # raise FileNotFoundError("No folder containing 'AnalysisFigs' found.")
        continue
    # Steps 2-3: keep the folder with the most recent modification time.
    latest_folder = max(folders, key=lambda d: os.path.getmtime(os.path.join(f_path, d)))
    file_name = f+'_corr_p_cor.csv'
    file_path = os.path.join(f_path, latest_folder, 'CorrAnalysis', file_name)
    print(file_path)
    try:
        df_p_cr = pd.read_csv(file_path)
    except Exception as err:
        # Best effort: report the dataset that could not be read and move on.
        print(f"skipped {file_path}: {err}")
        continue
    df_p_cr['EventNote'] = df_p_cr['EventNote'].fillna(df_p_cr['Event'])
    # A row is flagged (sign = 1) when p_cor <= 0.05 and either |true_r| >= 0.3
    # or true_r is missing.
    df_p_cr.loc[(df_p_cr['true_r'].abs()>=0.3) & (df_p_cr['p_cor']<=0.05), 'sign']  = 1
    df_p_cr.loc[(df_p_cr['true_r'].isna()) & (df_p_cr['p_cor']<=0.05), 'sign']  = 1
    # .copy() so the relabelling edits an independent table, not a slice.
    df_p_cr_sign = df_p_cr[df_p_cr['sign']==1].copy()
    # Map the raw variable names onto the display names used in the figures.
    df_p_cr_sign['EventNote'] = df_p_cr_sign['EventNote'].replace({
        'curvature': 'Curvature',
        'turn_pc': 'Turn',
        'turn_cor': 'Turn',
        'forward': 'Forw-Rev',
        'CoilingStart': 'TurnStart',
        'CoilingEnd': 'TurnEnd',
        'sm_velocity': 'Velocity',
        'sm_speed': 'Speed',
        'sm_ang': 'Angular Velocity',
        'sm_CTX': 'CTX',
        'OmegaStart': 'TurnStart',
        'OmegaEnd': 'TurnEnd',
    })
    # df_p_cr_sign = df_p_cr_sign[df_p_cr_sign['EventNote'] != 'OmegaStart']

    # df_p_sign_all.append(df_p_cr_sign)

    # A dataset without any flagged row has nothing to plot.
    if df_p_cr_sign.empty:
        print(f"skipped {f}: no significant rows")
        continue

    # colors_ls = ['#60A5FA',"#DD8452",'#55A868',"#FDFB74" ,'#8B5CF6','#819CA9']
    # color_dict_1 = {'Speed':colors_ls[0],'Velocity':colors_ls[1], 'CTX':colors_ls[2],'RevStart':colors_ls[3]}
    plot_eventnote_neuron_sets_2(df_p_cr_sign, neuron_col="Neuron",
                               event_col="EventNote",event_order=event_order,
                               color_dict={}, save_path=s_path, title=f)
    # Sub-population Venn diagrams (currently disabled):

    # df_svc = df_p_cr_sign[df_p_cr_sign['EventNote'].isin(['Speed', 'Velocity','CTX'])]
    # plot_eventnote_neuron_sets(df_svc, neuron_col="Neuron", event_col="EventNote", title=f+'_spdvelctx',color_dict=color_dict_1, save_path = s_path)
    # df_svrs = df_p_cr_sign[df_p_cr_sign['EventNote'].isin(['Speed', 'Velocity','RevStart'])]
    # plot_eventnote_neuron_sets(df_svrs, neuron_col="Neuron", event_col="EventNote", title=f+'_spdvelrevstart',color_dict=color_dict_1, save_path = s_path)
    # df_cvrs = df_p_cr_sign[df_p_cr_sign['EventNote'].isin(['CTX', 'Velocity','RevStart'])]
    # plot_eventnote_neuron_sets(df_cvrs, neuron_col="Neuron", event_col="EventNote", title=f+'_ctxvelrevstart',color_dict=color_dict_1, save_path = s_path)

# 汇总分析

In [None]:
# Parent folders that hold one sub-folder per dataset.
p_fs = [r'Y:\\SZX\\2025_wbi_analysis\\good_WBI\\done',r'Y:\\SZX\\2025_wbi_analysis\\good_WBI\\to_do_q']
# Full path of every dataset sub-folder, parents visited in the order given above.
sub_folder_path = []
for p_f_all in p_fs:
    sub_folder_path.extend(
        os.path.join(p_f_all, entry)
        for entry in os.listdir(p_f_all)
        if os.path.isdir(p_f_all+'\\'+entry)
    )

In [None]:
# Pool the flagged correlation rows of every dataset into one table.
df_p_sign_all = []
for f_path in sub_folder_path:
    f = os.path.basename(f_path)

    # Step 1: analysis output folders of this dataset.
    folders = [
        d for d in os.listdir(f_path)
        if 'AnalysisFigs' in d and os.path.isdir(os.path.join(f_path, d))
    ]
    if not folders:
        # raise FileNotFoundError("No folder containing 'AnalysisFigs' found.")
        continue
    # Steps 2-3: keep the folder with the most recent modification time.
    latest_folder = max(folders, key=lambda d: os.path.getmtime(os.path.join(f_path, d)))
    file_name = f+'_corr_p_cor.csv'
    file_path = os.path.join(f_path, latest_folder, 'CorrAnalysis', file_name)
    print(file_path)
    try:
        df_p_cr = pd.read_csv(file_path)
    except Exception as err:
        # Best effort: report the dataset that could not be read and move on.
        print(f"skipped {file_path}: {err}")
        continue
    df_p_cr['EventNote'] = df_p_cr['EventNote'].fillna(df_p_cr['Event'])
    # A row is flagged (sign = 1) when p_cor <= 0.05 and either |true_r| >= 0.3
    # or true_r is missing.
    df_p_cr.loc[(df_p_cr['true_r'].isna()) & (df_p_cr['p_cor']<=0.05), 'sign']  = 1
    df_p_cr.loc[(df_p_cr['true_r'].abs()>=0.3) & (df_p_cr['p_cor']<=0.05), 'sign']  = 1
    # .copy() so the relabelling edits an independent table, not a slice.
    df_p_cr_sign = df_p_cr[df_p_cr['sign']==1].copy()
    # Map the raw variable names onto the display names used in the figures.
    # 'CoilingEnd' is included to match the per-dataset cells earlier in this
    # notebook; without it those rows keep a label that is not in event_order.
    df_p_cr_sign['EventNote'] = df_p_cr_sign['EventNote'].replace({
        'curvature': 'Curvature',
        'turn_pc': 'Turn',
        'turn_cor': 'Turn',
        'forward': 'Forw-Rev',
        'CoilingStart': 'TurnStart',
        'CoilingEnd': 'TurnEnd',
        'sm_velocity': 'Velocity',
        'sm_speed': 'Speed',
        'sm_ang': 'Angular Velocity',
        'sm_CTX': 'CTX',
        'OmegaStart': 'TurnStart',
        'OmegaEnd': 'TurnEnd',
    })
    # df_p_cr_sign = df_p_cr_sign[df_p_cr_sign['EventNote'] != 'OmegaStart']
    # df_p_cr_sign = df_p_cr_sign[df_p_cr_sign['EventNote'] != 'turn_cor']
    df_p_sign_all.append(df_p_cr_sign)
df_sign_all = pd.concat(df_p_sign_all,axis=0)
event_ls = df_sign_all['EventNote'].unique()

In [None]:
# Event labels in the order that decides which palette entry each one receives.
event_order_color = [
    'Velocity', 'Speed', 'CTX', 'Angular Velocity', 'Curvature', 'Forw-Rev',
    'RevStart', 'RevEnd', 'Turn', 'TurnStart', 'TurnEnd',
]
# Colormap fetched through the registry API (other names such as 'viridis' or 'Set3' work too).
cmap = plt.colormaps['tab10']
# Alternative: derive the colours from the colormap instead of the fixed palette.
# color_dict = {event: cmap(i / len(event_order_color)) for i, event in enumerate(event_order_color)}

# Fixed palette of 12 hex colours.
tol_bright_12 = [
    "#332288",  # dark blue
    "#88CCEE",  # cyan
    "#44AA99",  # teal
    "#117733",  # green
    "#AA4499",  # purple
    "#6699CC",  # sky blue
    "#888888",  # gray
    "#661100",  # brown
    "#DDCC77",  # sand
    "#999933",  # olive
    "#882255",
    "#CC6677",  # rose
]

# Pair every label with a palette colour; the palette is repeated often enough
# to wrap around when there are more labels than colours.
color_dict = dict(zip(
    event_order_color,
    tol_bright_12 * (len(event_order_color) // len(tol_bright_12) + 1),
))
# Plotting order used by the cells below (same labels, independent list).
event_order = list(event_order_color)

In [None]:
# Number of flagged rows per File and EventNote, columns in event_order.
# reindex (instead of counts[event_order]) keeps an event that occurs in no
# file as an all-zero column rather than raising KeyError.
counts = (
    df_sign_all.groupby(['File','EventNote']).size().unstack(fill_value=0)
    .reindex(columns=event_order, fill_value=0)
)

# Stacked bar chart, one bar per file.
files = counts.index
bottom = np.zeros(len(files))

plt.figure(figsize=(20,6))
ax = plt.gca()
ax.set_axisbelow(True)  # draw the grid behind the bars
plt.grid(linestyle='dashed', color='grey',lw=2, alpha=0.3, which='major', axis='y',zorder=0)
for event in event_order:
    plt.bar(files, counts[event], bottom=bottom, color=color_dict[event], label=event)
    # Raise the baseline for the next layer.
    bottom = bottom + counts[event].to_numpy()

# plt.xlabel('File')
plt.ylabel('# of Neurons',fontsize=18)
# Shorten each file name to "<first token>_<last token as int>".
new_labels = [f.split("_")[0]+'_'+ str(int(f.split("_")[-1])) for f in files]
plt.xticks(ticks=range(len(files)), labels=new_labels, rotation=45, fontsize=15, ha='right')
plt.tick_params(axis='y', labelsize = 15)
plt.title('')
plt.legend(title='Variable', loc='upper right', bbox_to_anchor = (1.16,0.9),fontsize = 15)
# NOTE(review): fixed upper limit; stacks taller than 200 are clipped.
plt.ylim([0,200])
plt.tight_layout()
plt.show()

### 对每种变量的检出比例作图

In [None]:


# ====== Pool the flagged correlation rows of every dataset ======
df_p_sign_all = []
for f_path in sub_folder_path:
    f = os.path.basename(f_path)
    folders = [
        d for d in os.listdir(f_path)
        if 'AnalysisFigs' in d and os.path.isdir(os.path.join(f_path, d))
    ]
    if not folders:
        continue
    # Keep the analysis folder with the most recent modification time.
    latest_folder = max(folders, key=lambda d: os.path.getmtime(os.path.join(f_path, d)))
    file_name = f + '_corr_p_cor.csv'
    file_path = os.path.join(f_path, latest_folder, 'CorrAnalysis', file_name)
    print(file_path)
    try:
        df_p_cr = pd.read_csv(file_path)
    except Exception as err:
        # Best effort: report the dataset that could not be read and move on.
        print(f"skipped {file_path}: {err}")
        continue
    df_p_cr['EventNote'] = df_p_cr['EventNote'].fillna(df_p_cr['Event'])
    # A row is flagged (sign = 1) when p_cor <= 0.05 and either |true_r| >= 0.3
    # or true_r is missing.
    df_p_cr.loc[(df_p_cr['true_r'].isna()) & (df_p_cr['p_cor'] <= 0.05), 'sign'] = 1
    df_p_cr.loc[(df_p_cr['true_r'].abs() >= 0.3) & (df_p_cr['p_cor'] <= 0.05), 'sign'] = 1
    # .copy() so the relabelling edits an independent table, not a slice.
    df_p_cr_sign = df_p_cr[df_p_cr['sign'] == 1].copy()

    # Map the raw variable names onto the display names used in the figures.
    # 'CoilingEnd' is included to match the per-dataset cells earlier in this
    # notebook; without it those rows keep a label that is not in event_order.
    mapping = {
        'curvature': 'Curvature',
        'turn_pc': 'Turn',
        'turn_cor': 'Turn',
        'forward': 'Forw-Rev',
        'CoilingStart': 'TurnStart',
        'CoilingEnd': 'TurnEnd',
        'sm_velocity': 'Velocity',
        'sm_speed': 'Speed',
        'sm_ang': 'Angular Velocity',
        'sm_CTX': 'CTX',
        'OmegaStart': 'TurnStart',
        'OmegaEnd': 'TurnEnd'
    }
    df_p_cr_sign['EventNote'] = df_p_cr_sign['EventNote'].replace(mapping)
    df_p_sign_all.append(df_p_cr_sign)

df_sign_all = pd.concat(df_p_sign_all, axis=0)

# ====== Plotting ======

event_order = [
    'Velocity', 'Speed', 'CTX', 'Angular Velocity',
    'Curvature', 'Forw-Rev', 'RevStart', 'RevEnd',
    'Turn', 'TurnStart', 'TurnEnd'
]

tol_bright_12 = [
    "#332288", "#88CCEE", "#44AA99", "#117733",
    "#AA4499", "#6699CC", "#888888", "#661100",
    "#DDCC77", "#999933", "#882255", "#CC6677"
]

color_dict = {event: tol_bright_12[i % len(tol_bright_12)] for i, event in enumerate(event_order)}

# Number of flagged rows per file and event.
counts = df_sign_all.groupby(['File', 'EventNote']).size().unstack(fill_value=0)
# Per-file total over every event label present in the table.
counts['total'] = counts.sum(axis=1)
# Share of each event in its file's total. reindex keeps an event that occurs
# in no file as an all-zero column instead of raising KeyError.
proportions = counts.reindex(columns=event_order, fill_value=0).div(counts['total'], axis=0)

# One panel per event, all stacked in a single figure. The original opened a
# new figure on every iteration and then addressed row i of an n-row grid in
# it, which left each figure with one thin strip.
files = proportions.index
new_labels = [f.split("_")[0] + '_' + str(int(f.split("_")[-1])) for f in files]
x = range(len(files))

fig, axes = plt.subplots(len(event_order), 1,
                         figsize=(20, 2.5 * len(event_order)),
                         sharex=True, squeeze=False)
for i, event in enumerate(event_order):
    ax = axes[i, 0]
    y = proportions[event]
    mean_y = y.mean()
    ax.plot(x, y, color='black', linewidth=1.5, zorder=1)  # black connecting line
    ax.axhline(mean_y, color='red', linestyle='--', linewidth=2, alpha=0.8, zorder=0)  # mean over files
    ax.scatter(x, y, color=color_dict[event], s=80, edgecolors='k', zorder=2)  # one dot per file
    ax.set_ylabel('Proportion', fontsize=14)
    ax.set_title(event+' mean:'+str(round(mean_y,2)), fontsize=16, color=color_dict[event])
    ax.grid(linestyle='dashed', color='grey', lw=1, alpha=0.4, axis='y')
    # ax.set_ylim(0, 1)
# The x axis is shared, so the file labels are only needed on the bottom panel.
axes[-1, 0].set_xticks(list(x))
axes[-1, 0].set_xticklabels(new_labels, rotation=45, ha='right', fontsize=12)
fig.tight_layout()
plt.show()


### 总神经元数量检出相关神经元比例

In [None]:
# Pool ALL correlation rows (flagged or not) of every dataset, then count per
# file how many neurons have at least one flagged row.
df_p_sign_all = []
for f_path in sub_folder_path:
    f = os.path.basename(f_path)

    # Step 1: analysis output folders of this dataset.
    folders = [
        d for d in os.listdir(f_path)
        if 'AnalysisFigs' in d and os.path.isdir(os.path.join(f_path, d))
    ]
    if not folders:
        # raise FileNotFoundError("No folder containing 'AnalysisFigs' found.")
        continue
    # Steps 2-3: keep the folder with the most recent modification time.
    latest_folder = max(folders, key=lambda d: os.path.getmtime(os.path.join(f_path, d)))
    file_name = f+'_corr_p_cor.csv'
    file_path = os.path.join(f_path, latest_folder, 'CorrAnalysis', file_name)
    print(file_path)
    try:
        df_p_cr = pd.read_csv(file_path)
    except Exception as err:
        # Best effort: report the dataset that could not be read and move on.
        print(f"skipped {file_path}: {err}")
        continue
    df_p_cr['EventNote'] = df_p_cr['EventNote'].fillna(df_p_cr['Event'])
    # A row is flagged (sign = 1) when p_cor <= 0.05 and either |true_r| >= 0.3
    # or true_r is missing.
    df_p_cr.loc[(df_p_cr['true_r'].isna()) & (df_p_cr['p_cor']<=0.05), 'sign']  = 1
    df_p_cr.loc[(df_p_cr['true_r'].abs()>=0.3) & (df_p_cr['p_cor']<=0.05), 'sign']  = 1
    # Rows that were not flagged get an explicit 0. Left as NaN they are
    # dropped by the groupby on 'sign' below, so 'Sign_0' would never exist.
    df_p_cr['sign'] = df_p_cr['sign'].fillna(0).astype(int)
    # No filtering in this cell: every row is kept.
    df_p_cr_sign = df_p_cr
    # Map the raw variable names onto the display names used in the figures.
    df_p_cr_sign['EventNote'] = df_p_cr_sign['EventNote'].replace({
        'curvature': 'Curvature',
        'turn_pc': 'Turn',
        'turn_cor': 'Turn',
        'forward': 'Forw-Rev',
        'CoilingStart': 'TurnStart',
        'CoilingEnd': 'TurnEnd',
        'sm_velocity': 'Velocity',
        'sm_speed': 'Speed',
        'sm_ang': 'Angular Velocity',
        'sm_CTX': 'CTX',
        'OmegaStart': 'TurnStart',
        'OmegaEnd': 'TurnEnd',
    })
    # df_p_cr_sign = df_p_cr_sign[df_p_cr_sign['EventNote'] != 'OmegaStart']
    # df_p_cr_sign = df_p_cr_sign[df_p_cr_sign['EventNote'] != 'turn_cor']
    df_p_sign_all.append(df_p_cr_sign)
df_sign_all = pd.concat(df_p_sign_all,axis=0)
# A neuron counts as significant when any of its rows is flagged.
df_sign_max = (
    df_sign_all.groupby(['File', 'Neuron'], as_index=False)['sign']
    .max()
)
# Neurons per file and flag value; both columns always exist, even when one
# of the two values never occurs.
sign_counts = (
    df_sign_max.groupby(['File', 'sign'])
    .size()
    .unstack(fill_value=0)
    .reindex(columns=[0, 1], fill_value=0)
    .rename(columns={0: 'Sign_0', 1: 'Sign_1'})
)


In [None]:
# Notebook display of the per-neuron maximum of 'sign'.
df_sign_max

In [None]:

# === 3. Stacked bar chart ===
files = sign_counts.index
x = range(len(files))

fig, ax = plt.subplots(figsize=(18, 6))
ax.set_axisbelow(True)
ax.grid(linestyle='dashed', color='grey', lw=1, alpha=0.3, axis='y', zorder=0)

# Bottom layer: sign = 1, dark grey.
ax.bar(x, sign_counts['Sign_1'], color='#555555', label='Sign = 1', zorder=1)
# Top layer: sign = 0, light grey, stacked on the sign = 1 bars.
ax.bar(x, sign_counts['Sign_0'], bottom=sign_counts['Sign_1'],
       color='#D3D3D3', label='Sign = 0', zorder=2)

# === 4. Axis styling ===
# Shorten each file name to "<first token>_<last token as int>".
new_labels = [f.split("_")[0] + '_' + str(int(f.split("_")[-1])) for f in files]
ax.set_xticks(list(x))
ax.set_xticklabels(new_labels, rotation=45, ha='right', fontsize=12)
ax.set_ylabel('# of Neurons', fontsize=14)
ax.set_xlabel('File', fontsize=14)
ax.set_title('Significant vs Non-significant Neurons per File', fontsize=16)
ax.legend(fontsize=12, loc='upper right')
fig.tight_layout()
plt.show()

In [None]:
# Notebook display of the pooled table.
df_sign_all

# 汇总分析2

In [None]:
# Parent folders that hold one sub-folder per dataset.
p_fs = [r'Y:\\SZX\\2025_wbi_analysis\\good_WBI\\done',r'Y:\\SZX\\2025_wbi_analysis\\good_WBI\\to_do_q']
# Full path of every dataset sub-folder, parents visited in the order given above.
sub_folder_path = []
for p_f_all in p_fs:
    sub_folder_path.extend(
        os.path.join(p_f_all, entry)
        for entry in os.listdir(p_f_all)
        if os.path.isdir(p_f_all+'\\'+entry)
    )

## 绘制各个数据集的运动参数图
多个子图分别可视化运动参数
连续性变量：前进速度，curvature曲率，角速度
离散型变量：前进后退，转向行为
标记：将mask和quiescence底色标记出来

In [None]:
# Find where each run of consecutive 1s starts and ends.
def get_turn_interval(df, col_name):
    """
    Return the runs of consecutive 1s in ``df[col_name]``.

    Each run is reported as an inclusive ``(start, end)`` pair of row
    positions (0-based, positional rather than index labels). A run that
    reaches the last row ends at ``len(df) - 1``. The result is a list of
    tuples in ascending order, empty when the column holds no 1.
    """
    # True wherever the label equals 1.
    is_on = np.asarray(df[col_name].values == 1, dtype=bool)
    # Pad with False on both sides so runs touching either edge still
    # produce a rising and a falling step.
    padded = np.concatenate(([False], is_on, [False])).astype(np.int8)
    steps = np.diff(padded)
    run_starts = np.flatnonzero(steps == 1)
    # A falling step sits one position past the last 1 of the run.
    run_ends = np.flatnonzero(steps == -1) - 1
    return list(zip(run_starts.tolist(), run_ends.tolist()))

In [None]:
s_path = r'Y:\\SZX\\2025_wbi_analysis\\good_WBI'+'\\251030MotVar'
os.makedirs(s_path, exist_ok=True)
# One figure per dataset: five continuous motion variables as line plots,
# with the discrete states shaded in the background of every panel.
# df_p_sign_all = []
for f_path in sub_folder_path:
    f = os.path.basename(f_path)
    file_df = [name for name in os.listdir(f_path) if '_MotionMidlineMatchVol.pkl' in name]

    if not file_df:
        # raise FileNotFoundError("No folder containing 'AnalysisFigs' found.")
        continue
    df_mot_vol = pd.read_pickle(os.path.join(f_path,file_df[0]))
    print(df_mot_vol.columns)
    df_mot_vol["head_velocity"] = df_mot_vol.apply(WBI.signed_norm, axis=1)
    df_mot_vol["head_speed"] = df_mot_vol["head_velocity"].abs()
    # Smooth the continuous variables with a trailing moving average.
    window_size = 15
    df_mot_vol['sm_velocity'] = df_mot_vol['head_velocity'].rolling(window=window_size, min_periods=1).mean()
    df_mot_vol['sm_speed'] = df_mot_vol['head_speed'].rolling(window=window_size, min_periods=1).mean()
    # Smooth CTX.
    window_size = 15
    df_mot_vol['sm_CTX'] = df_mot_vol['CTX_left'].rolling(window=window_size, min_periods=1).mean()
    df_mot_vol['sm_ang'] = df_mot_vol['ang_velocity'].rolling(window=15, min_periods=1).mean()
    # A window of 1 leaves the curvature values unchanged.
    df_mot_vol['sm_curvature'] = df_mot_vol['curvature'].rolling(window=1, min_periods=1).mean()
    df_t = df_mot_vol[['Vol_Time','mask','head_velocity','quies_pc', 'head_speed','CTX_left','sm_ang','sm_curvature','forward','turn_pc']]
    n_sub_figs = 5
    max_t  = df_t.Vol_Time.max()
    # Figure width scales with the recording length (8 inches per 600 time units).
    fig,ax = plt.subplots(n_sub_figs,1, figsize=(max_t/600*8,8),sharex=True)
    fig.suptitle(f)
    turn_pc_ints = get_turn_interval(df_t, 'turn_pc')
    forward_ints = get_turn_interval(df_t, 'forward')
    mask_ints = get_turn_interval(df_t, 'mask')
    # Quiescence comes from its own column; the original read 'mask' a second
    # time here, so quiescent periods were never drawn.
    # NOTE(review): assumes quies_pc == 1 marks quiescence - confirm.
    quies_ints = get_turn_interval(df_t, 'quies_pc')
    ax0 = ax[0].plot(df_t['Vol_Time'],df_t['head_velocity'])
    ax[0].set_title('Velocity')
    ax[1].plot(df_t['Vol_Time'],df_t['head_speed'])
    ax[1].set_title('Speed')
    ax[2].plot(df_t['Vol_Time'],df_t['CTX_left'])
    ax[2].set_title('CTX_left')
    ax[3].plot(df_t['Vol_Time'],df_t['sm_curvature'])
    ax[3].set_title('Curvature')
    ax[4].plot(df_t['Vol_Time'],df_t['sm_ang'])
    ax[4].set_title('sm_ang')
    # (intervals, colour, alpha) for each shaded state, drawn in this order.
    shading = [
        (forward_ints, "#51E6FA", 1),
        (turn_pc_ints, "#F7FA51", 1),
        (mask_ints, "#3F3F4E", 0.7),
        (quies_ints, "#3F3F4E", 0.7),
    ]
    vol_time = df_t['Vol_Time']
    for i in range(n_sub_figs):
        for intervals, shade_color, shade_alpha in shading:
            for start, end in intervals:
                ax[i].axvspan(vol_time.iloc[start], vol_time.iloc[end], color=shade_color, alpha=shade_alpha)
    ax[-1].set_xlabel('Time(s)')
    plt.tight_layout()
    # break
    plt.savefig(os.path.join(s_path, f+'_mot_var.png'),bbox_inches='tight')
    plt.show()

In [None]:
s_path = r'Y:\\SZX\\2025_wbi_analysis\\good_WBI'+'\\251022TrajPlots'
os.makedirs(s_path, exist_ok=True)
# Seven-row variant: five continuous variables plus one row each for the
# 'forward' and 'turn_pc' intervals. Stops after the first dataset (see break).
# df_p_sign_all = []
for f_path in sub_folder_path:
    f = os.path.basename(f_path)
    file_df = [name for name in os.listdir(f_path) if '_MotionMidlineMatchVol.pkl' in name]

    if not file_df:
        # raise FileNotFoundError("No folder containing 'AnalysisFigs' found.")
        continue
    df_mot_vol = pd.read_pickle(os.path.join(f_path,file_df[0]))
    print(df_mot_vol.columns)
    df_mot_vol["head_velocity"] = df_mot_vol.apply(WBI.signed_norm, axis=1)
    df_mot_vol["head_speed"] = df_mot_vol["head_velocity"].abs()
    # Smooth the continuous variables with a trailing moving average.
    window_size = 15
    df_mot_vol['sm_velocity'] = df_mot_vol['head_velocity'].rolling(window=window_size, min_periods=1).mean()
    df_mot_vol['sm_speed'] = df_mot_vol['head_speed'].rolling(window=window_size, min_periods=1).mean()
    # Smooth CTX.
    window_size = 15
    df_mot_vol['sm_CTX'] = df_mot_vol['CTX_left'].rolling(window=window_size, min_periods=1).mean()
    df_mot_vol['sm_ang'] = df_mot_vol['ang_velocity'].rolling(window=15, min_periods=1).mean()
    # A window of 1 leaves the curvature values unchanged.
    df_mot_vol['sm_curvature'] = df_mot_vol['curvature'].rolling(window=1, min_periods=1).mean()
    df_t = df_mot_vol[['Vol_Time','mask','head_velocity','quies_pc', 'head_speed','CTX_left','sm_ang','sm_curvature','forward','turn_pc']]
    n_sub_figs = 7
    fig,ax = plt.subplots(n_sub_figs,1, figsize=(10,10),sharex=True)
    fig.suptitle(f)
    turn_pc_ints = get_turn_interval(df_t, 'turn_pc')
    forward_ints = get_turn_interval(df_t, 'forward')
    mask_ints = get_turn_interval(df_t, 'mask')
    # Quiescence comes from its own column; the original read 'mask' a second
    # time here, so quiescent periods were never drawn.
    # NOTE(review): assumes quies_pc == 1 marks quiescence - confirm.
    quies_ints = get_turn_interval(df_t, 'quies_pc')
    vol_time = df_t['Vol_Time']
    ax0 = ax[0].plot(vol_time,df_t['head_velocity'])
    ax[0].set_title('Velocity')
    ax[1].plot(vol_time,df_t['head_speed'])
    ax[1].set_title('Speed')
    ax[2].plot(vol_time,df_t['CTX_left'])
    ax[2].set_title('CTX_left')
    ax[3].plot(vol_time,df_t['sm_curvature'])
    ax[3].set_title('Curvature')
    ax[4].plot(vol_time,df_t['sm_ang'])
    ax[4].set_title('sm_ang')
    # Titles are set outside the loops so the rows are labelled even when a
    # dataset has no interval of that kind.
    # NOTE(review): the row titled 'Reverse' shades where forward == 1 - confirm the coding.
    ax[5].set_title('Reverse')
    for start, end in forward_ints:
        ax[5].axvspan(vol_time.iloc[start],vol_time.iloc[end] , color='blue', alpha=1)
    ax[6].set_title('Turn')
    for start, end in turn_pc_ints:
        ax[6].axvspan(vol_time.iloc[start],vol_time.iloc[end] , color='orange', alpha=1)
    # Masked and quiescent periods are shaded on every row.
    for i in range(n_sub_figs):
        for start, end in mask_ints:
            ax[i].axvspan(vol_time.iloc[start],vol_time.iloc[end] , color='grey', alpha=0.3)
        for start, end in quies_ints:
            ax[i].axvspan(vol_time.iloc[start],vol_time.iloc[end] , color='grey', alpha=0.6)
    plt.tight_layout()
    break
    # plt.savefig(s_path+'//'+f+'_traj.png',bbox_inches='tight')
    # plt.show()

## 提取子文件夹文件

提取每个子文件夹的图片，存储在指定文件夹下

In [None]:
target_folder = "Y:\\SZX\\2025_wbi_analysis\\good_WBI\\clusteringplots"  # change to the actual path
# Create the destination first; otherwise every copy below fails when the
# folder does not exist yet.
os.makedirs(target_folder, exist_ok=True)
for f_path in sub_folder_path:
    f = os.path.basename(f_path)
    # Step 1: analysis output folders of this dataset.
    folders = [
        d for d in os.listdir(f_path)
        if 'AnalysisFigs' in d and os.path.isdir(os.path.join(f_path, d))
    ]
    if not folders:
        # raise FileNotFoundError("No folder containing 'AnalysisFigs' found.")
        continue
    # Steps 2-3: keep the folder with the most recent modification time.
    latest_folder = max(folders, key=lambda d: os.path.getmtime(os.path.join(f_path, d)))
    file_name = 'cluster_calcium_heatmap(smooth_10).png'
    file_path = os.path.join(f_path, latest_folder, 'HierClustering', file_name)

    # Skip datasets whose heatmap does not exist.
    if not os.path.exists(file_path):
        print(f"文件不存在: {file_path}")
        continue

    # Prefix the copy with the dataset folder name so the files do not collide.
    new_file_name = f"{f}_cluster(smh_10).png"  # adjust the naming rule as needed
    target_path = os.path.join(target_folder, new_file_name)

    try:
        # copy2 copies the file together with its metadata.
        shutil.copy2(file_path, target_path)
        print(f"成功复制: {file_path} -> {target_path}")
    except Exception as e:
        print(f"复制失败: {file_path}, 错误: {e}")

## 汇总做轨迹图

In [None]:
s_path = r'Y:\\SZX\\2025_wbi_analysis\\good_WBI'+'\\251022TrajPlots'
os.makedirs(s_path, exist_ok=True)
# One trajectory figure per dataset: X/Y positions coloured by time (top)
# and by CTX_left (bottom).
# df_p_sign_all = []
for f_path in sub_folder_path:
    f = os.path.basename(f_path)
    file_df = [name for name in os.listdir(f_path) if '_MotionMidlineMatchVol.pkl' in name]

    if not file_df:
        # raise FileNotFoundError("No folder containing 'AnalysisFigs' found.")
        continue
    df = pd.read_pickle(os.path.join(f_path,file_df[0]))

    df_t = df[['Vol_Time','X','Y','CTX_left']]
    fig,ax = plt.subplots(2,1, figsize=(6,4))
    fig.suptitle(f)
    ax0 = ax[0].scatter(df_t['X'],df_t['Y'],c=df_t['Vol_Time'], cmap='cool',s=0.8)
    ax[0].set_aspect(1)
    ax[0].set_xlim(0,40)
    divider = make_axes_locatable(ax[0])
    cax = divider.append_axes("right", size="5%", pad=0.1)  # size sets the colorbar width, pad the gap
    cbar = plt.colorbar(ax0, cax=cax)
    ax[0].set_title('Time')
    # cbar.set_label('Time', rotation=270, labelpad=15)
    ax1 = ax[1].scatter(df_t['X'],df_t['Y'],c=df_t['CTX_left'], cmap='bwr',vmin=-1,vmax=1,s=0.8)
    ax[1].set_aspect(1)
    ax[1].set_xlim(0,40)
    ax[1].set_title('CTX_left')
    divider = make_axes_locatable(ax[1])
    cax = divider.append_axes("right", size="5%", pad=0.1)  # size sets the colorbar width, pad the gap
    cbar = plt.colorbar(ax1, cax=cax)
    plt.tight_layout()
    plt.savefig(os.path.join(s_path, f+'_traj.png'),bbox_inches='tight')
    # Release the figure once it is on disk so open figures do not pile up
    # over the loop (the second trajectory cell closes its figures as well).
    plt.close(fig)
    # plt.show()

In [None]:
s_path = r'Y:\\SZX\\2025_wbi_analysis\\good_WBI'+'\\251022TrajPlots_2'
os.makedirs(s_path, exist_ok=True)

# Trajectory figures whose height follows the Y extent of each dataset.
for f_path in sub_folder_path:
    f = os.path.basename(f_path)
    file_df = [name for name in os.listdir(f_path) if '_MotionMidlineMatchVol.pkl' in name]
    if len(file_df) == 0:
        continue
    df = pd.read_pickle(os.path.join(f_path, file_df[0]))

    df_t = df[['Vol_Time', 'X', 'Y', 'CTX_left']]

    # Height/width ratio of the data; the X extent is the fixed 0-40 window
    # used for the x limits below.
    x_range = 40
    y_range = df_t['Y'].max() - df_t['Y'].min()
    aspect_ratio = y_range / x_range

    # Clamp the height of one panel to [min_height, max_height] inches.
    min_height = 2
    max_height = 10

    fixed_width = 6
    subplot_height = max(min_height, min(max_height, fixed_width * aspect_ratio))
    # Two panels plus a fixed allowance of 0.8 inches.
    total_height = subplot_height * 2 + 0.8

    fig, ax = plt.subplots(2, 1, figsize=(fixed_width, total_height))
    fig.suptitle(f)

    # Top panel: positions coloured by time.
    ax0 = ax[0].scatter(df_t['X'], df_t['Y'], c=df_t['Vol_Time'], cmap='cool', s=0.8)
    # Bottom panel: positions coloured by CTX_left on a fixed [-1, 1] scale.
    ax1 = ax[1].scatter(df_t['X'], df_t['Y'], c=df_t['CTX_left'], cmap='bwr', vmin=-1, vmax=1, s=0.8)

    # Shared styling: equal aspect, identical limits, colorbar on the right.
    for panel_ax, points, panel_title in ((ax[0], ax0, 'Time'), (ax[1], ax1, 'CTX_left')):
        panel_ax.set_aspect('equal')
        panel_ax.set_xlim(0, 40)
        panel_ax.set_ylim(df_t['Y'].min(), df_t['Y'].max())
        panel_ax.set_title(panel_title)
        divider = make_axes_locatable(panel_ax)
        cax = divider.append_axes("right", size="5%", pad=0.1)
        cbar = plt.colorbar(points, cax=cax)

    plt.tight_layout()
    plt.savefig(s_path + '//' + f + '_traj.png', bbox_inches='tight')
    plt.close()

## 汇总分别对单数据热图标记显著神经元

In [None]:
def _draw_behavior_strip(fig, gs, row, values, title, strip_cmap, font_size,
                         norm=None, style_ticks=False):
    """Draw one 1 x T strip in grid column 2 with its colorbar in grid column 3.

    Returns the image created by ``imshow`` so the caller can reuse it.
    """
    strip_ax = fig.add_subplot(gs[row, 2])
    # Turn the 1-D vector into a single-row image.
    image = strip_ax.imshow(values[np.newaxis, :], cmap=strip_cmap, norm=norm, aspect='auto')
    cax = fig.add_subplot(gs[row, 3])
    cbar = plt.colorbar(image, cax=cax, fraction=0.5, orientation='vertical', aspect=3)
    cbar.ax.set_title(title, fontsize=font_size*0.85, pad=font_size*0.25)
    if style_ticks:
        cbar.ax.tick_params(labelsize=font_size*0.5, width=5, length=5, pad=font_size*0.25)
    strip_ax.set_xticks([])
    strip_ax.set_yticks([])
    return image


def calcium_heatmap_sign(calcium_intensity, df, col_draw, suptitle, neuron_ids, model, w_p2m,
                         show_id_stride=20, show_vol_stride=10,
                         heatmap_range=(None, 0.5),
                         unit_w=0.05, unit_h=0.2, cal_height_ratio=20, wspace=0.125, hspace=0.125,
                         bound_cluster=None, smooth_kernel=15,
                         font_size=90, font_color='black',
                         idx=None, vmin=0, vmax=1, threshold=None, xlabel='',
                         cmap='jet', level=None, sig_matrix=None, signal_save_path=None, filename=''):
    """Save a composite figure: dendrogram, matrix heatmap, behaviour strips,
    calcium heatmap and per-event significance strips.

    Parameters
    ----------
    calcium_intensity : 2-D array, (number of neurons, number of time points).
    df : DataFrame holding every column named in ``col_draw`` plus ``Vol_Time``
        (used for the x tick labels of the calcium heatmap).
    col_draw : list of column names of ``df`` to draw as strips above the
        calcium heatmap. If both ``'turn_cor'`` and ``'turn_pc'`` are listed,
        they share a single strip in the second-to-last grid row.
    suptitle : figure title.
    neuron_ids : labels for the calcium heatmap rows (every
        ``show_id_stride``-th one is shown).
    model : fitted clustering model handed to ``am.plot_dendrogram``.
    w_p2m : matrix drawn as the left heatmap (its colorbar is labelled
        'Correlation').
    show_vol_stride : spacing of the x ticks of the calcium heatmap.
    heatmap_range : (vmin, vmax) of the calcium heatmap.
    unit_w, unit_h : figure size per time point / per neuron.
    cal_height_ratio : height ratio of the bottom (heatmap) row relative to
        the 1.5-unit strip rows.
    bound_cluster : positions of dashed white boundary lines; ``None`` (default)
        or an empty sequence draws none.
    smooth_kernel : only used to tag the output file name.
    vmin, vmax, cmap : colour scale of the ``w_p2m`` heatmap.
    threshold, level : passed to the dendrogram as ``color_threshold`` and ``p``.
    sig_matrix : DataFrame with one 0/1 column per event and one row per
        neuron; ``None`` draws no significance strips.
    signal_save_path, filename : output directory and file-name prefix.
    idx : accepted for interface compatibility; not used here.

    The figure is written to ``signal_save_path`` and then closed; nothing is
    returned.
    """
    num_neurons, num_vols = calcium_intensity.shape

    # Normalise optional inputs so the code below needs no special cases.
    if bound_cluster is None:
        bound_cluster = []
    if sig_matrix is None:
        sig_matrix = pd.DataFrame()

    # With both turn columns present they are merged into one strip, so the
    # grid needs one row fewer.
    turn_merge = ('turn_cor' in col_draw) and ('turn_pc' in col_draw)
    num_row = len(col_draw) if turn_merge else len(col_draw) + 1

    height_ratios = [1.5] * (num_row - 1) + [cal_height_ratio]
    num_sig_cols = len(sig_matrix.columns)
    # Columns: 0 spacer | 1 matrix heatmap | 2 strips + calcium | 3 strip colorbars | 4.. extra
    gs = GridSpec(num_row, 4 + num_sig_cols, height_ratios=height_ratios,
                  width_ratios=[0.3, 25, 50, 0.3] + [0.5]*num_sig_cols, wspace=wspace, hspace=hspace)
    fig_h = (unit_h*num_neurons) / cal_height_ratio * sum(height_ratios)
    fig = plt.figure(figsize=(unit_w*num_vols + unit_h*num_neurons*1.1, fig_h))
    fig.suptitle(suptitle, fontsize=100, fontweight='bold')

    # Matrix heatmap in the bottom row, dendrogram in the three rows above it.
    ax0 = fig.add_subplot(gs[-1, 1])
    ax1 = fig.add_subplot(gs[-4:-1, 1])

    # NOTE: this overrides the global default line width and is never restored;
    # lines created later without an explicit linewidth pick it up.
    mpl.rcParams['lines.linewidth'] = font_size*0.2
    am.plot_dendrogram(model, truncate_mode="level", p=level,
                       no_labels=True, orientation='bottom', ax=ax1, color_threshold=threshold,
                       above_threshold_color='blue')
    ax1.set_xlim(ax1.get_xlim())  # freeze the current limits
    ax1.xaxis.set_ticks_position('none')
    ax1.yaxis.set_ticks_position('none')
    ax1.set_xticks([])
    ax1.set_yticks([])
    ax1.invert_yaxis()
    for spine in ax1.spines.values():
        spine.set_visible(False)

    ax_list = list(np.linspace(0, w_p2m.shape[0], 24, dtype=int))
    X_sort = w_p2m.copy()

    # Colorbar of the matrix heatmap lives in its own axes: [left, bottom, width, height].
    cbar_ax = fig.add_axes([0.0, 0.15, 0.01, 0.4])
    ax0 = sns.heatmap(X_sort, ax=ax0, cmap=cmap, vmin=vmin, vmax=vmax, cbar=True,
                      cbar_ax=cbar_ax, square=False)
    cbar_ax.yaxis.set_ticks_position("left")
    cbar_ax.yaxis.set_label_position("left")
    cbar_ax.tick_params(labelsize=font_size, pad=font_size*0.75)
    cbar_ax.set_ylabel('Correlation', fontsize=font_size*1.25, labelpad=font_size*0.75)

    ax0.set_xticks(ax_list, ax_list, fontsize=font_size)
    ax0.set_yticks(ax_list, ax_list, fontsize=font_size)
    ax0.tick_params(axis='x', pad=font_size*0.75)
    ax0.tick_params(axis='y', labelrotation=0, pad=font_size*0.75)
    # Flip y so the matrix rows line up with the calcium heatmap rows.
    ax0.invert_yaxis()
    ax0.set_xlabel(xlabel, fontsize=font_size*1.25, labelpad=font_size*0.75)

    # Boundary lines on the matrix heatmap, in both directions.
    for boundary in bound_cluster:
        ax0.axhline(y=boundary, color='white', linestyle='--')
        ax0.axvline(x=boundary, color='white', linestyle='--')

    # Behaviour strips, one grid row per drawn column. Axes are only created
    # for columns that are actually drawn, so skipped columns (the merged turn
    # columns, or unrecognised 'forward*' names) leave no empty axes behind.
    for i, col in enumerate(col_draw):
        vector = df[col].values
        if ('turn' not in col) and ('forward' not in col):
            # Continuous variable.
            _draw_behavior_strip(fig, gs, i, vector, col.replace('smoothed_', ''), 'jet',
                                 font_size, style_ticks=True)
        elif 'turn' in col:
            if turn_merge:
                continue  # drawn once, below, in the merged strip
            _draw_behavior_strip(fig, gs, i, vector, col,
                                 ListedColormap(['grey', '#FFC832']), font_size)
        elif col == 'forward_quies':
            # Three discrete states mapped through fixed bin edges.
            state_cmap = ListedColormap(['red', 'blue', 'white'])
            state_norm = BoundaryNorm([-0.5, 0.5, 1.5, 2.5], state_cmap.N)
            _draw_behavior_strip(fig, gs, i, vector, col, state_cmap, font_size, norm=state_norm)
        elif col == 'forward':
            _draw_behavior_strip(fig, gs, i, vector, col,
                                 ListedColormap(['red', 'blue']), font_size)

    # Merged turn strip in the second-to-last row. Only the turn_pc layer is
    # drawn (0 -> grey, 1 -> yellow), and its colorbar is built from that image.
    if turn_merge:
        _draw_behavior_strip(fig, gs, -2, df['turn_pc'].values, 'coiling/turn',
                             ListedColormap(['grey', '#FFC832']), font_size, style_ticks=True)

    # Calcium heatmap in the bottom row.
    ax = fig.add_subplot(gs[-1, 2])
    heatmap = sns.heatmap(calcium_intensity, vmin=heatmap_range[0], vmax=heatmap_range[1],
                          xticklabels=np.arange(num_vols)[::show_vol_stride],
                          yticklabels=neuron_ids[::show_id_stride], cbar=False, cmap='jet',
                          cbar_kws={'orientation': 'horizontal'}, ax=ax)

    # Horizontal colorbar directly below the heatmap, same width.
    pos = ax.get_position()
    cax = fig.add_axes([pos.x0, pos.y0 - 0.08, pos.width, 0.02])
    colorbar = plt.colorbar(heatmap.collections[0], cax=cax, orientation='horizontal')
    colorbar.ax.tick_params(labelsize=font_size, pad=font_size*0.75)
    colorbar.set_label('ΔR/R0', fontsize=font_size*1.25, labelpad=font_size*0.75)

    # Horizontal boundary lines on the calcium heatmap.
    for boundary in bound_cluster:
        ax.axhline(y=boundary, color='white', linestyle='--', linewidth=font_size*0.12)

    ax.set_xticks(ticks=np.arange(0, num_vols, show_vol_stride),
                  labels=df.Vol_Time.astype(int).values[::show_vol_stride],
                  fontsize=font_size, rotation=0, color=font_color)
    ax.tick_params(pad=font_size*0.75)
    ax.set_xlabel('Time(s)', fontsize=font_size*1.25, color=font_color, labelpad=font_size*0.75)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.invert_yaxis()
    ax.yaxis.tick_right()
    ax.yaxis.set_label_position('right')

    # Significance strips: one narrow column per event, in a fixed display order.
    event_order_color = ['Velocity', 'Speed', 'CTX', 'Angular Velocity', 'Curvature', 'Forw-Rev',
                         'RevStart', 'RevEnd', 'Turn', 'TurnStart', 'TurnEnd']
    # Fixed 12-colour palette; each event always gets the same colour.
    tol_bright_12 = [
        "#332288",  # dark blue
        "#88CCEE",  # cyan
        "#44AA99",  # teal
        "#117733",  # green
        "#AA4499",  # purple
        "#6699CC",  # sky blue
        "#888888",  # gray
        "#661100",  # brown
        "#DDCC77",  # sand
        "#999933",  # olive
        "#882255",
        "#CC6677",  # rose
    ]
    color_dict = {event: tol_bright_12[i % len(tol_bright_12)]
                  for i, event in enumerate(event_order_color)}

    existing_columns = [col for col in event_order_color if col in sig_matrix.columns]
    sig_matrix_ordered = sig_matrix[existing_columns]
    # NOTE(review): rows of sig_matrix are drawn as given and share the y axis
    # of the calcium heatmap, so they are assumed to be in the same neuron
    # order as calcium_intensity -- confirm against the caller.
    # NOTE(review): the first strip goes into grid column 3 (the colorbar
    # column, unused in the bottom row), so the last grid column stays empty.
    for j, event in enumerate(sig_matrix_ordered.columns):
        ax_sig = fig.add_subplot(gs[-1, 3+j], sharey=ax)
        data = sig_matrix_ordered[[event]].values  # single-column matrix
        cmap_sig = ListedColormap(['white', color_dict[event]])  # 0 -> white, 1 -> event colour

        sns.heatmap(
            data,
            cmap=cmap_sig,
            cbar=False,
            ax=ax_sig,
            vmin=0, vmax=1,  # fixed range so the two colours never rescale
            xticklabels=False,
            yticklabels=False,
            square=False
        )
        ax_sig.set_xticks([])
        ax_sig.invert_yaxis()
        ax_sig.set_xlabel(
            event,
            fontsize=font_size * 0.7,
            labelpad=font_size * 0.2,
            rotation=60,  # rotated to avoid overlap between neighbours
            ha='right'
        )
        ax_sig.set_title("")
        for spine in ax_sig.spines.values():
            spine.set_visible(False)

    # Tag the file name with the smoothing kernel when one was used.
    suffix = f'(smooth_{smooth_kernel})' if smooth_kernel else ''
    plt.savefig(f'{signal_save_path}/{filename}clus_cal_heatmap{suffix}.png',
                transparent=False, dpi=100)
    plt.close()

In [None]:
s_path = r'Y:\\SZX\\2025_wbi_analysis\\good_WBI'+'\\251024LabeledHeatmap'
os.makedirs(s_path, exist_ok=True)

# Raw labels found in the correlation CSV -> display names used in the figure.
event_note_renames = {
    'forward': 'Forw-Rev',
    'turn_pc': 'Turn',
    'turn_cor': 'Turn',
    'curvature': 'Curvature',
    'CoilingStart': 'TurnStart',
    'sm_velocity': 'Velocity',
    'sm_speed': 'Speed',
    'sm_ang': 'Angular Velocity',
    'sm_CTX': 'CTX',
    'OmegaStart': 'TurnStart',
    'OmegaEnd': 'TurnEnd',
}
# Events that need |true_r| >= 0.3 as well as p_cor <= 0.05 to count as significant.
continuous_events = ['Velocity', 'Speed','CTX','Angular Velocity','Curvature','Turn','Forw-Rev']
# Events for which p_cor <= 0.05 alone is enough.
transition_events = ['RevStart','RevEnd', 'TurnStart','TurnEnd']

# For every dataset: build the per-neuron significance table, smooth and
# cluster the calcium traces, and save the labelled heatmap figure.
for f_path in sub_folder_path:
    f = os.path.basename(f_path)
    # Motion table aligned to the imaging volumes.
    file_df = [name for name in os.listdir(f_path+'\\') if '_MotionMidlineMatchVol.pkl' in name]
    if not file_df:
        # Nothing to align against: skip this dataset instead of failing on file_df[0].
        print(f"Skipping {f}: no '_MotionMidlineMatchVol.pkl' file found")
        continue
    df_mot_vol = pd.read_pickle(os.path.join(f_path,file_df[0]))

    # Pick the most recently modified 'AnalysisFigs' folder.
    folders = [
        name for name in os.listdir(f_path)
        if 'AnalysisFigs' in name and os.path.isdir(os.path.join(f_path, name))
    ]
    if not folders:
        raise FileNotFoundError("No folder containing 'AnalysisFigs' found.")
    latest_folder = max(folders, key=lambda name: os.path.getmtime(os.path.join(f_path, name)))
    save_path = f_path+'\\'+latest_folder
    corr_path = f_path+'\\'+latest_folder+'\\CorrAnalysis\\'

    p_val_path = [name for name in os.listdir(corr_path) if '_corr_p_cor.csv' in name][0]
    df_p_cr = pd.read_csv(corr_path+'\\'+p_val_path)
    # Rows without an EventNote fall back to their Event name, then every raw
    # label is mapped to its display name.
    df_p_cr['EventNote'] = df_p_cr['EventNote'].fillna(df_p_cr['Event'])
    for raw_label, display_label in event_note_renames.items():
        df_p_cr.loc[df_p_cr['EventNote']==raw_label,'EventNote'] = display_label

    # Flag significant (neuron, event) pairs.
    p_significant = df_p_cr['p_cor']<=0.05
    r_strong = df_p_cr['true_r'].abs()>=0.3
    is_continuous = df_p_cr['EventNote'].isin(continuous_events)
    is_transition = df_p_cr['EventNote'].isin(transition_events)
    df_p_cr['sign'] = 0
    df_p_cr.loc[p_significant & ((r_strong & is_continuous) | is_transition), 'sign'] = 1

    # One row per neuron, one 0/1 column per event. 'max' means that a neuron
    # counts as significant if any of the merged rows (e.g. turn_pc / turn_cor) is.
    sig_matrix = df_p_cr.pivot_table(
        index='Neuron',
        columns='EventNote',
        values='sign',
        aggfunc='max',
        fill_value=0
    ).reset_index()
    # Sort rows by the number embedded in the neuron name, then drop the helper columns.
    sig_matrix['Neuron_num'] = sig_matrix['Neuron'].str.extract(r'(\d+)', expand=False).astype(int)
    sig_matrix = sig_matrix.sort_values('Neuron_num')
    sig_matrix = sig_matrix.drop(columns=['Neuron_num','Neuron']).reset_index(drop=True)
    # NOTE(review): sig_matrix stays in neuron-number order while calcium_intensity
    # is re-ordered by `idx` further down; confirm the significance strips are
    # meant to line up with the clustered heatmap rows.

    # Calcium traces; masked time points are blanked out.
    calcium_intensity= np.load(os.path.join(f_path+'\\'+latest_folder, 'calcium_intensity_sorted.npy'))
    mask = df_mot_vol['mask'].values.astype(bool)   # True marks time points set to NaN
    calcium_intensity[:, mask] = np.nan

    print('文件大小:neuron*timestamp',calcium_intensity.shape)
    # Box-filter each trace along time. NaNs are replaced by the trace mean first
    # so they do not spread through the filter, and are restored afterwards.
    # The trace is a (T, 1) column and OpenCV's ksize is (width, height), so the
    # 7-sample window must be (1, 7); (7, 1) would average across the single
    # column and leave the trace unsmoothed.
    scale = 1.5
    for i in range(calcium_intensity.shape[0]):
        row = calcium_intensity[i]
        mean_val = np.nanmean(row)
        row_no_nan = np.where(np.isnan(row), mean_val, row)
        smoothed = cv2.blur(row_no_nan.reshape(-1, 1), (1, 7)) * scale
        calcium_intensity[i] = smoothed[:, 0]
    calcium_intensity[:, mask] = np.nan

    # Signed velocity and speed.
    df_mot_vol["head_velocity"] = df_mot_vol.apply(WBI.signed_norm, axis=1)
    df_mot_vol["head_speed"] = df_mot_vol["head_velocity"].abs()
    # Moving-average smoothing of the continuous behaviour variables.
    window_size = 15
    for smoothed_col, raw_col in [('sm_velocity', 'head_velocity'), ('sm_speed', 'head_speed'),
                                  ('sm_CTX', 'CTX_left'), ('sm_ang', 'ang_velocity')]:
        df_mot_vol[smoothed_col] = df_mot_vol[raw_col].rolling(window=window_size, min_periods=1).mean()

    choose_index = 0
    thresh = 5
    links=['ward','average','average','complete']
    affs=['euclidean','cosine','cityblock','cosine']
    vmin=-0.5
    vmax=1
    smooth_kernel = 10
    print('calcium_intensity.shape',calcium_intensity.shape)

    # Cluster on the unmasked time points only.
    valid_timepoints = ~mask
    calcium_valid = calcium_intensity[:, valid_timepoints]
    link = links[choose_index]
    aff = affs[choose_index]
    # idx: row order after clustering the neuron-by-neuron correlation matrix.
    idx=am.cluster(np.corrcoef(calcium_valid),link=link,aff=aff)
    print(idx)
    # Cumulative sum of the values returned by am.GetBound gives the boundary positions.
    bound=np.cumsum(am.GetBound(np.corrcoef(calcium_valid),link=link,aff=aff,
                                threshold=thresh).astype(int))
    print('边界', bound)

    calcium_valid = calcium_valid[idx]
    calcium_intensity = calcium_intensity[idx]
    # Extra smoothing of the valid traces before the correlation matrix is computed.
    if smooth_kernel:
        for i,k in enumerate(calcium_valid):
            calcium_valid[i] = cv2.blur(k,(1,smooth_kernel))[:,0]
    w_p2m = np.corrcoef(calcium_valid)

    # Full hierarchical tree (distance_threshold=0) for the dendrogram.
    # NOTE: `affinity` was renamed to `metric` in scikit-learn 1.2 and removed
    # in 1.4; switch the keyword if the environment is upgraded.
    model = AgglomerativeClustering(distance_threshold=0, n_clusters=None,linkage=link, affinity=aff)
    model = model.fit(w_p2m)
    font_size = 100

    neuron_ids = np.arange(calcium_valid.shape[0])
    # Combined state column: the 'forward' value, overridden with 2 where quies_pc == 1.
    df_mot_vol['forward_quies'] = df_mot_vol['forward'].copy()
    df_mot_vol.loc[df_mot_vol['quies_pc'] == 1,'forward_quies'] = 2
    col_draw = ['sm_velocity','sm_speed', 'sm_ang', 'sm_CTX','curvature', 'forward_quies','turn_pc','turn_cor']
    calcium_heatmap_sign(calcium_intensity,df_mot_vol, col_draw, f,neuron_ids,model,w_p2m, show_id_stride=10,
                    show_vol_stride=500, heatmap_range=(0,0.6),wspace=0.06, hspace=0.2,bound_cluster=bound,
                    unit_w=0.03, unit_h = 0.8, cal_height_ratio=30, smooth_kernel=smooth_kernel,
                    font_size=font_size, font_color='black', idx = idx, vmin = vmin, vmax = vmax,
                        threshold=thresh, xlabel='Neuron Index',level=35,sig_matrix = sig_matrix, signal_save_path=s_path, filename=f)

# 检查每个文件的运动参数修改和静息状态


In [None]:
# Root folders whose immediate sub-directories are the datasets to process.
p_fs = [r'Y:\\SZX\\2025_wbi_analysis\\good_WBI\\done',r'Y:\\SZX\\2025_wbi_analysis\\good_WBI\\to_do_q']

# Collect the path of every sub-directory (plain files are ignored).
sub_folder_path = []
for p_f_all in p_fs:
    for entry in os.listdir(p_f_all):
        if os.path.isdir(p_f_all+'\\'+entry):
            sub_folder_path.append(os.path.join(p_f_all, entry))

In [None]:
for f_path in sub_folder_path:
    f = os.path.basename(f_path)
    fig_folder = [f for f in os.listdir(f_path) if 'AnalysisFigs' in f]

    folders = [
        f for f in os.listdir(f_path)
        if 'AnalysisFigs' in f and os.path.isdir(os.path.join(f_path, f))
    ]