In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Create the figure output directory up front; exist_ok=True makes re-running the cell safe.
os.makedirs("./Figure", exist_ok=True)

# 1. Load data
# The table is defined inline: 33 rows = 11 cases x 3 planes (PA, DA, AO),
# stored case by case, so rows 3k..3k+2 belong to the same case.
# Missing measurements are encoded as np.nan:
#   - BFS / IMT are missing for case 11;
#   - the Echo_* and CFD_* columns are missing for cases 3, 5, 6, 7 and 8.
# NOTE(review): units and the clinical meaning of each column are not stated
# anywhere in this notebook -- confirm against the data source.
data = pd.DataFrame({
    # Case identifier, repeated once per plane.
    'Case': [1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9,9,10,10,10,11,11,11],
    # Per-case value (identical on the three rows of a case).
    'GA': [24.71,24.71,24.71,24.14,24.14,24.14,24.00,24.00,24.00,25.43,25.43,25.43,
           26.14,26.14,26.14,28.00,28.00,28.00,26.43,26.43,26.43,35.29,35.29,35.29,
           24.57,24.57,24.57,33.00,33.00,33.00,26.71,26.71,26.71],
    # Measurement plane label; the PA/DA/AO pattern repeats for every case.
    'plane': ['PA','DA','AO']*11,
    'BFS': [17.226,17.221,17.320,17.552,17.144,17.288,17.249,17.173,17.376,17.287,17.283,17.040,
            17.393,17.195,17.555,17.379,17.530,17.575,17.495,17.225,17.569,17.639,17.320,17.373,
            17.443,17.350,17.218,17.716,17.571,17.644,np.nan,np.nan,np.nan],
    'IMT': [356.30,564.05,525.90,267.70,633.15,281.60,369.90,383.00,420.10,377.70,775.30,493.20,
            471.20,798.45,586.50,450.10,457.55,488.10,571.80,712.55,793.80,490.00,480.00,400.00,
            543.00,453.55,394.50,380.00,400.70,330.00,np.nan,np.nan,np.nan],
    'Echo_Diameter': [4.30,2.00,3.70,5.40,3.50,2.45,np.nan,np.nan,np.nan,6.10,2.30,2.40,
                      np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,
                      5.20,3.00,3.90,7.10,4.30,3.40,5.20,3.50,4.10],
    'ECHO_velocity': [57.00,63.00,63.00,57.00,75.00,56.00,np.nan,np.nan,np.nan,53.60,46.00,56.00,
                      np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,
                      87.00,71.00,93.97,103.00,139.00,130.00,64.00,87.00,87.50],
    'CFD_Pressure': [-0.10,-1.41,0.16,-0.62,-1.23,-1.94,np.nan,np.nan,np.nan,-0.10,-0.43,2.56,
                     np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,
                     -0.17,-1.70,2.28,-1.52,-6.57,0.91,-0.38,-2.97,-0.72],
    'CFD_WSS': [8.28,159.86,45.53,82.34,98.03,130.24,np.nan,np.nan,np.nan,9.72,67.29,165.79,
                np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,
                19.91,176.49,110.83,171.40,539.85,111.55,39.22,191.19,186.88],
    'CFD_Q': [1.33,1.33,6.28,7.41,7.41,7.41,np.nan,np.nan,np.nan,1.62,1.62,-1.15,
              np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,
              2.62,2.62,7.81,14.47,14.47,5.90,2.64,2.64,5.17],
    'CFD_Velocity': [14.30,56.45,45.67,47.77,62.33,72.11,np.nan,np.nan,np.nan,14.53,29.15,83.05,
                     np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,
                     22.15,64.20,56.61,93.95,142.08,63.90,31.09,82.39,74.08],
    # Binary 0/1 label, constant within a case; used below as the box-plot hue
    # and as the classification target (presumably 1 = abnormal -- TODO confirm).
    'cardiac_abnormal': [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0]
})

# 2. Numeric feature columns; each one gets its own box-plot panel
numeric_cols = ['BFS', 'IMT', 'Echo_Diameter', 'ECHO_velocity',
                'CFD_Pressure', 'CFD_WSS', 'CFD_Q', 'CFD_Velocity']

# 3. Draw the composite figure and save it as a high-resolution PNG
sns.set(style="whitegrid")

# Size the grid from the number of features so that zip() below can never
# silently drop a column if the list grows. With the 8 features above this is
# still a 3 x 3 grid on an 18 x 15 inch canvas.
n_panel_cols = 3
n_panel_rows = max(1, -(-len(numeric_cols) // n_panel_cols))  # ceiling division
fig, axes = plt.subplots(nrows=n_panel_rows, ncols=n_panel_cols,
                         figsize=(18, 5 * n_panel_rows),
                         squeeze=False)  # always a 2-D array, even for a single row
fig.suptitle('Feature Distribution by Plane and Cardiac Abnormality', fontsize=16)

# One panel per feature: distribution per plane, split by the abnormality flag.
for ax, col in zip(axes.flat, numeric_cols):
    sns.boxplot(data=data, x='plane', y=col, hue='cardiac_abnormal',
                palette='Set2', ax=ax)
    ax.set_title(col)
    ax.legend(title='cardiac_abnormal', loc='best')

# Grid cells that received no feature would otherwise be saved as empty,
# framed panels (8 features in a 3 x 3 grid leaves one), so remove them.
for unused_ax in axes.ravel()[len(numeric_cols):]:
    unused_ax.remove()

# Leave head-room for the suptitle, then write the file and release the figure.
fig.tight_layout(rect=[0, 0.03, 1, 0.97])
composite_save_path = "./Figure/Box by plane.png"
fig.savefig(composite_save_path, dpi=300, bbox_inches='tight')
plt.close(fig)


In [3]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_selection import SelectKBest, f_classif
import os

# Make sure the output directory exists
os.makedirs("./Figure", exist_ok=True)

planes = ['PA', 'DA', 'AO']
feature_cols = ['BFS', 'IMT', 'Echo_Diameter', 'ECHO_velocity',
                'CFD_Pressure', 'CFD_WSS', 'CFD_Q', 'CFD_Velocity']

# Plot parameters
fscore_threshold = 5     # bars with F-score >= this value are highlighted in blue
pvalue_threshold = 0.05  # features with p-value < this value get a red '*'

# For every plane: rank the features by their ANOVA F-score against the
# cardiac_abnormal label and save the ranking as a horizontal bar chart.
for p in planes:
    df_plane = data[data['plane'] == p].copy()

    # Split into feature matrix and label
    X = df_plane[feature_cols]
    y = df_plane['cardiac_abnormal']

    # Complete-case analysis: keep only rows where every feature is present
    mask = X.notnull().all(axis=1)
    X = X[mask]
    y = y[mask]

    if len(X) < 3:
        print(f"【{p}】数据量过少，跳过特征选择与绘图。")
        continue

    # f_classif compares the label groups with a one-way ANOVA; if only one
    # class survives the filtering the statistic is undefined (NaN/inf scores
    # plus runtime warnings), so skip the plane instead of plotting garbage.
    if y.nunique() < 2:
        print(f"【{p}】标签仅含单一类别，跳过特征选择与绘图。")
        continue

    # Univariate scoring; k='all' keeps every feature, we only want the scores
    selector = SelectKBest(score_func=f_classif, k='all')
    selector.fit(X, y)

    # Collect the results, ascending so the strongest feature ends up on top
    # of the horizontal bar chart
    scores_df = pd.DataFrame({
        'Feature': X.columns,
        'F_score': selector.scores_,
        'p_value': selector.pvalues_
    }).sort_values(by='F_score', ascending=True)

    # Significance flag and bar colour
    scores_df['Significant'] = scores_df['p_value'] < pvalue_threshold
    scores_df['Above_Threshold'] = scores_df['F_score'] >= fscore_threshold
    scores_df['Color'] = scores_df['Above_Threshold'].apply(
        lambda x: '#1f77b4' if x else '#cccccc'
    )

    # Plot on an explicit figure/axes pair so nothing leaks between iterations
    fig, ax = plt.subplots(figsize=(9, 5))
    positions = list(range(len(scores_df)))
    ax.barh(positions, scores_df['F_score'].to_numpy(),
            color=scores_df['Color'].tolist(), edgecolor='black')

    # Mark significant features with a star just right of the bar end
    for pos, row in zip(positions, scores_df.itertuples()):
        if row.Significant:
            ax.text(row.F_score + 0.2, pos, '*', color='red', fontsize=14, va='center')

    ax.set_yticks(positions)
    ax.set_yticklabels(scores_df['Feature'])
    ax.set_xlabel('F-score')
    # Interpolate the threshold so the title cannot drift from the setting above
    ax.set_title(f'Feature Importance for {p} (F-score with p<{pvalue_threshold} marked)')
    ax.grid(axis='x', linestyle='--', alpha=0.6)
    fig.tight_layout()

    # Save as PNG, then close the figure so figures do not pile up in memory
    fig.savefig(f'./Figure/{p}_feature_importance.png', dpi=300)
    plt.close(fig)
    print(f"{p}层面特征重要性图已保存为PNG格式")

PA层面特征重要性图已保存为PNG格式
DA层面特征重要性图已保存为PNG格式
AO层面特征重要性图已保存为PNG格式
