# In [None]:  (Jupyter cell marker left over from the notebook export)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import mutual_info_classif
from sklearn.utils import resample
import matplotlib


# --- Configuration and data loading ----------------------------------------
# Render figure text with the Microsoft JhengHei font (presumably so that the
# full-width / CJK characters used in the plot labels display correctly).
matplotlib.rcParams["font.family"] = ["Microsoft JhengHei"]

# NOTE(review): this path ends in a directory separator and names no file;
# pd.read_csv needs the path of the CSV file itself -- confirm the filename.
file_path = "C:/Users/User/Desktop/"
df = pd.read_csv(file_path, encoding="ISO-8859-1")

# Column holding the class label that the features are scored against.
label_col = "TMT"

# Columns that are label-encoded as categories.
categorical_features = [
    "GEN", "Edu", "Smoke", "Drink",
    "S-HLTH", "S-HAP", "EX_TYPE", "O/C",
]

# Columns that are discretised into quantile bins.
numerical_features = [
    "Ht", "AGE", "Wt", "BMI", "BF", "BMR", "SMM", "SMM/Wt",
    "GD-F", "GD-T", "GD-I", "STAI",
    "M-HR", "Bruce", "Vo2", "SBP", "DBP", "R-HR", "MAP",
    "SV", "CO", "SVR", "PWV",
    "RMSSD", "SDSD", "pNN50", "Hfnu", "Sympathetic", "SVRatio", "RSA",
    "Level",
]


# --- Feature preprocessing -------------------------------------------------
# Every feature is turned into integer-like codes because the mutual
# information step further down is run with discrete_features=True.

# Code assigned to a missing value in a binned numerical feature. It is kept
# distinct from the real quantile codes (which start at 0) so that missing
# data forms its own category instead of being merged into the lowest bin.
MISSING_BIN = -1

y = df[label_col].values
# Share df's index so the Series returned by pd.qcut aligns row-for-row.
X_processed = pd.DataFrame(index=df.index)

# Categorical features: cast to str (so NaN becomes the literal "nan"
# category) and map each distinct string to an integer code.
for col in categorical_features:
    le = LabelEncoder()
    X_processed[col] = le.fit_transform(df[col].astype(str))

# Numerical features: discretise into up to 5 quantile bins. duplicates='drop'
# merges bins whose edges coincide; labels=False yields bin codes, with NaN
# kept for rows whose value is missing.
for col in numerical_features:
    try:
        X_processed[col] = pd.qcut(df[col], q=5, duplicates='drop', labels=False)
    except Exception as e:
        # Deliberately best-effort: a column that cannot be binned is reported
        # and left out of the feature matrix rather than aborting the run.
        print(f"⚠️ Binning failed for ：{col}，error：{e}")

# Only the binned numerical columns can still contain NaN at this point.
X_processed = X_processed.fillna(MISSING_BIN)
X_np = X_processed.values


# --- Bootstrapped mutual information ---------------------------------------
# Score every feature against the label on n_iterations resampled copies of
# the data, then summarise the scores per feature.
n_iterations = 100
seeds = range(42, 42 + n_iterations)  # one distinct resampling seed per run

all_scores = []
for seed in seeds:
    # resample() draws a bootstrap sample of the rows, keeping X and y paired.
    X_boot, y_boot = resample(X_np, y, random_state=seed)
    all_scores.append(
        mutual_info_classif(X_boot, y_boot, discrete_features=True, random_state=42)
    )

# Stack the runs into one array (rows: runs, columns: features) and take the
# per-feature mean and standard deviation across runs.
all_scores = np.asarray(all_scores)
avg_scores = all_scores.mean(axis=0)
std_scores = all_scores.std(axis=0)


# --- Ranking ---------------------------------------------------------------
top_k = 20  # number of features to report

# Feature indices ordered from highest to lowest mean score, cut to top_k.
descending = np.argsort(avg_scores)[::-1]
top_indices = descending[:top_k]

print("\n🔍 Top features (ranked by mean Mutual Information)：")
feature_names = X_processed.columns
for idx in top_indices:
    print(f"{feature_names[idx]}: Mean = {avg_scores[idx]:.4f}, SD = {std_scores[idx]:.4f}")


# --- Plot ------------------------------------------------------------------
# Horizontal bar chart of the top-ranked features with their SD as error bars.
# Everything is sized from the number of features actually selected:
# top_indices is shorter than top_k when fewer than top_k features exist, and
# barh would otherwise fail on mismatched lengths.
n_shown = len(top_indices)
positions = range(n_shown)
# Reverse so the highest-scoring feature is drawn at the top of the chart.
plot_order = top_indices[::-1]

plt.figure(figsize=(12, 6))
plt.barh(positions, avg_scores[plot_order], xerr=std_scores[plot_order], capsize=3)
plt.yticks(positions, [X_processed.columns[i] for i in plot_order])
plt.xlabel("Mutual Information（Mean ± SD）")
plt.title(f"Top {n_shown} Feature - Bootstrapped Mutual Information (n={n_iterations})")
plt.tight_layout()
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.show()
