# Single DLE

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier, Perceptron, PassiveAggressiveClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier
from sklearn.svm import SVC, NuSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, BernoulliNB, ComplementNB
from sklearn.neural_network import MLPClassifier
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier
from scipy.special import expit
import glob, os

# ================= Calibration functions =================
def platt_cv(p, y, n_splits=5):
    """Return out-of-fold Platt-scaled probabilities for one score vector.

    For every stratified fold a one-feature logistic regression is fitted on
    the training part of ``p`` against ``y`` and then used to predict the
    held-out part, so no sample is calibrated by a model that saw its label.

    Args:
        p: 1-D array-like of raw scores/probabilities, one per sample.
        y: 1-D array-like of class labels aligned with ``p``.
        n_splits: Number of stratified folds.

    Returns:
        1-D float ndarray with the same length as ``p``; entry ``i`` is the
        second-class column of ``predict_proba`` for sample ``i`` (the
        positive class for 0/1 labels).
    """
    # Force float storage. ``np.zeros_like(p)`` inherits the dtype of ``p``,
    # so an integer score column (e.g. hard 0/1 outputs) would make ``out``
    # integer and silently truncate every calibrated probability on assignment.
    p = np.asarray(p, dtype=float)
    y = np.asarray(y)
    out = np.zeros(p.shape, dtype=float)
    skf = StratifiedKFold(n_splits, shuffle=True, random_state=42)
    for tr, te in skf.split(p, y):
        lr = LogisticRegression(solver='lbfgs', max_iter=1000)
        # sklearn expects a 2-D feature matrix, hence the (-1, 1) reshape.
        lr.fit(p[tr].reshape(-1, 1), y[tr])
        out[te] = lr.predict_proba(p[te].reshape(-1, 1))[:, 1]
    return out

def isotonic_cv(p, y, n_splits=5):
    """Return out-of-fold isotonic-calibrated values for one score vector.

    For every stratified fold an isotonic regression is fitted on the
    training part of ``p`` against ``y`` and applied to the held-out part.
    Held-out scores outside the fitted range are clipped
    (``out_of_bounds='clip'``).

    Args:
        p: 1-D array-like of raw scores/probabilities, one per sample.
        y: 1-D array-like of labels aligned with ``p``.
        n_splits: Number of stratified folds.

    Returns:
        1-D float ndarray with the same length as ``p`` holding each
        sample's held-out calibrated value.
    """
    # Force float storage. ``np.zeros_like(p)`` inherits the dtype of ``p``,
    # so an integer score column would make ``out`` integer and silently
    # truncate the calibrated values on assignment.
    p = np.asarray(p, dtype=float)
    y = np.asarray(y)
    out = np.zeros(p.shape, dtype=float)
    skf = StratifiedKFold(n_splits, shuffle=True, random_state=42)
    for tr, te in skf.split(p, y):
        iso = IsotonicRegression(out_of_bounds='clip')
        iso.fit(p[tr], y[tr])
        out[te] = iso.transform(p[te])
    return out

# ================= Candidate stacking models =================
# Meta-learners tried at every stacking layer, keyed by the name used in the
# printed report and in the output column names.
#
# Every estimator that accepts ``random_state`` is seeded with the same value
# (6) already used for RF / ExtraTrees / MLP, so that the layer AUCs, and
# therefore the "AUC not improved" stopping decision, are reproducible from
# run to run.
#
# NOTE(review): the boosted models (GBDT / AdaBoost / XGB) keep the original
# learning_rate=1e-5; with 500 rounds that looks very small - confirm it is
# intentional.
candidate_models = {
    # ---- linear models ----
    'LR': LogisticRegression(solver='lbfgs', max_iter=2000),
    'Ridge': RidgeClassifier(max_iter=2000),
    'SGD': SGDClassifier(max_iter=2000, tol=1e-5, random_state=6),
    'Perceptron': Perceptron(max_iter=2000, random_state=6),
    'PassiveAggressive': PassiveAggressiveClassifier(max_iter=2000, random_state=6),
    # ---- tree ensembles ----
    'RF': RandomForestClassifier(n_estimators=500, max_depth=5, random_state=6),
    'ExtraTrees': ExtraTreesClassifier(n_estimators=500, max_depth=5, random_state=6),
    'GBDT': GradientBoostingClassifier(n_estimators=500, learning_rate=1e-5,
                                       random_state=6),
    'AdaBoost': AdaBoostClassifier(n_estimators=500, learning_rate=1e-5,
                                   random_state=6),
    'Bagging': BaggingClassifier(n_estimators=500, random_state=6),
    'XGB': XGBClassifier(n_estimators=500, learning_rate=1e-5,
                          use_label_encoder=False, eval_metric='logloss',
                          random_state=6),
    # ---- kernel / neighbour models ----
    # probability=True so that predict_proba is available for these SVMs.
    'SVC_rbf': SVC(probability=True, kernel='rbf', random_state=6),
    'SVC_linear': SVC(probability=True, kernel='linear', random_state=6),
    'NuSVC': NuSVC(probability=True, random_state=6),
    'KNN': KNeighborsClassifier(n_neighbors=5),
    # ---- naive Bayes ----
    'GaussianNB': GaussianNB(),
    'BernoulliNB': BernoulliNB(),
    'ComplementNB': ComplementNB(),
    # ---- neural networks ----
    'MLP_small': MLPClassifier(hidden_layer_sizes=(32,16), max_iter=2000, random_state=6),
    'MLP_large': MLPClassifier(hidden_layer_sizes=(64,32), max_iter=2000, random_state=6)
}

# ================= Layer-wise Boosting models =================
# Boosted models evaluated alongside the candidate stackers at every layer.
# Both are seeded (same value, 6, as the seeded candidate models) so that the
# per-layer AUCs are reproducible; Layer_XGB in particular draws random
# row/column subsamples (subsample / colsample_bytree < 1).
layer_boosters = {
    'Layer_GBDT': GradientBoostingClassifier(
        n_estimators=300, learning_rate=0.05, max_depth=3,
        random_state=6),
    'Layer_XGB': XGBClassifier(
        n_estimators=300, learning_rate=0.05, max_depth=3,
        subsample=0.8, colsample_bytree=0.8,
        eval_metric='logloss', use_label_encoder=False,
        random_state=6)
}

# ================= Multi-layer stacking + Boosting =================
def _calibrated_features(score_vectors, y):
    """Stack Platt- and isotonic-calibrated versions of each score vector.

    Returns an array of shape (n_samples, 2 * len(score_vectors)): all Platt
    columns first, then all isotonic columns, in the input order.
    """
    platt = [platt_cv(p, y) for p in score_vectors]
    iso = [isotonic_cv(p, y) for p in score_vectors]
    return np.vstack(platt + iso).T


def _oof_scores(model, X, y, folds):
    """Fit ``model`` on each training fold and score the matching held-out fold.

    Uses the second column of ``predict_proba`` when the estimator exposes
    it, otherwise the logistic sigmoid of ``decision_function``, otherwise the
    raw ``predict`` output. ``model`` is re-fitted in place for every fold.
    """
    scores = np.zeros(len(y), dtype=float)
    for tr, te in folds:
        model.fit(X[tr], y[tr])
        if hasattr(model, "predict_proba"):
            scores[te] = model.predict_proba(X[te])[:, 1]
        elif hasattr(model, "decision_function"):
            scores[te] = expit(model.decision_function(X[te]))
        else:
            scores[te] = model.predict(X[te])
    return scores


def multi_layer_stacking(df, prob_cols, label_col='label',
                          max_layers=10, topN=5):
    """Greedy multi-layer stacking over calibrated base-model probabilities.

    Layer inputs are Platt- and isotonic-calibrated versions of the previous
    layer's outputs (initially of ``prob_cols``). In each layer every entry of
    the module-level ``candidate_models`` and ``layer_boosters`` produces
    out-of-fold scores (5 stratified folds), which are blended 50/50 with the
    row-wise mean of the layer inputs and ranked by ROC AUC against the
    labels. The ``topN`` blends feed the next layer. Stacking stops after
    ``max_layers`` layers or as soon as a layer's best AUC does not exceed the
    best AUC so far.

    Args:
        df: DataFrame holding the base probabilities and the label column.
            It is modified in place: one ``layer{k}_best_{model}`` column is
            added per improving layer plus ``final_best_ensemble``.
        prob_cols: Names of the base-model probability columns.
        label_col: Name of the label column.
        max_layers: Maximum number of stacking layers.
        topN: Number of best blends passed on to the next layer.

    Returns:
        Tuple ``(df, best_overall_auc)``. If no layer ever improves on the
        initial AUC of 0 (or ``max_layers`` < 1), ``final_best_ensemble`` is
        filled with ``None`` and the returned AUC is 0.
    """
    y = df[label_col].values
    base_probs = [df[c].values for c in prob_cols]

    # Initial calibration
    current_probs = _calibrated_features(base_probs, y)

    skf = StratifiedKFold(5, shuffle=True, random_state=42)
    # The splitter has a fixed integer seed, so every split() call yields the
    # same folds; materialise them once instead of once per model per layer.
    folds = list(skf.split(current_probs, y))

    # Candidates first, boosters second: this fixes the report order and the
    # tie-breaking order of max()/sorted() below.
    models = list(candidate_models.items()) + list(layer_boosters.items())

    best_overall_auc = 0
    best_overall_prob = None

    for layer in range(1, max_layers + 1):
        print(f"\n=== Layer {layer} ===")
        layer_probs = {}
        layer_auc = {}

        # The mean of the layer inputs is the same for every model in a layer.
        orig_prob = current_probs.mean(axis=1)

        # ---------- stacking models + layer-wise boosters ----------
        for name, model in models:
            stack_prob = _oof_scores(model, current_probs, y, folds)
            ensemble_prob = 0.5 * stack_prob + 0.5 * orig_prob

            layer_probs[name] = ensemble_prob
            layer_auc[name] = roc_auc_score(y, ensemble_prob)
            print(f"{name}: AUC={layer_auc[name]:.4f}")

        # ---------- select best ----------
        best_model = max(layer_auc, key=lambda k: layer_auc[k])
        best_auc = layer_auc[best_model]
        print(f"✅ Layer {layer} best: {best_model} | AUC={best_auc:.4f}")

        if best_auc <= best_overall_auc:
            print("❌ AUC not improved. Stop.")
            break

        best_overall_auc = best_auc
        best_overall_prob = layer_probs[best_model]
        df[f'layer{layer}_best_{best_model}'] = best_overall_prob

        # ---------- top-N enter next layer ----------
        topN_models = sorted(layer_auc.items(),
                             key=lambda x: x[1], reverse=True)[:topN]
        current_probs = _calibrated_features(
            [layer_probs[name] for name, _ in topN_models], y)

    df['final_best_ensemble'] = best_overall_prob
    return df, best_overall_auc

# ================= Load datasets =================

# Each dataset below is a table of per-model probabilities; every column other
# than 'case' and 'label' is treated as one base model's probability.

# data1
data1 = pd.read_csv('OPENSMILE_probabilities.csv')
prob_cols1 = [c for c in data1.columns if c not in ['case','label']]
df1, auc1 = multi_layer_stacking(data1, prob_cols1)
df1.to_csv('data1_multi_layer_ensemble_boosting.csv', index=False)

# data2
data2 = pd.read_csv('./result/Normal single models results/Normal_seven_results.csv')
prob_cols2 = [c for c in data2.columns if c not in ['case','label']]
df2, auc2 = multi_layer_stacking(data2, prob_cols2)
df2.to_csv('data2_multi_layer_ensemble_boosting.csv', index=False)

# data3 (multiple CSV): one file per model, each with a 'prob' column.
data3_folder = './result/text_model/'
# glob returns matches in arbitrary order; sort so that the prob{i} numbering
# and the file that supplies the labels are the same on every run/machine.
files = sorted(glob.glob(os.path.join(data3_folder, '*.csv')))
if not files:
    raise FileNotFoundError(f"No CSV files found in {data3_folder!r}")

dfs = []
labels = None
for i, f in enumerate(files):
    tmp = pd.read_csv(f)
    if labels is None:
        # Labels come from the first file only.
        # NOTE(review): this assumes every file lists the same cases in the
        # same row order - confirm, since the columns are joined by position.
        labels = tmp['label'].values
    dfs.append(tmp[['prob']].rename(columns={'prob': f'prob{i}'}))

df3 = pd.concat(dfs, axis=1)
df3['label'] = labels

df3, auc3 = multi_layer_stacking(df3, [c for c in df3.columns if c != 'label'])
df3.to_csv('data3_multi_layer_ensemble_boosting.csv', index=False)

# data4
data4 = pd.read_csv('lingustic_probabilities.csv')
prob_cols4 = [c for c in data4.columns if c not in ['case','label']]
df4, auc4 = multi_layer_stacking(data4, prob_cols4)
df4.to_csv('data4_multi_layer_ensemble_boosting.csv', index=False)

print(f"\n✅ Final AUCs:")
print(f"data1={auc1:.4f}, data2={auc2:.4f}, data3={auc3:.4f}, data4={auc4:.4f}")
print("✅ All boosting multi-layer ensemble results saved.")


# Multi DLE

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier, Perceptron, PassiveAggressiveClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier
from sklearn.svm import SVC, NuSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, BernoulliNB, ComplementNB
from sklearn.neural_network import MLPClassifier
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier
from scipy.special import expit

# ================= Load existing model probabilities =================
# One result table per source; each contributes a single probability column.
data1 = pd.read_csv('OPENSMILE_probabilities.csv')
data2 = pd.read_csv('./result/Normal single models results/Normal_seven_results.csv')
data3 = pd.read_csv('./result/clinical_bert-base-cantonese_results.csv')
data4 = pd.read_csv('lingustic_probabilities.csv')

# Start from data1 (which also supplies the label), then join the chosen
# column of every other table on 'case'. Each column is renamed to its
# prob<k> name before the join, so data3's own 'prob1' cannot clash with the
# 'prob1' already taken from data1.
df = data1[['case', 'Decision Tree', 'label']].rename(columns={'Decision Tree': 'prob1'})
for _frame, _source_col, _target_col in (
    (data2, 'wave_prob', 'prob2'),
    (data3, 'prob1', 'prob3'),
    (data4, 'Extra Trees', 'prob4'),
):
    _picked = _frame[['case', _source_col]].rename(columns={_source_col: _target_col})
    df = df.merge(_picked, on='case')

# Labels of the cases that survived all joins.
y = df['label'].values

# ================= Calibration Functions =================
def platt_cv(p, y, n_splits=5):
    """Return out-of-fold Platt-scaled probabilities for one score vector.

    For every stratified fold a one-feature logistic regression is fitted on
    the training part of ``p`` against ``y`` and then used to predict the
    held-out part, so no sample is calibrated by a model that saw its label.

    Args:
        p: 1-D array-like of raw scores/probabilities, one per sample.
        y: 1-D array-like of class labels aligned with ``p``.
        n_splits: Number of stratified folds.

    Returns:
        1-D float ndarray with the same length as ``p``; entry ``i`` is the
        second-class column of ``predict_proba`` for sample ``i`` (the
        positive class for 0/1 labels).
    """
    # Force float storage. ``np.zeros_like(p)`` inherits the dtype of ``p``,
    # so an integer score column (e.g. hard 0/1 outputs) would make ``out``
    # integer and silently truncate every calibrated probability on assignment.
    p = np.asarray(p, dtype=float)
    y = np.asarray(y)
    out = np.zeros(p.shape, dtype=float)
    skf = StratifiedKFold(n_splits, shuffle=True, random_state=42)
    for tr, te in skf.split(p, y):
        lr = LogisticRegression(solver='lbfgs', max_iter=1000)
        # sklearn expects a 2-D feature matrix, hence the (-1, 1) reshape.
        lr.fit(p[tr].reshape(-1, 1), y[tr])
        out[te] = lr.predict_proba(p[te].reshape(-1, 1))[:, 1]
    return out

def isotonic_cv(p, y, n_splits=5):
    """Return out-of-fold isotonic-calibrated values for one score vector.

    For every stratified fold an isotonic regression is fitted on the
    training part of ``p`` against ``y`` and applied to the held-out part.
    Held-out scores outside the fitted range are clipped
    (``out_of_bounds='clip'``).

    Args:
        p: 1-D array-like of raw scores/probabilities, one per sample.
        y: 1-D array-like of labels aligned with ``p``.
        n_splits: Number of stratified folds.

    Returns:
        1-D float ndarray with the same length as ``p`` holding each
        sample's held-out calibrated value.
    """
    # Force float storage. ``np.zeros_like(p)`` inherits the dtype of ``p``,
    # so an integer score column would make ``out`` integer and silently
    # truncate the calibrated values on assignment.
    p = np.asarray(p, dtype=float)
    y = np.asarray(y)
    out = np.zeros(p.shape, dtype=float)
    skf = StratifiedKFold(n_splits, shuffle=True, random_state=42)
    for tr, te in skf.split(p, y):
        iso = IsotonicRegression(out_of_bounds='clip')
        iso.fit(p[tr], y[tr])
        out[te] = iso.transform(p[te])
    return out

# ================= Prepare model probabilities =================
# Raw probability vectors of the four merged models (prob1 .. prob4).
existing_probs = [df[f'prob{i}'].values for i in range(1,5)]

# Calibrate each vector twice, out-of-fold: Platt scaling and isotonic.
probs_platt, probs_iso = [], []
for raw_scores in existing_probs:
    probs_platt.append(platt_cv(raw_scores, y))
    probs_iso.append(isotonic_cv(raw_scores, y))

# First-layer feature matrix, one row per case: the four Platt columns
# followed by the four isotonic columns.
all_probs = np.vstack([*probs_platt, *probs_iso]).T

# ================= Candidate stacking models =================
# Meta-learners tried at every stacking layer, keyed by the name used in the
# printed report and in the output column names.
#
# Every estimator that accepts ``random_state`` is seeded with the same value
# (6) already used for RF / ExtraTrees / MLP, so that the layer AUCs, and
# therefore the "AUC no longer improved" stopping decision, are reproducible
# from run to run.
#
# NOTE(review): the boosted models (GBDT / AdaBoost / XGB) keep the original
# learning_rate=1e-5; with 500 rounds that looks very small - confirm it is
# intentional.
candidate_models = {
    # ---- linear models ----
    'LR': LogisticRegression(solver='lbfgs', max_iter=2000),
    'Ridge': RidgeClassifier(max_iter=2000),
    'SGD': SGDClassifier(max_iter=2000, tol=1e-5, random_state=6),
    'Perceptron': Perceptron(max_iter=2000, random_state=6),
    'PassiveAggressive': PassiveAggressiveClassifier(max_iter=2000, random_state=6),
    # ---- tree ensembles ----
    'RF': RandomForestClassifier(n_estimators=500, max_depth=5, random_state=6),
    'ExtraTrees': ExtraTreesClassifier(n_estimators=500, max_depth=5, random_state=6),
    'GBDT': GradientBoostingClassifier(n_estimators=500, learning_rate=1e-5,
                                       random_state=6),
    'AdaBoost': AdaBoostClassifier(n_estimators=500, learning_rate=1e-5,
                                   random_state=6),
    'Bagging': BaggingClassifier(n_estimators=500, random_state=6),
    'XGB': XGBClassifier(n_estimators=500, learning_rate=1e-5,
                          use_label_encoder=False, eval_metric='logloss',
                          random_state=6),
    # ---- kernel / neighbour models ----
    # probability=True so that predict_proba is available for these SVMs.
    'SVC_rbf': SVC(probability=True, kernel='rbf', random_state=6),
    'SVC_linear': SVC(probability=True, kernel='linear', random_state=6),
    'NuSVC': NuSVC(probability=True, random_state=6),
    'KNN': KNeighborsClassifier(n_neighbors=5),
    # ---- naive Bayes ----
    'GaussianNB': GaussianNB(),
    'BernoulliNB': BernoulliNB(),
    'ComplementNB': ComplementNB(),
    # ---- neural networks ----
    'MLP_small': MLPClassifier(hidden_layer_sizes=(32,16), max_iter=2000, random_state=6),
    'MLP_large': MLPClassifier(hidden_layer_sizes=(64,32), max_iter=2000, random_state=6)
}

# ================= Automated multi-layer stacking =================
# Greedy layer-by-layer stacking: each layer blends every candidate model's
# out-of-fold scores with the mean of the layer inputs, keeps the top blends
# as (re-calibrated) inputs for the next layer, and stops as soon as the best
# AUC of a layer no longer beats the best AUC seen so far.
skf = StratifiedKFold(5, shuffle=True, random_state=6)
current_probs = all_probs
best_overall_auc = 0
best_overall_prob = None
layer = 1

# The splitter has a fixed integer seed, so every split() call yields the same
# folds; materialise them once instead of once per model per layer.
folds = list(skf.split(current_probs, y))

while True:
    print(f"\n=== Layer {layer} ===")
    layer_probs = {}
    layer_auc = {}

    # Mean of the layer inputs; identical for every model in this layer, so it
    # is computed once here rather than inside the model loop.
    # NOTE(review): only the first (at most) five columns are averaged, i.e.
    # not all calibrated features of the layer - confirm this is intended.
    orig_prob = current_probs[:, :min(current_probs.shape[1],5)].mean(axis=1)

    for name, model in candidate_models.items():
        stack_prob = np.zeros_like(y, dtype=float)
        for tr, te in folds:
            model.fit(current_probs[tr], y[tr])
            if hasattr(model, "predict_proba"):
                stack_prob[te] = model.predict_proba(current_probs[te])[:,1]
            elif hasattr(model, "decision_function"):
                # No probabilities available: squash the margin into (0, 1).
                stack_prob[te] = expit(model.decision_function(current_probs[te]))
            else:
                stack_prob[te] = model.predict(current_probs[te])
        # 50% stacking + 50% mean probability
        ensemble_prob = 0.5 * stack_prob + 0.5 * orig_prob
        layer_probs[name] = ensemble_prob
        layer_auc[name] = roc_auc_score(y, ensemble_prob)
        print(f"{name}: AUC={layer_auc[name]:.4f}")

    # Find the best model of this layer
    best_model = max(layer_auc, key=lambda k: layer_auc[k])
    best_auc = layer_auc[best_model]
    print(f"✅ Layer {layer} best model: {best_model} | AUC={best_auc:.4f}")

    # Stop when the AUC did not improve
    if best_auc <= best_overall_auc:
        print("\n❌ AUC no longer improved. Stop stacking.")
        break

    # Record the new overall best
    best_overall_auc = best_auc
    best_overall_prob = layer_probs[best_model]
    df[f'layer{layer}_best_{best_model}'] = best_overall_prob

    # Select the top-N models for the next layer
    topN = min(5, len(layer_auc))
    topN_models = sorted(layer_auc.items(), key=lambda x: x[1], reverse=True)[:topN]
    topN_probs = np.vstack([layer_probs[name] for name, _ in topN_models]).T
    # Platt + isotonic calibration
    topN_probs_platt = [platt_cv(p, y) for p in topN_probs.T]
    topN_probs_iso = [isotonic_cv(p, y) for p in topN_probs.T]
    current_probs = np.vstack(topN_probs_platt + topN_probs_iso).T

    layer += 1

# Save the final best probabilities.
# A bare `df[['case','label']].copy()` used to sit here; its result was
# discarded, so it had no effect and was removed. The saved file therefore
# still contains every column of df (inputs and per-layer bests).
# NOTE(review): if only case/label/final_best_ensemble were meant to be saved,
# select those columns explicitly before to_csv.
df['final_best_ensemble'] = best_overall_prob
df.to_csv('automated_multi_layer_ensemble.csv', index=False)
print(f"\n✅ Final best ensemble saved → automated_multi_layer_ensemble.csv | AUC={best_overall_auc:.4f}")
