## Data Processing

In [1]:
import os
import pandas as pd
import numpy as np

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier, StackingClassifier, BaggingClassifier, ExtraTreesClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, NuSVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import HistGradientBoostingClassifier, BaggingClassifier
from sklearn.naive_bayes import BernoulliNB, GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from imblearn.over_sampling import SMOTE, SMOTENC
from imblearn.ensemble import EasyEnsembleClassifier

from utils import save_predictions_to_csv, standardize_data, calculate_auc_score, compare_auc_scores
from natsort import natsorted
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from category_encoders import TargetEncoder

In [2]:
# Load every dataset folder under ./Competition_data, in natural sort order.
# The four lists are index-aligned: entry k of each list belongs to dataset_names[k].
dataset_names=[]
X_trains=[]
y_trains=[]
X_tests=[]

for folder_name in natsorted(os.listdir("./Competition_data")):
    # os.listdir also returns stray files and hidden entries (e.g. .DS_Store,
    # .ipynb_checkpoints); those are not datasets and would make read_csv fail.
    if folder_name.startswith(".") or not os.path.isdir(f"./Competition_data/{folder_name}"):
        continue
    dataset_names.append(folder_name)
    X_trains.append(pd.read_csv(f"./Competition_data/{folder_name}/X_train.csv",header=0))
    y_trains.append(pd.read_csv(f"./Competition_data/{folder_name}/y_train.csv",header=0))
    X_tests.append(pd.read_csv(f"./Competition_data/{folder_name}/X_test.csv",header=0))

def preprocess_data(X_train, y_train, X_test):
    """Oversample the training set, then scale and encode both train and test features.

    Float-typed columns of ``X_train`` are handled as continuous features and
    integer-typed columns as categorical features; columns of any other dtype
    are not carried into the output.

    Steps, in order:
      1. Oversample the training data with SMOTE (no integer columns) or
         SMOTENC (at least one integer column).
      2. Min-max scale the float columns (scaler fitted on the resampled train set).
      3. Target-encode the integer columns (encoder fitted on the resampled train set).
      4. Concatenate scaled floats followed by encoded integers.

    Parameters
    ----------
    X_train : DataFrame of training features.
    y_train : training labels aligned with ``X_train``.
    X_test : DataFrame of test features with the same columns as ``X_train``.

    Returns
    -------
    (X_train, y_train, X_test) where both feature matrices are the horizontally
    stacked [scaled floats | encoded integers] blocks and ``y_train`` is the
    resampled label vector.

    NOTE(review): resampling happens before any cross-validation split made by
    the caller, so synthetic rows can land in validation folds — confirm this is
    intended.
    """
    # Column groups are decided on the original (pre-resampling) frame.
    float_cols = X_train.select_dtypes(include=['float']).columns
    int_cols = X_train.select_dtypes(include=['int']).columns

    # Positional indices of the integer (categorical) columns.
    int_positions = [X_train.columns.get_loc(name) for name in int_cols]

    # Class-imbalance handling: choose the sampler, then resample once.
    if len(int_positions) != 0:
        is_categorical = [pos in int_positions for pos in range(X_train.shape[1])]
        sampler = SMOTENC(categorical_features=is_categorical, random_state=41)
    else:
        sampler = SMOTE(random_state=40)
    X_train, y_train = sampler.fit_resample(X_train, y_train)

    # Scale continuous features to the range learned from the resampled train set.
    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(X_train[float_cols])
    test_scaled = scaler.transform(X_test[float_cols])

    # Target-encode categorical features; use zero-width blocks when there are none.
    if len(int_cols) > 0:
        encoder = TargetEncoder(cols=int_cols)
        train_encoded = encoder.fit_transform(X_train[int_cols], y_train)
        test_encoded = encoder.transform(X_test[int_cols])
    else:
        train_encoded = np.empty((train_scaled.shape[0], 0))
        test_encoded = np.empty((test_scaled.shape[0], 0))

    # Final layout: scaled floats first, encoded integers second.
    train_matrix = np.hstack((train_scaled, train_encoded))
    test_matrix = np.hstack((test_scaled, test_encoded))

    return train_matrix, y_train, test_matrix

In [None]:
# Experiment: for every dataset, run 5-fold stratified CV over bagged trees, gradient
# boosters and a stacked ensemble; blend their probabilities with AUC-proportional
# weights; pass the blended score through a one-feature logistic-regression meta-model;
# then write the fold-averaged test predictions and the per-dataset validation AUCs.
validation_auc_scores = []

for i in range(len(X_trains)):
    X_train, y_train, X_test = preprocess_data(X_trains[i], y_trains[i].values.ravel(), X_tests[i])

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    test_predictions_all_folds = []
    fold_auc_scores = []

    for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train)):
        X_train_fold = X_train[train_idx, :]
        y_train_fold = y_train[train_idx]
        X_val_fold = X_train[val_idx, :]
        y_val_fold = y_train[val_idx]

        # Base models fitted up front on the training fold.
        rf = BaggingClassifier(estimator=RandomForestClassifier(), n_estimators=10, random_state=43, n_jobs=-1)
        rf.fit(X_train_fold, y_train_fold)
        rf_val_pred = rf.predict_proba(X_val_fold)[:, 1]

        etc = BaggingClassifier(estimator=ExtraTreesClassifier(), n_estimators=10, random_state=44, n_jobs=-1)
        etc.fit(X_train_fold, y_train_fold)
        etc_val_pred = etc.predict_proba(X_val_fold)[:, 1]

        # NOTE(review): XGBoost and LightGBM early-stop on the validation fold, the same
        # fold that is scored below, so their validation AUC is optimistic.
        xgb = XGBClassifier(
            n_estimators=200, max_depth=6, reg_alpha=0.1, reg_lambda=0.1, eval_metric='auc',
            n_jobs=-1, early_stopping_rounds=5, random_state=45
        )
        xgb.fit(X_train_fold, y_train_fold, eval_set=[(X_val_fold, y_val_fold)], verbose=0)
        xgb_val_pred = xgb.predict_proba(X_val_fold)[:, 1]

        lgbm = LGBMClassifier(
            n_estimators=200, max_depth=6, reg_alpha=0.1, reg_lambda=0.1, min_child_samples=5,
            min_split_gain=0.01, early_stopping_round=5, n_jobs=-1, verbose=-1, random_state=46
        )
        lgbm.fit(X_train_fold, y_train_fold, eval_metric='auc', eval_set=[(X_val_fold, y_val_fold)])
        lgbm_val_pred = lgbm.predict_proba(X_val_fold)[:, 1]

        gbc = GradientBoostingClassifier(n_estimators=200, max_depth=6, learning_rate=0.1, random_state=47)
        gbc.fit(X_train_fold, y_train_fold)
        gbc_val_pred = gbc.predict_proba(X_val_fold)[:, 1]

        adaboost = AdaBoostClassifier(n_estimators=200, learning_rate=0.1, random_state=48)
        adaboost.fit(X_train_fold, y_train_fold)
        adaboost_val_pred = adaboost.predict_proba(X_val_fold)[:, 1]

        catboost = CatBoostClassifier(iterations=200, learning_rate=0.1, depth=6, verbose=0, random_state=49)
        catboost.fit(X_train_fold, y_train_fold)
        catboost_val_pred = catboost.predict_proba(X_val_fold)[:, 1]

        hist_gb = HistGradientBoostingClassifier(max_iter=200, max_depth=6, learning_rate=0.1, random_state=50)
        hist_gb.fit(X_train_fold, y_train_fold)
        hist_gb_val_pred = hist_gb.predict_proba(X_val_fold)[:, 1]

        # Disabled experiment: EasyEnsembleClassifier as an additional base model.
        # easy_ensemble = EasyEnsembleClassifier(n_estimators=10, random_state=51, n_jobs=-1)
        # easy_ensemble.fit(X_train_fold,y_train_fold)
        # easy_ensemble_val_pred = easy_ensemble.predict_proba(X_val_fold)[:, 1]

        # Models that are not fitted here; they are handed to the stacking classifier.
        lg = LogisticRegression(max_iter=1000, random_state=52, n_jobs=-1)
        svm = SVC(kernel='rbf', C=0.5, gamma='auto', probability=True, random_state=53)
        nusvc = NuSVC(probability=True, random_state=54)
        mlp = MLPClassifier(hidden_layer_sizes=(100,50,), alpha=0.0001, learning_rate='adaptive', random_state=57)
        bnb = BernoulliNB()
        gnb = GaussianNB()
        lda = LinearDiscriminantAnalysis()

        # Stacking model: lg is passed both as a base estimator and as the final estimator.
        stk = StackingClassifier(
            estimators=[('lg',lg),('svm',svm),('nusvc',nusvc),('mlp',mlp),('bnb',bnb),('gnb',gnb),('lda',lda)],
            final_estimator=lg,
            stack_method='predict_proba',
            cv=3,
            n_jobs=-1
        )

        stk.fit(X_train_fold, y_train_fold)
        stk_val_pred = stk.predict_proba(X_val_fold)[:, 1]

        # Per-model validation AUC, used below as blending weights.
        rf_auc = roc_auc_score(y_val_fold, rf_val_pred)
        etc_auc = roc_auc_score(y_val_fold, etc_val_pred)
        xgb_auc = roc_auc_score(y_val_fold, xgb_val_pred)
        lgbm_auc = roc_auc_score(y_val_fold, lgbm_val_pred)
        gbc_auc = roc_auc_score(y_val_fold, gbc_val_pred)
        adaboost_auc = roc_auc_score(y_val_fold, adaboost_val_pred)
        catboost_auc = roc_auc_score(y_val_fold, catboost_val_pred)
        hist_gb_auc = roc_auc_score(y_val_fold, hist_gb_val_pred)
        #easy_ensemble_auc = roc_auc_score(y_val_fold, easy_ensemble_val_pred)
        stk_auc = roc_auc_score(y_val_fold, stk_val_pred)

        model_aucs = [rf_auc, etc_auc, xgb_auc, lgbm_auc, gbc_auc, adaboost_auc, catboost_auc, hist_gb_auc, stk_auc]

        # Weights proportional to each model's AUC; they sum to 1.
        model_auc_total = sum(model_aucs)
        normalized_weights = [auc / model_auc_total for auc in model_aucs]

        # Combine the base models' predicted probabilities: one column per model,
        # collapsed to a single AUC-weighted score per sample.
        meta_features = np.column_stack([rf_val_pred, etc_val_pred, xgb_val_pred, lgbm_val_pred, gbc_val_pred, adaboost_val_pred,
                                          catboost_val_pred, hist_gb_val_pred, stk_val_pred])
        weighted_meta_features = np.average(meta_features, axis=1, weights=normalized_weights)
        # meta_model is the same object as lg; it is refitted here on the blended score.
        meta_model = lg
        meta_model.fit(weighted_meta_features.reshape(-1,1), y_val_fold)

        # Validation-set predictions.
        # NOTE(review): the meta-model is scored on the fold it was just fitted on,
        # so this AUC is an in-sample figure.
        y_val_pred = meta_model.predict_proba(weighted_meta_features.reshape(-1,1))[:, 1]
        val_auc = roc_auc_score(y_val_fold, y_val_pred)
        print(f"Validation AUC for fold {fold + 1}: {val_auc:.4f}")
        fold_auc_scores.append(val_auc)

        # Test-set predictions, built with the same column order as meta_features.
        rf_test_pred = rf.predict_proba(X_test)[:, 1]
        etc_test_pred = etc.predict_proba(X_test)[:, 1]
        xgb_test_pred = xgb.predict_proba(X_test)[:, 1]
        lgbm_test_pred = lgbm.predict_proba(X_test)[:, 1]
        gbc_test_pred = gbc.predict_proba(X_test)[:, 1]
        adaboost_test_pred = adaboost.predict_proba(X_test)[:, 1]
        catboost_test_pred = catboost.predict_proba(X_test)[:, 1]
        hist_gb_test_pred = hist_gb.predict_proba(X_test)[:, 1]
        #easy_ensemble_test_pred = easy_ensemble.predict_proba(X_test)[:, 1]
        stk_test_pred = stk.predict_proba(X_test)[:, 1]
        test_meta_features = np.column_stack([rf_test_pred, etc_test_pred, xgb_test_pred, lgbm_test_pred, gbc_test_pred, adaboost_test_pred,
                                               catboost_test_pred, hist_gb_test_pred, stk_test_pred])
        weighted_test_feature = np.average(test_meta_features, axis=1, weights=normalized_weights)
        # The meta-model was fitted on the single blended score, so it must be fed the
        # blended test score. Reshaping the raw (n_test, 9) matrix to (-1, 1) would
        # yield 9 * n_test interleaved rows instead of one prediction per test sample.
        y_test_pred = meta_model.predict_proba(weighted_test_feature.reshape(-1, 1))[:, 1]
        test_predictions_all_folds.append(y_test_pred)

    # Compute the weighted test-set prediction: folds weighted by their validation AUC.
    total_auc = sum(fold_auc_scores)
    fold_weights = [auc / total_auc for auc in fold_auc_scores]
    test_predictions_final = np.average(test_predictions_all_folds, axis=0, weights=fold_weights)

    # Compute the mean validation AUC for this dataset.
    avg_val_auc = np.mean(fold_auc_scores)
    print(f"Average AUC for {i+1}'th Dataset: {avg_val_auc:.4f}")
    validation_auc_scores.append(avg_val_auc)

    # Save the test predictions to CSV inside the dataset's own folder.
    df = pd.DataFrame(test_predictions_final, columns=['y_predict_proba'])
    df.to_csv(f'./Competition_data/{dataset_names[i]}/y_predict.csv', index=False, header=True)

# Save the per-dataset AUC scores to a CSV file.
auc_list = pd.DataFrame(validation_auc_scores, columns=["Validation AUC"])
auc_list.to_csv('./validation_auc_scores_test.csv', index_label='Dataset_Index', header=True)

In [None]:
# Experiment: same base-model line-up as a 5-fold CV pipeline, but with plain (un-bagged)
# RandomForest / ExtraTrees and a logistic-regression meta-model fitted on all nine
# base-model probability columns instead of a single blended score.
validation_auc_scores = []

# Only dataset index 2 is processed here (range(2,3) yields just i == 2).
# NOTE(review): validation_auc_scores therefore holds one entry, so the AUC CSV written
# at the end is presumably labelled by list position rather than by i — verify.
for i in range(2,3):

    X_train, y_train, X_test = preprocess_data(X_trains[i], y_trains[i].values.ravel(), X_tests[i])

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    test_predictions_all_folds = []
    fold_auc_scores = []

    for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train)):

        X_train_fold = X_train[train_idx, :]
        y_train_fold = y_train[train_idx]
        X_val_fold = X_train[val_idx, :]
        y_val_fold = y_train[val_idx]

        # Base models fitted up front on the training fold.
        # (The commented-out lines keep the earlier bagged variants for reference.)
        #rf = BaggingClassifier(estimator=RandomForestClassifier(min_samples_leaf=3, min_samples_split=3), n_estimators=10, random_state=43, n_jobs=-1)
        rf = RandomForestClassifier(n_estimators=500, max_depth=20, min_samples_split=3, min_samples_leaf=3, n_jobs=-1, random_state=43)
        rf.fit(X_train_fold, y_train_fold)
        rf_val_pred = rf.predict_proba(X_val_fold)[:, 1]

        #etc = BaggingClassifier(estimator=ExtraTreesClassifier(min_samples_leaf=3, min_samples_split=3), n_estimators=10, random_state=44, n_jobs=-1)
        etc = ExtraTreesClassifier(n_estimators=500, max_depth=20, min_samples_split=3, min_samples_leaf=4 ,random_state=44, n_jobs=-1)
        etc.fit(X_train_fold, y_train_fold)
        etc_val_pred = etc.predict_proba(X_val_fold)[:, 1]

        # NOTE(review): XGBoost and LightGBM receive the validation fold as eval_set
        # for early stopping; the same fold is scored below.
        xgb = XGBClassifier(
            n_estimators=200, max_depth=6, reg_alpha=0.1, reg_lambda=0.1, eval_metric='auc',
              n_jobs= -1, early_stopping_rounds=5, random_state=45
        )
        xgb.fit(X_train_fold, y_train_fold, eval_set=[(X_val_fold, y_val_fold)],verbose = 0)
        xgb_val_pred = xgb.predict_proba(X_val_fold)[:, 1]

        lgbm = LGBMClassifier(
            n_estimators=200, max_depth=6, reg_alpha=0.1, reg_lambda=0.1, min_child_samples = 5,
              min_split_gain=0.01, early_stopping_round=10, n_jobs=-1, verbose=-1, random_state=46
        )
        lgbm.fit(X_train_fold, y_train_fold, eval_metric='auc', eval_set=[(X_val_fold, y_val_fold)])
        lgbm_val_pred = lgbm.predict_proba(X_val_fold)[:, 1]

        gbc = GradientBoostingClassifier(n_estimators=200, max_depth=6, learning_rate=0.1, random_state=47)
        gbc.fit(X_train_fold, y_train_fold)
        gbc_val_pred = gbc.predict_proba(X_val_fold)[:, 1]

        adaboost = AdaBoostClassifier(n_estimators=200, learning_rate=0.1, random_state=48)
        adaboost.fit(X_train_fold, y_train_fold)
        adaboost_val_pred = adaboost.predict_proba(X_val_fold)[:, 1]

        catboost = CatBoostClassifier(iterations=200, learning_rate=0.1, depth=6, verbose=0, random_state=49)
        catboost.fit(X_train_fold,y_train_fold)
        catboost_val_pred = catboost.predict_proba(X_val_fold)[:, 1]

        hist_gb = HistGradientBoostingClassifier(max_iter=200, max_depth=6, learning_rate=0.1, early_stopping=True,  random_state=50)
        hist_gb.fit(X_train_fold,y_train_fold)
        hist_gb_val_pred = hist_gb.predict_proba(X_val_fold)[:, 1]

        # Models that are not fitted here; they are handed to the stacking classifier.
        mlp = MLPClassifier(hidden_layer_sizes=(100, 50), learning_rate='adaptive', max_iter=1000, alpha=0.001, early_stopping=True, random_state=49)
        svm = SVC(kernel='rbf', C=0.5, gamma='auto', probability=True, random_state=50)
        lg = LogisticRegression(max_iter=500, random_state=51)
        bnb = BernoulliNB()
        gnb = GaussianNB()
        lda = LinearDiscriminantAnalysis()
        nusvc = NuSVC(probability=True, random_state=52)

        # Stacking model: lg is passed both as a base estimator and as the final estimator.
        stacking_model = StackingClassifier(
            estimators=[('mlp', mlp),('svm', svm),('lg',lg),('bnb',bnb),('gnb',gnb),('lda',lda),('nusvc',nusvc)],
            final_estimator=lg,
            stack_method='predict_proba',
            cv=3,
            n_jobs=-1
        )

        stacking_model.fit(X_train_fold, y_train_fold)
        stk_val_pred = stacking_model.predict_proba(X_val_fold)[:, 1]

        # Combine the base models' predicted probabilities: one column per model.
        meta_features = np.column_stack([rf_val_pred, etc_val_pred, xgb_val_pred, lgbm_val_pred, gbc_val_pred,
                                          adaboost_val_pred,  catboost_val_pred, hist_gb_val_pred, stk_val_pred])
        # meta_model is the same object as lg; it is refitted here on the nine columns.
        meta_model = lg
        meta_model.fit(meta_features, y_val_fold)

        # Validation-set predictions.
        # NOTE(review): the meta-model is scored on the fold it was just fitted on,
        # so this AUC is an in-sample figure.
        y_val_pred = meta_model.predict_proba(meta_features)[:, 1]
        val_auc = roc_auc_score(y_val_fold, y_val_pred)
        print(f"Validation AUC for fold {fold + 1}: {val_auc:.4f}")
        fold_auc_scores.append(val_auc)

        # Test-set predictions. The statements run in a different order than the
        # validation ones, but column_stack below restores the meta_features column order.
        rf_test_pred = rf.predict_proba(X_test)[:, 1]
        etc_test_pred = etc.predict_proba(X_test)[:, 1]
        xgb_test_pred = xgb.predict_proba(X_test)[:, 1]
        lgbm_test_pred = lgbm.predict_proba(X_test)[:, 1]
        gbc_test_pred = gbc.predict_proba(X_test)[:, 1]
        adaboost_test_pred = adaboost.predict_proba(X_test)[:, 1]
        stk_test_pred = stacking_model.predict_proba(X_test)[:, 1]
        catboost_test_pred = catboost.predict_proba(X_test)[:, 1]
        hist_gb_test_pred = hist_gb.predict_proba(X_test)[:, 1]
        test_meta_features = np.column_stack([rf_test_pred, etc_test_pred, xgb_test_pred, lgbm_test_pred,
                                              gbc_test_pred, adaboost_test_pred, catboost_test_pred, hist_gb_test_pred, stk_test_pred])
        y_test_pred = meta_model.predict_proba(test_meta_features)[:, 1]
        test_predictions_all_folds.append(y_test_pred)

    # Compute the weighted test-set prediction: folds weighted by their validation AUC.
    total_auc = sum(fold_auc_scores)
    fold_weights = [auc / total_auc for auc in fold_auc_scores]
    test_predictions_final = np.average(test_predictions_all_folds, axis=0, weights=fold_weights)

    # Compute the mean validation AUC for this dataset.
    avg_val_auc = np.mean(fold_auc_scores)
    print(f"Average AUC for {i+1}'th Dataset: {avg_val_auc:.4f}")
    validation_auc_scores.append(avg_val_auc)

    # Save the test predictions to CSV inside the dataset's own folder.
    df = pd.DataFrame(test_predictions_final, columns=['y_predict_proba'])
    df.to_csv(f'./Competition_data/{dataset_names[i]}/y_predict.csv', index=False, header=True)

# Save the AUC scores to a CSV file.
auc_list = pd.DataFrame(validation_auc_scores, columns=["Validation AUC"])
auc_list.to_csv('./validation_auc_scores.csv', index_label='Dataset_Index', header=True)

In [None]:
# Experiment: for every dataset, run 5-fold CV with bagged RandomForest / ExtraTrees /
# SVC / NuSVC / MLP, three boosters and a stacked model, then fit a bagged-MLP
# meta-model on the ten base-model probability columns.
validation_auc_scores = []

for i in range(len(X_trains)):
    X_train, y_train, X_test = preprocess_data(X_trains[i], y_trains[i].values.ravel(), X_tests[i])

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    test_predictions_all_folds = []
    fold_auc_scores = []

    for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train)):
        X_train_fold = X_train[train_idx, :]
        y_train_fold = y_train[train_idx]
        X_val_fold = X_train[val_idx, :]
        y_val_fold = y_train[val_idx]

        # Base models fitted up front on the training fold.
        rf = BaggingClassifier(estimator=RandomForestClassifier(), n_estimators=10, random_state=43, n_jobs=-1)
        rf.fit(X_train_fold, y_train_fold)
        rf_val_pred = rf.predict_proba(X_val_fold)[:, 1]

        etc = BaggingClassifier(estimator=ExtraTreesClassifier(), n_estimators=10, random_state=44, n_jobs=-1) 
        etc.fit(X_train_fold, y_train_fold)
        etc_val_pred = etc.predict_proba(X_val_fold)[:, 1]

        # NOTE(review): SVC() and NuSVC() are created without probability=True here;
        # verify what BaggingClassifier.predict_proba returns for such estimators.
        svm = BaggingClassifier(estimator=SVC(), n_estimators=10, random_state=45, n_jobs=-1)
        svm.fit(X_train_fold,y_train_fold)
        svm_val_pred = svm.predict_proba(X_val_fold)[:, 1]

        nusvc = BaggingClassifier(estimator=NuSVC(), n_estimators=10, random_state=46, n_jobs=-1)
        nusvc.fit(X_train_fold, y_train_fold)
        nusvc_val_pred = nusvc.predict_proba(X_val_fold)[:, 1]

        mlp = BaggingClassifier(estimator=MLPClassifier(), n_estimators=10, random_state=47, n_jobs=-1)
        mlp.fit(X_train_fold,y_train_fold)
        mlp_val_pred = mlp.predict_proba(X_val_fold)[:,1]

        # NOTE(review): XGBoost and LightGBM receive the validation fold as eval_set
        # for early stopping; the same fold is scored below.
        xgb = XGBClassifier(n_estimators=200, max_depth=6, reg_alpha=0.1, reg_lambda=0.1,
                             eval_metric='auc', early_stopping_rounds=5, random_state=48, n_jobs=-1)
        xgb.fit(X_train_fold, y_train_fold, eval_set=[(X_val_fold, y_val_fold)],verbose = 0)
        xgb_val_pred = xgb.predict_proba(X_val_fold)[:, 1]

        lgbm = LGBMClassifier(n_estimators=200, max_depth=6, reg_alpha=0.1, reg_lambda=0.1, min_child_samples=5,
                               min_split_gain=0.01, early_stopping_round=5, verbose=-1, random_state=49, n_jobs=-1)
        lgbm.fit(X_train_fold, y_train_fold, eval_metric='auc', eval_set=[(X_val_fold, y_val_fold)])
        lgbm_val_pred = lgbm.predict_proba(X_val_fold)[:, 1]

        gbc = GradientBoostingClassifier(n_estimators=200, max_depth=6, learning_rate=0.1, random_state=50)
        gbc.fit(X_train_fold, y_train_fold)
        gbc_val_pred = gbc.predict_proba(X_val_fold)[:, 1]

        adaboost = AdaBoostClassifier(n_estimators=200, learning_rate=0.1, random_state=51)
        adaboost.fit(X_train_fold, y_train_fold)
        adaboost_val_pred = adaboost.predict_proba(X_val_fold)[:, 1]

        # Models that are not fitted here; they are handed to the stacking classifier.
        lg = LogisticRegression(max_iter=1000, random_state=52)
        bnb = BernoulliNB()
        gnb = GaussianNB()
        lda = LinearDiscriminantAnalysis()

        # Stacking model: four linear / naive-Bayes base estimators with a bagged MLP
        # as the final estimator.
        stacking_model = StackingClassifier(
            estimators=[('lg',lg),('bnb',bnb),('gnb',gnb),('lda',lda)],
            final_estimator=BaggingClassifier(estimator=MLPClassifier(), n_estimators=10, random_state=53, n_jobs=-1),
            stack_method='predict_proba',
            cv=3,
            n_jobs=-1
        )

        stacking_model.fit(X_train_fold, y_train_fold)
        stk_val_pred = stacking_model.predict_proba(X_val_fold)[:, 1]

        # Combine the base models' predicted probabilities: one column per model.
        meta_features = np.column_stack([rf_val_pred, etc_val_pred, svm_val_pred, nusvc_val_pred, mlp_val_pred,
                                          xgb_val_pred, lgbm_val_pred, gbc_val_pred, adaboost_val_pred, stk_val_pred])
        meta_model = BaggingClassifier(estimator=MLPClassifier(), n_estimators=10, random_state=54, n_jobs=-1)
        meta_model.fit(meta_features, y_val_fold)

        # Validation-set predictions.
        # NOTE(review): the meta-model is scored on the fold it was just fitted on,
        # so this AUC is an in-sample figure.
        y_val_pred = meta_model.predict_proba(meta_features)[:, 1]
        val_auc = roc_auc_score(y_val_fold, y_val_pred)
        print(f"Validation AUC for fold {fold + 1}: {val_auc:.4f}")
        fold_auc_scores.append(val_auc)

        # Test-set predictions, stacked in the same column order as meta_features.
        rf_test_pred = rf.predict_proba(X_test)[:, 1]
        etc_test_pred = etc.predict_proba(X_test)[:, 1]
        svm_test_pred = svm.predict_proba(X_test)[:, 1]
        nusvc_test_pred = nusvc.predict_proba(X_test)[:, 1]
        mlp_test_pred = mlp.predict_proba(X_test)[:, 1]
        xgb_test_pred = xgb.predict_proba(X_test)[:, 1]
        lgbm_test_pred = lgbm.predict_proba(X_test)[:, 1]
        gbc_test_pred = gbc.predict_proba(X_test)[:, 1]
        adaboost_test_pred = adaboost.predict_proba(X_test)[:, 1]
        stk_test_pred = stacking_model.predict_proba(X_test)[:, 1]
        test_meta_features = np.column_stack([rf_test_pred, etc_test_pred, svm_test_pred, nusvc_test_pred, mlp_test_pred,
                                              xgb_test_pred, lgbm_test_pred, gbc_test_pred, adaboost_test_pred, stk_test_pred])
        y_test_pred = meta_model.predict_proba(test_meta_features)[:, 1]
        test_predictions_all_folds.append(y_test_pred)

    # Compute the weighted test-set prediction: folds weighted by their validation AUC.
    total_auc = sum(fold_auc_scores)
    fold_weights = [auc / total_auc for auc in fold_auc_scores]
    test_predictions_final = np.average(test_predictions_all_folds, axis=0, weights=fold_weights)

    # Compute the mean validation AUC for this dataset.
    avg_val_auc = np.mean(fold_auc_scores)
    print(f"Average AUC for {i+1}'th Dataset: {avg_val_auc:.4f}")
    validation_auc_scores.append(avg_val_auc)

    # Save the test predictions to CSV inside the dataset's own folder.
    df = pd.DataFrame(test_predictions_final, columns=['y_predict_proba'])
    df.to_csv(f'./Competition_data/{dataset_names[i]}/y_predict.csv', index=False, header=True)

# Save the AUC scores to a CSV file.
auc_list = pd.DataFrame(validation_auc_scores, columns=["Validation AUC"])
auc_list.to_csv('./validation_auc_scores.csv', index_label='Dataset_Index', header=True)

Validation AUC for fold 1: 0.9514
Validation AUC for fold 2: 0.9319
Validation AUC for fold 3: 0.9217
Validation AUC for fold 4: 0.9324
Validation AUC for fold 5: 0.9524
Average AUC for 1'th Dataset: 0.9379
Validation AUC for fold 1: 0.9997
Validation AUC for fold 2: 0.9982
Validation AUC for fold 3: 1.0000
Validation AUC for fold 4: 1.0000
Validation AUC for fold 5: 1.0000
Average AUC for 2'th Dataset: 0.9996
Validation AUC for fold 1: 0.9184
Validation AUC for fold 2: 1.0000
Validation AUC for fold 3: 0.9592
Validation AUC for fold 4: 0.9592
Validation AUC for fold 5: 1.0000
Average AUC for 3'th Dataset: 0.9673
Validation AUC for fold 1: 0.9943
Validation AUC for fold 2: 0.9974
Validation AUC for fold 3: 0.9963
Validation AUC for fold 4: 0.9970
Validation AUC for fold 5: 0.9984
Average AUC for 4'th Dataset: 0.9967
Validation AUC for fold 1: 0.9992
Validation AUC for fold 2: 0.9995
Validation AUC for fold 3: 0.9989
Validation AUC for fold 4: 1.0000
Validation AUC for fold 5: 0.9984
Av