# 필수 라이브러리 설치

In [8]:
# =========================================================
# 0. 필수 라이브러리 설치
# =========================================================
!pip install -q xgboost lightgbm catboost optuna scikit-learn pandas numpy torch

import pandas as pd
import numpy as np
import os
import warnings
import pickle
from tqdm.notebook import tqdm

# 모델 라이브러리
import xgboost as xgb
import lightgbm as lgb
import catboost as cb
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

# Scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.metrics import average_precision_score, precision_recall_curve
from sklearn.preprocessing import StandardScaler

# 기본 설정
warnings.filterwarnings('ignore')



# 공통 설정

In [9]:
class Config:
    """실행에 필요한 모든 설정값을 관리합니다."""
    # --- 기본 경로 설정 ---
    BASE_DRIVE_PATH = '/content/drive/MyDrive/'
    S1_DATA_PATH = os.path.join(BASE_DRIVE_PATH, "1014/data/")
    S2_DATA_PATH = os.path.join(BASE_DRIVE_PATH, "review_helpfulness/PADA/data/")
    S2_EMBEDDING_PATH = os.path.join(BASE_DRIVE_PATH, "review_helpfulness/PADA/embedding/")

    # --- 결과 저장 경로 ---
    SAVE_DIR = os.path.join(BASE_DRIVE_PATH, "review_helpfulness/PADA/results/fusion/ResidualGuard_Tuned")

    # --- 모델 및 플랫폼 리스트 ---
    PLATFORMS = ["Amazon", "Coursera", "Audible", "Hotel"]
    S1_MODELS = ["S1_XGBoost", "S1_LightGBM", "S1_MLP"]
    S2_MODELS = ["S2_XGBoost", "S2_LightGBM", "S2_CatBoost", "S2_MLP"]
    S2_EMBEDDING = "T5"

    # --- 데이터 공통 설정 ---
    TARGET_COLUMN = 'binary_helpfulness'
    TEST_SPLIT_RATIO = 0.2
    RANDOM_STATE = 42
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# --- Google Drive 마운트 ---
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print(f"✅ Google Drive 마운트 성공. DEVICE: {Config.DEVICE}")
except ImportError:
    print("⚠️ Google Colab 환경이 아닙니다. 로컬 경로를 확인해주세요.")

# --- S1 피처셋 정의 ---
S1_FEATURES = {
    "Amazon":  ['Average_Rating','Rating','Deviation_Of_Star_Ratings','Time_Lapsed','Price','Text_Length','Valence','Arousal','Title_Length','Num_of_Ratings','Is_Photo','Flesch_Reading_Ease','FOG_Index','Sentiment_Score','new_depth','new_breadth'],
    "Coursera":['Average_Rating','Rating','Deviation_Of_Star_Ratings','Time_Lapsed','Num_of_Reviews','Num_of_Enrolled','Num_of_top_instructor_courses','Num_of_top_instructor_learners','Text_Length','Valence','Arousal','Num_of_Ratings','Flesch_Reading_Ease','FOG_Index','Sentiment_Score','new_depth','new_breadth'],
    "Audible": ['Average_Rating','Rating','Deviation_Of_Star_Ratings','Time_Lapsed','Text_Length','Valence','Arousal','Title_Length','Num_of_Ratings','Flesch_Reading_Ease','FOG_Index','Sentiment_Score','new_depth','new_breadth'],
    "Hotel":   ['Average_Rating','Rating','Deviation_Of_Star_Ratings','Time_Lapsed','Text_Length','Valence','Arousal','Title_Length','Num_of_Ratings','Flesch_Reading_Ease','FOG_Index','Sentiment_Score','new_depth','new_breadth','Is_Photo','Hotel_Grade','Employee_Friendliness_Score','Facility_Score','Cleanliness_Score','Comfort_Score','Value_For_Money_Score','Location_Score']
}

# --- 모델별 최적 하이퍼파라미터 ---
BEST_PARAMS = {
    'S1_XGBoost': {
        "Amazon":   {'learning_rate': 0.0113, 'max_depth': 8, 'min_child_weight': 4, 'subsample': 0.9508, 'colsample_bytree': 0.6807, 'reg_lambda': 1.715e-07, 'reg_alpha': 0.0095, 'n_estimators': 1800},
        "Coursera": {'learning_rate': 0.0110, 'max_depth': 12, 'min_child_weight': 3, 'subsample': 0.8639, 'colsample_bytree': 0.6403, 'reg_lambda': 3.224e-08, 'reg_alpha': 0.0019, 'n_estimators': 1400},
        "Audible":  {'learning_rate': 0.0175, 'max_depth': 11, 'min_child_weight': 1, 'subsample': 0.8034, 'colsample_bytree': 0.7860, 'reg_lambda': 0.0307, 'reg_alpha': 0.0004, 'n_estimators': 400},
        "Hotel":    {'learning_rate': 0.0419, 'max_depth': 12, 'min_child_weight': 8, 'subsample': 0.9986, 'colsample_bytree': 0.7078, 'reg_lambda': 0.7264, 'reg_alpha': 2.3864, 'n_estimators': 500}
    },
    'S1_LightGBM': {
        "Amazon":   {'n_estimators': 1050, 'learning_rate': 0.0178, 'num_leaves': 114, 'max_depth': 14, 'min_child_samples': 12, 'subsample': 0.7996, 'colsample_bytree': 0.6301, 'reg_alpha': 0.5316, 'reg_lambda': 2.0077},
        "Coursera": {'n_estimators': 402, 'learning_rate': 0.0292, 'num_leaves': 212, 'max_depth': -1, 'min_child_samples': 32, 'subsample': 0.8438, 'colsample_bytree': 0.8439, 'reg_alpha': 0.0778, 'reg_lambda': 4.9651},
        "Audible":  {'n_estimators': 779, 'learning_rate': 0.0113, 'num_leaves': 185, 'max_depth': 15, 'min_child_samples': 19, 'subsample': 0.8464, 'colsample_bytree': 0.6334, 'reg_alpha': 0.2720, 'reg_lambda': 0.7640},
        "Hotel":    {'n_estimators': 393, 'learning_rate': 0.0145, 'num_leaves': 101, 'max_depth': 16, 'min_child_samples': 10, 'subsample': 0.7079, 'colsample_bytree': 0.7784, 'reg_alpha': 0.6309, 'reg_lambda': 1.4099}
    },
    'S1_MLP': {
        "Amazon":   {"hidden_dim": 256, "depth": 3, "dropout": 0.1943, "lr": 0.0022, "weight_decay": 8.398e-06, "batch_size": 256},
        "Coursera": {"hidden_dim": 256, "depth": 3, "dropout": 0.4100, "lr": 0.00097, "weight_decay": 3.726e-07, "batch_size": 256},
        "Audible":  {"hidden_dim": 256, "depth": 4, "dropout": 0.1952, "lr": 0.0017, "weight_decay": 3.636e-05, "batch_size": 512},
        "Hotel":    {"hidden_dim": 128, "depth": 3, "dropout": 0.3104, "lr": 0.0014, "weight_decay": 5.660e-05, "batch_size": 512}
    },
    'S2_XGBoost': {
        "Amazon":   {'n_estimators': 1700, 'learning_rate': 0.0028, 'max_depth': 10, 'gamma': 1.08e-08, 'reg_alpha': 2.33e-05, 'reg_lambda': 0.00044, 'colsample_bytree': 0.649, 'subsample': 0.940},
        "Coursera": {'n_estimators': 1700, 'learning_rate': 0.0041, 'max_depth': 8, 'gamma': 4.03e-08, 'reg_alpha': 0.5148, 'reg_lambda': 0.0189, 'colsample_bytree': 0.937, 'subsample': 0.703},
        "Audible":  {'n_estimators': 1100, 'learning_rate': 0.0018, 'max_depth': 10, 'gamma': 5.64e-06, 'reg_alpha': 0.2292, 'reg_lambda': 2.48e-08, 'colsample_bytree': 0.649, 'subsample': 0.622},
        "Hotel":    {'n_estimators': 1900, 'learning_rate': 0.0017, 'max_depth': 6, 'gamma': 7.39e-05, 'reg_alpha': 1.04e-08, 'reg_lambda': 1.08e-07, 'colsample_bytree': 0.755, 'subsample': 0.707}
    },
    'S2_LightGBM': {
        "Amazon":   {'n_estimators': 1500, 'learning_rate': 0.0054, 'num_leaves': 292, 'max_depth': 12, 'reg_alpha': 9.664, 'reg_lambda': 0.0007, 'colsample_bytree': 0.826, 'subsample': 0.602, 'subsample_freq': 1},
        "Coursera": {'n_estimators': 1600, 'learning_rate': 0.0036, 'num_leaves': 79, 'max_depth': 11, 'reg_alpha': 1.007e-08, 'reg_lambda': 0.0011, 'colsample_bytree': 0.954, 'subsample': 0.936, 'subsample_freq': 4},
        "Audible":  {'n_estimators': 1000, 'learning_rate': 0.0044, 'num_leaves': 74, 'max_depth': 9, 'reg_alpha': 0.481, 'reg_lambda': 0.0258, 'colsample_bytree': 0.864, 'subsample': 0.937, 'subsample_freq': 7},
        "Hotel":    {'n_estimators': 1800, 'learning_rate': 0.0024, 'num_leaves': 273, 'max_depth': 8, 'reg_alpha': 1.342, 'reg_lambda': 0.0084, 'colsample_bytree': 0.628, 'subsample': 0.942, 'subsample_freq': 5}
    },
    'S2_CatBoost': {
        "Amazon":   {'iterations': 1400, 'learning_rate': 0.0161, 'depth': 9, 'l2_leaf_reg': 0.7522, 'subsample': 0.9061,'bootstrap_type': 'Bernoulli'},
        "Coursera": {'iterations': 1600, 'learning_rate': 0.0213, 'depth': 10, 'l2_leaf_reg': 3.7410, 'subsample': 0.6386,'bootstrap_type': 'Bernoulli'},
        "Audible":  {'iterations': 800, 'learning_rate': 0.0036, 'depth': 3, 'l2_leaf_reg': 0.6898, 'subsample': 0.9209,'bootstrap_type': 'Bernoulli'},
        "Hotel":    {'iterations': 700, 'learning_rate': 0.2952, 'depth': 3, 'l2_leaf_reg': 0.0179, 'subsample': 0.9086,'bootstrap_type': 'Bernoulli'}
    },
    'S2_MLP': {
        "Amazon":   {'n_layers': 3, 'n_units_l0': 266, 'droput_l0': 0.4325, 'n_units_l1': 291, 'droput_l1': 0.2865, 'n_units_l2': 56, 'droput_l2': 0.2233, 'learning_rate': 0.00093},
        "Coursera": {'n_layers': 3, 'n_units_l0': 110, 'droput_l0': 0.2652, 'n_units_l1': 417, 'droput_l1': 0.2195, 'n_units_l2': 378, 'droput_l2': 0.3935, 'learning_rate': 0.00176},
        "Audible":  {'n_layers': 2, 'n_units_l0': 495, 'droput_l0': 0.3344, 'n_units_l1': 87, 'droput_l1': 0.2786, 'learning_rate': 0.00210},
        "Hotel":    {'n_layers': 4, 'n_units_l0': 494, 'droput_l0': 0.2404, 'n_units_l1': 97, 'droput_l1': 0.1564, 'n_units_l2': 104, 'droput_l2': 0.2554, 'n_units_l3': 301, 'droput_l3': 0.3685, 'learning_rate': 0.00080}
    }
}
os.makedirs(Config.SAVE_DIR, exist_ok=True)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Google Drive 마운트 성공. DEVICE: cuda


In [10]:
# 헬퍼 함수 및 MLP 모델/데이터셋 클래스 정의

In [11]:

def _make_numeric_df(df: pd.DataFrame) -> pd.DataFrame:
    num = df.apply(pd.to_numeric, errors='coerce')
    med = num.median()
    return num.fillna(med)

class NumpyDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)
    def __len__(self): return len(self.X)
    def __getitem__(self, i): return self.X[i], self.y[i]

class S1_MLP_Model(nn.Module):
    def __init__(self, in_dim, params):
        super().__init__()
        layers = []
        d = in_dim
        for _ in range(params['depth']):
            layers += [nn.Linear(d, params['hidden_dim']), nn.ReLU(), nn.Dropout(params['dropout'])]
            d = params['hidden_dim']
        layers += [nn.Linear(d, 1)]
        self.net = nn.Sequential(*layers)
    def forward(self, x):
        return self.net(x)

class S2_MLP_Model(nn.Module):
    def __init__(self, in_dim, params):
        super().__init__()
        layers = []
        in_features = in_dim
        for i in range(params['n_layers']):
            out_features = params[f'n_units_l{i}']
            dropout_rate = params[f'droput_l{i}']
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
            in_features = out_features
        layers.append(nn.Linear(in_features, 1))
        self.net = nn.Sequential(*layers)
    def forward(self, x):
        return self.net(x)

def train_pytorch_model(model, X_train, y_train, params, device):
    """PyTorch 모델을 학습시키고 예측 결과를 반환하는 범용 함수"""
    # 조기 종료를 위한 검증 세트 분리
    X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=Config.RANDOM_STATE, stratify=y_train)

    train_loader = DataLoader(NumpyDataset(X_tr, y_tr), batch_size=params.get("batch_size", 256), shuffle=True)
    val_loader = DataLoader(NumpyDataset(X_val, y_val), batch_size=1024, shuffle=False)

    lr = params.get("lr", params.get("learning_rate")) # 파라미터 이름 통일
    weight_decay = params.get("weight_decay", 1e-5)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = nn.BCEWithLogitsLoss()

    best_pr_auc = -1
    epochs_no_improve = 0
    best_model_state = model.state_dict()

    for epoch in range(50): # Max 50 epochs
        model.train()
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        model.eval()
        val_preds = []
        with torch.no_grad():
            for features, _ in val_loader:
                features = features.to(device)
                outputs = model(features)
                val_preds.append(torch.sigmoid(outputs).cpu().numpy())

        pr_auc = average_precision_score(y_val, np.concatenate(val_preds))

        if pr_auc > best_pr_auc:
            best_pr_auc = pr_auc
            epochs_no_improve = 0
            best_model_state = model.state_dict()
        else:
            epochs_no_improve += 1

        if epochs_no_improve >= 5: # Patience
            break

    model.load_state_dict(best_model_state)
    return model



# 모델별 예측 생성 함수

In [12]:
# 캐싱을 위한 딕셔너리
prediction_cache = {}

def get_predictions(platform, model_name, embedding_model=None):
    """
    주어진 모델과 플랫폼에 대한 예측 결과를 생성하거나 캐시에서 로드합니다.
    """
    cache_key = f"{platform}_{model_name}_{embedding_model or 'S1'}"
    if cache_key in prediction_cache:
        print(f"  -> Loading '{cache_key}' from cache.")
        return prediction_cache[cache_key]

    print(f"  -> Generating predictions for '{cache_key}'...")

    # --- 데이터 로드 ---
    is_s2 = "S2" in model_name
    if is_s2:
        csv_path = os.path.join(Config.S2_DATA_PATH, platform.lower(), f"{platform.lower()}.csv")
        embedding_path = os.path.join(Config.S2_EMBEDDING_PATH, f"{platform.lower()}_{embedding_model}.npy")
        df = pd.read_csv(csv_path)
        features = np.load(embedding_path)
    else: # S1
        csv_path = os.path.join(Config.S1_DATA_PATH, f"new_{platform.lower()}.csv")
        df = pd.read_csv(csv_path)
        feature_cols = [c for c in S1_FEATURES[platform] if c in df.columns]
        features = _make_numeric_df(df[feature_cols]).to_numpy()

    labels = df[Config.TARGET_COLUMN].values
    indices = np.arange(len(df))

    # --- 데이터 분할 (항상 동일하게) ---
    train_indices, test_indices, y_train, y_test = train_test_split(
        indices, labels, test_size=Config.TEST_SPLIT_RATIO, random_state=Config.RANDOM_STATE, stratify=labels
    )
    X_train, X_test = features[train_indices], features[test_indices]

    # --- 모델 학습 및 예측 ---
    params = BEST_PARAMS[model_name][platform]
    model = None

    if "XGBoost" in model_name:
        model = xgb.XGBClassifier(device=Config.DEVICE, objective='binary:logistic', random_state=Config.RANDOM_STATE, **params)
        model.fit(X_train, y_train, verbose=False)

    elif "LightGBM" in model_name:
        model = lgb.LGBMClassifier(device='gpu', objective='binary', random_state=Config.RANDOM_STATE, verbose=-1, **params)
        model.fit(X_train, y_train)

    elif "CatBoost" in model_name:
        model = cb.CatBoostClassifier(task_type='GPU', random_state=Config.RANDOM_STATE, verbose=0, **params)
        model.fit(X_train, y_train)

    elif "MLP" in model_name:
        scaler = StandardScaler().fit(X_train)
        X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test)
        input_dim = X_train.shape[1]

        if is_s2:
            model = S2_MLP_Model(input_dim, params).to(Config.DEVICE)
        else:
            model = S1_MLP_Model(input_dim, params).to(Config.DEVICE)

        model = train_pytorch_model(model, X_train_scaled, y_train, params, Config.DEVICE)

        model.eval()
        with torch.no_grad():
            preds = torch.sigmoid(model(torch.tensor(X_test_scaled, dtype=torch.float32).to(Config.DEVICE))).cpu().numpy().ravel()

        prediction_cache[cache_key] = (preds, y_test)
        return preds, y_test

    preds = model.predict_proba(X_test)[:, 1]
    prediction_cache[cache_key] = (preds, y_test)
    return preds, y_test




# ResidualGuard 튜닝 함수

In [13]:
def tune_residual_guard_alpha(s1_preds, s2_preds, y_true):
    best_alpha, best_prauc = -1, -1
    for alpha in np.arange(0, 1.01, 0.01):
        final_preds = alpha * s1_preds + (1 - alpha) * s2_preds
        prauc = average_precision_score(y_true, final_preds)
        if prauc > best_prauc:
            best_prauc, best_alpha = prauc, alpha
    return best_alpha, best_prauc



# 메인 실행 루프

In [None]:
all_results = []

for platform in tqdm(Config.PLATFORMS, desc="전체 플랫폼 진행"):
    for s1_model_name in tqdm(Config.S1_MODELS, desc=f"S1 Models ({platform})", leave=False):
        # S1 예측값 생성 (플랫폼별, S1 모델별로 한 번만 실행)
        s1_preds, y_true = get_predictions(platform, s1_model_name)

        for s2_model_name in tqdm(Config.S2_MODELS, desc=f"S2 Models ({s1_model_name})", leave=False):
            print(f"--- 튜닝 시작: [ {platform} ] S1: {s1_model_name}, S2: {s2_model_name} ({Config.S2_EMBEDDING}) ---")

            try:
                # S2 예측값 생성
                s2_preds, _ = get_predictions(platform, s2_model_name, embedding_model=Config.S2_EMBEDDING)

                # Alpha 튜닝
                best_alpha, best_prauc = tune_residual_guard_alpha(s1_preds, s2_preds, y_true)

                # 결과 기록
                result = {
                    "Platform": platform,
                    "S1_Model": s1_model_name,
                    "S2_Model": f"{s2_model_name} ({Config.S2_EMBEDDING})",
                    "Best_Alpha": round(best_alpha, 2),
                    "Best_PR_AUC": best_prauc
                }
                all_results.append(result)
                print(f"    ✅ 튜닝 완료 -> 최적 Alpha: {result['Best_Alpha']:.2f}, 최고 PR-AUC: {result['Best_PR_AUC']:.4f}\n")

            except Exception as e:
                print(f"    ❌ 오류 발생: {e}\n")



전체 플랫폼 진행:   0%|          | 0/4 [00:00<?, ?it/s]

S1 Models (Amazon):   0%|          | 0/3 [00:00<?, ?it/s]

  -> Generating predictions for 'Amazon_S1_XGBoost_S1'...


S2 Models (S1_XGBoost):   0%|          | 0/4 [00:00<?, ?it/s]

--- 튜닝 시작: [ Amazon ] S1: S1_XGBoost, S2: S2_XGBoost (T5) ---
  -> Generating predictions for 'Amazon_S2_XGBoost_T5'...
    ✅ 튜닝 완료 -> 최적 Alpha: 0.98, 최고 PR-AUC: 0.5038

--- 튜닝 시작: [ Amazon ] S1: S1_XGBoost, S2: S2_LightGBM (T5) ---
  -> Generating predictions for 'Amazon_S2_LightGBM_T5'...
    ✅ 튜닝 완료 -> 최적 Alpha: 0.98, 최고 PR-AUC: 0.5038

--- 튜닝 시작: [ Amazon ] S1: S1_XGBoost, S2: S2_CatBoost (T5) ---
  -> Generating predictions for 'Amazon_S2_CatBoost_T5'...
    ✅ 튜닝 완료 -> 최적 Alpha: 1.00, 최고 PR-AUC: 0.5037

--- 튜닝 시작: [ Amazon ] S1: S1_XGBoost, S2: S2_MLP (T5) ---
  -> Generating predictions for 'Amazon_S2_MLP_T5'...
    ✅ 튜닝 완료 -> 최적 Alpha: 0.94, 최고 PR-AUC: 0.5041

  -> Generating predictions for 'Amazon_S1_LightGBM_S1'...


S2 Models (S1_LightGBM):   0%|          | 0/4 [00:00<?, ?it/s]

--- 튜닝 시작: [ Amazon ] S1: S1_LightGBM, S2: S2_XGBoost (T5) ---
  -> Loading 'Amazon_S2_XGBoost_T5' from cache.
    ✅ 튜닝 완료 -> 최적 Alpha: 0.98, 최고 PR-AUC: 0.5096

--- 튜닝 시작: [ Amazon ] S1: S1_LightGBM, S2: S2_LightGBM (T5) ---
  -> Loading 'Amazon_S2_LightGBM_T5' from cache.
    ✅ 튜닝 완료 -> 최적 Alpha: 0.98, 최고 PR-AUC: 0.5096

--- 튜닝 시작: [ Amazon ] S1: S1_LightGBM, S2: S2_CatBoost (T5) ---
  -> Loading 'Amazon_S2_CatBoost_T5' from cache.
    ✅ 튜닝 완료 -> 최적 Alpha: 0.99, 최고 PR-AUC: 0.5095

--- 튜닝 시작: [ Amazon ] S1: S1_LightGBM, S2: S2_MLP (T5) ---
  -> Loading 'Amazon_S2_MLP_T5' from cache.
    ✅ 튜닝 완료 -> 최적 Alpha: 0.96, 최고 PR-AUC: 0.5097

  -> Generating predictions for 'Amazon_S1_MLP_S1'...


S2 Models (S1_MLP):   0%|          | 0/4 [00:00<?, ?it/s]

--- 튜닝 시작: [ Amazon ] S1: S1_MLP, S2: S2_XGBoost (T5) ---
  -> Loading 'Amazon_S2_XGBoost_T5' from cache.
    ✅ 튜닝 완료 -> 최적 Alpha: 0.73, 최고 PR-AUC: 0.4324

--- 튜닝 시작: [ Amazon ] S1: S1_MLP, S2: S2_LightGBM (T5) ---
  -> Loading 'Amazon_S2_LightGBM_T5' from cache.
    ✅ 튜닝 완료 -> 최적 Alpha: 0.70, 최고 PR-AUC: 0.4341

--- 튜닝 시작: [ Amazon ] S1: S1_MLP, S2: S2_CatBoost (T5) ---
  -> Loading 'Amazon_S2_CatBoost_T5' from cache.
    ✅ 튜닝 완료 -> 최적 Alpha: 0.76, 최고 PR-AUC: 0.4308

--- 튜닝 시작: [ Amazon ] S1: S1_MLP, S2: S2_MLP (T5) ---
  -> Loading 'Amazon_S2_MLP_T5' from cache.
    ✅ 튜닝 완료 -> 최적 Alpha: 0.74, 최고 PR-AUC: 0.4338



S1 Models (Coursera):   0%|          | 0/3 [00:00<?, ?it/s]

  -> Generating predictions for 'Coursera_S1_XGBoost_S1'...


S2 Models (S1_XGBoost):   0%|          | 0/4 [00:00<?, ?it/s]

--- 튜닝 시작: [ Coursera ] S1: S1_XGBoost, S2: S2_XGBoost (T5) ---
  -> Generating predictions for 'Coursera_S2_XGBoost_T5'...
    ✅ 튜닝 완료 -> 최적 Alpha: 1.00, 최고 PR-AUC: 0.5883

--- 튜닝 시작: [ Coursera ] S1: S1_XGBoost, S2: S2_LightGBM (T5) ---
  -> Generating predictions for 'Coursera_S2_LightGBM_T5'...


# 최종 결과 출력 및 저장

In [None]:
if all_results:
    results_df = pd.DataFrame(all_results)

    print("\n\n" + "="*80)
    print("🏆 최종 ResidualGuard 튜닝 결과 요약 🏆")
    print("="*80)
    print(results_df.to_string())

    save_path = os.path.join(Config.SAVE_DIR, "ResidualGuard_Tuning_Results_Final.csv")
    results_df.to_csv(save_path, index=False, encoding='utf-8-sig')

    print(f"\n\n💾 모든 결과가 다음 파일에 저장되었습니다:\n{save_path}")
else:
    print("\n\n❌ 처리된 결과가 없습니다. 파일 경로 및 입력값을 다시 확인해주세요.")