In [None]:
import pandas as pd
import torch
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import lightgbm as lgb
import torch.nn as nn 
import torch.nn.functional as F  
import safetensors
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer, 
    TrainingArguments,
    DataCollatorWithPadding,
    EarlyStoppingCallback
)
from datasets import Dataset
from tqdm import tqdm
from itertools import permutations
import warnings
warnings.filterwarnings('ignore')

# Listwise Ranking Loss 정의 (수정됨)
class ListwiseRankingLoss(nn.Module):
    def __init__(self, label_to_perm_dict, temperature=1.0):
        super().__init__()
        self.temperature = temperature
        self.label_to_perm = label_to_perm_dict  # 🔥 매핑을 클래스 내부로!
        
    def forward(self, logits, labels):
        batch_size = logits.size(0)
        total_loss = 0
        
        for batch_idx in range(batch_size):
            batch_logits = logits[batch_idx] / self.temperature
            target_label = labels[batch_idx].item()
            target_permutation = self.label_to_perm[target_label]  # 🔥 self 사용
            
            remaining_positions = list(range(4))
            listwise_loss = 0
            
            for pos in range(4):
                correct_sentence = target_permutation[pos]
                if correct_sentence in remaining_positions:
                    position_probs = F.softmax(batch_logits, dim=0)
                    listwise_loss += -torch.log(position_probs[target_label] + 1e-8)
                    break
            
            total_loss += listwise_loss
        
        return total_loss / batch_size

# 공통 모델 학습 함수 (수정됨)
def train_single_model(config, train_dataset, valid_dataset, device, label_to_perm_dict, model_save_dir="./models"):
    """단일 모델 학습 및 저장"""
    import os
    os.makedirs(model_save_dir, exist_ok=True)
    
    print(f"\n🔥 {config['name']} 모델 학습 시작...")
    
    # 토크나이저 로드
    tokenizer = AutoTokenizer.from_pretrained(
        config['model_name'],
        cache_dir='C:/huggingface_cache'
    )
    
    # 모델 로드
    model = AutoModelForSequenceClassification.from_pretrained(
        config['model_name'],
        num_labels=24,
        cache_dir='C:/huggingface_cache'
    )
    model.to(device)
    
    # 데이터 토크나이징
    def tokenize_function(examples):
        return tokenizer(
            examples["text"],
            truncation=True,
            padding=True,
            max_length=512
        )
    
    tokenized_train = train_dataset.map(tokenize_function, batched=True)
    tokenized_valid = valid_dataset.map(tokenize_function, batched=True)
    
    tokenized_train = tokenized_train.remove_columns(["text", "original_sentences", "answer"])
    tokenized_valid = tokenized_valid.remove_columns(["text", "original_sentences", "answer"])
    
    # 학습 설정
    training_args = TrainingArguments(
        output_dir=f"{model_save_dir}/{config['name']}_results",
        learning_rate=config['learning_rate'],
        per_device_train_batch_size=config['batch_size'],
        per_device_eval_batch_size=64,
        num_train_epochs=config['epochs'],
        warmup_steps=config['warmup_steps'],
        weight_decay=config['weight_decay'],
        max_grad_norm=config['max_grad_norm'],
        gradient_accumulation_steps=2,
        dataloader_pin_memory=True,
        dataloader_num_workers=4,
        group_by_length=True,
        eval_strategy="steps",
        eval_steps=100,
        save_strategy="steps",
        save_steps=100,
        save_total_limit=3,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        greater_is_better=True,
        dataloader_drop_last=True,
        remove_unused_columns=False,
        logging_steps=50,
        report_to=None,
        seed=42,
        data_seed=42,
    )
    
    # Listwise Loss 적용 트레이너 (수정됨)
    class ListwiseTrainer(Trainer):
        def __init__(self, label_to_perm_dict, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.listwise_loss = ListwiseRankingLoss(label_to_perm_dict, temperature=1.0)  # 🔥 매핑 전달
        
        def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
            labels = inputs.get("labels")
            outputs = model(**inputs)
            logits = outputs.get('logits')
            loss = self.listwise_loss(logits, labels)
            return (loss, outputs) if return_outputs else loss
    
    # 트레이너 생성 (수정됨)
    trainer = ListwiseTrainer(
        label_to_perm_dict=label_to_perm_dict,  # 🔥 매핑 전달
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_valid,
        tokenizer=tokenizer,
        data_collator=DataCollatorWithPadding(
            tokenizer=tokenizer,
            padding=True,
            max_length=512,
            pad_to_multiple_of=8
        ),
        compute_metrics=lambda eval_pred: {
            "accuracy": accuracy_score(
                eval_pred.label_ids, 
                np.argmax(eval_pred.predictions, axis=1)
            )
        },
        callbacks=[EarlyStoppingCallback(
            early_stopping_patience=config['early_stopping_patience']
        )]
    )
    
    print(f"🎓 {config['name']} 설정:")
    print(f"   Learning Rate: {config['learning_rate']}")
    print(f"   Batch Size: {config['batch_size']}")
    print(f"   Epochs: {config['epochs']}")
    print(f"   Loss Function: ListMLE (Listwise Ranking)")
    
    # 학습 실행
    trainer.train()
    
    # 평가
    eval_results = trainer.evaluate()
    print(f"✅ {config['name']} 최종 성능:")
    print(f"   Accuracy: {eval_results['eval_accuracy']:.4f}")
    
    # 모델 저장
    model_save_path = f"{model_save_dir}/{config['name']}_final"
    trainer.save_model(model_save_path)
    tokenizer.save_pretrained(model_save_path)
    
    # 설정도 저장
    import pickle
    config_save_path = f"{model_save_path}/config.pkl"
    with open(config_save_path, 'wb') as f:
        pickle.dump(config, f)
    
    print(f"💾 {config['name']} 모델 저장 완료: {model_save_path}")
    
    return {
        'model': model,
        'trainer': trainer,
        'config': config,
        'save_path': model_save_path,
        'final_accuracy': eval_results['eval_accuracy'],
        'tokenizer': tokenizer
    }

# 모델 로드 함수
def load_trained_model(model_path, device):
    """저장된 모델 로드"""
    import pickle
    
    # 설정 로드
    config_path = f"{model_path}/config.pkl"
    with open(config_path, 'rb') as f:
        config = pickle.load(f)
    
    # 토크나이저 로드
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    
    # 모델 로드
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    model.to(device)
    model.eval()
    
    return {
        'model': model,
        'config': config,
        'tokenizer': tokenizer,
        'save_path': model_path
    }

print("✅ 공통 함수 및 클래스 정의 완료")


✅ 공통 함수 및 클래스 정의 완료


In [None]:
print("데이터 준비 중...")

# 순열 매핑 생성 (기존 코드와 동일)
def create_label_mappings():
    all_permutations = list(permutations([0, 1, 2, 3]))
    perm_to_label = {perm: idx for idx, perm in enumerate(all_permutations)}
    label_to_perm = {idx: perm for idx, perm in enumerate(all_permutations)}
    return perm_to_label, label_to_perm

perm_to_label, label_to_perm = create_label_mappings()

# 데이터 증강 함수 (기존과 동일)
def augment_roberta_data_advanced(train_df, perm_to_label, multiplier=4):
    def prepare_roberta_data(train_df, perm_to_label):
        processed_data = []
        for _, row in train_df.iterrows():
            sentences = [row[f"sentence_{i}"] for i in range(4)]
            answer_tuple = tuple([row[f"answer_{i}"] for i in range(4)])
            text = " [SEP] ".join(sentences)
            label = perm_to_label[answer_tuple]
            
            processed_data.append({
                "text": text,
                "label": label,
                "original_sentences": sentences,
                "answer": answer_tuple
            })
        return processed_data
    
    augmented_data = []
    original_data = prepare_roberta_data(train_df, perm_to_label)
    augmented_data.extend(original_data)
    
    for aug_round in range(multiplier - 1):
        for _, row in train_df.iterrows():
            sentences = [row[f"sentence_{i}"] for i in range(4)]
            original_answer = [row[f"answer_{i}"] for i in range(4)]
            
            if aug_round == 0:
                indices = list(range(4))
                np.random.shuffle(indices)
            elif aug_round == 1:
                shift = np.random.randint(1, 4)
                indices = [(i + shift) % 4 for i in range(4)]
            else:
                indices = list(range(4))
                i, j = np.random.choice(4, 2, replace=False)
                indices[i], indices[j] = indices[j], indices[i]
            
            shuffled_sentences = [sentences[i] for i in indices]
            new_answer = tuple([indices.index(original_answer[i]) for i in range(4)])
            
            text = " [SEP] ".join(shuffled_sentences)
            label = perm_to_label[new_answer]
            
            augmented_data.append({
                "text": text,
                "label": label,
                "original_sentences": shuffled_sentences,
                "answer": new_answer
            })
    
    return augmented_data

# 데이터 로드 및 증강
train_df = pd.read_csv('./train.csv')
print(f"원본 학습 데이터: {len(train_df)}개")

augmented_data = augment_roberta_data_advanced(train_df, perm_to_label, multiplier=4)
print(f"증강 후 데이터: {len(augmented_data)}개")

# 학습/검증 데이터 분할
train_data, valid_data = train_test_split(
    augmented_data, 
    test_size=0.2, 
    random_state=42,
    stratify=[item["label"] for item in augmented_data]
)

# 데이터셋 생성
train_dataset = Dataset.from_pandas(pd.DataFrame(train_data))
valid_dataset = Dataset.from_pandas(pd.DataFrame(valid_data))

print(f"학습 데이터: {len(train_data)}개")
print(f"검증 데이터: {len(valid_data)}개")
print("✅ 데이터 준비 완료")


데이터 준비 중...
원본 학습 데이터: 7351개
증강 후 데이터: 29404개
학습 데이터: 23523개
검증 데이터: 5881개
✅ 데이터 준비 완료


In [None]:
# GPU 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"디바이스: {device}")

# ELECTRA Small 설정
electra_config = {
    'name': 'electra_small_listwise',
    'model_name': 'monologg/koelectra-small-v3-discriminator',
    'learning_rate': 3e-5,
    'epochs': 60,
    'batch_size': 64,
    'warmup_steps': 300,
    'weight_decay': 0.01,
    'max_grad_norm': 1.0,
    'early_stopping_patience': 3
}

# ELECTRA 모델 학습 (이 셀만 실행해서 ELECTRA 모델만 학습 가능)
try:
    electra_model_info = train_single_model(
        electra_config, 
        train_dataset, 
        valid_dataset, 
        device, 
        model_save_dir="./saved_models"
    )
    print("🎉 ELECTRA 모델 학습 완료!")
except Exception as e:
    print(f"❌ ELECTRA 모델 학습 실패: {e}")
    electra_model_info = None


디바이스: cuda

🔥 electra_small_listwise 모델 학습 시작...


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-small-v3-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 23523/23523 [00:03<00:00, 7760.72 examples/s]
Map: 100%|██████████| 5881/5881 [00:00<00:00, 8425.42 examples/s]


🎓 electra_small_listwise 설정:
   Learning Rate: 3e-05
   Batch Size: 64
   Epochs: 60
   Loss Function: ListMLE (Listwise Ranking)


Step,Training Loss,Validation Loss,Accuracy
100,3.1781,3.178211,0.04035
200,3.1459,3.1746,0.043784
300,3.0359,2.926933,0.10989
400,2.6676,2.617111,0.119334
500,2.5053,2.444952,0.124313
600,2.3618,2.35998,0.125
700,2.3366,2.322629,0.13307
800,2.2846,2.275527,0.143544
900,2.2486,2.240313,0.19351
1000,2.1845,2.159059,0.201065


✅ electra_small_listwise 최종 성능:
   Accuracy: 0.9674
💾 electra_small_listwise 모델 저장 완료: ./saved_models/electra_small_listwise_final
🎉 ELECTRA 모델 학습 완료!


In [4]:
# BERT Base 설정
bert_config = {
    'name': 'bert_base_listwise',
    'model_name': 'klue/bert-base',
    'learning_rate': 2e-5,
    'epochs': 40,
    'batch_size': 32,
    'warmup_steps': 400,
    'weight_decay': 0.01,
    'max_grad_norm': 1.0,
    'early_stopping_patience': 3
}

# BERT 모델 학습 (이 셀만 실행해서 BERT 모델만 학습 가능)
try:
    bert_model_info = train_single_model(
        bert_config, 
        train_dataset, 
        valid_dataset, 
        device, 
        model_save_dir="./saved_models"
    )
    print("🎉 BERT 모델 학습 완료!")
except Exception as e:
    print(f"❌ BERT 모델 학습 실패: {e}")
    bert_model_info = None


🔥 bert_base_listwise 모델 학습 시작...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at klue/bert-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 23523/23523 [00:02<00:00, 8105.71 examples/s]
Map: 100%|██████████| 5881/5881 [00:00<00:00, 8597.86 examples/s]


🎓 bert_base_listwise 설정:
   Learning Rate: 2e-05
   Batch Size: 32
   Epochs: 40
   Loss Function: ListMLE (Listwise Ranking)


Step,Training Loss,Validation Loss,Accuracy
100,3.2011,3.187009,0.047734
200,3.1033,2.961313,0.115385
300,2.2626,1.887782,0.39011
400,1.35,1.168133,0.505495
500,1.0027,0.911446,0.624657
600,0.8354,0.742969,0.71772
700,0.6612,0.581704,0.778674
800,0.4837,0.48302,0.81851
900,0.4408,0.427066,0.843578
1000,0.3551,0.318787,0.894574


✅ bert_base_listwise 최종 성능:
   Accuracy: 0.9845
💾 bert_base_listwise 모델 저장 완료: ./saved_models/bert_base_listwise_final
🎉 BERT 모델 학습 완료!


In [5]:
# RoBERTa Small 설정
roberta_config = {
    'name': 'roberta_small_listwise',
    'model_name': 'klue/roberta-small',
    'learning_rate': 2e-5,
    'epochs': 35,
    'batch_size': 48,
    'warmup_steps': 300,
    'weight_decay': 0.01,
    'max_grad_norm': 1.0,
    'early_stopping_patience': 3
}

# RoBERTa 모델 학습 (이 셀만 실행해서 RoBERTa 모델만 학습 가능)
try:
    roberta_model_info = train_single_model(
        roberta_config, 
        train_dataset, 
        valid_dataset, 
        device, 
        model_save_dir="./saved_models"
    )
    print("🎉 RoBERTa 모델 학습 완료!")
except Exception as e:
    print(f"❌ RoBERTa 모델 학습 실패: {e}")
    roberta_model_info = None


🔥 roberta_small_listwise 모델 학습 시작...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-small and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 23523/23523 [00:02<00:00, 8323.69 examples/s]
Map: 100%|██████████| 5881/5881 [00:00<00:00, 9117.77 examples/s]


🎓 roberta_small_listwise 설정:
   Learning Rate: 2e-05
   Batch Size: 48
   Epochs: 35
   Loss Function: ListMLE (Listwise Ranking)


Step,Training Loss,Validation Loss,Accuracy
100,3.1796,3.179907,0.043098
200,3.1651,3.035029,0.082933
300,2.369,2.030219,0.333963
400,1.592,1.362449,0.495879
500,1.1892,1.067274,0.558551
600,0.9557,0.891644,0.661058
700,0.8378,0.764784,0.726133
800,0.6747,0.651618,0.770604
900,0.5866,0.593504,0.781765
1000,0.5108,0.486221,0.830357


✅ roberta_small_listwise 최종 성능:
   Accuracy: 0.9681
💾 roberta_small_listwise 모델 저장 완료: ./saved_models/roberta_small_listwise_final
🎉 RoBERTa 모델 학습 완료!


In [6]:
print("저장된 모델들 로드 중...")

# 모델 경로들
model_paths = [
    "./saved_models/electra_small_listwise_final",
    "./saved_models/bert_base_listwise_final", 
    "./saved_models/roberta_small_listwise_final"
]

# 모델들 로드
loaded_models = []
for path in model_paths:
    try:
        model_info = load_trained_model(path, device)
        loaded_models.append(model_info)
        print(f"✅ 모델 로드 성공: {model_info['config']['name']}")
    except Exception as e:
        print(f"❌ 모델 로드 실패 ({path}): {e}")

print(f"총 {len(loaded_models)}개 모델 로드 완료")


저장된 모델들 로드 중...
✅ 모델 로드 성공: electra_small_listwise
✅ 모델 로드 성공: bert_base_listwise
✅ 모델 로드 성공: roberta_small_listwise
총 3개 모델 로드 완료


In [7]:
def generate_meta_features(models, data, device, n_folds=5):
    """메타 특성 생성"""
    print("🧠 메타 특성 생성 중...")
    
    df = pd.DataFrame(data)
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)
    meta_features = np.zeros((len(data), len(models) * 24))
    
    for fold, (train_idx, val_idx) in enumerate(kf.split(df)):
        print(f"  Fold {fold + 1}/{n_folds} 처리 중...")
        
        val_data = df.iloc[val_idx]
        
        for model_idx, model_info in enumerate(models):
            model = model_info['model']
            tokenizer = model_info['tokenizer']
            model.eval()
            
            fold_predictions = []
            
            for _, row in val_data.iterrows():
                text = row['text']
                
                inputs = tokenizer(
                    text,
                    return_tensors="pt",
                    truncation=True,
                    padding=True,
                    max_length=512
                ).to(device)
                
                with torch.no_grad():
                    outputs = model(**inputs)
                    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
                    fold_predictions.append(probabilities.cpu().numpy()[0])
            
            start_col = model_idx * 24
            end_col = (model_idx + 1) * 24
            meta_features[val_idx, start_col:end_col] = np.array(fold_predictions)
    
    print("✅ 메타 특성 생성 완료")
    return meta_features

# 메타 특성 생성 (모델들이 로드된 후에만 실행)
if len(loaded_models) > 0:
    meta_features = generate_meta_features(loaded_models, train_data, device)
    meta_labels = [item['label'] for item in train_data]
    print(f"메타 특성 형태: {meta_features.shape}")
else:
    print("❌ 로드된 모델이 없어 메타 특성을 생성할 수 없습니다.")

🧠 메타 특성 생성 중...
  Fold 1/5 처리 중...
  Fold 2/5 처리 중...
  Fold 3/5 처리 중...
  Fold 4/5 처리 중...
  Fold 5/5 처리 중...
✅ 메타 특성 생성 완료
메타 특성 형태: (23523, 72)


In [8]:
def train_meta_model(meta_features, meta_labels):
    """메타 모델 학습 (매번 새로 학습)"""
    print("🎯 메타 모델 학습 중...")
    
    meta_model = lgb.LGBMClassifier(
        objective='multiclass',
        num_class=24,
        boosting_type='gbdt',
        num_leaves=31,
        learning_rate=0.05,
        feature_fraction=0.9,
        bagging_fraction=0.8,
        bagging_freq=5,
        verbose=0,
        random_state=42
    )
    
    meta_model.fit(meta_features, meta_labels)
    print("✅ 메타 모델 학습 완료")
    
    return meta_model

# 메타 모델 학습 (피클 저장 없이)
if len(loaded_models) > 0:
    meta_model = train_meta_model(meta_features, meta_labels)
    print("💡 메타 모델이 메모리에 준비됨 (저장 없음)")


🎯 메타 모델 학습 중...
✅ 메타 모델 학습 완료
💡 메타 모델이 메모리에 준비됨 (저장 없음)


In [9]:
def predict_with_ensemble(models, meta_model, test_texts, device):
    """앙상블 예측"""
    print("🔮 앙상블 예측 중...")
    
    # 각 모델별 예측
    test_meta_features = []
    
    for model_info in models:
        model = model_info['model']
        tokenizer = model_info['tokenizer']
        model.eval()
        
        model_predictions = []
        model_name = model_info['config']['name']
        
        for text in tqdm(test_texts, desc=f"{model_name} 예측"):
            inputs = tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512
            ).to(device)
            
            with torch.no_grad():
                outputs = model(**inputs)
                probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
                model_predictions.append(probabilities.cpu().numpy()[0])
        
        test_meta_features.append(np.array(model_predictions))
    
    # 메타 특성 결합
    final_meta_features = np.hstack(test_meta_features)
    
    # 메타 모델로 최종 예측
    final_predictions = meta_model.predict(final_meta_features)
    final_probabilities = meta_model.predict_proba(final_meta_features)
    
    return final_predictions, final_probabilities

# 테스트 데이터 로드 및 예측
if len(loaded_models) > 0 and 'meta_model' in locals():
    print("테스트 데이터 예측 시작...")
    
    test_df = pd.read_csv("./test.csv")
    print(f"테스트 데이터: {len(test_df)}개")
    
    # 테스트 데이터 전처리
    test_texts = []
    for _, row in test_df.iterrows():
        sentences = [row[f"sentence_{i}"] for i in range(4)]
        text = " [SEP] ".join(sentences)
        test_texts.append(text)
    
    # 예측 실행
    final_predictions, final_probabilities = predict_with_ensemble(
        loaded_models, meta_model, test_texts, device
    )
    
    # 결과를 순열로 변환
    predicted_orders = []
    confidences = []
    
    for i, pred_label in enumerate(final_predictions):
        predicted_order = list(label_to_perm[pred_label])
        confidence = np.max(final_probabilities[i])
        
        predicted_orders.append(predicted_order)
        confidences.append(confidence)
    
    avg_confidence = np.mean(confidences)
    print(f"평균 예측 신뢰도: {avg_confidence:.4f}")
    
    # 제출 파일 생성
    sample_submission = pd.read_csv("./sample_submission.csv")
    for i in range(4):
        sample_submission[f"answer_{i}"] = [pred[i] for pred in predicted_orders]
    
    submission_filename = "meta_ensemble_submission.csv"
    sample_submission.to_csv(submission_filename, index=False)
    
    print(f"✅ 제출 파일 저장 완료: {submission_filename}")
    print(f"🏆 메타 모델 앙상블 완료! (모델 수: {len(loaded_models)}개)")

else:
    if len(loaded_models) == 0:
        print("❌ 예측할 모델이 없습니다. 먼저 모델들을 학습해주세요.")
    else:
        print("❌ 메타 모델이 없습니다. 먼저 메타 모델을 학습해주세요.")

테스트 데이터 예측 시작...
테스트 데이터: 1780개
🔮 앙상블 예측 중...


electra_small_listwise 예측: 100%|██████████| 1780/1780 [00:18<00:00, 96.29it/s]
bert_base_listwise 예측: 100%|██████████| 1780/1780 [00:15<00:00, 112.85it/s]
roberta_small_listwise 예측: 100%|██████████| 1780/1780 [00:09<00:00, 184.41it/s]


평균 예측 신뢰도: 0.9697
✅ 제출 파일 저장 완료: meta_ensemble_submission.csv
🏆 메타 모델 앙상블 완료! (모델 수: 3개)
