# 🎯 베이스라인 구현 - KoBART 기반 대화 요약
> PRD 계획에 따른 베이스라인 모델 구현

**목표 성능**: ROUGE-F1 47+

In [None]:
# 환경 설정
import sys
import os
from pathlib import Path

# Locate the project root from the notebook's working directory.
# Per the original note the layout is <root>/notebooks/team/CHH, i.e. the
# root sits exactly three levels above the notebook directory.
notebook_dir = Path.cwd()
project_root = notebook_dir.parent.parent.parent

# Remove entries that belong to the other (computer-vision) project, then
# make sure this project's root is on the import path, in first position.
sys.path = [entry for entry in sys.path if 'computer-vision-competition' not in entry]
if sys.path.count(str(project_root)) == 0:
    sys.path.insert(0, str(project_root))

print(f"Project Root: {project_root}")
print(f"Current Dir: {notebook_dir}")

# 필요한 라이브러리 임포트
import yaml
import pandas as pd
import numpy as np
import torch
from datetime import datetime
from transformers import AutoTokenizer, BartForConditionalGeneration
from rouge import Rouge
import wandb

# 커스텀 모듈 임포트
from src.logging.notebook_logger import NotebookLogger
from src.utils.gpu_optimization.team_gpu_check import check_gpu_tier

print("Libraries imported successfully!")

In [None]:
# Load the baseline experiment configuration (YAML) from <notebook_dir>/configs.
config_path = notebook_dir / 'configs' / 'config_baseline.yaml'
with config_path.open('r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

print(f"Model: {config['model']['name']}")
print(f"Batch Size: {config['training']['batch_size']}")

In [None]:
# 로거 초기화
# config의 로그 경로 사용
def get_path(path_str):
    """Turn a path taken from the config into an absolute ``Path``.

    Absolute inputs are returned unchanged (as ``Path``); relative inputs are
    anchored at the notebook directory (``notebook_dir``).
    """
    candidate = Path(path_str)
    return candidate if candidate.is_absolute() else notebook_dir / candidate

# Create the log directory named in the config (resolved through get_path).
log_dir = get_path(config['paths']['log_dir'])
log_dir.mkdir(parents=True, exist_ok=True)

# One log file per run, named after the start time.
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
log_file = log_dir / ('baseline_' + timestamp + '.log')

# Project logger; print_also=True is passed through to NotebookLogger.
logger = NotebookLogger(log_path=str(log_file), print_also=True)

logger.write('=== Baseline Experiment Started ===')

In [None]:
# GPU check: when CUDA is available, log the tier returned by the project
# helper check_gpu_tier() and the name of device 0. Nothing is logged on
# CPU-only hosts.
if torch.cuda.is_available():
    gpu_tier = check_gpu_tier()
    logger.write(f"GPU Tier: {gpu_tier}")
    logger.write(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# 데이터 경로 설정 및 로드
# config 파일의 경로 사용
def get_data_path(path_str):
    """Resolve a data-file path from the config.

    An absolute path comes back as-is; a relative one is joined onto
    ``notebook_dir``. The result is always a ``Path``.
    """
    resolved = Path(path_str)
    if resolved.is_absolute():
        return resolved
    return notebook_dir / resolved

# Resolve the three dataset locations named in the config.
train_path, dev_path, test_path = (
    get_data_path(config['paths'][entry])
    for entry in ('train_file', 'dev_file', 'test_file')
)

logger.write("Loading data from config paths:")
logger.write(f"  - Train: {train_path}")
logger.write(f"  - Dev: {dev_path}")
logger.write(f"  - Test: {test_path}")

# Read each CSV split into its own DataFrame.
train_df, dev_df, test_df = (
    pd.read_csv(csv_path) for csv_path in (train_path, dev_path, test_path)
)

logger.write("Data loaded successfully!")
logger.write(f"Train samples: {len(train_df)}")
logger.write(f"Dev samples: {len(dev_df)}")
logger.write(f"Test samples: {len(test_df)}")

# Show the first two training rows as a sanity check.
print("\nSample data:")
print(train_df.head(2))

In [None]:
# Data quality validation (see PRD document 16_데이터_품질_검증_시스템.md)
logger.write("\n=== Data Quality Validation ===")

class DataQualityValidator:
    """Collect simple quality statistics for a dialogue-summarization DataFrame.

    Every ``validate_*`` method expects a ``dialogue`` column (``summary`` is
    optional), returns a dict of checks and also stores that dict in
    ``quality_report`` under its own key.
    """

    # Three consecutive question marks are counted as an encoding issue.
    _ENCODING_ISSUE_PATTERN = r'\?\?\?'
    # Any character outside word characters, whitespace, Hangul syllables and
    # the listed punctuation is counted as a special character.
    _SPECIAL_CHAR_PATTERN = r'[^\w\s#:.,!?가-힣]'

    def __init__(self):
        # Maps check group ('structure', 'content', 'consistency') -> results.
        self.quality_report = {}

    def validate_structure(self, df):
        """Count nulls, duplicate rows and empty dialogue/summary strings.

        Args:
            df: DataFrame with a ``dialogue`` column and optionally ``summary``.

        Returns:
            dict with ``null_values``, ``duplicates``, ``empty_dialogues`` and
            ``empty_summaries`` (0 when there is no ``summary`` column).
        """
        has_summary = 'summary' in df.columns
        checks = {
            'null_values': df.isnull().sum().sum(),
            'duplicates': df.duplicated().sum(),
            'empty_dialogues': (df['dialogue'].str.len() == 0).sum(),
            'empty_summaries': (df['summary'].str.len() == 0).sum() if has_summary else 0,
        }
        self.quality_report['structure'] = checks
        return checks

    def validate_content(self, df):
        """Compute character-length statistics for dialogues and summaries.

        Args:
            df: DataFrame with a ``dialogue`` column and optionally ``summary``.

        Returns:
            dict with average/min/max dialogue length, average summary length
            and ``summary_ratio`` (mean of summary length / dialogue length;
            0 when there is no ``summary`` column).
        """
        dialogue_lengths = df['dialogue'].str.len()
        if 'summary' in df.columns:
            summary_lengths = df['summary'].str.len()
            # A zero-length dialogue would turn the ratio into inf and poison
            # the mean; mask those rows so mean() skips them.
            usable_lengths = dialogue_lengths.where(dialogue_lengths > 0)
            summary_ratio = (summary_lengths / usable_lengths).mean()
        else:
            summary_lengths = pd.Series([0])
            summary_ratio = 0

        checks = {
            'avg_dialogue_length': dialogue_lengths.mean(),
            'min_dialogue_length': dialogue_lengths.min(),
            'max_dialogue_length': dialogue_lengths.max(),
            'avg_summary_length': summary_lengths.mean(),
            'summary_ratio': summary_ratio,
        }
        self.quality_report['content'] = checks
        return checks

    def validate_consistency(self, df):
        """Check speaker tags and count suspicious character patterns.

        Missing dialogues (NaN/None) count as "no match" for every pattern, so
        a missing dialogue makes ``person_tags_consistent`` False instead of
        being silently treated as a match.

        Args:
            df: DataFrame with a ``dialogue`` column.

        Returns:
            dict with ``person_tags_consistent`` (bool), ``encoding_issues``
            and ``special_chars`` (row counts).
        """
        dialogues = df['dialogue']
        has_person_tag = dialogues.str.contains('#Person', regex=False, na=False)
        checks = {
            'person_tags_consistent': bool(has_person_tag.all()),
            'encoding_issues': dialogues.str.contains(self._ENCODING_ISSUE_PATTERN, na=False).sum(),
            'special_chars': dialogues.str.contains(self._SPECIAL_CHAR_PATTERN, na=False).sum(),
        }
        self.quality_report['consistency'] = checks
        return checks

    def generate_report(self):
        """Return the accumulated report (the same dict object, not a copy)."""
        return self.quality_report

# Run the data quality validation on the training split and log the results.
validator = DataQualityValidator()

# Structural checks: every entry of the returned dict is logged.
structure_checks = validator.validate_structure(train_df)
logger.write(f"Structure validation:")
for key, value in structure_checks.items():
    logger.write(f"  - {key}: {value}")

# Content checks: only the average dialogue length and the summary ratio are logged.
content_checks = validator.validate_content(train_df)
logger.write(f"Content validation:")
logger.write(f"  - Avg dialogue length: {content_checks['avg_dialogue_length']:.1f}")
logger.write(f"  - Summary ratio: {content_checks.get('summary_ratio', 0):.2%}")

# Consistency checks: speaker-tag presence and encoding-issue count are logged.
consistency_checks = validator.validate_consistency(train_df)
logger.write(f"Consistency validation:")
logger.write(f"  - Person tags consistent: {consistency_checks['person_tags_consistent']}")
logger.write(f"  - Encoding issues: {consistency_checks['encoding_issues']}")

In [None]:
# Solar API 교차 검증 시스템 (PRD 09_Solar_API_최적화.md, 10_교차_검증_시스템.md)
import requests
import json
from typing import List, Dict, Optional

class SolarAPIOptimizer:
    """Small client that asks the Upstage Solar chat endpoint for dialogue summaries."""

    # Maximum number of dialogue characters embedded in a prompt; bounds the
    # size of the request.
    MAX_DIALOGUE_CHARS = 1000

    def __init__(self, api_key: str):
        """Store the API key and prepare the request headers.

        Args:
            api_key: bearer token sent in the ``Authorization`` header.
        """
        self.api_key = api_key
        self.base_url = "https://api.upstage.ai/v1/solar"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

    def optimize_prompt(self, dialogue: str) -> str:
        """Build a compact summarization prompt for ``dialogue``.

        Runs of whitespace (including newlines) are collapsed to single spaces
        and the dialogue is cut to ``MAX_DIALOGUE_CHARS`` characters.

        Args:
            dialogue: raw dialogue text.

        Returns:
            The prompt: instruction line, truncated dialogue, then ``요약:``.
        """
        compact = ' '.join(dialogue.split())
        truncated = compact[:self.MAX_DIALOGUE_CHARS]

        # The truncation note used to live inside the f-string, which sent the
        # text "# 토큰 제한" to the API as part of every prompt.
        return (
            "다음 대화를 한국어로 간결하게 요약하세요. 핵심 내용만 포함하세요:\n"
            f"{truncated}\n"
            "요약:"
        )

    def generate_summary(self, dialogue: str, max_tokens: int = 150) -> Optional[str]:
        """Request a summary of ``dialogue`` from the Solar API.

        This is best-effort: any failure (network error, non-200 status,
        unexpected response shape) is logged and reported as ``None``.

        Args:
            dialogue: raw dialogue text.
            max_tokens: value sent as the request's ``max_tokens``.

        Returns:
            The content of the first returned choice, or ``None`` on failure.
        """
        try:
            payload = {
                "model": "solar-1-mini-chat",
                "messages": [
                    {"role": "system", "content": "당신은 대화 요약 전문가입니다."},
                    {"role": "user", "content": self.optimize_prompt(dialogue)}
                ],
                "max_tokens": max_tokens,
                "temperature": 0.3,
                "top_p": 0.9
            }

            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=30
            )

            if response.status_code != 200:
                logger.write(f"Solar API error: {response.status_code}")
                return None

            return response.json()['choices'][0]['message']['content']

        except Exception as e:
            # Deliberately broad: the API is an optional cross-check and must
            # never abort the notebook; the failure is logged instead.
            logger.write(f"Solar API exception: {e}")
            return None

class DualSummarizationSystem:
    """Produce a summary with both the local model and the Solar API and compare them."""

    def __init__(self, model, tokenizer, solar_api: SolarAPIOptimizer):
        """Keep references to the model, tokenizer and API client.

        Args:
            model: seq2seq model exposing ``parameters()`` and ``generate()``.
            tokenizer: tokenizer matching ``model``.
            solar_api: client used for the API-side summary.
        """
        self.model = model
        self.tokenizer = tokenizer
        self.solar_api = solar_api
        # Inputs are moved to whatever device the model's first parameter is on.
        self.device = next(model.parameters()).device

    def compare_summaries(self, dialogue: str, reference: Optional[str] = None) -> Dict:
        """Summarize ``dialogue`` with the model and the API, optionally scoring both.

        Args:
            dialogue: dialogue text to summarize.
            reference: gold summary; when given (and non-empty) ROUGE-L F1 is
                computed for both summaries.

        Returns:
            dict with ``model_summary`` and ``api_summary`` (``None`` if the
            API call failed). When scoring succeeds it also has
            ``model_rouge``, ``api_rouge`` (0 without an API summary) and
            ``best`` ('model' or 'api'). If scoring fails the error is logged
            and those keys are absent.
        """
        # Model prediction.
        inputs = self.tokenizer(
            dialogue,
            max_length=512,
            truncation=True,
            return_tensors='pt',
            # NOTE(review): some tokenizers emit token_type_ids, which BART's
            # generate() does not accept as a keyword; suppress them here.
            return_token_type_ids=False
        ).to(self.device)

        with torch.no_grad():
            model_output = self.model.generate(
                **inputs,
                max_length=128,
                num_beams=4,
                early_stopping=True
            )

        model_summary = self.tokenizer.decode(model_output[0], skip_special_tokens=True)

        # API prediction (None when the request failed).
        api_summary = self.solar_api.generate_summary(dialogue)

        result = {
            'model_summary': model_summary,
            'api_summary': api_summary
        }

        if reference:
            rouge = Rouge()
            try:
                model_scores = rouge.get_scores(model_summary, reference)[0]
                api_scores = rouge.get_scores(api_summary, reference)[0] if api_summary else None
            except Exception as exc:
                # Scoring is best-effort (an empty hypothesis is one possible
                # failure), but the failure must be visible rather than
                # silently swallowed.
                logger.write(f"ROUGE comparison failed: {exc}")
            else:
                result['model_rouge'] = model_scores['rouge-l']['f']
                result['api_rouge'] = api_scores['rouge-l']['f'] if api_scores else 0
                result['best'] = 'model' if result['model_rouge'] > result['api_rouge'] else 'api'

        return result

# Initialise the Solar API helpers (the API key comes from the config).
# NOTE: this cell sits above the cell that loads `model` / `tokenizer`, so on
# a top-to-bottom run those names do not exist yet. In that case the dual
# system is left unset instead of raising NameError; re-run this cell after
# the model has been loaded to enable it.
if (config.get('solar_api') or {}).get('api_key'):
    solar_optimizer = SolarAPIOptimizer(config['solar_api']['api_key'])
    if 'model' in globals() and 'tokenizer' in globals():
        dual_system = DualSummarizationSystem(model, tokenizer, solar_optimizer)
        logger.write("Solar API dual system initialized")
    else:
        dual_system = None
        logger.write("Solar API client initialized; dual system deferred until model and tokenizer are loaded")
else:
    logger.write("Solar API key not found in config")
    solar_optimizer = None
    dual_system = None

In [None]:
# Initialise the WandB run.
# The config is uploaded as run metadata, so the Solar API key (if present) is
# stripped first; every other setting is passed through unchanged.
wandb.init(
    project=config['wandb']['project'],
    entity=config['wandb']['entity'],
    name=config['wandb']['name'],
    config={
        section: (
            {k: v for k, v in values.items() if k != 'api_key'}
            if section == 'solar_api' and isinstance(values, dict)
            else values
        )
        for section, values in config.items()
    }
)

In [None]:
# Load the tokenizer and the BART seq2seq model named by config['model']['name'].
tokenizer = AutoTokenizer.from_pretrained(config['model']['name'])
model = BartForConditionalGeneration.from_pretrained(config['model']['name'])

logger.write(f"Model loaded: {config['model']['name']}")

## 데이터 전처리 및 데이터셋 클래스 정의

In [None]:
# 데이터 전처리 함수
def preprocess_dialogue(text):
    """Normalize a raw dialogue string before tokenization.

    Steps: turn literal ``\\n`` sequences and ``<br>`` tags into real newlines,
    strip surrounding whitespace, then rewrite speaker tags such as
    ``#Person1#:`` to ``화자1:``.

    Args:
        text: raw dialogue; a missing value (None/NaN) is accepted.

    Returns:
        The cleaned dialogue, or ``""`` for a missing value (same convention
        as ``preprocess_summary``).
    """
    import re  # kept function-local, as in the original

    # Missing dialogues would otherwise fail on .replace().
    if pd.isna(text):
        return ""

    # Noise removal: escaped newlines and HTML line breaks become newlines.
    text = text.replace('\\n', '\n').replace('<br>', '\n')
    text = text.strip()

    # Make the speaker tag explicit: '#PersonN#:' -> '화자N:'.
    return re.sub(r'#Person(\d+)#:', r'화자\1:', text)

def preprocess_summary(text):
    """Clean a reference summary: missing values become ``""``, everything
    else is returned with surrounding whitespace stripped."""
    return "" if pd.isna(text) else text.strip()

# Apply the preprocessing functions; results go into new *_preprocessed columns
# so the raw text columns stay untouched.
train_df['dialogue_preprocessed'] = train_df['dialogue'].apply(preprocess_dialogue)
train_df['summary_preprocessed'] = train_df['summary'].apply(preprocess_summary)

dev_df['dialogue_preprocessed'] = dev_df['dialogue'].apply(preprocess_dialogue)
dev_df['summary_preprocessed'] = dev_df['summary'].apply(preprocess_summary)

# Only the dialogue column is processed for the test split.
test_df['dialogue_preprocessed'] = test_df['dialogue'].apply(preprocess_dialogue)

print(f"전처리 완료!")
print(f"Sample preprocessed dialogue (first 200 chars):")
print(train_df['dialogue_preprocessed'].iloc[0][:200])
logger.write("Data preprocessing completed")

In [None]:
# PyTorch Dataset 클래스 정의
from torch.utils.data import Dataset, DataLoader

class DialogueSummaryDataset(Dataset):
    """Torch dataset that tokenizes dialogue/summary rows on access."""

    def __init__(self, dataframe, tokenizer, max_input_len=512, max_target_len=128, is_test=False):
        """
        Args:
            dataframe: DataFrame with a ``dialogue_preprocessed`` column and,
                unless ``is_test`` is set, a ``summary_preprocessed`` column.
            tokenizer: callable tokenizer returning ``input_ids`` and
                ``attention_mask`` tensors.
            max_input_len: length the dialogue is padded/truncated to.
            max_target_len: length the summary is padded/truncated to.
            is_test: when True no targets are produced and the row position
                is returned instead.
        """
        self.df = dataframe
        self.tokenizer = tokenizer
        self.max_input_len = max_input_len
        self.max_target_len = max_target_len
        self.is_test = is_test

    def __len__(self):
        return len(self.df)

    def _encode(self, text, max_length):
        """Tokenize one text, padded/truncated to ``max_length``, as 'pt' tensors."""
        return self.tokenizer(
            text,
            max_length=max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

    def __getitem__(self, idx):
        """Return the tensors for row ``idx``.

        Returns:
            Training/validation mode: dict with ``input_ids``,
            ``attention_mask`` and ``labels`` (token ids of the summary).
            Test mode: dict with ``input_ids``, ``attention_mask`` and ``idx``.
        """
        row = self.df.iloc[idx]
        inputs = self._encode(row['dialogue_preprocessed'], self.max_input_len)

        # squeeze(0) removes only the leading batch dimension added by
        # return_tensors='pt'; a bare squeeze() would also collapse a sequence
        # dimension of length 1 into a 0-d tensor.
        item = {
            'input_ids': inputs['input_ids'].squeeze(0),
            'attention_mask': inputs['attention_mask'].squeeze(0),
        }

        if self.is_test:
            # The position lets the caller restore row order after batching.
            item['idx'] = idx
            return item

        targets = self._encode(row['summary_preprocessed'], self.max_target_len)
        item['labels'] = targets['input_ids'].squeeze(0)
        return item

# Build the datasets. Train and validation share the same settings and carry
# targets; the test dataset is created in test mode (no targets).
train_dataset, val_dataset = (
    DialogueSummaryDataset(
        frame,
        tokenizer,
        max_input_len=config['model']['max_input_length'],
        max_target_len=config['model']['max_target_length'],
    )
    for frame in (train_df, dev_df)
)

test_dataset = DialogueSummaryDataset(
    test_df,
    tokenizer,
    is_test=True,
    max_input_len=config['model']['max_input_length'],
    max_target_len=config['model']['max_target_length'],
)

logger.write(f"Dataset created - Train: {len(train_dataset)}, Val: {len(val_dataset)}, Test: {len(test_dataset)}")
print("Dataset shapes:")
print(f"  Train: {len(train_dataset)}")
print(f"  Val: {len(val_dataset)}")
print(f"  Test: {len(test_dataset)}")

In [None]:
# Create the DataLoaders. Only the training loader shuffles; validation and
# test keep dataset order. All use the configured batch size.
train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=config['training']['batch_size'],
    num_workers=2,
    pin_memory=True,
)

val_loader, test_loader = (
    DataLoader(
        dataset,
        shuffle=False,
        batch_size=config['training']['batch_size'],
        num_workers=2,
        pin_memory=True,
    )
    for dataset in (val_dataset, test_dataset)
)

print("DataLoader created:")
print(f"  Train batches: {len(train_loader)}")
print(f"  Val batches: {len(val_loader)}")
print(f"  Test batches: {len(test_loader)}")

## 학습 및 평가 함수 정의

In [None]:
import torch.nn as nn
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup
from tqdm.auto import tqdm
import gc

# Device setup: use CUDA when available, otherwise the CPU, and move the model there.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
logger.write(f"Using device: {device}")

# ROUGE evaluation helper
def compute_rouge_scores(predictions, references):
    """Return average ROUGE-1/2/L F1 for paired predictions and references.

    Empty strings on either side are replaced by the placeholder "empty"
    before scoring. If scoring raises, the error is logged and all three
    scores are reported as 0.
    """
    scorer = Rouge()

    # Substitute a placeholder for empty strings.
    hyps = [text or "empty" for text in predictions]
    refs = [text or "empty" for text in references]

    try:
        averaged = scorer.get_scores(hyps, refs, avg=True)
        return {
            metric: averaged[metric]['f']
            for metric in ('rouge-1', 'rouge-2', 'rouge-l')
        }
    except Exception as e:
        logger.write(f"Error computing ROUGE: {e}")
        return {'rouge-1': 0, 'rouge-2': 0, 'rouge-l': 0}

# Training function
def train_epoch(model, data_loader, optimizer, scheduler, device):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: seq2seq model returning an object with ``.loss`` when called
            with ``labels``.
        data_loader: yields dicts with ``input_ids``, ``attention_mask`` and
            ``labels`` tensors.
        optimizer: optimizer stepped once per batch.
        scheduler: LR scheduler stepped once per batch.
        device: device the batch tensors are moved to.

    Returns:
        Mean of the per-batch losses over the epoch.
    """
    model.train()
    total_loss = 0

    # Targets are padded to a fixed length; padding positions are set to -100
    # (the index the loss ignores) so they do not contribute to the loss.
    pad_token_id = getattr(getattr(model, 'config', None), 'pad_token_id', None)

    progress_bar = tqdm(data_loader, desc='Training')

    for batch in progress_bar:
        # Move the batch to the target device.
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        if pad_token_id is not None:
            labels = labels.masked_fill(labels == pad_token_id, -100)

        optimizer.zero_grad()

        # Forward pass; the model computes the loss from the labels.
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )

        loss = outputs.loss
        loss_value = loss.item()
        total_loss += loss_value

        loss.backward()

        # Clip the gradient norm to 1.0 before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()

        progress_bar.set_postfix({'loss': loss_value})

        # Per-step WandB logging.
        wandb.log({
            'train_loss': loss_value,
            'learning_rate': scheduler.get_last_lr()[0]
        })

    avg_loss = total_loss / len(data_loader)
    return avg_loss

# Validation function
def evaluate(model, data_loader, tokenizer, device, num_samples=None):
    """Compute validation loss and ROUGE scores for ``model``.

    Generation settings are read from the module-level ``config``
    (``model.max_target_length``, ``evaluation.num_beams``,
    ``evaluation.no_repeat_ngram_size``).

    Args:
        model: seq2seq model supporting ``labels`` and ``generate()``.
        data_loader: yields dicts with ``input_ids``, ``attention_mask`` and
            ``labels`` tensors.
        tokenizer: used to decode generated ids and label ids.
        device: device the batch tensors are moved to.
        num_samples: optional cap on the number of *batches* evaluated.

    Returns:
        ``(avg_loss, rouge_scores, sample_predictions)``: the mean loss over
        the batches actually evaluated, the dict from
        ``compute_rouge_scores`` and the first five decoded predictions.
    """
    model.eval()
    total_loss = 0
    batches_seen = 0
    predictions = []
    references = []

    # Padding positions are excluded from the loss via the ignore index -100.
    pad_token_id = getattr(tokenizer, 'pad_token_id', None)

    with torch.no_grad():
        progress_bar = tqdm(data_loader, desc='Evaluating')

        for i, batch in enumerate(progress_bar):
            if num_samples and i >= num_samples:
                break

            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # The original (unmasked) labels are kept for decoding below;
            # only the copy used for the loss is masked.
            loss_labels = labels
            if pad_token_id is not None:
                loss_labels = labels.masked_fill(labels == pad_token_id, -100)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=loss_labels
            )
            total_loss += outputs.loss.item()
            batches_seen += 1

            # Generate predictions.
            generated_ids = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_length=config['model']['max_target_length'],
                num_beams=config['evaluation']['num_beams'],
                early_stopping=True,
                no_repeat_ngram_size=config['evaluation']['no_repeat_ngram_size']
            )

            preds = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
            refs = tokenizer.batch_decode(labels, skip_special_tokens=True)

            predictions.extend(preds)
            references.extend(refs)

    rouge_scores = compute_rouge_scores(predictions, references)
    # Average over the batches actually processed: dividing by
    # len(data_loader) under-reports the loss when num_samples stops early.
    avg_loss = total_loss / batches_seen if batches_seen else 0.0

    return avg_loss, rouge_scores, predictions[:5]  # first few predictions as samples

print("Training functions defined successfully!")

In [None]:
# Optimizer and scheduler setup.
# The epoch count is read from 'num_epochs', falling back to 'epochs', then 3.
num_epochs = config['training'].get('num_epochs', config['training'].get('epochs', 3))
learning_rate = config['training']['learning_rate']
# Convert the learning rate to float when the config delivers it as a string.
if isinstance(learning_rate, str):
    learning_rate = float(learning_rate)
    print(f"Learning rate converted from string to float: {learning_rate}")

# One scheduler step per batch, so total steps = epochs * batches per epoch.
num_training_steps = num_epochs * len(train_loader)

optimizer = AdamW(
    model.parameters(),
    lr=learning_rate,  # already a float at this point
    weight_decay=config['training']['weight_decay']
)

# Linear warmup for warmup_ratio of the total steps, then linear decay.
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(num_training_steps * config['training']['warmup_ratio']),
    num_training_steps=num_training_steps
)

logger.write(f"Optimizer and scheduler initialized")
logger.write(f"Learning rate: {learning_rate}")
logger.write(f"Total training steps: {num_training_steps}")
logger.write(f"Warmup steps: {int(num_training_steps * config['training']['warmup_ratio'])}")

# Per-epoch training history.
training_history = {
    'train_loss': [],
    'val_loss': [],
    'rouge_1': [],
    'rouge_2': [],
    'rouge_l': []
}

# Early stopping state: best validation ROUGE-L so far and the number of
# consecutive epochs without improvement.
best_rouge_l = 0
patience = config['training']['early_stopping_patience']
patience_counter = 0

# Checkpoint location, taken from the config's output_dir.
model_dir = get_path(config['paths']['output_dir'])
model_dir.mkdir(parents=True, exist_ok=True)
best_model_path = model_dir / 'best_model.pt'

logger.write("=" * 50)
logger.write("Starting training...")
logger.write("=" * 50)

# Training loop: train one epoch, validate, record metrics, checkpoint on a new
# best ROUGE-L and stop early after `patience` epochs without improvement.
for epoch in range(num_epochs):
    logger.write(f"\nEpoch {epoch + 1}/{num_epochs}")
    logger.write("-" * 30)

    # Training
    train_loss = train_epoch(model, train_loader, optimizer, scheduler, device)
    logger.write(f"Average training loss: {train_loss:.4f}")
    training_history['train_loss'].append(train_loss)

    # Validation (whole validation loader; num_samples is not passed)
    val_loss, rouge_scores, sample_preds = evaluate(model, val_loader, tokenizer, device)

    logger.write(f"Validation loss: {val_loss:.4f}")
    logger.write(f"ROUGE-1 F1: {rouge_scores['rouge-1']:.4f}")
    logger.write(f"ROUGE-2 F1: {rouge_scores['rouge-2']:.4f}")
    logger.write(f"ROUGE-L F1: {rouge_scores['rouge-l']:.4f}")

    # Record the epoch's metrics.
    training_history['val_loss'].append(val_loss)
    training_history['rouge_1'].append(rouge_scores['rouge-1'])
    training_history['rouge_2'].append(rouge_scores['rouge-2'])
    training_history['rouge_l'].append(rouge_scores['rouge-l'])

    # Per-epoch WandB logging.
    wandb.log({
        'epoch': epoch + 1,
        'train_loss_epoch': train_loss,
        'val_loss': val_loss,
        'rouge_1': rouge_scores['rouge-1'],
        'rouge_2': rouge_scores['rouge-2'],
        'rouge_l': rouge_scores['rouge-l']
    })

    # Log the first two sample predictions (first 100 characters each).
    logger.write("\nSample predictions:")
    for i, pred in enumerate(sample_preds[:2]):
        logger.write(f"  Sample {i+1}: {pred[:100]}...")

    # Save a checkpoint only on a strict ROUGE-L improvement. If ROUGE-L never
    # rises above the initial 0, no checkpoint file is written.
    if rouge_scores['rouge-l'] > best_rouge_l:
        best_rouge_l = rouge_scores['rouge-l']
        patience_counter = 0

        # NOTE(review): the whole config dict is stored in the checkpoint; if it
        # carries solar_api.api_key the key ends up in the file - confirm this
        # is acceptable.
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'rouge_scores': rouge_scores,
            'config': config
        }, best_model_path)

        logger.write(f"✓ New best model saved! (ROUGE-L: {best_rouge_l:.4f})")
    else:
        patience_counter += 1
        if patience_counter >= patience:
            logger.write(f"Early stopping triggered after {epoch + 1} epochs")
            break

    # Memory cleanup between epochs (skipped on the early-stopping break).
    torch.cuda.empty_cache()
    gc.collect()

logger.write("\n" + "=" * 50)
logger.write(f"Training completed!")
logger.write(f"Best ROUGE-L: {best_rouge_l:.4f}")
logger.write("=" * 50)

## 테스트 데이터 예측 및 제출 파일 생성

In [None]:
# Load the best checkpoint written by the training loop.
# The loop only saves on a strict ROUGE-L improvement, so the file may not
# exist; in that case keep the current in-memory weights instead of crashing.
if best_model_path.exists():
    # map_location keeps the load working when the checkpoint was saved on a
    # different device than the one in use now (e.g. GPU -> CPU).
    checkpoint = torch.load(best_model_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    logger.write(f"Best model loaded from epoch {checkpoint['epoch'] + 1}")
    logger.write(f"Best ROUGE scores: {checkpoint['rouge_scores']}")
else:
    logger.write(f"No checkpoint found at {best_model_path}; using current model weights")

# Test-set prediction
def generate_predictions(model, data_loader, tokenizer, device):
    """Generate a summary for every item of ``data_loader``.

    Batches must provide ``input_ids``, ``attention_mask`` and ``idx``.
    Generation parameters come from ``config['inference']``. The decoded
    summaries are returned ordered by ``idx``.
    """
    model.eval()
    indexed_predictions = []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc='Generating predictions'):
            generated_ids = model.generate(
                input_ids=batch['input_ids'].to(device),
                attention_mask=batch['attention_mask'].to(device),
                max_length=config['inference']['max_length'],
                num_beams=config['inference']['num_beams'],
                early_stopping=config['inference']['early_stopping'],
                no_repeat_ngram_size=config['inference']['no_repeat_ngram_size'],
                length_penalty=config['inference']['length_penalty'],
                temperature=config['inference']['temperature']
            )

            decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

            # Pair every summary with the dataset position it belongs to.
            indexed_predictions.extend(zip(batch['idx'].tolist(), decoded))

    # Restore dataset order.
    indexed_predictions.sort()
    return [summary for _, summary in indexed_predictions]

# Run prediction over the test loader.
logger.write("\nGenerating predictions for test set...")
test_predictions = generate_predictions(model, test_loader, tokenizer, device)
logger.write(f"Generated {len(test_predictions)} predictions")

# Print up to three sample predictions (first 150 characters each).
print("\nSample test predictions:")
for i in range(min(3, len(test_predictions))):
    print(f"Test {i+1}: {test_predictions[i][:150]}...")
    print("-" * 50)

In [None]:
# Build the submission table: one row per test file name with its predicted summary.
submission_df = pd.DataFrame({
    'fname': test_df['fname'],
    'summary': test_predictions
})

# Save the submission under the config's submission_dir.
submission_dir = get_path(config['paths']['submission_dir'])
submission_dir.mkdir(parents=True, exist_ok=True)

# NOTE: this rebinds `timestamp`, so from here on it no longer matches the
# timestamp used in the log file name.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
submission_filename = f'baseline_submission_{timestamp}.csv'
submission_path = submission_dir / submission_filename

# The row index is written as the first CSV column (index=True).
submission_df.to_csv(submission_path, index=True, encoding='utf-8')  # changed from index=False to index=True
logger.write(f"\nSubmission file saved: {submission_path}")

# Quick look at the submission that was written.
print(f"\nSubmission file created: {submission_filename}")
print(f"Shape: {submission_df.shape}")
print("\nFirst 3 submissions:")
print(submission_df.head(3))

# Final experiment summary
print("\n" + "=" * 50)
print("BASELINE EXPERIMENT SUMMARY")
print("=" * 50)
print(f"Model: {config['model']['name']}")
print(f"Best ROUGE-L: {best_rouge_l:.4f}")
print(f"Training epochs: {len(training_history['train_loss'])}")
print(f"Final train loss: {training_history['train_loss'][-1]:.4f}")
print(f"Final val loss: {training_history['val_loss'][-1]:.4f}")
print(f"Submission file: {submission_filename}")
print("=" * 50)

# Close the WandB run.
wandb.finish()

logger.write("\n✅ Baseline experiment completed successfully!")
# Report the file the logger actually writes to (baseline_<timestamp>.log);
# 'baseline.log' is never created.
logger.write(f"Log file: {log_file}")