## Library

In [None]:
import os
import sys
import hydra
import torch
import warnings
from pathlib import Path
from omegaconf import OmegaConf

# Silence every warning category so the notebook output stays readable.
warnings.filterwarnings('ignore')

# Put the parent of the current working directory on the module search path.
# NOTE(review): presumably this is what lets the project-local imports below
# resolve when the notebook runs from a sub-directory — confirm the layout.
project_root = Path.cwd().parent
sys.path.append(os.fspath(project_root))

# src import
from utils import info as log_info
from data import Preprocess, prepare_train_dataset, prepare_test_dataset
from model import load_tokenizer_and_model_for_train, load_tokenizer_and_model_for_test
from train import load_trainer_for_train
from inference import inference
from check_gpu import get_device

## Load Configuration and Device

In [None]:
# Hydra initialization (needed in a notebook environment): clear any existing
# global Hydra state first, then initialize against the ../conf directory.
hydra_state = hydra.core.global_hydra.GlobalHydra.instance()
hydra_state.clear()
hydra.initialize(
    config_path="../conf",
    job_name="dialogue_summarization_notebook",
)
cfg = hydra.compose(config_name="config")  # default configuration
device = get_device()  # GPU (per the original note; selection logic lives in check_gpu)

# Log the loaded configuration as YAML, followed by the PyTorch version.
config_yaml = OmegaConf.to_yaml(cfg)
log_info(config_yaml)
log_info(f"PyTorch version: {torch.__version__}")

## Model Training

In [None]:
# Load the tokenizer and model (training variants).
log_info("Loading model and tokenizer for training...")
generate_model_train, tokenizer_train = load_tokenizer_and_model_for_train(cfg)

# Prepare the training and validation datasets.
log_info("Preparing training data...")
preprocessor_train = Preprocess(cfg.tokenizer.bos_token, cfg.tokenizer.eos_token)
train_inputs_dataset, val_inputs_dataset = prepare_train_dataset(cfg, preprocessor_train, tokenizer_train)

# Build the trainer and run training.
log_info("Starting model training...")
trainer = load_trainer_for_train(cfg, generate_model_train, tokenizer_train, train_inputs_dataset, val_inputs_dataset)
trainer.train()

# Close the wandb run once training has finished; in a Jupyter environment it
# is best to call wandb.finish() explicitly.
# wandb is imported only inside the guarded branch: an unconditional import
# here would make the cell fail with ImportError *after* training whenever
# wandb is not installed, even though wandb reporting is switched off.
use_wandb = os.getenv('USE_WANDB', '').lower() == 'true'
if cfg.training.report_to == 'wandb' and use_wandb:
    import wandb

    wandb.finish()

log_info("Training complete.")

## Inference

In [None]:
# Run inference with the composed configuration, then log a preview of the
# returned frame (its first rows via .head()).
log_info("Starting inference...")
output_df = inference(cfg)
log_info("Inference complete. Output saved to CSV.")
output_preview = output_df.head()
log_info(output_preview)