# Colab 环境准备与训练
依次运行下面的单元，在 Google Colab 中准备运行环境并启动 `train.py`。运行前请先通过环境变量 `SUMMARIZATION_REPO_URL` 指定仓库地址；默认地址中的 `<your-account>` 只是占位符，需要替换为实际的 GitHub 账号。

In [None]:
# Environment setup: probe the GPU, upgrade pip, clone the repository and
# install its dependencies. Safe to re-run: an existing checkout is replaced.
import os
import pathlib
import shutil
import subprocess
import sys

# GPU probe is best-effort: a missing or failing nvidia-smi is reported but
# does not stop the rest of the setup.
try:
    subprocess.run(["nvidia-smi"], check=True)
except FileNotFoundError:
    print("nvidia-smi 未找到，当前环境可能不支持 GPU")
except subprocess.CalledProcessError:
    print("nvidia-smi 调用失败，GPU 可能不可用")

subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "pip"], check=True)

# The repository URL can be overridden through SUMMARIZATION_REPO_URL; the
# default is only a template containing the "<your-account>" placeholder.
REPO_URL = os.environ.get("SUMMARIZATION_REPO_URL", "https://github.com/<your-account>/summarization.git")
PROJECT_ROOT = pathlib.Path("/content/summarization")

# Fail fast, before any existing checkout is deleted, when the placeholder URL
# was left untouched: cloning it cannot succeed.
if "<your-account>" in REPO_URL:
    raise ValueError(
        "请先设置环境变量 SUMMARIZATION_REPO_URL，或将 REPO_URL 中的 <your-account> 替换为实际的 GitHub 账号"
    )

if PROJECT_ROOT.exists():
    # On a re-run the working directory is PROJECT_ROOT itself (see os.chdir
    # below). Removing the current directory would leave the process without a
    # valid cwd and make the following git clone fail, so step out first.
    os.chdir(PROJECT_ROOT.parent)
    shutil.rmtree(PROJECT_ROOT)

subprocess.run(["git", "clone", REPO_URL, str(PROJECT_ROOT)], check=True)
os.chdir(PROJECT_ROOT)
print(f"切换到项目目录: {PROJECT_ROOT}")

# Relative path: resolved against PROJECT_ROOT thanks to the chdir above.
subprocess.run([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"], check=True)

# Imported only after the requirements have been installed.
import torch
print(f"PyTorch 版本: {torch.__version__}")
print(f"CUDA 是否可用: {torch.cuda.is_available()}")

## 启动训练
在完成环境准备后，直接导入 `train.py` 的入口函数并启动训练。该单元以当前工作目录作为项目根目录，因此请先运行上面的环境准备单元（它会切换到项目目录）。可根据需要调整配置或启用 Tiny 模式进行快速验证。

In [None]:
# Launch training by calling the entry point of train.py in-process.
import argparse
import sys
from pathlib import Path

# The current working directory is taken as the project root.
PROJECT_ROOT = Path.cwd()
# Put <project root>/src at the front of the import path so that the
# `train` module imported below is looked up there first.
sys.path.insert(0, str(PROJECT_ROOT.joinpath("src")))

from train import main

# Arguments handed to main(). Most overrides are left as None here
# (presumably meaning "use the value from the config file" -- confirm
# against train.py).
args = argparse.Namespace(
    **{
        "config": "configs/seq2seq_attn.yaml",
        "data_dir": None,
        "vocab_path": None,
        "save_dir": "./checkpoints",
        "batch_size": None,
        "max_src_len": None,
        "max_tgt_len": None,
        "epochs": None,
        "lr": None,
        "num_workers": 2,
    }
)

main(args)