In [None]:
# =========================================================
# Cell 1: Setup Environment, Mount Drive, and Clone Repo
# =========================================================
from google.colab import drive
import os
from huggingface_hub import login
from google.colab import userdata

print("🚀 Starting setup...")

# 1. Mount Google Drive so that outputs are stored persistently.
drive.mount('/content/drive')

# 2. Define the project path inside Google Drive that holds the model and data.
# All important files are saved here, so they are not lost when the Colab
# session ends.
PROJECT_PATH_GDRIVE = "/content/drive/MyDrive/GSM8K_Router_Project"
os.makedirs(PROJECT_PATH_GDRIVE, exist_ok=True)
print(f"✅ Google Drive path set to: {PROJECT_PATH_GDRIVE}")

# 3. 克隆你的GitHub仓库
GITHUB_REPO_URL = "https://github.com/YourUsername/Your_Project_Folder.git" # <--- 务必修改为你的仓库地址
PROJECT_NAME = "Your_Project_Folder" # <--- 你的仓库文件夹名

if os.path.exists(PROJECT_NAME):
    !rm -rf {PROJECT_NAME} # 如果已存在，先删除旧的，确保代码最新

print(f"🔄 Cloning repository from GitHub...")
!git clone {GITHUB_REPO_URL}
os.chdir(PROJECT_NAME) # 进入项目目录
print(f"✅ Successfully cloned and current directory is: {os.getcwd()}")


# 4. 安装依赖库
print("\n📦 Installing required libraries...")
!pip install transformers datasets accelerate bitsandbytes sentencepiece pandas scipy -q
print("✅ Libraries installed.")

# 5. Hugging Face authentication.
# Login is best-effort: any failure is reported and setup continues.
print("\n🔑 Authenticating with Hugging Face...")
try:
    # Read the token from Colab Secrets, then hand it to huggingface_hub.
    hf_token = userdata.get("HF_TOKEN")
    login(token=hf_token)
except Exception as auth_error:
    print(f"⚠️ Could not login to HuggingFace. Please set up HF_TOKEN in Colab Secrets. Error: {auth_error}")
else:
    print("✅ HuggingFace login successful.")

print("\n🎉 Setup complete! You are ready to run the training or evaluation.")

In [ ]:
# =========================================================
# Cell 2: Run the Training Script
# =========================================================
import os

# 将Google Drive的项目路径作为环境变量传递给Python脚本
# 这样脚本内部就能知道要把模型和数据保存在哪里
os.environ['PROJECT_PATH_GDRIVE'] = PROJECT_PATH_GDRIVE

print("🔥" * 20)
print("Starting the training process... (This may take several hours)")
print(f"Output files (model and data) will be saved to: {PROJECT_PATH_GDRIVE}")
print("🔥" * 20)

# 使用 '!' 直接在Colab中执行你的Python训练脚本
!python train_router.py

print("\n🎉 Training process finished!")
print(f"Please check your Google Drive at '{PROJECT_PATH_GDRIVE}' for 'router_model.pth' and 'router_training_data.jsonl'.")

In [ ]:
# =========================================================
# Cell 3: Run the Evaluation Script
# =========================================================
import os

# 同样地，传递Google Drive的项目路径
os.environ['PROJECT_PATH_GDRIVE'] = PROJECT_PATH_GDRIVE

print("📊" * 20)
print("Starting the evaluation process...")
print(f"The script will load the trained model from: {os.path.join(PROJECT_PATH_GDRIVE, 'router_model.pth')}")
print("📊" * 20)

!python evaluate_system.py

print("\n🎉 Evaluation process finished!")