In [24]:
# =============================================================================
# Colab启动 - 健壮版（处理目录问题）
# =============================================================================
import os
import sys

# 1. Switch to /content first so the clone below lands in a known location.
try:
    os.chdir('/content')
    print(f"✅ 当前目录: {os.getcwd()}")
except OSError:
    # Only filesystem errors (missing directory, no permission, ...) trigger the
    # fallback; a bare `except:` would also swallow KeyboardInterrupt/SystemExit.
    print("⚠️  /content 目录不可用，使用 /tmp")
    os.chdir('/tmp')

# 2. 清理旧目录
print("\n🧹 清理旧文件...")
!rm -rf TimeSeriesForecast

# 3. 克隆代码
print("\n📥 克隆代码...")
!git clone https://github.com/Haiming123319/TimeSeriesForecast.git

# 4. 切换到项目目录
os.chdir('TimeSeriesForecast')
print(f"✅ 项目目录: {os.getcwd()}")

# 5. Report, one line per file, whether each key project script exists in the
#    current directory (check mark when present, cross when missing).
print("\n📁 验证文件:")
key_files = ['generate_optimized_aemo_data.py', 'three_stage_training.py', 'run.py']
for script in key_files:
    mark = '✅' if os.path.exists(script) else '❌'
    print(f"  {mark} {script}")

# 6. 安装依赖
print("\n📦 安装依赖...")
!pip install -q torch numpy pandas scikit-learn matplotlib einops transformers statsmodels scipy

# 7. Patch the cloned sources so that missing optional packages (sktime,
#    patoolib) no longer make the project modules fail at import time.
print("\n🔧 修复导入问题...")

import re


def make_import_optional(path, import_stmt, fallback_name):
    """Guard an import statement in a source file with ``try/except ImportError``.

    Every line of ``path`` that consists solely of ``import_stmt`` (ignoring
    leading indentation and trailing whitespace) is replaced by a guarded
    version that binds ``fallback_name`` to ``None`` when the import fails.
    The line's own indentation is reused, so an import nested inside a function
    or class stays valid Python. A statement whose previous non-blank line is
    ``try:`` is left untouched, which makes repeated runs a no-op.

    Args:
        path: File to patch in place.
        import_stmt: Exact import statement to guard.
        fallback_name: Name assigned ``None`` in the ``except`` branch.

    Returns:
        True if the file was rewritten, False if nothing needed patching.

    Raises:
        OSError: If the file cannot be read or written.
    """
    with open(path, 'r', encoding='utf-8') as src:
        content = src.read()

    # Anchor on whole lines: a plain substring replace would also hit longer
    # statements (e.g. `import patoolib.util`) and would ignore indentation.
    pattern = re.compile(
        r'^(?P<indent>[ \t]*)' + re.escape(import_stmt) + r'[ \t]*$',
        re.MULTILINE,
    )

    def _guard(match):
        # Last non-blank line before the import; `try:` there means the
        # statement is already guarded.
        previous = content[:match.start()].rstrip().rsplit('\n', 1)[-1].strip()
        if previous == 'try:':
            return match.group(0)
        indent = match.group('indent')
        return (
            f"{indent}try:\n"
            f"{indent}    {import_stmt}\n"
            f"{indent}except ImportError:\n"
            f"{indent}    {fallback_name} = None"
        )

    patched = pattern.sub(_guard, content)
    if patched == content:
        return False

    with open(path, 'w', encoding='utf-8') as dst:
        dst.write(patched)
    return True


# (file to patch, import to guard, name bound to None when the import fails)
for patch_path, patch_stmt, patch_fallback in (
    ('data_provider/data_loader.py',
     'from sktime.datasets import load_from_tsfile_to_dataframe',
     'load_from_tsfile_to_dataframe'),
    ('data_provider/m4.py', 'import patoolib', 'patoolib'),
):
    patch_label = os.path.basename(patch_path)
    try:
        if make_import_optional(patch_path, patch_stmt, patch_fallback):
            print(f"  ✅ 修复 {patch_label}")
        else:
            print(f"  ℹ️  {patch_label}: 无需修复")
    except Exception as e:
        # Best effort: a file that cannot be patched must not abort the setup cell.
        print(f"  ⚠️  {patch_label}: {e}")

# 8. Print the PyTorch version and whether a CUDA device is usable.
print("\n✅ 环境验证:")
import torch

has_cuda = torch.cuda.is_available()
print(f"  PyTorch: {torch.__version__}")
print(f"  CUDA: {has_cuda}")

# Name the first CUDA device when there is one, otherwise note the fallback.
device_line = f"  GPU: {torch.cuda.get_device_name(0)}" if has_cuda else "  设备: CPU/MPS"
print(device_line)

print("\n🎉 环境设置完成！可以继续下一步")

✅ 当前目录: /content

🧹 清理旧文件...

📥 克隆代码...
Cloning into 'TimeSeriesForecast'...
remote: Enumerating objects: 2168, done.[K
remote: Counting objects: 100% (75/75), done.[K
remote: Compressing objects: 100% (47/47), done.[K
remote: Total 2168 (delta 39), reused 60 (delta 27), pack-reused 2093 (from 1)[K
Receiving objects: 100% (2168/2168), 78.47 MiB | 14.24 MiB/s, done.
Resolving deltas: 100% (1484/1484), done.
✅ 项目目录: /content/TimeSeriesForecast

📁 验证文件:
  ✅ generate_optimized_aemo_data.py
  ✅ three_stage_training.py
  ✅ run.py

📦 安装依赖...

🔧 修复导入问题...
  ✅ 修复 data_loader.py
  ✅ 修复 m4.py

✅ 环境验证:
  PyTorch: 2.8.0+cu126
  CUDA: True
  GPU: Tesla T4

🎉 环境设置完成！可以继续下一步


In [25]:
# 代码块2: 生成数据
import os

# 确保在正确目录
if not os.getcwd().endswith('TimeSeriesForecast'):
    os.chdir('/content/TimeSeriesForecast')

print(f"当前目录: {os.getcwd()}")
print("\n🔧 生成优化数据...")

!python3 generate_optimized_aemo_data.py

# Verify the generated CSV files: count them per sampling frequency and show
# the row count of one sample file for each frequency.
import re

import pandas as pd

data_dir = './data/AEMO_optimized'


def files_for_freq(files, freq):
    """Return, sorted, the names in ``files`` that carry the frequency tag ``freq``.

    The tag must not be preceded by a digit, so ``'5min'`` does not match
    ``'NSW_15min.csv'`` (a plain substring test would count it). Sorting makes
    the "first" file deterministic instead of depending on listing order.

    Args:
        files: Iterable of file names.
        freq: Frequency tag such as ``'5min'``.

    Returns:
        Sorted list of matching file names (empty if none match).
    """
    tag = re.compile(r'(?<!\d)' + re.escape(freq))
    return sorted(name for name in files if tag.search(name))


if os.path.exists(data_dir):
    files = [f for f in os.listdir(data_dir) if f.endswith('.csv')]
    print(f"\n✅ 生成了 {len(files)} 个文件:")

    for freq in ['30min', '15min', '5min']:
        freq_files = files_for_freq(files, freq)
        if freq_files:
            print(f"\n  {freq}: {len(freq_files)} 个")
            sample = freq_files[0]
            df = pd.read_csv(f'{data_dir}/{sample}')
            print(f"    样例: {sample} ({len(df)} 行)")
else:
    print("❌ 数据目录未创建")

当前目录: /content/TimeSeriesForecast

🔧 生成优化数据...
🔧 生成优化的AEMO数据
策略：
  - 30分钟数据：15个月（足够覆盖季节性）
  - 5分钟数据：6个月（高频，避免过旧数据）
  - 15分钟数据：从5分钟降采样

[1/5] 处理 NSW...
  生成 NSW 30min 数据: 15个月 = 457天 = 21936条记录
  ✅ 保存 ./data/AEMO_optimized/NSW_30min.csv
  生成 NSW 5min 数据: 6个月 = 183天 = 52704条记录
  ✅ 保存 ./data/AEMO_optimized/NSW_5min.csv
  ✅ 保存 ./data/AEMO_optimized/NSW_15min.csv (从5min降采样)

[2/5] 处理 QLD...
  生成 QLD 30min 数据: 15个月 = 457天 = 21936条记录
  ✅ 保存 ./data/AEMO_optimized/QLD_30min.csv
  生成 QLD 5min 数据: 6个月 = 183天 = 52704条记录
  ✅ 保存 ./data/AEMO_optimized/QLD_5min.csv
  ✅ 保存 ./data/AEMO_optimized/QLD_15min.csv (从5min降采样)

[3/5] 处理 VIC...
  生成 VIC 30min 数据: 15个月 = 457天 = 21936条记录
  ✅ 保存 ./data/AEMO_optimized/VIC_30min.csv
  生成 VIC 5min 数据: 6个月 = 183天 = 52704条记录
  ✅ 保存 ./data/AEMO_optimized/VIC_5min.csv
  ✅ 保存 ./data/AEMO_optimized/VIC_15min.csv (从5min降采样)

[4/5] 处理 SA...
  生成 SA 30min 数据: 15个月 = 457天 = 21936条记录
  ✅ 保存 ./data/AEMO_optimized/SA_30min.csv
  生成 SA 5min 数据: 6个月 = 183天 = 52704条记录
  ✅ 保存 ./data/

In [26]:
# 安装缺失的 reformer_pytorch
!pip install -q reformer_pytorch

print("✅ reformer_pytorch 安装完成！")

# 验证
try:
    from reformer_pytorch import LSHSelfAttention
    print("✅ reformer_pytorch 导入成功")
except:
    print("❌ 导入失败，请重启Runtime")

✅ reformer_pytorch 安装完成！
✅ reformer_pytorch 导入成功


In [27]:
import platform

import torch

# Query CUDA availability once and reuse it for both report lines.
gpu_ready = torch.cuda.is_available()
device_name = torch.cuda.get_device_name(0) if gpu_ready else "cpu"

print("cuda?", gpu_ready)
print("device:", device_name)
print("torch:", torch.__version__, "python:", platform.python_version())


cuda? True
device: Tesla T4
torch: 2.8.0+cu126 python: 3.12.12


In [None]:
import os
os.chdir('/content/TimeSeriesForecast')

print("🚀 开始三阶段训练...")
!python3 three_stage_training.py

🚀 开始三阶段训练...
🚀 AEMO时序预测 - 三阶段优化训练
策略说明:
  阶段1: 模型筛选（2州 × 4模型 = 8个实验，约1-2小时）
  阶段2: 扩展验证（5州 × 2频率 × 2模型 = 20个实验，约3-4小时）
  阶段3: 步长扩展（5州 × 3步长 × 1模型 = 15个实验，约2-3小时）
  总计: 约43个实验，相比原方案（120个）减少64%
✅ cuDNN benchmark 已启用

🖥️  设备: GPU (CUDA: Tesla T4)
💾 显存: 14.7 GB
⚡ AMP: 已启用 (FP16混合精度)
👷 Workers: 2 (优化Colab性能)

🎯 阶段1: 模型筛选（快速对比4个模型）
配置:
  - 州: NSW (体量大), SA (波动大)
  - 频率: 30min
  - 预测步长: 24 (12小时)
  - 训练轮数: 8 epochs, 早停patience=2
  - 目标: 选出前2个最优模型

[1/8] 🚀 NSW | DLinear
--------------------------------------------------------------------------------
Using GPU
Args in experiment:
[1mBasic Config[0m
  Task Name:          long_term_forecast  Is Training:        1                   
  Model ID:           stage1_NSW_DLinear  Model:              DLinear             

[1mData Loader[0m
  Data:               custom              Root Path:          ./data/AEMO_optimized/
  Data Path:          NSW_30min.csv       Features:           M                   
  Target:             Price               Freq

In [None]:
import os
import pandas as pd

# Enter the project checkout unless the kernel is already inside it. The setup
# cell may have cloned under /tmp when /content was unavailable, and an
# unconditional chdir to /content/TimeSeriesForecast would then fail.
if not os.getcwd().endswith('TimeSeriesForecast'):
    os.chdir('/content/TimeSeriesForecast')

# Read the aggregated results file and print the best successful experiments.
RESULTS_CSV = './three_stage_results/all_results.csv'
SUMMARY_COLUMNS = ['model', 'state', 'freq', 'pred_len', 'mae']


def successful_runs(results):
    """Return the rows of ``results`` whose ``success`` value equals True.

    An empty frame with the same columns is returned when the ``success``
    column is absent, instead of raising ``KeyError``.
    """
    if 'success' not in results.columns:
        return results.iloc[0:0]
    return results[results['success'].eq(True)]


def best_by_mae(runs, n=5):
    """Return the ``n`` rows of ``runs`` with the lowest ``mae``.

    Only the summary columns that actually exist in ``runs`` are kept, so a
    results file lacking e.g. ``freq`` still produces a table.

    Returns:
        A DataFrame sorted by ascending ``mae``, or None when ``runs`` is empty
        or has no ``mae`` column.
    """
    if runs.empty or 'mae' not in runs.columns:
        return None
    shown = [col for col in SUMMARY_COLUMNS if col in runs.columns]
    return runs.sort_values('mae')[shown].head(n)


if os.path.exists(RESULTS_CSV):
    df = pd.read_csv(RESULTS_CSV)
    df_success = successful_runs(df)

    print(f"📊 完成 {len(df_success)}/{len(df)} 个实验")

    top_runs = best_by_mae(df_success)
    if top_runs is not None:
        print("\n🏆 Top 5 最佳结果:")
        print(top_runs.to_string(index=False))
else:
    print("⚠️  结果文件不存在，训练可能还未完成")

In [None]:
from google.colab import files
import os

os.chdir('/content/TimeSeriesForecast')

!zip -r results.zip ./three_stage_results/ ./results/ ./checkpoints/ -x "*.pyc"

print("📥 下载结果...")
files.download('results.zip')