# SAEB-Flow — Astex Diverse 85 Benchmark

**From GitHub → Kaggle → Auto-packaged results**  
每次執行都從 GitHub 拉最新代碼，清理舊檔，跑完自動打包成 zip 下載。

In [None]:
# ═══════════════════════════════════════════════
# Cell 1: Environment setup & removal of stale files
# ═══════════════════════════════════════════════
import os, shutil, subprocess, sys

# Paths shared by every later cell.
WORK = '/kaggle/working'
REPO_DIR = f'{WORK}/MaxFlow'
SRC_DIR  = f'{REPO_DIR}/src'
PDB_DIR  = '/kaggle/input/astex-diverse'  # Kaggle Dataset

# --- Remove leftovers of a previous run: outputs, cache, and the old clone ---
stale_paths = (f'{WORK}/results', f'{WORK}/plots', f'{WORK}/cache', REPO_DIR)
for stale in stale_paths:
    if not os.path.exists(stale):
        continue
    shutil.rmtree(stale)
    print(f'Removed: {stale}')

# Recreate the (now empty) output directories.
for subdir in ('results', 'plots'):
    os.makedirs(f'{WORK}/{subdir}', exist_ok=True)
print('Workspace clean.')

In [None]:
# ═══════════════════════════════════════════════
# Cell 2: Pull the latest code from GitHub
# ═══════════════════════════════════════════════
GITHUB_REPO = 'https://github.com/DragX0826/MaxFlow.git'

# Shallow clone (--depth=1) into REPO_DIR; output is captured so it can be
# echoed into the notebook below.
result = subprocess.run(
    ['git', 'clone', '--depth=1', GITHUB_REPO, REPO_DIR],
    capture_output=True, text=True
)
print(result.stdout or result.stderr)

# Fail fast on a broken clone. Without this check the cell would continue and
# die in `git rev-parse` below with an error that hides the real cause.
if result.returncode != 0:
    raise RuntimeError(
        f'git clone failed (exit code {result.returncode}): {GITHUB_REPO}'
    )

# Record the short commit hash; it is used later in the archive label and in
# the results summary.
git_hash = subprocess.check_output(
    ['git', '-C', REPO_DIR, 'rev-parse', '--short', 'HEAD']
).decode().strip()
print(f'Cloned: {GITHUB_REPO}  @{git_hash}')

# Make the cloned sources importable.
if SRC_DIR not in sys.path:
    sys.path.insert(0, SRC_DIR)
print(f'Source: {SRC_DIR}')

In [None]:
# ═══════════════════════════════════════════════
# Cell 3: Install dependencies
# ═══════════════════════════════════════════════
# Invoke pip through the running interpreter (`python -m pip`) so packages are
# installed into the same environment that later launches run_benchmark.py
# via sys.executable; a bare `pip` on PATH may belong to another Python.
# check=True aborts the cell if the installation fails.
subprocess.run(
    [sys.executable, '-m', 'pip', 'install',
     '-r', f'{SRC_DIR}/requirements.txt', '-q'],
    check=True
)
print('Dependencies ready.')

In [None]:
# ═══════════════════════════════════════════════
# Cell 4: Verify that the Astex PDB dataset is present
# ═══════════════════════════════════════════════
import glob

# Only top-level *.pdb files in PDB_DIR are counted (no recursion).
pdb_files = glob.glob(f'{PDB_DIR}/*.pdb')
print(f'Astex PDB Dataset: {len(pdb_files)} files found at {PDB_DIR}')

# Warn (but do not abort) when the dataset has not been attached.
if not pdb_files:
    for warning_line in (
        'WARNING: No PDB files found! Add the Kaggle dataset "astex-diverse".',
        '         (Upload data/astex_pdb/ from running scripts/download_astex.py locally)',
    ):
        print(warning_line)

In [None]:
# ═══════════════════════════════════════════════
# Cell 5: Run the benchmark (sequential mode, Kaggle-safe)
# ═══════════════════════════════════════════════
# Command line for run_benchmark.py, launched with the notebook's own
# interpreter so it sees the same installed packages.
cmd = [
    sys.executable, f'{SRC_DIR}/run_benchmark.py',
    '--bench_astex',
    '--pdb_dir',    PDB_DIR,
    '--steps',      '300',
    '--batch_size', '16',
    '--mode',       'inference',
    '--kaggle',
    '--seed',       '42',
    '--output_dir', f'{WORK}/results',
]

print('Starting benchmark...')
# Every log line is echoed live and also kept in `lines`.
lines = []
# stderr is merged into stdout so a single stream carries the whole log;
# text=True with bufsize=1 gives line-buffered text output.
# The context manager closes the pipe and waits for the child on exit, even
# if the streaming loop is interrupted, so no pipe or process is left behind.
with subprocess.Popen(
    cmd,
    stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
    text=True, bufsize=1
) as proc:
    for line in proc.stdout:
        line = line.rstrip()
        lines.append(line)
        print(line)
# Leaving the `with` block has waited for the process, so returncode is set.
print(f'\nBenchmark exit code: {proc.returncode}')

In [None]:
# ═══════════════════════════════════════════════
# Cell 6: Automatically package the results
# ═══════════════════════════════════════════════
# Collect plots from SRC_DIR/plots and from ./plots into WORK/plots.
dest_plot_dir = f'{WORK}/plots'
os.makedirs(dest_plot_dir, exist_ok=True)
for src_plot_dir in [f'{SRC_DIR}/plots', 'plots']:
    if not os.path.isdir(src_plot_dir):
        continue
    # When the working directory is WORK, the relative 'plots' directory IS
    # the destination; copying a file onto itself raises shutil.SameFileError.
    if os.path.samefile(src_plot_dir, dest_plot_dir):
        continue
    for plot_path in glob.glob(f'{src_plot_dir}/*'):
        # shutil.copy handles regular files only; skip sub-directories.
        if os.path.isfile(plot_path):
            shutil.copy(plot_path, dest_plot_dir)

# Run the repository's packing script; the label records the run settings
# and the commit that produced the results.
pack_cmd = [
    sys.executable, f'{SRC_DIR}/scripts/pack_results.py',
    '--src',    WORK,
    '--output', WORK,
    '--label',  f'astex85_s300_b16_git{git_hash}',
]
proc2 = subprocess.run(pack_cmd, capture_output=True, text=True)
print(proc2.stdout)
# Surface packing failures instead of silently discarding stderr.
if proc2.returncode != 0:
    print(f'pack_results.py failed (exit code {proc2.returncode}):')
    print(proc2.stderr)

# List everything that can be downloaded from the working directory.
print('\n=== Files available for download (/kaggle/working/) ===')
for f in sorted(os.listdir(WORK)):
    size_mb = os.path.getsize(f'{WORK}/{f}') / 1e6
    print(f'  {f:50s}  {size_mb:.2f} MB')
In [None]:
# ═══════════════════════════════════════════════
# Cell 7: Results overview
# ═══════════════════════════════════════════════
import pandas as pd
import numpy as np
import IPython.display as display

csv_path = f'{WORK}/results/benchmark_results.csv'
if not os.path.exists(csv_path):
    print('No results CSV found.')
else:
    df = pd.read_csv(csv_path)
    arr = df['best_rmsd'].values

    # Success masks at the two RMSD thresholds reported below.
    n_total = len(arr)
    under_2 = arr < 2.0
    under_5 = arr < 5.0

    print(f'\n══════════════════════════════════════')
    print(f'  SAEB-Flow  |  Astex Diverse 85')
    print(f'  SR@2A  : {under_2.mean()*100:.1f}%  ({under_2.sum()}/{n_total})')
    print(f'  SR@5A  : {under_5.mean()*100:.1f}%  ({under_5.sum()}/{n_total})')
    print(f'  Median : {np.median(arr):.2f}A  |  Mean: {arr.mean():.2f}A')
    print(f'  git    : {git_hash}')
    print(f'══════════════════════════════════════\n')

    # Full table, best (lowest RMSD) rows first.
    ranked = df.sort_values('best_rmsd').reset_index(drop=True)
    display.display(ranked)

# Show every PNG plot inline, in filename order.
plot_paths = glob.glob(f'{WORK}/plots/*.png')
plot_paths.sort()
for plot_path in plot_paths:
    print(f'\n>>> {os.path.basename(plot_path)}')
    display.display(display.Image(plot_path, width=800))