In [1]:
# ==================== Cell 1: Shared Cache Bootstrap ====================
"""
目標：建立統一快取機制，所有模型/資料集都存放在 AI_CACHE_ROOT
重要：這段程式碼會被複製到每本 notebook 的第一格
"""
import os, pathlib, torch
import sys
from datetime import datetime

print(f"🚀 環境引導檢查開始 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Python 版本: {sys.version}")

# Shared cache configuration (複製到每本 notebook)
AI_CACHE_ROOT = os.getenv("AI_CACHE_ROOT", "../ai_warehouse/cache")

# Critical: Set all AI-related cache paths
cache_config = {
    "HF_HOME": f"{AI_CACHE_ROOT}/hf",
    "TRANSFORMERS_CACHE": f"{AI_CACHE_ROOT}/hf/transformers",
    "HF_DATASETS_CACHE": f"{AI_CACHE_ROOT}/hf/datasets",
    "HUGGINGFACE_HUB_CACHE": f"{AI_CACHE_ROOT}/hf/hub",
    "TORCH_HOME": f"{AI_CACHE_ROOT}/torch",
}

for key, path in cache_config.items():
    os.environ[key] = path
    pathlib.Path(path).mkdir(parents=True, exist_ok=True)
    print(f"✓ {key}: {path}")

print(f"\n📁 快取根目錄: {AI_CACHE_ROOT}")
print(f"🔥 GPU 可用: {torch.cuda.is_available()}")

🚀 環境引導檢查開始 - 2025-08-21 17:31:57
Python 版本: 3.10.18 | packaged by Anaconda, Inc. | (main, Jun  5 2025, 13:08:55) [MSC v.1929 64 bit (AMD64)]
✓ HF_HOME: ../ai_warehouse/cache/hf
✓ TRANSFORMERS_CACHE: ../ai_warehouse/cache/hf/transformers
✓ HF_DATASETS_CACHE: ../ai_warehouse/cache/hf/datasets
✓ HUGGINGFACE_HUB_CACHE: ../ai_warehouse/cache/hf/hub
✓ TORCH_HOME: ../ai_warehouse/cache/torch

📁 快取根目錄: ../ai_warehouse/cache
🔥 GPU 可用: True


In [2]:
# ==================== Cell 2: CUDA & PyTorch 詳細檢查 ====================
"""
檢查 GPU 記憶體、CUDA 版本、可用設備等關鍵資訊
"""

print("=" * 50)
print("🔍 CUDA & PyTorch 詳細檢查")
print("=" * 50)

# Basic PyTorch info
print(f"PyTorch 版本: {torch.__version__}")
print(f"CUDA 可用: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    print(f"CUDA 版本: {torch.version.cuda}")
    print(f"cuDNN 版本: {torch.backends.cudnn.version()}")
    print(f"GPU 數量: {torch.cuda.device_count()}")

    for i in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(i)
        memory_gb = props.total_memory / (1024**3)
        print(f"  GPU {i}: {props.name}")
        print(f"    總記憶體: {memory_gb:.1f} GB")
        print(f"    計算能力: {props.major}.{props.minor}")

        # Check available memory
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # Clear cache first
            allocated = torch.cuda.memory_allocated(i) / (1024**3)
            cached = torch.cuda.memory_reserved(i) / (1024**3)
            print(f"    已分配: {allocated:.2f} GB")
            print(f"    已快取: {cached:.2f} GB")
else:
    print("⚠️  CUDA 不可用，將使用 CPU 模式")
    print("   建議：檢查 GPU 驅動程式與 CUDA 安裝")

🔍 CUDA & PyTorch 詳細檢查
PyTorch 版本: 2.7.1+cu128
CUDA 可用: True
CUDA 版本: 12.8
cuDNN 版本: 90701
GPU 數量: 1
  GPU 0: NVIDIA GeForce RTX 5080
    總記憶體: 15.9 GB
    計算能力: 12.0
    已分配: 0.00 GB
    已快取: 0.00 GB


In [4]:
# ==================== Cell 3: 關鍵依賴套件版本檢查 ====================
"""
檢查 Stage 1-2 所需的核心套件是否正確安裝
"""

print("\n" + "=" * 50)
print("📦 關鍵依賴套件版本檢查")
print("=" * 50)

# Define required packages for Stage 1-2
required_packages = [
    ("torch", "2.0.0"),
    ("transformers", "4.30.0"),
    ("tokenizers", "0.13.0"),
    ("accelerate", "0.20.0"),
    ("pydantic", "2.0.0"),
    ("jsonlines", "3.0.0"),
]

optional_packages = [
    ("sentence_transformers", "2.2.0"),
    ("faiss", "1.7.0"),
    ("opencc", "1.1.0"),
    ("duckduckgo_search", "3.8.0"),
]

def check_package_version(package_name, min_version=None):
    """Check if package is installed and meets minimum version"""
    try:
        if package_name == "faiss":
            # faiss can be faiss-cpu or faiss-gpu
            try:
                import faiss
                version = "installed"
            except ImportError:
                try:
                    import faiss_cpu as faiss
                    version = "cpu"
                except ImportError:
                    return False, "not found"
        else:
            module = __import__(package_name)
            version = getattr(module, "__version__", "unknown")

        status = "✓" if min_version is None else "✓"
        return True, f"{status} {package_name}: {version}"

    except ImportError:
        return False, f"✗ {package_name}: 未安裝"
    except Exception as e:
        return False, f"✗ {package_name}: 錯誤 - {str(e)}"

print("核心套件 (必需):")
core_ok = True
for pkg, min_ver in required_packages:
    ok, msg = check_package_version(pkg, min_ver)
    print(f"  {msg}")
    if not ok:
        core_ok = False

print("\n選用套件 (Stage 2+ 需要):")
for pkg, min_ver in optional_packages:
    ok, msg = check_package_version(pkg, min_ver)
    print(f"  {msg}")

if not core_ok:
    print("\n⚠️  警告：部分核心套件缺失，請執行:")
    print("   pip install torch transformers accelerate pydantic jsonlines")



📦 關鍵依賴套件版本檢查
核心套件 (必需):
  ✓ torch: 2.7.1+cu128
  ✓ transformers: 4.55.0
  ✓ tokenizers: 0.21.4
  ✓ accelerate: 1.10.0
  ✓ pydantic: 2.11.7
  ✓ jsonlines: unknown

選用套件 (Stage 2+ 需要):
  ✓ sentence_transformers: 5.1.0
  not found
  ✓ opencc: 1.1.9
  ✓ duckduckgo_search: 8.1.1


In [6]:
# ==================== Cell 4: 快取機制測試 ====================
"""
下載一個小型 tokenizer 來驗證快取路徑是否正確設定
"""

print("\n" + "=" * 50)
print("🧪 快取機制測試")
print("=" * 50)

try:
    from transformers import AutoTokenizer

    # Use a small, fast-to-download tokenizer for testing
    test_model = "microsoft/DialoGPT-small"

    print(f"測試下載: {test_model}")
    print(f"快取位置: {os.environ.get('TRANSFORMERS_CACHE', 'default')}")

    # This should download to our cache directory
    tokenizer = AutoTokenizer.from_pretrained(test_model)

    # Verify it works
    test_text = "Hello world"
    tokens = tokenizer(test_text)

    print(f"✓ Tokenizer 下載成功")
    print(f"✓ 測試文字: '{test_text}'")
    print(f"✓ Token 數量: {len(tokens['input_ids'])}")

    # Check if files are in our cache directory
    cache_path = pathlib.Path(os.environ["TRANSFORMERS_CACHE"])
    if cache_path.exists():
        model_dirs = list(cache_path.glob("**/*/"))
        print(f"✓ 快取資料夾包含 {len(model_dirs)} 個模型")

except Exception as e:
    print(f"✗ 快取測試失敗: {str(e)}")
    print("   建議檢查網路連線與套件安裝")


🧪 快取機制測試
測試下載: microsoft/DialoGPT-small
快取位置: ../ai_warehouse/cache/hf/transformers


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


✓ Tokenizer 下載成功
✓ 測試文字: 'Hello world'
✓ Token 數量: 2
✓ 快取資料夾包含 17 個模型


In [7]:
# ==================== Cell 5: Smoke Test & 環境摘要 ====================
"""
最終煙霧測試：確保所有關鍵組件都能正常運作
"""

print("\n" + "=" * 60)
print("🔥 SMOKE TEST - 環境驗證摘要")
print("=" * 60)

# Environment summary
env_status = {
    "Python": sys.version.split()[0],
    "PyTorch": torch.__version__,
    "CUDA": torch.cuda.is_available(),
    "GPU Count": torch.cuda.device_count() if torch.cuda.is_available() else 0,
    "Cache Root": AI_CACHE_ROOT,
    "Cache Size": "calculating...",
}

# Calculate cache size
try:
    cache_size_mb = sum(
        f.stat().st_size for f in pathlib.Path(AI_CACHE_ROOT).rglob("*") if f.is_file()
    ) / (1024 * 1024)
    env_status["Cache Size"] = f"{cache_size_mb:.1f} MB"
except:
    env_status["Cache Size"] = "unknown"

# Memory info
if torch.cuda.is_available():
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
    env_status["GPU Memory"] = f"{gpu_memory:.1f} GB"

print("📊 環境狀態摘要:")
for key, value in env_status.items():
    print(f"  {key}: {value}")

# Quick functional test
print("\n🧪 功能測試:")
functional_tests = []

# Test 1: Basic tensor operations
try:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    x = torch.randn(3, 3, device=device)
    y = torch.matmul(x, x.T)
    functional_tests.append(("✓", f"Tensor operations ({device})"))
except Exception as e:
    functional_tests.append(("✗", f"Tensor operations failed: {e}"))

# Test 2: Transformers import
try:
    import transformers

    functional_tests.append(("✓", f"Transformers {transformers.__version__}"))
except Exception as e:
    functional_tests.append(("✗", f"Transformers import failed: {e}"))

# Test 3: Cache directory writable
try:
    test_file = pathlib.Path(AI_CACHE_ROOT) / "test_write.tmp"
    test_file.write_text("test")
    test_file.unlink()
    functional_tests.append(("✓", "Cache directory writable"))
except Exception as e:
    functional_tests.append(("✗", f"Cache write failed: {e}"))

for status, test in functional_tests:
    print(f"  {status} {test}")

# Final verdict
all_passed = all(test[0] == "✓" for test in functional_tests)
verdict = "🎉 環境設定完成！" if all_passed else "⚠️  部分測試未通過，請檢查上述錯誤"

print(f"\n{verdict}")
print(f"下一步：nb02 LLMAdapter (transformers) 基礎實作")


🔥 SMOKE TEST - 環境驗證摘要
📊 環境狀態摘要:
  Python: 3.10.18
  PyTorch: 2.7.1+cu128
  CUDA: True
  GPU Count: 1
  Cache Root: ../ai_warehouse/cache
  Cache Size: 1.4 MB
  GPU Memory: 15.9 GB

🧪 功能測試:
  ✓ Tensor operations (cuda)
  ✓ Transformers 4.55.0
  ✓ Cache directory writable

🎉 環境設定完成！
下一步：nb02 LLMAdapter (transformers) 基礎實作
