In [None]:
# 1. 導入必要套件
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
import shap
from sklearn.model_selection import cross_val_score, KFold
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

# Display configuration: show every DataFrame column, use a wide default
# figure size, and apply seaborn's 'whitegrid' style to subsequent plots.
pd.options.display.max_columns = None
plt.rcParams.update({'figure.figsize': (12, 6)})
sns.set_style('whitegrid')

print("✓ 套件導入完成")

In [None]:
# 2. Load the aggregated results
results_df = pd.read_csv('results_all.csv')
column_names = results_df.columns.tolist()

# Quick overview: row count, column list, and the first three rows.
print(f"✓ 載入 {len(results_df)} 筆樣本")
print(f"\n欄位 ({len(column_names)}):")
print(column_names)
print("\n前 3 筆資料:")
print(results_df.head(3))

In [None]:
# 3. Feature engineering: separate the parameters from the measured metrics

# Candidate input features (target and identifier columns are deliberately
# kept out of this list).
candidate_features = [
    'build_R',
    'build_L',
    'build_B',
    'build_M',
    'search_K',
    'search_L',
    'search_W',
    'search_T',
    'search_io_limit',
    'num_queries',
    'dataset_size',
    'vector_dim',
    'actual_cached_nodes',
]

# Keep only the candidates that exist in the data and have a numeric dtype.
feature_cols = []
for name in candidate_features:
    if name not in results_df.columns:
        continue
    if pd.api.types.is_numeric_dtype(results_df[name]):
        feature_cols.append(name)

# Independent copy of the feature matrix used by the later cells.
X = results_df.loc[:, feature_cols].copy()

# Target metrics: maps each result column name to the human-readable label
# printed by the later cells (name-type columns are excluded).
target_metrics = {
    # Basic performance
    'qps': 'QPS (查詢/秒)',
    'mean_latency_us': '平均延遲 (µs)',
    'latency_p50_us': 'P50 延遲 (µs)',
    'latency_p75_us': 'P75 延遲 (µs)',
    'latency_p90_us': 'P90 延遲 (µs)',
    'latency_p95_us': 'P95 延遲 (µs)',
    'latency_p99_us': 'P99 延遲 (µs)',
    'latency_p999_us': 'P99.9 延遲 (µs)',
    'latency_max_us': '最大延遲 (µs)',

    # IO count
    'ios_mean': '平均 IO 數',
    'ios_p50': 'IO P50',
    'ios_p75': 'IO P75',
    'ios_p90': 'IO P90',
    'ios_p95': 'IO P95',
    'ios_p99': 'IO P99',
    'ios_max': 'IO 最大值',

    # IO time
    'io_us_mean': 'IO 平均時間 (µs)',
    'io_us_p50': 'IO P50 (µs)',
    'io_us_p75': 'IO P75 (µs)',
    'io_us_p90': 'IO P90 (µs)',
    'io_us_p95': 'IO P95 (µs)',
    'io_us_p99': 'IO P99 (µs)',
    'io_us_max': 'IO 最大值 (µs)',

    # CPU time
    'cpu_us_mean': 'CPU 平均時間 (µs)',
    'cpu_us_p50': 'CPU P50 (µs)',
    'cpu_us_p75': 'CPU P75 (µs)',
    'cpu_us_p90': 'CPU P90 (µs)',
    'cpu_us_p95': 'CPU P95 (µs)',
    'cpu_us_p99': 'CPU P99 (µs)',
    'cpu_us_max': 'CPU 最大值 (µs)',

    # Sort time
    'sort_us_mean': 'Sort 平均時間 (µs)',
    'sort_us_p50': 'Sort P50 (µs)',
    'sort_us_p75': 'Sort P75 (µs)',
    'sort_us_p90': 'Sort P90 (µs)',
    'sort_us_p95': 'Sort P95 (µs)',
    'sort_us_p99': 'Sort P99 (µs)',
    'sort_us_max': 'Sort 最大值 (µs)',

    # Read size
    'read_size_mean': '讀取量平均 (bytes)',
    'read_size_p50': '讀取量 P50 (bytes)',
    'read_size_p75': '讀取量 P75 (bytes)',
    'read_size_p90': '讀取量 P90 (bytes)',
    'read_size_p95': '讀取量 P95 (bytes)',
    'read_size_p99': '讀取量 P99 (bytes)',
    'read_size_max': '讀取量最大 (bytes)',

    # Number of comparisons
    'compares_mean': '比較次數平均',
    'compares_p50': '比較次數 P50',
    'compares_p75': '比較次數 P75',
    'compares_p90': '比較次數 P90',
    'compares_p95': '比較次數 P95',
    'compares_p99': '比較次數 P99',
    'compares_max': '比較次數最大',

    # Recall
    'recall_mean': 'Recall 平均',
    'recall_p0': 'Recall P0',
    'recall_p1': 'Recall P1',
    'recall_p5': 'Recall P5',
    'recall_p10': 'Recall P10',
    'recall_p25': 'Recall P25',
    'recall_p50': 'Recall P50',
    'recall_p75': 'Recall P75',
    'recall_p90': 'Recall P90',
    'recall_max': 'Recall 最大',

    # Cache hit rate
    'cache_hit_rate_mean': 'Cache hit 平均',
    'cache_hit_rate_p0': 'Cache hit P0',
    'cache_hit_rate_p1': 'Cache hit P1',
    'cache_hit_rate_p5': 'Cache hit P5',
    'cache_hit_rate_p10': 'Cache hit P10',
    'cache_hit_rate_p25': 'Cache hit P25',
    'cache_hit_rate_p50': 'Cache hit P50',
    'cache_hit_rate_p75': 'Cache hit P75',
    'cache_hit_rate_p90': 'Cache hit P90',
    'cache_hit_rate_max': 'Cache hit 最大',

    # Hops / Visited
    'hop_mean': 'Hop 平均',
    'hop_p50': 'Hop P50',
    'hop_p75': 'Hop P75',
    'hop_p90': 'Hop P90',
    'hop_p95': 'Hop P95',
    'hop_p99': 'Hop P99',
    'hop_max': 'Hop 最大',
    'visited_mean': 'Visited 平均',
    'visited_p50': 'Visited P50',
    'visited_p75': 'Visited P75',
    'visited_p90': 'Visited P90',
    'visited_p95': 'Visited P95',
    'visited_p99': 'Visited P99',
    'visited_max': 'Visited 最大',

    # Out degree (values are expected to become available in the future)
    'out_degree_mean': 'Out degree 平均',
    'out_degree_p0': 'Out degree P0',
    'out_degree_p1': 'Out degree P1',
    'out_degree_p5': 'Out degree P5',
    'out_degree_p10': 'Out degree P10',
    'out_degree_p25': 'Out degree P25',
    'out_degree_p50': 'Out degree P50',
    'out_degree_p75': 'Out degree P75',
    'out_degree_p90': 'Out degree P90',
    'out_degree_p95': 'Out degree P95',
    'out_degree_p99': 'Out degree P99',
    'out_degree_max': 'Out degree 最大'
}

# Report the selected features, then the observed value range of every
# target metric that is actually present in the loaded data.
print(f"✓ 特徵數量: {len(feature_cols)}")
print(f"  特徵: {feature_cols}")
print("")
print(f"✓ 目標指標數量: {len(target_metrics)}")
for metric, desc in target_metrics.items():
    if metric not in results_df.columns:
        continue
    print(f"  {metric:20s} ({desc})")
    column = results_df[metric]
    low, high = column.min(), column.max()
    print(f"    範圍: [{low:.4f}, {high:.4f}]")



In [None]:
# 4. Data quality checks
print("=== 數據缺失檢查 ===")
# `target_metrics` may name columns that are not in the loaded CSV (the other
# cells guard with `metric in results_df.columns`). Selecting an absent column
# raises KeyError, so only existing columns are checked and the absent ones
# are reported separately.
wanted_cols = feature_cols + list(target_metrics)
cols_to_check = [col for col in wanted_cols if col in results_df.columns]
absent_cols = [col for col in wanted_cols if col not in results_df.columns]
if absent_cols:
    print(f"⚠ 資料中不存在的欄位 ({len(absent_cols)}): {absent_cols}")

if cols_to_check:
    # Number of null entries per checked column.
    missing = results_df[cols_to_check].isnull().sum()
    if missing.sum() > 0:
        print("")
        print("缺失欄位:")
        print(missing[missing > 0])
    else:
        print("✓ 無缺失值")
else:
    print("⚠ 沒有可用欄位進行檢查")

print("")
print("=== 特徵統計 ===")
if feature_cols:
    print(X.describe())
else:
    print("⚠ 沒有可用特徵")



In [None]:
# 5. Train one XGBoost regressor per target metric
from sklearn.model_selection import cross_validate

print("\n" + "="*60)
print("開始訓練 XGBoost 模型")
print("="*60)

models = {}  # metric -> model fitted on all valid rows
cv_scores = {}  # metric -> {'r2': per-fold array, 'rmse': per-fold array}

# The splitter is seeded with a fixed integer, so a single instance produces
# the same folds for every target; it does not need to be rebuilt per metric.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
# Both scores are computed in ONE cross-validation pass. Calling
# cross_val_score once per score would fit every fold twice per target.
cv_scoring = {'r2': 'r2', 'rmse': 'neg_root_mean_squared_error'}

for metric, metric_name in target_metrics.items():
    if metric not in results_df.columns:
        print(f"⚠ 跳過 {metric} (資料不存在)")
        continue

    y = results_df[metric].values

    # Drop rows whose target is NaN or infinite (np.isfinite is False for both).
    valid_idx = np.isfinite(y)
    X_valid = X[valid_idx]
    y_valid = y[valid_idx]

    if len(y_valid) < 10:
        print(f"⚠ {metric}: 有效樣本過少 ({len(y_valid)})，跳過")
        continue

    # A fresh estimator per target; it is stored in `models` after fitting.
    model = xgb.XGBRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        verbosity=0
    )

    # 5-fold cross-validation; cross_validate clones the estimator per fold,
    # so `model` itself is still unfitted afterwards.
    cv_result = cross_validate(model, X_valid, y_valid, cv=kfold, scoring=cv_scoring)
    cv_r2 = cv_result['test_r2']
    # The scorer returns negated RMSE (higher is better); flip the sign back.
    cv_rmse = -cv_result['test_rmse']

    # Fit on all valid rows (this is the model used for SHAP later).
    model.fit(X_valid, y_valid)

    models[metric] = model
    cv_scores[metric] = {'r2': cv_r2, 'rmse': cv_rmse}

    print(f"\n✓ {metric_name}")
    print(f"  有效樣本: {len(y_valid)}")
    print(f"  R² (5-fold): {cv_r2.mean():.4f} ± {cv_r2.std():.4f}")
    print(f"  RMSE (5-fold): {cv_rmse.mean():.4f} ± {cv_rmse.std():.4f}")


In [None]:
# 6. Feature importance analysis (XGBoost built-in importances)
print("\n" + "="*60)
print("XGBoost 內建特徵重要性")
print("="*60)

feature_importance_all = {}

for metric, model in models.items():
    # Pair every feature with the model's importance score, highest first.
    unsorted_table = pd.DataFrame(
        {'feature': feature_cols, 'importance': model.feature_importances_}
    )
    importance = unsorted_table.sort_values('importance', ascending=False)
    feature_importance_all[metric] = importance

    print(f"\n{metric}:")
    for _, row in importance.iterrows():
        name, score = row['feature'], row['importance']
        # One block character per 0.02 of importance.
        bar = '█' * int(score * 50)
        print(f"  {name:20s} {bar} {score:.4f}")

In [None]:
# 7. SHAP feature importance analysis
print("\n" + "="*60)
print("SHAP 特徵重要性 (TreeExplainer)")
print("="*60)

shap_values_all = {}  # metric -> (shap_values, X_valid, explainer)
shap_bar_width = 50   # bar length, in characters, of the top feature

for metric, model in models.items():
    # Rebuild the valid-row mask for this target (rows with a finite value).
    y = results_df[metric].values
    valid_idx = np.isfinite(y)
    X_valid = X[valid_idx]

    # Compute SHAP values for the valid rows.
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_valid)
    shap_values_all[metric] = (shap_values, X_valid, explainer)

    # Mean |SHAP| per feature serves as the importance score.
    mean_abs_shap = np.abs(shap_values).mean(axis=0)
    shap_importance = pd.DataFrame({
        'feature': feature_cols,
        'mean_abs_shap': mean_abs_shap
    }).sort_values('mean_abs_shap', ascending=False)

    # Mean |SHAP| is expressed in the target's own units, so its magnitude
    # varies by orders of magnitude between targets. Bars are therefore drawn
    # relative to the largest value within this metric; a fixed multiplier
    # would produce bars of unbounded length for large-valued targets.
    top_value = shap_importance['mean_abs_shap'].max()

    print(f"\n{metric}:")
    for _, row in shap_importance.iterrows():
        relative = row['mean_abs_shap'] / top_value if top_value > 0 else 0.0
        bar = '█' * int(round(relative * shap_bar_width))
        print(f"  {row['feature']:20s} {bar} {row['mean_abs_shap']:.4f}")

In [None]:
# 8. SHAP summary plots
print("\n生成 SHAP 摘要圖...")

# Plot only the first three trained metrics to keep the number of figures small.
metrics_to_plot = list(models)[:3]

for metric in metrics_to_plot:
    shap_values, X_valid, explainer = shap_values_all[metric]

    plt.figure(figsize=(10, 6))
    # show=False so the title can be added before the figure is displayed.
    shap.summary_plot(shap_values, X_valid, plot_type='bar', show=False)
    plt.title(
        f"SHAP Feature Importance - {metric}",
        fontsize=14,
        fontweight='bold',
    )
    plt.tight_layout()
    plt.show()

print("✓ SHAP 圖表完成")

In [None]:
# 9. SHAP dependence plots (the two most important features per metric)
print("\n生成 SHAP 依賴圖...")

for metric in metrics_to_plot:
    shap_values, X_valid, explainer = shap_values_all[metric]

    # Rank features by mean |SHAP|; keep the two strongest, best first.
    mean_abs_shap = np.abs(shap_values).mean(axis=0)
    top_2_features = np.argsort(mean_abs_shap)[::-1][:2]

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    # One panel per selected feature, drawn left to right.
    for panel, feat_idx in zip(axes, top_2_features):
        shap.dependence_plot(
            feat_idx, shap_values, X_valid,
            feature_names=feature_cols,
            ax=panel,
            show=False
        )
        panel.set_title(f"{metric} - {feature_cols[feat_idx]}", fontweight='bold')

    plt.suptitle(f"SHAP 依賴圖 - {metric}", fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.show()

print("✓ 依賴圖完成")

print("✓ 依賴圖完成")

In [None]:
# 10. Predicted vs. actual comparison
# Imported once here rather than on every loop iteration.
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

print("\n" + "="*60)
print("模型預測準確度 (in-sample，訓練資料)")
print("="*60)

# NOTE: each model is scored on the same rows it was fitted on, so these
# numbers measure goodness of fit and are optimistic. The cross-validated
# scores stored in `cv_scores` are the estimate of generalisation performance.
for metric, model in models.items():
    # Rebuild the valid-row mask for this target (rows with a finite value).
    y = results_df[metric].values
    valid_idx = np.isfinite(y)
    X_valid = X[valid_idx]
    y_valid = y[valid_idx]

    y_pred = model.predict(X_valid)

    r2 = r2_score(y_valid, y_pred)
    mae = mean_absolute_error(y_valid, y_pred)
    rmse = np.sqrt(mean_squared_error(y_valid, y_pred))

    print(f"\n{metric}:")
    print(f"  R² 分數: {r2:.4f}")
    print(f"  平均絕對誤差 (MAE): {mae:.4f}")
    print(f"  均方根誤差 (RMSE): {rmse:.4f}")

In [None]:
# 11. Parameter sensitivity ranking summary
print("\n" + "="*60)
print("參數敏感度排序 (跨所有指標)")
print("="*60)

# Collect, for every feature, its XGBoost importance in each trained model.
per_model_importances = [model.feature_importances_ for model in models.values()]
aggregated_importance = {
    feat: [scores[position] for scores in per_model_importances]
    for position, feat in enumerate(feature_cols)
}

# Average across models and order from most to least important.
avg_importance = {}
for feat, values in aggregated_importance.items():
    avg_importance[feat] = np.mean(values)
sorted_importance = sorted(
    avg_importance.items(),
    key=lambda pair: pair[1],
    reverse=True,
)

print("\n平均特徵重要性排名 (XGBoost):")
rank = 0
for feat, imp in sorted_importance:
    rank += 1
    bar = '█' * int(imp * 50)
    print(f"  {rank}. {feat:20s} {bar} {imp:.4f}")

# Average SHAP importance across all metrics.
# Raw mean |SHAP| is expressed in each target's own units (the labels in
# `target_metrics` include µs, bytes and unit-less rates), so averaging the
# raw values lets the largest-valued targets dominate the ranking. Each
# metric's values are therefore normalised to shares that sum to 1 before
# averaging, which also keeps the bar length bounded.
print("\n平均特徵重要性排名 (SHAP，各指標正規化後):")
aggregated_shap = {feat: [] for feat in feature_cols}

for metric, (shap_values, X_valid, _) in shap_values_all.items():
    mean_abs_shap = np.abs(shap_values).mean(axis=0)
    total = mean_abs_shap.sum()
    # A model with all-zero SHAP values contributes zero share to every feature.
    shares = mean_abs_shap / total if total > 0 else np.zeros_like(mean_abs_shap)
    for feat_idx, feat in enumerate(feature_cols):
        aggregated_shap[feat].append(shares[feat_idx])

# With no trained models the lists are empty; report 0.0 instead of NaN,
# which would make int() fail when the bar is built below.
avg_shap = {
    feat: (np.mean(values) if values else 0.0)
    for feat, values in aggregated_shap.items()
}
sorted_shap = sorted(avg_shap.items(), key=lambda x: x[1], reverse=True)

for rank, (feat, imp) in enumerate(sorted_shap, 1):
    # Shares lie in [0, 1], so the same x50 scale as the XGBoost ranking applies.
    bar = '█' * int(imp * 50)
    print(f"  {rank}. {feat:20s} {bar} {imp:.4f}")

In [None]:
# 12. Conclusions and recommendations
section_rule = "=" * 60
print("\n" + section_rule)
print("分析結論")
print(section_rule)

# Static closing summary; printed verbatim.
conclusion_text = """
✓ XGBoost 模型成功訓練完成
  - 模型使用 5-fold 交叉驗證評估
  - SHAP 值解釋模型決策過程

主要發現:
1. 特徵重要性排名顯示了各參數對性能的影響
2. SHAP 依賴圖揭示了參數與指標的非線性關係
3. 交叉驗證分數評估了模型的泛化能力

建議:
- 優化排名靠前的參數以提升性能
- 考慮參數間的交互作用（可通過 SHAP 分析）
- 在實際應用中驗證模型預測
"""
print(conclusion_text)

print("\n✓ 分析完成！")