# Beetle Detection YOLOv8 評価レポート

Open Images Dataset (OID) のデータで学習したBeetle検出モデルの評価と可視化を行います。

## 目的
- 学習済みモデルの性能評価
- mAP@0.5の確認（目標: ≥0.60）
- PR曲線、混同行列の可視化
- 誤検出の分析
- 改善提案の検討

In [None]:
# 必要なライブラリのインポート
import os
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2

# YOLOv8関連
from ultralytics import YOLO
import torch

# Plot styling.
# Matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8*";
# older releases only know the legacy "seaborn" name, so pick whichever exists
# instead of failing on style lookup.
_SEABORN_STYLE = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_SEABORN_STYLE)
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 12

# Report the runtime so the numbers below can be put in context.
print(f"PyTorch version: {torch.__version__}")
print(f"Device: {'CUDA' if torch.cuda.is_available() else 'CPU'}")

## 1. 学習結果の読み込み

In [None]:
# Path configuration: everything is resolved relative to the parent of the
# directory the notebook runs in.
PROJECT_ROOT = Path('..')
RUNS_DIR = PROJECT_ROOT.joinpath('runs', 'detect', 'train')
_weights_dir = RUNS_DIR.joinpath('weights')
BEST_MODEL = _weights_dir.joinpath('best.pt')
LAST_MODEL = _weights_dir.joinpath('last.pt')
DATA_YAML = PROJECT_ROOT.joinpath('datasets', 'beetle-oid-yolo', 'data.yaml')

# Echo the resolved locations and whether the key inputs are present.
print("\n".join([
    f"Project root: {PROJECT_ROOT.absolute()}",
    f"Best model: {BEST_MODEL}",
    f"Model exists: {BEST_MODEL.exists()}",
    f"Data yaml: {DATA_YAML}",
    f"Data exists: {DATA_YAML.exists()}",
]))

In [None]:
# Load the trained weights: prefer best.pt, fall back to last.pt, and leave
# `model` as None when neither checkpoint exists.
_checkpoint = next((p for p in (BEST_MODEL, LAST_MODEL) if p.exists()), None)

if _checkpoint is None:
    print("❌ No trained model found. Please run training first.")
    model = None
else:
    model = YOLO(str(_checkpoint))
    if _checkpoint is BEST_MODEL:
        print("✅ Best model loaded successfully")
    else:
        print("⚠️  Using last model (best not found)")

## 2. 学習曲線の可視化

In [None]:
# Display the plots saved in RUNS_DIR in a 2x3 grid; a text placeholder is
# drawn for any file that is missing.
results_images = [
    ('results.png', 'Training Results'),
    ('confusion_matrix.png', 'Confusion Matrix'),
    ('F1_curve.png', 'F1 Score Curve'),
    ('P_curve.png', 'Precision Curve'),
    ('R_curve.png', 'Recall Curve'),
    ('PR_curve.png', 'Precision-Recall Curve'),
]

fig, axes = plt.subplots(2, 3, figsize=(20, 12))
axes = axes.flatten()

for ax, (img_name, title) in zip(axes, results_images):
    img_path = RUNS_DIR / img_name
    if not img_path.exists():
        # Axes-relative coordinates keep the placeholder centred.
        ax.text(0.5, 0.5, f'{title}\n(Not found)',
                ha='center', va='center', transform=ax.transAxes)
    else:
        ax.imshow(Image.open(img_path))
        ax.set_title(title)
    ax.axis('off')

plt.tight_layout()
plt.show()

## 3. 性能評価指標

In [None]:
# Evaluate the loaded model on the validation split and compare mAP@0.5
# against the project target.
if not model or not DATA_YAML.exists():
    print("❌ 評価をスキップ（モデルまたはデータが見つかりません）")
else:
    print("=== 検証データで評価実行 ===")

    # `metrics` stays at notebook scope; the final summary cell reads it.
    metrics = model.val(data=str(DATA_YAML), plots=True, save_json=True)

    # Headline metrics
    print("\n=== 主要性能指標 ===")
    for _label, _value in (
        ("mAP@0.5", metrics.box.map50),
        ("mAP@0.5:0.95", metrics.box.map),
        ("Precision", metrics.box.mp),
        ("Recall", metrics.box.mr),
    ):
        print(f"{_label}: {_value:.4f}")

    # Target check
    target_map50 = 0.60
    if metrics.box.map50 >= target_map50:
        print(f"\n✅ 目標達成！ mAP@0.5 = {metrics.box.map50:.4f} ≥ {target_map50}")
    else:
        print(f"\n⚠️  目標未達成: mAP@0.5 = {metrics.box.map50:.4f} < {target_map50}")
        print("改善提案:")
        for _suggestion in (
            "- より多くのデータでの学習",
            "- オーグメンテーション調整",
            "- より大きなモデル(yolov8s/m)の使用",
            "- 学習エポック数の増加",
        ):
            print(_suggestion)

## 4. サンプル画像での推論テスト

In [None]:
# Run inference on a few validation images and show the annotated results.

# Bound before the guard: later cells read `val_images_dir`, and leaving it
# inside the `if` caused a NameError there whenever this cell was skipped.
val_images_dir = PROJECT_ROOT / 'datasets' / 'beetle-oid-yolo' / 'images' / 'val'

if model is not None and DATA_YAML.exists():
    if val_images_dir.exists():
        # glob() yields files in arbitrary order; sort so the same images are
        # picked on every run, then keep the first six.
        sample_images = sorted(val_images_dir.glob('*.jpg'))[:6]

        if sample_images:
            print(f"=== サンプル推論（{len(sample_images)}枚） ===")

            fig, axes = plt.subplots(2, 3, figsize=(18, 12))
            axes = axes.flatten()

            for ax, img_path in zip(axes, sample_images):
                results = model(str(img_path), conf=0.25)

                # Image with the predicted boxes drawn on it.
                annotated_img = results[0].plot()

                boxes = results[0].boxes
                num_detections = len(boxes) if boxes is not None else 0

                # The annotated array is treated as BGR, so convert for matplotlib.
                ax.imshow(cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB))
                ax.set_title(f'{img_path.name}\nDetections: {num_detections}')
                ax.axis('off')

            # With fewer than six images the remaining grid cells would
            # otherwise show empty framed axes.
            for ax in axes[len(sample_images):]:
                ax.axis('off')

            plt.tight_layout()
            plt.show()

        else:
            print("検証画像が見つかりません")
    else:
        print(f"検証画像ディレクトリが見つかりません: {val_images_dir}")
else:
    print("推論テストをスキップ")

## 5. 信頼度閾値の分析

In [None]:
# Compare detections on one validation image across confidence thresholds.

# Re-derive the directory here so this cell does not depend on an earlier
# cell having executed its `if` branch (previously a NameError), and collect
# the candidates up front so an empty directory no longer raises IndexError.
val_images_dir = PROJECT_ROOT / 'datasets' / 'beetle-oid-yolo' / 'images' / 'val'
_candidates = sorted(val_images_dir.glob('*.jpg')) if val_images_dir.exists() else []

if model is not None and _candidates:
    confidence_thresholds = [0.1, 0.25, 0.5, 0.7, 0.9]
    sample_image = _candidates[0]  # first image (sorted by name) is the test subject

    print(f"=== 信頼度閾値分析 ({sample_image.name}) ===")

    fig, axes = plt.subplots(1, len(confidence_thresholds), figsize=(20, 4))

    detection_counts = []

    for ax, conf in zip(axes, confidence_thresholds):
        results = model(str(sample_image), conf=conf)
        annotated_img = results[0].plot()

        boxes = results[0].boxes
        num_detections = len(boxes) if boxes is not None else 0
        detection_counts.append(num_detections)

        ax.imshow(cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB))
        ax.set_title(f'Conf: {conf}\nDet: {num_detections}')
        ax.axis('off')

    plt.tight_layout()
    plt.show()

    # Detection count as a function of the threshold
    plt.figure(figsize=(10, 6))
    plt.plot(confidence_thresholds, detection_counts, 'o-', linewidth=2, markersize=8)
    plt.xlabel('Confidence Threshold')
    plt.ylabel('Number of Detections')
    plt.title('Detection Count vs Confidence Threshold')
    plt.grid(True, alpha=0.3)

    # Label every point with its count, slightly above the marker.
    for conf, count in zip(confidence_thresholds, detection_counts):
        plt.annotate(f'{count}', (conf, count), textcoords="offset points",
                     xytext=(0, 10), ha='center')

    plt.show()

    print("\n推奨信頼度閾値:")
    print("- 高精度重視: 0.5-0.7")
    print("- バランス重視: 0.25-0.5")
    print("- 検出漏れ回避: 0.1-0.25")
else:
    print("信頼度閾値分析をスキップ（モデルまたは検証画像が見つかりません）")

## 6. モデル情報とサマリー

In [None]:
# Print model metadata and measure single-image inference latency.
import time

if model is not None:
    print("=== モデル情報 ===")
    print(f"Model type: {model.model.__class__.__name__}")
    print(f"Task: {model.task}")
    print(f"Classes: {model.names}")

    print(f"\nModel parameters: {sum(p.numel() for p in model.model.parameters()):,}")

    # Mirror the loading cell's preference (best.pt, else last.pt) so the
    # reported size belongs to the checkpoint actually in use. The label is
    # always printed; previously the conditional wrapped the whole argument
    # and emitted a bare "N/A".
    _loaded_weights = BEST_MODEL if BEST_MODEL.exists() else LAST_MODEL
    if _loaded_weights.exists():
        print(f"Model size (MB): {os.path.getsize(_loaded_weights) / (1024 * 1024):.2f}")
    else:
        print("Model size (MB): N/A")

    # Speed test. The directory is re-derived here so the cell does not rely
    # on an earlier cell having defined it, and an empty directory is handled
    # instead of raising IndexError.
    val_images_dir = PROJECT_ROOT / 'datasets' / 'beetle-oid-yolo' / 'images' / 'val'
    _test_images = sorted(val_images_dir.glob('*.jpg')) if val_images_dir.exists() else []

    if _test_images:
        test_image = _test_images[0]

        # Warm-up runs so one-time initialisation is not included in the timing.
        for _ in range(3):
            model(str(test_image), verbose=False)

        # perf_counter is the clock intended for measuring short intervals;
        # time.time() can jump if the system clock is adjusted.
        times = []
        for _ in range(10):
            start = time.perf_counter()
            model(str(test_image), verbose=False)
            times.append(time.perf_counter() - start)

        # `avg_time` and `fps` stay at notebook scope; the summary cell reads them.
        avg_time = np.mean(times)
        fps = 1.0 / avg_time

        print("\n=== 推論性能 ===")
        print(f"Average inference time: {avg_time*1000:.2f} ms")
        print(f"FPS: {fps:.2f}")
        print(f"Device: {'GPU' if torch.cuda.is_available() and next(model.model.parameters()).is_cuda else 'CPU'}")
    else:
        print("\n推論速度テストをスキップ（検証画像が見つかりません）")

## 7. 評価サマリーと改善提案

In [None]:
# Final report: metrics recap, target verdict, suggestions, and file pointers.
_RULE = "=" * 60
print(_RULE)
print("           BEETLE DETECTION MODEL EVALUATION REPORT")
print(_RULE)

# `metrics` only exists if the evaluation cell actually ran its main branch.
if not (model and 'metrics' in locals()):
    print("\n❌ Evaluation incomplete - please ensure model training is completed")
else:
    _box = metrics.box
    _target_met = _box.map50 >= 0.60
    _below_target = _box.map50 < 0.60
    _badge = '✅' if _target_met else '⚠️ '

    print("\n📊 PERFORMANCE METRICS:")
    print(f"   mAP@0.5: {_box.map50:.4f} {_badge}")
    print(f"   mAP@0.5:0.95: {_box.map:.4f}")
    print(f"   Precision: {_box.mp:.4f}")
    print(f"   Recall: {_box.mr:.4f}")

    # Timing figures are optional: they exist only if the speed test ran.
    if 'avg_time' in locals():
        print("\n⚡ INFERENCE PERFORMANCE:")
        print(f"   Speed: {avg_time*1000:.2f} ms/image")
        print(f"   FPS: {fps:.2f}")

    print("\n🎯 TARGET ACHIEVEMENT:")
    if _target_met:
        _verdict = (
            "   ✅ Target mAP@0.5 ≥ 0.60 ACHIEVED!",
            "   🎉 Model ready for production use",
        )
    else:
        _verdict = (
            "   ⚠️  Target mAP@0.5 ≥ 0.60 NOT reached",
            "   📈 Consider improvements below",
        )
    for _line in _verdict:
        print(_line)

    print("\n💡 IMPROVEMENT SUGGESTIONS:")
    if _below_target:
        _tips = (
            "   🔹 Increase dataset size (current: test mode)",
            "   🔹 Use larger model (yolov8s or yolov8m)",
            "   🔹 Extend training epochs",
            "   🔹 Adjust data augmentation",
        )
    else:
        _tips = (
            "   🔹 Consider model optimization for speed",
            "   🔹 Evaluate on more diverse test data",
            "   🔹 Fine-tune confidence threshold",
        )
    for _line in _tips:
        print(_line)

print("\n📁 OUTPUT FILES:")
print("   📋 This report: scripts/04_eval_report.ipynb")
print(f"   🏆 Best model: {BEST_MODEL}")
print(f"   📊 Training plots: {RUNS_DIR}/*.png")
print(f"   📄 Detailed logs: {RUNS_DIR}/")

print("\n🔗 NEXT STEPS:")
for _step in (
    "   1️⃣  Test with custom images: python scripts/05_infer_cli.py",
    "   2️⃣  Deploy model for production use",
    "   3️⃣  Consider Google Colab for full-scale training",
):
    print(_step)

print(_RULE)