# RSNA Aneurysm Detection Training - exp0001

**実験概要**: ResNet50 ベースラインモデル

**実行環境**: 
- Google Colab (GPU)
- PyTorch with Mixed Precision
- W&B Experiment Tracking

In [None]:
# Google Colab環境セットアップ
import os
import sys

# GPU確認
!nvidia-smi

# リポジトリクローン（初回のみ）
repo_name = "DATASCIENCE-TEM"
repo_url = os.environ.get("REPO_URL", "https://github.com/YOUR_USERNAME/DATASCIENCE-TEM.git")

if not os.path.exists(f"/content/{repo_name}"):
    print(f"Cloning {repo_url}...")
    !git clone {repo_url}
    %cd /content/{repo_name}
else:
    print(f"Repository {repo_name} already exists, updating...")
    %cd /content/{repo_name}
    !git pull origin main || true

# 実験ディレクトリへ移動（常に明示）
%cd /content/DATASCIENCE-TEM/kaggle-projects/rsna-aneurysm/experiments/exp0001

In [None]:
# 依存関係インストール（Colab向け）
# - PyTorch は cu121 ホイールを明示（Colab の CUDA12 系に整合）
# - torch 以外の依存のみ個別に追加
!pip -q install --upgrade pip

# PyTorch 2.4.1 + cu121（torchvision/torchaudio も整合）
!pip -q install --index-url https://download.pytorch.org/whl/cu121 \
  torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1

# プロジェクト依存（torch 以外）
!pip -q install timm==1.0.16 albumentations==2.0.8 opencv-python-headless==4.10.0.84 \
  nibabel>=5.2.1 pydicom>=2.4.4 monai>=1.4.0 lightning==2.4.0 accelerate==1.8.1

In [None]:
# API credentials: copy the secrets stored in Colab into environment variables
from google.colab import drive, userdata

for secret_name in ("WANDB_API_KEY", "KAGGLE_USERNAME", "KAGGLE_KEY"):
    os.environ[secret_name] = userdata.get(secret_name)

# Mount Google Drive (used to store the artifacts)
drive.mount("/content/drive")

In [None]:
# Create the credential file used by the Kaggle CLI (generated from environment variables)
import os
import json
import pathlib
import subprocess

kdir = pathlib.Path.home().joinpath('.kaggle')
kdir.mkdir(parents=True, exist_ok=True)
kjson = kdir.joinpath('kaggle.json')

kaggle_credentials = {
    'username': os.environ.get('KAGGLE_USERNAME', ''),
    'key': os.environ.get('KAGGLE_KEY', ''),
}
if not all(kaggle_credentials.values()):
    print('Warning: KAGGLE_USERNAME / KAGGLE_KEY is empty; the written kaggle.json will not authenticate')

# Create the file with owner-only permissions from the start so the API key is
# never readable by other users, not even briefly.
fd = os.open(kjson, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(fd, 'w') as f:
    json.dump(kaggle_credentials, f)
try:
    # The mode passed to os.open only applies to newly created files, so
    # tighten a pre-existing file explicitly.
    kjson.chmod(0o600)
except OSError as e:
    print('chmod failed:', e)
print('Kaggle credentials prepared at ~/.kaggle/kaggle.json')


In [None]:
# Data preparation (Kaggle download -> metadata extraction -> PNG conversion -> training metadata)
from pathlib import Path
import os
import sys
import subprocess

# This notebook is expected to run from the exp0001 directory
print('CWD:', os.getcwd())

current_dir = Path.cwd()
print(f"Current directory: {current_dir}")

PROJECT_DIR_NAME = 'rsna-aneurysm'

# 1) Preferred: the project root is the CWD itself or one of its ancestors.
#    This covers both local runs and Colab, where the setup cell has already
#    changed into .../rsna-aneurysm/experiments/exp0001. (Testing for
#    "/content" in the path first would append the repository path to exp0001
#    and point at a directory that does not exist.)
RSNA_DIR = next(
    (p for p in (current_dir, *current_dir.parents) if p.name == PROJECT_DIR_NAME),
    None,
)

if RSNA_DIR is not None:
    print("Detected project root from the current working directory")
else:
    # 2) The CWD is above the project (e.g. /content or the repository root):
    #    look for the known layouts below it.
    candidates = [
        current_dir / 'DATASCIENCE-TEM' / 'kaggle-projects' / PROJECT_DIR_NAME,
        current_dir / 'kaggle-projects' / PROJECT_DIR_NAME,
    ]
    RSNA_DIR = next((c for c in candidates if c.exists()), None)
    if RSNA_DIR is not None:
        print("Detected project root below the current working directory")
    else:
        # 3) Last resort: assume the notebook sits two levels below the root.
        print("Unknown environment, using fallback logic")
        parents = current_dir.parents
        RSNA_DIR = parents[1] if len(parents) > 1 else current_dir.parent

print(f"RSNA project directory: {RSNA_DIR}")
print(f"Directory exists: {RSNA_DIR.exists()}")

# Absolute paths of the raw / processed data directories under the project root
RAW_DIR = RSNA_DIR / 'data' / 'raw'
PROC_DIR = RSNA_DIR / 'data' / 'processed'
RAW_DIR.mkdir(parents=True, exist_ok=True)
PROC_DIR.mkdir(parents=True, exist_ok=True)


def run_module(mod: str, args: list[str], cwd: Path) -> None:
    """Run ``python -m <mod> <args>`` in *cwd* and echo its stdout and stderr.

    Output is captured and printed after the process finishes so that the
    reason for a failure is visible in the notebook.

    Raises:
        subprocess.CalledProcessError: re-raised, after printing the captured
            output, when the module exits with a non-zero status.
    """
    command = [sys.executable, '-m', mod] + list(args)
    print('Running:', ' '.join(str(part) for part in command), '| cwd=', str(cwd))
    try:
        completed = subprocess.run(command, cwd=str(cwd), check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as err:
        print('STDOUT:\n', err.stdout)
        print('STDERR:\n', err.stderr)
        raise
    for captured in (completed.stdout, completed.stderr):
        if captured:
            print(captured)

# 1) Kaggle data download (skipped when raw/train.csv already exists)
# For early development only the CSV files are downloaded first (fast)
if not (RAW_DIR / 'train.csv').exists():
    print("Downloading essential CSV files first (fast download)...")
    run_module('scripts.download_data', [
        '--competition', 'rsna-intracranial-aneurysm-detection',
        '--output', str(RAW_DIR),
        '--files-only'  # download the CSV files only
    ], cwd=RSNA_DIR)

    # Uncomment the lines below when the image data is needed (slow, be careful)
    # print("Downloading large image datasets (this may take 10+ minutes)...")
    # run_module('scripts.download_data', [
    #     '--competition', 'rsna-intracranial-aneurysm-detection',
    #     '--output', str(RAW_DIR)
    # ], cwd=RSNA_DIR)
else:
    print('Skip download: train.csv found')

# Warn when neither the extracted image directory nor its zip archive is present
if not (RAW_DIR / 'train_images').exists() and not (RAW_DIR / 'train_images.zip').exists():
    print("\n" + "="*60)
    print("⚠️  IMAGE DATA NOT DOWNLOADED")
    print("="*60)
    print("Only CSV files were downloaded for fast development.")
    print("To download images, uncomment the lines above or run:")
    print(f"python -m scripts.download_data --competition rsna-intracranial-aneurysm-detection --output {RAW_DIR}")
    print("="*60 + "\n")

# 2) DICOM metadata extraction (only when image data exists and the output is missing)
if (RAW_DIR / 'train_images').exists() and not (PROC_DIR / 'train_metadata.csv').exists():
    run_module('scripts.dicom_utils', [
        'extract-metadata',
        '--input', str(RAW_DIR / 'train_images'),
        '--output', str(PROC_DIR / 'train_metadata.csv')
    ], cwd=RSNA_DIR)
elif not (RAW_DIR / 'train_images').exists():
    print('Skip metadata extract: train_images directory not found')
else:
    print('Skip metadata extract: train_metadata.csv found')

# 3) DICOM -> PNG conversion (only when image data exists and no PNG is in the processed dir yet)
if (RAW_DIR / 'train_images').exists() and not any(PROC_DIR.glob('*.png')):
    print('Converting DICOM to PNG images...')
    run_module('scripts.dicom_utils', [
        'convert-images',
        '--input', str(RAW_DIR / 'train_images'),
        '--output', str(PROC_DIR),
        '--format', 'png', '--target-size', '512', '512', '--window-center', '40', '--window-width', '80'
    ], cwd=RSNA_DIR)
elif not (RAW_DIR / 'train_images').exists():
    print('Skip convert: train_images directory not found')
else:
    print('Skip convert: PNG already exists in processed dir')

# 4) Build the training metadata (skipped when processed/train.csv already exists)
if not (PROC_DIR / 'train.csv').exists():
    if (PROC_DIR / 'train_metadata.csv').exists():
        # DICOM metadata is available: merge it with the raw train.csv
        run_module('scripts.create_metadata', [
            '--train-csv', str(RAW_DIR / 'train.csv'),
            '--metadata', str(PROC_DIR / 'train_metadata.csv'),
            '--output', str(PROC_DIR / 'train_processed.csv')
        ], cwd=RSNA_DIR)
        # Copy with Python instead of shelling out to cp
        import shutil
        shutil.copy2(PROC_DIR / 'train_processed.csv', PROC_DIR / 'train.csv')
    else:
        print('Creating simple training metadata from train.csv only...')
        # Create basic training data without the DICOM metadata
        import pandas as pd
        train_csv = pd.read_csv(RAW_DIR / 'train.csv')
        # Minimal preprocessing (the raw CSV is currently written out unchanged)
        train_csv.to_csv(PROC_DIR / 'train.csv', index=False)
else:
    print('Skip create_metadata: processed/train.csv found')

print('Data preparation completed.')

In [None]:
# AMP/compile ユーティリティ
import torch, os, random, numpy as np

# Fix the RNG seeds and set the float32 matmul precision (recommended on PyTorch 2.x)
def seed_everything(seed: int = 42):
    """Seed the Python, NumPy and PyTorch RNGs and set matmul precision to "medium".

    CUDA generators are seeded as well when CUDA is available.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.set_float32_matmul_precision("medium")

# autocast の dtype を自動選択（bf16優先）
from torch.amp import autocast

def autocast_kwargs():
    """Return the keyword arguments to pass to ``autocast``.

    With CUDA available, autocast is enabled on "cuda" using bfloat16 when
    ``torch.cuda.is_bf16_supported()`` reports support and float16 otherwise.
    Without CUDA, a disabled "cpu" configuration is returned.
    """
    if not torch.cuda.is_available():
        return {"enabled": False, "device_type": "cpu"}

    # Older torch builds may lack is_bf16_supported; treat that as "no bf16"
    bf16_probe = getattr(torch.cuda, "is_bf16_supported", None)
    supports_bf16 = bf16_probe() if bf16_probe is not None else False
    amp_dtype = torch.bfloat16 if supports_bf16 else torch.float16
    return {"enabled": True, "device_type": "cuda", "dtype": amp_dtype}

# Toggle torch.compile through the USE_COMPILE environment variable

def maybe_compile(model):
    """Return ``torch.compile(model, mode="max-autotune")`` when USE_COMPILE=1.

    The model is returned unchanged when USE_COMPILE is not "1" or
    ``torch.compile`` does not exist. If the compile call raises, a warning is
    printed and the original model is returned.
    """
    compile_requested = os.getenv("USE_COMPILE", "0") == "1"
    if not (compile_requested and hasattr(torch, "compile")):
        return model
    try:
        compiled = torch.compile(model, mode="max-autotune")
    except Exception as e:
        print(f"[warn] torch.compile disabled: {e}")
        return model
    return compiled


In [None]:
# 必要ライブラリのインポート
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
import json
import warnings
from pathlib import Path
from datetime import datetime
import subprocess
import os

# PyTorch関連
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import torchvision.models as models
from torch.amp import autocast
from torch.cuda.amp import GradScaler

# 画像処理
import cv2
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
import pydicom

# ML・評価指標
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
import wandb

# Suppress all Python warnings and use the seaborn-v0_8 matplotlib style
warnings.filterwarnings("ignore")
plt.style.use("seaborn-v0_8")

In [None]:
# Load the experiment configuration
with open("config.yaml", "r") as config_file:
    cfg = yaml.safe_load(config_file)

# Echo the key settings so the run is identifiable from the notebook output
experiment_info = cfg["experiment"]
print(f"Experiment ID: {experiment_info['id']}")
print(f"Description: {experiment_info['description']}")
print(f"Model: {cfg['model']['architecture']}")
print(f"Image Size: {cfg['data']['image_size']}")

In [None]:
# Seed setup (for reproducibility)
def set_seed(seed):
    """Seed all RNGs and configure cuDNN from cfg["environment"]["deterministic"].

    cuDNN benchmark mode is set to the opposite of the deterministic flag.
    """
    seed_everything(seed)
    deterministic = cfg["environment"]["deterministic"]
    torch.backends.cudnn.deterministic = deterministic
    torch.backends.cudnn.benchmark = not deterministic


set_seed(cfg["environment"]["seed"])
device = torch.device(cfg["environment"]["device"])
# Fall back to CPU when a CUDA device is configured but no GPU is attached
# (e.g. a Colab CPU runtime); otherwise the first .to(device) call would fail.
if device.type == "cuda" and not torch.cuda.is_available():
    print(f"[warn] device '{device}' requested but CUDA is not available; falling back to CPU")
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Get the Git SHA (for version tracking)
def get_git_sha():
    """Return the commit SHA of HEAD for the current directory, or "unknown".

    "unknown" is returned when git cannot be executed (OSError, e.g. git is not
    installed) or exits with an error (e.g. the directory is not a repository).
    Other exceptions such as KeyboardInterrupt are deliberately not swallowed.
    """
    try:
        output = subprocess.check_output(["git", "rev-parse", "HEAD"])
    except (subprocess.CalledProcessError, OSError):
        return "unknown"
    return output.decode("ascii", errors="replace").strip()


git_sha = get_git_sha()
print(f"Git SHA: {git_sha}")

# Persist the Git SHA alongside the other run artifacts
Path("git_sha.txt").write_text(git_sha)

In [None]:
# Load the data
data_dir = Path(cfg["paths"]["data_dir"])
print(f"Data directory: {data_dir}")

# Check that the training CSV exists; otherwise synthesize one for smoke testing
processed_dir = data_dir / "processed"
train_csv_path = processed_dir / "train.csv"
if not train_csv_path.exists():
    print(f"Warning: {train_csv_path} not found. Creating sample data for testing...")
    # Sample data with 14 labels: present + 13 loc_* columns
    n_samples = 100
    row_ids = range(n_samples)
    sample_columns = {
        "image_id": [f"img_{i:04d}" for i in row_ids],
        "PatientID": [f"patient_{i // 10:03d}" for i in row_ids],
        "present": np.random.choice([0, 1], n_samples, p=[0.7, 0.3]),
    }
    # Columns are drawn in order loc_01 .. loc_13 so the RNG stream is unchanged
    sample_columns.update(
        (f"loc_{k:02d}", np.random.choice([0, 1], n_samples, p=[0.9, 0.1]))
        for k in range(1, 14)
    )
    sample_df = pd.DataFrame(sample_columns)

    # Create the output directory before writing
    processed_dir.mkdir(parents=True, exist_ok=True)
    sample_df.to_csv(train_csv_path, index=False)
    print(f"Sample data saved to {train_csv_path}")

# Read the training table
train_df = pd.read_csv(train_csv_path)
print(f"Training data shape: {train_df.shape}")

# Report the class balance of whichever target column is available
target_column = cfg["data"]["target_column"]
if target_column in train_df.columns:
    print("Target distribution (single target):")
    print(train_df[target_column].value_counts())
elif "present" in train_df.columns:
    print("Target distribution (present):")
    print(train_df["present"].value_counts())
else:
    print("Target columns will be inferred later.")

In [None]:
# ==== CV: stratify on "present", group by study_id / PatientID ====
from sklearn.model_selection import StratifiedGroupKFold
import re

# Quick detection of the present column, so this cell also works when the
# detailed label-detection cell has not been run yet.
if 'present_col' not in locals():
    # Compare on a normalized name so that spellings such as
    # "Aneurysm Present" or "aneurysm-present" are accepted too.
    normalized_names = {c: re.sub(r"[\s\-]+", "_", str(c).strip()).lower() for c in train_df.columns}
    pres = [c for c, norm in normalized_names.items() if re.fullmatch(r"(aneurysm_)?present", norm)]
    if not pres:
        pres = [c for c, norm in normalized_names.items() if norm in {"present", "aneurysm_present", "target"}]
    assert len(pres) >= 1, "present 列が見つかりません（train.csv を確認してください）"
    present_col = pres[0]

# Group column preference: study_id, then PatientID, then the configured column
if "study_id" in train_df.columns:
    group_col = "study_id"
elif "PatientID" in train_df.columns:
    group_col = "PatientID"
else:
    group_col = cfg["cv"]["group_column"]
y_strat = train_df[present_col].astype(int)

sgkf = StratifiedGroupKFold(n_splits=cfg["cv"]["n_folds"], shuffle=True, random_state=cfg["cv"]["seed"])
# split() yields positional indices, so assign through a NumPy array rather
# than .loc (label-based); this stays correct for any DataFrame index.
fold_of_row = np.full(len(train_df), -1, dtype=int)
for fold, (_, val_idx) in enumerate(sgkf.split(train_df, y_strat, groups=train_df[group_col])):
    fold_of_row[val_idx] = fold
train_df["fold"] = fold_of_row

# Show the per-fold class balance and persist the assignment
print(train_df.groupby("fold")[present_col].value_counts().unstack(fill_value=0))
train_df[[group_col, "fold", present_col]].to_csv("cv_folds.csv", index=False)
print("CV splits saved to cv_folds.csv")

In [None]:
# ==== 14ラベル（present + 13 locations）を自動検出 ====
import re

def infer_label_columns(df):
    """Detect the "present" column and 13 location label columns of *df*.

    The present column is the first column whose normalized name (trimmed,
    lower-cased, spaces/hyphens turned into underscores) is "present" or
    "aneurysm_present"; "target" is accepted as a fallback.

    Location candidates are the remaining columns whose non-null values are all
    exactly 0 or 1. Values are not rounded, so a continuous column such as a
    score in [0, 1] is not mistaken for a label. The "fold" helper column is
    never used as a label. Columns with anatomy-like names are preferred when
    at least 13 of them exist; otherwise all binary columns are used. The first
    13, in DataFrame column order, are returned.

    Returns:
        tuple: ``(present_col, loc_cols)`` where ``loc_cols`` is a list of
        exactly 13 column names.

    Raises:
        AssertionError: if no present column or fewer than 13 binary label
            columns are found.
    """
    def _normalized(name):
        # "Aneurysm Present" / "aneurysm-present" -> "aneurysm_present"
        return re.sub(r"[\s\-]+", "_", str(name).strip()).lower()

    pres = [c for c in df.columns if re.fullmatch(r"(aneurysm_)?present", _normalized(c))]
    if not pres:
        pres = [c for c in df.columns if _normalized(c) in {"present", "aneurysm_present", "target"}]
    assert len(pres) >= 1, "present 列が見つかりません（train.csv を確認してください）"
    present_col = pres[0]

    # Bookkeeping columns added by the notebook that must never become labels
    # ("fold" is binary whenever the CV uses two folds).
    helper_cols = {"fold"}

    binary_cols = []
    for c in df.columns:
        if c == present_col or c in helper_cols:
            continue
        s = df[c].dropna()
        if s.empty:
            continue
        try:
            values = s.astype(float)
        except (TypeError, ValueError):
            # Non-numeric column (IDs, free text, ...)
            continue
        if values.isin([0.0, 1.0]).all():
            binary_cols.append(c)

    prefer = [c for c in binary_cols if re.search(r"(loc|location|artery|aca|mca|ica|pcom|acom|basilar|pca|sca|va|cavernous)", str(c), flags=re.I)]
    loc_cols = prefer if len(prefer) >= 13 else binary_cols
    assert len(loc_cols) >= 13, f"部位ラベルが不足（検出 {len(loc_cols)} 列）。train.csv の列名を確認してください。"
    loc_cols = loc_cols[:13]

    return present_col, loc_cols

# Label layout used everywhere below: the present column first, then the locations
present_col, loc_cols = infer_label_columns(train_df)
label_cols = [present_col, *loc_cols]
num_classes = 1 + len(loc_cols)
print("Detected label columns:", label_cols)


In [None]:
# Data augmentation settings
def get_transforms(mode="train"):
    """Build the albumentations pipeline for *mode*.

    Every pipeline resizes to cfg["data"]["image_size"], normalizes with the
    configured mean/std and converts to a tensor. With ``mode == "train"`` the
    flip, rotation, brightness/contrast and CLAHE augmentations are inserted
    between the resize and normalize steps; any other mode gets none.
    """
    data_cfg = cfg["data"]
    steps = [A.Resize(data_cfg["image_size"][0], data_cfg["image_size"][1])]

    if mode == "train":
        steps += [
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.3),
            A.Rotate(limit=10, p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.5),
            A.CLAHE(p=0.3),
        ]

    norm_cfg = data_cfg["normalization"]
    steps += [
        A.Normalize(mean=norm_cfg["mean"], std=norm_cfg["std"], max_pixel_value=255.0),
        ToTensorV2(),
    ]
    return A.Compose(steps)

In [None]:
# Dataset definition
class AneurysmDataset(Dataset):
    """Dataset reading ``<image_dir>/<image_id>.png`` with multi-label targets.

    Args:
        df: table with an "image_id" column and, unless ``mode == "test"``,
            the columns named in ``label_cols``. Its index is reset.
        image_dir: directory holding the PNG files. If it does not exist it is
            created and filled with random dummy images (for smoke tests).
        transform: optional callable invoked as ``transform(image=...)`` whose
            result provides the "image" entry.
        mode: "test" makes ``__getitem__`` return only the image.
        label_cols: iterable of label column names (list, tuple, Index, ...).
    """

    def __init__(self, df, image_dir, transform=None, mode="train", label_cols=None):
        self.df = df.reset_index(drop=True)
        self.image_dir = Path(image_dir)
        self.transform = transform
        self.mode = mode
        # Normalize to a list: `x or []` raises on arrays / pd.Index, and a
        # tuple would be interpreted by pandas as a single multi-level key.
        self.label_cols = list(label_cols) if label_cols is not None else []
        self._warned_unreadable = False

        if not self.image_dir.exists():
            print(f"Warning: Image directory {self.image_dir} does not exist. Creating dummy images for testing...")
            self.image_dir.mkdir(parents=True, exist_ok=True)
            self._create_dummy_images()

    def _create_dummy_images(self):
        """Write a random 512x512 RGB PNG for every image_id lacking a file."""
        for image_id in self.df["image_id"]:
            image_path = self.image_dir / f"{image_id}.png"
            if not image_path.exists():
                dummy_image = np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)
                cv2.imwrite(str(image_path), dummy_image)
        print(f"Created {len(self.df)} dummy images")

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)``, or only ``image`` when ``mode == "test"``."""
        row = self.df.iloc[idx]
        image_path = self.image_dir / f"{row['image_id']}.png"

        image = cv2.imread(str(image_path))
        if image is None:
            # Best effort for smoke tests: substitute noise instead of failing,
            # but say so once so missing real data does not go unnoticed.
            if not getattr(self, "_warned_unreadable", False):
                print(f"Warning: could not read {image_path}; using random noise instead (reported once per dataset)")
                self._warned_unreadable = True
            image = np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)
        else:
            # OpenCV loads BGR; the rest of the pipeline expects RGB
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            image = self.transform(image=image)["image"]

        if self.mode == "test":
            return image
        label = torch.tensor(row[self.label_cols].values.astype("float32"))
        return image, label


print("Dataset class defined")

In [None]:
# モデル定義
from torchvision.models import resnet50, ResNet50_Weights

class AneurysmModel(nn.Module):
    """ResNet50 backbone followed by a two-layer MLP head.

    The head emits ``num_classes`` raw logits. Its dropout probability and
    hidden width are read from cfg["model"].

    Raises:
        ValueError: if ``model_name`` is anything other than "resnet50".
    """

    def __init__(self, model_name="resnet50", num_classes=14, pretrained=True):
        super().__init__()
        if model_name != "resnet50":
            raise ValueError("Only resnet50 is implemented in this baseline.")

        weights = ResNet50_Weights.IMAGENET1K_V2 if pretrained else None
        self.backbone = resnet50(weights=weights)
        in_features = self.backbone.fc.in_features
        # Replace the original fc layer so the backbone output feeds the new head
        self.backbone.fc = nn.Identity()

        head_cfg = cfg["model"]
        dropout_p = head_cfg["dropout"]
        hidden_dim = head_cfg["hidden_dim"]
        self.classifier = nn.Sequential(
            nn.Dropout(dropout_p),
            nn.Linear(in_features, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout_p),
            nn.Linear(hidden_dim, num_classes),
        )

    def forward(self, x):
        return self.classifier(self.backbone(x))


# Model construction smoke test
model = maybe_compile(
    AneurysmModel(
        model_name=cfg["model"]["architecture"],
        num_classes=num_classes,
        pretrained=cfg["model"]["pretrained"],
    ).to(device)
)
print(f"Model created: {cfg['model']['architecture']}")
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params:,}")

In [None]:
# Initialize W&B
wandb_cfg = cfg["logging"]["wandb"]
run = wandb.init(
    project=wandb_cfg["project"],
    name=wandb_cfg["name"],
    config=cfg,
    tags=wandb_cfg["tags"],
    notes=wandb_cfg["notes"],
)

# Save the W&B run URL and ID next to the other artifacts
active_run = wandb.run
wandb_url = f"https://wandb.ai/{active_run.entity}/{active_run.project}/runs/{active_run.id}"
with open("wandb_run.txt", "w") as f:
    f.writelines([f"URL: {wandb_url}\n", f"Run ID: {active_run.id}\n"])

print(f"W&B run: {wandb_url}")

In [None]:
# Training / evaluation functions (14 labels, weighted AUC)
def train_epoch(model, dataloader, optimizer, criterion, scaler, device):
    """Train *model* for one pass over *dataloader* and return the mean batch loss.

    Mixed precision is controlled by cfg["environment"]["mixed_precision"]:
    when on, the forward pass runs under autocast and the backward pass and
    optimizer step go through *scaler*; when off, everything runs in full
    precision with a plain backward pass and optimizer step.

    Returns:
        float: mean of the per-batch losses, or NaN for an empty dataloader.
    """
    model.train()
    use_amp = bool(cfg["environment"]["mixed_precision"])
    # Resolve the autocast settings once per epoch. Autocast is switched off
    # together with the scaler so that an unscaled backward pass never runs on
    # float16 activations.
    amp_kwargs = autocast_kwargs()
    amp_kwargs["enabled"] = bool(amp_kwargs["enabled"]) and use_amp

    total_loss = 0.0
    num_batches = len(dataloader)
    for batch_idx, (images, labels) in enumerate(dataloader):
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        with autocast(**amp_kwargs):
            outputs = model(images)
            loss = criterion(outputs, labels)
        if use_amp:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()
        # Read the loss once per batch (.item() forces a host sync)
        batch_loss = loss.item()
        total_loss += batch_loss
        if batch_idx % 50 == 0:
            print(f"Batch {batch_idx}/{num_batches}, Loss: {batch_loss:.4f}")
    return total_loss / num_batches if num_batches else float("nan")


def validate_epoch(model, dataloader, criterion, device):
    """Evaluate *model* on *dataloader*.

    Returns:
        tuple: ``(avg_loss, weighted_auc, preds, targs)`` where ``preds`` are
        sigmoid probabilities and ``targs`` the labels, both NumPy arrays
        stacked over all batches. ``weighted_auc`` is the weighted mean of the
        per-column ROC AUCs with weight 13 on the first column and 1 on every
        other column; columns containing a single class are skipped, and NaN
        is returned when no column can be scored.
    """
    model.eval()
    # Same policy as training: autocast only when mixed precision is enabled
    amp_kwargs = autocast_kwargs()
    amp_kwargs["enabled"] = bool(amp_kwargs["enabled"]) and bool(cfg["environment"]["mixed_precision"])

    total_loss = 0.0
    preds_all, targs_all = [], []
    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)
            with autocast(**amp_kwargs):
                outputs = model(images)
                loss = criterion(outputs, labels)
            total_loss += loss.item()
            # Under bf16 autocast the logits are bfloat16, which NumPy cannot
            # represent (.numpy() raises TypeError); upcast to float32 first.
            preds_all.append(torch.sigmoid(outputs.float()).cpu().numpy())
            targs_all.append(labels.cpu().numpy())
    avg_loss = total_loss / len(dataloader)
    preds = np.concatenate(preds_all, axis=0)
    targs = np.concatenate(targs_all, axis=0)

    n_labels = preds.shape[1]
    aucs = []
    for j in range(n_labels):
        y_true = targs[:, j]
        y_pred = preds[:, j]
        if len(np.unique(y_true)) < 2:
            # AUC is undefined when only one class is present
            aucs.append(np.nan)
        else:
            aucs.append(roc_auc_score(y_true, y_pred))
    aucs = np.array(aucs, dtype=float)
    # Weight 13 for the first (present) column, 1 for every remaining column;
    # sized from the data so it also works with more than 14 labels.
    weights = np.array([13.0] + [1.0] * max(n_labels - 1, 0), dtype=float)[:n_labels]
    valid = ~np.isnan(aucs)
    weighted_auc = np.average(aucs[valid], weights=weights[valid]) if valid.any() else np.nan
    return avg_loss, float(weighted_auc), preds, targs

In [None]:
# K-Fold cross validation (14 labels)
fold_scores = []
oof_predictions = np.zeros((len(train_df), num_classes), dtype=np.float32)
oof_targets = train_df[label_cols].values.astype("float32")

# Create the directory for the model checkpoints
Path("model").mkdir(exist_ok=True)

# Rows are addressed by position below, so the OOF matrix stays aligned with
# train_df even when its index is not a default RangeIndex.
fold_of_row = train_df["fold"].to_numpy()

for fold in range(cfg["cv"]["n_folds"]):
    print(f"\n{'=' * 50}")
    print(f"FOLD {fold + 1}/{cfg['cv']['n_folds']}")
    print(f"{'=' * 50}")

    # Split the data
    valid_mask = fold_of_row == fold
    train_fold_df = train_df[~valid_mask]
    valid_fold_df = train_df[valid_mask]

    print(f"Train: {len(train_fold_df)}, Valid: {len(valid_fold_df)}")

    # Build the datasets (label_cols is passed through)
    train_dataset = AneurysmDataset(train_fold_df, cfg["paths"]["processed_data"], transform=get_transforms("train"), label_cols=label_cols)
    valid_dataset = AneurysmDataset(valid_fold_df, cfg["paths"]["processed_data"], transform=get_transforms("valid"), label_cols=label_cols)

    # DataLoader settings tuned for Colab
    num_workers = min(4, os.cpu_count() or 2)
    use_pin = torch.cuda.is_available()
    use_persist = num_workers > 0

    train_loader = DataLoader(
        train_dataset,
        batch_size=cfg["train"]["batch_size"],
        shuffle=True,
        num_workers=num_workers,
        pin_memory=use_pin,
        persistent_workers=use_persist,
    )
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=cfg["train"]["batch_size"],
        shuffle=False,
        num_workers=num_workers,
        pin_memory=use_pin,
        persistent_workers=use_persist,
    )

    # Model and optimization setup (a fresh model per fold)
    model = AneurysmModel(
        model_name=cfg["model"]["architecture"], num_classes=num_classes, pretrained=cfg["model"]["pretrained"]
    ).to(device)
    model = maybe_compile(model)

    optimizer_type = cfg["train"]["optimizer"]["type"]
    if optimizer_type == "AdamW":
        optimizer = optim.AdamW(
            model.parameters(), lr=cfg["train"]["optimizer"]["lr"], weight_decay=cfg["train"]["optimizer"]["weight_decay"]
        )
    else:
        optimizer = optim.Adam(
            model.parameters(), lr=cfg["train"]["optimizer"]["lr"], weight_decay=cfg["train"]["optimizer"]["weight_decay"]
        )

    criterion = nn.BCEWithLogitsLoss()
    scaler = GradScaler(enabled=cfg["environment"]["mixed_precision"])

    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=cfg["train"]["scheduler"]["T_0"], eta_min=cfg["train"]["scheduler"]["eta_min"]
    )

    # Early-stopping state, reset for every fold. best_preds starts as None so
    # predictions from a previous fold can never leak into this fold's OOF rows.
    best_auc = float("-inf")
    best_preds = None
    patience_counter = 0

    # Training loop
    for epoch in range(cfg["train"]["epochs"]):
        train_loss = train_epoch(model, train_loader, optimizer, criterion, scaler, device)
        valid_loss, valid_auc, valid_preds, valid_targets = validate_epoch(model, valid_loader, criterion, device)

        scheduler.step()

        print(f"Epoch {epoch + 1}/{cfg['train']['epochs']}")
        print(f"Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid AUC: {valid_auc:.4f}")

        wandb.log(
            {
                f"fold_{fold}_train_loss": train_loss,
                f"fold_{fold}_valid_loss": valid_loss,
                f"fold_{fold}_valid_auc": valid_auc,
                "epoch": epoch,
                "learning_rate": optimizer.param_groups[0]["lr"],
            }
        )

        # The first epoch is always kept so a checkpoint and OOF predictions
        # exist even when the AUC is NaN (undefined) or 0. Afterwards only a
        # real (non-NaN) improvement counts.
        improved = best_preds is None or (
            not np.isnan(valid_auc) and (np.isnan(best_auc) or valid_auc > best_auc)
        )
        if improved:
            best_auc = valid_auc
            patience_counter = 0
            torch.save(model.state_dict(), f"model/fold_{fold}_best.pth")
            best_preds = valid_preds.copy()
        else:
            patience_counter += 1

        if patience_counter >= cfg["train"]["early_stopping"]["patience"]:
            print(f"Early stopping at epoch {epoch + 1}")
            break

    if best_preds is None:
        raise RuntimeError(f"Fold {fold}: no epoch was run; check cfg['train']['epochs']")

    # Store the OOF predictions. The validation loader is not shuffled, so its
    # rows are in the same order as the masked rows of train_df.
    oof_predictions[valid_mask] = best_preds

    fold_scores.append(best_auc)
    print(f"Fold {fold + 1} Best AUC: {best_auc:.4f}")

    # Free memory before the next fold
    del model, optimizer, train_loader, valid_loader
    torch.cuda.empty_cache()

# Aggregate the CV results
cv_mean = np.mean(fold_scores)
cv_std = np.std(fold_scores)

# OOF weighted AUC (weighted mean of the per-column AUCs).
# Columns with a single class have no defined AUC and are recorded as NaN.
aucs = np.array(
    [
        roc_auc_score(col_true, col_pred) if len(np.unique(col_true)) >= 2 else np.nan
        for col_true, col_pred in zip(oof_targets.T, oof_predictions.T)
    ],
    dtype=float,
)
weights = np.array([13] + [1]*13, dtype=float)[: oof_predictions.shape[1]]
valid = ~np.isnan(aucs)
if valid.any():
    oof_auc = np.average(aucs[valid], weights=weights[valid])
else:
    oof_auc = np.nan

separator = "=" * 50
print(f"\n{separator}")
print("CV RESULTS")
print(separator)
print(f"Fold Scores: {fold_scores}")
print(f"CV Mean: {cv_mean:.4f} ± {cv_std:.4f}")
print(f"OOF AUC (weighted): {oof_auc:.4f}")

wandb.log({"cv_mean_auc": cv_mean, "cv_std_auc": cv_std, "oof_auc": float(oof_auc)})

In [None]:
# Save the OOF results (14 labels, with column names)
oof_df = pd.DataFrame({"index": train_df.index, "fold": train_df["fold"]})
for label_name, true_col, pred_col in zip(label_cols, oof_targets.T, oof_predictions.T):
    oof_df[f"y_true_{label_name}"] = true_col
    oof_df[f"y_pred_{label_name}"] = pred_col
oof_df.to_csv("oof_predictions.csv", index=False)

# Save the metrics
metrics = dict(
    experiment_id=cfg["experiment"]["id"],
    cv_mean_auc=float(cv_mean),
    cv_std_auc=float(cv_std),
    oof_auc=float(oof_auc),
    fold_scores=list(map(float, fold_scores)),
    git_sha=git_sha,
    wandb_url=wandb_url,
    timestamp=datetime.now().isoformat(),
)

with open("metrics.json", "w") as metrics_file:
    json.dump(metrics, metrics_file, indent=2)

# List every artifact produced by this run
print("Results saved:")
saved_artifacts = (
    "oof_predictions.csv",
    "metrics.json",
    f"model/fold_*_best.pth ({cfg['cv']['n_folds']} files)",
    "wandb_run.txt",
    "git_sha.txt",
)
for artifact in saved_artifacts:
    print(f"- {artifact}")

In [None]:
# Back up the artifacts to Google Drive (under the path mounted at /content/drive)
backup_dir = f"/content/drive/MyDrive/rsna-aneurysm/{cfg['experiment']['id']}"
# The commands below run in a shell via IPython's "!" syntax; {backup_dir} is
# substituted from the Python variable and quoted for the shell.
!mkdir -p "{backup_dir}"
!cp oof_predictions.csv "{backup_dir}/"
!cp metrics.json "{backup_dir}/"
!cp -r model "{backup_dir}/"
!cp wandb_run.txt "{backup_dir}/"
!cp git_sha.txt "{backup_dir}/"

print(f"Results backed up to: {backup_dir}")

In [None]:
# Finish the W&B run
wandb.finish()
print("Training completed!")