# RSNA Aneurysm Detection Training - exp0001

**実験概要**: ResNet50 ベースラインモデル

**実行環境**: 
- Google Colab (GPU)
- PyTorch with Mixed Precision
- W&B Experiment Tracking

In [None]:
# Google Colab環境セットアップ
import os
import sys

# GPU確認
!nvidia-smi

# リポジトリクローン（初回のみ）
repo_name = "DATASCIENCE-TEM"
repo_url = "https://github.com/YOUR_USERNAME/DATASCIENCE-TEM.git"

if not os.path.exists(f"/content/{repo_name}"):
    print(f"Cloning {repo_url}...")
    !git clone {repo_url}
else:
    print(f"Repository {repo_name} already exists, updating...")
    %cd /content/{repo_name}
    !git pull origin main

%cd /content/DATASCIENCE-TEM/kaggle-projects/rsna-aneurysm/experiments/exp0001

In [None]:
# Install the dependencies listed in env/requirements.lock
!pip install -r env/requirements.lock

In [None]:
# API credential setup
from google.colab import drive, userdata

# Copy each Colab secret into the environment variable of the same name
for secret_name in ("WANDB_API_KEY", "KAGGLE_USERNAME", "KAGGLE_KEY"):
    os.environ[secret_name] = userdata.get(secret_name)

# Mount Google Drive (used for storing artifacts)
drive.mount("/content/drive")

In [None]:
# Create the credential file for the Kaggle CLI (generated from environment variables)
import os, json, pathlib, subprocess

kdir = pathlib.Path.home().joinpath('.kaggle')
kdir.mkdir(parents=True, exist_ok=True)
kjson = kdir.joinpath('kaggle.json')

kaggle_credentials = {
    'username': os.environ.get('KAGGLE_USERNAME', ''),
    'key': os.environ.get('KAGGLE_KEY', ''),
}
if not all(kaggle_credentials.values()):
    # The file is still written so the cell keeps its best-effort behaviour,
    # but make the missing configuration visible.
    print('Warning: KAGGLE_USERNAME / KAGGLE_KEY is not set; kaggle.json will contain empty credentials')

# Create the file with mode 0600 from the start instead of writing it with the
# default permissions and restricting it afterwards.
fd = os.open(str(kjson), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(fd, 'w') as f:
    json.dump(kaggle_credentials, f)

try:
    # The mode given to os.open() only applies to newly created files, so an
    # already existing kaggle.json is tightened explicitly.
    os.chmod(kjson, 0o600)
except OSError as e:
    print('chmod failed:', e)
print('Kaggle credentials prepared at ~/.kaggle/kaggle.json')


In [None]:
# Data preparation (Kaggle download -> metadata extraction -> PNG conversion -> training metadata)
from pathlib import Path
import os, subprocess, shlex
import shutil


def _run_checked(command_line):
    """Split a shell-style command line, run it, and raise if it exits non-zero."""
    subprocess.run(shlex.split(command_line), check=True)


# Show the working directory (this notebook is expected to run from exp0001)
print('CWD:', os.getcwd())

# Data directories, relative to the experiment directory
RAW_DIR = Path('../../data/raw')
PROC_DIR = Path('../../data/processed')
for data_path in (RAW_DIR, PROC_DIR):
    data_path.mkdir(parents=True, exist_ok=True)

# 1) Kaggle data download (skipped when raw/train.csv already exists)
if (RAW_DIR / 'train.csv').exists():
    print('Skip download: train.csv found')
else:
    _run_checked('python ../../scripts/download_data.py --competition rsna-intracranial-aneurysm-detection --output ../../data/raw')

# 2) DICOM metadata extraction (skipped when the metadata CSV already exists)
if (PROC_DIR / 'train_metadata.csv').exists():
    print('Skip metadata extract: train_metadata.csv found')
else:
    _run_checked('python ../../scripts/dicom_utils.py extract-metadata --input ../../data/raw/train_images --output ../../data/processed/train_metadata.csv')

# 3) DICOM -> PNG conversion (runs only when no PNG exists yet)
# Images are written directly under processed/ as <SOPInstanceUID>.png,
# which is where the Dataset looks for them.
if any(PROC_DIR.glob('*.png')):
    print('Skip convert: PNG already exists in processed dir')
else:
    _run_checked('python ../../scripts/dicom_utils.py convert-images --input ../../data/raw/train_images --output ../../data/processed --format png --target-size 512 512 --window-center 40 --window-width 80')

# 4) Training metadata (the notebook reads processed/train.csv)
if (PROC_DIR / 'train.csv').exists():
    print('Skip create_metadata: processed/train.csv found')
else:
    _run_checked('python ../../scripts/create_metadata.py --train-csv ../../data/raw/train.csv --metadata ../../data/processed/train_metadata.csv --output ../../data/processed/train_processed.csv')
    # Copy with Python rather than shelling out to cp
    shutil.copy2('../../data/processed/train_processed.csv', '../../data/processed/train.csv')

print('Data preparation completed.')


In [None]:
# 必要ライブラリのインポート
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
import json
import warnings
from pathlib import Path
from datetime import datetime
import subprocess

# PyTorch関連
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import torchvision.models as models
from torch.cuda.amp import autocast, GradScaler

# 画像処理
import cv2
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
import pydicom

# ML・評価指標
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
import wandb

# Suppress all warnings for the rest of the session
warnings.filterwarnings("ignore")
# Apply the "seaborn-v0_8" matplotlib style sheet to subsequent plots
plt.style.use("seaborn-v0_8")

In [None]:
# Load the experiment configuration
with open("config.yaml", "r") as config_file:
    cfg = yaml.safe_load(config_file)

# Echo the main settings so they show up in the notebook output
for summary_line in (
    f"Experiment ID: {cfg['experiment']['id']}",
    f"Description: {cfg['experiment']['description']}",
    f"Model: {cfg['model']['architecture']}",
    f"Image Size: {cfg['data']['image_size']}",
):
    print(summary_line)

In [None]:
# Seed setup (for reproducibility)
def set_seed(seed, deterministic=None):
    """Seed the random number generators used by this notebook.

    Args:
        seed: Integer seed applied to Python's ``random`` module, NumPy and
            PyTorch (CPU and CUDA generators).
        deterministic: Whether cuDNN should run in deterministic mode
            (``cudnn.benchmark`` is set to the opposite). When ``None``
            (the default), the value is read from
            ``cfg["environment"]["deterministic"]``.
    """
    import random

    if deterministic is None:
        deterministic = cfg["environment"]["deterministic"]
    deterministic = bool(deterministic)

    # Python's own generator was previously left unseeded; any library that
    # draws from ``random`` (some albumentations releases are reported to do
    # so — TODO confirm for the pinned version) would otherwise vary per run.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = deterministic
    torch.backends.cudnn.benchmark = not deterministic


# Apply the configured seed, then create the torch device named in the config
set_seed(cfg["environment"]["seed"])
device = torch.device(cfg["environment"]["device"])
print(f"Using device: {device}")

In [None]:
# Get the Git SHA (for version tracking)
def get_git_sha():
    """Return the commit hash of HEAD, or ``"unknown"`` if it cannot be read.

    Returns:
        str: The stripped output of ``git rev-parse HEAD``, or the literal
        string ``"unknown"`` when the command fails, git cannot be executed,
        or its output is not ASCII.
    """
    try:
        raw_sha = subprocess.check_output(["git", "rev-parse", "HEAD"])
        return raw_sha.decode("ascii").strip()
    except (subprocess.CalledProcessError, OSError, UnicodeDecodeError):
        # Catch only the expected failures (non-zero exit, git not runnable,
        # undecodable output) so KeyboardInterrupt/SystemExit still propagate.
        return "unknown"


git_sha = get_git_sha()

# Persist the Git SHA next to the other run artifacts, then report it
with open("git_sha.txt", mode="w") as sha_file:
    sha_file.write(git_sha)

print(f"Git SHA: {git_sha}")

In [None]:
# Load the training table
data_dir = Path(cfg["paths"]["data_dir"])
print(f"Data directory: {data_dir}")

# Check that the training CSV exists
processed_dir = data_dir / "processed"
train_csv_path = processed_dir / "train.csv"
if not train_csv_path.exists():
    print(f"Warning: {train_csv_path} not found. Creating sample data for testing...")
    # Build a placeholder table (fallback for when the real data is absent):
    # 100 rows, 10 rows per patient, random binary target
    n_samples = 100
    sample_df = pd.DataFrame(
        {
            "image_id": [f"img_{i:04d}" for i in range(n_samples)],
            "PatientID": [f"patient_{i // 10:03d}" for i in range(n_samples)],
            cfg["data"]["target_column"]: np.random.choice([0, 1], n_samples, p=[0.7, 0.3]),
        }
    )

    # Create the output directory before writing
    processed_dir.mkdir(parents=True, exist_ok=True)
    sample_df.to_csv(train_csv_path, index=False)
    print(f"Sample data saved to {train_csv_path}")

# Read the table and summarise it
train_df = pd.read_csv(train_csv_path)
print(f"Training data shape: {train_df.shape}")
print(f"Target distribution:")
print(train_df[cfg["data"]["target_column"]].value_counts())

In [None]:
# Build the cross-validation splits
sgkf = StratifiedGroupKFold(n_splits=cfg["cv"]["n_folds"], shuffle=True, random_state=cfg["cv"]["seed"])

# split() yields positional indices. Collect the fold ids in a plain array and
# attach it afterwards: label-based `.loc[val_idx]` only addresses the intended
# rows when the DataFrame index happens to be 0..n-1.
fold_ids = np.full(len(train_df), -1, dtype=int)
for fold, (train_idx, val_idx) in enumerate(
    sgkf.split(train_df, train_df[cfg["data"]["target_column"]], train_df[cfg["cv"]["group_column"]])
):
    fold_ids[val_idx] = fold
train_df["fold"] = fold_ids

# Save the fold assignment
# NOTE(review): the saved column is hard-coded to "PatientID" while the split
# groups by cfg["cv"]["group_column"] — confirm these are the same column.
train_df[["PatientID", "fold", cfg["data"]["target_column"]]].to_csv("cv_folds.csv", index=False)
print("CV splits saved to cv_folds.csv")

# Show the target distribution per fold
print("\nFold distribution:")
print(train_df.groupby("fold")[cfg["data"]["target_column"]].value_counts().unstack(fill_value=0))

In [None]:
# Data augmentation settings
def get_transforms(mode="train"):
    """Build the albumentations pipeline for ``mode``.

    Every pipeline resizes to ``cfg["data"]["image_size"]``, normalizes with
    the mean/std from ``cfg["data"]["normalization"]`` and converts to a
    tensor. When ``mode == "train"`` the random augmentations are inserted
    between the resize and the normalization; any other mode gets none.

    Returns:
        An ``A.Compose`` instance.
    """
    data_cfg = cfg["data"]
    norm_cfg = data_cfg["normalization"]

    random_augmentations = []
    if mode == "train":
        random_augmentations = [
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.3),
            A.Rotate(limit=10, p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.5),
            A.CLAHE(p=0.3),
        ]

    steps = [
        A.Resize(data_cfg["image_size"][0], data_cfg["image_size"][1]),
        *random_augmentations,
        A.Normalize(mean=norm_cfg["mean"], std=norm_cfg["std"], max_pixel_value=255.0),
        ToTensorV2(),
    ]
    return A.Compose(steps)

In [None]:
# Dataset definition
class AneurysmDataset(Dataset):
    """Serve ``<image_dir>/<image_id>.png`` images, and labels, for DataFrame rows.

    The label is read from the column named by ``cfg["data"]["target_column"]``.
    If ``image_dir`` does not exist when the dataset is constructed, it is
    created and filled with random dummy images so the pipeline can be tested.
    """

    def __init__(self, df, image_dir, transform=None, mode="train"):
        """Store the inputs and create dummy images if ``image_dir`` is missing.

        Args:
            df: DataFrame with an ``image_id`` column; its index is reset.
            image_dir: Directory that holds the PNG files.
            transform: Optional callable invoked as ``transform(image=...)``;
                the ``"image"`` entry of its result is used.
            mode: ``"test"`` makes ``__getitem__`` return only the image; any
                other value returns ``(image, label)``.
        """
        self.df = df.reset_index(drop=True)
        self.image_dir = Path(image_dir)
        self.transform = transform
        self.mode = mode

        # Nothing else to do when the image directory is already there
        if self.image_dir.exists():
            return

        print(f"Warning: Image directory {self.image_dir} does not exist.")
        print("Creating dummy images for testing...")
        self.image_dir.mkdir(parents=True, exist_ok=True)
        self._create_dummy_images()

    def _create_dummy_images(self):
        """Write a random 512x512 PNG for every row that has no image file yet."""
        for _, row in self.df.iterrows():
            target_path = self.image_dir / f"{row['image_id']}.png"
            if target_path.exists():
                continue
            noise = np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)
            cv2.imwrite(str(target_path), noise)
        print(f"Created {len(self.df)} dummy images")

    def __len__(self):
        """Return the number of rows."""
        return len(self.df)

    def _read_image(self, image_path):
        """Read ``image_path`` as RGB; on any failure log it and return random noise."""
        try:
            bgr = cv2.imread(str(image_path))
            if bgr is None:
                raise FileNotFoundError(f"Could not load image: {image_path}")
            return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        except Exception as e:
            print(f"Error loading image {image_path}: {e}")
            # Fallback: a random image keeps the batch shape intact
            return np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)

    def __getitem__(self, idx):
        """Return the image at position ``idx``, with its label unless in test mode."""
        row = self.df.iloc[idx]
        image = self._read_image(self.image_dir / f"{row['image_id']}.png")

        if self.transform:
            image = self.transform(image=image)["image"]

        if self.mode == "test":
            return image

        label = torch.tensor(row[cfg["data"]["target_column"]], dtype=torch.float)
        return image, label


print("Dataset class defined")

In [None]:
# Model definition
class AneurysmModel(nn.Module):
    """ResNet50 backbone followed by a two-layer classification head.

    The head's dropout rate and hidden width are read from
    ``cfg["model"]["dropout"]`` and ``cfg["model"]["hidden_dim"]``.
    """

    def __init__(self, model_name="resnet50", num_classes=1, pretrained=True):
        """Build the backbone and the classification head.

        Args:
            model_name: Backbone architecture; only ``"resnet50"`` is implemented.
            num_classes: Number of outputs of the final linear layer.
            pretrained: Forwarded to ``models.resnet50(pretrained=...)``.

        Raises:
            ValueError: If ``model_name`` is not a supported architecture.
        """
        super().__init__()

        # Fail early with a clear message; previously an unknown name left
        # ``in_features`` and ``self.backbone`` undefined and surfaced later
        # as an unrelated error.
        if model_name != "resnet50":
            raise ValueError(f"Unsupported model_name {model_name!r}; only 'resnet50' is implemented")

        # Backbone: drop the original fc layer so it outputs pooled features
        # NOTE(review): `pretrained=` is deprecated in newer torchvision in
        # favour of `weights=` — kept as-is to match the pinned environment.
        self.backbone = models.resnet50(pretrained=pretrained)
        in_features = self.backbone.fc.in_features
        self.backbone.fc = nn.Identity()

        # Classification head
        self.classifier = nn.Sequential(
            nn.Dropout(cfg["model"]["dropout"]),
            nn.Linear(in_features, cfg["model"]["hidden_dim"]),
            nn.ReLU(),
            nn.Dropout(cfg["model"]["dropout"]),
            nn.Linear(cfg["model"]["hidden_dim"], num_classes),
        )

    def forward(self, x):
        """Return the classifier output for the backbone features of ``x``."""
        features = self.backbone(x)
        output = self.classifier(features)
        return output


# Smoke test: build the model once and report its size
model_kwargs = {
    "model_name": cfg["model"]["architecture"],
    "num_classes": cfg["model"]["num_classes"],
    "pretrained": cfg["model"]["pretrained"],
}
model = AneurysmModel(**model_kwargs).to(device)
print(f"Model created: {cfg['model']['architecture']}")

n_parameters = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {n_parameters:,}")

In [None]:
# Initialise W&B
wandb_cfg = cfg["logging"]["wandb"]
run = wandb.init(
    project=wandb_cfg["project"],
    name=wandb_cfg["name"],
    config=cfg,
    tags=wandb_cfg["tags"],
    notes=wandb_cfg["notes"],
)

# Save the run URL and run id to a text file
wandb_url = f"https://wandb.ai/{wandb.run.entity}/{wandb.run.project}/runs/{wandb.run.id}"
with open("wandb_run.txt", "w") as run_file:
    run_file.writelines([f"URL: {wandb_url}\n", f"Run ID: {wandb.run.id}\n"])

print(f"W&B run: {wandb_url}")

In [None]:
# Training and evaluation functions
def train_epoch(model, dataloader, optimizer, criterion, scaler, device):
    """Run one training pass over ``dataloader`` and return the mean batch loss.

    Mixed precision is switched by ``cfg["environment"]["mixed_precision"]``:
    when enabled, the backward pass and optimizer step go through ``scaler``.

    Args:
        model: Module to train; put into train mode here.
        dataloader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(outputs, labels)``.
        scaler: Gradient scaler used on the mixed-precision path.
        device: Device the batches are moved to.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    use_amp = cfg["environment"]["mixed_precision"]
    n_batches = len(dataloader)
    total_loss = 0

    for step, (images, labels) in enumerate(dataloader):
        images = images.to(device)
        # unsqueeze(1) adds a trailing dimension to the label batch
        labels = labels.to(device).unsqueeze(1)

        optimizer.zero_grad()

        with autocast(enabled=use_amp):
            loss = criterion(model(images), labels)

        if not use_amp:
            loss.backward()
            optimizer.step()
        else:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        loss_value = loss.item()
        total_loss += loss_value

        # Progress line every 50 batches
        if step % 50 == 0:
            print(f"Batch {step}/{n_batches}, Loss: {loss_value:.4f}")

    return total_loss / n_batches


def validate_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` and compute loss and ROC AUC.

    Args:
        model: Module to evaluate; put into eval mode here.
        dataloader: Iterable of ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(avg_loss, auc, predictions, targets)`` where ``predictions``
        is a list of sigmoid probabilities and ``targets`` the matching labels,
        both in dataloader order.
    """
    model.eval()
    total_loss = 0
    predictions = []
    targets = []

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device).unsqueeze(1)

            with autocast(enabled=cfg["environment"]["mixed_precision"]):
                outputs = model(images)
                loss = criterion(outputs, labels)

            total_loss += loss.item()

            # Convert logits to probabilities. Under autocast the logits can be
            # float16; upcast first so the probabilities keep float32
            # resolution and the AUC ranking is not distorted by ties caused
            # by half-precision rounding.
            probs = torch.sigmoid(outputs.float()).cpu().numpy()
            predictions.extend(probs.flatten())
            targets.extend(labels.cpu().numpy().flatten())

    avg_loss = total_loss / len(dataloader)
    auc = roc_auc_score(targets, predictions)

    return avg_loss, auc, predictions, targets

In [None]:
# K-Fold Cross Validation
fold_scores = []
oof_predictions = np.zeros(len(train_df))
oof_targets = train_df[cfg["data"]["target_column"]].values

# Directory for the per-fold checkpoints
Path("model").mkdir(exist_ok=True)

for fold in range(cfg["cv"]["n_folds"]):
    print(f"\n{'=' * 50}")
    print(f"FOLD {fold + 1}/{cfg['cv']['n_folds']}")
    print(f"{'=' * 50}")

    # Split the rows by fold id
    train_fold_df = train_df[train_df["fold"] != fold]
    valid_fold_df = train_df[train_df["fold"] == fold]
    # Positions (not index labels) of the validation rows inside train_df;
    # used to write into the positional oof_predictions array.
    valid_positions = np.flatnonzero((train_df["fold"] == fold).to_numpy())

    print(f"Train: {len(train_fold_df)}, Valid: {len(valid_fold_df)}")

    # Datasets and DataLoaders
    train_dataset = AneurysmDataset(train_fold_df, cfg["paths"]["processed_data"], transform=get_transforms("train"))
    valid_dataset = AneurysmDataset(valid_fold_df, cfg["paths"]["processed_data"], transform=get_transforms("valid"))

    train_loader = DataLoader(
        train_dataset,
        batch_size=cfg["train"]["batch_size"],
        shuffle=True,
        num_workers=2,  # limited to 2 on Colab
        pin_memory=True,
    )
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=cfg["train"]["batch_size"],
        shuffle=False,  # keeps predictions aligned with valid_fold_df row order
        num_workers=2,  # limited to 2 on Colab
        pin_memory=True,
    )

    # Fresh model for every fold
    model = AneurysmModel(
        model_name=cfg["model"]["architecture"], num_classes=cfg["model"]["num_classes"], pretrained=cfg["model"]["pretrained"]
    ).to(device)

    # Optimizer: AdamW when configured, otherwise fall back to Adam
    optimizer_type = cfg["train"]["optimizer"]["type"]
    if optimizer_type == "AdamW":
        optimizer = optim.AdamW(
            model.parameters(), lr=cfg["train"]["optimizer"]["lr"], weight_decay=cfg["train"]["optimizer"]["weight_decay"]
        )
    else:
        # Fallback
        optimizer = optim.Adam(
            model.parameters(), lr=cfg["train"]["optimizer"]["lr"], weight_decay=cfg["train"]["optimizer"]["weight_decay"]
        )

    criterion = nn.BCEWithLogitsLoss()
    scaler = GradScaler(enabled=cfg["environment"]["mixed_precision"])

    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=cfg["train"]["scheduler"]["T_0"], eta_min=cfg["train"]["scheduler"]["eta_min"]
    )

    # Early-stopping state, reset per fold. best_auc starts at -inf so the
    # first evaluated epoch is always recorded, and best_preds starts at None
    # so predictions from a previous fold can never leak into this one.
    best_auc = float("-inf")
    best_preds = None
    patience_counter = 0

    # Training loop
    for epoch in range(cfg["train"]["epochs"]):
        train_loss = train_epoch(model, train_loader, optimizer, criterion, scaler, device)
        valid_loss, valid_auc, valid_preds, valid_targets = validate_epoch(model, valid_loader, criterion, device)

        scheduler.step()

        print(f"Epoch {epoch + 1}/{cfg['train']['epochs']}")
        print(f"Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid AUC: {valid_auc:.4f}")

        # W&B logging (the learning rate is read after scheduler.step())
        wandb.log(
            {
                f"fold_{fold}_train_loss": train_loss,
                f"fold_{fold}_valid_loss": valid_loss,
                f"fold_{fold}_valid_auc": valid_auc,
                "epoch": epoch,
                "learning_rate": optimizer.param_groups[0]["lr"],
            }
        )

        # Save the best model and remember its validation predictions
        if valid_auc > best_auc:
            best_auc = valid_auc
            patience_counter = 0
            torch.save(model.state_dict(), f"model/fold_{fold}_best.pth")
            best_preds = np.asarray(valid_preds, dtype=float)
        else:
            patience_counter += 1

        # Early stopping
        if patience_counter >= cfg["train"]["early_stopping"]["patience"]:
            print(f"Early stopping at epoch {epoch + 1}")
            break

    # Store the out-of-fold predictions of the best epoch
    if best_preds is None:
        raise RuntimeError(
            f"Fold {fold + 1}: no epoch produced validation predictions (check cfg['train']['epochs'])"
        )
    oof_predictions[valid_positions] = best_preds

    fold_scores.append(best_auc)
    print(f"Fold {fold + 1} Best AUC: {best_auc:.4f}")

    # Release memory before the next fold
    del model, optimizer, train_loader, valid_loader
    torch.cuda.empty_cache()

# Aggregate the CV results
cv_mean = np.mean(fold_scores)
cv_std = np.std(fold_scores)
oof_auc = roc_auc_score(oof_targets, oof_predictions)

print(f"\n{'=' * 50}")
print(f"CV RESULTS")
print(f"{'=' * 50}")
print(f"Fold Scores: {fold_scores}")
print(f"CV Mean: {cv_mean:.4f} ± {cv_std:.4f}")
print(f"OOF AUC: {oof_auc:.4f}")

# Log the final results to W&B
wandb.log({"cv_mean_auc": cv_mean, "cv_std_auc": cv_std, "oof_auc": oof_auc})

In [None]:
# Save the out-of-fold predictions
oof_columns = {"index": train_df.index, "fold": train_df["fold"], "y_true": oof_targets, "y_pred": oof_predictions}
oof_df = pd.DataFrame(oof_columns)
oof_df.to_csv("oof_predictions.csv", index=False)

# Save the summary metrics (values converted to plain floats for JSON)
metrics = dict(
    experiment_id=cfg["experiment"]["id"],
    cv_mean_auc=float(cv_mean),
    cv_std_auc=float(cv_std),
    oof_auc=float(oof_auc),
    fold_scores=list(map(float, fold_scores)),
    git_sha=git_sha,
    wandb_url=wandb_url,
    timestamp=datetime.now().isoformat(),
)

with open("metrics.json", "w") as metrics_file:
    json.dump(metrics, metrics_file, indent=2)

# List the artifacts produced by this run
print("Results saved:")
for artifact in (
    "oof_predictions.csv",
    "metrics.json",
    f"model/fold_*_best.pth ({cfg['cv']['n_folds']} files)",
    "wandb_run.txt",
    "git_sha.txt",
):
    print(f"- {artifact}")

In [None]:
# Back up the run artifacts to Google Drive, under a folder named after the experiment id
backup_dir = f"/content/drive/MyDrive/rsna-aneurysm/{cfg['experiment']['id']}"
!mkdir -p "{backup_dir}"
!cp oof_predictions.csv "{backup_dir}/"
!cp metrics.json "{backup_dir}/"
!cp -r model "{backup_dir}/"
!cp wandb_run.txt "{backup_dir}/"
!cp git_sha.txt "{backup_dir}/"

print(f"Results backed up to: {backup_dir}")

In [None]:
# Finish the W&B run
wandb.finish()
print("Training completed!")