# PaDiM Training

- PaDiMモデルを単一カテゴリデータ(Normal)で学習します
- 推論には 「n_features」 が必要になります

### 0. CUDA Version 確認

In [None]:
import torch, platform
# Report the installed PyTorch build, the CUDA version it was compiled
# against, and whether a CUDA device is usable in this environment.
print(f"torch version : {torch.__version__}")
print(f"cuda in torch : {torch.version.cuda}")
print(f"cuda available: {torch.cuda.is_available()}")


## 1. 初期設定

- 処理する画像サイズ 「IMAGE_SIZE」 と、検出対象とする画像カテゴリ 「CATEGORY」を指定してください
- 作成された「RUN_DIR」に、学習済モデルが作成されます

In [2]:
from pathlib import Path
from datetime import datetime, timezone, timedelta
from collections import defaultdict
import json, yaml, torch, timm, gc, shutil
from anomalib.data import Folder
from anomalib.models import Padim
from anomalib.engine import Engine
from pytorch_lightning.loggers import TensorBoardLogger

# -------- User settings --------
IMAGE_SIZE = 256                     # image size (★ edit as needed)
IMAGE_THRESHOLD = 0.5                # anomaly-detection threshold (★ edit as needed)
CATEGORY = "VisA_pipe_fryum"         # category to inspect (★ edit as needed)
DATA_ROOT = Path("/workspace/data")  # data folder (train/test)
# -------------------------------

# The run directory is named after the current time in JST (UTC+9).
JST = timezone(timedelta(hours=9))
timestamp = datetime.now(tz=JST).strftime("%Y%m%d_%H%M%S")

# Trained models are written below OUTPUT_DIR/<timestamp>.
OUTPUT_DIR = Path("/workspace/models", "PaDiM", CATEGORY)
RUN_DIR = OUTPUT_DIR.joinpath(timestamp)
RUN_DIR.joinpath("checkpoint").mkdir(parents=True, exist_ok=True)
RUN_DIR.joinpath("pytorch").mkdir(parents=True, exist_ok=True)

# These sub-directories are only named here; later cells create them on demand.
TEMP_DIR = RUN_DIR.joinpath("temp")
PARAM_DIR = RUN_DIR.joinpath("param")
LOG_DIR = RUN_DIR.joinpath("logs")

print('RUN DIR:', RUN_DIR)

# -------- DataModule定義 --------
def build_datamodule() -> Folder:
    """Build the anomalib ``Folder`` datamodule for ``CATEGORY``.

    All directories are given relative to ``DATA_ROOT``:

    * ``train/<CATEGORY>``        - normal images used for training
    * ``test/<CATEGORY>/anomaly`` - abnormal images used for evaluation
      (per the original note: not used for training, used by the Optuna search)
    * ``test/<CATEGORY>/normal``  - normal images used for evaluation

    Returns:
        A ``Folder`` datamodule with train/eval batch size 16 that accepts
        common image extensions in lower and upper case.
    """
    lower_case = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp")
    upper_case = tuple(suffix.upper() for suffix in lower_case)

    folder_kwargs = {
        "name": CATEGORY,
        "root": DATA_ROOT,
        "normal_dir": f"train/{CATEGORY}",
        "abnormal_dir": f"test/{CATEGORY}/anomaly",
        "normal_test_dir": f"test/{CATEGORY}/normal",
        "train_batch_size": 16,  # library default noted in the original as 32
        "eval_batch_size": 16,   # library default noted in the original as 32
        "extensions": lower_case + upper_case,
    }
    return Folder(**folder_kwargs)

# -------- n_features_max 自動生成定義 --------
def _canon(layer_name: str) -> str:
    """layer2 → layer2 / blocks.2 → blocks.2 のように正規化"""
    return layer_name.strip()

def build_nfeat_table(backbones, layer_opts):
    """Compute the channel sum for every (backbone, layers) combination.

    The sum of the output channels of the chosen layers is used by the
    caller as the upper bound for PaDiM's ``n_features``.

    Args:
        backbones: Iterable of timm model names.
        layer_opts: Iterable of layer-name tuples (tuples, because they
            become part of the dictionary key).

    Returns:
        dict[(backbone, layers_tuple)] -> int  (sum of channels of chosen layers)

    Raises:
        KeyError: If a layer name is not among the backbone's feature modules.
    """
    nfeat_tbl = {}
    for backbone in backbones:
        # Only the channel metadata is read here, so the pretrained weights
        # are not needed; skipping them avoids a download per backbone.
        model = timm.create_model(backbone, features_only=True, pretrained=False)
        # {module name: number of output channels}
        chan_map = {fi["module"]: fi["num_chs"] for fi in model.feature_info}
        del model

        for layers in layer_opts:
            names = [_canon(name) for name in layers]
            unknown = [name for name in names if name not in chan_map]
            if unknown:
                raise KeyError(
                    f"Unknown layer(s) {unknown} for backbone '{backbone}'; "
                    f"available: {sorted(chan_map)}"
                )
            nfeat_tbl[(backbone, layers)] = sum(chan_map[name] for name in names)
    return nfeat_tbl

RUN DIR: /workspace/models/PaDiM/VisA_pipe_fryum/20250430_231424


## 2. ハイパーパラメータ探索 (Optuna)

- ここではハイパーパラメータの自動探索を行います
- PaDiM は、教師なし学習ですが、Optuna 探索は検証用に異常ラベルを必要とする教師ありプロセスと言えます
- 探索を行わずに手動でハイパーパラメータを調整する場合は、ここをスキップして「3. 学習」へ

In [None]:
import optuna, torch, yaml, types
from anomalib.models import Padim
from anomalib.engine import Engine

# -------- GPU availability --------
GPU_OK = torch.cuda.is_available()

# -------- Search space (★ edit as needed) --------
N_TRIALS   = 3                                # number of Optuna trials
BACKBONES  = ["resnet18", "wide_resnet50_2"]  # backbone CNNs (several may be listed)
# Each option is a tuple of layer names; uncomment entries to widen the search.
LAYER_OPTS = [
    ("layer2",),
    # ("layer2", "layer3"),
    # ("layer1", "layer2", "layer3"),
]
# Maps (backbone, layers) -> channel sum, used as n_features in the objective.
N_FEAT_MAX = build_nfeat_table(BACKBONES, LAYER_OPTS)

# -------- Objective --------
def objective(trial: optuna.Trial):
    """Optuna objective: fit PaDiM once for a sampled configuration.

    Samples a backbone and a layer option, builds the model with
    ``n_features`` taken from ``N_FEAT_MAX``, fits it for one epoch and
    returns the ``image_AUROC`` callback metric (0.0 when it is missing).

    Training is attempted on GPU first (when ``GPU_OK``) and retried on CPU
    if a ``RuntimeError`` mentioning ``cudaGetDeviceCount`` is raised; any
    other ``RuntimeError`` propagates. The device actually used is stored
    in the trial's ``used_gpu`` user attribute.
    """
    backbone = trial.suggest_categorical("backbone", BACKBONES)

    # Layer tuples are sampled through their index into LAYER_OPTS.
    layer_indices = list(range(len(LAYER_OPTS)))
    layers = LAYER_OPTS[trial.suggest_categorical("layers_idx", layer_indices)]

    # Channel sum of the chosen layers.
    n_feat = N_FEAT_MAX[(backbone, layers)]

    # Data & model
    datamodule = build_datamodule()
    pre_processor = Padim.configure_pre_processor(image_size=(IMAGE_SIZE, IMAGE_SIZE))
    model = Padim(
        backbone=backbone,
        layers=layers,
        n_features=n_feat,
        pre_processor=pre_processor,
    )

    # Engine with GPU -> CPU fallback
    used_gpu = None
    for use_gpu in (GPU_OK, False):
        accelerator = "gpu" if use_gpu else "cpu"
        try:
            engine = Engine(
                logger=False,
                default_root_dir=TEMP_DIR / "optuna",
                accelerator=accelerator,
                max_epochs=1,  # single-epoch evaluation
                enable_progress_bar=False,
            )
            engine.fit(model=model, datamodule=datamodule)
        except RuntimeError as err:
            if "cudaGetDeviceCount" not in str(err):
                raise
            print("⚠️ CUDA 初期化エラー → CPU で再試行")
        else:
            used_gpu = use_gpu
            break

    trial.set_user_attr("used_gpu", used_gpu)
    if not used_gpu:
        print(f"Trial {trial.number}: CPU で実行 (GPU fallback)")

    auroc = engine.trainer.callback_metrics.get("image_AUROC", torch.tensor(0.0))
    return auroc.item()

# -------- Run the search --------
TEMP_DIR.mkdir(parents=True, exist_ok=True)
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=N_TRIALS)

print("BEST :", study.best_params)
print("AUROC:", study.best_value)

# -------- Save the results --------
PARAM_DIR.mkdir(parents=True, exist_ok=True)

# Search space that was explored, kept for reference.
search_space = dict(
    backbone   = BACKBONES,
    layers     = LAYER_OPTS,
    n_features = "auto (<= channel sum)",
)
# Context managers make sure the YAML files are flushed and closed.
with open(PARAM_DIR / "search_space.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(search_space, f)

# Best trial: turn the sampled index back into the layer tuple and attach
# the matching n_features so the training cell can use the file directly.
best = study.best_params.copy()
best["layers"]      = LAYER_OPTS[best.pop("layers_idx")]
best["n_features"]  = N_FEAT_MAX[(best["backbone"], best["layers"])]
with open(PARAM_DIR / "best_params.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(best, f)

study.trials_dataframe().to_csv(PARAM_DIR / "trials.csv", index=False)

# ---------- Clean up TEMP_DIR ----------
# Comment this section out when debugging so the temporary files are kept.
import gc
import shutil

if TEMP_DIR.exists():
    # On Windows a lingering file handle can make deletion fail, so run a
    # garbage collection first as a precaution.
    gc.collect()
    shutil.rmtree(TEMP_DIR, ignore_errors=True)


## 3. 学習

- 「2. ハイパーパラメータ探索(Optuna)」 を行った場合は、保存されたベストパラメータで学習します
- 自動探索を行わない場合は、ハイパーパラメータ 「MANUAL_PARAMS」を手動で調整してください

In [3]:
from pytorch_lightning.loggers import TensorBoardLogger
from anomalib.models import Padim
from anomalib.engine import Engine
from anomalib.deploy import ExportType
import numpy as np, torch, yaml, types
import logging

# -------- GPU availability --------
GPU_OK = torch.cuda.is_available()

# -------- Training settings --------
MAX_EPOCHS = 1   # run exactly once (computes μ and Σ) -- keep fixed

# -------- ★ Manual parameters (used when best_params.yaml does not exist) --------
# n_features = None means the value is derived later in this cell from the
# backbone and layers via infer_n_features().
MANUAL_PARAMS = dict(
    backbone       = "resnet18",
    layers         = ("layer2",),
    n_features     = None,
)

# ------- calculation n_features -------
def _canon(layer_name: str) -> str:
    return layer_name.strip()

def infer_n_features(backbone: str, layers: tuple) -> int:
    """Return the summed output channels of *layers* for a timm *backbone*.

    Args:
        backbone: timm model name.
        layers: Names of the feature modules whose channels are summed.

    Returns:
        Sum of ``num_chs`` over the requested layers.

    Raises:
        KeyError: If a layer name is not among the backbone's feature modules.
    """
    # Only channel metadata is needed, so skip downloading pretrained weights.
    model = timm.create_model(backbone, features_only=True, pretrained=False)
    chan_map = {fi["module"]: fi["num_chs"] for fi in model.feature_info}

    names = [_canon(name) for name in layers]
    unknown = [name for name in names if name not in chan_map]
    if unknown:
        raise KeyError(
            f"Unknown layer(s) {unknown} for backbone '{backbone}'; "
            f"available: {sorted(chan_map)}"
        )
    return sum(chan_map[name] for name in names)

# -------- Load best_params.yaml --------
import ast

best_params_path = PARAM_DIR / "best_params.yaml"

if best_params_path.exists():
    # Context manager so the file handle is closed right after parsing.
    with open(best_params_path, encoding="utf-8") as f:
        cfg = yaml.safe_load(f)
    print("▶ Using best_params.yaml:", cfg)
else:
    cfg = MANUAL_PARAMS.copy()
    print("▶ Using manual params:", cfg)

# -------- Normalize layers to a tuple --------
raw_layers = cfg["layers"]
if isinstance(raw_layers, (list, tuple)):
    layers_tuple = tuple(raw_layers)
else:
    # A string such as "('layer2', 'layer3')" is parsed as a literal only;
    # unlike eval(), literal_eval never executes code from the config.
    try:
        parsed_layers = ast.literal_eval(raw_layers)
    except (ValueError, SyntaxError):
        parsed_layers = raw_layers  # bare layer name such as "layer2"
    if isinstance(parsed_layers, str):
        layers_tuple = (parsed_layers,)
    else:
        layers_tuple = tuple(parsed_layers)

# -------- Decide n_features --------
# cfg already holds the content of best_params.yaml when that file exists,
# so a missing value is always derived from the backbone/layers instead of
# re-reading the same file.
if cfg.get("n_features") is not None:
    n_feat = int(cfg["n_features"])
else:
    n_feat = infer_n_features(cfg["backbone"], layers_tuple)

# -------- Model & DataModule --------
pre   = Padim.configure_pre_processor(image_size=(IMAGE_SIZE, IMAGE_SIZE))
model = Padim(
    backbone               = cfg["backbone"],
    layers                 = layers_tuple,
    n_features             = n_feat,
    pre_processor          = pre,
)
dm = build_datamodule()

# -------- Lightning training (GPU -> CPU fallback) --------
tb_logger = TensorBoardLogger(save_dir=LOG_DIR, name="final")

# Try the GPU first when one is available, then the CPU; the CPU is never
# attempted twice.
attempts = (True, False) if GPU_OK else (False,)

used_gpu   = None
last_error = None
for use_gpu in attempts:
    try:
        engine = Engine(
            default_root_dir=TEMP_DIR / "train",
            accelerator        = "gpu" if use_gpu else "cpu",
            max_epochs         = MAX_EPOCHS,
            log_every_n_steps  = 1,
            enable_progress_bar= False,
            logger             = tb_logger,
        )
        engine.fit(model=model, datamodule=dm)
    except RuntimeError as e:
        # Only the CUDA-initialisation failure triggers the fallback.
        if "cudaGetDeviceCount" not in str(e):
            raise
        last_error = e
        if use_gpu:
            print("⚠️ CUDA 初期化エラー → CPU でリトライ")
    else:
        used_gpu = use_gpu
        break

# Stop here if no attempt finished; otherwise the cells below would save
# statistics and weights of a model that was never fitted.
if used_gpu is None:
    raise RuntimeError("❌ GPU / CPU のいずれでも学習を完了できませんでした") from last_error

if not used_gpu:
    print("⚠️ GPU 使用不可 → CPU で学習完了")

# -------- Save stats (mu, inv_cov) --------
pytorch_dir = RUN_DIR / "pytorch"

# Gaussian statistics held by the fitted PaDiM torch model.
gaussian = model.model.gaussian

# Abort when the inverse covariance is not available.
if not hasattr(gaussian, "inv_covariance"):
    raise RuntimeError("❌ PadimModel.gaussian に 'inv_covariance' が定義されていません")

# Move the mean and the inverse covariance to host memory as NumPy arrays.
mu      = gaussian.mean.cpu().numpy()
inv_cov = gaussian.inv_covariance.cpu().numpy()

stats_path = pytorch_dir / "stats.npz"
np.savez_compressed(stats_path, mu=mu, inv_cov=inv_cov)
print("✓ Stats(mu, inv_cov) :", stats_path)

# -------- Save the Lightning checkpoint --------
ckpt_path = RUN_DIR.joinpath("checkpoint", "best.ckpt")
engine.trainer.save_checkpoint(ckpt_path)
print("✓ Checkpoint :", ckpt_path)

# -------- Save the state dict (.pth) used for inference --------
state_path = pytorch_dir / "model.pth"
torch.save(model.state_dict(), state_path)
print("✓ Weights :", state_path)

# ======== Predict on the training data only to obtain image_min / image_max ========
# The values are handed to the inference code for normalization
# (Z-score (μ, σ) normalization would be an alternative).
print("Computing image_min and image_max ...")

# Set the datamodule up for the "fit" stage and take its train dataloader.
dm.setup("fit")
predict_loader = dm.train_dataloader()

logging.getLogger("anomalib.visualization.image.item_visualizer").setLevel(logging.ERROR)

# Only the post-processor's normalization is switched off so that raw maps
# are returned; try/finally restores it even if prediction fails.
scores = []
model.post_processor.enable_normalization = False
try:
    # The train dataloader is passed explicitly: with only `datamodule=dm`
    # the engine would use the datamodule's own predict dataloader instead
    # of the training images this step is meant to cover.
    for batch in engine.predict(model=model, dataloaders=predict_loader, return_predictions=True):
        for item in batch:
            # Per-image raw score = maximum of the raw anomaly map.
            scores.append(float(item.anomaly_map.max().cpu()))
finally:
    model.post_processor.enable_normalization = True

if not scores:
    raise RuntimeError("❌ 学習データの推論結果が空のため image_min / image_max を計算できません")

# Compute image_min and image_max
image_min = float(np.min(scores))
image_max = float(np.max(scores))
print(f"Computed image_min: {image_min}, image_max: {image_max}")
# ==================================================================
# ==================================================================

# ------- Write the metadata that is not part of the .pth file (JSON) -------
import json

meta = dict(
    backbone=cfg["backbone"],
    layers=list(layers_tuple),
    n_features=int(n_feat),
    image_size=IMAGE_SIZE,
    weights_pth=state_path.name,
    stats="stats.npz",
    image_threshold=IMAGE_THRESHOLD,       # set manually
    image_threshold_auto=IMAGE_THRESHOLD,  # may be overwritten after the test (optional)
    image_min=image_min,
    image_max=image_max,
)

meta_path = RUN_DIR.joinpath("pytorch", "meta.json")
meta_path.write_text(json.dumps(meta, indent=2, ensure_ascii=False), encoding="utf-8")

print("✓ Meta JSON :", meta_path)

# ---------- Clean up TEMP_DIR ----------
# Comment this section out when debugging so the temporary files are kept.
import gc
import shutil

if TEMP_DIR.exists():
    # On Windows a lingering file handle can make deletion fail, so run a
    # garbage collection first as a precaution.
    gc.collect()
    shutil.rmtree(TEMP_DIR, ignore_errors=True)


## 4. 検出性能テスト

In [None]:
from pathlib import Path
from anomalib.models import Padim
from anomalib.engine import Engine
import torch, pandas as pd, shutil
import logging 

# ---------- Paths / device ----------
import json

state_path  = Path(RUN_DIR / "pytorch" / "model.pth")
result_root = RUN_DIR / "test_result"
device      = "cuda" if torch.cuda.is_available() else "cpu"

# n_features must equal the value the saved weights were trained with.
# cfg["n_features"] is None when the manual parameters were used, so in that
# case the value recorded in meta.json by the training cell is used instead.
if cfg.get("n_features") is not None:
    n_feat = int(cfg["n_features"])
else:
    with open(RUN_DIR / "pytorch" / "meta.json", "r", encoding="utf-8") as f:
        n_feat = int(json.load(f)["n_features"])

# ---------- Model ----------
model = Padim(
    backbone      = cfg["backbone"],
    layers        = layers_tuple,
    n_features    = n_feat,
    pre_processor = pre,
).to(device)
model.load_state_dict(torch.load(state_path, map_location=device), strict=True)

# ---------- DataModule ----------
dm = build_datamodule()
dm.setup("test")

# ---------- Test (the Visualizer stays enabled with its defaults) ----------
logging.getLogger("anomalib.visualization.image.item_visualizer").setLevel(logging.ERROR)

accelerator = "cpu" if device != "cuda" else "gpu"
engine = Engine(
    accelerator=accelerator,
    devices=1,
    enable_progress_bar=False,
    logger=False,
)
engine.test(model=model, datamodule=dm)

# Copy the metrics so later engine calls cannot change the values read here;
# a missing metric is reported as 0.
metrics = engine.trainer.callback_metrics.copy()
auroc, f1 = [float(metrics.get(key, 0)) for key in ("image_AUROC", "image_F1Score")]

# image_threshold held by the post-processor (per the original note it is
# determined automatically during the test); IMAGE_THRESHOLD is the fallback.
threshold = float(getattr(model.post_processor, "image_threshold", IMAGE_THRESHOLD))

# ---------- Collect the visualisation images under result_root ----------
# Per the original note the Visualizer writes to
# results/PaDiM/<CATEGORY>/<timestamp>/images/*
default_root = Path("results") / "PaDiM" / CATEGORY
if default_root.exists():
    # Only run directories qualify; an empty results folder is skipped
    # instead of making max() fail on an empty sequence.
    run_dirs = [p for p in default_root.iterdir() if p.is_dir()]
    if run_dirs:
        # Most recent test run = largest mtime.
        latest = max(run_dirs, key=lambda p: p.stat().st_mtime)
        src_images = latest / "images"                    # normal / anomaly
        dst_images = result_root / "images"
        for lbl_dir in src_images.glob("*"):              # normal & anomaly
            if not lbl_dir.is_dir():
                continue
            (dst_images / lbl_dir.name).mkdir(parents=True, exist_ok=True)
            for img in lbl_dir.glob("*"):
                # copy2 handles files only; nested directories are skipped.
                if img.is_file():
                    shutil.copy2(img, dst_images / lbl_dir.name / img.name)

# ---------- 1枚ずつチェック ----------
def get_gt(item: "ImagePrediction") -> int:
    """Return the ground-truth label of *item*: 0 = normal, 1 = anomaly.

    The first of the attributes ``label``, ``labels``, ``is_anomaly`` and
    ``targets`` that exists and is not ``None`` is converted with ``int``.
    When none of them carries a value, the label is derived from the name of
    the directory containing ``item.image_path``: ``anomaly`` (any case)
    gives 1, everything else 0.
    """
    for key in ("label", "labels", "is_anomaly", "targets"):
        value = getattr(item, key, None)
        # An attribute that is present but unset must not crash int();
        # keep looking and eventually fall back to the path.
        if value is not None:
            return int(value)

    # Fallback: decide from the parent directory name.
    parent = Path(item.image_path).parent.name.lower()
    return 1 if parent == "anomaly" else 0

# Run prediction image by image, echo one line per image and keep each
# result for the CSV export below.
records = []
for batch in engine.predict(model=model, datamodule=dm):
    for item in batch:
        file = Path(item.image_path).name
        score = float(item.pred_score)
        pred = "normal" if int(item.pred_label) != 1 else "anomaly"
        gt = "normal" if not get_gt(item) else "anomaly"
        print(f"{file:40s} | score={score:7.4f} | pred={pred:7s} | label={gt}")
        records.append({"file": file, "score": score, "pred": pred, "label": gt})

# ---------- Save as CSV ----------
result_root.mkdir(parents=True, exist_ok=True)
csv_path = result_root.joinpath("predictions.csv")
pd.DataFrame(records).to_csv(csv_path, index=False)
print(f"\n✓ predictions.csv saved to {csv_path}\n")

# ---------- Update meta.json (image_threshold_auto) ----------
import json  # imported here so this cell does not rely on an earlier cell

meta_path = Path(RUN_DIR) / "pytorch" / "meta.json"
# meta.json is written as UTF-8 by the training cell, so it is read and
# rewritten with the same encoding rather than the platform default.
with open(meta_path, "r", encoding="utf-8") as f:
    meta = json.load(f)
meta["image_threshold_auto"] = float(threshold)
with open(meta_path, "w", encoding="utf-8") as f:
    json.dump(meta, f, indent=2, ensure_ascii=False)
print(f"✓ threshold_auto updated to {meta_path}")

# ---------- Metrics ----------
# Summary of the test run: number of test images, image-level AUROC and F1.
summary_lines = [
    "\n==========  EVALUATION  ==========",
    f"Images tested : {len(dm.test_data)}",
    f"AUROC         : {auroc:7.4f}",
    f"Best F1       : {f1:7.4f}",
    "===================================\n",
]
print("\n".join(summary_lines))
