In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import json
import warnings
import sys
import time
warnings.filterwarnings('ignore')

from tqdm import tqdm
from pathlib import Path
from collections import Counter, defaultdict
from PIL import Image

In [2]:
try:
    import google.colab
    from google.colab import drive
    !uv pip install anomalib
    !uv pip install open-clip-torch
    drive.mount('/content/drive', force_remount=True)
    PROJECT_ROOT = Path('/content/drive/Othercomputers/my_notebook/lion_final_pro_multimodal-anomaly-report-generation') # Î≥∏Ïù∏ Í≤ΩÎ°ú ÏàòÏ†ï: Mac/Window
except ImportError:
    PROJECT_ROOT = Path.cwd().parents[1]

os.chdir(PROJECT_ROOT) # ÌòÑÏû¨ Í≤ΩÎ°ú ÏàòÏ†ï
print(f"Current working directory: {os.getcwd()}")

[2mUsing Python 3.12.12 environment at: /usr[0m
[2K[2mResolved [1m94 packages[0m [2min 1.47s[0m[0m
[2K[2mPrepared [1m12 packages[0m [2min 534ms[0m[0m
[2K[2mInstalled [1m12 packages[0m [2min 16ms[0m[0m
 [32m+[39m [1manomalib[0m[2m==2.2.0[0m
 [32m+[39m [1mfreia[0m[2m==0.2[0m
 [32m+[39m [1mimagecodecs[0m[2m==2026.1.14[0m
 [32m+[39m [1mjsonargparse[0m[2m==4.46.0[0m
 [32m+[39m [1mkornia[0m[2m==0.8.2[0m
 [32m+[39m [1mkornia-rs[0m[2m==0.1.10[0m
 [32m+[39m [1mlightning[0m[2m==2.6.1[0m
 [32m+[39m [1mlightning-utilities[0m[2m==0.15.2[0m
 [32m+[39m [1mpytorch-lightning[0m[2m==2.6.1[0m
 [32m+[39m [1mrich-argparse[0m[2m==1.7.2[0m
 [32m+[39m [1mtorchmetrics[0m[2m==1.8.2[0m
 [32m+[39m [1mtypeshed-client[0m[2m==2.8.2[0m
[2mUsing Python 3.12.12 environment at: /usr[0m
[2K[2mResolved [1m49 packages[0m [2min 212ms[0m[0m
[2K[2mPrepared [1m2 packages[0m [2min 107ms[0m[0m
[2K[2mInstalled [1m2 pa

In [3]:
# TODO: configure paths

# Dataset locations (everything lives under <project>/dataset/MMAD)
DATA_ROOT = PROJECT_ROOT.joinpath("dataset", "MMAD")
DOMAIN_JSON = DATA_ROOT.joinpath("domain_knowledge.json")
MMAD_JSON = DATA_ROOT.joinpath("mmad.json")
META_CSV = DATA_ROOT.joinpath("metadata.csv")

# YAML configuration files
CONFIG_ROOT = PROJECT_ROOT.joinpath("configs")
RUNTIME_CONFIG_ROOT = CONFIG_ROOT.joinpath("runtime.yaml")
EVAL_CONFIG_ROOT = CONFIG_ROOT.joinpath("eval.yaml")

# Output directory
OUTPUT_ROOT = PROJECT_ROOT.joinpath("output")

# Optional sanity check: uncomment to print the resolved locations
# print(f"Project Root: {PROJECT_ROOT}")
# print(f"Data Root: {DATA_ROOT}")
# print(f"Config Root: {CONFIG_ROOT}")

### MVTecAD Dataset

In [13]:
import os
import pandas as pd
from pathlib import Path
from anomalib.data import Folder
from IPython.display import display

# 1. Path setup
# Reuse DATA_ROOT from the path-configuration cell instead of re-assigning
# PROJECT_ROOT to a hard-coded Colab Drive path: the hard-coded value silently
# overrode the Colab/local detection done at the top of the notebook and made
# this cell unusable outside Colab. DATA_ROOT is PROJECT_ROOT / "dataset" / "MMAD".
base_data_path = DATA_ROOT
# Sub-dataset folder names expected under the MMAD root
DATASET_NAMES = ["GoodsAD", "MVTec-AD", "MVTec-LOCO", "VisA"]

def verify_all_datasets():
    """Try to load every category of every MMAD sub-dataset and print a summary.

    For each name in ``DATASET_NAMES`` that exists under ``base_data_path``,
    every sub-directory is treated as one category and opened with an anomalib
    ``Folder`` datamodule (``normal_dir="train/good"``). The train/val/test
    sizes, or the error message when loading fails, are collected into one
    DataFrame that is displayed together with overall totals and, if any,
    the rows that failed.

    Returns:
        None. Results are only printed / displayed.

    NOTE(review): only ``normal_dir`` is passed to ``Folder``, so the val/test
    counts presumably come from anomalib splitting ``train/good`` itself rather
    than from the datasets' real test folders -- confirm before reading the
    test column as the true test-set size.
    """
    if not base_data_path.exists():
        print(f"‚ùå ÏóêÎü¨: {base_data_path} Í≤ΩÎ°úÍ∞Ä Ï°¥Ïû¨ÌïòÏßÄ ÏïäÏäµÎãàÎã§.")
        return

    def _size(subset):
        """Number of samples in a split, or 0 when the split is None."""
        return 0 if subset is None else len(subset)

    rows = []

    print(f"üöÄ [Anomalib 2.2.0] Îç∞Ïù¥ÌÑ∞ÏÖã Í≤ÄÏ¶ù ÏãúÏûë")
    print(f"üìç Í≤ΩÎ°ú: {base_data_path.resolve()}")
    print("-" * 70)

    for ds_name in DATASET_NAMES:
        dataset_dir = base_data_path / ds_name
        if not dataset_dir.exists():
            print(f"‚ö†Ô∏è  [SKIP] {ds_name} Ìè¥ÎçîÍ∞Ä ÏóÜÏäµÎãàÎã§.")
            continue

        # Each immediate sub-directory is one category, visited in name order.
        category_names = sorted(entry.name for entry in dataset_dir.iterdir() if entry.is_dir())

        for cat in category_names:
            row = {"Dataset": ds_name, "Class": cat}

            try:
                # Folder arguments as used with anomalib 2.2.0: the test split
                # mode is "from_dir" and the extensions are passed as a list.
                dm = Folder(
                    name=cat,
                    root=str(dataset_dir / cat),
                    normal_dir="train/good",
                    test_split_mode="from_dir",
                    extensions=[".jpg", ".jpeg", ".png", ".JPG", ".PNG"],
                    train_batch_size=1,
                )
                dm.setup()

                # Split sizes; a split that was not created counts as 0.
                train_num = _size(dm.train_data)
                val_num = _size(dm.val_data)
                test_num = _size(dm.test_data)

                row.update({
                    "Train(ÌïôÏäµ)": train_num,
                    "Val(Í≤ÄÏ¶ù)": val_num,
                    "Total_Good": train_num + val_num,
                    "Test(ÌèâÍ∞Ä)": test_num,
                    "Status": "‚úÖ OK"
                })
            except Exception as e:
                # Keep the error text in the table so the failing argument or
                # path can be identified afterwards.
                row.update({
                    "Train(ÌïôÏäµ)": 0, "Val(Í≤ÄÏ¶ù)": 0, "Total_Good": 0, "Test(ÌèâÍ∞Ä)": 0,
                    "Status": f"‚ùå Error: {str(e)}"
                })

            rows.append(row)

    if not rows:
        print("‚ùå Í≤ÄÏÉâÎêú Îç∞Ïù¥ÌÑ∞Í∞Ä ÏóÜÏäµÎãàÎã§.")
        return

    df = pd.DataFrame(rows)
    print("\nüìä Îç∞Ïù¥ÌÑ∞ÏÖã Î°úÎìú Í≤∞Í≥º ÏöîÏïΩ")
    display(df)

    print(f"\n‚úÖ Í≤ÄÏ¶ù ÏôÑÎ£å: Ï¥ù {len(df)}Í∞ú ÌÅ¥ÎûòÏä§ ÌÉêÏÉâ")
    print(f"üìà Ï†ïÏÉÅ Î°úÎìúÎêú Ï¥ù ÌïôÏäµ Í∞ÄÎä• Ïù¥ÎØ∏ÏßÄ: {df['Total_Good'].sum()} Ïû•")

    # Rows whose status carries the failure marker are shown separately.
    failed_mask = df['Status'].str.contains("‚ùå")
    errors = df[failed_mask]
    if not errors.empty:
        print("\nüö® ÏóêÎü¨ Î∞úÏÉù ÌÅ¥ÎûòÏä§ ÏÉÅÏÑ∏:")
        display(errors)

# Run the verification over every dataset listed in DATASET_NAMES
verify_all_datasets()



üöÄ [Anomalib 2.2.0] Îç∞Ïù¥ÌÑ∞ÏÖã Í≤ÄÏ¶ù ÏãúÏûë
üìç Í≤ΩÎ°ú: /content/drive/.shortcut-targets-by-id/1MOlF1Xwaw_0p4R5EY_HXDRgLg0it43Jy/MMAD
----------------------------------------------------------------------





üìä Îç∞Ïù¥ÌÑ∞ÏÖã Î°úÎìú Í≤∞Í≥º ÏöîÏïΩ


Unnamed: 0,Dataset,Class,Train(ÌïôÏäµ),Val(Í≤ÄÏ¶ù),Total_Good,Test(ÌèâÍ∞Ä),Status
0,GoodsAD,cigarette_box,147,18,165,18,‚úÖ OK
1,GoodsAD,drink_bottle,587,73,660,73,‚úÖ OK
2,GoodsAD,drink_can,188,23,211,24,‚úÖ OK
3,GoodsAD,food_bottle,812,101,913,101,‚úÖ OK
4,GoodsAD,food_box,346,43,389,43,‚úÖ OK
5,GoodsAD,food_package,432,54,486,54,‚úÖ OK
6,MVTec-AD,bottle,168,20,188,21,‚úÖ OK
7,MVTec-AD,cable,180,22,202,22,‚úÖ OK
8,MVTec-AD,capsule,176,21,197,22,‚úÖ OK
9,MVTec-AD,carpet,224,28,252,28,‚úÖ OK



‚úÖ Í≤ÄÏ¶ù ÏôÑÎ£å: Ï¥ù 38Í∞ú ÌÅ¥ÎûòÏä§ ÌÉêÏÉâ
üìà Ï†ïÏÉÅ Î°úÎìúÎêú Ï¥ù ÌïôÏäµ Í∞ÄÎä• Ïù¥ÎØ∏ÏßÄ: 15484 Ïû•


In [4]:
# Inspection aid: list every name reachable on the anomalib.data module
# (datamodules, dataset classes, batch/item types, and module dunders).
import anomalib.data

exported_names = dir(anomalib.data)
print(exported_names)

['ADAM3D', 'ADAM3DDataset', 'AnomalibDataModule', 'AnomalibDataset', 'Avenue', 'AvenueDataset', 'BMAD', 'BMADDataset', 'BTech', 'BTechDataset', 'Batch', 'DataFormat', 'DatasetItem', 'Datumaro', 'DatumaroDataset', 'DepthBatch', 'DepthDataFormat', 'DepthItem', 'DictConfig', 'Enum', 'Folder', 'Folder3D', 'Folder3DDataset', 'FolderDataset', 'ImageBatch', 'ImageDataFormat', 'ImageItem', 'InferenceBatch', 'Kolektor', 'KolektorDataset', 'ListConfig', 'MPDD', 'MPDDDataset', 'MVTec', 'MVTec3D', 'MVTec3DDataset', 'MVTecAD', 'MVTecAD2', 'MVTecADDataset', 'MVTecLOCO', 'MVTecLOCODataset', 'NumpyImageBatch', 'NumpyImageItem', 'NumpyVideoBatch', 'NumpyVideoItem', 'PredictDataset', 'RealIAD', 'ShanghaiTech', 'ShanghaiTechDataset', 'Tabular', 'TabularDataset', 'UCSDped', 'UCSDpedDataset', 'UnknownDatamoduleError', 'VAD', 'VADDataset', 'VideoBatch', 'VideoDataFormat', 'VideoItem', 'Visa', 'VisaDataset', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package_

In [None]:
from src.utils import load_config, load_json, load_csv
# MVTecLOCO and Visa are datamodules exposed by anomalib.data (they appear in
# the dir(anomalib.data) listing printed earlier in this notebook), so they
# are imported from there together with MVTecAD; anomalib.models only
# supplies the model class.
from anomalib.data import MVTecAD, MVTecLOCO, Visa
from anomalib.models import EfficientAd
from anomalib.engine import Engine

# TODO: extend/revise runtime_config.yaml
runtime_config = load_config(RUNTIME_CONFIG_ROOT)
domain_json = load_json(DOMAIN_JSON)
mmad_json = load_json(MMAD_JSON)
meta_csv = load_csv(META_CSV)

# tqdm bar off
os.environ["TQDM_DISABLE"] = "1"

In [None]:
!python /content/drive/Othercomputers/my_notebook/lion_final_pro_multimodal-anomaly-report-generation/notebooks/noh/train_efficientad.py --project-root /content/drive/Othercomputers/my_notebook/lion_final_pro_multimodal-anomaly-report-generation --category all --max-epochs 30

[1;30;43mÏä§Ìä∏Î¶¨Î∞ç Ï∂úÎ†• ÎÇ¥Ïö©Ïù¥ Í∏∏Ïñ¥ÏÑú ÎßàÏßÄÎßâ 5000Ï§ÑÏù¥ ÏÇ≠Ï†úÎêòÏóàÏäµÎãàÎã§.[0m
                                                               [3mtrain_ae_epoch:  [0m
                                                               [3m0.543            [0m
                                                               [3mtrain_stae_epoch:[0m
                                                               [3m0.074            [0m
                                                               [3mtrain_loss_epoch:[0m
[2K[1A[2K[1A[2K[1A[2K[1A[2K[1A[2K[1A[2K[1A[2K[1A[2K[1A[2K[1A[2K[1A[2K[1A[2K[1A[2K[1A[2K[1A[2KEpoch 26/29 [35m‚îÅ‚îÅ‚îÅ‚îÅ[0m[90m‚ï∫[0m[90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m 11/42 [2m0:00:02 ‚Ä¢ 0:00:07[0m [2;4m5.14it/s[0m [3mtrain_st_step:   [0m
                                                               [3m3.081            [0m
                                                               [3mtrain_a

In [None]:
import logging

# Suppress log records of level WARNING and below during the inference runs.
logging.disable(logging.WARNING)

# To re-enable tqdm progress bars, uncomment:
# os.environ.pop("TQDM_DISABLE", None)

# MVTec-AD categories to run inference on
categories = [
    "bottle", "cable", "capsule", "carpet", "grid",
    "hazelnut", "leather", "metal_nut", "pill", "screw",
    "tile", "toothbrush", "transistor", "wood", "zipper"
]

# Engine settings shared by every category
engine_options = dict(
    logger=False,
    enable_progress_bar=False,
    accelerator="auto",
    devices=1,
    default_root_dir=OUTPUT_ROOT,
)

# category name -> list of prediction batches returned by Engine.predict
all_predictions = {}
n_categories = len(categories)
for i, category in enumerate(categories, start=1):
    progress = f"[{i}/{n_categories}]"
    print(f"{progress} Inference: {category}")

    # Checkpoint written by the training run for this category (version v0)
    ckpt_path = OUTPUT_ROOT.joinpath(
        "EfficientAd", "MVTecAD", category, "v0/weights/lightning/model.ckpt"
    )

    datamodule = MVTecAD(root=DATA_ROOT / "MVTec-AD", category=category)

    # A fresh model and engine per category; weights are loaded from ckpt_path.
    model = EfficientAd()
    engine = Engine(**engine_options)

    predictions = engine.predict(
        datamodule=datamodule,
        model=model,
        ckpt_path=ckpt_path,
    )

    all_predictions[category] = predictions
    print(f"‚úì {progress} {category} ÏôÑÎ£å\n")

In [None]:
from src.visual.plot import kde_plot

# Select the category to plot explicitly rather than relying on whatever
# `category` / `predictions` happened to be left in the kernel by another
# cell's loop. The last entry of `categories` is used, which is what a
# top-to-bottom run plotted before; change the index to inspect another one.
category = categories[-1]
predictions = all_predictions[category]


def _as_numpy(values):
    """Return `values.cpu().numpy()` when `values` has a `cpu` method, else `values` unchanged."""
    return values.cpu().numpy() if hasattr(values, 'cpu') else values


# Per-batch ground-truth labels and predicted image-level scores
y_true_list = [_as_numpy(p.gt_label) for p in predictions]
y_score_list = [_as_numpy(p.pred_score) for p in predictions]

y_true = np.concatenate(y_true_list)
y_score = np.concatenate(y_score_list)

# Label 0 is treated as normal and label 1 as anomalous
normal_scores = y_score[y_true == 0]
anomaly_scores = y_score[y_true == 1]

scores_df = pd.DataFrame({
    'score': np.concatenate([normal_scores, anomaly_scores]),
    'label': ['Normal'] * len(normal_scores) + ['Anomaly'] * len(anomaly_scores)
})

kde_plot(
    scores_df,
    col='score',
    hue='label',
    palette=['steelblue', 'salmon'],
    title=f'{category}: Score Distribution',
    xlabel='Anomaly Score'
)

In [None]:
import torch
from sklearn.metrics import (
    roc_auc_score,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    jaccard_score  # IoU
)

# Decision thresholds for binarising image-level scores and pixel-level maps.
# NOTE(review): 0.5 presumes the scores / anomaly maps are normalised to
# [0, 1] -- confirm against the model's post-processing.
IMAGE_THRESHOLD = 0.5
PIXEL_THRESHOLD = 0.5

results = []
for category, preds in all_predictions.items():
    # Image-level labels, scores and thresholded predictions
    y_true = np.concatenate([p.gt_label.cpu().numpy() for p in preds])
    y_score = np.concatenate([p.pred_score.cpu().numpy() for p in preds])
    y_pred = (y_score >= IMAGE_THRESHOLD).astype(int)

    # Pixel-level masks
    gt_masks = torch.cat([p.gt_mask for p in preds]).int()
    pred_masks = torch.cat([(p.anomaly_map > PIXEL_THRESHOLD).int() for p in preds])

    # Flatten and move to host memory once. Previously Dice used
    # .cpu().numpy() while IoU called .numpy() directly, which fails for
    # tensors that live on a CUDA device.
    gt_pixels = gt_masks.flatten().cpu().numpy()
    pred_pixels = pred_masks.flatten().cpu().numpy()

    metrics = {
        "Category": category,
        "AUROC": round(roc_auc_score(y_true, y_score), 4),
        "Accuracy": round(accuracy_score(y_true, y_pred), 4),
        "Precision": round(precision_score(y_true, y_pred, zero_division=0), 4),
        "Recall": round(recall_score(y_true, y_pred, zero_division=0), 4),
        "F1": round(f1_score(y_true, y_pred, zero_division=0), 4),
        # Dice is computed as the pixel-wise F1 score
        "Dice": round(f1_score(gt_pixels, pred_pixels, zero_division=0), 4),
        "IoU": round(
            jaccard_score(gt_pixels, pred_pixels, average='binary', zero_division=0),
            4
        ),
        "N_samples": len(y_true)
    }
    results.append(metrics)

metrics_df = pd.DataFrame(results).set_index("Category")
# Summary row: mean of every metric, but the total (not the mean) of N_samples
avg_row = metrics_df.drop(columns=['N_samples']).mean().round(4)
avg_row['N_samples'] = metrics_df['N_samples'].sum()
metrics_df.loc['Average'] = avg_row

metrics_df

In [None]:
from src.visual.plot import heatmap_plot

# Per-category metrics only: drop the 'Average' summary row and the sample count
metrics_trans = metrics_df.drop('Average').drop(columns='N_samples')
heatmap_plot(
    metrics_trans,
    figsize=(10, 10),
    cmap='RdYlGn',
    annot=True,
    fmt='.4f',
    linewidths=0.5,
    # The metrics in this notebook come from EfficientAd predictions, so the
    # title names that model (it previously said 'PatchCore').
    title='EfficientAd Performance by Category',
    rotation_x=45,
    rotation_y=0
)

In [None]:
from src.visual.plot import anomaly_grid_from_dir

# Show one sample result image per category.
for category in categories:
    # Read from the EfficientAd output tree, the same model directory the
    # checkpoints in this notebook are loaded from; the path previously
    # pointed at "Patchcore", a model this notebook never runs.
    # NOTE(review): assumes the engine wrote images under
    # <OUTPUT_ROOT>/EfficientAd/MVTecAD/<category>/latest/images -- confirm.
    OUTPUT_IMG_ROOT = OUTPUT_ROOT / "EfficientAd" / "MVTecAD" / category / "latest" / "images"
    print(f"{category}")
    anomaly_grid_from_dir(OUTPUT_IMG_ROOT, n_samples=1, n_cols=1)