In [1]:
import sys, pathlib
sys.path.append(str(pathlib.Path.cwd().parent)) 

# necessary imports
import matplotlib.pyplot as plt
import cv2
import faiss
import numpy as np
from scipy.ndimage import gaussian_filter

from src.detection import augment_image
from src.utils import resize_mask_img, get_dataset_info
from src.backbones import get_model

from matplotlib.colors import LinearSegmentedColormap
from pathlib import Path
import pandas as pd

In [2]:
# Dataset location and evaluation switches shared by the cells below.
data_root = "../data"
mvtec_path = data_root + "/kagome"

# tf_masking is forwarded as the masking option and tf_rotation as the
# rotation option when the scoring function is called in the sweep cells.
tf_masking, tf_rotation = False, True

In [3]:
def _cosine_nn_distances(queries, bank, chunk_size=4096):
    """Return the cosine distance from each query row to its nearest bank row.

    Both inputs must already be L2-normalised row-wise. The result has shape
    (n_queries, 1). Queries are processed in chunks so the temporary
    similarity matrix never exceeds chunk_size x n_bank entries.
    """
    nearest = np.empty((queries.shape[0], 1), dtype=queries.dtype)
    bank_t = bank.T
    for start in range(0, queries.shape[0], chunk_size):
        stop = start + chunk_size
        similarities = queries[start:stop] @ bank_t
        nearest[start:stop, 0] = 1.0 - similarities.max(axis=1)
    # Rounding can push values marginally outside the valid cosine-distance range.
    return np.clip(nearest, 0.0, 2.0, out=nearest)


def score_single_image(image_test, image_ref_list, model, masking = False, rotation = True, use_faiss = True):
    """Score one test image against a memory bank built from reference images.

    Every reference image (optionally expanded through ``augment_image``) is
    run through ``model.prepare_image`` / ``model.extract_features`` and the
    resulting rows are stacked into one memory bank. Each row of the test
    image's features is then matched to its nearest bank row using cosine
    distance (1 - cosine similarity).

    Args:
        image_test: Test image, passed to ``model.prepare_image`` (and to
            ``model.compute_background_mask_from_image`` when masking is on).
        image_ref_list: Iterable of reference images; must not be empty.
        model: Object providing ``prepare_image``, ``extract_features`` and
            ``compute_background_mask_from_image``.
        masking: Falsy disables masking. Otherwise the value is forwarded as
            ``masking_type`` and rows where the returned mask is False get
            distance 0.0.
        rotation: If True, each reference image is replaced by the list
            returned from ``augment_image``.
        use_faiss: If True, use a faiss ``IndexFlatL2`` index; otherwise use
            an equivalent brute-force NumPy search.

    Returns:
        Tuple ``(score_top1p, distances, mask2)``: the mean of the largest 1%
        of per-row distances (at least one row), the distances reshaped to the
        grid size reported by ``model.prepare_image`` for the test image, and
        the boolean mask (all True when masking is off).

    Raises:
        ValueError: If no reference features could be extracted.
    """
    features_ref = []

    # Extract reference features and set up the knn index (memory bank)
    for image_ref in image_ref_list:
        ref_views = augment_image(image_ref) if rotation else [image_ref]
        for ref_view in ref_views:
            ref_tensor, _ = model.prepare_image(ref_view)
            features_ref.append(model.extract_features(ref_tensor))

    if not features_ref:
        raise ValueError("image_ref_list must contain at least one reference image")
    features_ref = np.concatenate(features_ref, axis=0)

    # Extract test features
    image_tensor_test, grid_size2 = model.prepare_image(image_test)
    features_test = model.extract_features(image_tensor_test)

    if use_faiss:
        # faiss operates on C-contiguous float32 arrays and normalises in place.
        features_ref = np.ascontiguousarray(features_ref, dtype=np.float32)
        features_test = np.ascontiguousarray(features_test, dtype=np.float32)
        knn_index1 = faiss.IndexFlatL2(features_ref.shape[1])
        faiss.normalize_L2(features_ref)
        knn_index1.add(features_ref)

        faiss.normalize_L2(features_test)
        distances, _ = knn_index1.search(features_test, k = 1)
        # IndexFlatL2 returns squared L2; for unit vectors ||a-b||^2 = 2 - 2cos,
        # so halving yields the cosine distance.
        distances = distances / 2
    else:
        # Brute-force cosine nearest neighbour on normalised features.
        features_ref = features_ref / np.linalg.norm(features_ref, axis=1)[:, None]
        features_test = features_test / np.linalg.norm(features_test, axis=1)[:, None]
        distances = _cosine_nn_distances(features_test, features_ref)

    # Filter out the relevant patches of test image
    if masking:
        mask2 = model.compute_background_mask_from_image(image_test, threshold=10, masking_type=masking)
        # presumably True marks foreground rows -- TODO confirm against the model code
        distances[~mask2] = 0.0
    else:
        mask2 = np.ones(features_test.shape[0], dtype=bool)

    # Compute anomaly score: mean of the top 1% distances over all rows
    # (masked rows still count towards the row total).
    flat_distances = distances.ravel()
    top_k = max(1, int(flat_distances.size * 0.01))
    score_top1p = np.mean(np.sort(flat_distances)[::-1][:top_k])
    distances = distances.reshape(grid_size2)
    return score_top1p, distances, mask2

In [4]:
# -----------------------
# 0. Setup
# -----------------------
object_name = "01_omote_crop"          # * change as needed
img_name = "2025_05_28_master_omote.jpg"

# Layer-selection notes from earlier runs (the circle presumably marks the
# configuration that was picked -- TODO confirm).
# omote_crop
# rotation=False
# dinov2_vits14 => layer_5
# dinov2_vitb14 => layer_5 ○
# dinov2_vitl14 => layer_13

# rotation=True
# dinov2_vits14 => layer_5 ○
# dinov2_vitb14 => layer_5
# dinov2_vitl14 => layer_14


# Test-image directory inside the dataset folder
image_test_dir = Path(f"{mvtec_path}/{object_name}/test")

# -----------------------
# 1. Collect the list of test images once
# -----------------------
# NOTE: rglob order is filesystem-dependent and is intentionally left as-is;
# the "nearest" row computed in the sweep cells treats the first file as the baseline.
jpg_files = [p for p in image_test_dir.rglob('*') if p.is_file() and p.suffix.lower() in {'.jpg', '.jpeg'}]
if not jpg_files:
    raise FileNotFoundError(f"No .jpg/.jpeg test images found under {image_test_dir}")

# -----------------------
# 2. Load the reference image (it does not depend on the layer, so it is loaded once here)
# -----------------------
image_ref_path = f"{mvtec_path}/{object_name}/train/good/{img_name}"
# cv2.imread signals failure by returning None rather than raising.
image_ref_bgr = cv2.imread(image_ref_path, cv2.IMREAD_COLOR)
if image_ref_bgr is None:
    raise OSError(f"Could not read reference image (missing or not decodable): {image_ref_path}")
image_ref = cv2.cvtColor(image_ref_bgr, cv2.COLOR_BGR2RGB)


In [5]:
# -----------------------
# dinov2_vits14
# -----------------------
# Row labels are fixed up front so the row order always follows jpg_files.
# NOTE: labels are bare file names; identical names in different
# sub-directories would produce duplicate row labels.
test_image_names = [test_img_path.name for test_img_path in jpg_files]
scores_by_layer = {}

for num_layer in range(12):
    # --- 3-1. Load the model for this feature layer ---
    model = get_model('dinov2_vits14','cuda', smaller_edge_size=512, feat_layer=num_layer)

    # --- 3-2. Loop over the test images ---
    # score_single_image extracts reference and test features itself, so
    # nothing is precomputed here.
    layer_scores = []
    for test_img_path in jpg_files:
        # str() keeps this working on OpenCV builds that reject pathlib.Path.
        image_bgr = cv2.imread(str(test_img_path), cv2.IMREAD_COLOR)
        if image_bgr is None:
            raise OSError(f"Could not read test image (missing or not decodable): {test_img_path}")
        image_test = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

        # Compute the anomaly score
        anomaly_score, patch_distances, mask_test = score_single_image(image_test, [image_ref], model, masking=tf_masking, rotation=tf_rotation)
        layer_scores.append(anomaly_score)

    # One column per layer
    scores_by_layer[f"layer_{num_layer}"] = layer_scores

score_df1 = pd.DataFrame(scores_by_layer, index=test_image_names)

# -----------------------
# 4. Show the results
# -----------------------
print("=== Anomaly score matrix ===")
# Per layer: smallest score among rows 1.. minus the score of row 0.
# NOTE(review): this treats the first file in jpg_files as the baseline image,
# and that order comes from the filesystem -- confirm it is the intended one.
score_df1.loc["nearest"] = score_df1.iloc[1:].min() - score_df1.iloc[0]
score_df1

Loading model: dinov2_vits14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vits14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vits14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vits14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vits14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vits14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vits14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vits14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vits14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vits14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vits14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vits14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


=== Anomaly score matrix ===


Unnamed: 0,layer_0,layer_1,layer_2,layer_3,layer_4,layer_5,layer_6,layer_7,layer_8,layer_9,layer_10,layer_11
2025_05_28_omote_2.jpg,0.410988,0.306172,0.277923,0.24657,0.251964,0.222996,0.304032,0.330413,0.296046,0.280246,0.303557,0.317021
2025_05_28_omote_3.jpg,0.549279,0.413619,0.357941,0.348608,0.382285,0.375177,0.394947,0.464867,0.41344,0.427164,0.458075,0.504363
2025_05_28_omote_1.jpg,0.443799,0.431751,0.398188,0.357156,0.361651,0.353239,0.31575,0.358465,0.303048,0.289635,0.31288,0.357638
2025_05_28_omote_4.jpg,0.314356,0.325921,0.288374,0.297994,0.379282,0.341061,0.390699,0.438311,0.401592,0.372447,0.415373,0.485107
2025_05_28_omote_6.jpg,0.207216,0.194825,0.228745,0.242288,0.302523,0.325353,0.382337,0.429724,0.404017,0.399143,0.378369,0.480568
2025_05_28_omote_5.jpg,0.261553,0.30532,0.279042,0.300672,0.325148,0.328957,0.391723,0.459968,0.396104,0.356625,0.347576,0.37783
nearest,-0.203772,-0.111347,-0.049178,-0.004282,0.050558,0.102357,0.011718,0.028051,0.007002,0.009389,0.009323,0.040617


In [6]:
# -----------------------
# dinov2_vitb14
# -----------------------
# Row labels are fixed up front so the row order always follows jpg_files.
# NOTE: labels are bare file names; identical names in different
# sub-directories would produce duplicate row labels.
test_image_names = [test_img_path.name for test_img_path in jpg_files]
scores_by_layer = {}

for num_layer in range(12):
    # --- 3-1. Load the model for this feature layer ---
    model = get_model('dinov2_vitb14', 'cuda', smaller_edge_size=512, feat_layer=num_layer)

    # --- 3-2. Loop over the test images ---
    # score_single_image extracts reference and test features itself, so
    # nothing is precomputed here.
    layer_scores = []
    for test_img_path in jpg_files:
        # str() keeps this working on OpenCV builds that reject pathlib.Path.
        image_bgr = cv2.imread(str(test_img_path), cv2.IMREAD_COLOR)
        if image_bgr is None:
            raise OSError(f"Could not read test image (missing or not decodable): {test_img_path}")
        image_test = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

        # Compute the anomaly score
        anomaly_score, patch_distances, mask_test = score_single_image(image_test, [image_ref], model, masking=tf_masking, rotation=tf_rotation)
        layer_scores.append(anomaly_score)

    # One column per layer
    scores_by_layer[f"layer_{num_layer}"] = layer_scores

score_df2 = pd.DataFrame(scores_by_layer, index=test_image_names)

# -----------------------
# 4. Show the results
# -----------------------
print("=== Anomaly score matrix ===")
# Per layer: smallest score among rows 1.. minus the score of row 0.
# NOTE(review): this treats the first file in jpg_files as the baseline image,
# and that order comes from the filesystem -- confirm it is the intended one.
score_df2.loc["nearest"] = score_df2.iloc[1:].min() - score_df2.iloc[0]
score_df2

Loading model: dinov2_vitb14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitb14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitb14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitb14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitb14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitb14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitb14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitb14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitb14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitb14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitb14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitb14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


=== Anomaly score matrix ===


Unnamed: 0,layer_0,layer_1,layer_2,layer_3,layer_4,layer_5,layer_6,layer_7,layer_8,layer_9,layer_10,layer_11
2025_05_28_omote_2.jpg,0.42865,0.31293,0.322604,0.209148,0.246611,0.27761,0.217236,0.268572,0.338891,0.340534,0.422117,0.450584
2025_05_28_omote_3.jpg,0.573328,0.44934,0.428555,0.316503,0.372238,0.415478,0.341635,0.387603,0.432742,0.452529,0.530783,0.591304
2025_05_28_omote_1.jpg,0.484049,0.466616,0.468457,0.311234,0.347578,0.386022,0.260299,0.277158,0.341414,0.345994,0.397516,0.472218
2025_05_28_omote_4.jpg,0.331391,0.348309,0.368113,0.282165,0.399039,0.421073,0.3226,0.367212,0.395477,0.398197,0.444451,0.526968
2025_05_28_omote_6.jpg,0.147295,0.213474,0.203709,0.186074,0.298503,0.363414,0.330637,0.401192,0.439612,0.44777,0.514209,0.560446
2025_05_28_omote_5.jpg,0.229819,0.313366,0.341923,0.28178,0.326502,0.387198,0.300391,0.344057,0.405443,0.406407,0.45179,0.54488
nearest,-0.281356,-0.099455,-0.118896,-0.023074,0.051892,0.085804,0.043063,0.008587,0.002523,0.00546,-0.024601,0.021634


In [7]:
# -----------------------
# dinov2_vitl14
# -----------------------
# Row labels are fixed up front so the row order always follows jpg_files.
# NOTE: labels are bare file names; identical names in different
# sub-directories would produce duplicate row labels.
test_image_names = [test_img_path.name for test_img_path in jpg_files]
scores_by_layer = {}

for num_layer in range(24):
    # --- 3-1. Load the model for this feature layer ---
    model = get_model('dinov2_vitl14', 'cuda', smaller_edge_size=512, feat_layer=num_layer)

    # --- 3-2. Loop over the test images ---
    # score_single_image extracts reference and test features itself, so
    # nothing is precomputed here.
    layer_scores = []
    for test_img_path in jpg_files:
        # str() keeps this working on OpenCV builds that reject pathlib.Path.
        image_bgr = cv2.imread(str(test_img_path), cv2.IMREAD_COLOR)
        if image_bgr is None:
            raise OSError(f"Could not read test image (missing or not decodable): {test_img_path}")
        image_test = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

        # Compute the anomaly score
        anomaly_score, patch_distances, mask_test = score_single_image(image_test, [image_ref], model, masking=tf_masking, rotation=tf_rotation)
        layer_scores.append(anomaly_score)

    # One column per layer
    scores_by_layer[f"layer_{num_layer}"] = layer_scores

score_df3 = pd.DataFrame(scores_by_layer, index=test_image_names)

# -----------------------
# 4. Show the results
# -----------------------
print("=== Anomaly score matrix ===")
# Per layer: smallest score among rows 1.. minus the score of row 0.
# NOTE(review): this treats the first file in jpg_files as the baseline image,
# and that order comes from the filesystem -- confirm it is the intended one.
score_df3.loc["nearest"] = score_df3.iloc[1:].min() - score_df3.iloc[0]
# Only the columns from layer_10 onwards are displayed.
score_df3.iloc[:, 10:]

Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


Loading model: dinov2_vitl14
Device: cuda


Using cache found in /workspace/hub/facebookresearch_dinov2_main


=== Anomaly score matrix ===


Unnamed: 0,layer_10,layer_11,layer_12,layer_13,layer_14,layer_15,layer_16,layer_17,layer_18,layer_19,layer_20,layer_21,layer_22,layer_23
2025_05_28_omote_2.jpg,0.271175,0.269805,0.282867,0.28326,0.27346,0.301739,0.337218,0.339367,0.360514,0.371067,0.358555,0.384288,0.438311,0.505623
2025_05_28_omote_3.jpg,0.400379,0.423988,0.443729,0.435449,0.414793,0.430426,0.420911,0.402415,0.417462,0.454111,0.488485,0.485725,0.559557,0.675681
2025_05_28_omote_1.jpg,0.389936,0.415211,0.434128,0.41008,0.348752,0.341831,0.340644,0.319681,0.341953,0.361328,0.362199,0.376845,0.419052,0.495917
2025_05_28_omote_4.jpg,0.350459,0.37672,0.402668,0.406586,0.374547,0.388432,0.391729,0.371059,0.382754,0.41747,0.414265,0.430557,0.48242,0.559465
2025_05_28_omote_6.jpg,0.239381,0.268722,0.305523,0.35648,0.362192,0.396852,0.414823,0.405779,0.442365,0.454659,0.458972,0.478188,0.520348,0.577595
2025_05_28_omote_5.jpg,0.328582,0.350028,0.372719,0.396824,0.370973,0.404286,0.417455,0.381391,0.386143,0.410637,0.418202,0.421269,0.463531,0.535705
nearest,-0.031794,-0.001083,0.022656,0.07322,0.075292,0.040092,0.003426,-0.019686,-0.018561,-0.009739,0.003644,-0.007444,-0.019259,-0.009706
