In [1]:
import os
import cv2
import torch
import numpy as np
from tqdm import tqdm
from torchvision import models, transforms
from torch import nn
from sklearn.metrics import roc_auc_score
from sklearn.metrics import pairwise_distances
from sklearn.random_projection import SparseRandomProjection

In [23]:
# --- Settings ---
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
TRAIN_DIR = "masks/good"
TEST_DIR_GOOD = "masks/test/good"
TEST_DIR_DEFECT = "masks/test/defective"



In [4]:
dino = torch.hub.load('facebookresearch/dino:main', 'dino_vitb16').to(DEVICE).eval()
# DINO has no classification head; extracts patch features

# --- Adjust transform to DINO expectations ---
from torchvision import transforms
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5))])
    

Using cache found in C:\Users\DDP Engineering/.cache\torch\hub\facebookresearch_dino_main


In [18]:
def extract_patch_features(img):
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    x = transform(img_rgb).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        # 🧠 Manual forward through DINO transformer to get patch tokens
        x = dino.prepare_tokens(x)  # token + positional encoding
        for blk in dino.blocks:
            x = blk(x)
        x = dino.norm(x)  # [1, 197, 768]
        patch_tokens = x[:, 1:, :]  # remove CLS

    return patch_tokens.squeeze(0).cpu().numpy()


In [19]:
# --- Reuse your auto-crop function ---
def auto_crop_only(img):
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, np.array([0,0,200]), np.array([180,40,255]))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE,
                             cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(15,15)))
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None
    x,y,w,h = cv2.boundingRect(max(contours, key=cv2.contourArea))
    return img[y:y+h, x:x+w]


In [20]:
def process_image(path):
    img = cv2.imread(path)
    crop = auto_crop_only(img)
    if crop is None:
        return None
    return extract_patch_features(crop)

In [21]:
# --- Build memory bank with k-Center Greedy on DINO features ---
all_feats = []
print("🔍 Extracting DINO features from TRAIN_DIR …")
for fname in tqdm(os.listdir(TRAIN_DIR)):
    if fname.lower().endswith(('.jpg','.png','.jpeg')):
        feats = process_image(os.path.join(TRAIN_DIR, fname))
        if feats is not None:
            all_feats.append(feats)
all_feats = np.vstack(all_feats)

rp = SparseRandomProjection(eps=0.5)
proj = rp.fit_transform(all_feats)
n_keep = max(1, int(len(proj)*0.01))
selected, dist = [], None
print("🎯 Building memory bank …")
for _ in tqdm(range(n_keep)):
    if dist is None:
        idx = np.random.randint(0, len(proj))
        cent = proj[idx:idx+1]
        dist = pairwise_distances(proj, cent)
        selected = [idx]
    else:
        idx = np.argmax(dist)
        cent = proj[idx:idx+1]
        newd = pairwise_distances(proj, cent)
        dist = np.minimum(dist, newd)
        selected.append(idx)
memory = torch.tensor(all_feats[selected]).float().to(DEVICE)

🔍 Extracting DINO features from TRAIN_DIR …


100%|██████████| 32/32 [00:04<00:00,  7.43it/s]


🎯 Building memory bank …


100%|██████████| 62/62 [00:00<00:00, 132.26it/s]


In [25]:
# --- Define anomaly scoring on patches ---
def anomaly_score(path):
    feats = process_image(path)
    if feats is None:
        return None
    d = torch.cdist(torch.tensor(feats).float().to(DEVICE), memory)
    return d.min(dim=1).values.max().item()

def score_folder(folder):
    out = []
    for fname in tqdm(os.listdir(folder)):
        if fname.lower().endswith(('.jpg','.png','.jpeg')):
            sc = anomaly_score(os.path.join(folder,fname))
            if sc is not None:
                out.append((fname, sc))
    return out

print("📊 Scoring GOOD images …")
scores_good = score_folder(TEST_DIR_GOOD)
print("📊 Scoring DEFECTIVE images …")
scores_defect = score_folder(TEST_DIR_DEFECT)

📊 Scoring GOOD images …


100%|██████████| 1/1 [00:00<00:00,  5.40it/s]


📊 Scoring DEFECTIVE images …


100%|██████████| 1/1 [00:00<00:00,  5.46it/s]


In [26]:

good_vals = [v for _,v in scores_good]
thr = np.percentile(good_vals, 95)
print(f"✅ Threshold (95th percentile of GOOD scores): {thr:.4f}\nResults:")
for lbl, scs in [('GOOD', scores_good), ('DEFECT', scores_defect)]:
    for fname, score in scs:
        outcome = "ANOMALY 🚨" if score > thr else "Normal ✅"
        print(f"{lbl} | {fname:<30} | Score: {score:.4f} | {outcome}")

✅ Threshold (95th percentile of GOOD scores): 54.8213
Results:
GOOD | Image__2025-05-05__10-06-44.jpg | Score: 54.8213 | Normal ✅
DEFECT | 33af61dd-Image__2025-05-02__10-36-24.jpg | Score: 64.4739 | ANOMALY 🚨
