In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/mvtec-ad/readme.txt
/kaggle/input/mvtec-ad/license.txt
/kaggle/input/mvtec-ad/wood/readme.txt
/kaggle/input/mvtec-ad/wood/license.txt
/kaggle/input/mvtec-ad/wood/ground_truth/hole/000_mask.png
/kaggle/input/mvtec-ad/wood/ground_truth/hole/003_mask.png
/kaggle/input/mvtec-ad/wood/ground_truth/hole/004_mask.png
/kaggle/input/mvtec-ad/wood/ground_truth/hole/005_mask.png
/kaggle/input/mvtec-ad/wood/ground_truth/hole/001_mask.png
/kaggle/input/mvtec-ad/wood/ground_truth/hole/006_mask.png
/kaggle/input/mvtec-ad/wood/ground_truth/hole/008_mask.png
/kaggle/input/mvtec-ad/wood/ground_truth/hole/007_mask.png
/kaggle/input/mvtec-ad/wood/ground_truth/hole/002_mask.png
/kaggle/input/mvtec-ad/wood/ground_truth/hole/009_mask.png
/kaggle/input/mvtec-ad/wood/ground_truth/liquid/000_mask.png
/kaggle/input/mvtec-ad/wood/ground_truth/liquid/003_mask.png
/kaggle/input/mvtec-ad/wood/ground_truth/liquid/004_mask.png
/kaggle/input/mvtec-ad/wood/ground_truth/liquid/005_mask.png
/kaggle/input/mvte

In [2]:
!pip install ftfy regex tqdm faiss-cpu git+https://github.com/openai/CLIP.git


Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-8v_s0_25
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-8v_s0_25
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting ftfy
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->clip==1.0)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->clip==1.0)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->clip==1.0)
  Downloading nvidia_cufft_cu12

In [3]:
import os
import numpy as np
import torch
import clip
from PIL import Image
import faiss
from tqdm import tqdm

In [4]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [5]:
# Load the CLIP ViT-B/32 model on `device`, together with the `preprocess`
# callable that the encoding cells apply to each PIL image before encoding.
clip_variant = "ViT-B/32"
model, preprocess = clip.load(clip_variant, device=device)

100%|███████████████████████████████████████| 338M/338M [00:06<00:00, 55.6MiB/s]


In [6]:
# Show the model names the installed CLIP package reports as available.
clip.available_models()


['RN50',
 'RN101',
 'RN50x4',
 'RN50x16',
 'RN50x64',
 'ViT-B/32',
 'ViT-B/16',
 'ViT-L/14',
 'ViT-L/14@336px']

In [7]:
# Root directory of the MVTec AD "bottle" category; the train/ and test/
# sub-directories used by later cells are resolved relative to it.
base_path = "/kaggle/input/mvtec-ad/bottle"


In [8]:
# Collect the paths of the normal ("good") training images.
# os.listdir returns entries in arbitrary order, so sort them: this keeps the
# row order of the feature matrix (and the ids assigned by the FAISS index)
# reproducible across runs and machines.
train_dir = os.path.join(base_path, "train", "good")
train_images = sorted(
    os.path.join(train_dir, f)
    for f in os.listdir(train_dir)
    if f.endswith((".png", ".jpg"))
)


In [9]:
# Encode training images using CLIP: one embedding row per image.
# Fail early with a clear message instead of np.stack's error on an empty list.
assert train_images, "no training images found"

train_features = []
with torch.no_grad():  # inference only; no autograd bookkeeping needed
    for img_path in tqdm(train_images, desc="Encoding train images"):
        # Open inside a context manager so the file handle is released as soon
        # as the image has been converted and preprocessed.
        with Image.open(img_path) as pil_img:
            img = preprocess(pil_img.convert("RGB")).unsqueeze(0).to(device)
        # .float() yields float32 embeddings even if the encoder emits half precision.
        features = model.encode_image(img).float().cpu().numpy()
        train_features.append(features[0])

# Shape: (number of training images, embedding dimension), dtype float32.
train_features = np.stack(train_features).astype("float32")

Encoding train images: 100%|██████████| 209/209 [00:38<00:00,  5.40it/s]


In [10]:
# Sanity check: one row per encoded training image, one column per embedding dimension.
train_features.shape

(209, 512)

In [11]:
# Scale every row to unit L2 length (in place) so that a dot product between
# two rows equals their cosine similarity.
row_norms = np.linalg.norm(train_features, axis=1, keepdims=True)
np.divide(train_features, row_norms, out=train_features)

In [12]:

# Build a flat inner-product FAISS index over the training embeddings.
# Because the rows are unit-normalised, inner product == cosine similarity.
embedding_dim = train_features.shape[1]
index = faiss.IndexFlatIP(embedding_dim)
index.add(train_features)

In [13]:
# Score every test image by its cosine distance to the nearest training image.
test_root = os.path.join(base_path, "test")
# Each sub-directory of test/ is one label. Sort the names (os.listdir order is
# arbitrary) and skip stray files so os.listdir(cls_dir) below cannot fail on them.
classes = sorted(
    d for d in os.listdir(test_root) if os.path.isdir(os.path.join(test_root, d))
)
results = []  # (image path, label, anomaly score)

for cls in classes:
    cls_dir = os.path.join(test_root, cls)
    image_paths = sorted(
        os.path.join(cls_dir, f)
        for f in os.listdir(cls_dir)
        if f.endswith((".png", ".jpg"))
    )

    for img_path in image_paths:
        # Context manager releases the file handle once the image is preprocessed.
        with Image.open(img_path) as pil_img:
            img = preprocess(pil_img.convert("RGB")).unsqueeze(0).to(device)
        with torch.no_grad():
            feat = model.encode_image(img).cpu().numpy()
        # Cast to float32 BEFORE normalising, exactly as the training features
        # were, so both sides of the similarity use the same precision.
        feat = feat.astype("float32")
        feat /= np.linalg.norm(feat, axis=1, keepdims=True)
        sim, _ = index.search(feat, k=1)  # similarity to the single nearest neighbour
        score = 1.0 - float(sim[0][0])  # 1 - similarity = anomaly score
        results.append((img_path, cls, score))

# Print top 10 most anomalous
results.sort(key=lambda x: x[2], reverse=True)
print("\nTop 10 likely defects:")
for path, label, score in results[:10]:
    print(f"{path} | Label: {label} | Anomaly score: {score:.4f}")


Top 10 likely defects:
/kaggle/input/mvtec-ad/bottle/test/contamination/005.png | Label: contamination | Anomaly score: 0.0921
/kaggle/input/mvtec-ad/bottle/test/contamination/006.png | Label: contamination | Anomaly score: 0.0744
/kaggle/input/mvtec-ad/bottle/test/broken_large/016.png | Label: broken_large | Anomaly score: 0.0667
/kaggle/input/mvtec-ad/bottle/test/broken_large/003.png | Label: broken_large | Anomaly score: 0.0645
/kaggle/input/mvtec-ad/bottle/test/contamination/007.png | Label: contamination | Anomaly score: 0.0625
/kaggle/input/mvtec-ad/bottle/test/broken_large/018.png | Label: broken_large | Anomaly score: 0.0620
/kaggle/input/mvtec-ad/bottle/test/contamination/013.png | Label: contamination | Anomaly score: 0.0551
/kaggle/input/mvtec-ad/bottle/test/broken_large/006.png | Label: broken_large | Anomaly score: 0.0538
/kaggle/input/mvtec-ad/bottle/test/broken_large/013.png | Label: broken_large | Anomaly score: 0.0497
/kaggle/input/mvtec-ad/bottle/test/contamination/0