## This notebook is used for the implementation of zero_shot AD model proposed by Xuhai Chen et al in the paper "A Zero-/Few-Shot Anomaly Classification and Segmentation Method for CVPR 2023 VAND Workshop Challenge Tracks 1&2: 1st Place on Zero-shot AD and 4th Place on Few-shot AD"

In [24]:
import os
import clip
import torch
import pandas as pd
import PIL
import sys
%PIL inline
print("Torch version: ", torch.__version__)

UsageError: Line magic function `%PIL` not found.


Load the model

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load('ViT-L/14@336px', device)
print('Model input resulution: ', model.visual.input_resolution)
print('Context length resulution: ', model.context_length)

Model input resulution:  336
Context length resulution:  77


Create the image_path

In [19]:
df = pd.read_csv('VisA/split_csv/1cls.csv')
img_path = df.apply(lambda row: os.path.join('VisA', row["image"]), axis = 1)

In [25]:
img = []
img = PIL.Image.open(img_path[2345], mode='r')

if img is None:
    sys.exit("Could not read the images.")
img.show()



Read the text labels

In [20]:
labels = df.label
print(labels.unique())

['normal' 'anomaly']


Prepare the inputs


In [28]:
image_input = preprocess(img).unsqueeze(0).to(device)
text_inputs = torch.cat([clip.tokenize(f"a photo of a {c}") for c in labels.unique()]).to(device)

Calculate the features

In [29]:
with torch.no_grad():
    image_features = model.encode_image(image_input)
    text_features = model.encode_text(text_inputs)


Get the scores from the features

In [31]:
image_features /= image_features.norm(dim=-1, keepdim=True)
text_features /= text_features.norm(dim=-1, keepdim=True)
similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
values, indices = similarity[0].topk(2)

print('True label = ', df.label[2345])
for value, index in zip(values, indices):
    print(f"{labels.unique()[index]:>16s}: {100 * value.item():.2f}%")


True label =  anomaly
         anomaly: 65.93%
          normal: 34.07%
