In [29]:
import numpy as np
from sdhelper import SD
import torch
import matplotlib.pyplot as plt
from datasets import load_dataset
import torch
from tqdm.autonotebook import tqdm
from torch.nn import functional as F

In [3]:
# ImageNet subset images plus the stored anomaly annotations for it.
# The annotation file names carry "step50_seed42"; presumably these are the extraction
# settings the labels were created with (confirm against the labeler).
imagenet_subset = load_dataset(
    "JonasLoos/imagenet_subset",
    split="train",
)
is_up1_anomalies = np.load(
    "../data/data_labeler/high_norm_anomalies_imagenet_subset_step50_seed42_heavy_only.npy"
)
# NOTE(review): `is_convin_anomalies` is loaded here but not referenced in the cells visible below.
is_convin_anomalies = np.load(
    "../data/data_labeler/high_norm_anomalies_imagenet_subset_step50_seed42_conv_in.npy"
)


In [5]:
# NYUv2 images plus the stored anomaly annotations for it
# (file name again tagged "step50_seed42").
nyuv2 = load_dataset(
    "0jl/NYUv2",
    split="train",
)
nyuv2_up1_anomalies = np.load(
    "../data/data_labeler/high_norm_anomalies_nyuv2_step50_seed42.npy"
)


In [None]:
# Instantiate the `SD` helper from `sdhelper` with its default arguments.
# NOTE(review): presumably this loads a Stable Diffusion model and may be slow — confirm in sdhelper.
sd = SD()

In [None]:
# Extract the 'up_blocks[1]' representations for every image of both datasets.
# step=50 / seed=42 match the "step50_seed42" tag in the anomaly annotation file names.
is_reprs = sd.img2repr(
    [sample['image'] for sample in tqdm(imagenet_subset)],
    extract_positions=['up_blocks[1]'],
    step=50,
    seed=42,
)
nyuv2_reprs = sd.img2repr(
    [sample['image'] for sample in tqdm(nyuv2)],
    extract_positions=['up_blocks[1]'],
    step=50,
    seed=42,
)

In [None]:
# Stack the per-image representations into one tensor per dataset.
# permute(1,2,0) moves the first axis of each concatenated representation to the end
# (assumes concat() yields channels-first data, so the result is channels-last — TODO confirm).
is_reprs_torch = torch.stack([rep.concat().permute(1,2,0) for rep in is_reprs])
nyuv2_reprs_torch = torch.stack([rep.concat().permute(1,2,0) for rep in nyuv2_reprs])

# Positional 80/20 split: the first 80% of the images form "train", the remainder "test".
is_n_train = int(0.8*len(is_reprs_torch))
nyuv2_n_train = int(0.8*len(nyuv2_reprs_torch))
is_reprs_train, is_reprs_test = is_reprs_torch[:is_n_train], is_reprs_torch[is_n_train:]
nyuv2_reprs_train, nyuv2_reprs_test = nyuv2_reprs_torch[:nyuv2_n_train], nyuv2_reprs_torch[nyuv2_n_train:]

# Report the resulting shapes, one per line.
print(
    f'{is_reprs_train.shape = }',
    f'{is_reprs_test.shape = }',
    f'{nyuv2_reprs_train.shape = }',
    f'{nyuv2_reprs_test.shape = }',
    sep='\n',
)


In [None]:
# get anomaly representations
#
# Each annotated position is expanded to its full 2x2 patch by adding the four offsets
# below to the last two coordinate columns; afterwards those two columns are swapped so
# that a row can be used directly as an index into the stacked representation tensors.

patch_offsets = [[[0, di, dj]] for di in (0, 1) for dj in (0, 1)]

# --- ImageNet subset ---
is_anomalies_all = np.concatenate([is_up1_anomalies + offset for offset in patch_offsets])[:, [0, 2, 1]]
is_in_train = is_anomalies_all[:, 0] < len(is_reprs_train)
is_anomalies_train = is_anomalies_all[is_in_train]
# image index is shifted to the beginning of the test set
is_anomalies_test = is_anomalies_all[~is_in_train] - [[len(is_reprs_train), 0, 0]]
is_reprs_anomalies_train = torch.stack([is_reprs_train[img, a, b, :] for img, a, b in is_anomalies_train])
is_reprs_anomalies_test = torch.stack([is_reprs_test[img, a, b, :] for img, a, b in is_anomalies_test])

# --- NYUv2 (same procedure) ---
nyuv2_anomalies_all = np.concatenate([nyuv2_up1_anomalies + offset for offset in patch_offsets])[:, [0, 2, 1]]
nyuv2_in_train = nyuv2_anomalies_all[:, 0] < len(nyuv2_reprs_train)
nyuv2_anomalies_train = nyuv2_anomalies_all[nyuv2_in_train]
nyuv2_anomalies_test = nyuv2_anomalies_all[~nyuv2_in_train] - [[len(nyuv2_reprs_train), 0, 0]]
nyuv2_reprs_anomalies_train = torch.stack([nyuv2_reprs_train[img, a, b, :] for img, a, b in nyuv2_anomalies_train])
nyuv2_reprs_anomalies_test = torch.stack([nyuv2_reprs_test[img, a, b, :] for img, a, b in nyuv2_anomalies_test])

# Report how many anomaly vectors ended up in each split.
print(
    f'{is_reprs_anomalies_train.shape = }',
    f'{is_reprs_anomalies_test.shape = }',
    f'{nyuv2_reprs_anomalies_train.shape = }',
    f'{nyuv2_reprs_anomalies_test.shape = }',
    sep='\n',
)


In [599]:
# Prototype anomaly vector per dataset: the mean over all training anomaly representations.
is_prototype_anomaly = torch.mean(is_reprs_anomalies_train, dim=0)
nyuv2_prototype_anomaly = torch.mean(nyuv2_reprs_anomalies_train, dim=0)

In [None]:
# search for most similar representations and check if they are anomalies
#
# Every test position whose cosine similarity to the train prototype exceeds 0.75 counts
# as "found". The printed triple is
#   (found but not labeled | found and labeled | labeled but not found).
# Labeled positions are compared as a set of unique index tuples, so a position that
# appears more than once in the label array (e.g. from overlapping 2x2 patches) cannot
# inflate the "labeled but not found" count.

# --- ImageNet subset ---
is_sims = F.cosine_similarity(is_prototype_anomaly[None,:], is_reprs_test.flatten(0,2), dim=-1)
# flat hit indices -> index triples into the first three axes of is_reprs_test
is_found_anomalies = np.array(np.unravel_index(torch.arange(len(is_sims))[is_sims > 0.75], is_reprs_test.shape[:3])).T
is_labeled_test = set(tuple(x) for x in is_anomalies_test)
is_intersection_size = len(is_labeled_test & set(tuple(x) for x in is_found_anomalies))
print(f'IS:    found - ({len(is_found_anomalies)-is_intersection_size} | {is_intersection_size} | {len(is_labeled_test)-is_intersection_size}) - test')

# --- NYUv2 ---
nyuv2_sims = F.cosine_similarity(nyuv2_prototype_anomaly[None,:], nyuv2_reprs_test.flatten(0,2), dim=-1)
nyuv2_found_anomalies = np.array(np.unravel_index(torch.arange(len(nyuv2_sims))[nyuv2_sims > 0.75], nyuv2_reprs_test.shape[:3])).T
nyuv2_labeled_test = set(tuple(x) for x in nyuv2_anomalies_test)
nyuv2_intersection_size = len(nyuv2_labeled_test & set(tuple(x) for x in nyuv2_found_anomalies))
print(f'NYUV2: found - ({len(nyuv2_found_anomalies)-nyuv2_intersection_size} | {nyuv2_intersection_size} | {len(nyuv2_labeled_test)-nyuv2_intersection_size}) - test')


In [None]:
# Inspect one randomly chosen ImageNet-subset test image: show the image next to the
# per-position norm map of its representation, with anomaly markers on top.
i = torch.randint(len(is_reprs_test), (1,)).item()
img = imagenet_subset[len(is_reprs_train)+i]['image']  # test indices start after the train images
norm = is_reprs_test[i].norm(dim=-1)

# Positions belonging to this image, as plain lists so `in` compares whole triples.
labeled_anomalies = is_anomalies_test[is_anomalies_test[:,0] == i].tolist()
found_anomalies = is_found_anomalies[is_found_anomalies[:,0] == i].tolist()

# Partition into: found only / found and labeled / labeled only.
wrongly_found_anomalies = np.array([pos for pos in found_anomalies if pos not in labeled_anomalies])
correctly_found_anomalies = np.array([pos for pos in found_anomalies if pos in labeled_anomalies])
missing_anomalies = np.array([pos for pos in labeled_anomalies if pos not in found_anomalies])

print(f'found: ({len(wrongly_found_anomalies)} | {len(correctly_found_anomalies)} | {len(missing_anomalies)})')

plt.figure(figsize=(12,6))

# left: the input image
plt.subplot(1,2,1)
plt.imshow(img)
plt.axis('off')

# right: norm map with markers (orange = found only, green = found and labeled, purple = labeled only)
plt.subplot(1,2,2)
for points, color in (
    (wrongly_found_anomalies, 'orange'),
    (correctly_found_anomalies, 'green'),
    (missing_anomalies, 'purple'),
):
    # empty groups are skipped: an empty array has no columns to index
    if len(points) > 0:
        plt.scatter(points[:,2], points[:,1], c=color, marker='x')
plt.imshow(norm, cmap='gray')
plt.axis('off')
plt.show()


In [None]:
def find_anomalies(min_sim, prototype=None, reprs=None, labeled=None):
    """Count prototype-similarity hits against labeled anomaly positions.

    A position of `reprs` counts as "found" when the cosine similarity between its
    vector and `prototype` is strictly greater than `min_sim`.

    Args:
        min_sim: similarity threshold.
        prototype: 1-D tensor to compare against. Defaults to `is_prototype_anomaly`.
        reprs: 4-D tensor whose last axis holds the vectors; the first three axes are
            the index space of the positions. Defaults to `is_reprs_test`.
        labeled: array-like of integer index triples into the first three axes of
            `reprs`. Defaults to `is_anomalies_test`.

    Returns:
        Tuple `(false_positives, true_positives, false_negatives)` of ints.
        Labeled positions are counted once each, so duplicated label rows do not
        inflate the false-negative count.
    """
    # Defaults are resolved at call time so the notebook globals may be (re)defined later.
    if prototype is None:
        prototype = is_prototype_anomaly
    if reprs is None:
        reprs = is_reprs_test
    if labeled is None:
        labeled = is_anomalies_test

    sims = F.cosine_similarity(prototype[None, :], reprs.flatten(0, 2), dim=-1)

    # Flat indices of the hits, converted back to index triples of plain Python ints.
    hit_idx = torch.nonzero(sims > min_sim).flatten().cpu().numpy()
    hit_axes = np.unravel_index(hit_idx, tuple(reprs.shape[:3]))
    found = set(zip(*(axis.tolist() for axis in hit_axes)))

    labeled_set = {tuple(int(v) for v in row) for row in labeled}

    true_pos = len(found & labeled_set)
    return len(found) - true_pos, true_pos, len(labeled_set) - true_pos


# Sweep the similarity threshold; each row of `tmp` is
# (false positives, true positives, false negatives) for one threshold.
x = np.linspace(0.6, 0.99, 20)
tmp = np.array([find_anomalies(threshold) for threshold in x])

# Bar chart: the orange bar is drawn at height FP+TP and then partly covered by the green
# TP bar, so the visible orange part equals FP; false negatives are drawn downwards.
for heights, label, color in (
    (tmp[:,0]+tmp[:,1], 'false positives', 'orange'),
    (tmp[:,1], 'true positives', 'green'),
    (-tmp[:,2], 'false negatives', 'purple'),
):
    plt.bar(x, heights, label=label, color=color, width=0.018)
plt.title('imagenet subset - up[1] anomaly search')
plt.xlabel('min. cosine similarity')
plt.ylabel('count')
plt.legend()
plt.show()

# Line chart of the same three counts.
for column, (label, color) in enumerate((
    ('false positives', 'orange'),
    ('true positives', 'green'),
    ('false negatives', 'purple'),
)):
    plt.plot(x, tmp[:,column], label=label, color=color)
plt.xlabel('min. cosine similarity')
plt.ylabel('count')
plt.title('imagenet subset - up[1] anomaly search')
plt.legend()
plt.show()

In [None]:
# anomaly search NYUv2 (train) -> NYUv2 (test)

def find_anomalies(min_sim, prototype=None, reprs=None, labeled=None):
    """Count prototype-similarity hits against labeled anomaly positions (NYUv2 test split).

    A position of `reprs` counts as "found" when the cosine similarity between its
    vector and `prototype` is strictly greater than `min_sim`.

    Args:
        min_sim: similarity threshold.
        prototype: 1-D tensor to compare against. Defaults to `nyuv2_prototype_anomaly`.
        reprs: 4-D tensor whose last axis holds the vectors; the first three axes are
            the index space of the positions. Defaults to `nyuv2_reprs_test`.
        labeled: array-like of integer index triples into the first three axes of
            `reprs`. Defaults to `nyuv2_anomalies_test`.

    Returns:
        Tuple `(false_positives, true_positives, false_negatives)` of ints.
        Labeled positions are counted once each, so duplicated label rows do not
        inflate the false-negative count.
    """
    # Defaults are resolved at call time so the notebook globals may be (re)defined later.
    if prototype is None:
        prototype = nyuv2_prototype_anomaly
    if reprs is None:
        reprs = nyuv2_reprs_test
    if labeled is None:
        labeled = nyuv2_anomalies_test

    sims = F.cosine_similarity(prototype[None, :], reprs.flatten(0, 2), dim=-1)

    # Flat indices of the hits, converted back to index triples of plain Python ints.
    hit_idx = torch.nonzero(sims > min_sim).flatten().cpu().numpy()
    hit_axes = np.unravel_index(hit_idx, tuple(reprs.shape[:3]))
    found = set(zip(*(axis.tolist() for axis in hit_axes)))

    labeled_set = {tuple(int(v) for v in row) for row in labeled}

    true_pos = len(found & labeled_set)
    return len(found) - true_pos, true_pos, len(labeled_set) - true_pos


# Sweep the similarity threshold; each row of `tmp` is
# (false positives, true positives, false negatives) for one threshold.
x = np.linspace(0.6, 0.99, 20)
tmp = np.array([find_anomalies(threshold) for threshold in x])

# The orange bar is drawn at height FP+TP and then partly covered by the green TP bar,
# so the visible orange part equals FP; false negatives are drawn downwards.
for heights, label, color in (
    (tmp[:,0]+tmp[:,1], 'false positives', 'orange'),
    (tmp[:,1], 'true positives', 'green'),
    (-tmp[:,2], 'false negatives', 'purple'),
):
    plt.bar(x, heights, label=label, color=color, width=0.018)
plt.title('NYUv2 - up[1] anomaly search')
plt.xlabel('min. cosine similarity')
plt.ylabel('count')
plt.legend()
plt.show()

In [None]:
# anomaly search IS (train) -> NYUv2 (all)

def find_anomalies(min_sim, prototype=None, reprs=None, labeled=None):
    """Count prototype-similarity hits against labeled anomaly positions (cross-dataset).

    By default the ImageNet-subset prototype is matched against all NYUv2
    representations. A position of `reprs` counts as "found" when the cosine
    similarity between its vector and `prototype` is strictly greater than `min_sim`.

    Args:
        min_sim: similarity threshold.
        prototype: 1-D tensor to compare against. Defaults to `is_prototype_anomaly`.
        reprs: 4-D tensor whose last axis holds the vectors; the first three axes are
            the index space of the positions. Defaults to `nyuv2_reprs_torch`.
        labeled: array-like of integer index triples into the first three axes of
            `reprs`. Defaults to `nyuv2_anomalies_all`.

    Returns:
        Tuple `(false_positives, true_positives, false_negatives)` of ints.
        Labeled positions are counted once each, so duplicated label rows do not
        inflate the false-negative count.
    """
    # Defaults are resolved at call time so the notebook globals may be (re)defined later.
    if prototype is None:
        prototype = is_prototype_anomaly
    if reprs is None:
        reprs = nyuv2_reprs_torch
    if labeled is None:
        labeled = nyuv2_anomalies_all

    sims = F.cosine_similarity(prototype[None, :], reprs.flatten(0, 2), dim=-1)

    # Flat indices of the hits, converted back to index triples of plain Python ints.
    hit_idx = torch.nonzero(sims > min_sim).flatten().cpu().numpy()
    hit_axes = np.unravel_index(hit_idx, tuple(reprs.shape[:3]))
    found = set(zip(*(axis.tolist() for axis in hit_axes)))

    labeled_set = {tuple(int(v) for v in row) for row in labeled}

    true_pos = len(found & labeled_set)
    return len(found) - true_pos, true_pos, len(labeled_set) - true_pos


# Sweep the similarity threshold (with a progress bar); each row of `tmp` is
# (false positives, true positives, false negatives) for one threshold.
x = np.linspace(0.6, 0.99, 20)
tmp = np.array([find_anomalies(threshold) for threshold in tqdm(x)])

# The orange bar is drawn at height FP+TP and then partly covered by the green TP bar,
# so the visible orange part equals FP; false negatives are drawn downwards.
for heights, label, color in (
    (tmp[:,0]+tmp[:,1], 'false positives', 'orange'),
    (tmp[:,1], 'true positives', 'green'),
    (-tmp[:,2], 'false negatives', 'purple'),
):
    plt.bar(x, heights, label=label, color=color, width=0.018)
plt.title('IS -> NYUv2 - up[1] anomaly search')
plt.xlabel('min. cosine similarity')
plt.ylabel('count')
plt.legend()
plt.show()