# Installs

In [None]:
# Mount Google Drive at /content/drive (Colab only).
# force_remount=True is passed so that re-running this cell remounts the drive.
from google.colab import drive
drive.mount("/content/drive", force_remount=True)

In [None]:
# Install the notebook's dependencies into the Colab runtime.
# NOTE(review): "torch", "pytorch-lightning" and "torchmetrics" are unpinned while torchvision is
# pinned to 0.11.3; the code below uses Lightning 1.x-style hooks (`training_epoch_end`,
# `Trainer.from_argparse_args`, `gpus=`), so consider pinning these for reproducibility.
#!pip install --quiet cloud-tpu-client==0.10 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.8-cp37-cp37m-linux_x86_64.whl
!pip install --quiet "torch" "pytorch-lightning" "opencv-python==4.5.2.52" "scikit-learn==0.24.2" "torchmetrics" "torchvision==0.11.3"
#!pip install --quiet "torch" "torchtext==0.9.0" "pytorch-lightning==1.3.3" "opencv-python==4.5.2.52" "scikit-learn==0.24.2" "torchmetrics>=0.3" "torchvision==0.11.3"
#!pip install vaex


In [None]:
# Capture the output lines of `nvidia-smi` and print them, or report that no GPU
# is attached when the command output contains the word 'failed'.
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
from psutil import virtual_memory
# Report the runtime's total RAM (in units of 1e9 bytes) and whether it reaches
# the 20 GB mark this notebook treats as a "high-RAM" runtime.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

print('Not using a high-RAM runtime' if ram_gb < 20 else 'You are using a high-RAM runtime!')

In [None]:
#!git clone  https://github.com/hcw-00/PatchCore_anomaly_detection.git
#%cd PatchCore_anomaly_detection
#!ls -l

# Imports

In [None]:
from pytorch_lightning import LightningDataModule, LightningModule, Trainer
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

import argparse
import torch
import torch.nn.functional as F
from torch import nn
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
import cv2
import numpy as np
import os
import glob
import shutil
from PIL import Image
from sklearn.metrics import roc_auc_score
from torch import nn

from tqdm.notebook import tqdm

from sklearn.metrics import confusion_matrix
import pickle
from sklearn.random_projection import SparseRandomProjection
from sklearn.neighbors import NearestNeighbors
from scipy.ndimage import gaussian_filter

import tensorflow as tf

from tempfile import mkdtemp
import os.path as path
import numpy as np
import gc

# Print the name TensorFlow reports for the GPU device (an empty string when it finds none).
print('Testing gpu availability: ', tf.test.gpu_device_name(), sep='')



# DATASET

In [None]:
class MVTecDataset(Dataset):
    """Image dataset laid out like MVTec-AD, yielding image / mask / label tuples.

    The directory layout read by this class is::

        <root>/<category>/train/<type>/*.png
        <root>/<category>/test/<type>/*.png
        <root>/<category>/ground_truth/<defect_type>/*.png

    Images inside a ``good`` folder get label 0 and an all-zero mask. Images in
    any other folder get label 1 and are paired, by sorted position, with the
    mask files found in ``ground_truth/<defect_type>``.

    Args:
        root: Dataset root directory.
        category: Category folder to load (ignored when ``load_complete_data``).
        transform: Callable applied to each RGB ``PIL.Image``; must return a tensor.
        gt_transform: Callable applied to each ground-truth mask image.
        phase: ``'train'`` reads the ``train`` split; any other value reads ``test``.
        load_complete_data: When True, every sub-directory of ``root`` is
            treated as a category and all of them are loaded.
    """

    def __init__(self, root, category, transform, gt_transform, phase, load_complete_data=False):
        split = 'train' if phase == 'train' else 'test'

        if load_complete_data:
            # Sorted so the sample order does not depend on os.listdir() ordering.
            categories = sorted(d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d)))
        else:
            categories = [category]

        self.img_paths_root = [os.path.join(root, c, split) for c in categories]
        # Defined for both phases: load_dataset() indexes it for every non-'good'
        # folder, which previously raised AttributeError in the train phase.
        self.gt_paths_root = [os.path.join(root, c, 'ground_truth') for c in categories]

        self.transform = transform
        self.gt_transform = gt_transform
        # load dataset
        self.img_paths, self.gt_paths, self.labels, self.types = self.load_dataset() # self.labels => good : 0, anomaly : 1

    def load_dataset(self):
        """Scan the split folders and build the parallel sample lists.

        Returns:
            Tuple ``(img_paths, gt_paths, labels, types)`` of equally long lists.
            ``gt_paths`` holds the integer 0 for 'good' samples and a mask file
            path otherwise.

        Raises:
            AssertionError: If the number of images and masks differs.
        """
        img_tot_paths = []
        gt_tot_paths = []
        tot_labels = []
        tot_types = []

        for img_root, gt_root in zip(self.img_paths_root, self.gt_paths_root):
            for defect_type in sorted(os.listdir(img_root)):
                img_paths = sorted(glob.glob(os.path.join(img_root, defect_type, "*.png")))

                if defect_type == 'good':
                    # No mask file exists for defect-free images; 0 marks "build an empty mask".
                    gt_paths = [0] * len(img_paths)
                    label = 0
                else:
                    # Images and masks are paired purely by their sorted order.
                    gt_paths = sorted(glob.glob(os.path.join(gt_root, defect_type, "*.png")))
                    label = 1

                img_tot_paths.extend(img_paths)
                gt_tot_paths.extend(gt_paths)
                tot_labels.extend([label] * len(img_paths))
                tot_types.extend([defect_type] * len(img_paths))

        assert len(img_tot_paths) == len(gt_tot_paths), "Something wrong with test and ground truth pair!"
        return img_tot_paths, gt_tot_paths, tot_labels, tot_types

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return ``(img, gt, label, file_stem, img_type)`` for sample ``idx``."""
        img_path, gt, label, img_type = self.img_paths[idx], self.gt_paths[idx], self.labels[idx], self.types[idx]
        img = Image.open(img_path).convert('RGB')
        img = self.transform(img)
        if gt == 0:
            # Empty mask with the image's own height AND width (the width was
            # previously taken from the height axis, which breaks non-square inputs).
            gt = torch.zeros([1, img.size()[-2], img.size()[-1]])
        else:
            gt = Image.open(gt)
            gt = self.gt_transform(gt)

        assert img.size()[1:] == gt.size()[1:], "image.size != gt.size !!!"

        # img_path[:-4] strips the ".png" extension before taking the base name.
        return img, gt, label, os.path.basename(img_path[:-4]), img_type

# File management

In [None]:
def copy_files(src, dst, ignores=()):
    """Recursively copy ``src`` into ``dst``, skipping ignored entries.

    Args:
        src: Directory to copy from.
        dst: Directory to copy into; created (with parents) if it does not exist.
        ignores: Iterable of substrings. Any file or directory whose *name*
            contains one of them is skipped (a skipped directory is not descended into).
    """
    # Create the target up front so the first file copy cannot fail on a missing directory.
    os.makedirs(dst, exist_ok=True)

    for file_name in os.listdir(src):
        if any(pattern in file_name for pattern in ignores):
            continue

        src_path = os.path.join(src, file_name)
        dst_path = os.path.join(dst, file_name)

        if os.path.isfile(src_path):
            shutil.copy(src_path, dst_path)
        elif os.path.isdir(src_path):
            copy_files(src_path, dst_path, ignores)

def prep_dirs(root):
    """Create the output directories of a run and snapshot the working directory.

    Reads the module-level ``args`` for the category name.

    Args:
        root: Directory that receives the ``sample`` and ``src`` sub-folders.

    Returns:
        Tuple ``(embeddings_path, sample_path, source_code_save_path)``. Note
        that ``embeddings_path`` lives under ``./embeddings/<category>`` and
        not under ``root``.
    """
    embeddings_path = os.path.join('./', 'embeddings', args.category)
    sample_path = os.path.join(root, 'sample')
    source_code_save_path = os.path.join(root, 'src')

    for directory in (embeddings_path, sample_path, source_code_save_path):
        os.makedirs(directory, exist_ok=True)

    # Copy the current working directory into <root>/src, leaving out VCS,
    # editor, cache, log and documentation entries.
    excluded = ['.git', '.vscode', '__pycache__', 'logs', 'README', 'samples', 'LICENSE']
    copy_files('./', source_code_save_path, excluded)

    return embeddings_path, sample_path, source_code_save_path


# Evaluation

In [None]:
def cal_confusion_matrix(y_true, y_pred_no_thresh, thresh, img_path_list):
    """Threshold raw scores, print the confusion matrix and list the misclassified images.

    Args:
        y_true: Ground-truth labels (0 or 1), indexable by position.
        y_pred_no_thresh: Raw scores; a score strictly greater than ``thresh`` is predicted as 1.
        thresh: Decision threshold.
        img_path_list: Image identifiers aligned with the scores.

    Returns:
        Tuple ``(cm, false_p, false_n)``: the confusion matrix and the
        identifiers of the false positives and false negatives.
    """
    false_p, false_n = [], []
    pred_thresh = []

    for idx in range(len(y_pred_no_thresh)):
        predicted = 1 if y_pred_no_thresh[idx] > thresh else 0
        pred_thresh.append(predicted)

        actual = y_true[idx]
        if predicted == 1 and actual == 0:
            false_p.append(img_path_list[idx])
        elif predicted == 0 and actual == 1:
            false_n.append(img_path_list[idx])

    cm = confusion_matrix(y_true, pred_thresh)
    print(cm)
    print('false positive')
    print(false_p)
    print('false negative')
    print(false_n)
    return cm, false_p, false_n
    

# Sampling

In [None]:
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Returns points that minimizes the maximum distance of any point to a center.
Implements the k-Center-Greedy method in
Ozan Sener and Silvio Savarese.  A Geometric Approach to Active Learning for
Convolutional Neural Networks. https://arxiv.org/abs/1708.00489 2017
Distance metric defaults to l2 distance.  Features used to calculate distance
are either raw features or if a model has transform method then uses the output
of model.transform(X).
Can be extended to a robust k centers algorithm that ignores a certain number of
outlier datapoints.  Resulting centers are solution to multiple integer program.
"""

from sklearn.metrics import pairwise_distances
import abc
import numpy as np

class SamplingMethod(object):
  """Base class for sampling strategies operating on a feature array ``X``.

  Subclasses are expected to implement ``select_batch_``; callers go through
  ``select_batch``, which forwards its keyword arguments unchanged.
  """
  # Python 2 style metaclass declaration, kept for compatibility. Under
  # Python 3 this is a plain class attribute, so abstractness is NOT enforced.
  __metaclass__ = abc.ABCMeta

  @abc.abstractmethod
  def __init__(self, X, y, seed, **kwargs):
    self.X = X
    self.y = y
    self.seed = seed

  def flatten_X(self):
    """Return ``X`` as a 2-D array of shape ``(n_samples, n_features)``.

    Arrays with more than two dimensions have all trailing axes collapsed
    into one; 1-D and 2-D inputs are returned unchanged.
    """
    shape = self.X.shape
    flat_X = self.X
    if len(shape) > 2:
      # np.prod replaces np.product, which is deprecated and removed in NumPy 2.0.
      flat_X = np.reshape(self.X, (shape[0], int(np.prod(shape[1:]))))
    return flat_X


  @abc.abstractmethod
  def select_batch_(self):
    return

  def select_batch(self, **kwargs):
    """Public entry point: delegate to the subclass's ``select_batch_``."""
    return self.select_batch_(**kwargs)

  def to_dict(self):
    return None

class kCenterGreedy(SamplingMethod):
  """Greedy k-center selection: repeatedly pick the point farthest from all chosen centers.

  Args:
    X: Feature array with samples along axis 0.
    y: Labels; stored but not used by the selection.
    seed: Seed for the random generator that picks the very first center
      when no distances are known yet.
    metric: Metric name forwarded to ``sklearn.metrics.pairwise_distances``.
  """

  def __init__(self, X, y, seed, metric='euclidean'):
    self.X = X
    self.y = y
    self.seed = seed
    self.flat_X = self.flatten_X()
    self.name = 'kcenter'
    self.features = self.flat_X
    self.metric = metric
    # Column vector (n_obs, 1): distance of every point to its nearest center.
    self.min_distances = None
    self.n_obs = self.X.shape[0]
    self.already_selected = []
    self._rng = np.random.RandomState(seed)

  def update_distances(self, cluster_centers, only_new=True, reset_dist=False):
    """Update min distances given cluster centers.
    Args:
      cluster_centers: indices of cluster centers
      only_new: only calculate distance for newly selected points and update
        min_distances.
      reset_dist: whether to reset min_distances.
    """

    if reset_dist:
      self.min_distances = None
    if only_new:
      cluster_centers = [d for d in cluster_centers
                         if d not in self.already_selected]
    if len(cluster_centers) > 0:
      # Update min_distances for all examples given new cluster center.
      x = self.features[cluster_centers]
      dist = pairwise_distances(self.features, x, metric=self.metric)
      # Reduce over the new centers first so the result always stays (n_obs, 1);
      # taking np.minimum against the raw (n_obs, k) matrix would broadcast to
      # (n_obs, k) whenever more than one center is passed.
      nearest = np.min(dist, axis=1).reshape(-1,1)

      if self.min_distances is None:
        self.min_distances = nearest
      else:
        self.min_distances = np.minimum(self.min_distances, nearest)

  def select_batch_(self, model, already_selected, N, **kwargs):
    """
    Diversity promoting active learning method that greedily forms a batch
    to minimize the maximum distance to a cluster center among all unlabeled
    datapoints.
    Args:
      model: object with a ``transform(X)`` method used to compute the
        features, or None to use the flattened input directly
      already_selected: index of datapoints already selected
      N: batch size
    Returns:
      indices of points selected to minimize distance to cluster centers
    """

    try:
      # Assumes that the transform function takes in original data and not
      # flattened data.
      print('Getting transformed features...')
      if model is not None:
        self.features = model.transform(self.X)
      else:
        self.features = self.flat_X

      print('Calculating distances...')
      self.update_distances(already_selected, only_new=False, reset_dist=True)
    except Exception as err:
      # Best-effort fallback, but report why and make sure the features really
      # are flat_X (a failed transform may have left self.features unusable).
      print('Using flat_X as features.')
      print(f'(reason: {err!r})')
      self.features = self.flat_X
      self.update_distances(already_selected, only_new=True, reset_dist=False)

    new_batch = []

    for _ in tqdm(range(N)):
      if self.min_distances is None:
        # No center chosen yet: initialize with a randomly selected datapoint.
        ind = self._rng.choice(np.arange(self.n_obs))
      else:
        ind = np.argmax(self.min_distances)
      # New examples should not be in already selected since those points
      # should have min_distance of zero to a cluster center.
      assert ind not in already_selected

      self.update_distances([ind], only_new=True, reset_dist=False)
      new_batch.append(ind)

    self.already_selected = already_selected
    return new_batch

# EfficientNetLayerPropagation

In [None]:
class ENLP(LightningModule):
    """Frozen EfficientNet-B7 that records intermediate feature maps and pickles them.

    Forward hooks on nine positions of ``model.features`` capture the layer
    outputs of every forward pass. The "training" loop does no optimisation:
    it only average-pools the captured maps and collects them per layer, then
    writes them to ``EMBEDDINGS_ROOT/<category>/layer_<i>/``. The test loop
    does the same for the test split and additionally stores labels and
    image names.

    Args:
        hparams: Namespace of run options (see ``get_args``); saved via
            ``save_hyperparameters`` and read back through ``self.hparams``.
    """

    # Hooked positions are indexed 0..8 in model.features.
    NUM_HOOKED_LAYERS = 9
    # Root directory of the pickled embeddings (relative to the working directory).
    EMBEDDINGS_ROOT = '../drive/MyDrive/data/efficientnet_embeddings_b7'

    def __init__(self, hparams):
        super().__init__()

        self.printed_embedding = False

        # enable Lightning to store all the provided arguments within the self.hparams attribute
        self.save_hyperparameters(hparams)

        # Initialize Model
        self.init_model()

        # Loss object kept as an attribute; no step in this class computes a loss.
        self.criterion = torch.nn.MSELoss(reduction='sum')

        # Initialize Results
        self.init_results_list()

        # Initialize the image / mask preprocessing pipelines
        self.init_transformers()

    def init_model(self, pretrained=True):
        """Build the frozen backbone and register the feature-capturing hooks.

        Args:
            pretrained: Forwarded to ``torchvision.models.efficientnet_b7``.
        """
        self.features_extraction = {}

        # Load the backbone
        torch.hub._validate_not_a_forked_repo=lambda a,b,c: True # bug workaround kept from the torch.hub based loader
        self.model = models.efficientnet_b7(pretrained=pretrained)

        # turn off gradient computation for every backbone parameter
        for param in self.model.parameters():
            param.requires_grad = False

        def make_hook(layer_idx):
            # Forward hook that appends the module's OUTPUT to the list of its layer.
            def hook(module, input, output):
                self.features_extraction[layer_idx].append(output)
            return hook

        stages = self.model._modules["features"]
        last = self.NUM_HOOKED_LAYERS - 1

        # First and last entries are hooked on their sub-module '0'; the stages
        # in between are hooked on sub-module '0' of the last element of the
        # last block's 'block' container.
        stages[0]._modules['0'].register_forward_hook(make_hook(0))
        for idx in range(1, last):
            stages[idx][-1]._modules['block'][-1]._modules['0'].register_forward_hook(make_hook(idx))
        stages[last]._modules['0'].register_forward_hook(make_hook(last))

    def init_transformers(self):
        """Create the image / mask preprocessing and the inverse normalisation."""
        # Using the mean and std of Imagenet is a common practice. They are calculated based on millions of images.
        # If you want to train from scratch on your own dataset, you can calculate the new mean and std.
        mean_train, std_train = np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225])

        mean_inv = (mean_train*(-1)/std_train).tolist()
        std_inv = (1/std_train).tolist()

        load_size = self.hparams.load_size
        input_size = self.hparams.input_size

        self.data_transforms = transforms.Compose([
                                # InterpolationMode.LANCZOS replaces Image.ANTIALIAS, which was removed in Pillow 10.
                                transforms.Resize((load_size, load_size), transforms.InterpolationMode.LANCZOS),
                                transforms.ToTensor(),
                                transforms.CenterCrop(input_size),
                                transforms.Normalize(mean=mean_train.tolist(), std=std_train.tolist())])
        self.gt_transforms = transforms.Compose([
                                transforms.Resize((load_size, load_size)),
                                transforms.ToTensor(),
                                transforms.CenterCrop(input_size)])

        self.inv_normalize = transforms.Normalize(mean=mean_inv, std=std_inv)

    def init_results_list(self):
        """Reset the per-run result accumulators."""
        self.gt_list_px_lvl = []
        self.pred_list_px_lvl = []
        self.gt_list_img_lvl = []
        self.pred_list_img_lvl = []
        self.img_path_list = []
        self.all_embedding_test = []

    def forward(self, x_t):
        """Run the backbone on ``x_t`` and return ``{layer_idx: [captured outputs]}``."""
        # Fresh lists per call so the hooks only hold the outputs of this pass.
        self.features_extraction = {ln: [] for ln in range(self.NUM_HOOKED_LAYERS)}
        _ = self.model(x_t)
        return self.features_extraction

    def _build_dataset(self, phase):
        # Dataset for the given phase, configured from the stored hyperparameters.
        return MVTecDataset(root=os.path.join(self.hparams.dataset_path),
                            category=self.hparams.category,
                            transform=self.data_transforms,
                            gt_transform=self.gt_transforms,
                            phase=phase,
                            load_complete_data=self.hparams.load_complete_data)

    def _layer_dir(self, layer):
        # Output directory of one layer: <EMBEDDINGS_ROOT>/<category>/layer_<layer>
        return os.path.join(self.EMBEDDINGS_ROOT, self.hparams.category, f'layer_{layer}')

    def _pooled_embeddings(self, features_extraction, aps):
        # Average-pool the captured map of every layer and return {layer: numpy array}.
        # Each hooked module is expected to fire once per forward pass; the first
        # captured output is used and layers without any output are skipped, so a
        # value from a previous layer can never be stored under the wrong key.
        pooled = {}
        for layer, features in features_extraction.items():
            if not features:
                continue
            pooled_map = F.avg_pool2d(features[0], aps[0], aps[1], aps[2])
            pooled[layer] = pooled_map.cpu().detach().numpy()
        return pooled

    def train_dataloader(self):
        return DataLoader(self._build_dataset('train'),
                          batch_size=self.hparams.batch_size,
                          shuffle=True,
                          num_workers=4,
                          pin_memory=True)

    def test_dataloader(self):
        # batch_size must stay 1: test_step reads only element 0 of the label batch.
        return DataLoader(self._build_dataset('test'),
                          batch_size=1,
                          shuffle=False,
                          num_workers=4,
                          pin_memory=True)

    def configure_optimizers(self):
        # No optimization needed
        return None

    def on_train_start(self):
        self.model.eval() # to stop running_var move (maybe not critical)
        self.embedding_dir_path, self.sample_path, self.source_code_save_path = prep_dirs(self.logger.log_dir)
        self.overall_embeddings = {ln: [] for ln in range(self.NUM_HOOKED_LAYERS)}

    def on_test_start(self):
        self.init_results_list()
        self.overall_embeddings_test = {ln: [] for ln in range(self.NUM_HOOKED_LAYERS)}
        self.embedding_dir_path, self.sample_path, self.source_code_save_path = prep_dirs(self.logger.log_dir)

    def training_step(self, batch, batch_idx, aps=(3, 1, 1)): # save locally aware patch features
        """Collect the pooled feature maps of one training batch.

        Args:
            batch: ``(img, gt, label, file_name, img_type)`` as produced by ``MVTecDataset``.
            batch_idx: Unused.
            aps: ``(kernel_size, stride, padding)`` of the average pooling.
        """
        x, _, _, file_name, _ = batch
        for layer, embedding in self._pooled_embeddings(self(x), aps).items():
            self.overall_embeddings[layer].append(embedding)

    def training_epoch_end(self, outputs):
        """Pickle the collected training embeddings, one file per layer.

        An existing ``embedding_train_projected.pickle`` is left untouched, so
        stale files must be deleted manually to force a re-export.
        """
        print('Training end')
        print('start embedding')
        self.overall_embeddings_corsets = {}

        for layer, embeddings in self.overall_embeddings.items():
            layer_dir = self._layer_dir(layer)
            os.makedirs(layer_dir, exist_ok=True)
            target = os.path.join(layer_dir, 'embedding_train_projected.pickle')
            if not os.path.isfile(target):
                with open(target, 'wb') as f:
                    pickle.dump({'embedding': embeddings}, f)

    def test_step(self, batch, batch_idx, aps=(3, 1, 1)):
        """Collect the pooled feature maps, label and image name of one test sample."""
        x, gt, label, file_name, x_type = batch
        for layer, embedding in self._pooled_embeddings(self(x), aps).items():
            self.overall_embeddings_test[layer].append(embedding)

        self.gt_list_img_lvl.append(label.cpu().numpy()[0])
        self.img_path_list.extend(file_name)

    def test_epoch_end(self, outputs):
        """Pickle the test embeddings per layer and the image names once per category."""
        for layer, embeddings in self.overall_embeddings_test.items():
            layer_dir = self._layer_dir(layer)
            # Created here as well so a test-only run does not depend on a previous training run.
            os.makedirs(layer_dir, exist_ok=True)
            with open(os.path.join(layer_dir, 'embedding_test.pickle'), 'wb') as f:
                pickle.dump({'embedding': embeddings,
                             'labels': self.gt_list_img_lvl}, f)

        category_dir = os.path.join(self.EMBEDDINGS_ROOT, self.hparams.category)
        os.makedirs(category_dir, exist_ok=True)
        with open(os.path.join(category_dir, 'img_paths.pickle'), 'wb') as f:
            pickle.dump({'img_paths': self.img_path_list}, f)


# RUN

In [None]:
import os
import shutil
# Work from a `run_from_here` sub-directory: create and enter it unless the current
# working directory path already contains that name; otherwise just print the cwd.
# The relative '../drive/...' paths used by later cells are resolved from this directory.
if not 'run_from_here' in os.getcwd():
  !mkdir run_from_here
  %cd run_from_here
else:
  print( os.getcwd())
def _empty_cache():
  torch.cuda.empty_cache()
  gc.collect()
  gc.collect()

In [None]:
  def get_args(name='carpet', layers=None):
    """Build the run configuration for one category.

    Args:
      name: Category name, stored as ``args.category``.
      layers: Layer spec such as ``'5_6'`` stored as ``args.layers_to_process``;
        when None the option keeps its default (None).

    Returns:
      ``argparse.Namespace`` holding all options with their defaults.
    """
    def _str2bool(value):
      # argparse hands over strings; without this any non-empty string would be truthy.
      return str(value).strip().lower() in ('1', 'true', 'yes', 'y')

    parser = argparse.ArgumentParser(description='ANOMALYDETECTION')
    parser.add_argument('--phase', choices=['train','test', 'train_test'], default='train_test')
    parser.add_argument('--dataset_path', default='../drive/MyDrive/data/mvtec_anomaly_detection')
    parser.add_argument('--category', default=None)
    # Explicit types keep overridden values numeric instead of silently becoming strings.
    parser.add_argument('--num_epochs', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--load_size', type=int, default=256) # 256
    parser.add_argument('--input_size', type=int, default=224)
    parser.add_argument('--layers_to_process', default=None)
    parser.add_argument('--pytorch_version', default='NVIDIA/DeepLearningExamples:torchhub')#'pytorch/vision:v0.10.0')
    parser.add_argument('--coreset_sampling_ratio', type=float, default=0.1)
    parser.add_argument('--project_root_path', default='./')
    parser.add_argument('--result_path', default='../drive/MyDrive/data/mvtec_results/')
    parser.add_argument('--save_src_code', type=_str2bool, default=False)
    parser.add_argument('--save_anomaly_map', type=_str2bool, default=False)
    parser.add_argument('--load_complete_data', type=_str2bool, default=False)
    parser.add_argument('--nearest_neighbors', type=int, default=9)
    parser.add_argument('--distance_batch_size', type=int, default=1000)

    # Only pass --layers_to_process when a value was given; a None entry is not a valid argv token.
    argv = ['--category', name]
    if layers is not None:
      argv += ['--layers_to_process', str(layers)]

    args = parser.parse_args(argv)

    return args


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Category folder names to process. 'metal_nut' is spelled with an underscore so it
# matches the category list used by the evaluation cells (the earlier 'metalnut'
# pointed at a different folder than the one the evaluation later reads).
_to_test = ['bottle', 'cable', 'capsule', 'carpet', 'grid',
            'hazelnut', 'leather', 'metal_nut',
            'pill', 'screw',
            'tile', 'toothbrush', 'transistor', 'wood', 'zipper']
#_to_test = ['capsule']

# Extract and store the embeddings of every category, one Trainer/model pair per category.
for name in _to_test:

    layers = '0'#_2'
    args = get_args(name, layers)
    print(args.category)

    trainer = Trainer.from_argparse_args(args,
                                         default_root_dir=os.path.join(args.project_root_path, args.category),
                                         max_epochs=args.num_epochs,
                                         gpus=1)
    model = ENLP(hparams=args)

    if args.phase in ('train', 'train_test'):
        trainer.fit(model)
        _empty_cache()
    if args.phase in ('test', 'train_test'):
        trainer.test(model)
    _empty_cache()

    # Remove the per-category working directory (the Trainer's default_root_dir) once done.
    if os.path.isdir(name):
        shutil.rmtree(name)


# Evaluation

In [None]:
import os
# Same working-directory setup as before the extraction run: enter `run_from_here`
# unless the current path already contains it, so the '../drive/...' paths below resolve.
if not 'run_from_here' in os.getcwd():
  !mkdir run_from_here
  %cd run_from_here
else:
  print( os.getcwd())

def _empty_cache():
  torch.cuda.empty_cache()
  gc.collect()
  gc.collect()

# Value passed as `contamination` to every ABOD detector built in the cells below.
contamination = 0.01
# Categories to evaluate; each name is used as a folder name under the embeddings directory.
_to_test = ['bottle', 'cable', 'capsule', 'carpet', 'grid',
            'hazelnut', 'leather', 'metal_nut',
            'pill', 'screw', 'tile', 'toothbrush', 'transistor', 'wood', 'zipper']

In [None]:
# Image-level ABOD evaluation: for every category and every layer 5..8, fit on the
# flattened training embeddings and report the AUC-ROC on the test embeddings.
# NOTE(review): `ABOD` is not imported in any visible cell (presumably
# pyod.models.abod.ABOD) — confirm, then add the install and import.
# SECURITY: pickle.load can execute arbitrary code; only load files written by this notebook.
embeddings_root = '../drive/MyDrive/data/efficientnet_embeddings_b7'

# Created once, outside the loop, so the scores of ALL categories are kept
# (it used to be re-created per category, leaving only the last one).
results = {}

for category in _to_test:

  print(category)
  results[category] = {}

  for layer in range(5,9):
    print(f'\tLayer: {layer}', end=' ')
    layer_dir = f'{embeddings_root}/{category}/layer_{layer}'

    # Load embeddings
    with open(f'{layer_dir}/embedding_train_projected.pickle', 'rb') as f:
      embeddings_train = pickle.load(f)['embedding']

    with open(f'{layer_dir}/embedding_test.pickle', 'rb') as f:
      p = pickle.load(f)
      embeddings_test = p['embedding']
      labels = p['labels']

    # Prepare training embeddings: stack the batches, then one flat row per image
    embeddings_train_layer = np.concatenate(embeddings_train, axis=0)
    X_train = np.reshape(embeddings_train_layer, (embeddings_train_layer.shape[0], -1))
    print('reshaped', end='')

    # prepare test embeddings the same way
    embedding_test_layer = np.concatenate(embeddings_test, axis=0)
    X_test = np.reshape(embedding_test_layer, (embedding_test_layer.shape[0], -1))
    print('x2', end=' ')

    m= ABOD(contamination=contamination, method='fast', n_neighbors=10)
    m.fit(X_train)
    print('- fitted', end=' ')

    y_test_scores = m.decision_function(X_test)
    print('- tested', end=' ')

    img_auc = roc_auc_score(labels, y_test_scores)
    score_str = f"- AUC-ROC: {img_auc}"
    print(score_str)
    results[category][layer] = img_auc
     

In [None]:
# Image-level ABOD evaluation on concatenated multi-layer embeddings, fitted on a
# k-center-greedy coreset of the training images.
# NOTE(review): `ABOD` is not imported in any visible cell (presumably
# pyod.models.abod.ABOD) — confirm, then add the install and import.
# SECURITY: pickle.load can execute arbitrary code; only load files written by this notebook.
results = {'n_neighbors': [10, 'default']}
coreset_sampling_ratio = 0.3

embeddings_root = '../drive/MyDrive/data/efficientnet_embeddings_b7'
results_dir = '../drive/MyDrive/data/efficientnet_evaluation/abod'
# Ensure the output folder exists before any expensive fit, not after the first one.
os.makedirs(results_dir, exist_ok=True)


def _load_flat_embeddings(pickle_path):
  """Load one embedding pickle; return (one flat row per image, labels or None)."""
  with open(pickle_path, 'rb') as f:
    payload = pickle.load(f)
  stacked = np.concatenate(payload['embedding'], axis=0)
  return np.reshape(stacked, (stacked.shape[0], -1)), payload.get('labels')


for category in _to_test:
  print(category)
  results[category] = {}

  for layers in ['5_6', '5_7']:
    # Load embeddings
    print(f'\tLayer: {layers}', end=' ')
    results[category][layers] = {}

    embeddings_train_multi = []
    embeddings_test_multi = []

    for layer in layers.split('_'):
      layer_dir = f'{embeddings_root}/{category}/layer_{layer}'

      train_rows, _ = _load_flat_embeddings(f'{layer_dir}/embedding_train_projected.pickle')
      embeddings_train_multi.append(train_rows)

      # Labels are stored with every layer's test pickle; the last one read is used.
      test_rows, labels = _load_flat_embeddings(f'{layer_dir}/embedding_test.pickle')
      embeddings_test_multi.append(test_rows)

    # Prepare training embeddings: concatenate the layers feature-wise
    embeddings_train_layer = np.concatenate(embeddings_train_multi, axis=1)

    # The random projection is only used by the coreset selector to measure distances.
    randomprojector = SparseRandomProjection(n_components='auto', eps=0.9)
    randomprojector.fit(embeddings_train_layer)

    # Coreset Subsampling
    selector = kCenterGreedy(embeddings_train_layer,0,0) #TODO MAKE THIS CUSTOMIZABLE
    selected_idx = selector.select_batch(model=randomprojector, already_selected=[], N=int(embeddings_train_layer.shape[0]*coreset_sampling_ratio))
    embedding_coreset = embeddings_train_layer[selected_idx]

    # The detector is fitted on the selected rows of the UNprojected embeddings.
    X_train = embedding_coreset
    print(f'projected - {X_train.shape}', end='')

    # prepare test embeddings
    X_test = np.concatenate(embeddings_test_multi, axis=1)
    print(f'- {X_test.shape}', end = ' ')

    print('- fit')
    for n in results['n_neighbors']:
      print(f'N_neighbors: {n}', end=' ')
      if n == 'default':
        m= ABOD(contamination=contamination, method='default')
      else:
        m= ABOD(contamination=contamination, method='fast', n_neighbors=n)
      m.fit(X_train)
      print('- fitted', end=' ')

      y_test_scores = m.decision_function(X_test)
      print('- tested', end=' ')

      img_auc = roc_auc_score(labels, y_test_scores)
      score_str = f"- AUC-ROC: {img_auc}"
      print(score_str)
      results[category][layers][n] = img_auc
      # Written after every score so partial results survive an interrupted run.
      with open(f'{results_dir}/results_projected.pickle', 'wb') as f:
        pickle.dump(results, f)

In [None]:
# Patch-level ABOD evaluation: every spatial position of a feature map is one sample
# (its channel vector); an image score is derived from the scores of its patches.
# NOTE(review): `ABOD` is not imported in any visible cell (presumably
# pyod.models.abod.ABOD) — confirm, then add the install and import.
# SECURITY: pickle.load can execute arbitrary code; only load files written by this notebook.
results = {'n_neighbors': [10]}#, 'default']}
with_random_projection = False
coreset_sampling_ratio = 0.1

embeddings_root = '../drive/MyDrive/data/efficientnet_embeddings_b7'
results_dir = '../drive/MyDrive/data/efficientnet_evaluation/abod'
os.makedirs(results_dir, exist_ok=True)


def _to_patch_rows(feature_maps):
  """Turn feature maps (N, C, H, W), as saved by ENLP, into patch vectors (N*H*W, C).

  The channel axis is moved last BEFORE reshaping; reshaping (N, C, H, W)
  straight to (-1, C) would cut rows out of consecutive spatial values and
  mix channels instead of producing one channel vector per position.
  """
  channels = feature_maps.shape[1]
  return np.transpose(feature_maps, (0, 2, 3, 1)).reshape(-1, channels)


for category in _to_test:
  print(category)
  results[category] = {}

  for layers in ['5', '6']:
    # Load embeddings
    print(f'\tLayer: {layers}', end=' ')
    results[category][layers] = {}

    embeddings_train_multi = []
    embeddings_test_multi = []

    # Train and test data are loaded in one flat loop (the test loading used to be
    # nested inside the train loop and re-used its loop variable).
    for layer in layers.split('_'):
      layer_dir = f'{embeddings_root}/{category}/layer_{layer}'

      with open(f'{layer_dir}/embedding_train_projected.pickle', 'rb') as f:
        embeddings_train = pickle.load(f)['embedding']
      embeddings_train_multi.append(_to_patch_rows(np.concatenate(embeddings_train, axis=0)))

      # prepare test embeddings (kept as 4-D maps so they can be split per image below)
      with open(f'{layer_dir}/embedding_test.pickle', 'rb') as f:
        p = pickle.load(f)
      embeddings_test_multi.append(np.concatenate(p['embedding'], axis=0))
      labels = p['labels']

    # Prepare training embeddings
    embeddings_train_layer = np.concatenate(embeddings_train_multi, axis=1)
    embedding_test_layer = embeddings_test_multi[-1]

    if with_random_projection:
      randomprojector = SparseRandomProjection(n_components=embeddings_train_layer.shape[1])#'auto', eps=0.9)
      randomprojector.fit(embeddings_train_layer)
    else:
      randomprojector = None

    # Coreset subsampling is disabled here: all training patches are used.
    embedding_coreset = embeddings_train_layer

    X_train = embedding_coreset
    print(f'projected - {X_train.shape}', end='')

    print('- fit')
    for n in results['n_neighbors']:
      print(f'N_neighbors: {n}', end=' ')
      if n == 'default':
        m= ABOD(contamination=contamination, method='default')
      else:
        m= ABOD(contamination=contamination, method='fast', n_neighbors=n)
      m.fit(X_train)
      print('- fitted', end=' ')

      y_test_scores = []
      for i in range(embedding_test_layer.shape[0]):
        # Patch vectors of image i, concatenated feature-wise over the requested layers.
        X_test = np.concatenate([_to_patch_rows(maps[i:i+1]) for maps in embeddings_test_multi], axis=1)
        if i==0:
          print(f'- {X_test.shape}', end = ' ')
        score_patches = m.decision_function(X_test)

        # Image-level score: the maximum patch score, re-weighted by one minus
        # the softmax share of that maximum among all patch scores.
        w = (1 - (np.max(np.exp(score_patches))/np.sum(np.exp(score_patches))))
        score = w*max(score_patches) # Image-level score
        y_test_scores.append(score)

      print('- tested', end=' ')
      img_auc = roc_auc_score(labels, y_test_scores)
      score_str = f"- AUC-ROC: {img_auc}"
      print(score_str)
      results[category][layers][n] = img_auc
      # Written after every score so partial results survive an interrupted run.
      with open(f'{results_dir}/results_projected_patches.pickle', 'wb') as f:
        pickle.dump(results, f)
    print('---------------------------------')

In [None]:

#N_b = np.min(score_patches)
# Scratch check on the `score_patches` left over from the last image of the previous
# cell: the same weighting formula, but built from the minimum patch score.
exp_scores = np.exp(score_patches)
w = 1 - (np.min(exp_scores) / np.sum(exp_scores))
score = w * min(score_patches) # Image-level score
score

In [None]:
# Bar chart of the image-level scores from the most recent evaluation run, one bar per test image.
plt.figure(figsize=(16, 4))
plt.bar(list(range(len(y_test_scores))), y_test_scores)

In [None]:
import pandas as pd
import pickle
# Load previously stored evaluation results.
# The path is absolute (the drive is mounted at /content/drive in the first cell) because
# the earlier cells `%cd` into run_from_here, where the relative 'drive/...' path does not exist.
# NOTE(review): the evaluation cells in this notebook write 'results_projected.pickle' and
# 'results_projected_patches.pickle'; confirm that 'results.pickle' is the intended file.
# SECURITY: pickle.load can execute arbitrary code; only load files written by this notebook.
results_path = '/content/drive/MyDrive/data/efficientnet_evaluation/abod/results.pickle'
with open(results_path, 'rb') as f:
  results = pickle.load(f)

In [None]:
# Build one table row per category with one column per '<layer>__<n_neighbors>'
# combination, append a 'mean' row and show the columns ordered by that mean.
# Rows are assembled as dicts so that a category missing some combination gets
# NaN in that column instead of shifting or breaking the column lists.
results_df = {}

for category, per_layer in results.items():
  if category == 'n_neighbors':
    # Bookkeeping entry listing the evaluated neighbour settings, not a category.
    continue
  results_df[category] = {
      f'{layer}__{n}': auc
      for layer, per_n in per_layer.items()
      for n, auc in per_n.items()
  }

df = pd.DataFrame(list(results_df.values()),
                  index=pd.Index(list(results_df.keys()), name='category'))
df.loc['mean'] = df.mean()
df.sort_values('mean', axis=1, ascending=False)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Heatmap of the AUC-ROC table (categories x layer/neighbour combinations), saved to Drive.
# The output path is absolute (drive mounted at /content/drive in the first cell) because the
# earlier cells `%cd` into run_from_here, where the relative 'drive/...' path does not exist.
heatmap_path = '/content/drive/MyDrive/data/efficientnet_evaluation/abod/efficient_net_abod_evaluation.png'

plt.figure(figsize=(60,15))
plt.title('Evaluation Abod layer combinnation', fontdict={'fontsize': 40})
ax = sns.heatmap(df, annot=True, cmap="YlGnBu")
plt.savefig(heatmap_path)
plt.show()