# Mounts and imports

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# The run cells further down read embeddings from, and write results to,
# 'drive/MyDrive/...' paths, so this cell has to be executed first.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install --quiet "pyod==1.0.1" "pywin32-ctypes" "faiss-gpu==1.7.1.post3" "p_tqdm"

In [None]:
from tqdm.notebook import tqdm

from sklearn.metrics import confusion_matrix, roc_auc_score
import pickle
from sklearn.random_projection import SparseRandomProjection
from sklearn.neighbors import NearestNeighbors
from scipy.ndimage import gaussian_filter

import tensorflow as tf

from pyod.models.hbos import HBOS
from pyod.models.copod import COPOD
#from pyod.models.auto_encoder_torch import AutoEncoder
from pyod.models.cblof  import CBLOF
from pyod.models.cof  import COF
from pyod.models.deep_svdd  import DeepSVDD
from pyod.models.iforest  import IForest
from pyod.models.ecod import ECOD
from pyod.models.mo_gaal import MO_GAAL
from pyod.models.pca  import PCA
from pyod.models.rod import ROD
from pyod.models.abod import ABOD

from p_tqdm import p_map

from tempfile import mkdtemp
import os
import os.path as path
import numpy as np
import gc

# Report how much memory the runtime has.
# NOTE(review): the value printed is ``virtual_memory().total`` (installed
# RAM), although the message calls it "available" RAM.
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9  # bytes -> decimal gigabytes
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the cut-off this notebook uses to tell a standard runtime from a
# high-RAM one; the check is informational only and changes nothing else.
if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

# Manual Autoencoder definition

In [None]:
from __future__ import division
from __future__ import print_function

import torch
from torch import nn

import numpy as np
from numpy.testing import assert_almost_equal
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted

from pyod.models.base import BaseDetector
from pyod.utils.torch_utility import get_activation_by_name
from pyod.utils.stat_models import pairwise_distances_no_broadcast

class PyODDataset(torch.utils.data.Dataset):
    """Dataset wrapper that serves the rows of a 2-D numpy array to a DataLoader.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        Samples to serve.

    y : Ignored
        Accepted for signature compatibility; never stored or used.

    mean, std : array-like or None
        When both are given, every returned row is standardised as
        ``(row - mean) / std``. When either is ``None`` rows are returned
        unchanged.
    """

    def __init__(self, X, y=None, mean=None, std=None):
        super().__init__()
        self.X = X
        self.mean, self.std = mean, std

    def __len__(self):
        # One item per row of X.
        return self.X.shape[0]

    def __getitem__(self, idx):
        # Samplers may hand over a tensor index; numpy wants plain ints/lists.
        row = idx.tolist() if torch.is_tensor(idx) else idx
        features = self.X[row, :]

        standardise = not (self.mean is None or self.std is None)
        if standardise:
            features = (features - self.mean) / self.std

        # The index is returned next to the data so that callers can write
        # per-sample results back to the right position.
        return torch.from_numpy(features), row



class inner_autoencoder(nn.Module):
    """Fully connected autoencoder: an encoder followed by a mirrored decoder.

    For ``hidden_neurons=[a, b]`` the encoder maps ``n_features -> a -> b``
    and the decoder maps ``b -> a -> n_features``.

    Parameters
    ----------
    n_features : int
        Width of the input (and of the reconstruction).

    hidden_neurons : list of int, optional (default=[128, 64])
        Widths of the encoder layers; the decoder uses them in reverse.

    dropout_rate : float, optional (default=0.2)
        Dropout probability applied after every linear layer.

    batch_norm : bool, optional (default=True)
        Whether a ``BatchNorm1d`` is placed in front of every linear layer.

    hidden_activation : str, optional (default='relu')
        Activation name resolved through ``get_activation_by_name``.
    """

    def __init__(self,
                 n_features,
                 hidden_neurons=None,
                 dropout_rate=0.2,
                 batch_norm=True,
                 hidden_activation='relu'):
        super(inner_autoencoder, self).__init__()
        # ``None`` stands for the historical default; a list literal in the
        # signature would be one object shared by every call.
        if hidden_neurons is None:
            hidden_neurons = [128, 64]

        self.n_features = n_features
        self.dropout_rate = dropout_rate
        self.batch_norm = batch_norm
        self.hidden_activation = hidden_activation

        self.activation = get_activation_by_name(hidden_activation)

        self.layers_neurons_ = [self.n_features, *hidden_neurons]
        self.layers_neurons_decoder_ = self.layers_neurons_[::-1]
        self.encoder = nn.Sequential()
        self.decoder = nn.Sequential()

        n_layers = len(self.layers_neurons_) - 1

        # Encoder: [batch norm] -> linear -> activation -> dropout per layer.
        for idx in range(n_layers):
            if batch_norm:
                self.encoder.add_module(
                    "batch_norm" + str(idx),
                    nn.BatchNorm1d(self.layers_neurons_[idx]))
            self.encoder.add_module(
                "linear" + str(idx),
                torch.nn.Linear(self.layers_neurons_[idx],
                                self.layers_neurons_[idx + 1]))
            self.encoder.add_module(self.hidden_activation + str(idx),
                                    self.activation)
            self.encoder.add_module("dropout" + str(idx),
                                    torch.nn.Dropout(dropout_rate))

        # Decoder: same layout on the reversed widths.
        for idx in range(n_layers):
            if batch_norm:
                self.decoder.add_module(
                    "batch_norm" + str(idx),
                    nn.BatchNorm1d(self.layers_neurons_decoder_[idx]))
            self.decoder.add_module(
                "linear" + str(idx),
                torch.nn.Linear(self.layers_neurons_decoder_[idx],
                                self.layers_neurons_decoder_[idx + 1]))
            # Bug fix: the activation used to be registered on
            # ``self.encoder`` here, which only overwrote the same-named
            # encoder entry and left the decoder without any non-linearity.
            # The reconstruction layer is kept linear on purpose: the
            # detector feeds standardised inputs, which contain negative
            # values that e.g. a ReLU output could never reproduce.
            if idx < n_layers - 1:
                self.decoder.add_module(self.hidden_activation + str(idx),
                                        self.activation)
            self.decoder.add_module("dropout" + str(idx),
                                    torch.nn.Dropout(dropout_rate))

    def forward(self, x):
        """Return the reconstruction of ``x`` (encoder followed by decoder)."""
        # The encoder output is the latent representation; it is not exposed.
        x = self.encoder(x)
        x = self.decoder(x)
        return x

class AutoEncoder(BaseDetector):
    """Auto Encoder (AE) outlier detector implemented with PyTorch.

    An ``inner_autoencoder`` is trained to reconstruct the input; the
    reconstruction error of a sample is used as its outlier score (the
    higher, the more abnormal). See :cite:`aggarwal2015outlier` Chapter 3
    for background.

    Parameters
    ----------
    hidden_neurons : list, optional (default=[64, 32])
        The number of neurons per encoder layer; the decoder mirrors it.
        With the default the network has the structure
        [n_features, 64, 32, 64, n_features].

    hidden_activation : str, optional (default='relu')
        Activation function to use for hidden layers. The name is resolved
        by ``pyod.utils.torch_utility.get_activation_by_name``.

    batch_norm : boolean, optional (default=True)
        Whether to apply Batch Normalization,
        See https://pytorch.org/docs/stable/generated/torch.nn.BatchNorm1d.html

    learning_rate : float, optional (default=1e-3)
        Learning rate of the Adam optimizer.

    epochs : int, optional (default=100)
        Number of epochs to train the model.

    batch_size : int, optional (default=32)
        Number of samples per gradient update (also used for scoring).

    dropout_rate : float in (0., 1), optional (default=0.2)
        The dropout to be used across all layers.

    weight_decay : float, optional (default=1e-5)
        Weight decay passed to the Adam optimizer.

    preprocessing : bool, optional (default=True)
        If True, standardize the data with the training mean and std.

    loss_fn : callable or None, optional (default=None)
        Loss called as ``loss_fn(batch, reconstruction)``. ``None`` selects
        ``torch.nn.MSELoss()``.

    contamination : float in (0., 0.5), optional (default=0.1)
        The amount of contamination of the data set, i.e.
        the proportion of outliers in the data set. When fitting this is used
        to define the threshold on the decision function.

    device : torch.device or str or None, optional (default=None)
        Device to train and score on. ``None`` picks "cuda:0" when CUDA is
        available and "cpu" otherwise.

    Attributes
    ----------
    model : inner_autoencoder
        The underlying network, holding the best weights after ``fit``.

    best_loss : float
        Lowest mean training loss observed over the epochs.

    best_model_dict : dict
        Snapshot of the model ``state_dict`` taken at ``best_loss``.

    mean, std : numpy arrays of shape (n_features,)
        Training statistics used for standardization (only set when
        ``preprocessing`` is True).

    decision_scores_ : numpy array of shape (n_samples,)
        The outlier scores of the training data.
        The higher, the more abnormal. Outliers tend to have higher
        scores. This value is available once the detector is
        fitted.

    threshold_ : float
        The threshold is based on ``contamination``. It is the
        ``n_samples * contamination`` most abnormal samples in
        ``decision_scores_``. The threshold is calculated for generating
        binary outlier labels.

    labels_ : int, either 0 or 1
        The binary labels of the training data. 0 stands for inliers
        and 1 for outliers/anomalies. It is generated by applying
        ``threshold_`` on ``decision_scores_``.
    """

    def __init__(self,
                 hidden_neurons=None,
                 hidden_activation='relu',
                 batch_norm=True,
                 learning_rate=1e-3,
                 epochs=100,
                 batch_size=32,
                 dropout_rate=0.2,
                 weight_decay=1e-5,
                 preprocessing=True,
                 loss_fn=None,
                 contamination=0.1,
                 device=None):
        super(AutoEncoder, self).__init__(contamination=contamination)
        self.hidden_neurons = hidden_neurons
        self.hidden_activation = hidden_activation
        self.batch_norm = batch_norm
        self.learning_rate = learning_rate

        self.epochs = epochs
        self.batch_size = batch_size

        self.dropout_rate = dropout_rate
        self.weight_decay = weight_decay
        self.preprocessing = preprocessing

        # Bug fix: a user-supplied loss used to be dropped, leaving
        # ``self.loss_fn`` undefined and training failing with AttributeError.
        self.loss_fn = torch.nn.MSELoss() if loss_fn is None else loss_fn

        if device is None:
            self.device = torch.device(
                "cuda:0" if torch.cuda.is_available() else "cpu")
        else:
            self.device = device

        # default values
        if self.hidden_neurons is None:
            self.hidden_neurons = [64, 32]

    # noinspection PyUnresolvedReferences
    def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        n_features = X.shape[1]

        # conduct standardization if needed
        if self.preprocessing:
            self.mean, self.std = np.mean(X, axis=0), np.std(X, axis=0)
            # A constant feature has zero std; dividing by it would turn the
            # whole column (and then the loss) into NaN. Scale it by 1 so it
            # is simply centred to 0.
            self.std = np.where(self.std == 0, 1.0, self.std)
            train_set = PyODDataset(X=X, mean=self.mean, std=self.std)
        else:
            train_set = PyODDataset(X=X)

        train_loader = torch.utils.data.DataLoader(train_set,
                                                   batch_size=self.batch_size,
                                                   shuffle=True)

        # initialize the model
        self.model = inner_autoencoder(
            n_features=n_features,
            hidden_neurons=self.hidden_neurons,
            dropout_rate=self.dropout_rate,
            batch_norm=self.batch_norm,
            hidden_activation=self.hidden_activation)

        # move to device
        self.model = self.model.to(self.device)

        # train the autoencoder and keep the weights of the best epoch
        self._train_autoencoder(train_loader)

        self.model.load_state_dict(self.best_model_dict)
        self.decision_scores_ = self.decision_function(X)

        self._process_decision_scores()
        return self

    def _train_autoencoder(self, train_loader):
        """Internal function to train the autoencoder

        Sets ``best_loss`` and ``best_model_dict`` to the lowest mean epoch
        loss and the weights that produced it.

        Parameters
        ----------
        train_loader : torch dataloader
            Train data.
        """
        # Local import keeps this fix self-contained; ``copy`` is stdlib.
        import copy

        optimizer = torch.optim.Adam(
            self.model.parameters(), lr=self.learning_rate,
            weight_decay=self.weight_decay)

        self.best_loss = float('inf')
        self.best_model_dict = None

        # Make sure dropout / batch norm are in training mode.
        self.model.train()

        for epoch in tqdm(range(self.epochs)):
            overall_loss = []
            for data, data_idx in train_loader:
                data = data.to(self.device).float()
                loss = self.loss_fn(data, self.model(data))

                self.model.zero_grad()
                loss.backward()
                optimizer.step()
                overall_loss.append(loss.item())

            epoch_loss = np.mean(overall_loss)

            # track the best model so far
            if epoch_loss <= self.best_loss:
                self.best_loss = epoch_loss
                # Bug fix: ``state_dict()`` returns references to the live
                # parameters, so without a copy the "best" snapshot silently
                # follows every later optimizer step.
                self.best_model_dict = copy.deepcopy(self.model.state_dict())

    def _build_eval_loader(self, X):
        """Return an ordered (non-shuffled) DataLoader over ``X`` for scoring."""
        if self.preprocessing:
            dataset = PyODDataset(X=X, mean=self.mean, std=self.std)
        else:
            dataset = PyODDataset(X=X)

        # shuffle must stay False: scores are written back by sample index
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=self.batch_size,
                                           shuffle=False)

    def decision_function(self, X):
        """Predict raw anomaly score of X using the fitted detector.

        The score of a sample is the distance between the (optionally
        standardized) sample and its reconstruction, as computed by
        ``pairwise_distances_no_broadcast``. Outliers are assigned
        larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        check_is_fitted(self, ['model', 'best_model_dict'])
        X = check_array(X)

        dataloader = self._build_eval_loader(X)
        # enable the evaluation mode
        self.model.eval()

        # construct the vector for holding the reconstruction error
        outlier_scores = np.zeros([X.shape[0], ])
        with torch.no_grad():
            for data, data_idx in dataloader:
                data_cuda = data.to(self.device).float()
                # this is the outlier score
                outlier_scores[data_idx] = pairwise_distances_no_broadcast(
                    data.cpu().numpy(),
                    self.model(data_cuda).cpu().numpy())

        return outlier_scores

    def _pairwise_distances_1(self, X, Y):  # pragma: no cover
        """Return the element-wise absolute difference between X and Y.

        Parameters
        ----------
        X : array of shape (n_samples, n_features)
            First input samples

        Y : array of shape (n_samples, n_features)
            Second input samples

        Returns
        -------
        distance : array of shape (n_samples, n_features)

        """
        r = np.abs(Y - X)
        return r

    def decision_function_pixel_lvl(self, X):
        """Predict per-feature anomaly scores of X using the fitted detector.

        Unlike ``decision_function`` the reconstruction error is not reduced
        per sample: the absolute difference between every (optionally
        standardized) feature and its reconstruction is returned.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples, n_features)
            The absolute reconstruction error of every feature.
        """
        check_is_fitted(self, ['model', 'best_model_dict'])
        X = check_array(X)

        dataloader = self._build_eval_loader(X)
        # enable the evaluation mode
        self.model.eval()

        # construct the matrix for holding the reconstruction error
        outlier_scores = np.zeros((X.shape[0], X.shape[1]))
        with torch.no_grad():
            for data, data_idx in dataloader:
                data_cuda = data.to(self.device).float()
                # this is the outlier score
                outlier_scores[data_idx, :] = self._pairwise_distances_1(
                    data.cpu().numpy(), self.model(data_cuda).cpu().numpy())

        return outlier_scores

# EfficientNet - Image level

In [None]:
def complete_model_eval_per_layer(embeddings_path, save_path, contamination=0.01, n_neighbors=9):
  """Benchmark several outlier detectors on image-level embeddings.

  For every category and every layer 1..8 the train/test embedding pickles
  are loaded, each sample is flattened to one feature vector, every detector
  is fitted on the train vectors and its ROC-AUC on the test vectors is
  stored in ``{save_path}/{category}/results_layer_{layer}.pickle``.
  A layer whose result file already exists is only printed, not recomputed.

  Parameters
  ----------
  embeddings_path : str
      Root folder holding ``{category}/layer_{layer}/embedding_*.pickle``.
  save_path : str
      Root folder for the per-category result pickles (created on demand).
  contamination : float
      Passed to the detectors that are constructed with it below
      (AutoEncoder and DeepSVDD are built without it).
  n_neighbors : int
      Neighbourhood size for ABOD and COF.
  """
  categories = [
      'bottle', 'cable', 'capsule',
      'carpet', 'grid', 'hazelnut',
      'leather', 'metalnut', 'pill',
      'screw', 'tile', 'toothbrush',
      'transistor', 'wood', 'zipper',
  ]

  for category in categories:
    print('\n\n\n----------------------------------')
    print(category)

    for layer in range(1, 9):
      print('\tLayer:', str(layer), end=' ')

      # Make sure the output folder for this category exists.
      out_dir = f'{save_path}/{category}/'
      if not os.path.exists(out_dir):
        os.makedirs(out_dir)
      out_file = f'{out_dir}results_layer_{layer}.pickle'

      # Already evaluated: show the stored numbers and move on.
      if os.path.isfile(out_file):
        with open(out_file, 'rb') as fh:
          cached = pickle.load(fh)
          print()
          for name in cached:
            print('\t\t', name, ':', cached[name])
        continue

      # Load the train and test embeddings of this layer.
      layer_dir = f'{embeddings_path}/{category}/layer_{layer}'
      with open(f'{layer_dir}/embedding_train_projected.pickle', 'rb') as fh:
        train_chunks = pickle.load(fh)['embedding']
      with open(f'{layer_dir}/embedding_test.pickle', 'rb') as fh:
        test_payload = pickle.load(fh)
        test_chunks = test_payload['embedding']
        labels = test_payload['labels']

      # One flat feature vector per training sample.
      train_stack = np.concatenate(train_chunks, axis=0)
      X_train = train_stack.reshape(train_stack.shape[0], -1)
      print(X_train.shape)
      print('--> train reshaped', end='')

      # Same for the test samples.
      test_stack = np.concatenate(test_chunks, axis=0)
      X_test = test_stack.reshape(test_stack.shape[0], -1)
      print('--> test reshaped')

      # Fresh detector instances for every layer.
      eval_models = {
          'ABOD': ABOD(contamination=contamination, n_neighbors=n_neighbors),
          'AutoEncoder': AutoEncoder(hidden_neurons=[1024, 512, 264, 64], epochs=150),
          'CBLOF': CBLOF(contamination=contamination),
          'COF': COF(contamination=contamination, n_neighbors=n_neighbors),
          'COPOD': COPOD(contamination=contamination),
          'DeepSVDD': DeepSVDD(verbose=0, hidden_neurons=[1024, 512, 264, 64], epochs=150),
          'ECOD': ECOD(contamination=contamination),
          'HBOS': HBOS(contamination=contamination, n_bins=50),
          'IForest': IForest(contamination=contamination),
          'PCA': PCA(contamination=contamination),
      }
      # The two neural detectors are left out for the first layer.
      if layer < 2:
        for skipped in ('AutoEncoder', 'DeepSVDD'):
          del eval_models[skipped]

      results_per_layer = {}
      for name, detector in eval_models.items():
        gc.collect()
        print('\t\t', name, ': ', end=' ')
        try:
          detector.fit(X_train)
          img_auc = roc_auc_score(labels, detector.decision_function(X_test))
          print(img_auc)
          results_per_layer[name] = img_auc
        except Exception as e:
          # Best effort: a failing detector is reported and recorded as NaN
          # so the remaining detectors still run.
          print(e)
          results_per_layer[name] = np.nan

      with open(out_file, 'wb') as fh:
        pickle.dump(results_per_layer, fh)

In [None]:
# Run the image-level benchmark on the EfficientNet-B7 embeddings.
# NOTE(review): both paths are relative; presumably the notebook runs with
# /content as working directory so they resolve inside the mounted Drive —
# confirm.
embeddings_path = 'drive/MyDrive/data/efficientnet_embeddings_b7'
save_path = 'drive/MyDrive/data/efficientnet_b7_results/model_eval'
complete_model_eval_per_layer(embeddings_path, save_path, contamination=0.001)

# EfficientNets with coreset sampling on pixel level

In [None]:
from tqdm.auto import tqdm

def reshape_embedding(embedding):
  """Split a 4-D embedding into a list of per-location feature vectors.

  ``embedding`` is indexed as ``[k, :, i, j]``; one vector (the full second
  axis) is returned for every ``(k, i, j)``, ordered with ``k`` outermost
  and ``j`` innermost.
  """
  n_items, _, n_rows, n_cols = embedding.shape[:4]
  return [
      embedding[k, :, i, j]
      for k in range(n_items)
      for i in range(n_rows)
      for j in range(n_cols)
  ]

In [None]:
def complete_model_eval_per_layer_patch_level(embeddings_path, save_path, contamination=0.01, n_neighbors=9):
  """Benchmark outlier detectors on patch-level embeddings, one layer at a time.

  For every category and every layer 1..8 each detector is fitted on the
  (greedily sub-sampled) training patches. A test image is scored by the
  maximum patch score over all of its patches, and the image-level ROC-AUC
  is stored in ``{save_path}/{category}/results_layer_{layer}.pickle``.
  A layer whose result file already exists is not recomputed; only its best
  detector(s) are printed.

  NOTE: this function uses ``k_nearest_neighbors``, which is defined in a
  later cell, and a function of the same name is defined again further down
  the notebook.

  Parameters
  ----------
  embeddings_path : str
      Root folder holding ``{category}/layer_{layer}/embedding_*.pickle``.
  save_path : str
      Root folder for the per-category result pickles (created on demand).
  contamination : float
      Passed to the detectors that are constructed with it below
      (AutoEncoder and DeepSVDD are built without it).
  n_neighbors : int
      Neighbourhood size for the kNN ("PATCH"), ABOD and COF models.
  """
  _to_test = [
              'bottle', 'cable', 'capsule',
              'carpet', 'grid', 'hazelnut',
              'leather', 'metalnut', 'pill',
              'screw', 'tile', 'toothbrush',
              'transistor', 'wood', 'zipper'
              ]

  for category in _to_test:
    print('\n\n\n----------------------------------')
    print(category)

    for layer in range(1, 9):
      print('\tLayer:', str(layer), end=' ')

      # create results folder (no error if it is already there)
      results_save_folder_path = f'{save_path}/{category}/'
      os.makedirs(results_save_folder_path, exist_ok=True)

      results_save_path = f'{results_save_folder_path}results_layer_{layer}.pickle'
      if os.path.isfile(results_save_path):
        with open(results_save_path, 'rb') as f:
          cached = pickle.load(f)
        # Report the detector(s) with the highest stored AUC; NaN entries
        # never compare greater or equal and are therefore skipped.
        maxi = 0
        best_algo = []
        for algo in cached:
          if cached[algo] > maxi:
            maxi = cached[algo]
            best_algo = [algo]
          elif cached[algo] == maxi:
            best_algo.append(algo)
        print('\t\t', best_algo, ':', maxi)
        continue

      # Load embeddings
      f_name = f'{embeddings_path}/{category}/layer_{layer}/embedding_train_projected_greedy.pickle'
      with open(f_name, 'rb') as f:
        embeddings_train = pickle.load(f)['embedding']
      with open(f'{embeddings_path}/{category}/layer_{layer}/embedding_test.pickle', 'rb') as f:
        p = pickle.load(f)
        embeddings_test = p['embedding']
        labels = p['labels']

      # Prepare training embeddings (used as stored, one row per patch)
      X_train = embeddings_train
      print(X_train.shape, end=' - ')

      layers = [264, 128, 64, 32]
      print('Layers:', layers)

      # initialize models:
      eval_models = {
          # was hard-coded to 9, which is also the default of ``n_neighbors``
          'PATCH': k_nearest_neighbors(n_neighbors=n_neighbors),
          'ABOD': ABOD(contamination=contamination, n_neighbors=n_neighbors),
          'AutoEncoder': AutoEncoder(hidden_neurons=layers, epochs=150),
          'CBLOF': CBLOF(contamination=contamination),
          'COF': COF(contamination=contamination, n_neighbors=n_neighbors),
          'COPOD': COPOD(contamination=contamination),
          'DeepSVDD': DeepSVDD(verbose=0, hidden_neurons=layers, epochs=150),
          'ECOD': ECOD(contamination=contamination),
          'HBOS': HBOS(contamination=contamination, n_bins=50),
          'IForest': IForest(contamination=contamination),
          'PCA': PCA(contamination=contamination),
          }

      results_per_layer = {}
      for model in eval_models:
        gc.collect()
        print('\t\t', model, ': ', end=' ')
        try:
          m = eval_models[model]
          m.fit(X_train)
          y_test_scores = []
          for img_emb in embeddings_test:
            X_test = np.array(reshape_embedding(np.array(img_emb)))
            score_patches = m.decision_function(X_test)

            # Bug fix: the faiss-backed kNN model returns a
            # ``(distances, indices)`` tuple instead of a score vector, so
            # ``max()`` on it always raised and PATCH was always stored as
            # NaN. Reduce it to one score per patch: the mean distance to
            # its neighbours.
            if isinstance(score_patches, tuple):
              score_patches = score_patches[0].mean(axis=1)

            # image score = most anomalous patch
            y_test_scores.append(np.max(score_patches))

          img_auc = roc_auc_score(labels, y_test_scores)
          print(img_auc)
          results_per_layer[model] = img_auc
        except Exception as e:
          # Best effort: a failing detector is reported and recorded as NaN
          # so the remaining detectors still run.
          print(e)
          results_per_layer[model] = np.nan

      with open(results_save_path, 'wb') as f:
        pickle.dump(results_per_layer, f)

In [None]:
# Run the single-layer patch-level benchmark on the EfficientNet-B7
# embeddings with the default contamination and neighbourhood size.
# NOTE(review): the function builds a ``k_nearest_neighbors`` model, which is
# only defined in a later cell — that cell has to be executed before this one.
embeddings_path = 'drive/MyDrive/data/efficientnet_embeddings_b7'
save_path = 'drive/MyDrive/data/efficientnet_b7_results/model_eval_patchlevel'
complete_model_eval_per_layer_patch_level(embeddings_path, save_path)

# EfficientNet B7 Patch multi optimization

In [None]:
import torch
import torch.nn.functional as F
import os

import faiss
from datetime import datetime
from tqdm.auto import tqdm

def reshape_embedding(embedding):
  """Return one feature vector per ``(k, i, j)`` position of a 4-D embedding.

  Each vector is ``embedding[k, :, i, j]``; positions are visited with the
  first axis outermost and the last axis innermost.
  """
  outer, inner_rows, inner_cols = (embedding.shape[0],
                                   embedding.shape[2],
                                   embedding.shape[3])
  return [
      embedding[k, :, i, j]
      for k in range(outer)
      for i in range(inner_rows)
      for j in range(inner_cols)
  ]

class k_nearest_neighbors():
  """Exact k-nearest-neighbour search over training vectors (faiss ``IndexFlatL2``).

  Parameters
  ----------
  n_neighbors : int, optional (default=10)
      Number of neighbours returned per query vector.
  """

  def __init__(self, n_neighbors=10):
    self.n_neighbors = n_neighbors

  @staticmethod
  def _as_faiss_input(X):
    """Return ``X`` as a C-contiguous float32 array, the layout faiss expects."""
    return np.ascontiguousarray(X, dtype=np.float32)

  def fit(self, X):
    """Index the rows of ``X`` (shape ``(n_samples, n_features)``).

    Returns
    -------
    self : object
        The fitted instance (previously ``None``; returned for chaining).
    """
    # Pickled embeddings may be float64 or non-contiguous, which faiss does
    # not accept; normalise once here.
    X = self._as_faiss_input(X)
    self.index = faiss.IndexFlatL2(X.shape[1])
    self.index.add(X)
    return self

  def decision_function(self, X_test):
    """Search the ``n_neighbors`` nearest indexed vectors of every row.

    Unlike the PyOD detectors this returns a tuple, not a score vector.

    Returns
    -------
    score_patches : array of shape (n_queries, n_neighbors)
        Distances reported by the faiss index for each query row
        (squared L2 for ``IndexFlatL2`` according to the faiss docs).
    indices : array of shape (n_queries, n_neighbors)
        Row numbers of the matched training vectors.
    """
    score_patches, indices = self.index.search(
        self._as_faiss_input(X_test), k=self.n_neighbors)

    return score_patches, indices

def embedding_concat(x, y):
    """Concatenate a coarser feature map ``y`` onto ``x`` along the channel axis.

    ``x`` is ``(B, C1, H1, W1)`` and ``y`` is ``(B, C2, H2, W2)``. ``x`` is
    cut into non-overlapping ``s x s`` blocks with ``s = int(H1 / H2)``,
    every block position gets the matching ``y`` vector appended, and the
    blocks are folded back, giving a ``(B, C1 + C2, H1, W1)`` tensor
    allocated with ``torch.zeros`` defaults.
    """
    # from https://github.com/xiahaifeng1995/PaDiM-Anomaly-Detection-Localization-master
    batch, c_fine, h_fine, w_fine = x.size()
    _, c_coarse, h_coarse, w_coarse = y.size()
    scale = int(h_fine / h_coarse)

    # (B, C1 * s * s, H2 * W2) -> (B, C1, s * s, H2, W2)
    blocks = F.unfold(x, kernel_size=scale, dilation=1, stride=scale)
    blocks = blocks.view(batch, c_fine, -1, h_coarse, w_coarse)
    n_offsets = blocks.size(2)

    merged = torch.zeros(batch, c_fine + c_coarse, n_offsets, h_coarse, w_coarse)
    for offset in range(n_offsets):
        # every in-block offset receives the same coarse features
        merged[:, :, offset, :, :] = torch.cat((blocks[:, :, offset, :, :], y), 1)

    merged = merged.view(batch, -1, h_coarse * w_coarse)
    return F.fold(merged, kernel_size=scale, output_size=(h_fine, w_fine), stride=scale)

def complete_model_eval_per_layer_patch_level(embeddings_path, save_path, contamination=0.01, n_neighbors=9):
  """Tune the neighbourhood size of the kNN ("PATCH") and ABOD patch scorers
  on embeddings concatenated from several layers.

  For every category and every layer combination in ``layer_combis``:

  * the greedily sub-sampled training patches are loaded from the
    ``layer_<joined layers>`` folder,
  * the per-layer test embeddings are merged with ``embedding_concat`` and
    flattened to one row per patch with ``reshape_embedding``,
  * for every value in ``nneighbors`` two image-level ROC-AUCs are computed:
    "top1" (highest patch score of the image) and "top10" (mean of the ten
    highest patch scores).

  Results are kept in one pickle per layer combination under the keys
  ``PATCH_<n>`` / ``ABOD_<n>``; keys that are already present are printed
  and not recomputed.

  Parameters
  ----------
  embeddings_path : str
      Root folder holding the ``{category}/layer_*/embedding_*.pickle`` files.
  save_path : str
      Root folder for the per-category result pickles (created on demand).
  contamination : float
      Passed to ABOD.
  n_neighbors : int
      Not used by this function; the values in ``nneighbors`` are used instead.
  """

  _to_test = [
              'bottle', 'cable','capsule',
              'carpet', 'grid', 'hazelnut',
              'leather','metalnut', 'pill',
              'screw', 'tile', 'toothbrush',
              'transistor', 'wood', 'zipper'
              ]

  for category in _to_test:
    print('\n\n\n----------------------------------')
    print(category)

    # Layer combinations to evaluate (layer ids are strings because they are
    # joined into folder names).
    layer_combis = [["3", "4"],
                    ["4", "5"],
                    ["3", "5"],
                    ["3", "4", "5"]
                    ]
    for layer_combi in layer_combis:
      print('\tLayer:', '_'.join(layer_combi))
      results_save_folder_path = f'{save_path}/{category}/'
      if not os.path.exists(results_save_folder_path):
        os.makedirs(results_save_folder_path)

      # NOTE(review): ``layer_combi`` is a list, so the file name contains its
      # repr (e.g. "results_layer_['3', '4'].pickle") rather than the
      # '_'-joined form used for the folders. Left as is because existing
      # result files are looked up under this name.
      results_save_path = f'{results_save_folder_path}results_layer_{layer_combi}.pickle'
      
      # Load the training patches of the combined layers.
      f_name = f'{embeddings_path}/{category}/layer_{"_".join(layer_combi)}/embedding_train_projected_greedy.pickle'
      with open(f_name, 'rb') as f:
        embeddings_train = pickle.load(f)['embedding']
      
      # Load the test embeddings of every single layer; ``labels`` is taken
      # from the last layer read.
      et = []
      for layer in layer_combi:
        with open(f'{embeddings_path}/{category}/layer_{layer}/embedding_test.pickle', 'rb') as f:
          p = pickle.load(f)
          et.append(p['embedding'])
          labels = p['labels']

      # Merge the layers image by image and flatten each image to a
      # (n_patches, n_features) array.
      embeddings_test = []
      for ix, _ in enumerate(et[0]):
        embedding = embedding_concat(torch.FloatTensor(et[0][ix]),
                                    torch.FloatTensor(et[1][ix])
                                    )
        
        if len(layer_combi) == 3:
          embedding = embedding_concat(embedding, torch.FloatTensor(et[2][ix]))
        embeddings_test.append(np.array(reshape_embedding(embedding.cpu().detach().numpy())))

      # Prepare training embeddings
      X_train = embeddings_train

      # Neighbourhood sizes to evaluate.
      nneighbors = [1, 5, 10, 15, 20, 25, 30]


      # Resume from previously stored results if there are any.
      if not os.path.isfile(results_save_path):
        results_per_layer = {}
      else:
        with open(results_save_path, 'rb') as f:
          results_per_layer = pickle.load(f) 

      patch_scores = {}
      abod_scores = {}

      ##### PATCH (kNN) #####
      gc.collect()
      # Only neighbourhood sizes without a stored result need computing.
      n_new = [n_n for n_n in nneighbors if f'PATCH_{n_n}' not in results_per_layer]
      if len(n_new) > 0:
        # Search once with the largest k; smaller neighbourhoods are obtained
        # below by slicing the first n_n columns of the result.
        model_patch = k_nearest_neighbors(n_neighbors=max(n_new))
        model_patch.fit(X_train)
        for X_test in embeddings_test:
          gc.collect()
          score_patches, idx = model_patch.decision_function(X_test)

          for n_n in n_new:
            if f'PATCH_{n_n}' in results_per_layer:
              continue
            if n_n not in patch_scores:
              patch_scores[n_n] = {'y_test_scores_top1': [],
                                    'y_test_scores_top10': []}
            # patch score = mean distance to its n_n nearest neighbours
            score_patches_i = score_patches[:, :n_n].mean(axis=1).flatten()
            # top1: the most anomalous patch of the image
            score = max(score_patches_i)
            patch_scores[n_n]['y_test_scores_top1'].append(score)

            # top10: mean of the ten highest patch scores
            sorted_index_array = np.argsort(score_patches_i)
            sorted_array = score_patches_i[sorted_index_array]
            rslt = sorted_array[-10:]
            patch_scores[n_n]['y_test_scores_top10'].append(rslt.mean())

      # Print stored results, compute and store the missing ones.
      for n_n in nneighbors:
        print(f'\t\tPatch_{n_n}: ', end='\t')
        if f'PATCH_{n_n}' in results_per_layer:
            print('Img_auc_top1:', results_per_layer[f'PATCH_{n_n}']['img_auc_top1'], 
                  ' -- Img_auc_top10:', results_per_layer[f'PATCH_{n_n}']['img_auc_top10'])
        else:
          img_auc_top1 = roc_auc_score(labels, patch_scores[n_n]['y_test_scores_top1'])
          print('Img_auc_top1:', img_auc_top1, end=' -- ')
          img_auc_top10 = roc_auc_score(labels, patch_scores[n_n]['y_test_scores_top10'])
          print('Img_auc_top10:', img_auc_top10)
          results_per_layer[f'PATCH_{n_n}'] = {'img_auc_top1': img_auc_top1,
                                                'img_auc_top10': img_auc_top10}

      # Checkpoint the kNN results before the ABOD runs start.
      with open(results_save_path, 'wb') as f:
        pickle.dump(results_per_layer, f) 

      ##### ABOD #####

      # Worker for p_map: scores the patches of one image and hands the
      # image label back together with the scores.
      # NOTE(review): the name ``f`` is also used for the file handles in
      # this function; the worker is re-defined on every pass after the
      # ``with`` block above, so p_map receives the function, but the reuse
      # is fragile.
      def f(x):
        X_test, m, label = x[0], x[1], x[2]
        score_patches = m.decision_function(X_test)
        return score_patches, label

      for n_n in nneighbors:
        # n_n == 1 is not evaluated for ABOD; stored results are skipped.
        if n_n == 1 or f'ABOD_{n_n}' in results_per_layer:
          continue

        if n_n not in abod_scores:
          abod_scores[n_n] = {'y_test_scores_top1': [],
                              'y_test_scores_top10': []}

        m = ABOD(contamination=contamination, n_neighbors=n_n)
        m.fit(X_train)

        # One work item per test image: (patches, fitted model, label).
        _to_multi_process = []
        for iii, X_test in enumerate(embeddings_test):
          _to_multi_process.append((X_test, m, labels[iii]))

        result_m = p_map(f, _to_multi_process)
        # Labels are re-collected in the order the results are returned.
        labels_new = []
        for score_patches, label in result_m:
          labels_new.append(label)

          score = max(score_patches)
          abod_scores[n_n]['y_test_scores_top1'].append(score)

          sorted_index_array = np.argsort(score_patches)
          sorted_array = score_patches[sorted_index_array]
          rslt = sorted_array[-10:]
          abod_scores[n_n]['y_test_scores_top10'].append(rslt.mean())

      # Print stored results, compute and store the missing ones.
      # ``labels_new`` is only read for neighbourhood sizes computed above.
      for n_n in nneighbors:
        if n_n == 1:
          continue
        print(f'\t\tABOD_{n_n}: ', end='\t')
        if f'ABOD_{n_n}' in results_per_layer:
          print('Img_auc_top1:', results_per_layer[f'ABOD_{n_n}']['img_auc_top1'], 
                ' -- Img_auc_top10:', results_per_layer[f'ABOD_{n_n}']['img_auc_top10'])
        else:
          img_auc_top1 = roc_auc_score(labels_new, abod_scores[n_n]['y_test_scores_top1'])
          print('Img_auc_top1:', img_auc_top1, end=' -- ')
          img_auc_top10 = roc_auc_score(labels_new, abod_scores[n_n]['y_test_scores_top10'])
          print('Img_auc_top10:', img_auc_top10)
          results_per_layer[f'ABOD_{n_n}'] = {'img_auc_top1': img_auc_top1,
                                              'img_auc_top10': img_auc_top10}

      with open(results_save_path, 'wb') as f:
        pickle.dump(results_per_layer, f) 

In [None]:
# Run the multi-layer patch-level tuning on the EfficientNet-B7 embeddings.
# This calls the most recently executed definition of
# ``complete_model_eval_per_layer_patch_level``; the name is defined twice in
# this notebook.
embeddings_path = 'drive/MyDrive/data/efficientnet_embeddings_b7'
save_path = 'drive/MyDrive/data/efficientnet_b7_results/model_eval_patchlevel_multi'
complete_model_eval_per_layer_patch_level(embeddings_path, save_path)

# EfficientNet-B3 Image Multi Optimization

In [None]:
import torch
import torch.nn.functional as F
import os

import faiss
from datetime import datetime
from tqdm.auto import tqdm

def complete_model_eval_per_layer_img_level(embeddings_path, save_path, contamination=0.01, n_neighbors=9):
  """Sweep AutoEncoder layouts on image-level embeddings and store the AUCs.

  For every category and every combination of backbone layers, the train and
  test embeddings of the selected layers are flattened to one row per sample
  and concatenated along the feature axis. One AutoEncoder is fitted per
  (hidden layout, dropout rate) pair and rated by the ROC-AUC of its
  ``decision_function`` on the test rows. All AUCs of one layer combination
  are pickled together as ``{layout_key: {dropout_rate: {epochs: auc}}}``.
  A combination whose result file already exists is printed and skipped.

  Args:
    embeddings_path: Root folder containing
      ``<category>/layer_<n>/embedding_train_projected.pickle`` and
      ``<category>/layer_<n>/embedding_test.pickle``.
    save_path: Root folder for the result pickles (one sub-folder per
      category, created on demand).
    contamination: Not used by this function.
    n_neighbors: Not used by this function.
  """
  categories = [
      'bottle', 'cable',
      'capsule', 'carpet',
      'grid', 'hazelnut',
      'leather', 'metalnut',
      'pill', 'screw',
      'tile', 'toothbrush',
      'transistor', 'wood', 'zipper',
  ]
  backbone_layer_sets = [
      ["5", "6"],
      ["5", "7"],
      ["6", "7"],
      ["5", "6", "7"],
  ]
  hidden_layouts = [
      [512, 264, 64],
      [1024, 512, 264, 64],
      [2048, 1024, 512, 264],
      [2048, 1024, 512, 264, 64],
      [4096, 2048, 1024, 512, 264],
      [4096, 2048, 1024, 512, 264, 64],
  ]
  dropout_rates = [0.1, 0.2]
  n_epochs = 150

  def _one_row_per_sample(chunks):
    # Stack the per-batch arrays and flatten everything behind the sample axis.
    stacked = np.concatenate(chunks, axis=0)
    return stacked.reshape(stacked.shape[0], -1)

  for category in categories:
    print('\n\n\n----------------------------------')
    print(category)

    for layer_combi in backbone_layer_sets:
      print('\tLayer:', '_'.join(layer_combi))

      results_save_folder_path = f'{save_path}/{category}/'
      if not os.path.exists(results_save_folder_path):
        os.makedirs(results_save_folder_path)

      # The list is interpolated as-is, so the file name contains its repr.
      results_save_path = f'{results_save_folder_path}results_layer_{layer_combi}.pickle'
      if os.path.isfile(results_save_path):
        # Already evaluated: show the stored numbers and move on.
        with open(results_save_path, 'rb') as cached:
          print(pickle.load(cached))
        continue

      train_parts = []
      test_parts = []
      for layer in layer_combi:
        layer_dir = f'{embeddings_path}/{category}/layer_{layer}'

        with open(f'{layer_dir}/embedding_train_projected.pickle', 'rb') as fh:
          train_parts.append(_one_row_per_sample(pickle.load(fh)['embedding']))

        with open(f'{layer_dir}/embedding_test.pickle', 'rb') as fh:
          test_payload = pickle.load(fh)
        test_parts.append(_one_row_per_sample(test_payload['embedding']))
        # Labels are taken from the last layer that was read.
        labels = test_payload['labels']

      X_train = np.concatenate(train_parts, axis=1)
      X_test = np.concatenate(test_parts, axis=1)
      print('X_train shape:', X_train.shape)
      print('X_test shape:', X_test.shape)

      results = {}
      for hidden in hidden_layouts:
        layout_key = '_'.join(str(width) for width in hidden)
        per_dropout = results.setdefault(layout_key, {rate: {} for rate in dropout_rates})

        for rate in dropout_rates:
          model = AutoEncoder(hidden_neurons=hidden, epochs=n_epochs, dropout_rate=rate)
          model.fit(X_train)
          img_auc = roc_auc_score(labels, model.decision_function(X_test))
          print(f'Layers: {hidden}, dropout_rate: {rate}, Epochs trained: {n_epochs}', img_auc)
          per_dropout[rate][n_epochs] = img_auc

      with open(results_save_path, 'wb') as out_file:
        pickle.dump(results, out_file)

In [None]:
# Run the image-level AutoEncoder sweep; both paths point at the
# EfficientNet-B3 folders below the mounted Drive.
embeddings_path = 'drive/MyDrive/data/efficientnet_embeddings_b3'
save_path = 'drive/MyDrive/data/efficientnet_b3_results/model_eval_imglevel_multi'
complete_model_eval_per_layer_img_level(embeddings_path, save_path)

# EfficientNet-B3 Feature Level AutoEncoder

In [None]:
import torch
import torch.nn.functional as F
import os

import faiss
from datetime import datetime
from tqdm.auto import tqdm

def complete_model_eval_per_layer_feature_level(embeddings_path, save_path, contamination=0.01, n_neighbors=9):
  """Evaluate AutoEncoders using per-feature scores of each test sample.

  For every category and every combination of backbone layers, the train and
  test embeddings of the selected layers are flattened to one row per sample
  and concatenated along the feature axis. One AutoEncoder is fitted per
  (hidden layout, dropout rate) pair. Its ``decision_function_pixel_lvl``
  output (indexed here as a 2-D array, one row per test sample) is reduced to
  one score per sample in several ways, and each reduction is rated with
  ROC-AUC:

  * ``'max'``     -- the largest value of the row,
  * ``'<n>-mean'`` -- the mean of the ``n`` largest values of the row, for
    ``n`` in 100, 200, 300, 400, 500.

  Results are pickled per layer combination as
  ``{layout_key: {dropout_rate: {epochs: {reduction: auc}}}}``.

  NOTE: unlike the image-level sweep, existing result files are not reused;
  they are overwritten on every run.

  Args:
    embeddings_path: Root folder containing
      ``<category>/layer_<n>/embedding_train_projected.pickle`` and
      ``<category>/layer_<n>/embedding_test.pickle``.
    save_path: Root folder for the result pickles (one sub-folder per
      category, created on demand).
    contamination: Not used by this function.
    n_neighbors: Not used by this function.
  """
  # Imported locally so the function does not depend on another notebook cell
  # having imported `time` beforehand.
  import time

  _to_test = [
              'bottle', 'cable',
              'capsule', 'carpet',
              'grid', 'hazelnut',
              'leather', 'metalnut',
              'pill', 'screw',
              'tile', 'toothbrush',
              'transistor', 'wood', 'zipper'
              ]

  layer_combis = [#["5", "6"],
                  #["5", "7"],
                  ["7"],
                  ["6", "7"],
                  #["5", "6", "7"]
                  ]

  layer_combinations = [
      #[264, 64, 32],
      [512, 264, 64],
      #[1024, 512, 264, 64],
      #[2048, 1024, 512, 264],
      #[2048, 1024, 512, 264, 64],
      [4096, 2048, 1024, 512, 264],
      #[4096, 2048, 1024, 512, 264, 64]
  ]
  dropout_rates = [0.1]

  # Single source of truth for the epoch count: it is passed to the model AND
  # used as result key / in the log line, so the two can no longer disagree.
  n_epochs = 200
  top_ns = [100, 200, 300, 400, 500]

  for category in _to_test:
    print('\n\n\n----------------------------------')
    print(category)

    for layer_combi in layer_combis:
      print('\tLayer:', '_'.join(layer_combi))
      results_save_folder_path = f'{save_path}/{category}/'
      if not os.path.exists(results_save_folder_path):
        os.makedirs(results_save_folder_path)

      results_save_path = f'{results_save_folder_path}results_layer_{layer_combi}_feature_level.pickle'

      e_test = []
      e_train = []
      for layer in layer_combi:
        f_name = f'{embeddings_path}/{category}/layer_{layer}/embedding_train_projected.pickle'
        with open(f_name, 'rb') as fh:
          embeddings_train = pickle.load(fh)['embedding']
        embeddings_train_layer = np.concatenate(embeddings_train, axis=0)
        e_train.append(np.reshape(embeddings_train_layer, (embeddings_train_layer.shape[0], -1)))

        with open(f'{embeddings_path}/{category}/layer_{layer}/embedding_test.pickle', 'rb') as fh:
          p = pickle.load(fh)
        embeddings_test = p['embedding']
        embedding_test_layer = np.concatenate(embeddings_test, axis=0)
        e_test.append(np.reshape(embedding_test_layer, (embedding_test_layer.shape[0], -1)))
        # Labels are taken from the last layer that was read.
        labels = p['labels']

      X_train = np.concatenate(e_train, axis=1)
      X_test = np.concatenate(e_test, axis=1)
      print('X_train shape:', X_train.shape)
      print('X_test shape:', X_test.shape)

      results = {}
      for hidden_neurons in layer_combinations:

        # Give the GPU allocator a moment and release cached memory before the
        # next (possibly much larger) model is built.
        time.sleep(1)
        torch.cuda.empty_cache()
        gc.collect()

        layout_key = '_'.join([str(i) for i in hidden_neurons])
        if layout_key not in results:
          results[layout_key] = {j: {} for j in dropout_rates}

        for dropout_rate in dropout_rates:
          m = AutoEncoder(hidden_neurons=hidden_neurons, epochs=n_epochs, dropout_rate=dropout_rate)
          m.fit(X_train)
          y_test_scores = np.asarray(m.decision_function_pixel_lvl(X_test))

          res = {'max': roc_auc_score(labels, np.max(y_test_scores, axis=1))}

          # Row-wise top-k, largest first. Partitioning on the LAST top_m
          # positions puts exactly the top_m largest values there in ascending
          # order; reversing yields a descending list whose first n entries are
          # the n largest values. top_m is clamped so short rows do not raise.
          top_m = min(max(top_ns), y_test_scores.shape[1])
          top_desc = np.partition(
              y_test_scores, np.arange(-top_m, 0), axis=1)[:, -top_m:][:, ::-1]
          for n in top_ns:
            res[f'{n}-mean'] = roc_auc_score(labels, np.mean(top_desc[:, :n], axis=1))

          results[layout_key][dropout_rate][n_epochs] = res

          for reduction, auc in res.items():
            print(f'Layers: {hidden_neurons}, dropout_rate: {dropout_rate}, Epochs trained: {n_epochs}', reduction, auc)

          # Drop the model before the next one is created to keep peak memory low.
          del m
          torch.cuda.empty_cache()
          gc.collect()
          time.sleep(2)

      with open(results_save_path, 'wb') as fh:
        pickle.dump(results, fh)

In [None]:
# `time` is used (time.sleep) inside complete_model_eval_per_layer_feature_level.
import time

# Run the feature-level AutoEncoder evaluation on the EfficientNet-B3 folders.
# NOTE(review): save_path is the same folder as the image-level run; the
# feature-level result files carry a "_feature_level" suffix in their name.
embeddings_path = 'drive/MyDrive/data/efficientnet_embeddings_b3'
save_path = 'drive/MyDrive/data/efficientnet_b3_results/model_eval_imglevel_multi'
complete_model_eval_per_layer_feature_level(embeddings_path, save_path)

In [None]:
# Housekeeping cell: release PyTorch's cached CUDA memory, run the Python
# garbage collector, then print PyTorch's full CUDA memory report.
import torch
torch.cuda.empty_cache()
gc.collect()
print(torch.cuda.memory_summary(device=None, abbreviated=False))

# PatchCore with Wide ResNet

In [None]:
import torch
import torch.nn.functional as F
import os

import faiss
from datetime import datetime
from tqdm.auto import tqdm

class k_nearest_neighbors():
  """Exact k-nearest-neighbour search backed by a flat faiss L2 index.

  The training rows are stored unchanged in a ``faiss.IndexFlatL2``; queries
  are answered by brute-force search against all of them.
  """

  def __init__(self, n_neighbors=10):
    """Store the number of neighbours returned per query row."""
    self.n_neighbors = n_neighbors

  def fit(self, X):
    """Build the index from ``X`` (2-D, one row per reference vector).

    faiss only accepts C-contiguous float32 arrays, so ``X`` is converted when
    necessary (no copy is made if it already has that layout).
    """
    X = np.ascontiguousarray(X, dtype=np.float32)
    self.index = faiss.IndexFlatL2(X.shape[1])
    self.index.add(X)

  def decision_function(self, X_test):
    """Look up the ``n_neighbors`` nearest stored rows for every query row.

    Args:
      X_test: 2-D array of query vectors with the same width as the data
        passed to ``fit``; converted to C-contiguous float32 if needed.

    Returns:
      Tuple ``(score_patches, indices)`` exactly as returned by
      ``index.search``: the distances to the neighbours (per the faiss
      documentation ``IndexFlatL2`` reports squared L2 distances, nearest
      first) and the positions of those neighbours in the fitted data.
    """
    X_test = np.ascontiguousarray(X_test, dtype=np.float32)
    score_patches, indices = self.index.search(X_test, k=self.n_neighbors)

    return score_patches, indices

def wideresnet_eval(contamination=0.01):
  """Compare kNN-distance ("PATCH") and ABOD scoring on Wide ResNet embeddings.

  For every category the train embeddings (``embedding.pickle``) and the test
  embeddings with labels (``embedding_test.pickle``, a dict with the keys
  ``'embedding'`` and ``'labels'``) are loaded. Each entry of the test
  embeddings is scored patch by patch, and the patch scores are reduced to
  two image-level scores: the maximum ("top1") and the mean of the ten
  largest values ("top10"). Both are rated with ROC-AUC.

  * PATCH_<k>: mean distance to the ``k`` nearest train rows
    (``k_nearest_neighbors``), for k in 1, 5, 10, 15, 20, 25, 30.
  * ABOD_<k>: ``ABOD(n_neighbors=k)`` decision scores for the same ``k``
    values except 1; test entries are scored in parallel through ``p_map``.

  Results are kept in one pickle per category, keyed ``'PATCH_<k>'`` /
  ``'ABOD_<k>'``. Entries already present in that file are printed and not
  recomputed. The file is written once after the PATCH part and once after
  the ABOD part.

  Args:
    contamination: Passed to ``ABOD`` as its ``contamination`` argument.
  """
  _to_test = [
              'bottle', 'cable',
              'capsule', 'carpet',
              'grid', 'hazelnut',
              'leather', 'metal_nut',
              'pill', 'screw',
              'tile', 'toothbrush',
              'transistor', 'wood', 'zipper'
              ]
  nneighbors = [1, 5, 10, 15, 20, 25, 30]

  def _image_level_scores(patch_level_scores):
    """Return (max, mean of the 10 largest) of a 1-D array of patch scores."""
    top10 = np.sort(patch_level_scores)[-10:]
    return max(patch_level_scores), top10.mean()

  def _score_image(item):
    """p_map worker: score one test entry and hand its label back with it."""
    X_test, model, label = item
    return model.decision_function(X_test), label

  # makedirs (instead of mkdir) also creates a missing parent folder, like the
  # other evaluation functions in this notebook do.
  results_dir = 'drive/MyDrive/data/wideresnet_evaluation/patch_vs_abod'
  os.makedirs(results_dir, exist_ok=True)

  for category in _to_test:
    print('\n\n\n----------------------------------')
    print(category)

    base = f'drive/MyDrive/data/wideresnet_embedding/{category}/'
    results_save_path = f'{results_dir}/{category}.pickle'

    # Load embeddings
    with open(f'{base}/embedding.pickle', 'rb') as fh:
      X_train = pickle.load(fh)

    with open(f'{base}/embedding_test.pickle', 'rb') as fh:
      p = pickle.load(fh)
    embeddings_test = p['embedding']
    labels = p['labels']

    # Resume from earlier runs if a result file exists.
    if os.path.isfile(results_save_path):
      with open(results_save_path, 'rb') as fh:
        results = pickle.load(fh)
    else:
      results = {}

    patch_scores = {}
    abod_scores = {}

    ##### PATCH (kNN distance) ####
    gc.collect()
    n_new = [n_n for n_n in nneighbors if f'PATCH_{n_n}' not in results]
    if len(n_new) > 0:
      # Search once with the largest k; smaller k reuse the leading columns.
      model_patch = k_nearest_neighbors(n_neighbors=max(n_new))
      model_patch.fit(X_train)
      for X_test in embeddings_test:
        gc.collect()
        score_patches, idx = model_patch.decision_function(X_test)

        for n_n in n_new:
          per_k = patch_scores.setdefault(n_n, {'y_test_scores_top1': [],
                                                'y_test_scores_top10': []})
          score_patches_i = score_patches[:, :n_n].mean(axis=1).flatten()
          top1, top10 = _image_level_scores(score_patches_i)
          per_k['y_test_scores_top1'].append(top1)
          per_k['y_test_scores_top10'].append(top10)

    for n_n in nneighbors:
      print(f'\t\tPatch_{n_n}: ', end='\t')
      if f'PATCH_{n_n}' in results:
        print('Img_auc_top1:', results[f'PATCH_{n_n}']['img_auc_top1'],
              ' -- Img_auc_top10:', results[f'PATCH_{n_n}']['img_auc_top10'])
      else:
        img_auc_top1 = roc_auc_score(labels, patch_scores[n_n]['y_test_scores_top1'])
        print('Img_auc_top1:', img_auc_top1, end=' -- ')
        img_auc_top10 = roc_auc_score(labels, patch_scores[n_n]['y_test_scores_top10'])
        print('Img_auc_top10:', img_auc_top10)
        results[f'PATCH_{n_n}'] = {'img_auc_top1': img_auc_top1,
                                   'img_auc_top10': img_auc_top10}

    with open(results_save_path, 'wb') as fh:
      pickle.dump(results, fh)

    ##### ABOD ####
    for n_n in nneighbors:
      if n_n == 1 or f'ABOD_{n_n}' in results:
        continue

      per_k = abod_scores.setdefault(n_n, {'y_test_scores_top1': [],
                                           'y_test_scores_top10': []})

      m = ABOD(contamination=contamination, n_neighbors=n_n)
      m.fit(X_train)

      _to_multi_process = [(X_test, m, labels[iii])
                           for iii, X_test in enumerate(embeddings_test)]

      result_m = p_map(_score_image, _to_multi_process)
      # Labels travel with the scores so both lists stay aligned.
      labels_new = []
      for score_patches, label in result_m:
        labels_new.append(label)
        top1, top10 = _image_level_scores(score_patches)
        per_k['y_test_scores_top1'].append(top1)
        per_k['y_test_scores_top10'].append(top10)

    for n_n in nneighbors:
      if n_n == 1:
        continue
      print(f'\t\tABOD_{n_n}: ', end='\t')
      if f'ABOD_{n_n}' in results:
        print('Img_auc_top1:', results[f'ABOD_{n_n}']['img_auc_top1'],
              ' -- Img_auc_top10:', results[f'ABOD_{n_n}']['img_auc_top10'])
      else:
        img_auc_top1 = roc_auc_score(labels_new, abod_scores[n_n]['y_test_scores_top1'])
        print('Img_auc_top1:', img_auc_top1, end=' -- ')
        img_auc_top10 = roc_auc_score(labels_new, abod_scores[n_n]['y_test_scores_top10'])
        print('Img_auc_top10:', img_auc_top10)
        results[f'ABOD_{n_n}'] = {'img_auc_top1': img_auc_top1,
                                  'img_auc_top10': img_auc_top10}

    with open(results_save_path, 'wb') as fh:
      pickle.dump(results, fh)

In [None]:
# Run the PATCH-vs-ABOD comparison; contamination is forwarded to ABOD.
wideresnet_eval(contamination=0.01)
