In [9]:
import torch
from torchvision.models import wide_resnet50_2, resnet18
import cv2
import matplotlib.pyplot as plt

from padim_utils import AnomalyDetector, extractEmbeddingVectors, getParameters, extractEmbeddingVectorsBatched
from torch.utils.data import DataLoader
import datasets.mvtec as mvtec

import time
import numpy as np 
from sklearn.covariance import LedoitWolf
from collections import OrderedDict
import torch.nn.functional as F
import random
from random import sample

from feature_extraction import Resnet18Features

In [10]:
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
# device = torch.device('cpu')
device

device(type='cpu')

In [11]:
class_name = 'bottle'
data_path = '../mvtec_dataset/'
dataset = mvtec.MVTecDataset(data_path, class_name=class_name, is_train=True)
dataloader = DataLoader(dataset, batch_size=32, pin_memory=True)
dataset.__len__()

209

In [12]:
x, y, mask = next(iter(dataloader))

In [13]:
# model = resnet18(pretrained=True, progress=True)
# model = wide_resnet50_2(pretrained=True, progress=True)
model = resnet18(pretrained=True)

model.to(device)
model.eval()
pass

In [14]:
tic = time.perf_counter()
outputs = []

def hook(module, input, output):
    outputs.append(output)

model.layer1[-1].register_forward_hook(hook)
model.layer2[-1].register_forward_hook(hook)
model.layer3[-1].register_forward_hook(hook)    
#     model.layer4[-1].register_forward_hook(hook)    

train_outputs = OrderedDict([('layer1', []), ('layer2', []), ('layer3', [])])


with torch.no_grad():
    _ = model(x.to(device))
# get intermediate layer outputs
for k, v in zip(train_outputs.keys(), outputs):
    train_outputs[k].append(v.cpu().detach())

# initialize hook outputs
outputs = []

for k, v in train_outputs.items():
    train_outputs[k] = torch.cat(v, 0)

    
    
embedding_vectors = expandFeatures([train_outputs['layer1'], train_outputs['layer2'], train_outputs['layer3']])

print(time.perf_counter()-tic)


1.917397912000002


In [15]:
print(embedding_vectors.shape)
print(embedding_vectors[0][0][0])


torch.Size([32, 512, 56, 56])
tensor([0.0080, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0602, 0.1600, 0.2776, 0.3511, 0.3092,
        0.2175, 0.1496, 0.1211, 0.4365, 0.4459, 0.4752, 0.5137, 0.5318, 0.5286,
        0.5006, 0.4708, 0.4541, 0.4969, 0.5557, 0.5626, 0.4758, 0.2799, 0.1860,
        0.0000, 0.1041, 0.3207, 0.3926, 0.5066, 0.3453, 0.0675, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0186,
        0.2225, 0.2169])


In [16]:
resnet18_features = Resnet18Features(device)


In [17]:
tic = time.perf_counter()
with torch.no_grad():
    features = resnet18_features(x)
print(time.perf_counter()-tic)


1.372056385999997


In [18]:
print(type(features))
len(features)

<class 'torch.Tensor'>


32

In [20]:
print(features.shape)
print(features[0][0][0])


torch.Size([32, 448, 56, 56])
tensor([0.0080, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0602, 0.1600, 0.2776, 0.3511, 0.3092,
        0.2175, 0.1496, 0.1211, 0.4365, 0.4459, 0.4752, 0.5137, 0.5318, 0.5286,
        0.5006, 0.4708, 0.4541, 0.4969, 0.5557, 0.5626, 0.4758, 0.2799, 0.1860,
        0.0000, 0.1041, 0.3207, 0.3926, 0.5066, 0.3453, 0.0675, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0186,
        0.2225, 0.2169])


In [118]:
# Embedding concat
embedding_vectors = expandFeatures([train_outputs['layer1'], train_outputs['layer2'], train_outputs['layer3']])

# idx = getSeedIndices(100, 448, device)  
# embedding_vectors = torch.index_select(embedding_vectors, 1, idx)
print(embedding_vectors.shape)
print(embedding_vectors[0][0])

torch.Size([32, 512, 56, 56])
tensor([[0.0080, 0.0000, 0.0000,  ..., 0.0186, 0.2225, 0.2169],
        [0.3165, 0.4252, 0.3331,  ..., 0.4363, 0.6555, 0.5827],
        [0.0690, 0.1611, 0.0030,  ..., 0.0766, 0.3925, 0.4344],
        ...,
        [0.2314, 0.3079, 0.1707,  ..., 0.2362, 0.3392, 0.3081],
        [0.4463, 0.5012, 0.3271,  ..., 0.3008, 0.4765, 0.3464],
        [0.2140, 0.3058, 0.2108,  ..., 0.1384, 0.3433, 0.3809]])


In [146]:
tic = time.perf_counter()
ef = expandFeatures2([train_outputs['layer1'], train_outputs['layer2'], train_outputs['layer3']])
print(time.perf_counter()-tic)
print(ef[0][0][0])

0.15946829599988632
tensor([0.0080, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0602, 0.1600, 0.2776, 0.3511, 0.3092,
        0.2175, 0.1496, 0.1211, 0.4365, 0.4459, 0.4752, 0.5137, 0.5318, 0.5286,
        0.5006, 0.4708, 0.4541, 0.4969, 0.5557, 0.5626, 0.4758, 0.2799, 0.1860,
        0.0000, 0.1041, 0.3207, 0.3926, 0.5066, 0.3453, 0.0675, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0186,
        0.2225, 0.2169])


In [145]:
tic = time.perf_counter()
ef = expandFeatures([train_outputs['layer1'], train_outputs['layer2'], train_outputs['layer3']])
print(time.perf_counter()-tic)
print(ef[0][0][0])

0.547424023000076
tensor([0.0080, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0602, 0.1600, 0.2776, 0.3511, 0.3092,
        0.2175, 0.1496, 0.1211, 0.4365, 0.4459, 0.4752, 0.5137, 0.5318, 0.5286,
        0.5006, 0.4708, 0.4541, 0.4969, 0.5557, 0.5626, 0.4758, 0.2799, 0.1860,
        0.0000, 0.1041, 0.3207, 0.3926, 0.5066, 0.3453, 0.0675, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0186,
        0.2225, 0.2169])


In [139]:
def expandFeatures2(features):
    
    tot_depth = sum(feature.shape[1] for feature in features)
    
    concatenated_features = torch.zeros((features[0].shape[0], tot_depth, features[0].shape[2], features[0].shape[3]))
    concatenated_features[:, 0:features[0].shape[1], :, :] = features[0]

    last_depth = features[0].shape[0]
    
    for feature in features[1:]:
        scale_factor = features[0].shape[3]/feature.shape[3]
        upsampled_feature = torch.nn.Upsample(scale_factor=scale_factor, mode='nearest')(feature)
        concatenated_features[:,last_depth:last_depth+upsampled_feature.shape[1], :, :] = upsampled_feature
        last_depth += upsampled_feature.shape[1]

    return concatenated_features

In [39]:
m = torch.nn.Upsample(scale_factor=2, mode='nearest')
m(train_outputs['layer2']).shape

torch.Size([32, 128, 56, 56])

In [8]:
def extractEmbeddingVectorsBatched(x, model, device):

    outputs = []

    def hook(module, input, output):
        outputs.append(output)

    model.layer1[-1].register_forward_hook(hook)
    model.layer2[-1].register_forward_hook(hook)
    model.layer3[-1].register_forward_hook(hook)    
#     model.layer4[-1].register_forward_hook(hook)    

    train_outputs = OrderedDict([('layer1', []), ('layer2', []), ('layer3', [])])


    with torch.no_grad():
        _ = model(x.to(device))
    # get intermediate layer outputs
    for k, v in zip(train_outputs.keys(), outputs):
        train_outputs[k].append(v.cpu().detach())

    # initialize hook outputs
    outputs = []
        
    for k, v in train_outputs.items():
        train_outputs[k] = torch.cat(v, 0)


    # Embedding concat
    embedding_vectors = expandFeatures([train_outputs['layer1'], train_outputs['layer2'], train_outputs['layer3']])
        
    idx = getSeedIndices(100, 448, device)  
    embedding_vectors = torch.index_select(embedding_vectors, 1, idx)
    
    return embedding_vectors


# Expects the largest dimension first
def expandFeatures(features):
    expanded_features = features[0]
    for feature in features:
        expanded_features = embedding_concat(expanded_features, feature)
    return expanded_features
    


def embedding_concat(x, y):
    B, C1, H1, W1 = x.size()
    _, C2, H2, W2 = y.size()
    s = int(H1 / H2)
    x = F.unfold(x, kernel_size=s, dilation=1, stride=s)
    x = x.view(B, C1, -1, H2, W2)
    z = torch.zeros(B, C1 + C2, x.size(2), H2, W2)
    for i in range(x.size(2)):
        z[:, :, i, :, :] = torch.cat((x[:, :, i, :, :], y), 1)
    del x
    del y
    z = z.view(B, -1, H2 * W2)
    z = F.fold(z, kernel_size=s, output_size=(H1, W1), stride=s)
    return z


def getSeedIndices(choose, total, device):
    random.seed(1024)
    torch.manual_seed(1024)
        
    if device.type=='cuda':
        torch.cuda.manual_seed_all(1024)
    return torch.tensor(sample(range(0, total), choose))

In [25]:
ev = extractEmbeddingVectorsBatched(x, model, device)

In [7]:
ev.shape

torch.Size([32, 100, 56, 56])

In [38]:
def getParameters(embedding_vectors):
    B, C, H, W = embedding_vectors.size()
    embedding_vectors = embedding_vectors.view(B, C, H * W)
    mean = torch.mean(embedding_vectors, dim=0).numpy()
    cov = torch.zeros(C, C, H * W).numpy()
    I = np.identity(C)

    # embedding_vectors = torch.from_numpy(embedding_vectors)

    for i in range(H * W):
#         cov[:, :, i] = LedoitWolf().fit(embedding_vectors[:, :, i].numpy()).covariance_
        cov[:, :, i] = np.cov(embedding_vectors[:, :, i].numpy(), rowvar=False) + 0.01 * I
        
    return mean, cov

In [1]:
def calculateMeanAndCovarianceMatrix(embedding_vectors):
    
    B, C, H, W = embedding_vectors.size()
    print(embedding_vectors.shape)
    
#     embedding_vectors = embedding_vectors.permute(1, 0, 2, 3)
#     print(embedding_vectors.shape)
        
#     embedding_vectors = embedding_vectors.reshape(C, B * H * W)
#     print(embedding_vectors.shape)
    
    embedding_vectors = embedding_vectors.view(B, C, H*W)
    print('ev', embedding_vectors.shape)
    
    
    mean = torch.mean(embedding_vectors, dim=0)
    print('mean', mean.shape)
    
    
    mean_expanded = mean.expand(B, C, H*W)
    print('mean', mean_expanded.shape)

    
    diff = mean - embedding_vectors
    print('diff:', diff.shape)
    
    

    
    diff = diff.permute(0,2,1)
    print('diff:', diff.shape)
    
#     diff = diff.reshape(B*H*W, C)
#     print('diff:', diff.shape)
    
    
#     diff = diff.unsqueeze(3)
#     print('diff:', diff.shape)
    
#     diff_transpose = torch.transpose(diff, 2, 3)
#     print('diff_transpose:', diff_transpose.shape)

    diff_transpose = torch.transpose(diff, 1, 2)
    print('diff_transpose:', diff_transpose.shape)
    
    
    mult = torch.bmm(diff, diff_transpose)
    
    
#     concat_mult = None
#     for i in range(B):
#         print(i)
# #         print(diff[i].shape)
# #         print(diff_transpose[i].shape)
#         mult = torch.bmm(diff[i], diff_transpose[i])
#         mult = mult.unsqueeze(0)
        
# #         print(mult.shape)
        
#         if concat_mult == None:
#             concat_mult = mult
#         else:
# #             print(concat_mult.shape)
#             concat_mult = torch.cat((concat_mult, mult), dim=0)
        
        
#     print(concat_mult.shape)

In [60]:
tic = time.perf_counter()
# mean, cov = getParameters(ev)
calculateMeanAndCovarianceMatrix(ev)
print(time.perf_counter()-tic)

torch.Size([32, 100, 56, 56])
ev torch.Size([32, 100, 3136])
mean torch.Size([100, 3136])
mean torch.Size([32, 100, 3136])
diff: torch.Size([32, 100, 3136])
diff: torch.Size([32, 3136, 100])
diff_transpose: torch.Size([32, 100, 3136])
0.9893512290000217


In [20]:
mean

array([[1.0769770e+00, 1.0769770e+00, 1.0769770e+00, ..., 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [3.4343752e-01, 4.9410871e-01, 4.1614291e-01, ..., 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 1.2526109e-02,
        1.2526109e-02, 1.2526109e-02],
       ...,
       [6.4803241e-04, 6.4803241e-04, 6.4803241e-04, ..., 8.8993922e-02,
        8.8993922e-02, 8.8993922e-02],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [9.4800466e-01, 1.0566857e+00, 8.2606596e-01, ..., 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00]], dtype=float32)

In [15]:


cov

array([[[ 1.0210126e-02,  1.0210126e-02,  1.0210126e-02, ...,
          9.9999998e-03,  9.9999998e-03,  9.9999998e-03],
        [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
          0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
        [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
          0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
        ...,
        [-2.5765974e-06, -2.5765974e-06, -2.5765974e-06, ...,
          0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
        [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
          0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
        [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
          0.0000000e+00,  0.0000000e+00,  0.0000000e+00]],

       [[ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
          0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
        [ 9.9999998e-03,  9.9999998e-03,  9.9999998e-03, ...,
          9.9999998e-03,  9.9999998e-03,  9.9999998e-03],
        [ 0.0000000e+00, 