In [1]:
import os
import numpy as np
from tqdm import tqdm
import yaml
import onnxruntime as ort
import numpy as np

# PyTorch and torchvision imports
import torch
from torch.utils.data import DataLoader 
import torchvision.transforms as T

# Custom models and datasets
from dataset.video_dataset import AdImageSequenceDataset
from models.features_extractors import EfficientNet_feature_B5

# Custom utils
from utils.utils import CosineLoss, GaussianSmoothing

# Sklearn for evaluation
from sklearn.metrics import roc_auc_score

import yaml
import torch

# Clear CUDA cache and enable CuDNN for better performance
torch.cuda.empty_cache()
torch.backends.cudnn.enabled = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load configuration from YAML file
with open('configs/config_inference.yaml', 'r') as file:
    config = yaml.safe_load(file)


In [3]:
# Define image preprocessing transformations for EfficientNet
preprocessing_efficientnet = T.Compose([
    T.Resize((config['general_config']['image_size'], 
              config['general_config']['image_size']), 
             interpolation=T.InterpolationMode.BICUBIC),
    T.ToTensor(),
    T.Normalize(mean=[0.4850, 0.4560, 0.4060],
                std=[0.2290, 0.2240, 0.2250])
])

# Define image preprocessing transformations for labels
preprocessing_labels = T.Compose([
    T.Resize((config['general_config']['feature_size'], config['general_config']['feature_size'])),
    T.ToTensor()
])

# Create validation dataset
dataset_val = AdImageSequenceDataset(
    os.path.join(config['general_config']['data_path']),
    preprocessing_efficientnet,
    preprocessing_labels
)

dataloader_val = DataLoader(
    dataset_val,
    batch_size=1,
    shuffle=False
)


gs_filter = GaussianSmoothing()

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [4]:
model_SN = torch.load(config['general_config']['weight_path_SimpleNet']).to(device)
model_SN.eval()

model_GeneralAD = torch.load(config['general_config']['weight_path_generalAD']).to(device)
model_GeneralAD.eval()


model_PatchCore = torch.load(config['general_config']['weight_path_PatchCore'])

model_VMTAD = torch.load(config['general_config']['weight_path_VMTAD']).to(device)
model_VMTAD.mode = 'stream'
model_VMTAD.eval()

onnx_model_path = config['general_config']['weight_path_VMTAD_onnx']
VMTAD_session = ort.InferenceSession(onnx_model_path, providers=['CUDAExecutionProvider'])

feature_extraction  = EfficientNet_feature_B5(config)
feature_extraction.eval() 

Unexpected keys (bn2.bias, bn2.num_batches_tracked, bn2.running_mean, bn2.running_var, bn2.weight, classifier.bias, classifier.weight, conv_head.weight) found while loading pretrained weights. This may be expected if model is being adapted.


EfficientNet_feature_B5(
  (model): EfficientNetFeatures(
    (conv_stem): Conv2dSame(3, 48, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn1): BatchNormAct2d(
      48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
          (bn1): BatchNormAct2d(
            48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (aa): Identity()
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
            (act1): SiLU(inplace=True)
            (conv_expand): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv

## Vanilla Version of VMTAD

The vanilla version of VMTAD is used for AUROC results and inference time.

In [5]:
# Evaluation setup 
eval_seg = np.zeros((len(dataset_val),config['general_config']['feature_size'],config['general_config']['feature_size']))
labels_seg = np.zeros((len(dataset_val),config['general_config']['feature_size'],config['general_config']['feature_size']))
eval_det = np.zeros(len(dataset_val))
labels_det = np.zeros(len(dataset_val))

# Loss setup
loss_visu = CosineLoss()

# tqdm setup
t = tqdm(total = len(dataloader_val))

# loop setup
folder_indice = None

# evaluation loop
for k,(_, folder, images, labels) in enumerate(dataloader_val):

    images_cuda = images.to('cuda')
    labels_np = labels[0,1].numpy().astype(int)
    with torch.no_grad():
        i  = 1
        if folder_indice != folder :
            folder_indice = folder
            i = 0
            
        outputs = model_VMTAD(feature_extraction(images_cuda),i)
        pred_frames = outputs['reconstructed_frames']
        original_frame = feature_extraction(images_cuda)
        
        anomaly_map = loss_visu(pred_frames,original_frame)
        anomaly_map_smooth = gs_filter(anomaly_map)[0].detach().cpu().numpy()
        
        eval_seg[k] = anomaly_map_smooth
        labels_seg[k] = labels_np
                
        eval_det[k] = np.max(anomaly_map_smooth)
        labels_det[k] = np.max(labels_np)
        
        t.update()
        t.set_description_str(str(folder))
        
# Print evaluation results        
print("AUROC Det: ",roc_auc_score(labels_seg.flatten(),eval_seg.flatten()))
print("AUROC Seg: ",roc_auc_score(labels_det,eval_det))

('AD_5',): 100%|██████████| 2990/2990 [06:53<00:00,  7.35it/s]

AUROC Det:  0.9975142845007046
AUROC Seg:  0.9619506299827716


## ONNX Version

ONNX model conversion degrades the AUROC performance. This version is open-weight.

In [6]:

# Evaluation setup 
eval_seg = np.zeros((len(dataset_val),config['general_config']['feature_size'],config['general_config']['feature_size']))
labels_seg = np.zeros((len(dataset_val),config['general_config']['feature_size'],config['general_config']['feature_size']))
eval_det = np.zeros(len(dataset_val))
labels_det = np.zeros(len(dataset_val))

# Loss setup
loss_visu = CosineLoss()

# tqdm setup
t = tqdm(total = len(dataloader_val))

# loop setup
folder_indice = None

# evaluation loop
for k,(_, folder, images, labels) in enumerate(dataloader_val):

    images_cuda = images.to('cuda')
    labels_np = labels[0,1].numpy().astype(int)
    with torch.no_grad():
        i  = 1
        if folder_indice != folder :
            folder_indice = folder
            i = 0
            
        onnx_input_name = VMTAD_session.get_inputs()[0].name  
        input_features=feature_extraction(images_cuda).detach().cpu().numpy().astype(np.float32)
        outputs = VMTAD_session.run(['output', 'src'], {onnx_input_name: input_features})
        output = outputs[0]
        src = outputs[1]
        visu_loss = loss_visu(torch.from_numpy(output).to('cuda'),torch.from_numpy(src).to('cuda'))
        anomaly_map_smooth = gs_filter(visu_loss).detach().cpu().numpy()
        
        eval_seg[k] = anomaly_map_smooth
        labels_seg[k] = labels_np
                
        eval_det[k] = np.max(anomaly_map_smooth)
        labels_det[k] = np.max(labels_np)
        
        t.update()
        t.set_description_str(str(folder))
        
# Print evaluation results        
print("AUROC Det: ",roc_auc_score(labels_seg.flatten(),eval_seg.flatten()))
print("AUROC Seg: ",roc_auc_score(labels_det,eval_det))

('AD_5',): 100%|██████████| 2990/2990 [07:01<00:00,  7.10it/s]


AUROC Det:  0.9972473926319537
AUROC Seg:  0.9579666618185309


# SimpleNet

In [7]:
# Evaluation setup 
eval_seg = np.zeros((len(dataset_val),config['general_config']['feature_size'],config['general_config']['feature_size']))
labels_seg = np.zeros((len(dataset_val),config['general_config']['feature_size'],config['general_config']['feature_size']))
eval_det = np.zeros(len(dataset_val))
labels_det = np.zeros(len(dataset_val))

# tqdm setup
t = tqdm(total = len(dataloader_val))

# evaluation loop
for k,(_, folder, images, labels) in enumerate(dataloader_val):

    images_cuda = images.to('cuda')
    labels_np = labels[0,1].numpy().astype(int)
    with torch.no_grad():
            
        outputs = model_SN(feature_extraction(images_cuda), mode='inference')
        anomaly_map_smooth = gs_filter(outputs)[0].detach().cpu().numpy()
        
        eval_seg[k] = anomaly_map_smooth
        labels_seg[k] = labels_np
                
        eval_det[k] = np.max(anomaly_map_smooth)
        labels_det[k] = np.max(labels_np)
        
        t.update()
        t.set_description_str(str(folder))
        
# Print evaluation results        
print("AUROC Det: ",roc_auc_score(labels_seg.flatten(),eval_seg.flatten()))
print("AUROC Seg: ",roc_auc_score(labels_det,eval_det))

('AD_5',): 100%|██████████| 2990/2990 [04:52<00:00, 10.21it/s]


('AD_5',): 100%|██████████| 2990/2990 [01:22<00:00, 37.85it/s]

AUROC Det:  0.9917580892768497
AUROC Seg:  0.9435392091740727


# GeneralAD

In [8]:
# Evaluation setup 
eval_seg = np.zeros((len(dataset_val),config['general_config']['feature_size'],config['general_config']['feature_size']))
labels_seg = np.zeros((len(dataset_val),config['general_config']['feature_size'],config['general_config']['feature_size']))
eval_det = np.zeros(len(dataset_val))
labels_det = np.zeros(len(dataset_val))

# tqdm setup
t = tqdm(total = len(dataloader_val))

# evaluation loop
for k,(_, folder, images, labels) in enumerate(dataloader_val):

    images_cuda = images.to('cuda')
    labels_np = labels[0,1].numpy().astype(int)
    with torch.no_grad():
            
        outputs = model_GeneralAD(feature_extraction(images_cuda), generate_anomaly=False)
        anomaly_map_smooth = gs_filter(outputs).detach().cpu().numpy()
        
        eval_seg[k] = anomaly_map_smooth
        labels_seg[k] = labels_np
                
        eval_det[k] = np.max(anomaly_map_smooth)
        labels_det[k] = np.max(labels_np)
        
        t.update()
        t.set_description_str(str(folder))
        
# Print evaluation results        
print("AUROC Det: ",roc_auc_score(labels_seg.flatten(),eval_seg.flatten()))
print("AUROC Seg: ",roc_auc_score(labels_det,eval_det))

('AD_5',): 100%|██████████| 2990/2990 [01:29<00:00, 33.23it/s]


AUROC Det:  0.9978179391350177
AUROC Seg:  0.9586124135269989


# PatchCore

In [9]:
# Define image preprocessing transformations for labels
preprocessing_labels = T.Compose([
    T.Resize((config['general_config']['feature_size']-2, config['general_config']['feature_size']-2)),
    T.ToTensor()
])

# Create validation dataset
dataset_val = AdImageSequenceDataset(
    os.path.join(config['general_config']['data_path']),
    preprocessing_efficientnet,
    preprocessing_labels
)


dataloader_val = DataLoader(
    dataset_val,
    batch_size=1,
    shuffle=False
)


# Evaluation setup 
eval_seg = np.zeros((len(dataset_val),config['general_config']['feature_size']-2,config['general_config']['feature_size']-2))
labels_seg = np.zeros((len(dataset_val),config['general_config']['feature_size']-2,config['general_config']['feature_size']-2))
eval_det = np.zeros(len(dataset_val))
labels_det = np.zeros(len(dataset_val))

t = tqdm(total = len(dataloader_val))

# evaluation loop
for k,(_, folder, images, labels) in enumerate(dataloader_val):

    images_cuda = images.to('cuda')
    labels_np = labels[0,1].numpy().astype(int)
    with torch.no_grad():
            
        outputs = model_PatchCore.compute_anomaly_map(feature_extraction(images_cuda))
        anomaly_map_smooth = gs_filter(outputs[1].unsqueeze(0)).detach().cpu().numpy()
        eval_seg[k] = anomaly_map_smooth
        labels_seg[k] = labels_np
                
        eval_det[k] = np.max(anomaly_map_smooth)
        labels_det[k] = np.max(labels_np)
        
        t.update()
        t.set_description_str(str(folder))
        
# Print evaluation results        
print("AUROC Det: ",roc_auc_score(labels_seg.flatten(),eval_seg.flatten()))
print("AUROC Seg: ",roc_auc_score(labels_det,eval_det))

('AD_5',): 100%|██████████| 2990/2990 [08:57<00:00,  5.56it/s]
('AD_1',):   0%|          | 11/2990 [00:01<05:26,  9.11it/s]

('AD_5',): 100%|██████████| 2990/2990 [05:17<00:00, 10.12it/s]

AUROC Det:  0.9681743571032554
AUROC Seg:  0.9500231155778895


('AD_5',): 100%|██████████| 2990/2990 [05:28<00:00, 10.12it/s]