<a href="https://colab.research.google.com/github/DeepKnowledge1/ALOCC-CVPR2018/blob/master/anomavision_colab_trt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Anomaly Detection with AnomaVision (Colab Notebook)

This notebook demonstrates how to train, export, and detect anomalies using the provided `train.py`, `export.py`, and `detect.py` scripts within a Google Colab environment. It follows a 'getting started' approach with direct code snippets.

## 1. Environment Setup

### 1.1 Check GPU (Optional)

In [None]:

#@title Check GPU (optional)
!nvidia-smi || echo "No NVIDIA GPU detected (that's okay for a quick CPU demo)."

In [None]:
!rm -rf AnomaVision
!rm -rf *

### 1.2 Install AnomaVision (from GitHub using Poetry)

In [None]:

# If running in Colab, this will take a minute.
!pip -q install --upgrade pip
!pip -q install poetry

!git clone --depth 1 -b bugfix/mah_dtype https://github.com/DeepKnowledge1/AnomaVision.git


In [None]:
%cd /content/AnomaVision

!poetry config virtualenvs.create false

# install dependencies & project using poetry
!poetry install --no-interaction --no-ansi

# make sure it's importable in the current Python env
import sys, pathlib
sys.path.append(str(pathlib.Path(".").resolve()))

print("✅ AnomaVision installed via Poetry.")


In [None]:
# Check GPU availability
import torch, sys, os, time
from pathlib import Path

print("🔍 AnomaVision - Optimized PaDiM Anomaly Detection")
print("=" * 60)
print("=== Hardware Check ===")
if torch.cuda.is_available():
    print(f"✅ CUDA available! GPU: {torch.cuda.get_device_name(0)}")
    print(f"CUDA Version (PyTorch built with): {torch.version.cuda}")
    try:
        props = torch.cuda.get_device_properties(0)
        print(f"GPU Memory: {props.total_memory / 1e9:.1f} GB")
    except Exception:
        pass
    device = torch.device('cuda')
else:
    print("⚠️ CUDA not available - using CPU")
    device = torch.device('cpu')
print(f"PyTorch Version: {torch.__version__}")
print(f"Selected device: {device}")

In [None]:
!pip install numpy==1.26.4 --force-reinstall

### 1.3 Verify AnomaVision Installation

In [1]:

import numpy as np
import matplotlib.pyplot as plt
import cv2
from torch.utils.data import DataLoader
import torch
import os

print(np.__version__)
print(cv2.__version__)
print(torch.__version__ )

import anomavision


print(getattr(anomavision, "version", None))   # fallback



1.26.4
4.11.0
2.8.0+cu128
None


### 1.4 Upload Provided Scripts (Optional - if you have custom scripts)

In [2]:



# @title Upload images from your computer

data= torch.randn(10, 3, 224, 224).to("cuda")

# from google.colab import files
# import os

# upload_dir = "uploaded_images"
# os.makedirs(upload_dir, exist_ok=True)

# uploaded = files.upload()
# for filename, content in uploaded.items():
#     filepath = os.path.join(upload_dir, filename)
#     with open(filepath, "wb") as f:
#         f.write(content)
#     print(f"✅ Saved {filename} to {filepath}")



# upload_dir = "uploaded_images"
# print("Files in uploaded_images/:")
# print(os.listdir(upload_dir))


In [3]:
#@title Upload images from your computer


data2= torch.randn(1, 3, 224, 224).to("cuda")


# from google.colab import files
# import os

# test_dir = "test_images"
# os.makedirs(test_dir, exist_ok=True)

# test = files.upload()
# for filename, content in test.items():
#     filepath = os.path.join(test_dir, filename)
#     with open(filepath, "wb") as f:
#         f.write(content)
#     print(f"✅ Saved {filename} to {filepath}")



# test_dir = "test_images"
# print("Files in test_images/:")
# print(os.listdir(test_dir))

Upload test images from your computer




## 2. Configuration

Define parameters for training, export, and detection. These will replace command-line arguments.

In [4]:
from pickle import FALSE

import os
import torch
from easydict import EasyDict as edict

# --- Common Configuration ---
# This config will be used across train, export, and detect scripts.
# Adjust these parameters based on your dataset and model requirements.
common_config = edict({
    # IMPORTANT: Adjust dataset_path to point to your unzipped dataset within /content/dataset
    # For example, if you unzipped 'mvtec_ad.zip' and it created 'mvtec_ad/bottle' inside 'dataset',
    # then dataset_path should be './dataset/mvtec_ad'
    'dataset_path': "upload_dir", # Path to your dataset root (e.g., '/content/dataset/mvtec_ad')
    'class_name': 'bottle',      # Example: 'bottle', 'cable', etc. (must match a folder in your dataset_path)
    'resize': [224, 224],        # Resize images to 256x256
    'crop_size': [224, 224],     # Crop images to 224x224 after resize
    'normalize': True,           # Enable normalization
    'norm_mean': [0.485, 0.456, 0.406], # ImageNet means
    'norm_std': [0.229, 0.224, 0.225],  # ImageNet stds
    'model_data_path': './model_outputs', # Directory to save/load models (relative to AnomaVision dir)
    'log_level': 'INFO',
})

# Create model_data_path if it doesn't exist
os.makedirs(common_config.model_data_path, exist_ok=True)

# --- Training Specific Configuration ---
train_config = edict({
    'backbone': 'resnet18',      # Model backbone: 'resnet18' or 'wide_resnet50'
    'batch_size': 8,
    'feat_dim': 50,             # Number of random feature dimensions
    'layer_indices': [0],     # Layers to extract features from
    'output_model': 'padim_model.pt', # Filename for the trained model
    'run_name': 'colab_train_exp',
    'device': "cuda",
})

# --- Export Specific Configuration ---
export_config = edict({
    'input_shape': [1, 3, 224, 224], # Input shape for ONNX/TorchScript export
    'onnx_output_name': 'padim_model.onnx',
    'torchscript_output_name': 'padim_model.torchscript',
    'openvino_output_name': 'padim_model_openvino',
    'dynamic_batch': False,
    'quantize_dynamic_flag': False, # Set to True for dynamic INT8 quantization
    'quantize_static_flag': False,  # Set to True for static INT8 quantization
    'calib_samples': 100,           # Number of calibration samples for static quantization
})

# --- Detection Specific Configuration ---
detect_config = edict({
    # IMPORTANT: Adjust img_path to point to your test images within your dataset
    # e.g., '/content/dataset/mvtec_ad/bottle/test'
    'img_path': "test_dir", # Path to test images
    'model': 'padim_model.onnx', # Model file to use for detection (e.g., .pt, .onnx, .torchscript)
    'device': 'auto',            # 'auto', 'cpu', or 'cuda'
    'batch_size': 1,
    'thresh': 13.0,               # Anomaly classification threshold
    'enable_visualization': False,
    'save_visualizations': False,
    'viz_output_dir': './visualizations',
    'run_name': 'colab_detect_exp',
    'overwrite': True,
    'viz_alpha': 0.5,
    'viz_padding': 40,
    'viz_color': (128,0,128),
})

print("Configuration defined. Please review and adjust `dataset_path` and `class_name`.")


Configuration defined. Please review and adjust `dataset_path` and `class_name`.


In [5]:
import anomavision
from anomavision.utils import (    get_logger,    setup_logging)


  # Setup logging first
setup_logging(enabled=True, log_level="INFO", log_to_file=True)
logger = get_logger("anomavision.detect")  # Force it into anomavision hierarchy


2025-09-24 18:32:30,217 - anomavision - INFO - Anomavision logging initialized - Level: INFO
2025-09-24 18:32:30,218 - anomavision - INFO - Log file: logs/anomavision_20250924_183230.log


## 3. Training

This section performs model training using the `anomavision.Padim` class directly.

In [6]:

import time
import anomavision
from torch.utils.data import DataLoader

from anomavision.general import Profiler, determine_device, increment_path
from pathlib import Path
device_str = determine_device(train_config.device)
start_time = time.time()

print("=== Training PaDiM Model ===")

# # Create dataset and dataloader for training
# train_dataset = anomavision.AnodetDataset(
#     common_config.dataset_path,
#     resize=common_config.resize,
#     crop_size=common_config.crop_size,
#     normalize=common_config.normalize,
#     mean=common_config.norm_mean,
#     std=common_config.norm_std,
# )
train_dataloader = DataLoader(data, batch_size=train_config.batch_size, shuffle=False)

padim_model = anomavision.Padim(
    backbone=train_config.backbone,
    device=device_str,
    layer_indices=train_config.layer_indices,
    feat_dim=train_config.feat_dim
)

padim_model.fit(train_dataloader)
training_time = time.time() - start_time
print(f"✅ Training completed in {training_time:.2f} seconds")

# padim_model.mean = padim_model.mean.half() if device_str.device.type != "cpu" else padim_model.mean
# padim_model.cov_inv = (
#     padim_model.cov_inv.half() if device_str.device.type != "cpu" else padim_model.cov_inv
# )

# Save state_dict (safer than pickling whole object)
model_path = os.path.join(common_config.model_data_path, train_config.output_model)

torch.save(padim_model, model_path)

stats_path =  os.path.join(common_config.model_data_path, "padim.pth")
padim_model.save_statistics(str(stats_path), half=True)
stats_size = Path(stats_path).stat().st_size / (1024 * 1024)

print(f"Model saved to: {model_path}")
print(f"Compact statistics saved to: {stats_path}")
print(f"   Statistics file size: {stats_size:.2f} MB")

2025-09-24 18:32:31,773 - anomavision.feature_extraction - INFO - Initializing ResnetEmbeddingsExtractor with backbone: resnet18, device: cuda
2025-09-24 18:32:31,774 - anomavision.feature_extraction - INFO - Loading resnet18 with weights: ResNet18_Weights.IMAGENET1K_V1


=== Training PaDiM Model ===
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 169MB/s]
2025-09-24 18:32:32,477 - anomavision.feature_extraction - INFO - Backbone successfully moved to device: cuda:0
2025-09-24 18:32:32,478 - anomavision.feature_extraction - INFO - Model set to evaluation mode
2025-09-24 18:32:32,485 - anomavision.feature_extraction - INFO - Starting feature extraction from dataloader with 2 batches


Backbone device: cuda:0


Feature extraction: 100%|██████████| 2/2 [00:00<00:00,  2.75it/s]
2025-09-24 18:32:33,223 - anomavision.feature_extraction - INFO - Feature extraction completed. Final shape: torch.Size([10, 3136, 50])


✅ Training completed in 1.95 seconds
Statistics saved to ./model_outputs/padim.pth using FP16 precision
Model saved to: ./model_outputs/padim_model.pt
Compact statistics saved to: ./model_outputs/padim.pth
   Statistics file size: 15.26 MB


## 4. Export

In [None]:
!python -m pip install onnx onnxruntime-gpu


This section exports the trained model to ONNX, TorchScript, and OpenVINO formats using the `anomavision.ModelExporter` class directly.

> Add blockquote



In [7]:

import anomavision
from export import ModelExporter

model_path = os.path.join(common_config.model_data_path, train_config.output_model)
output_dir = common_config.model_data_path

# # Assuming a logger is not strictly necessary for basic Colab usage, or can be simplified
# class DummyLogger:
#     def info(self, *args): print("INFO:", *args)
#     def exception(self, *args): print("EXCEPTION:", *args)
#     def warning(self, *args): print("WARNING:", *args)
# logger = DummyLogger()

exporter = ModelExporter(model_path, output_dir, logger,device="cuda")

print("Starting export...")

# Export to ONNX
onnx_path = exporter.export_onnx(
    input_shape=export_config.input_shape,
    output_name=export_config.onnx_output_name,
    dynamic_batch=export_config.dynamic_batch,
    quantize_dynamic_flag=export_config.quantize_dynamic_flag,
    quantize_static_flag=export_config.quantize_static_flag,
    calib_samples=export_config.calib_samples,
    calib_dir=common_config.dataset_path,
    # force_precision="fp32" if export_config.quantize_static_flag else None,
)

if onnx_path:
    print(f"✅ Model exported to ONNX: {onnx_path}")
    stats_size = onnx_path.stat().st_size / (1024 * 1024)
    print(f"   ONNX file size: {stats_size:.2f} MB")


# Export to TorchScript
torchscript_path = exporter.export_torchscript(
    input_shape=export_config.input_shape,
    output_name=export_config.torchscript_output_name,
)
if torchscript_path:
    print(f"✅ Model exported to TorchScript: {torchscript_path}")
    stats_size = torchscript_path.stat().st_size / (1024 * 1024)
    print(f"   torchscript file size: {stats_size:.2f} MB")


# Export to OpenVINO (requires ONNX as intermediate)
openvino_path = exporter.export_openvino(
    input_shape=export_config.input_shape,
    output_name=export_config.openvino_output_name,
    dynamic_batch=export_config.dynamic_batch,
)
if openvino_path:
    print(f"✅ Model exported to OpenVINO: {openvino_path}")
    stats_size = openvino_path.stat().st_size / (1024 * 1024)
    print(f"   openvino file size: {stats_size:.2f} MB")


print("Export complete.")


2025-09-24 18:32:36,885 - anomavision.detect - INFO - Auto-detected device: CUDA
2025-09-24 18:32:36,886 - anomavision.detect - INFO - load: model_outputs/padim_model.pt
2025-09-24 18:32:36,940 - anomavision.detect - INFO - Loaded object type: <class 'anomavision.padim.Padim'>
2025-09-24 18:32:36,941 - anomavision.detect - INFO - GOING INTO FULL MODEL PATH
2025-09-24 18:32:36,971 - anomavision.detect - INFO - Using FP16 precision (auto-detected for CUDA)


Starting export...


2025-09-24 18:32:37,754 - anomavision.detect - INFO - onnx: ok (0.87s) file=model_outputs/padim_model.onnx size=15.6MB dynamic_batch=False opset=17 precision=FP16 device=cuda
2025-09-24 18:32:37,757 - anomavision.detect - INFO - load: model_outputs/padim_model.pt
2025-09-24 18:32:37,816 - anomavision.detect - INFO - Loaded object type: <class 'anomavision.padim.Padim'>
2025-09-24 18:32:37,816 - anomavision.detect - INFO - GOING INTO FULL MODEL PATH
2025-09-24 18:32:37,840 - anomavision.detect - INFO - Using FP16 precision (auto-detected for CUDA)
2025-09-24 18:32:37,847 - anomavision.detect - INFO - ts: tracing optimize=False precision=FP16 device=cuda


✅ Model exported to ONNX: model_outputs/padim_model.onnx
   ONNX file size: 15.57 MB


2025-09-24 18:32:38,264 - anomavision.detect - INFO - ts: ok (0.51s) file=model_outputs/padim_model.torchscript size=37.7MB optimized=False precision=FP16 device=cuda
2025-09-24 18:32:38,267 - anomavision.detect - INFO - OpenVINO: using FP16 (auto-detected for CUDA)
2025-09-24 18:32:38,268 - anomavision.detect - INFO - load: model_outputs/padim_model.pt
2025-09-24 18:32:38,316 - anomavision.detect - INFO - Loaded object type: <class 'anomavision.padim.Padim'>
2025-09-24 18:32:38,317 - anomavision.detect - INFO - GOING INTO FULL MODEL PATH
2025-09-24 18:32:38,346 - anomavision.detect - INFO - Using FP16 precision (forced to FP16)


✅ Model exported to TorchScript: model_outputs/padim_model.torchscript
   torchscript file size: 37.68 MB


2025-09-24 18:32:38,611 - anomavision.detect - INFO - onnx: ok (0.34s) file=model_outputs/temp_model.onnx size=15.6MB dynamic_batch=False opset=17 precision=FP16 device=cuda
2025-09-24 18:32:39,649 - anomavision.detect - INFO - ov: convert fp16=True dynamic_batch=False device=cuda


[ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release.
In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. 
Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html


2025-09-24 18:32:40,751 - anomavision.detect - INFO - ov: ok (2.48s) dir=model_outputs/padim_model_openvino xml=padim_model_openvino.xml size=0.0MB precision=FP16 dynamic_batch=False device=cuda
2025-09-24 18:32:40,751 - anomavision.detect - INFO - ov: ok (2.48s) dir=model_outputs/padim_model_openvino xml=padim_model_openvino.xml size=0.0MB precision=FP16 dynamic_batch=False device=cuda
2025-09-24 18:32:40,751 - anomavision.detect - INFO - ov: ok (2.48s) dir=model_outputs/padim_model_openvino xml=padim_model_openvino.xml size=0.0MB precision=FP16 dynamic_batch=False device=cuda


✅ Model exported to OpenVINO: model_outputs/padim_model_openvino
   openvino file size: 0.00 MB
Export complete.
✅ Model exported to OpenVINO: model_outputs/padim_model_openvino✅ Model exported to OpenVINO: model_outputs/padim_model_openvino

[link text](https://)## 4.1. TensorRT

In [8]:
# !pip install torch-tensorrt

# !pip install nvidia-pyindex nvidia-pip
# !pip install nvidia-tensorrt==8.6.1
!pip install torch-tensorrt==2.8.0



Collecting torch-tensorrt==2.8.0
  Downloading torch_tensorrt-2.8.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_34_x86_64.whl.metadata (11 kB)
Collecting dllist (from torch-tensorrt==2.8.0)
  Downloading dllist-2.0.0-py3-none-any.whl.metadata (2.0 kB)
Collecting tensorrt<10.13.0,>=10.12.0 (from torch-tensorrt==2.8.0)
  Downloading tensorrt-10.12.0.36.tar.gz (40 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tensorrt-cu12-bindings<10.13.0,>=10.12.0 (from torch-tensorrt==2.8.0)
  Downloading tensorrt_cu12_bindings-10.12.0.36-cp312-none-manylinux_2_28_x86_64.whl.metadata (607 bytes)
Collecting tensorrt-cu12-libs<10.13.0,>=10.12.0 (from torch-tensorrt==2.8.0)
  Downloading tensorrt_cu12_libs-10.12.0.36.tar.gz (709 bytes)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tensorrt_cu12==10.12.0.36 (from tensorrt<10.13.0,>=10.12.0->torch-te

**Accelerating the model with TensorRT**


In [9]:

# re-trace
example_input = torch.randn(1, 3, 224, 224).cuda()
ts_model = torch.jit.trace(padim_model.cuda().eval(), example_input)
ts_model.save("padim_fixed.ts")



In [None]:
# !pip list

In [10]:
ts_model = torch.jit.load("padim_fixed.ts").eval().cuda()
ts_model = ts_model.half()             # convert weights & ops to half
example_input = torch.randn(1, 3, 224, 224, device="cuda", dtype=torch.float16)
torch.jit.save(torch.jit.trace(ts_model, example_input), "padim_fp16.ts")




In [None]:

import torch
import torch_tensorrt as torchtrt



trt_model = torchtrt.compile(
    torch.jit.load("padim_fp16.ts").eval().cuda(),
    ir="torchscript",
    inputs=[torchtrt.Input(example_input.shape, dtype=torch.float16)],
    enabled_precisions={torch.float16},
    truncate_long_and_double=True,
    require_full_compilation=False,
)
torch.jit.save(trt_model, "padim_trt.ts")



In [None]:
import torch

# 1) Load compiled TRT module
trt_model = torch.jit.load("padim_trt.ts").eval().cuda()

# 2) Create half-precision input
x = torch.randn(1, 3, 224, 224, device="cuda", dtype=torch.float16)

# 3) Inference
with torch.no_grad():
    scores, embeddings = trt_model(x)   # returns tuple (Tensor, Tensor)

print("Scores shape:", scores.shape, "dtype:", scores.dtype)
print("Embeddings shape:", embeddings.shape, "dtype:", embeddings.dtype)


In [None]:
import time
torch.cuda.synchronize()
for _ in range(10):
    t0 = time.time()
    x = torch.randn(1, 3, 224, 224, device="cuda", dtype=torch.float16)
    s,m = trt_model(x)
    # print(f"============ {s.dtype}, {m.dtype}")
    t1 = time.time()
    print(f"Total pipeline time: {(t1 - t0) * 1000:.2f} ms")

torch.cuda.synchronize()


In [None]:
print(trt_model)  # sometimes shows forward signature
try:
    print(trt_model.graph)   # TorchScript graph, look for 'execute_engine([..., ...])'
except Exception:
    pass

# Check parameters / buffers that could be fp32
for n, b in trt_model.named_buffers(recurse=True):
    if b.dtype == torch.float32:
        print("FP32 buffer:", n, b.shape)

for n, p in trt_model.named_parameters(recurse=True):
    if p.dtype == torch.float32:
        print("FP32 param:", n, p.shape)


In [None]:

from anomavision.inference.model.wrapper import ModelWrapper
import torch

device_str = "cuda"

wrapper = ModelWrapper(torchscript_path, device_str)

# Access the actual TorchScript model
ts_model = wrapper.backend.model
# Access TorchScript model directly
ts_model = wrapper.backend.model
ts_model.eval().cuda()


dtypes = {p.dtype for p in ts_model.parameters()} | {b.dtype for b in ts_model.buffers()}
print("Detected dtypes in model:", dtypes)



# Save it again (if you want a cleaned copy)
ts_model.save("padim_fixed.ts")

dtypes = {p.dtype for p in ts_model.parameters()} | {b.dtype for b in ts_model.buffers()}

print("Detected dtypes in model:", dtypes)

if torch.float16 in dtypes:
    print("✅ Model contains FP16 weights")
if torch.float32 in dtypes:
    print("✅ Model contains FP32 weights")


# Convert to FP16
ts_model = ts_model.half()

# Save a new TorchScript in FP16
example_input = torch.randn(1, 3, 224, 224, device="cuda", dtype=torch.float16)
ts_model_fp16 = torch.jit.trace(ts_model, example_input)
ts_model_fp16.save("padim_fp16.ts")

# Double-check
dtypes = {p.dtype for p in ts_model_fp16.parameters()} | {b.dtype for b in ts_model_fp16.buffers()}
print("Detected dtypes after conversion:", dtypes)


In [None]:
import pdb
pdb.set_trace()

import torch

import torch
import torch_tensorrt as torchtrt



trt_model = torchtrt.compile(
    torch.jit.load("padim_fp16.ts").eval().cuda(),
    ir="torchscript",
    inputs=[torchtrt.Input(example_input.shape, dtype=torch.float16)],
    enabled_precisions={torch.float16},
    truncate_long_and_double=True,
    require_full_compilation=False,
)
torch.jit.save(trt_model, "padim_trt.ts")
# 1) Load compiled TRT module
trt_model = torch.jit.load("padim_trt.ts").eval().cuda()

# 2) Create half-precision input
x = torch.randn(1, 3, 224, 224, device="cuda", dtype=torch.float16)

# 3) Inference
with torch.no_grad():
    scores, embeddings = trt_model(x)   # returns tuple (Tensor, Tensor)

print("Scores shape:", scores.shape, "dtype:", scores.dtype)
print("Embeddings shape:", embeddings.shape, "dtype:", embeddings.dtype)
