In [None]:
# Early permission request for google drive
# Mounting is done first so the Drive permission request is triggered at the start of the run.
from google.colab import drive
# NOTE(review): 18*60*3600 evaluates to 3,888,000 ms (about 64.8 minutes). The factors do not
# form an obvious millisecond conversion (18 h would be 18*3600*1000) - confirm the intended timeout.
drive.mount('/content/drive', timeout_ms=18*60*3600)

In [None]:
%%shell
# Install dependencies
# The %%shell cell magic has to be the very first line of the cell; a leading comment
# stops IPython from recognising it and the cell fails to run.
# NOTE(review): python3-blinker / torchaudio / fastai are removed before installing the
# thesis library, presumably because they conflict with its dependencies - confirm.
apt-get -qq remove python3-blinker --quiet
pip uninstall torchaudio fastai -y --quiet
# NOTE(review): installs the default branch head (no tag/commit pin), so runs are not reproducible over time.
pip install git+https://github.com/Pystronic/master-thesis-anomaly-detection.git#subdirectory=experiment/thesis_library --quiet

In [None]:
# Imports
from thesis_library.data.miad_dataset import MIAD_CATEGORIES
from thesis_library.data.miad_datamodule import MIAD
from thesis_library.metrics import image, pixel
from thesis_library.metrics.util import  calculate_AD_metrics
from thesis_library.LimitedImageVisualizer import LimitedImageVisualizer

import time
from pathlib import Path
from datetime import datetime

import torch
from anomalib.callbacks import ModelCheckpoint
from anomalib.data.utils import ValSplitMode
from anomalib.deploy import ExportType
from anomalib.metrics import Evaluator
from anomalib.engine import Engine
from lightning.pytorch.loggers import CSVLogger
from pandas import DataFrame

In [None]:
# Workaround for a pytorch-lightning bug that shows up on export:
# https://github.com/Lightning-AI/pytorch-lightning/issues/17124
from torch.utils.data.dataloader import _BaseDataLoaderIter


def getstate_patch(*_):
    """Return a fresh, empty state dict and ignore all positional arguments.

    Installed below as ``_BaseDataLoaderIter.__getstate__`` so that a dataloader
    iterator contributes an empty state whenever its ``__getstate__`` is invoked.
    """
    return dict()


# Monkey-patch the iterator base class with the replacement above
_BaseDataLoaderIter.__getstate__ = getstate_patch

In [None]:
# Optimization for A100, H100, etc
# Less precision but more performance: allow TF32 for CUDA matmuls and
# lower the float32 matmul precision setting from its default to "high".
torch.backends.cuda.matmul.allow_tf32 = True
torch.set_float32_matmul_precision("high")

## Auswahl von Modell und Kategorie

In [None]:
# Run configuration: category, model name and training length

# Category used for training / testing (index into the categories exported by thesis_library)
CURRENT_CATEGORY = MIAD_CATEGORIES[0]
print(f"Selected category {CURRENT_CATEGORY} from {MIAD_CATEGORIES}")

# Maximum number of training epochs handed to the engine
MODEL_EPOCHS = 10
# Name used for the Drive result folder and the exported model file
CURRENT_MODEL = "EfficientAd_10_Epoch"
# Keep None to train from scratch. To skip training and evaluate previously exported
# weights instead, point this at the export, e.g.:
#   f"/content/drive/MyDrive/{CURRENT_MODEL}/{CURRENT_CATEGORY}/model/weights/torch/{CURRENT_MODEL}.pt"
LOAD_MODEL_PATH = None

In [None]:
# Clear data of previous run
# Removes earlier local results and any dataset archives left behind; only names matching
# *.zip* are deleted from datasets/MIAD, everything else in that folder stays in place.
# The paths are literals because LOCAL_DIR / MIAD_DIR are only defined in later cells.
!rm -r results/*
!rm -r datasets/MIAD/*.zip*

## Vorbereitung von Ordnern und Dataset

In [None]:
# Prepare google drive directories
# (Path comes from the imports cell at the top of the notebook; no re-import needed here.)

# Google Drive locations: dataset archives are read from MIAD/<category>,
# results and the exported model are written below <model>/<category>.
GDRIVE_DIR = Path("/content/drive/MyDrive")
MIAD_GDRIVE_DIR = GDRIVE_DIR / "MIAD"
MIAD_GDRIVE_CATEGORY_DIR = MIAD_GDRIVE_DIR / CURRENT_CATEGORY
GDRIVE_RESULT_DIR = GDRIVE_DIR / CURRENT_MODEL / CURRENT_CATEGORY
MODEL_EXPORT_DIR = GDRIVE_RESULT_DIR / "model"

# Local working directories for checkpoints and logs / result files
LOCAL_DIR = Path("results")
LOCAL_CHECKPOINT_DIR = LOCAL_DIR / "checkpoints"
LOCAL_RESULT_DIR = LOCAL_DIR / "results"

# Show the resolved paths as a quick sanity check
for _path in (
    MIAD_GDRIVE_CATEGORY_DIR,
    LOCAL_CHECKPOINT_DIR,
    LOCAL_RESULT_DIR,
    MODEL_EXPORT_DIR,
    GDRIVE_RESULT_DIR,
):
    print(_path)

In [None]:
# Local dataset location: the archives from Drive are copied to and extracted in MIAD_DIR
MIAD_DIR = Path("datasets/MIAD")
MIAD_CATEGORY_DIR = MIAD_DIR / CURRENT_CATEGORY

# Create the dataset folder, the local result folders and the Drive export folders.
# parents=True / exist_ok=True make this cell safe to re-run.
# MIAD_CATEGORY_DIR itself is not created here.
for _directory in (
    MIAD_DIR,
    LOCAL_CHECKPOINT_DIR,
    LOCAL_RESULT_DIR,
    MODEL_EXPORT_DIR,
    GDRIVE_RESULT_DIR,
):
    _directory.mkdir(parents=True, exist_ok=True)

In [None]:
# Extract files locally from GDrive
# IPython expands $MIAD_GDRIVE_CATEGORY_DIR and $MIAD_DIR from the notebook's Python variables.
# The *.zip* glob also matches names with a suffix after ".zip"
# (presumably multi-part archives such as .zip.001 - verify against the Drive folder).
!cp $MIAD_GDRIVE_CATEGORY_DIR/*.zip* $MIAD_DIR
# 7z flags: x = extract with full paths, -o = output directory, -y = answer yes to all prompts,
# -bd = disable the progress indicator
!7z x "$MIAD_DIR/*.zip*" -o$MIAD_DIR -y -bd

## Vorbereitung für Training

In [None]:
# Logging: CSV logger writing below the local result directory; the run name carries
# the start timestamp, so every run gets its own log folder
logger = CSVLogger(
    LOCAL_RESULT_DIR,
    name=f"run_log_{datetime.now().strftime('%Y-%m-%dT%H:%M:%S')}",
)

# Checkpointing: weights only, keep the 3 checkpoints with the highest IMG_AUROC,
# considered every 10 epochs.
# NOTE(review): with MODEL_EPOCHS = 10 this presumably leaves at most one checkpoint
# per run, and it requires IMG_AUROC to be logged during validation - confirm both.
checkpoint_callback = ModelCheckpoint(
    monitor="IMG_AUROC",
    mode="max",
    save_top_k=3,
    every_n_epochs=10,
    save_weights_only=True,
    dirpath=LOCAL_CHECKPOINT_DIR,
    filename="best-{epoch:02d}-{IMG_AUROC:.3f}",
)

In [None]:
# Build the MIAD datamodule for the selected category
datamodule = MIAD(
    root=MIAD_DIR,
    category=CURRENT_CATEGORY,
    seed=4232,
    num_workers=2,
    # Validation data is split off from the test set (ratio 0.1)
    val_split_mode=ValSplitMode.FROM_TEST,
    val_split_ratio=0.1,
    # EfficientAd requires a training batch size of 1;
    # evaluation uses a larger batch to make better use of the GPU
    train_batch_size=1,
    eval_batch_size=8,
)

In [None]:
# Test metrics: image-level metrics first, followed by the pixel-level ones
test_metrics = []
test_metrics.extend(image.get_metrics())
test_metrics.extend(pixel.get_metrics())

# Validation only uses the pixel-level validation metrics
evaluator = Evaluator(
    val_metrics=pixel.get_val_metrics(),
    test_metrics=test_metrics,
    # Kept off on purpose: computing on the CPU caused errors during testing
    compute_on_cpu=False,
)

In [None]:
# Only visualize the first 50 images per category
# to reduce performance overhead in test
# NOTE(review): field_size (512, 512) equals the size passed to the pre-processor in this
# notebook; presumably the two have to stay in sync - confirm.
visualizer = LimitedImageVisualizer(50, field_size=(512, 512))

In [None]:
from anomalib.models import EfficientAd
# Set pre-processor to the correct size
# (512, 512) is the same size this notebook passes to the visualizer and to prepare_imagenette_data.
pre_processor = EfficientAd.configure_pre_processor((512, 512))

In [None]:
# Engine shared by training and testing
engine = Engine(
    logger=logger,
    callbacks=[checkpoint_callback],
    default_root_dir=LOCAL_RESULT_DIR,
    # Stop after MODEL_EPOCHS epochs or after 16 hours, whichever comes first;
    # a batch size of 1 makes training very slow
    max_epochs=MODEL_EPOCHS,
    max_time="00:16:00:00",
    # bfloat16 mixed precision to speed up fitting / inference
    precision="bf16-mixed",
    log_every_n_steps=400,
)

## Model-Training

In [None]:
from anomalib.models.image.efficient_ad.torch_model import EfficientAdModelSize

# Medium-sized EfficientAd wired up with the shared evaluator, visualizer and pre-processor
model = EfficientAd(
    evaluator=evaluator,
    visualizer=visualizer,
    pre_processor=pre_processor,
    model_size=EfficientAdModelSize.M,
)

if LOAD_MODEL_PATH is not None:
    # weights_only=False unpickles arbitrary objects. This is only acceptable because
    # the file was exported by this notebook itself - never point it at untrusted files.
    _loaded = torch.load(LOAD_MODEL_PATH, weights_only=False)

    # NOTE(review): the file is produced by engine.export(..., ExportType.TORCH); anomalib's
    # torch export appears to store the whole module as {"model": module} rather than a bare
    # state dict - confirm against the installed anomalib version. Both layouts are accepted
    # here; a plain state dict is passed through unchanged.
    if isinstance(_loaded, dict) and isinstance(_loaded.get("model"), torch.nn.Module):
        _loaded = _loaded["model"]
    _state_dict = _loaded.state_dict() if isinstance(_loaded, torch.nn.Module) else _loaded

    # strict=False tolerates key differences, but it also hides a completely mismatched
    # file, so the outcome is checked explicitly below.
    _load_result = model.load_state_dict(_state_dict, strict=False)
    if _load_result.missing_keys and len(_load_result.missing_keys) == len(model.state_dict()):
        raise RuntimeError(
            f"No weights from {LOAD_MODEL_PATH} matched the model; "
            "refusing to continue with an untrained model."
        )
    if _load_result.missing_keys or _load_result.unexpected_keys:
        print(
            f"Weights loaded with {len(_load_result.missing_keys)} missing and "
            f"{len(_load_result.unexpected_keys)} unexpected keys"
        )

In [None]:
# Prepare auxiliary data with same size as the dataset
# NOTE(review): presumably prepares the Imagenette images EfficientAd uses as auxiliary
# training data - confirm against the anomalib documentation.
model.prepare_imagenette_data((512, 512))

In [None]:
# Prepare EfficientAD pre-trained model
# NOTE(review): this also runs when weights were loaded via LOAD_MODEL_PATH - verify that it
# does not overwrite any of the loaded weights.
model.prepare_pretrained_model()

In [None]:
# Train the model
# Training and export only happen when no existing weights were configured via LOAD_MODEL_PATH.
if LOAD_MODEL_PATH is None:
    engine.fit(datamodule=datamodule, model=model)
    # Export in Torch format, named CURRENT_MODEL, directly into the Google Drive model directory
    engine.export(model, ExportType.TORCH, model_file_name=CURRENT_MODEL, export_root=MODEL_EXPORT_DIR)

## Validierung des Modells

In [None]:
# Run the test stage and measure its wall-clock duration as an approximate
# performance figure (the timing covers the complete engine.test call).
test_start = time.perf_counter()
_all_test_results = engine.test(model=model, datamodule=datamodule)
# engine.test returns a single-element list; keep just that element
test_result = _all_test_results[0]
test_end = time.perf_counter()

In [None]:
# Calculate AD metrics
# NOTE(review): test_result is rebound to the helper's output, so re-running this cell passes
# the already-processed result back in - confirm calculate_AD_metrics tolerates that.
test_result = calculate_AD_metrics(test_result)

In [None]:
# Calculate compound metrics and export results
# One-row frame: all test metrics plus the model / category the run belongs to
result_frame = DataFrame.from_records([dict(test_result)])
result_frame["model"] = CURRENT_MODEL
result_frame["category"] = CURRENT_CATEGORY

# Throughput in images per second = number of test images / elapsed seconds.
# (The previous elapsed / count form produced seconds per image, contradicting the column
# name; CSVs written before this fix contain the reciprocal value.)
# The figure is only approximate: the measured time spans the whole engine.test call.
_elapsed_seconds = test_end - test_start
_num_test_images = len(datamodule.test_data.samples)
result_frame["rel_images_per_second"] = [_num_test_images / _elapsed_seconds]

result_frame.to_csv(LOCAL_RESULT_DIR / "test_results.csv")

In [None]:
# Move local files to drive
# cp -r copies everything below LOCAL_DIR into the Drive result folder (the local files are
# kept); IPython expands both $-variables from the notebook's Python namespace.
!cp -r $LOCAL_DIR/* $GDRIVE_RESULT_DIR

In [None]:
# Stop the colab runtime
# Runs last, after the results were copied to Drive.
# NOTE(review): presumably everything stored only on the runtime's local disk is lost afterwards - confirm.
from google.colab import runtime
runtime.unassign()