### Step 1: Install Core Libraries

In [None]:
# Install anomalib library for anomaly detection
!pip install anomalib

# Install FrEIA, which provides flow-based invertible architectures for machine learning
!pip install FrEIA

# Install Kornia, a computer vision library that integrates with PyTorch
!pip install kornia

# Install dotenv to manage environment variables
!pip install python-dotenv

# Install open_clip, an implementation for training and inference with OpenAI's CLIP model
!pip install git+https://github.com/mlfoundations/open_clip.git

# Install torchmetrics, a library providing metrics for PyTorch models
!pip install torchmetrics

# Install PyTorch Lightning, a lightweight framework for organizing PyTorch code
!pip install lightning

# Install OpenVINO, Intel’s toolkit for optimized model deployment
!pip install openvino-dev

# Install the full version of anomalib for extended functionality
!anomalib install --option full

# Update the Weights & Biases (wandb) tool for experiment tracking
!pip install --upgrade wandb

# Install qdrant-client, a client for Qdrant’s vector search database
!pip install qdrant-client


Collecting FrEIA
  Using cached FrEIA-0.2-py3-none-any.whl
Installing collected packages: FrEIA
Successfully installed FrEIA-0.2
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1
Collecting git+https://github.com/mlfoundations/open_clip.git
  Cloning https://github.com/mlfoundations/open_clip.git to /tmp/pip-req-build-_4l8_af0
  Running command git clone --filter=blob:none --quiet https://github.com/mlfoundations/open_clip.git /tmp/pip-req-build-_4l8_af0
  Resolved https://github.com/mlfoundations/open_clip.git to commit 49eac2f27a5bb98a7f7ecc1154918880aa55256c
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting ftfy (from open_clip_torch==2.29.0)
  Downloading ftfy-6.3.1-p


### Step 2: Import Necessary Libraries

In [None]:
# Step 2: Import necessary libraries
import os
import torch
from torchvision import transforms
from anomalib.data import MVTec
from anomalib.engine import Engine
from anomalib.models import Patchcore, EfficientAd
from sklearn.metrics import f1_score, roc_auc_score, roc_curve, confusion_matrix, precision_recall_curve, average_precision_score
import matplotlib.pyplot as plt
import numpy as np
import warnings

  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)


### Step 3: Set Up the anomalib Directory
To work with anomalib directly, we’ll set up its directory in our environment. This step checks if the anomalib repository is already present, and if not, it clones the repository from GitHub.

In [None]:
# Define the path for the anomalib directory
ANOMALIB_DIR = '/kaggle/working/anomalib'

# Check if the anomalib directory exists
if not os.path.exists(ANOMALIB_DIR):
    # Clone the repository if it doesn't exist
    !git clone https://github.com/openvinotoolkit/anomalib.git {ANOMALIB_DIR}
else:
    print(f"Directory {ANOMALIB_DIR} already exists.")

# Change the working directory to the anomalib directory
%cd {ANOMALIB_DIR}

# Install the package
!pip install -e . -q

Cloning into '/kaggle/working/anomalib'...
remote: Enumerating objects: 11802, done.[K
remote: Counting objects: 100% (1339/1339), done.[K
remote: Compressing objects: 100% (925/925), done.[K
remote: Total 11802 (delta 622), reused 804 (delta 360), pack-reused 10463 (from 1)[K
Receiving objects: 100% (11802/11802), 67.65 MiB | 21.02 MiB/s, done.
Resolving deltas: 100% (7015/7015), done.
/kaggle/working/anomalib
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
  Building editable for anomalib (pyproject.toml) ... [?25l[?25hdone


### Step 4: Define Default Transform for Image Preprocessing
In this step, we create a transformation pipeline using torchvision.transforms. This pipeline will standardize images before they are input to the model, ensuring that all images have a uniform size, format, and normalization.

In [None]:
# Example preprocessing pipeline, applied in order:
#   1. resize every image to 256x256,
#   2. convert it to a tensor,
#   3. normalize each channel with the mean/std values given below.
default_transform = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

### Step 5: Set Dataset Path

In [None]:
# Root directory of the MVTec AD dataset -- double-check that it matches
# where the data actually lives in your environment
dataset_path = "/kaggle/working/mvtec_ad"


### Step 6: Initialize Data Module, Model, and Engine
In this step, we will set up the necessary components for training our anomaly detection model. We will initialize the data module, define the model architecture, and create an engine to manage the training process.

In [None]:
# Data: MVTec rooted at `dataset_path`, limited to a single category and
# served one training image per batch
datamodule = MVTec(
    root=dataset_path,
    category="tile",  # swap in another category as needed
    train_batch_size=1,
)

# Model with its default settings, and an engine capped at 5 epochs
model = EfficientAd()
engine = Engine(max_epochs=5)

# Run training
engine.fit(model=model, datamodule=datamodule)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
mvtec: 5.26GB [03:55, 22.4MB/s]                            


### Step 7: Model Evaluation and Metrics Computation
In this step, we will evaluate our trained anomaly detection model using various metrics and visualizations. We'll start by suppressing specific warnings, defining normalization parameters, and then proceed to evaluate the model on the test set.

In [None]:
# Evaluation cell: runs the engine's test loop, then manually collects one
# anomaly score per test image and derives AUROC, F1, confusion matrices and
# the ROC / F1-vs-threshold / precision-recall plots from those scores.

# Silence FutureWarning / UserWarning messages from here on
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# Channel-wise normalization used to build the `batch_imagenet` argument below
imagenet_normalization = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
)

# --------------------- Evaluation and Metrics Computation ---------------------

# Evaluate the model on the test set
print("Evaluating the model on the test set...")
engine.test(datamodule=datamodule, model=model)
print("Evaluation completed.\n")

# Ground-truth labels and predicted anomaly scores, one entry per test image
true_labels = []
predicted_scores = []

# Set model to evaluation mode
model.eval()

# Disable gradient calculations for inference
with torch.no_grad():
    # Loop over the test dataset to gather labels and anomaly scores
    for batch_idx, batch in enumerate(datamodule.test_dataloader()):
        try:
            images = batch['image'].to(model.device)
            labels = batch['label'].cpu().numpy()

            # Prepare batch_imagenet with ImageNet normalization
            batch_imagenet = imagenet_normalization(images)

            # Forward pass through the model to get predictions.
            # NOTE(review): whether `batch_imagenet` / `normalize` are actually
            # consumed depends on the installed anomalib version -- confirm.
            outputs = model(batch=images, batch_imagenet=batch_imagenet, normalize=True)

            if 'anomaly_map' not in outputs:
                raise KeyError("Model output does not contain 'anomaly_map'.")

            # Collapse each anomaly map to a single score per image.
            # Assumes a 4-D map (batch first) -- axes 1..3 are averaged away.
            anomaly_scores = outputs['anomaly_map'].cpu().numpy().mean(axis=(1, 2, 3))

        except Exception as e:
            # Best effort: report the failing batch and move on. Nothing has
            # been recorded for this batch yet, so labels and scores stay aligned.
            print(f"Error processing batch {batch_idx + 1}: {e}\n")
            continue

        # Record labels and scores together, only once both were obtained
        true_labels.extend(labels)
        predicted_scores.extend(anomaly_scores)

        print(f"Processed batch {batch_idx + 1} successfully.\n")

# Convert lists to numpy arrays for processing
true_labels = np.array(true_labels).flatten()
predicted_scores = np.array(predicted_scores).flatten()

# Every metric below pairs label i with score i, so the lengths must agree
if len(true_labels) != len(predicted_scores):
    raise ValueError(
        f"Collected {len(true_labels)} labels but {len(predicted_scores)} scores; "
        "cannot compute metrics on misaligned data."
    )

# Debugging: Inspect unique classes in true_labels
unique_classes = np.unique(true_labels)
print(f"Number of unique classes in true_labels: {len(unique_classes)}")
print(f"Unique classes in true_labels: {unique_classes}")

# Ensure binary classification
if len(unique_classes) != 2:
    raise ValueError(f"Expected binary classification, but found {len(unique_classes)} classes: {unique_classes}")
else:
    print("\nConfirmed binary classification with classes:", unique_classes)

# Compute AUROC. Start from NaN so `auroc` is always defined for the plot label
# even if the computation below fails.
auroc = float("nan")
try:
    auroc = roc_auc_score(true_labels, predicted_scores)
    print(f"\nAUROC Score: {auroc:.4f}")
except ValueError as e:
    print(f"Error computing ROC AUC Score: {e}")

# Plot ROC Curve
try:
    fpr, tpr, _ = roc_curve(true_labels, predicted_scores)
    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, color='blue', lw=2, label=f'AUROC = {auroc:.4f}')
    plt.plot([0, 1], [0, 1], color='grey', linestyle='--', label='Random Guess')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend(loc="lower right")
    plt.grid(True)
    plt.show()
except Exception as e:
    print(f"Error plotting ROC Curve: {e}")

# Threshold Analysis with a fixed cut-off.
# NOTE(review): 0.5 is only meaningful if the scores are scaled to [0, 1] -- confirm.
threshold = 0.5
predicted_labels = (predicted_scores > threshold).astype(int)
f1 = f1_score(true_labels, predicted_labels)
print(f"F1 Score with threshold {threshold}: {f1:.4f}")

# Confusion Matrix at threshold 0.5
conf_matrix = confusion_matrix(true_labels, predicted_labels)
print("\nConfusion Matrix at Threshold 0.5:")
print(conf_matrix)
print("\nConfusion Matrix Breakdown:")
print(f"True Negatives: {conf_matrix[0,0]}")
print(f"False Positives: {conf_matrix[0,1]}")
print(f"False Negatives: {conf_matrix[1,0]}")
print(f"True Positives: {conf_matrix[1,1]}")

# Optimal Threshold Calculation
precision, recall, thresholds_pr = precision_recall_curve(true_labels, predicted_scores)
f1_scores = 2 * (precision * recall) / (precision + recall + 1e-8)
# precision/recall have one more entry than thresholds_pr (a final point with
# no associated threshold), so only the first len(thresholds_pr) F1 values are
# valid candidates for the optimum.
optimal_idx = np.argmax(f1_scores[:-1])
optimal_threshold = thresholds_pr[optimal_idx]
max_f1 = f1_scores[optimal_idx]

print(f"\nOptimal Threshold based on Precision-Recall Curve: {optimal_threshold:.4f}")
print(f"Maximum F1 Score: {max_f1:.4f}")

# Apply optimal threshold. precision_recall_curve defines precision[i]/recall[i]
# for predictions with score >= thresholds[i], so the comparison must be
# inclusive for the recomputed metrics to match `max_f1`.
predicted_labels_optimal = (predicted_scores >= optimal_threshold).astype(int)

# Recompute F1 Score at optimal threshold
f1_optimal = f1_score(true_labels, predicted_labels_optimal)
print(f"F1 Score with Optimal Threshold ({optimal_threshold:.4f}): {f1_optimal:.4f}")

# Confusion Matrix at optimal threshold
conf_matrix_optimal = confusion_matrix(true_labels, predicted_labels_optimal)
print("\nConfusion Matrix at Optimal Threshold:")
print(conf_matrix_optimal)
print("\nConfusion Matrix Breakdown at Optimal Threshold:")
print(f"True Negatives: {conf_matrix_optimal[0,0]}")
print(f"False Positives: {conf_matrix_optimal[0,1]}")
print(f"False Negatives: {conf_matrix_optimal[1,0]}")
print(f"True Positives: {conf_matrix_optimal[1,1]}")

# Plot F1 Score vs Threshold (the last F1 value has no threshold and is dropped)
plt.figure(figsize=(8, 6))
plt.plot(thresholds_pr, f1_scores[:-1], color='orange')
plt.axvline(x=optimal_threshold, color='red', linestyle='--', label=f'Optimal Threshold = {optimal_threshold:.4f}')
plt.xlabel('Threshold')
plt.ylabel('F1 Score')
plt.title('F1 Score vs Threshold')
plt.legend(loc="best")
plt.grid(True)
plt.show()

# Plot Precision-Recall Curve, marking the operating point of the optimal threshold
plt.figure(figsize=(8, 6))
plt.plot(recall, precision, color='purple', lw=2, label=f'Precision-Recall Curve (AP = {average_precision_score(true_labels, predicted_scores):.4f})')
plt.scatter(recall[optimal_idx], precision[optimal_idx], color='red', label=f'Optimal Threshold = {optimal_threshold:.4f}')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend(loc="upper right")
plt.grid(True)
plt.show()

### Step 8: Visualizing Model Performance with the plot_metrics Function
In this step, we use the `plot_metrics` function to visualize and evaluate the performance of a binary classification model. It plots the ROC curve, the F1 score as a function of the decision threshold, and the precision-recall curve. Knowing how to interpret these plots is crucial in machine learning, because they let you assess how well your model is making predictions.

In [None]:
def plot_metrics(true_labels, predicted_scores):
    """Plot ROC, F1-vs-threshold and precision-recall curves for binary labels.

    Also prints the threshold that maximizes F1 along the precision-recall
    curve, together with that maximum F1 value.

    Args:
        true_labels: Ground-truth binary labels, one per sample.
        predicted_scores: Predicted score per sample, in the same order as
            `true_labels`. Passed unchanged to the scikit-learn metric
            functions.

    Returns:
        None. The figures are shown with `plt.show()` and the summary values
        are printed.
    """
    print("Starting to plot metrics...")  # Debugging line

    # Plot ROC Curve. Failures here are reported but do not stop the other plots.
    try:
        fpr, tpr, _ = roc_curve(true_labels, predicted_scores)
        plt.figure(figsize=(8, 6))
        plt.plot(fpr, tpr, color='blue', lw=2, label=f'AUROC = {roc_auc_score(true_labels, predicted_scores):.4f}')
        plt.plot([0, 1], [0, 1], color='grey', linestyle='--', label='Random Guess')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic (ROC) Curve')
        plt.legend(loc="lower right")
        plt.grid(True)
        plt.show()
    except Exception as e:
        print(f"Error plotting ROC Curve: {e}")

    # Calculate Precision-Recall Curve; the small epsilon avoids division by zero
    precision, recall, thresholds_pr = precision_recall_curve(true_labels, predicted_scores)
    f1_scores = 2 * (precision * recall) / (precision + recall + 1e-8)

    # Find optimal threshold. precision/recall have one more entry than
    # thresholds_pr (a final point with no threshold), so only the first
    # len(thresholds_pr) F1 values are valid candidates.
    optimal_idx = np.argmax(f1_scores[:-1])
    optimal_threshold = thresholds_pr[optimal_idx]
    max_f1 = f1_scores[optimal_idx]

    # Debugging print statements for thresholds and F1 score
    print(f"Optimal Threshold: {optimal_threshold}")
    print(f"Maximum F1 Score: {max_f1}")

    # Plot F1 Score vs Threshold
    plt.figure(figsize=(8, 6))
    plt.plot(thresholds_pr, f1_scores[:-1], color='orange')
    plt.axvline(x=optimal_threshold, color='red', linestyle='--', label=f'Optimal Threshold = {optimal_threshold:.4f}')
    plt.xlabel('Threshold')
    plt.ylabel('F1 Score')
    plt.title('F1 Score vs Threshold')
    plt.legend(loc="best")
    plt.grid(True)
    plt.show()

    # Plot Precision-Recall Curve, marking the point of the optimal threshold
    plt.figure(figsize=(8, 6))
    plt.plot(recall, precision, color='purple', lw=2, label=f'Precision-Recall Curve (AP = {average_precision_score(true_labels, predicted_scores):.4f})')
    plt.scatter(recall[optimal_idx], precision[optimal_idx], color='red', label=f'Optimal Threshold = {optimal_threshold:.4f}')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.legend(loc="upper right")
    plt.grid(True)
    plt.show()

    print("Plotting metrics Finished.")


# Call the function at cell level. Placing this call inside the function body
# would mean it never runs from the cell, and would recurse without end if the
# function were ever invoked.
plot_metrics(true_labels, predicted_scores)