In [1]:
from sklearn.svm import OneClassSVM
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset
from PIL import Image
import os
import numpy as np
import torch
import pytorch_msssim as ssim
import matplotlib as olt
import torch.nn as nn

#### Data Path

In [2]:
dataset_path = './mvtec_anomaly_detection/'

In [3]:
# print('Training Set: \t\t' + ', '.join(train_set))
# print('Testing Folders: \t' + ', '.join(test_folders))
# print('Ground Truth Folders \t' + ', '.join(ground_truth_folders))

# Pre-Processing & Data augmentation

In [4]:
transform_pipeline = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
])

#### Read image and convert it to gray image

In [5]:
img_height, img_width, img_channels = 256, 256, 1
X, y_good, y_bad, ground_truth = [], [], [], []
all_items = os.listdir(dataset_path)
class_names = [item for item in all_items 
               if os.path.isdir(os.path.join(dataset_path, item))]

for class_name in class_names:
    train_path = dataset_path + class_name + '/train/good/'

    test_path = dataset_path + class_name + '/test/'

    ground_truth_path = dataset_path + class_name + '/ground_truth/'

    train_set = os.listdir(train_path)

    test_folders = os.listdir(test_path)

    ground_truth_folders = os.listdir(ground_truth_path)
    
    for img_name in train_set:
        img_path = train_path + img_name
        img = Image.open(img_path)
        img = img.resize((img_height, img_width))
        img_gray = img.convert('L')
        img_gray = transform_pipeline(img_gray)
        X.append(img_gray)

    for test_folder in test_folders:
        test_folder_path = test_path + test_folder

        for img_name in sorted(os.listdir(test_folder_path)):
            img_path = test_folder_path + '/' + img_name
            img = Image.open(img_path)
            img = img.resize((img_height, img_width))
            img_gray = img.convert('L')
            img_gray = transform_pipeline(img_gray)

            if test_folder == 'good':
                y_good.append(img_gray)
            else:
                y_bad.append(img_gray)
            
    for ground_truth_folder in ground_truth_folders:
        ground_truth_folder_path = ground_truth_path + ground_truth_folder

        for img_name in sorted(os.listdir(ground_truth_folder_path)):
            img_path = ground_truth_folder_path + '/' + img_name
            img = Image.open(img_path)
            img = img.resize((img_height, img_width))
            img_gray = img.convert('L')
            img_gray = transform_pipeline(img_gray)
            ground_truth.append(img_gray)

print("Training: \t\t", np.shape(X))
print("Good Testing: \t\t", np.shape(y_good))
print("Bad Testing: \t\t", np.shape(y_bad))
print("Ground Truth: \t\t", np.shape(ground_truth))

: 

In [None]:
print(X[0])

tensor([[[0.0627, 0.0627, 0.0667,  ..., 0.0588, 0.0588, 0.0588],
         [0.0627, 0.0627, 0.0667,  ..., 0.0588, 0.0588, 0.0588],
         [0.0627, 0.0667, 0.0627,  ..., 0.0627, 0.0588, 0.0627],
         ...,
         [0.0588, 0.0588, 0.0588,  ..., 0.0627, 0.0627, 0.0627],
         [0.0627, 0.0588, 0.0588,  ..., 0.0667, 0.0627, 0.0588],
         [0.0627, 0.0588, 0.0627,  ..., 0.0627, 0.0627, 0.0627]]])


#### SSIM

In [None]:
def SSIMLoss(y_true, y_pred):
    return 1 - ssim(y_true, y_pred, data_range=1.0, size_average=True)

# SVM

In [None]:
X_tensor = torch.stack(X)
y_good_tensor = torch.stack(y_good)
y_bad_tensor = torch.stack(y_bad)

n_features = img_height * img_width
X_train_features = X_tensor.reshape(-1, n_features)
y_good_features = y_good_tensor.reshape(-1, n_features)
y_bad_features = y_bad_tensor.reshape(-1, n_features)

X_train_features = X_train_features.detach().cpu().numpy()
y_good_features = y_good_features.detach().cpu().numpy()
y_bad_features = y_bad_features.detach().cpu().numpy()

print("Training feature shape: \t", X_train_features.shape)
print("Good test feature shape: \t", y_good_features.shape)
print("Bad test feature shape: \t", y_bad_features.shape)

Training feature shape: 	 (3629, 1024)
Good test feature shape: 	 (467, 1024)
Bad test feature shape: 	 (1258, 1024)


In [None]:
print("Training One-Class SVM...")
svm = OneClassSVM(kernel='rbf', nu=0.1, gamma='auto')

svm.fit(X_train_features)
print("SVM training complete")

Training One-Class SVM...
SVM training complete


In [None]:
X_test_features = np.concatenate((y_good_features, y_bad_features), axis=0)

labels_good = np.zeros(y_good_features.shape[0])
labels_bad = np.ones(y_bad_features.shape[0])
y_true_labels = np.concatenate((labels_good, labels_bad), axis=0)

print("Total test features shape: \t", X_test_features.shape)
print("Total test labels shape: \t", y_true_labels.shape)

Total test features shape: 	 (1725, 1024)
Total test labels shape: 	 (1725,)


In [None]:
svm_predictions = svm.predict(X_test_features)

y_pred_labels = [0 if p == 1 else 1 for p in svm_predictions]

accuracy = accuracy_score(y_true_labels, y_pred_labels)
auc = roc_auc_score(y_true_labels, y_pred_labels)

print("--- Evaluation Results ---")
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"AUC Score: {auc:.4f}")

print("\nConfusion Matrix:")
print(confusion_matrix(y_true_labels, y_pred_labels))

--- Evaluation Results ---
Accuracy: 31.19%
AUC Score: 0.5067

Confusion Matrix:
[[ 435   32]
 [1155  103]]


In [None]:
import csv
with open('submit_SVM.csv', mode='w', newline='') as submit_file:
    csv_writer = csv.writer(submit_file)
    header = ['id', 'prediction']
    print(header)
    csv_writer.writerow(header)
    for i in range(svm_predictions.shape[0]):
        row = [str(i), 0 if svm_predictions[i] == -1 else 1]
        csv_writer.writerow(row)
        print(row)

['id', 'prediction']
['0', 1]
['1', 1]
['2', 1]
['3', 1]
['4', 1]
['5', 1]
['6', 1]
['7', 1]
['8', 1]
['9', 1]
['10', 0]
['11', 1]
['12', 1]
['13', 1]
['14', 1]
['15', 0]
['16', 0]
['17', 0]
['18', 0]
['19', 0]
['20', 0]
['21', 1]
['22', 0]
['23', 0]
['24', 0]
['25', 0]
['26', 0]
['27', 0]
['28', 0]
['29', 0]
['30', 0]
['31', 0]
['32', 0]
['33', 0]
['34', 0]
['35', 1]
['36', 0]
['37', 1]
['38', 1]
['39', 1]
['40', 1]
['41', 1]
['42', 1]
['43', 1]
['44', 1]
['45', 1]
['46', 1]
['47', 1]
['48', 1]
['49', 1]
['50', 1]
['51', 1]
['52', 1]
['53', 1]
['54', 1]
['55', 1]
['56', 1]
['57', 1]
['58', 1]
['59', 1]
['60', 1]
['61', 1]
['62', 1]
['63', 1]
['64', 1]
['65', 1]
['66', 1]
['67', 1]
['68', 1]
['69', 1]
['70', 1]
['71', 1]
['72', 1]
['73', 1]
['74', 1]
['75', 1]
['76', 1]
['77', 1]
['78', 1]
['79', 1]
['80', 1]
['81', 1]
['82', 1]
['83', 1]
['84', 1]
['85', 1]
['86', 1]
['87', 1]
['88', 1]
['89', 1]
['90', 1]
['91', 1]
['92', 1]
['93', 1]
['94', 1]
['95', 1]
['96', 1]
['97', 1]
['98', 1]

# KNN

In [None]:
k = 5

print("Fitting KNN model...")
knn = NearestNeighbors(n_neighbors=k, n_jobs=-1)
knn.fit(X_train_features)
print(X_train_features)
print("Model fitting complete")

Fitting KNN model...
[[0.0627451  0.0627451  0.06666667 ... 0.0627451  0.0627451  0.0627451 ]
 [0.05490196 0.05490196 0.05490196 ... 0.05490196 0.05490196 0.05490196]
 [0.0627451  0.0627451  0.0627451  ... 0.06666667 0.06666667 0.0627451 ]
 ...
 [0.10196079 0.09803922 0.09803922 ... 0.09803922 0.09411765 0.09803922]
 [0.09019608 0.09019608 0.09019608 ... 0.09019608 0.09411765 0.09411765]
 [0.09411765 0.09411765 0.09411765 ... 0.09411765 0.09411765 0.09019608]]
Model fitting complete


In [None]:
distances, indices = knn.kneighbors(X_test_features)

anomaly_scores = np.mean(distances, axis=1)

print(f"Calculated {len(anomaly_scores)} anomaly scores")

Calculated 1725 anomaly scores


In [None]:
train_distances, _ = knn.kneighbors(X_train_features)
train_anomaly_scores = np.mean(train_distances, axis=1)

threshold = np.percentile(train_anomaly_scores, 95)
print(f"Anomaly Threshold set to: {threshold:.4f}")

y_pred_labels = (anomaly_scores > threshold).astype(int)

accuracy = accuracy_score(y_true_labels, y_pred_labels)
auc = roc_auc_score(y_true_labels, anomaly_scores)
print("\n--- Evaluation result ---")
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"AUC Score: {auc:.4f}")
print("\nConfusion Matrix:")
print(confusion_matrix(y_true_labels, y_pred_labels))

Anomaly Threshold set to: 2.7589

--- Evaluation result ---
Accuracy: 35.30%
AUC Score: 0.4745

Confusion Matrix:
[[ 398   69]
 [1047  211]]


In [None]:
with open('submit_KNN.csv', mode='w', newline='') as submit_file:
    csv_writer = csv.writer(submit_file)
    header = ['id', 'prediction']
    print(header)
    csv_writer.writerow(header)
    for i in range(svm_predictions.shape[0]):
        row = [str(i), y_pred_labels[i]]
        csv_writer.writerow(row)
        print(row)

['id', 'prediction']
['0', np.int64(0)]
['1', np.int64(0)]
['2', np.int64(0)]
['3', np.int64(0)]
['4', np.int64(0)]
['5', np.int64(0)]
['6', np.int64(0)]
['7', np.int64(0)]
['8', np.int64(0)]
['9', np.int64(0)]
['10', np.int64(0)]
['11', np.int64(0)]
['12', np.int64(0)]
['13', np.int64(0)]
['14', np.int64(0)]
['15', np.int64(0)]
['16', np.int64(0)]
['17', np.int64(0)]
['18', np.int64(0)]
['19', np.int64(0)]
['20', np.int64(0)]
['21', np.int64(0)]
['22', np.int64(0)]
['23', np.int64(0)]
['24', np.int64(0)]
['25', np.int64(0)]
['26', np.int64(0)]
['27', np.int64(0)]
['28', np.int64(0)]
['29', np.int64(0)]
['30', np.int64(0)]
['31', np.int64(0)]
['32', np.int64(0)]
['33', np.int64(0)]
['34', np.int64(0)]
['35', np.int64(0)]
['36', np.int64(0)]
['37', np.int64(0)]
['38', np.int64(0)]
['39', np.int64(0)]
['40', np.int64(0)]
['41', np.int64(0)]
['42', np.int64(0)]
['43', np.int64(0)]
['44', np.int64(0)]
['45', np.int64(0)]
['46', np.int64(0)]
['47', np.int64(0)]
['48', np.int64(0)]
['49', np

# AutoEncoder

In [None]:
class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()

        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1), # [N, 1, 256, 256] -> [N, 16, 128, 128]
            nn.ReLU(True),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1), # [N, 16, 128, 128] -> [N, 32, 64, 64]
            nn.ReLU(True),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1), # [N, 32, 64, 64] -> [N, 64, 32, 32]
            nn.ReLU(True),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1), # [N, 64, 32, 32] -> [N, 128, 16, 16]
            nn.ReLU(True),
            nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1), # [N, 128, 16, 16] -> [N, 256, 8, 8]
            nn.ReLU(True),
        )

        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, output_padding=1), # [N, 256, 8, 8] -> [N, 128, 16, 16]
            nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1), # [N, 128, 16, 16] -> [N, 64, 32, 32]
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1), # [N, 64, 32, 32] -> [N, 32, 64, 64]
            nn.ReLU(True),
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1), # [N, 32, 64, 64] -> [N, 16, 128,128]
            nn.ReLU(True),
            nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1), #[N, 16, 128, 128] -> #[N, 1, 256, 256]
            nn.Sigmoid(),
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [65]:
num_epochs = 50
batch_size = 64
learning_rate = 1e-3

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device {device}")

train_dataset = TensorDataset(X_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

model = AutoEncoder().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

print("--- Starting Training ---")

for epoch in range(num_epochs):
    epoch_loss = 0.0
    for data in train_loader:
        images = data[0].to(device)

        outputs = model(images)

        loss = criterion(outputs, images)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item() * images.size(0)

    avg_epoch_loss = epoch_loss / len(train_dataset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_epoch_loss:.6f}")

print("--- Training Complete ---")

Using device cuda
--- Starting Training ---
Epoch [1/50], Loss: 0.051359
Epoch [2/50], Loss: 0.016768
Epoch [3/50], Loss: 0.007680
Epoch [4/50], Loss: 0.006189
Epoch [5/50], Loss: 0.005587
Epoch [6/50], Loss: 0.005206
Epoch [7/50], Loss: 0.004911
Epoch [8/50], Loss: 0.004704
Epoch [9/50], Loss: 0.004511
Epoch [10/50], Loss: 0.004320
Epoch [11/50], Loss: 0.004168
Epoch [12/50], Loss: 0.004004
Epoch [13/50], Loss: 0.003884
Epoch [14/50], Loss: 0.003766
Epoch [15/50], Loss: 0.003669
Epoch [16/50], Loss: 0.003552
Epoch [17/50], Loss: 0.003438
Epoch [18/50], Loss: 0.003395
Epoch [19/50], Loss: 0.003308
Epoch [20/50], Loss: 0.003254
Epoch [21/50], Loss: 0.003164
Epoch [22/50], Loss: 0.003107
Epoch [23/50], Loss: 0.003167
Epoch [24/50], Loss: 0.003000
Epoch [25/50], Loss: 0.002934
Epoch [26/50], Loss: 0.002908
Epoch [27/50], Loss: 0.002867
Epoch [28/50], Loss: 0.002810
Epoch [29/50], Loss: 0.002759
Epoch [30/50], Loss: 0.002728
Epoch [31/50], Loss: 0.002691
Epoch [32/50], Loss: 0.002649
Epoch

In [None]:
model.eval()

y_good_tensor = y_good_tensor.to(device)
y_bad_tensor = y_bad_tensor.to(device)

with torch.no_grad():
    reconstructed_good = model(y_good_tensor)
    reconstructed_bad = model(y_bad_tensor)

    test_loss = criterion(reconstructed_good, y_good_tensor)

loss_fn = nn.MSELoss(reduction='none')

error_good = loss_fn(reconstructed_good, y_good_tensor)
scores_good = torch.mean(error_good, dim=(1, 2, 3))

error_bad = loss_fn(reconstructed_bad, y_bad_tensor)
scores_bad = torch.mean(error_bad, dim=(1, 2, 3))

anomaly_scores_tensor = torch.cat([scores_good, scores_bad])
anomaly_scores_numpy = anomaly_scores_tensor.cpu().numpy()

auc = roc_auc_score(y_true_labels, anomaly_scores_numpy)

print("--- AutoEncoder Evaluation ---")
print(f"AUC Score: {auc:.4f}")
print(f"Testing Loss: {test_loss.item():.6f}")

--- AutoEncoder Evaluation ---
AUC Score: 0.4791
