In [52]:
from sklearn.svm import OneClassSVM
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset
from PIL import Image
import os
import numpy as np
import torch
import pytorch_msssim as ssim
import matplotlib as olt
import torch.nn as nn

#### Data Path

In [19]:
dataset_path = './mvtec_anomaly_detection/'

In [None]:
# print('Training Set: \t\t' + ', '.join(train_set))
# print('Testing Folders: \t' + ', '.join(test_folders))
# print('Ground Truth Folders \t' + ', '.join(ground_truth_folders))

Training Set: 		029.png, 123.png, 160.png, 079.png, 201.png, 006.png, 024.png, 089.png, 145.png, 150.png, 082.png, 032.png, 092.png, 202.png, 100.png, 163.png, 133.png, 184.png, 049.png, 124.png, 192.png, 045.png, 067.png, 022.png, 131.png, 025.png, 137.png, 030.png, 084.png, 072.png, 060.png, 063.png, 129.png, 177.png, 154.png, 088.png, 208.png, 066.png, 078.png, 085.png, 037.png, 086.png, 018.png, 065.png, 004.png, 055.png, 193.png, 102.png, 039.png, 048.png, 068.png, 121.png, 105.png, 118.png, 168.png, 069.png, 023.png, 181.png, 196.png, 199.png, 179.png, 016.png, 050.png, 097.png, 057.png, 093.png, 080.png, 008.png, 206.png, 169.png, 187.png, 013.png, 153.png, 115.png, 130.png, 081.png, 110.png, 090.png, 094.png, 142.png, 178.png, 167.png, 026.png, 186.png, 104.png, 071.png, 198.png, 040.png, 188.png, 146.png, 197.png, 017.png, 091.png, 095.png, 135.png, 011.png, 112.png, 036.png, 173.png, 054.png, 073.png, 035.png, 033.png, 014.png, 182.png, 046.png, 166.png, 003.png, 051.png, 038

# Pre-Processing & Data augmentation

In [5]:
transform_pipeline = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
])

#### Read image and convert it to gray image

In [44]:
img_height, img_width, img_channels = 64, 64, 1
X, y_good, y_bad, ground_truth = [], [], [], []
all_items = os.listdir(dataset_path)
class_names = [item for item in all_items 
               if os.path.isdir(os.path.join(dataset_path, item))]

for class_name in class_names:
    train_path = dataset_path + class_name + '/train/good/'

    test_path = dataset_path + class_name + '/test/'

    ground_truth_path = dataset_path + class_name + '/ground_truth/'

    train_set = os.listdir(train_path)

    test_folders = os.listdir(test_path)

    ground_truth_folders = os.listdir(ground_truth_path)
    
    for img_name in train_set:
        img_path = train_path + img_name
        img = Image.open(img_path)
        img = img.resize((img_height, img_width))
        img_gray = img.convert('L')
        img_gray = transform_pipeline(img_gray)
        X.append(img_gray)

    for test_folder in test_folders:
        test_folder_path = test_path + test_folder

        for img_name in sorted(os.listdir(test_folder_path)):
            img_path = test_folder_path + '/' + img_name
            img = Image.open(img_path)
            img = img.resize((img_height, img_width))
            img_gray = img.convert('L')
            img_gray = transform_pipeline(img_gray)

            if test_folder == 'good':
                y_good.append(img_gray)
            else:
                y_bad.append(img_gray)
            
    for ground_truth_folder in ground_truth_folders:
        ground_truth_folder_path = ground_truth_path + ground_truth_folder

        for img_name in sorted(os.listdir(ground_truth_folder_path)):
            img_path = ground_truth_folder_path + '/' + img_name
            img = Image.open(img_path)
            img = img.resize((img_height, img_width))
            img_gray = img.convert('L')
            img_gray = transform_pipeline(img_gray)
            ground_truth.append(img_gray)

print("Training: \t\t", np.shape(X))
print("Good Testing: \t\t", np.shape(y_good))
print("Bad Testing: \t\t", np.shape(y_bad))
print("Ground Truth: \t\t", np.shape(ground_truth))

Training: 		 (3629, 1, 64, 64)
Good Testing: 		 (467, 1, 64, 64)
Bad Testing: 		 (1258, 1, 64, 64)
Ground Truth: 		 (1258, 1, 64, 64)


In [38]:
print(X[0])

tensor([[[0.0627, 0.0627, 0.0667,  ..., 0.0588, 0.0588, 0.0588],
         [0.0627, 0.0627, 0.0667,  ..., 0.0588, 0.0588, 0.0588],
         [0.0627, 0.0667, 0.0627,  ..., 0.0627, 0.0588, 0.0627],
         ...,
         [0.0588, 0.0588, 0.0588,  ..., 0.0627, 0.0627, 0.0627],
         [0.0627, 0.0588, 0.0588,  ..., 0.0667, 0.0627, 0.0588],
         [0.0627, 0.0588, 0.0627,  ..., 0.0627, 0.0627, 0.0627]]])


#### SSIM

In [18]:
def SSIMLoss(y_true, y_pred):
    return 1 - ssim(y_true, y_pred, data_range=1.0, size_average=True)

# SVM

In [39]:
X_tensor = torch.stack(X)
y_good_tensor = torch.stack(y_good)
y_bad_tensor = torch.stack(y_bad)

n_features = img_height * img_width
X_train_features = X_tensor.reshape(-1, n_features)
y_good_features = y_good_tensor.reshape(-1, n_features)
y_bad_features = y_bad_tensor.reshape(-1, n_features)

X_train_features = X_train_features.detach().cpu().numpy()
y_good_features = y_good_features.detach().cpu().numpy()
y_bad_features = y_bad_features.detach().cpu().numpy()

print("Training feature shape: \t", X_train_features.shape)
print("Good test feature shape: \t", y_good_features.shape)
print("Bad test feature shape: \t", y_bad_features.shape)

Training feature shape: 	 (3629, 1024)
Good test feature shape: 	 (467, 1024)
Bad test feature shape: 	 (1258, 1024)


In [40]:
print("Training One-Class SVM...")
svm = OneClassSVM(kernel='rbf', nu=0.1, gamma='auto')

svm.fit(X_train_features)
print("SVM training complete")

Training One-Class SVM...
SVM training complete


In [41]:
X_test_features = np.concatenate((y_good_features, y_bad_features), axis=0)

labels_good = np.zeros(y_good_features.shape[0])
labels_bad = np.ones(y_bad_features.shape[0])
y_true_labels = np.concatenate((labels_good, labels_bad), axis=0)

print("Total test features shape: \t", X_test_features.shape)
print("Total test labels shape: \t", y_true_labels.shape)

Total test features shape: 	 (1725, 1024)
Total test labels shape: 	 (1725,)


In [None]:
svm_predictions = svm.predict(X_test_features)

y_pred_labels = [0 if p == 1 else 1 for p in svm_predictions]

accuracy = accuracy_score(y_true_labels, y_pred_labels)
auc = roc_auc_score(y_true_labels, y_pred_labels)

print("--- Evaluation Results ---")
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"AUC Score: {auc:.4f}")

print("\nConfusion Matrix:")
print(confusion_matrix(y_true_labels, y_pred_labels))


--- Evaluation Results ---
Accuracy: 31.19%
AUC Score: 0.5067

Confusion Matrix:
[[ 435   32]
 [1155  103]]


In [43]:
import csv
with open('submit_SVM.csv', mode='w', newline='') as submit_file:
    csv_writer = csv.writer(submit_file)
    header = ['id', 'prediction']
    print(header)
    csv_writer.writerow(header)
    for i in range(svm_predictions.shape[0]):
        row = [str(i), 0 if svm_predictions[i] == -1 else 1]
        csv_writer.writerow(row)
        print(row)

['id', 'prediction']
['0', 1]
['1', 1]
['2', 1]
['3', 1]
['4', 1]
['5', 1]
['6', 1]
['7', 1]
['8', 1]
['9', 1]
['10', 0]
['11', 1]
['12', 1]
['13', 1]
['14', 1]
['15', 0]
['16', 0]
['17', 0]
['18', 0]
['19', 0]
['20', 0]
['21', 1]
['22', 0]
['23', 0]
['24', 0]
['25', 0]
['26', 0]
['27', 0]
['28', 0]
['29', 0]
['30', 0]
['31', 0]
['32', 0]
['33', 0]
['34', 0]
['35', 1]
['36', 0]
['37', 1]
['38', 1]
['39', 1]
['40', 1]
['41', 1]
['42', 1]
['43', 1]
['44', 1]
['45', 1]
['46', 1]
['47', 1]
['48', 1]
['49', 1]
['50', 1]
['51', 1]
['52', 1]
['53', 1]
['54', 1]
['55', 1]
['56', 1]
['57', 1]
['58', 1]
['59', 1]
['60', 1]
['61', 1]
['62', 1]
['63', 1]
['64', 1]
['65', 1]
['66', 1]
['67', 1]
['68', 1]
['69', 1]
['70', 1]
['71', 1]
['72', 1]
['73', 1]
['74', 1]
['75', 1]
['76', 1]
['77', 1]
['78', 1]
['79', 1]
['80', 1]
['81', 1]
['82', 1]
['83', 1]
['84', 1]
['85', 1]
['86', 1]
['87', 1]
['88', 1]
['89', 1]
['90', 1]
['91', 1]
['92', 1]
['93', 1]
['94', 1]
['95', 1]
['96', 1]
['97', 1]
['98', 1]

# KNN

In [46]:
k = 5

print("Fitting KNN model...")
knn = NearestNeighbors(n_neighbors=k, n_jobs=-1)
knn.fit(X_train_features)
print(X_train_features)
print("Model fitting complete")

Fitting KNN model...
[[0.0627451  0.0627451  0.06666667 ... 0.0627451  0.0627451  0.0627451 ]
 [0.05490196 0.05490196 0.05490196 ... 0.05490196 0.05490196 0.05490196]
 [0.0627451  0.0627451  0.0627451  ... 0.06666667 0.06666667 0.0627451 ]
 ...
 [0.10196079 0.09803922 0.09803922 ... 0.09803922 0.09411765 0.09803922]
 [0.09019608 0.09019608 0.09019608 ... 0.09019608 0.09411765 0.09411765]
 [0.09411765 0.09411765 0.09411765 ... 0.09411765 0.09411765 0.09019608]]
Model fitting complete


In [47]:
distances, indices = knn.kneighbors(X_test_features)

anomaly_scores = np.mean(distances, axis=1)

print(f"Calculated {len(anomaly_scores)} anomaly scores")

Calculated 1725 anomaly scores


In [50]:
train_distances, _ = knn.kneighbors(X_train_features)
train_anomaly_scores = np.mean(train_distances, axis=1)

threshold = np.percentile(train_anomaly_scores, 95)
print(f"Anomaly Threshold set to: {threshold:.4f}")

y_pred_labels = (anomaly_scores > threshold).astype(int)

accuracy = accuracy_score(y_true_labels, y_pred_labels)
auc = roc_auc_score(y_true_labels, anomaly_scores)
print("\n--- Evaluation result ---")
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"AUC Score: {auc:.4f}")
print("\nConfusion Matrix:")
print(confusion_matrix(y_true_labels, y_pred_labels))

Anomaly Threshold set to: 2.7589

--- Evaluation result ---
Accuracy: 35.30%
AUC Score: 0.4745

Confusion Matrix:
[[ 398   69]
 [1047  211]]


# AutoEncoder

In [None]:
class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()

        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1), # [N, 1, 64, 64] -> [N, 16, 32, 32]
            nn.ReLU(True),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1), # [N, 16, 32, 32] -> [N, 32, 16, 16]
            nn.ReLU(True),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1), # [N, 32, 16, 16] -> [N, 64, 8, 8]
            nn.ReLU(True),
        )

        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1), # [N, 64, 8, 8] -> [N, 32, 16, 16]
            nn.ReLU(True),
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1), # [N, 32, 16, 16] -> [N, 16, 32, 32]
            nn.ReLU(True),
            nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1), #[N, 16, 32, 32] -> #[N, 1, 64, 64]
            nn.Sigmoid(),
        )

    def formard(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [None]:
num_epochs = 50
batch_size = 64
learning_rate = 1e-3

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device {device}")

train_dataset = TensorDataset(X_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

model = AutoEncoder().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

print("--- Starting Training ---")