<a href="https://colab.research.google.com/github/Murad-pitafi/Computer-Vision/blob/main/Anomaly_Detection_Using_Autoencoders.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split


In [2]:
class SyntheticAnomalyDataset(Dataset):
    def __init__(self, n_samples=1000, anomaly_ratio=0.1):
        self.normal_data = np.random.normal(0, 1, (int(n_samples * (1 - anomaly_ratio)), 20))
        self.anomalous_data = np.random.uniform(-10, 10, (int(n_samples * anomaly_ratio), 20))
        self.data = np.vstack((self.normal_data, self.anomalous_data))
        self.labels = np.hstack((np.zeros(len(self.normal_data)), np.ones(len(self.anomalous_data))))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.data[idx], dtype=torch.float32), self.labels[idx]

dataset = SyntheticAnomalyDataset()
train_data, test_data = train_test_split(dataset, test_size=0.3, random_state=42)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)


In [3]:
class Autoencoder(nn.Module):
    def __init__(self, input_dim=20, latent_dim=8):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, latent_dim)
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 16),
            nn.ReLU(),
            nn.Linear(16, input_dim)
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded


In [4]:
autoencoder = Autoencoder()
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)


In [5]:
epochs = 20
for epoch in range(epochs):
    autoencoder.train()
    train_loss = 0
    for data, labels in train_loader:
        if labels.sum() == 0:  # Only train on normal samples
            optimizer.zero_grad()
            recon = autoencoder(data)
            loss = criterion(recon, data)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
    print(f'Epoch {epoch+1}, Loss: {train_loss/len(train_loader):.4f}')


Epoch 1, Loss: 0.0000
Epoch 2, Loss: 0.0417
Epoch 3, Loss: 0.0494
Epoch 4, Loss: 0.0955
Epoch 5, Loss: 0.0491
Epoch 6, Loss: 0.0000
Epoch 7, Loss: 0.0452
Epoch 8, Loss: 0.0423
Epoch 9, Loss: 0.0427
Epoch 10, Loss: 0.0000
Epoch 11, Loss: 0.0438
Epoch 12, Loss: 0.0861
Epoch 13, Loss: 0.0839
Epoch 14, Loss: 0.0000
Epoch 15, Loss: 0.0437
Epoch 16, Loss: 0.0458
Epoch 17, Loss: 0.0452
Epoch 18, Loss: 0.0441
Epoch 19, Loss: 0.0000
Epoch 20, Loss: 0.0870


In [6]:
autoencoder.eval()
reconstruction_errors = []
true_labels = []

with torch.no_grad():
    for data, labels in test_loader:
        recon = autoencoder(data)
        loss = ((recon - data) ** 2).mean(dim=1)
        reconstruction_errors.extend(loss.numpy())
        true_labels.extend(labels.numpy())


In [7]:
threshold = np.percentile(reconstruction_errors, 95)  # 95th percentile as threshold
predicted_labels = [1 if error > threshold else 0 for error in reconstruction_errors]


In [8]:
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)

print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-score: {f1:.4f}')


Precision: 1.0000
Recall: 0.4286
F1-score: 0.6000
