# FOR TRAIN SPLIT OF ONLY 2K

## Imports

In [None]:
import tensorflow_datasets as tfds
import torch
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

## Data preparation

In [None]:
# Loading: the first 2,000 samples of the "identity" train and test splits,
# as (image, label) pairs, together with the dataset metadata.
identity_splits, ds_info = tfds.load(
    'mnist_corrupted/identity',
    split=['train[:2000]', 'test[:2000]'],
    as_supervised=True,
    with_info=True,
)
train_data, test_data = identity_splits


def convert_to_numpy(dataset):
    """Collect the images of a supervised dataset into one NumPy array.

    Args:
        dataset: Iterable of ``(image, label)`` pairs whose images expose a
            ``.numpy()`` method. Labels are ignored.

    Returns:
        ``np.ndarray`` with one leading entry per sample.
    """
    image_arrays = []
    for image, _label in dataset:
        image_arrays.append(image.numpy())
    return np.array(image_arrays)

train_images = convert_to_numpy(train_data)
test_images = convert_to_numpy(test_data)

# Scale pixel values by 1/255, then insert a channel axis at position 1 and
# drop the trailing axis when it has size 1.
# NOTE(review): assumes images arrive channel-last as (N, H, W, 1) - confirm.
train_images = torch.Tensor(train_images / 255.0)
train_images = train_images.unsqueeze(1).squeeze(-1)
test_images = torch.Tensor(test_images / 255.0)
test_images = test_images.unsqueeze(1).squeeze(-1)

# Creating DataLoader (images only; labels are not used)
batch_size = 128
train_dataset = TensorDataset(train_images)
test_dataset = TensorDataset(test_images)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

print(f"Training set size: {len(train_images)}")
print(f"Test set size (identity): {len(test_images)}")

## Model definition

In [None]:
class Generator(nn.Module):
    """Transposed-convolution decoder from a latent code to a 1-channel image.

    The module-level names ``nz`` (latent channels) and ``ngf`` (base feature
    width) are read when an instance is constructed, so they must be defined
    before ``Generator()`` is called. For a latent of spatial size 1x1 the
    stages upsample 1 -> 4 -> 7 -> 14 -> 28. The final ``Tanh`` bounds the
    output to [-1, 1].
    """

    def __init__(self):
        super().__init__()
        # (in_channels, out_channels, kernel_size, stride, padding) of each
        # hidden stage; every stage is ConvTranspose2d -> BatchNorm2d -> ELU.
        hidden_stages = [
            (nz, ngf * 4, 4, 1, 0),
            (ngf * 4, ngf * 2, 3, 2, 1),
            (ngf * 2, ngf, 4, 2, 1),
        ]
        layers = []
        for in_ch, out_ch, kernel, stride, pad in hidden_stages:
            layers.append(nn.ConvTranspose2d(in_ch, out_ch, kernel, stride, pad, bias=True))
            layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.ELU())
        # Output stage: collapse to a single channel and squash with Tanh.
        layers.append(nn.ConvTranspose2d(ngf, 1, 4, 2, 1, bias=True))
        layers.append(nn.Tanh())
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Decode latent tensor ``x`` by passing it through ``self.network``."""
        decoded = self.network(x)
        return decoded

# Initialization
nz = 100   # latent channels read by Generator.__init__
ngf = 64   # base feature width read by Generator.__init__

# Run on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = Generator()
model = model.to(device)

loss_function = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

## Training

In [None]:
# Training loop
num_epochs = 30
training_losses = []

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # batch losses weighted by batch size, summed over the epoch
    for (batch_images,) in train_loader:
        batch_images = batch_images.to(device)
        n_in_batch = batch_images.size(0)

        optimizer.zero_grad()

        # Inner step: evaluate the loss with the latent at the origin and use
        # its negative gradient as the latent code. create_graph=True keeps
        # this step differentiable so the outer backward pass flows through it.
        z_origin = torch.zeros(n_in_batch, nz, 1, 1, device=device, requires_grad=True)
        inner_error = model(z_origin) - batch_images
        L_inner = inner_error.pow(2).sum(1).mean()
        (origin_grad,) = torch.autograd.grad(
            L_inner, [z_origin], create_graph=True, retain_graph=True
        )
        z = origin_grad.neg()

        # Outer step: fit the data using the inferred latent code.
        outer_error = model(z) - batch_images
        L_outer = outer_error.pow(2).sum(1).mean()
        L_outer.backward()
        optimizer.step()

        running_loss += L_outer.item() * n_in_batch

    # Per-sample average over the whole training set.
    average_epoch_loss = running_loss / len(train_loader.dataset)
    training_losses.append(average_epoch_loss)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {average_epoch_loss:.4f}')

## Error Calculations

In [None]:
# Computation of reconstruction errors
def reconstruction_loss(model, loader, latent_dim=None, run_device=None):
    """Return the per-image reconstruction error for every sample in ``loader``.

    For each batch the latent code is inferred the same way the training loop
    does it: the squared-error loss is evaluated with the latent at the origin
    and the negative gradient of that loss with respect to the latent becomes
    the code. The image decoded from that code is then compared with the
    input. Decoding the all-zero latent directly (as this function did before)
    yields one fixed image regardless of the input, so the score would not
    measure how well each input can be reconstructed.

    Args:
        model: Decoder taking a ``(batch, latent_dim, 1, 1)`` tensor.
        loader: Iterable yielding 1-tuples ``(batch_images,)``.
        latent_dim: Number of latent channels. Defaults to the module-level
            ``nz``.
        run_device: Device to run on. Defaults to the module-level ``device``.

    Returns:
        1-D ``np.ndarray`` with the mean squared error of each image (mean
        over dims 1, 2, 3), in loader order.

    Side effects:
        Puts ``model`` in eval mode and leaves it there.
    """
    if latent_dim is None:
        latent_dim = nz
    if run_device is None:
        run_device = device

    model.eval()
    error_list = []
    for batch_images, in loader:
        batch_images = batch_images.to(run_device)
        current_batch_size = batch_images.size(0)

        # Inner step needs autograd, so it cannot run under no_grad. Only the
        # gradient w.r.t. the latent is requested; parameter .grad fields are
        # left untouched. The loss expression matches the training loop.
        with torch.enable_grad():
            z_origin = torch.zeros(
                current_batch_size, latent_dim, 1, 1,
                device=run_device, requires_grad=True,
            )
            inner_loss = ((model(z_origin) - batch_images) ** 2).sum(1).mean()
            z = -torch.autograd.grad(inner_loss, [z_origin])[0]

        # Scoring pass: no graph needed.
        with torch.no_grad():
            generated_images = model(z)
            loss = torch.mean((generated_images - batch_images) ** 2, dim=[1, 2, 3])
        error_list.extend(loss.cpu().numpy())
    return np.array(error_list)

# Per-image reconstruction errors over the training loader; these values are
# what the anomaly threshold is computed from.
training_reconstruction_errors = reconstruction_loss(model, train_loader)

# Calculation of dynamic threshold
def dynamic_threshold(errors, num_std=2):
    """Return ``mean(errors) + num_std * std(errors)``.

    Args:
        errors: Array-like of reconstruction errors.
        num_std: Number of standard deviations added to the mean. Defaults to
            2, the value that was previously hard-coded.

    Returns:
        The threshold as a NumPy scalar.
    """
    return np.mean(errors) + num_std * np.std(errors)

# Setting the anomaly detection threshold: during evaluation, images whose
# reconstruction error exceeds this value are predicted to be anomalous.
threshold = dynamic_threshold(training_reconstruction_errors)
print(f"Initial anomaly detection threshold: {threshold}")

## Evaluation and Plotting

In [None]:
# Evaluation: score the first 2,000 test images of every corruption type and
# compare the thresholded errors against the expected labels.
corruption_types = [
    'identity', 'shot_noise', 'impulse_noise', 'glass_blur', 'motion_blur',
    'shear', 'scale', 'rotate', 'brightness', 'translate', 'stripe',
    'fog', 'spatter', 'dotted_line', 'zigzag'
]

all_predictions = []
all_ground_truths = []
all_reconstruction_errors = []

total_test_images = 0

for corruption in corruption_types:
    print(f"Evaluating corruption type: {corruption}")
    ds_test, ds_info = tfds.load(
        f'mnist_corrupted/{corruption}',
        split='test[:2000]',
        as_supervised=True,
        with_info=True
    )

    # NOTE: test_images / test_loader are rebound on every iteration.
    test_images = convert_to_numpy(ds_test)
    test_images = torch.Tensor(test_images / 255.0).unsqueeze(1).squeeze(-1)
    test_loader = DataLoader(TensorDataset(test_images), batch_size=batch_size, shuffle=False)

    reconstruction_errors = reconstruction_loss(model, test_loader)
    all_reconstruction_errors.extend(reconstruction_errors)

    # Labels: 0 = normal ('identity'), 1 = anomalous (any corruption). Truth
    # and predictions share the int dtype so the metric sees one label space
    # instead of a float/bool mix.
    is_anomalous = int(corruption != 'identity')
    ground_truth = np.full(len(reconstruction_errors), is_anomalous, dtype=int)

    predictions = (reconstruction_errors > threshold).astype(int)
    all_predictions.extend(predictions)
    all_ground_truths.extend(ground_truth)

    print(f"Number of images in the {corruption} test set: {len(test_images)}")
    total_test_images += len(test_images)

print(f"Total number of test images across all corruption types: {total_test_images}")

# Confusion matrix. labels=[0, 1] pins the matrix to 2x2 (rows = truth,
# columns = prediction) even if one class never occurs.
all_predictions = np.array(all_predictions)
all_ground_truths = np.array(all_ground_truths)
conf_matrix = confusion_matrix(all_ground_truths, all_predictions, labels=[0, 1])

# Plotting of reconstruction errors
plt.figure(figsize=(12, 6))
# histplot draws on the current axes and returns them, so the remaining
# decoration is applied through that axes object.
error_axes = sns.histplot(all_reconstruction_errors, bins=100, kde=True, color='blue')
error_axes.axvline(threshold, color='red', linestyle='--', label='Threshold')
error_axes.set_title('Reconstruction Error Distribution')
error_axes.set_xlabel('Reconstruction Error')
error_axes.set_ylabel('Frequency')
error_axes.legend()
plt.show()

# Original vs. reconstructed images
# Take one training batch and reconstruct it. The latent code is inferred the
# same way as in training (negative gradient of the loss at the zero latent);
# decoding the zero latent directly would give one identical picture for every
# input rather than a reconstruction of it.
model.eval()
for images, in train_loader:
    images = images.to(device)
    z = torch.zeros(images.size(0), nz, 1, 1, device=device, requires_grad=True)
    inner_loss = ((model(z) - images) ** 2).sum(1).mean()
    z = -torch.autograd.grad(inner_loss, [z])[0]
    with torch.no_grad():  # plotting only; no graph needed for the decode
        reconstructed_images = model(z)
    break

plt.figure(figsize=(12, 6))
plt.suptitle('Original vs Reconstructed Images', fontsize=16)

# Top row: the first six inputs; bottom row: their reconstructions.
for i in range(6):
    plt.subplot(2, 6, i + 1)
    plt.imshow(images[i].cpu().detach().squeeze(), cmap='gray')
    plt.title('Original')
    plt.axis('off')

    plt.subplot(2, 6, i + 7)
    plt.imshow(reconstructed_images[i].cpu().detach().squeeze(), cmap='gray')
    plt.title('Reconstructed')
    plt.axis('off')

plt.show()

# Training loss: one point per epoch, with epochs numbered from 1.
epoch_numbers = range(1, num_epochs + 1)
plt.figure(figsize=(12, 6))
plt.plot(epoch_numbers, training_losses, marker='o', linestyle='-', color='blue')
plt.title('Training Loss Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid()
plt.show()

# Render the confusion matrix computed during evaluation.
confusion_disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix)
confusion_disp.plot(cmap='Blues')
# plot() stores the axes it drew on in ax_; title that axes directly.
confusion_disp.ax_.set_title('Confusion Matrix for Anomalies')
plt.show()

# FOR 60K TRAIN SPLIT

## Data preparation

In [None]:
# Loading: the complete "identity" train and test splits, as (image, label)
# pairs, together with the dataset metadata.
identity_splits, ds_info = tfds.load(
    'mnist_corrupted/identity',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)
train_data, test_data = identity_splits


def convert_to_numpy(dataset):
    """Collect the images of a supervised dataset into one NumPy array.

    Args:
        dataset: Iterable of ``(image, label)`` pairs whose images expose a
            ``.numpy()`` method. Labels are ignored.

    Returns:
        ``np.ndarray`` with one leading entry per sample.
    """
    image_arrays = []
    for image, _label in dataset:
        image_arrays.append(image.numpy())
    return np.array(image_arrays)

train_images = convert_to_numpy(train_data)
test_images = convert_to_numpy(test_data)

# Scale pixel values by 1/255, then insert a channel axis at position 1 and
# drop the trailing axis when it has size 1.
# NOTE(review): assumes images arrive channel-last as (N, H, W, 1) - confirm.
train_images = torch.Tensor(train_images / 255.0)
train_images = train_images.unsqueeze(1).squeeze(-1)
test_images = torch.Tensor(test_images / 255.0)
test_images = test_images.unsqueeze(1).squeeze(-1)

# Creating DataLoader (images only; labels are not used)
batch_size = 128
train_dataset = TensorDataset(train_images)
test_dataset = TensorDataset(test_images)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

print(f"Training set size: {len(train_images)}")
print(f"Test set size (identity): {len(test_images)}")

## Model definition

In [2]:
class Generator(nn.Module):
    """Transposed-convolution decoder from a latent code to a 1-channel image.

    The module-level names ``nz`` (latent channels) and ``ngf`` (base feature
    width) are read when an instance is constructed, so they must be defined
    before ``Generator()`` is called. For a latent of spatial size 1x1 the
    stages upsample 1 -> 4 -> 7 -> 14 -> 28. The final ``Tanh`` bounds the
    output to [-1, 1].
    """

    def __init__(self):
        super().__init__()
        # (in_channels, out_channels, kernel_size, stride, padding) of each
        # hidden stage; every stage is ConvTranspose2d -> BatchNorm2d -> ELU.
        hidden_stages = [
            (nz, ngf * 4, 4, 1, 0),
            (ngf * 4, ngf * 2, 3, 2, 1),
            (ngf * 2, ngf, 4, 2, 1),
        ]
        layers = []
        for in_ch, out_ch, kernel, stride, pad in hidden_stages:
            layers.append(nn.ConvTranspose2d(in_ch, out_ch, kernel, stride, pad, bias=True))
            layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.ELU())
        # Output stage: collapse to a single channel and squash with Tanh.
        layers.append(nn.ConvTranspose2d(ngf, 1, 4, 2, 1, bias=True))
        layers.append(nn.Tanh())
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Decode latent tensor ``x`` by passing it through ``self.network``."""
        decoded = self.network(x)
        return decoded

# Initialization
nz = 100   # latent channels read by Generator.__init__
ngf = 64   # base feature width read by Generator.__init__

# Run on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = Generator()
model = model.to(device)

loss_function = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

## Training

In [None]:
# Training loop
num_epochs = 30
training_losses = []

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # batch losses weighted by batch size, summed over the epoch
    for (batch_images,) in train_loader:
        batch_images = batch_images.to(device)
        n_in_batch = batch_images.size(0)

        optimizer.zero_grad()

        # Inner step: evaluate the loss with the latent at the origin and use
        # its negative gradient as the latent code. create_graph=True keeps
        # this step differentiable so the outer backward pass flows through it.
        z_origin = torch.zeros(n_in_batch, nz, 1, 1, device=device, requires_grad=True)
        inner_error = model(z_origin) - batch_images
        L_inner = inner_error.pow(2).sum(1).mean()
        (origin_grad,) = torch.autograd.grad(
            L_inner, [z_origin], create_graph=True, retain_graph=True
        )
        z = origin_grad.neg()

        # Outer step: fit the data using the inferred latent code.
        outer_error = model(z) - batch_images
        L_outer = outer_error.pow(2).sum(1).mean()
        L_outer.backward()
        optimizer.step()

        running_loss += L_outer.item() * n_in_batch

    # Per-sample average over the whole training set.
    average_epoch_loss = running_loss / len(train_loader.dataset)
    training_losses.append(average_epoch_loss)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {average_epoch_loss:.4f}')

## Error Calculations

In [None]:
# Computation of reconstruction errors
def reconstruction_loss(model, loader, latent_dim=None, run_device=None):
    """Return the per-image reconstruction error for every sample in ``loader``.

    For each batch the latent code is inferred the same way the training loop
    does it: the squared-error loss is evaluated with the latent at the origin
    and the negative gradient of that loss with respect to the latent becomes
    the code. The image decoded from that code is then compared with the
    input. Decoding the all-zero latent directly (as this function did before)
    yields one fixed image regardless of the input, so the score would not
    measure how well each input can be reconstructed.

    Args:
        model: Decoder taking a ``(batch, latent_dim, 1, 1)`` tensor.
        loader: Iterable yielding 1-tuples ``(batch_images,)``.
        latent_dim: Number of latent channels. Defaults to the module-level
            ``nz``.
        run_device: Device to run on. Defaults to the module-level ``device``.

    Returns:
        1-D ``np.ndarray`` with the mean squared error of each image (mean
        over dims 1, 2, 3), in loader order.

    Side effects:
        Puts ``model`` in eval mode and leaves it there.
    """
    if latent_dim is None:
        latent_dim = nz
    if run_device is None:
        run_device = device

    model.eval()
    error_list = []
    for batch_images, in loader:
        batch_images = batch_images.to(run_device)
        current_batch_size = batch_images.size(0)

        # Inner step needs autograd, so it cannot run under no_grad. Only the
        # gradient w.r.t. the latent is requested; parameter .grad fields are
        # left untouched. The loss expression matches the training loop.
        with torch.enable_grad():
            z_origin = torch.zeros(
                current_batch_size, latent_dim, 1, 1,
                device=run_device, requires_grad=True,
            )
            inner_loss = ((model(z_origin) - batch_images) ** 2).sum(1).mean()
            z = -torch.autograd.grad(inner_loss, [z_origin])[0]

        # Scoring pass: no graph needed.
        with torch.no_grad():
            generated_images = model(z)
            loss = torch.mean((generated_images - batch_images) ** 2, dim=[1, 2, 3])
        error_list.extend(loss.cpu().numpy())
    return np.array(error_list)

# Per-image reconstruction errors over the training loader; these values are
# what the anomaly threshold is computed from.
training_reconstruction_errors = reconstruction_loss(model, train_loader)

# Calculation of dynamic threshold
def dynamic_threshold(errors, num_std=2):
    """Return ``mean(errors) + num_std * std(errors)``.

    Args:
        errors: Array-like of reconstruction errors.
        num_std: Number of standard deviations added to the mean. Defaults to
            2, the value that was previously hard-coded.

    Returns:
        The threshold as a NumPy scalar.
    """
    return np.mean(errors) + num_std * np.std(errors)

# Setting the anomaly detection threshold: during evaluation, images whose
# reconstruction error exceeds this value are predicted to be anomalous.
threshold = dynamic_threshold(training_reconstruction_errors)
print(f"Initial anomaly detection threshold: {threshold}")

## Evaluation and Plotting

In [None]:
# Evaluation: score the first 2,000 test images of every corruption type and
# compare the thresholded errors against the expected labels.
corruption_types = [
    'identity', 'shot_noise', 'impulse_noise', 'glass_blur', 'motion_blur',
    'shear', 'scale', 'rotate', 'brightness', 'translate', 'stripe',
    'fog', 'spatter', 'dotted_line', 'zigzag'
]

all_predictions = []
all_ground_truths = []
all_reconstruction_errors = []

total_test_images = 0

for corruption in corruption_types:
    print(f"Evaluating corruption type: {corruption}")
    ds_test, ds_info = tfds.load(
        f'mnist_corrupted/{corruption}',
        split='test[:2000]',
        as_supervised=True,
        with_info=True
    )

    # NOTE: test_images / test_loader are rebound on every iteration.
    test_images = convert_to_numpy(ds_test)
    test_images = torch.Tensor(test_images / 255.0).unsqueeze(1).squeeze(-1)
    test_loader = DataLoader(TensorDataset(test_images), batch_size=batch_size, shuffle=False)

    reconstruction_errors = reconstruction_loss(model, test_loader)
    all_reconstruction_errors.extend(reconstruction_errors)

    # Labels: 0 = normal ('identity'), 1 = anomalous (any corruption). Truth
    # and predictions share the int dtype so the metric sees one label space
    # instead of a float/bool mix.
    is_anomalous = int(corruption != 'identity')
    ground_truth = np.full(len(reconstruction_errors), is_anomalous, dtype=int)

    predictions = (reconstruction_errors > threshold).astype(int)
    all_predictions.extend(predictions)
    all_ground_truths.extend(ground_truth)

    print(f"Number of images in the {corruption} test set: {len(test_images)}")
    total_test_images += len(test_images)

print(f"Total number of test images across all corruption types: {total_test_images}")

# Confusion matrix. labels=[0, 1] pins the matrix to 2x2 (rows = truth,
# columns = prediction) even if one class never occurs.
all_predictions = np.array(all_predictions)
all_ground_truths = np.array(all_ground_truths)
conf_matrix = confusion_matrix(all_ground_truths, all_predictions, labels=[0, 1])

# Plotting of reconstruction errors
plt.figure(figsize=(12, 6))
# histplot draws on the current axes and returns them, so the remaining
# decoration is applied through that axes object.
error_axes = sns.histplot(all_reconstruction_errors, bins=100, kde=True, color='blue')
error_axes.axvline(threshold, color='red', linestyle='--', label='Threshold')
error_axes.set_title('Reconstruction Error Distribution')
error_axes.set_xlabel('Reconstruction Error')
error_axes.set_ylabel('Frequency')
error_axes.legend()
plt.show()

# Original vs. reconstructed images
# Take one training batch and reconstruct it. The latent code is inferred the
# same way as in training (negative gradient of the loss at the zero latent);
# decoding the zero latent directly would give one identical picture for every
# input rather than a reconstruction of it.
model.eval()
for images, in train_loader:
    images = images.to(device)
    z = torch.zeros(images.size(0), nz, 1, 1, device=device, requires_grad=True)
    inner_loss = ((model(z) - images) ** 2).sum(1).mean()
    z = -torch.autograd.grad(inner_loss, [z])[0]
    with torch.no_grad():  # plotting only; no graph needed for the decode
        reconstructed_images = model(z)
    break

plt.figure(figsize=(12, 6))
plt.suptitle('Original vs Reconstructed Images', fontsize=16)

# Top row: the first six inputs; bottom row: their reconstructions.
for i in range(6):
    plt.subplot(2, 6, i + 1)
    plt.imshow(images[i].cpu().detach().squeeze(), cmap='gray')
    plt.title('Original')
    plt.axis('off')

    plt.subplot(2, 6, i + 7)
    plt.imshow(reconstructed_images[i].cpu().detach().squeeze(), cmap='gray')
    plt.title('Reconstructed')
    plt.axis('off')

plt.show()

# Training loss: one point per epoch, with epochs numbered from 1.
epoch_numbers = range(1, num_epochs + 1)
plt.figure(figsize=(12, 6))
plt.plot(epoch_numbers, training_losses, marker='o', linestyle='-', color='blue')
plt.title('Training Loss Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid()
plt.show()

# Render the confusion matrix computed during evaluation.
confusion_disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix)
confusion_disp.plot(cmap='Blues')
# plot() stores the axes it drew on in ax_; title that axes directly.
confusion_disp.ax_.set_title('Confusion Matrix for Anomalies')
plt.show()