In [None]:
import numpy as np
import os
import torch
import sys

from matplotlib import pyplot as plt
from torch import nn
from torchvision.transforms import Normalize,ToTensor, Compose

# Put the parent of the current working directory on the import path so that
# the project-local `dataset` module imported right below can be resolved.
# The membership guard keeps re-running this cell from adding duplicates.
project_dir = os.path.join(os.getcwd(), os.pardir)
if sys.path.count(project_dir) == 0:
    sys.path.append(project_dir)

from dataset import AnomalyMNIST

In [None]:
# One seed is shared by the dataset construction and the preview montage.
seed = 42

# Convert images to tensors, then normalise with 0.5 / 0.5 (mean / std).
transform = Compose([
    ToTensor(),
    Normalize((0.5,), (0.5,)),
])

dataset = AnomalyMNIST(
    'data/',
    download=True,
    transform=transform,
    n_normal_samples=2000,
    known_anomalies=0.1,
    pollution=0.0,
    seed=seed,
)
print(dataset)

# Visual sanity check of the samples.
# NOTE(review): the two 5s are presumably the montage grid size — confirm against AnomalyMNIST.montage.
dataset.montage(5, 5, seed)
plt.show()

In [None]:
from experiments.utils.Supervised import SupervisedModel, train

In [None]:
# Build the supervised model for 28x28 inputs and fit it on the MNIST-based dataset.
# NOTE(review): the two list arguments look like convolutional channel sizes and
# dense layer widths, and 128 / 50 like batch size / epochs — confirm against
# SupervisedModel and train in experiments.utils.Supervised.
model = SupervisedModel(
    (28, 28),
    [1, 32, 48],
    [1024, 256, 32, 2],
)
model = train(
    model,
    dataset,
    128,
    50,
    weighted_sampler=True,
)

In [None]:
# Switch to inference mode before scoring held-out data. In training mode any
# dropout layers stay active and batch-norm layers use per-batch statistics,
# which makes the evaluation scores depend on random masks and on the
# composition of the test batch. eval() returns the module itself, so the
# rebinding is kept unchanged.
# NOTE(review): assumes SupervisedModel is a torch.nn.Module — confirm.
model = model.eval()

In [None]:
# TODO: Include the test set configuration in the AnomalyMNIST class
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import Subset, DataLoader

test_dataset_full = MNIST('data/', train=False, download=True, transform=transform)

# Digit 1 plays the "normal" class and digit 7 the "anomaly" class.
digit_labels = test_dataset_full.targets
normal_idx = torch.where(digit_labels == 1)[0]
anomaly_idx = torch.where(digit_labels == 7)[0]

# Keep at most 1024 samples of each class (so at most 2048 in total).
idx = torch.cat([normal_idx[:1024], anomaly_idx[:1024]])

# Relabel in place: 0 = normal, 1 = anomaly, -1 = every other digit.
# The -1 entries are never selected because `idx` only points at 1s and 7s.
test_dataset_full.targets = -torch.ones_like(digit_labels)
test_dataset_full.targets[normal_idx] = 0
test_dataset_full.targets[anomaly_idx] = 1

x_test_set = Subset(test_dataset_full, idx)

In [None]:
# Materialise the evaluation subset as one image tensor and one label tensor.
images, labels = zip(*x_test_set)
x_test = torch.stack(images)
y_test = torch.tensor(labels)

# Anomaly scores for every test image, as produced by the model's own scoring method.
y_score = model.score_samples(x_test)
# y_score = model(x_test).detach().numpy()[:,1]
from sklearn.metrics import roc_auc_score

# Area under the ROC curve; as the last expression it is the cell's output.
roc_auc_score(y_test, y_score)


In [None]:
from sklearn.metrics import roc_curve, auc, roc_auc_score

# Operating points of the ROC curve (thresholds are not needed) and the area under it.
fpr, tpr, _ = roc_curve(y_test, y_score)
roc_auc = auc(fpr, tpr)
curve_label = 'ROC curve (area = %0.2f)' % roc_auc

plt.plot(fpr, tpr, label=curve_label)
plt.plot([0, 1], [0, 1], 'k--')  # diagonal reference line
plt.title('Receiver Operating Characteristic')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()

In [None]:
# Drop singleton dimensions from the raw scores, then squash them into (0, 1).
_y_score = y_score.squeeze()
_y_score = torch.sigmoid(_y_score)

In [None]:
from matplotlib import pyplot as plt
# Overlay the sigmoid-score distributions of the two test classes
# (label 0 = normal, label 1 = anomaly).
for class_id, class_name in ((0, 'Normal'), (1, 'Anomaly')):
    plt.hist(_y_score[y_test == class_id], bins=10, alpha=0.5, label=class_name)
plt.legend()
plt.show()

In [None]:
# Classification report
# Evaluate in inference mode: with the model in training mode, dropout and
# batch-norm layers make the thresholded predictions depend on random masks
# and on the statistics of the test batch instead of the learned ones.
# NOTE(review): assumes SupervisedModel is a torch.nn.Module — confirm.
model.eval()
from sklearn.metrics import classification_report
# y_pred = model(x_test).detach().argmax(dim=1)
# Threshold the sigmoid of the raw model output at 0.5 (1 = anomaly, 0 = normal).
y_pred = np.where(torch.sigmoid(model(x_test).detach()) > 0.5, 1, 0)
print(classification_report(y_test, y_pred, zero_division=1))

# Confusion matrix
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)

In [None]:
## Extract every test image of a single digit (`number`) and run the model on it
number = 9
# The MNIST test split is loaded again so that the original digit labels are
# available for the selection below.
test2_dataset = MNIST('data/', train = False, download = True, transform=transform)
test2_idx = torch.where((test2_dataset.targets == number))[0]
test2_dataset = Subset(test2_dataset, test2_idx)

X, y = zip(*test2_dataset)
X = torch.stack(X)
y = torch.tensor(y).flatten()
# y_pred = model(X).detach().argmax(dim=1)
# Run the model once and derive both the raw scores and the thresholded
# predictions from that single output. Two separate forward passes cost twice
# as much and can disagree whenever the forward pass is stochastic
# (e.g. dropout while the model is in training mode).
y_score = model(X).detach()
y_pred = np.where(torch.sigmoid(y_score) > 0.5, 1, 0)

from matplotlib import pyplot as plt
# Show the first selected image (channel 0) as a visual check.
plt.imshow(X[0,0])


In [None]:
# Side by side: thresholded predictions (left panel) and raw model outputs (right panel).
for panel, values in enumerate((y_pred, y_score), start=1):
    plt.subplot(1, 2, panel)
    plt.hist(values)
plt.show()

In [None]:
# Count how many of the selected images are predicted as label 1.
# The reference vector is all ones, so the comparison is True exactly where
# the prediction equals 1.
_y = np.ones_like(y)
# Flatten the predictions before comparing: if the model emits one column per
# sample (shape (N, 1) — TODO confirm), comparing against the (N,) reference
# would broadcast to (N, N), and np.bincount only accepts 1-D input.
# minlength=2 keeps the result as [n_not_equal, n_equal] even when one of the
# two outcomes never occurs.
np.bincount((np.ravel(y_pred) == _y).astype(np.int64), minlength=2)

# MedMNIST

In [None]:
import medmnist, torch
from medmnist import INFO, Evaluator
from medmnist.dataset import PneumoniaMNIST
import torch.utils.data as data

import os, sys
# Ensure the parent of the current working directory is importable so that the
# project-local `dataset` module used in the next cell can be resolved.
# The guard makes the cell safe to run repeatedly.
project_dir = os.path.join(os.getcwd(), os.pardir)
if sys.path.count(project_dir) == 0:
    sys.path.append(project_dir)


# Report the installed medmnist package version and the homepage string it exposes.
print(f"MedMNIST v{medmnist.__version__} @ {medmnist.HOMEPAGE}")

In [None]:
from dataset import AnomalyPneumoniaMNIST
from matplotlib import pyplot as plt
from torchvision import transforms

# Convert images to tensors, then normalise with mean 0.5 and std 0.5.
data_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[.5], std=[.5]),
    ]
)

# Load the dataset
# NOTE(review): n_normal_samples=-1 presumably means "use every normal sample" —
# confirm against AnomalyPneumoniaMNIST.
seed = 42
train_dataset = AnomalyPneumoniaMNIST(
    'data/',
    download=True,
    transform=data_transform,
    n_normal_samples=-1,
    known_anomalies=0.1,
    pollution=0,
    seed=seed,
)
print(train_dataset)

# Visual sanity check of the training samples.
train_dataset.montage(5, 5, seed)
plt.show()

In [None]:
BATCH_SIZE = 128

# Official PneumoniaMNIST test split, preprocessed like the training data.
test_dataset = PneumoniaMNIST(
    split='test',
    transform=data_transform,
    download=True,
    root='data/',
)

# Training batches are shuffled; test batches are four times larger and keep dataset order.
train_loader = data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = data.DataLoader(test_dataset, batch_size=4 * BATCH_SIZE, shuffle=False)

In [None]:
# Fresh supervised model for the PneumoniaMNIST experiment, same architecture
# arguments as the MNIST run but trained without the weighted sampler.
# NOTE(review): 128 / 100 are presumably batch size / epochs — confirm against `train`.
model = SupervisedModel(
    (28, 28),
    [1, 32, 48],
    [1024, 256, 32, 2],
)
model = train(
    model,
    train_dataset,
    128,
    100,
    weighted_sampler=False,
)

In [None]:
# Materialise the test split as one image tensor and one flat label tensor.
images, labels = zip(*test_dataset)
x_test = torch.stack(images)
y_test = torch.tensor(labels).flatten()

# Raw model outputs for the whole test split, detached and converted to NumPy.
# NOTE(review): the model is in whatever train/eval mode `train` left it in —
# consider calling model.eval() before scoring.
# y_score = model(x_test).detach().numpy().mean(axis=1)
raw_output = model(x_test)
y_score = raw_output.detach().numpy()
from sklearn.metrics import roc_auc_score

# Area under the ROC curve; as the last expression it is the cell's output.
roc_auc_score(y_test, y_score)

In [None]:
# Display both shapes as one tuple: a notebook cell only renders its last
# expression, so a bare `y_score.shape` on its own earlier line is evaluated
# and silently discarded.
y_score.shape, y_test.shape

In [None]:
from sklearn.metrics import roc_curve, auc, roc_auc_score

# Operating points of the ROC curve (thresholds are not needed) and the area under it.
fpr, tpr, _ = roc_curve(y_test, y_score)
roc_auc = auc(fpr, tpr)
curve_label = 'ROC curve (area = %0.2f)' % roc_auc

plt.plot(fpr, tpr, label=curve_label)
plt.plot([0, 1], [0, 1], 'k--')  # diagonal reference line
plt.title('Receiver Operating Characteristic')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()

In [None]:
from matplotlib import pyplot as plt
# Overlay the raw-score distributions of the two test classes (label 0 vs. label 1).
for class_id, class_name in ((0, 'Normal'), (1, 'Anomaly')):
    plt.hist(y_score[y_test == class_id], bins=15, alpha=0.5, label=class_name)
plt.legend()
plt.show()

In [None]:
import numpy as np
from scipy import stats

# The two samples: model scores of the label-0 and of the label-1 test items.
data1 = y_score[y_test==0]
data2 = y_score[y_test==1]

# Run the two-sample t-test (SciPy defaults) and unpack statistic and p-value.
ttest_result = stats.ttest_ind(data1, data2)
t_stat, p_value = ttest_result

print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")


In [None]:
# Turn the raw model outputs into hard 0/1 predictions by thresholding the
# sigmoid at 0.5. The forward pass runs under no_grad because this is pure
# inference: tracking gradients for the whole test set only costs memory.
with torch.no_grad():
    y_hat = torch.sigmoid(model(x_test))
# ravel() keeps the predictions 1-D like y_test, so 1-D-only helpers such as
# np.bincount accept them even if the model emits one column per sample
# (shape (N, 1) — TODO confirm).
y_hat = np.where(y_hat > 0.5, 1, 0).ravel()
from sklearn.metrics import classification_report
print(classification_report(y_test, y_hat, target_names=('Normal', 'Anomaly')))

from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_hat)

In [None]:
# Per-class counts of the ground truth vs. the predictions.
# np.bincount only accepts 1-D input, so the predictions are flattened first in
# case they are a column vector (shape (N, 1) — TODO confirm).
np.bincount(y_test), np.bincount(np.ravel(y_hat))