In [None]:
import torch 
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

import numpy as np
import sklearn

from torch import nn
from torch import optim

from urllib.request import urlopen
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import normalize
from sklearn.svm import OneClassSVM
from sklearn import metrics
from sklearn.metrics import roc_auc_score, roc_curve, auc

from tqdm.autonotebook import tqdm
from itertools import chain

from autoencoder import Encoder, Decoder
from helper import K9_dataloader, K9_stratified_class_sample, K9_OCSVM

## Obtain denoising representations

In [None]:
def _corrupt_features(batch, fraction, fill_value):
    """Return a corrupted *copy* of ``batch``; the argument itself is never modified.

    For every sample, ``int(n * fraction)`` positions along the sample's first
    axis (``n`` being the size of that axis) are drawn uniformly at random and
    overwritten with ``fill_value``. Positions are drawn with replacement, so
    the number of distinct corrupted positions can be smaller than that.
    """
    noisy = batch.clone()
    if noisy.dim() < 2 or noisy.shape[0] == 0:
        return noisy

    n_features = noisy.shape[1]
    n_corrupt = int(n_features * fraction)
    if n_corrupt == 0:
        return noisy

    # One row of random positions per sample; `rows` broadcasts against `cols`.
    rows = torch.arange(noisy.shape[0], device=noisy.device).unsqueeze(1)
    cols = torch.randint(n_features, (noisy.shape[0], n_corrupt), device=noisy.device)
    noisy[rows, cols] = fill_value
    return noisy


def _encode_loader(enc, loader):
    """Encode every sample yielded by ``loader``; return ``(representations, labels)`` lists."""
    reps, labels = [], []
    # No autograd graph is needed for feature extraction.
    with torch.no_grad():
        for batch, targets in loader:
            # Samples are encoded one at a time so each representation keeps
            # the shape the encoder produces for a single sample.
            for sample, label in zip(batch, targets):
                reps.append(enc(sample))
                labels.append(label)
    return reps, labels


def get_denoising_reps(train_loader, test_loader, dataset,
                       enc_dim=64, latent_dim=75, epochs=10, lr=1e-3,
                       noise_fraction=0.25):
    """Train a denoising autoencoder and return encoder representations.

    The encoder/decoder pair is trained with Adam on ``train_loader``. In every
    step the inputs are corrupted by setting random positions to 0 and the
    latent code is corrupted by setting random positions to 1; the loss is the
    summed binary cross-entropy between the reconstruction and the *clean*
    inputs, divided by the batch size.

    Args:
        train_loader: iterable of ``(inputs, targets)`` batches used for
            training and for the returned training representations.
        test_loader: iterable of ``(inputs, targets)`` batches to encode.
        dataset: dataset name; ``'fMNIST'`` selects an image dimension of 784,
            any other value selects 3072.
        enc_dim: hidden size passed to ``Encoder``/``Decoder``.
        latent_dim: latent size passed to ``Encoder``/``Decoder``.
        epochs: number of passes over ``train_loader``.
        lr: Adam learning rate.
        noise_fraction: fraction (in ``[0, 1]``) of positions corrupted in both
            the inputs and the latent code.

    Returns:
        ``(train_set_X, train_set_y, test_set_X, test_set_y)``: lists with one
        encoded representation / one label per sample.

    Raises:
        ValueError: if ``noise_fraction`` is outside ``[0, 1]``.
    """
    if not 0 <= noise_fraction <= 1:
        raise ValueError('noise_fraction must be in [0, 1], got {}'.format(noise_fraction))

    image_dim = 784 if dataset == 'fMNIST' else 3072

    enc = Encoder(image_dim, enc_dim, latent_dim)
    dec = Decoder(latent_dim, enc_dim, image_dim)
    optimizer = optim.Adam(chain(enc.parameters(), dec.parameters()), lr=lr)

    print('Training denoising autoencoder\n------------------------------')
    for epoch in range(epochs):
        losses = []
        trainloader = tqdm(train_loader)

        for inputs, _ in trainloader:
            # Corrupt a copy: `inputs` must stay clean because it is the
            # reconstruction target of the loss below.
            # NOTE(review): corruption indexes the first axis of each sample,
            # so it presumably expects flattened samples -- confirm against
            # what K9_dataloader yields.
            noisy_inputs = _corrupt_features(inputs, noise_fraction, 0)

            optimizer.zero_grad()

            z = enc(noisy_inputs)
            # Corrupt a copy of the code as well: overwriting `z` in place can
            # invalidate values autograd saved for the encoder's backward pass.
            noisy_z = _corrupt_features(z, noise_fraction, 1)

            outputs = dec(noisy_z)

            loss = F.binary_cross_entropy(outputs, inputs, reduction='sum') / inputs.shape[0]
            loss.backward()
            optimizer.step()

            # keep track of the loss and update the stats
            losses.append(loss.item())
            trainloader.set_postfix(loss=np.mean(losses), epoch=epoch)

    # Inference mode for feature extraction (disables dropout / batch-norm
    # updates if the encoder has any).
    enc.eval()
    train_set_X, train_set_y = _encode_loader(enc, train_loader)
    test_set_X, test_set_y = _encode_loader(enc, test_loader)

    return train_set_X, train_set_y, test_set_X, test_set_y

## Fit OC-SVM

### Minimal example

In [None]:
# Build the train/test loaders for the chosen dataset (batches of 256)
dataset_name = 'CIFAR10'
train_loader, test_loader = K9_dataloader(dataset=dataset_name, batch_size=256)

# Train the denoising autoencoder and encode every train/test sample
tr_X, tr_y, ts_X, ts_y = get_denoising_reps(train_loader, test_loader, dataset_name)

# Assemble the OC-SVM inputs: the training split goes through
# K9_stratified_class_sample, the test split is used as-is
X_train, y_train = K9_stratified_class_sample(torch.stack(tr_X), np.array(tr_y))
X_test = torch.stack(ts_X)
y_test = np.array(ts_y)

# Fit the OC-SVM with an RBF kernel
oc_svm = K9_OCSVM(X_train, y_train, X_test, y_test, kernel_type='rbf')