In [1]:
import torch
from torch import nn 
from torch import optim
from river import compose, metrics, preprocessing, stream, anomaly
from OnlineTorch.anomaly import TorchAE, SklearnAnomalyDetector
from tqdm import tqdm
import river  
import torchvision

from sklearn.metrics import roc_auc_score
from sklearn.linear_model import SGDOneClassSVM
from util import build_anomaly_dataset, Tensor2Dict

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
def train_test_incremental(model, data, update_interv=100):
    """Train an anomaly detector on a stream sample by sample and return its ROC AUC.

    For every ``(x, y)`` pair the model first learns from ``x`` and is then
    asked to score that same ``x``. The score is stored next to the label
    ``y``. The mean score of each completed window of ``update_interv``
    samples is shown in the progress bar as ``loss_<update_interv>``; a
    trailing, incomplete window is not reported.

    Parameters
    ----------
    model
        Object exposing ``learn_one(x)`` and ``score_one(x)``.
    data
        Iterable of ``(x, y)`` pairs. ``y`` may be a plain label or a
        ``torch.Tensor`` holding a single value.
    update_interv : int, default 100
        Number of samples between two progress-bar updates.

    Returns
    -------
    The result of ``roc_auc_score(truths, scores)`` over the whole stream.
    """
    scores = []
    truths = []
    iterator = tqdm(data, unit='samples')
    iterator.set_description('Learning from stream')
    loss_sum = 0
    idx = 0
    for x, y in iterator:
        # Some learn_one implementations return the (updated) estimator,
        # others return None and only update in place. Rebind only when
        # something is returned, so `model` can never silently become None.
        updated = model.learn_one(x)
        if updated is not None:
            model = updated

        # Order is train-then-score: the sample has already been learned
        # from when it is scored.
        score = model.score_one(x)
        scores.append(score)

        # Unwrap tensor labels so the metric receives plain Python values.
        if isinstance(y, torch.Tensor):
            y = y.item()
        truths.append(y)

        loss_sum += score
        idx += 1
        if idx == update_interv:
            # Report the window mean, then start a fresh window.
            iterator.set_postfix({f'loss_{update_interv}': loss_sum/update_interv})
            loss_sum = 0
            idx = 0
    return roc_auc_score(truths, scores)

In [11]:
def build_cae(n_features=1):
    """Build a small convolutional autoencoder as an ``nn.Sequential``.

    Three strided convolutions shrink the input, three transposed
    convolutions expand it again; SELU follows every layer except the last.
    With a 28x28 input the spatial size goes 28 -> 13 -> 6 -> 2 -> 6 -> 13 -> 28.

    Parameters
    ----------
    n_features : int, default 1
        Number of channels of the input and of the reconstruction.

    Returns
    -------
    nn.Sequential
        The freshly initialised, untrained network.
    """
    encoder = [
        nn.Conv2d(n_features, 32, kernel_size=3, stride=2),
        nn.SELU(),
        nn.Conv2d(32, 16, kernel_size=3, stride=2),
        nn.SELU(),
        nn.Conv2d(16, 8, kernel_size=3, stride=3),
        nn.SELU(),
    ]
    decoder = [
        nn.ConvTranspose2d(8, 16, kernel_size=3, stride=3),
        nn.SELU(),
        nn.ConvTranspose2d(16, 32, kernel_size=3, stride=2),
        nn.SELU(),
        # Kernel size 4 on the last layer brings 13 back to 28.
        nn.ConvTranspose2d(32, n_features, kernel_size=4, stride=2),
    ]
    return nn.Sequential(*encoder, *decoder)

# Detectors for the image stream.
# Loss and optimizer are handed over as classes, not instances.
loss = nn.L1Loss
optimizer = optim.AdamW

# Convolutional autoencoder built by build_cae, placed on `device`.
model = TorchAE(
    build_fn=build_cae,
    loss_fn=loss,
    device=device,
    optimizer_fn=optimizer,
    learning_rate=0.01,
    seed=42,
)

# Half-space trees fed through Tensor2Dict
# (presumably converts tensors to the dicts river expects -- confirm in util).
model2 = Tensor2Dict() | anomaly.HalfSpaceTrees(seed=20)

# One-class SVM wrapper; the estimator class itself is passed.
model3 = SklearnAnomalyDetector(SGDOneClassSVM)


In [5]:
# Download (if needed) and load MNIST under ./data/.
mnist_raw = torchvision.datasets.MNIST('./data/', download=True)

# Use `.data` rather than the deprecated `.train_data` alias; it matches the
# current `.targets` name already used for the labels.
# unsqueeze(1) inserts a channel axis; dividing by 255 rescales pixel values.
mnist_x, mnist_y = mnist_raw.data.unsqueeze(1) / 255., mnist_raw.targets

# `mnist` is the stream consumed by the evaluation cells.
mnist = build_anomaly_dataset(mnist_x, mnist_y)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [12]:
# Stream the MNIST anomaly dataset through `model2`; the cell displays the ROC AUC.
train_test_incremental(model2, mnist)

Learning from stream: 100%|██████████| 9631/9631 [00:30<00:00, 313.77samples/s, loss_100=0.557]


0.8960713049498096

In [7]:
def build_cae_cifar(n_features=3):
    """Build a convolutional autoencoder for 32x32 images as an ``nn.Sequential``.

    Four stride-2 convolutions shrink the input and four stride-2 transposed
    convolutions expand it again; SELU follows every layer except the last.
    With a 32x32 input the spatial size goes
    32 -> 15 -> 7 -> 3 -> 1 -> 3 -> 7 -> 15 -> 32.

    Parameters
    ----------
    n_features : int, default 3
        Number of channels of the input and of the reconstruction.

    Returns
    -------
    nn.Sequential
        The freshly initialised, untrained network.
    """
    model = nn.Sequential(
        nn.Conv2d(in_channels=n_features, out_channels=64,
                  kernel_size=3, stride=2),
        nn.SELU(),
        nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2),
        nn.SELU(),
        nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=2),
        nn.SELU(),
        nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2),
        nn.SELU(),
        nn.ConvTranspose2d(in_channels=256, out_channels=128,
                           kernel_size=3, stride=2),
        nn.SELU(),
        nn.ConvTranspose2d(in_channels=128, out_channels=128,
                           kernel_size=3, stride=2),
        nn.SELU(),
        nn.ConvTranspose2d(in_channels=128, out_channels=64,
                           kernel_size=3, stride=2),
        nn.SELU(),
        # Kernel size 4 (not 3) on the last layer: (15 - 1) * 2 + 4 = 32, so
        # the reconstruction has the same height/width as a 32x32 input.
        # With kernel size 3 the output would be 31x31 and could not be
        # compared element-wise with the input.
        nn.ConvTranspose2d(in_channels=64, out_channels=n_features,
                           kernel_size=4, stride=2),
    )

    return model

In [8]:
def build_ae(n_features, latent_dim=1):
    """Build a fully connected autoencoder as an ``nn.Sequential``.

    Layout: dropout on the input, then ``n_features -> 20 -> latent_dim``
    followed by ``latent_dim -> 20 -> n_features``. LeakyReLU follows every
    linear layer except the last.

    Parameters
    ----------
    n_features : int
        Width of the input and of the reconstruction.
    latent_dim : int, default 1
        Width of the bottleneck.

    Returns
    -------
    nn.Sequential
        The freshly initialised, untrained network.
    """
    hidden_dim = 20
    encoder = [
        nn.Dropout(),
        nn.Linear(n_features, hidden_dim),
        nn.LeakyReLU(),
        nn.Linear(hidden_dim, latent_dim),
        nn.LeakyReLU(),
    ]
    decoder = [
        nn.Linear(latent_dim, hidden_dim),
        nn.LeakyReLU(),
        nn.Linear(hidden_dim, n_features),
    ]
    return nn.Sequential(*encoder, *decoder)

# Tabular experiment: dense autoencoder vs. half-space trees.
# Loss and optimizer are handed over as classes, not instances.
loss = nn.L1Loss
optimizer = optim.AdamW

# Min-max scaling followed by the dense autoencoder from build_ae.
# No `device` argument is passed to TorchAE here.
model = preprocessing.MinMaxScaler() | TorchAE(
    build_fn=build_ae,
    loss_fn=loss,
    optimizer_fn=optimizer,
    learning_rate=0.01,
    seed=42,
)

# Baseline with the same scaling; built here but not evaluated in this cell.
model2 = preprocessing.MinMaxScaler() | anomaly.HalfSpaceTrees(seed=20)

# NOTE: despite its name, `phishing` holds the first 8000 samples of the
# CreditCard dataset, shuffled with a buffer of 1000. The name is kept so
# that any cell referring to it keeps working.
phishing = stream.shuffle(river.datasets.CreditCard().take(8000), 1000, seed=20)

# The cell displays the ROC AUC of the autoencoder pipeline.
train_test_incremental(model, phishing)

Learning from stream: : 8000samples [00:07, 1000.88samples/s, loss_100=0.048]


0.9816225705329154