In [18]:
import torch
from torch import nn 
from torch import optim
from river import compose, metrics, preprocessing, stream, anomaly, linear_model, datasets, compose
from river import feature_extraction as fx
from IncrementalTorch.anomaly.anomaly import TorchAE, SklearnAnomalyDetector
from tqdm import tqdm
import river  
import torchvision
from pprint import pprint

from OnlineTorch.classifier import PyTorch2RiverClassifier
from torch import nn, optim

from sklearn.metrics import roc_auc_score
from sklearn.linear_model import SGDOneClassSVM

N_SAMPLES = 1_000
SEED = 42
track_name = "RBF"
#LOSS = nn.BCELoss
LOSS = nn.L1Loss
OPTIMIZER = optim.AdamW
BATCH_SIZE=1
LEARNING_RATE=1e-3
METRIC = river.metrics.ROCAUC()



## Dataset

In [7]:
data_stream = stream.shuffle(river.datasets.CreditCard().take(8000), 1000, seed=20)

# Autoencoder
## Undercomplete Autoencoder
### Softmax

In [8]:
def undercomplete_ae_sm(n_features, latent_dim=1):
    net = nn.Sequential(
        nn.Dropout(),
        nn.Linear(n_features, 20), 
        nn.LeakyReLU(),
        nn.Linear(20, latent_dim),
        nn.LeakyReLU(),
        nn.Linear(latent_dim, 20),
        nn.LeakyReLU(), 
        nn.Linear(20, n_features),
        nn.Linear(n_features,1),
        nn.Softmax()
    )
    return net

## Undercomplete Autoencoder standard

In [9]:
def undercomplete_ae(n_features, latent_dim=1):
    net = nn.Sequential(
        nn.Dropout(),
        nn.Linear(n_features, 20), 
        nn.LeakyReLU(),
        nn.Linear(20, latent_dim),
        nn.LeakyReLU(),
        nn.Linear(latent_dim, 20),
        nn.LeakyReLU(), 
        nn.Linear(20, n_features),
    )
    return net

In [10]:
model1 = compose.Pipeline(
    preprocessing.StandardScaler(),
    PyTorch2RiverClassifier(
                build_fn = undercomplete_ae_sm,
                loss_fn = LOSS,
                optimizer_fn = OPTIMIZER,
                #batch_size=BATCH_SIZE,
                learning_rate=LEARNING_RATE,
                seed=SEED
    )
)


In [11]:
model2 = compose.Pipeline(
    preprocessing.MinMaxScaler(),
    TorchAE(
                build_fn = undercomplete_ae,
                loss_fn = LOSS,
                optimizer_fn = OPTIMIZER,
                #batch_size=BATCH_SIZE,
                learning_rate=LEARNING_RATE,
                seed=SEED
    )
)

In [12]:
#supervised learning approach with Softmax function --> Proba gets predicted, ROC way worse
metric = river.metrics.ROCAUC()
data_stream = stream.shuffle(river.datasets.CreditCard().take(8000), N_SAMPLES, seed=42)
for x, y in data_stream:
    y_pred = model1.predict_proba_one(x) #ruft learn_unsupervised auf, müssen wir learn_one dann überhaupt auch aufrufen?
    model1.learn_one(x, y) #model1.learn_one ist supervised Ansatz, wir wollen Unsupervised
    metric.update(y, y_pred)
metric

  input = module(input)


ROCAUC: 0.496552

In [13]:
torch.__version__

'1.9.1+cpu'

In [14]:
metric= river.metrics.ROCAUC()
data_stream = stream.shuffle(river.datasets.CreditCard().take(8000), N_SAMPLES, seed=42)
for x, y in data_stream:
    model2.learn_one(x)
    y_pred= model2.score_one(x)
    metric.update(y,y_pred)
    #model2.learn_one(x,y_pred)
    #model2.learn_one(x,learn_unsupervised=True)
    #y_pred = model1.predict_proba_one(x) #ruft learn_unsupervised auf, müssen wir learn_one dann überhaupt auch aufrufen?
    #model2.learn_one(x, y)
    #METRIC.update(y, y_pred)
metric

ROCAUC: -0.

## Sparse Autoencoder

In [15]:
river.__version__

'0.9.0'

# Baselines
## OneClassSVM

In [33]:
model4 = compose.Pipeline(
    preprocessing.StandardScaler(),
    #fx.RBFSampler(),    
    anomaly.QuantileThresholder(
        anomaly.OneClassSVM(),
        q=0.97 #q Anpassung viele Auswirkungen
    )
)

In [34]:
metric= river.metrics.ROCAUC()
data_stream = stream.shuffle(river.datasets.CreditCard().take(8000), N_SAMPLES, seed=42)
for x, y in data_stream:
    model4.learn_one(x)
    y_pred= model4.score_one(x)
    metric.update(y,y_pred)
metric

ROCAUC: 0.803009

## HalfSpaceTrees

In [None]:
model3 = compose.Pipeline(
    preprocessing.Sta(),
    anomaly.HalfSpaceTrees(seed=SEED)
)

In [None]:
metric= river.metrics.ROCAUC()
data_stream = stream.shuffle(river.datasets.CreditCard().take(8000), N_SAMPLES, seed=42)
for x, y in data_stream:
    model3.learn_one(x)
    y_pred= model3.score_one(x)
    metric.update(y,y_pred)
metric

ROCAUC: 0.851694

# Random

In [None]:
dataset = datasets.synth.RandomRBF(seed_model=7, seed_sample=seed,n_classes=10,n_features=200).take(n_samples)

def build_fn(n_features):
    net = nn.Sequential(
        nn.Linear(n_features, 5),
        nn.ReLU(),
        nn.Linear(5, 5),
        nn.ReLU(),
        nn.Linear(5, 5),
        nn.ReLU(),
        nn.Linear(5, 5),
        nn.ReLU(),
        nn.Linear(5, 1),
        nn.Softmax()
    )
    return net

model = compose.Pipeline(
    preprocessing.StandardScaler(),
    PyTorch2RiverClassifier(
                build_fn=build_fn,
                loss_fn=nn.BCELoss,
                optimizer_fn=optim.Adam,
                batch_size=1,
                learning_rate=1e-3,
    )
)

for x, y in data_stream:
    y_pred = model.predict_proba_one(x)
    model.learn_one(x, y)
    METRIC.update(y, y_pred)
METRIC

NameError: name 'seed' is not defined