In [1]:
import torch
from torch import nn 
from torch import optim
from river import compose, metrics, preprocessing, stream, anomaly, linear_model, datasets, compose
from river import feature_extraction as fx
from IncrementalTorch.anomaly.anomaly import TorchAE, SklearnAnomalyDetector
from tqdm import tqdm
import river  
import torchvision
import numpy as np
import pandas as pd
from pprint import pprint

from OnlineTorch.classifier import PyTorch2RiverClassifier
from torch import nn, optim

from sklearn.metrics import roc_auc_score
from sklearn.linear_model import SGDOneClassSVM
from sklearn.cluster import k_means
from sklearn.decomposition import IncrementalPCA

N_SAMPLES = 1_000
SEED = 42
track_name = "RBF"
#LOSS = nn.BCELoss
LOSS = nn.L1Loss
OPTIMIZER = optim.AdamW
BATCH_SIZE=1
LEARNING_RATE=1e-3
LATENT_DIM = 1
METRIC = river.metrics.ROCAUC()


## Dataset

In [2]:
data_stream = stream.shuffle(river.datasets.CreditCard().take(8000), 1000, seed=20)

# Autoencoder
## Undercomplete Autoencoder
### Softmax

In [3]:
def undercomplete_ae_sm(n_features, latent_dim=1):
    net = nn.Sequential(
        nn.Dropout(),
        nn.Linear(n_features, 20), 
        nn.LeakyReLU(),
        nn.Linear(20, latent_dim),
        nn.LeakyReLU(),
        nn.Linear(latent_dim, 20),
        nn.LeakyReLU(), 
        nn.Linear(20, n_features),
        nn.Linear(n_features,1),
        nn.Softmax()
    )
    return net

## Undercomplete Autoencoder standard

In [4]:
def undercomplete_ae(n_features, latent_dim=1):
    net = nn.Sequential(
        nn.Dropout(),
        nn.Linear(n_features, 20),
        nn.LeakyReLU(),
        nn.Linear(20, latent_dim),
        nn.LeakyReLU(),
        nn.Linear(latent_dim, 20),
        nn.LeakyReLU(), 
        nn.Linear(20, n_features),
    )
    return net

In [5]:
model1 = compose.Pipeline(
    preprocessing.StandardScaler(),
    PyTorch2RiverClassifier(
                build_fn = undercomplete_ae_sm,
                loss_fn = LOSS,
                optimizer_fn = OPTIMIZER,
                #batch_size=BATCH_SIZE,
                learning_rate=LEARNING_RATE,
                seed=SEED
    )
)

In [6]:
model2 = compose.Pipeline(
    preprocessing.MinMaxScaler(),
    TorchAE(
                build_fn = undercomplete_ae,
                loss_fn = LOSS,
                optimizer_fn = OPTIMIZER,
                #batch_size=BATCH_SIZE,
                learning_rate=LEARNING_RATE,
                seed=SEED
    )
)

In [7]:
#supervised learning approach with Softmax function --> Proba gets predicted, ROC way worse
metric = river.metrics.ROCAUC()
data_stream = stream.shuffle(river.datasets.CreditCard().take(8000), N_SAMPLES, seed=42)
for x, y in data_stream:
    y_pred = model1.predict_proba_one(x) #ruft learn_unsupervised auf, müssen wir learn_one dann überhaupt auch aufrufen?
    model1.learn_one(x, y) #model1.learn_one ist supervised Ansatz, wir wollen Unsupervised
    metric.update(y, y_pred)
metric

  input = module(input)


ROCAUC: 0.496552

In [8]:
torch.__version__

'1.9.1+cpu'

In [147]:
metric= river.metrics.ROCAUC()
data_stream = stream.shuffle(river.datasets.CreditCard().take(8000), N_SAMPLES, seed=42)
for x, y in data_stream:
    model2.learn_one(x)
    y_pred= model2.score_one(x)
    #print(y_pred)
    metric.update(y,y_pred)
    #model2.learn_one(x,y_pred)
    #model2.learn_one(x,learn_unsupervised=True)
    #y_pred = model1.predict_proba_one(x) #ruft learn_unsupervised auf, müssen wir learn_one dann überhaupt auch aufrufen?
    #model2.learn_one(x, y)
    #METRIC.update(y, y_pred)
metric

ROCAUC: 0.936567

## Sparse Autoencoder

In [10]:
river.__version__

'0.8.0'

# Baselines
## PCA (Incremental)

In [33]:
ipca = IncrementalPCA(n_components=LATENT_DIM, batch_size=1)

In [34]:
model6=compose.Pipeline(
    preprocessing.MinMaxScaler(),
    ipca #IncrementalPCA(n_components=LATENT_DIM, batch_size=1)
)

In [149]:
model7=compose.Pipeline(
    preprocessing.MinMaxScaler(),
    SklearnAnomalyDetector(IncrementalPCA(n_components=10, batch_size=1))
)

In [150]:
#metric= river.metrics.ROCAUC()
#data_stream = stream.shuffle(river.datasets.CreditCard().take(8000), N_SAMPLES, seed=SEED)
#for x, y in data_stream:
#    print(x.values())

In [151]:
data_stream = stream.shuffle(river.datasets.CreditCard().take(8000), N_SAMPLES, seed=SEED)
data_stream = list(data_stream)
data1 = pd.DataFrame(data=data_stream)

In [152]:
#data1

In [153]:
data_stream = stream.shuffle(river.datasets.CreditCard().take(8000), N_SAMPLES, seed=SEED)
#df=pd.read_csv(r'C:\Users\Manuel\river_data\CreditCard\creditcard.csv')

In [154]:
#df.head()
#x = df.iloc[:,:-1]
#y = df.iloc[:,-1:]

In [155]:
#x

In [179]:
metric= river.metrics.ROCAUC()
data_stream = stream.shuffle(river.datasets.CreditCard().take(10000), N_SAMPLES, seed=40)
MSE_score_arr = []
values = []
for x, y in data_stream:
    #print(pd.DataFrame.from_dict(x))
    ipca = ipca.partial_fit(pd.DataFrame.from_dict([x]))
    x_trans = ipca.transform(pd.DataFrame.from_dict([x]))
    inverse_trans= ipca.inverse_transform(x_trans)
    MSE_score = ((np.array(pd.DataFrame.from_dict([x]))-np.array([inverse_trans]))**2).sum()
    MSE_score_arr.append(MSE_score)
    values.append(y)
    #model6.learn_one(x)
    #y_pred= model6.score_one(x)
    #metric.update(y,y_pred)
#metric



In [180]:
values_df = pd.Series(data=values)

In [181]:
loss = pd.Series(data=MSE_score_arr)
loss = (loss-np.min(loss))/(np.max(loss)-np.min(loss))

In [182]:
loss.sum()

5.83266519427549

In [183]:
metric = metrics.ROCAUC()
for yt, yp in zip(values, loss):
    print(yt, yp)
    metric = metric.update(yt, yp)
metric

0 7.512757387361314e-05
0 0.0018324259314191065
0 5.223598982721371e-05
0 6.594090690406285e-06
0 7.598764807796735e-05
0 5.733809055652214e-07
0 6.649683981970006e-05
0 8.662522097739047e-05
0 0.00011449981099920014
0 4.031814491825032e-07
0 5.443995561727084e-05
0 6.5335887266851e-05
0 7.626803821318485e-05
0 5.505326524513531e-05
0 8.459041649326979e-05
0 7.220548990921478e-05
0 5.318125062090093e-05
0 6.739737404305103e-05
0 4.699694853430128e-06
0 7.537560142292763e-05
0 6.113562128472793e-05
0 8.388012020745597e-05
0 3.738958406630855e-05
0 1.0205277657589877e-05
0 7.285144531738889e-05
0 0.00015899295324369494
0 5.2348310013511235e-05
0 0.0004037769394944264
0 6.234038617130721e-05
0 3.755749902185873e-05
0 6.410955544884758e-05
0 8.31125351592682e-05
0 7.629646495430076e-05
0 8.803977425209661e-05
0 2.116724638771584e-05
0 6.003395018372979e-05
0 4.6177950940089566e-05
0 1.5033196011939296e-05
0 1.722407410169257e-06
0 1.0432437578012063e-05
0 9.489517619624946e-06
0 8.29452259

ROCAUC: 0.499649

## OneClassSVM

In [161]:
'''#funktioniert nur mit river=0.9, allerdings muss dann git repo geupdatet werden, da anomalie.anomaliedetector klasse nicht mehr in base sondern in anomly ist --> Wheels können nicht mehr so gebaut werden wie bisher
model4 = compose.Pipeline(
    preprocessing.StandardScaler(),
    #fx.RBFSampler(),    
    anomaly.QuantileThresholder(
        anomaly.OneClassSVM(),
        q=0.97 #q Anpassung viele Auswirkungen
    )
)
'''

'#funktioniert nur mit river=0.9, allerdings muss dann git repo geupdatet werden, da anomalie.anomaliedetector klasse nicht mehr in base sondern in anomly ist --> Wheels können nicht mehr so gebaut werden wie bisher\nmodel4 = compose.Pipeline(\n    preprocessing.StandardScaler(),\n    #fx.RBFSampler(),    \n    anomaly.QuantileThresholder(\n        anomaly.OneClassSVM(),\n        q=0.97 #q Anpassung viele Auswirkungen\n    )\n)\n'

In [44]:
'''metric= river.metrics.ROCAUC()
data_stream = stream.shuffle(river.datasets.CreditCard().take(8000), N_SAMPLES, seed=42)
for x, y in data_stream:
    model4.learn_one(x)
    y_pred= model4.score_one(x)
    metric.update(y,y_pred)
metric
'''

'metric= river.metrics.ROCAUC()\ndata_stream = stream.shuffle(river.datasets.CreditCard().take(8000), N_SAMPLES, seed=42)\nfor x, y in data_stream:\n    model4.learn_one(x)\n    y_pred= model4.score_one(x)\n    metric.update(y,y_pred)\nmetric\n'

## HalfSpaceTrees

In [45]:
model3 = compose.Pipeline(
    preprocessing.StandardScaler(),
    anomaly.HalfSpaceTrees(seed=SEED)
)

In [53]:
metric= river.metrics.ROCAUC()
data_stream = stream.shuffle(river.datasets.CreditCard().take(8000), N_SAMPLES, seed=42)
for x, y in data_stream:
    model3.learn_one(x)
    y_pred= model3.score_one(x)
    #print(y_pred)
    metric.update(y,y_pred)
metric

0.9732352250489237
0.9701291585127202
0.9709244618395303
0.9513393346379648
0.9865048923679061
0.9794630136986301
0.9338802348336595
0.9869620352250489
0.9812790606653621
0.9853150684931506
0.9641142857142857
0.9456563600782779
0.9665033268101761
0.9758841487279843
0.9842317025440314
0.9766230919765166
0.9499272015655578
0.9488219178082192
0.9639890410958905
0.9428477495107632
0.9907694716242661
0.9553127201565558
0.9837651663405088
0.9615530332681017
0.9834864970645792
0.9706739726027397
0.9587976516634051
0.9704234833659491
0.9759780821917808
0.9861103718199609
0.9752391389432485
0.9718700587084149
0.98213385518591
0.9601816046966732
0.9709244618395303
0.892508806262231
0.9599123287671233
0.9786896281800391
0.9613651663405088
0.9748759295499021
0.9903874755381604
0.961841095890411
0.9524602739726027
0.9701228962818004
0.9604508806262231
0.9816454011741683
0.9580587084148728
0.9907694716242661
0.9760407045009785
0.9679123287671233
0.9935185909980431
0.9690896281800392
0.98387475538160

ROCAUC: 0.639373

# Random

In [47]:
dataset = datasets.synth.RandomRBF(seed_model=7, seed_sample=SEED,n_classes=10,n_features=200).take(N_SAMPLES)

def build_fn(n_features):
    net = nn.Sequential(
        nn.Linear(n_features, 5),
        nn.ReLU(),
        nn.Linear(5, 5),
        nn.ReLU(),
        nn.Linear(5, 5),
        nn.ReLU(),
        nn.Linear(5, 5),
        nn.ReLU(),
        nn.Linear(5, 1),
        nn.Softmax()
    )
    return net

model = compose.Pipeline(
    preprocessing.StandardScaler(),
    PyTorch2RiverClassifier(
                build_fn=build_fn,
                loss_fn=nn.BCELoss,
                optimizer_fn=optim.Adam,
                batch_size=1,
                learning_rate=1e-3,
    )
)

for x, y in data_stream:
    y_pred = model.predict_proba_one(x)
    model.learn_one(x, y)
    METRIC.update(y, y_pred)
METRIC

ROCAUC: -0.