In [None]:
# Base
import pandas as pd

# Data
import collections
from river import datasets

from river import optim
from river import linear_model
from river import imblearn
from river import anomaly
from river import neighbors
from river import facto
from river import naive_bayes
from river import tree
from river import ensemble
from river import metrics
from river import evaluate
from river import preprocessing
from river import feature_extraction
from river import stats
from river import utils
from river import dummy

from streamz import Stream
from streamz.river import RiverTrain, RiverPredict

In [None]:
# Load the credit-card stream and report how many samples carry each label.
X_y = datasets.CreditCard()

counts = collections.Counter(y for _, y in X_y)

# The grand total does not change inside the loop, so compute it once
# instead of re-summing the counter for every label.
total = sum(counts.values())

for c, count in counts.items():
    print(f'{c}: {count} ({count / total:.5%})')


In [None]:
# Pipeline: standardise the features, then feed a logistic regression through
# a RandomUnderSampler configured with desired_dist={0: .8, 1: .2}.
model = (
    preprocessing.StandardScaler() |
    imblearn.RandomUnderSampler(
        classifier=linear_model.LogisticRegression(
            loss=optim.losses.Log(weight_pos=5)
        ),
        desired_dist={0: .8, 1: .2},
        seed=42
    )
)

metric = metrics.ClassificationReport()

# One 0/1 flag per sample, in stream order (1 when the prediction equals 1).
anomaly_times = []
for x, y in X_y:
    # Predict before learning, so each sample is scored before the model sees it.
    y_pred = model.predict_one(x)
    anomaly_times.append(1 if y_pred == 1 else 0)
    # `update` and `learn_one` mutate their object in place. Their return
    # values are deliberately not rebound: on river releases where these
    # methods return None, `metric = metric.update(...)` would replace the
    # metric/model with None after the first sample.
    metric.update(y, y_pred)
    model.learn_one(x, y)

metric

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px


In [None]:
# Materialise the whole stream: column 0 holds each sample's feature dict,
# column 1 holds its label.
stream_frame = pd.DataFrame.from_dict(X_y.take(X_y.n_samples))
df_y = stream_frame[1]

# Expand the feature dicts into one column per feature, then attach the
# label (under the integer column name 1) and the per-sample prediction flags.
df = pd.json_normalize(stream_frame[0])
df[1] = df_y
df['pred'] = anomaly_times
df.head()

In [None]:
# Line chart of feature V12, the label column (1) and the prediction flags
# for the first 10,000 rows.
preview = df.head(10000)
plot_columns = ["V12", 1, 'pred']
px.line(preview[plot_columns])

In [None]:
class HalfSpaceTrees(anomaly.HalfSpaceTrees):
    """`anomaly.HalfSpaceTrees` exposed through classifier-style method names.

    Every method accepts an optional ``y`` that is ignored and forwards to the
    parent's ``learn_one`` / ``score_one``.
    """

    def learn_one(self, x, y=None):
        """Forward ``x`` to the parent ``learn_one`` and return its result; ``y`` is unused."""
        return super().learn_one(x)

    def predict_one(self, x, y=None):
        """Return the parent's ``score_one(x)`` (a score, not a class label)."""
        return super().score_one(x)

    def predict_proba_one(self, x, y=None):
        """Return ``{False: 1 - score, True: score}`` using the parent's ``score_one(x)``."""
        score = super().score_one(x)
        return {False: 1.0 - score, True: score}
  
class OneClassSVM(anomaly.OneClassSVM):
    """`anomaly.OneClassSVM` exposed through classifier-style method names.

    Every method accepts an optional ``y`` that is ignored and forwards to the
    parent's ``learn_one`` / ``score_one``.
    """

    def learn_one(self, x, y=None):
        """Forward ``x`` to the parent ``learn_one`` and return its result; ``y`` is unused."""
        return super().learn_one(x)

    def predict_one(self, x, y=None):
        """Return the parent's ``score_one(x)`` (a score, not a class label)."""
        return super().score_one(x)

    def predict_proba_one(self, x, y=None):
        """Return ``{False: 1 - score, True: score}`` using the parent's ``score_one(x)``.

        NOTE(review): the two values only form a probability pair if
        ``score_one`` returns a value in [0, 1] -- confirm for OneClassSVM.
        """
        score = super().score_one(x)
        return {False: 1.0 - score, True: score}

In [None]:
# HalfSpaceTrees wrapped in a QuantileFilter (q=0.95); the filter's `classify`
# turns each anomaly score into a flag.
model = anomaly.QuantileFilter(
        anomaly.HalfSpaceTrees(seed=42),
        q=0.95
    )

# NOTE(review): this loop drops a 'time' column from every row of `df` --
# confirm `df` is the frame this cell expects; the frame built earlier in this
# notebook is only shown with the 'V12', 1 and 'pred' columns.
anomaly_times = []
for _, row in df.iterrows():
    x = row.drop('time').to_dict()

    # Score first, learn afterwards, so each row is judged before it is seen.
    score = model.score_one(x)
    y_pred = model.classify(score)
    anomaly_times.append(1 if y_pred == 1 else 0)

    # `learn_one` mutates the model in place. Its return value is not rebound:
    # on river releases where it returns None, `model = model.learn_one(x)`
    # would leave `model` as None and the next row would fail.
    model.learn_one(x)
        
    

In [None]:
# Plot the anomaly flags and the SOC column against the time column.
time_axis = df["time"]
px.line(x=time_axis, y=[anomaly_times, df["SOC"]])

In [None]:
# Scaled features fed into dummy.NoChangeClassifier (presumably a baseline to
# compare the other models against).
model = (
    #feature_extraction.TargetAgg(by='V1', how=utils.Rolling(stats.Mean(), 7)) |
    preprocessing.StandardScaler() |
    dummy.NoChangeClassifier()
)

metric = metrics.ClassificationReport()

for i, (x, y) in enumerate(X_y):
    # The first sample is only learned from, never scored.
    if i > 0:
        y_pred = model.predict_one(x)
        # In-place update; not rebound, because `update` returns None on
        # newer river releases.
        metric.update(y, y_pred)
    # In-place update; rebinding would set `model` to None on river releases
    # where `learn_one` returns nothing.
    model.learn_one(x, y)

metric

In [None]:
# One shared StandardScaler feeding a VotingClassifier made of a Hoeffding
# tree and a logistic regression.
model = (
    preprocessing.StandardScaler() |
    ensemble.VotingClassifier(models=(
        tree.HoeffdingTreeClassifier(
                splitter=tree.splitter.GaussianSplitter(),
                leaf_prediction="mc",
                max_depth=5
            ),
        linear_model.LogisticRegression(
            loss=optim.losses.Log(weight_pos=5)
        )
    ))
)

metric = metrics.ClassificationReport()

for i, (x, y) in enumerate(X_y):
    # The first sample is only learned from, never scored.
    if i > 0:
        y_pred = model.predict_one(x)
        # In-place update; not rebound, because `update` returns None on
        # newer river releases.
        metric.update(y, y_pred)
    # In-place update; rebinding would set `model` to None on river releases
    # where `learn_one` returns nothing.
    model.learn_one(x, y)

metric

In [None]:
# VotingClassifier whose members each carry their own scaler: MinMaxScaler
# in front of the Hoeffding tree, StandardScaler in front of the logistic
# regression.
model = (
    ensemble.VotingClassifier(models=(
        preprocessing.MinMaxScaler() |
        tree.HoeffdingTreeClassifier(
                splitter=tree.splitter.GaussianSplitter(),
                leaf_prediction="mc",
                max_depth=5
            ),
        preprocessing.StandardScaler() |
        linear_model.LogisticRegression(
            loss=optim.losses.Log(weight_pos=5)
        )
    ))
)

metric = metrics.ClassificationReport()

for i, (x, y) in enumerate(X_y):
    # The first sample is only learned from, never scored.
    if i > 0:
        y_pred = model.predict_one(x)
        # In-place update; not rebound, because `update` returns None on
        # newer river releases.
        metric.update(y, y_pred)
    # In-place update; rebinding would set `model` to None on river releases
    # where `learn_one` returns nothing.
    model.learn_one(x, y)

metric

In [None]:
# Look up the 'HoeffdingTreeClassifier' entry of `model` and call its draw().
# NOTE(review): the `model` assigned in the preceding cell is an
# ensemble.VotingClassifier -- confirm it supports lookup by this string key,
# or which earlier model this cell was meant to run against.
model['HoeffdingTreeClassifier'].draw()

In [None]:
from river import utils

def _raw_dot_one(self, x: dict) -> float:
    """Return ``self._weights @ VectorDict(x) + self.intercept`` for one sample.

    ``self`` is any object exposing ``_weights`` (supporting ``@`` with a
    ``utils.VectorDict``) and ``intercept``; ``x`` is a feature dict.
    """
    features = utils.VectorDict(x)
    # `@` dispatches to the weights' __matmul__ with the wrapped features.
    weighted_sum = self._weights @ features
    return weighted_sum + self.intercept

In [None]:
# Evaluate the standalone helper on the current `model` and on `x`, the last
# sample left over from an earlier loop, then display the value.
# NOTE(review): the helper reads `model._weights` and `model.intercept` --
# confirm the `model` in scope here exposes both.
v = _raw_dot_one(model, x)
v

In [None]:
# Call the model's own `_raw_dot_one` on the same sample (presumably to compare
# against the value `v` from the standalone helper).
model._raw_dot_one(x)

In [None]:
# Log loss with default arguments; its `mean_func` is applied to `v` below.
loss=optim.losses.Log()

In [None]:
# Pass the raw score `v` through the loss's mean function
# (presumably the sigmoid for Log loss -- TODO confirm).
p = loss.mean_func(v)

In [None]:
# Display `p` and its complement keyed by True / False.
{False: 1.0 - p, True: p}

In [None]:
# Recompute `p` from `v` (same expression as the earlier cell) and display it.
p = loss.mean_func(v)
p

In [None]:
# Store `p` and its complement keyed by True / False; read back below
# via y_pred[True].
y_pred = {False: 1.0 - p, True: p}
y_pred

Inverse: recover the raw score from the predicted probability

In [None]:
import numpy as np

In [None]:
# Take the value stored under the True key as the starting point for the inversion.
p_ = y_pred[True]

In [None]:
# Invert the sigmoid: raw_dot = log(p / (1 - p)).
eps = 1e-12  # np.spacing(p_)
# Clip into [eps, 1 - eps] instead of adding eps to p_: adding it makes
# 1 - (p_ + eps) negative when p_ == 1, so the log would return NaN.
p_clipped = np.clip(p_, eps, 1 - eps)
raw_dot = np.log(p_clipped / (1 - p_clipped))
raw_dot

In [None]:
# Display the model's intercept, the additive term in `_raw_dot_one`.
model.intercept

In [None]:
# Remove the intercept from the recovered raw score; by the formula in
# `_raw_dot_one`, what remains is the weights-times-features term.
(raw_dot - model.intercept)

In [None]:
# Flatten the model's weight mapping into a plain list of its values.
w = list(model._weights.values())

In [None]:
# Stack the weights on top of a row of ones of the same length and display
# the shape of the resulting array.
np.array([np.array(w), np.ones(len(w))]).shape

In [None]:
# `w` is a flat list, but lstsq requires a 2-D coefficient matrix and raises
# on 1-D input. Treat the weights as the single row of a 1 x n system
# w . x = raw_dot and wrap the scalar right-hand side to match; lstsq then
# returns the minimum-norm x. rcond=None selects the machine-precision cutoff
# explicitly.
# NOTE(review): another cell computes raw_dot - model.intercept -- confirm
# whether the intercept should be subtracted from the right-hand side here.
np.linalg.lstsq(np.atleast_2d(w), np.atleast_1d(raw_dot), rcond=None)

In [None]:
# pinv needs at least a 2-D input and raises on the flat list `w`; view the
# weights as a 1 x n row matrix, so the pseudo-inverse has shape (n, 1).
np.linalg.pinv(np.atleast_2d(w))