In [14]:
import sys
import os
# Put the directory two levels above the working directory on the import path.
# NOTE(review): presumably this is the repository root, so `tinyshift` resolves
# without being installed — confirm against the repo layout.
sys.path.append(os.path.abspath("../.."))
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from tinyshift.outlier import HBOS, SPAD, PCAReconstructionError
from sklearn.model_selection import train_test_split
from tinyshift.series import hampel_filter, bollinger_bands

In [2]:

# Synthetic, imbalanced binary-classification data shared by every detector demo below.
# One seed drives all random steps so that Restart Kernel -> Run All reproduces the
# same frame (the categorical columns were previously drawn from the unseeded global
# NumPy RNG and changed on every run).
SEED = 42

weights = [0.2, 0.8]

X, y = make_classification(
    n_samples=100000,
    n_features=20,
    n_informative=2,
    weights=weights,
    random_state=SEED,
    n_redundant=2)

num_samples = X.shape[0]

# Seeded generator: both categorical columns are now deterministic.
rng = np.random.default_rng(SEED)
categorical_col1 = rng.choice(['A', 'B', 'C'], size=num_samples)
categorical_col2 = rng.choice(['X', 'Y', 'Z'], size=num_samples)

df = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(X.shape[1])])
df['cat1'] = categorical_col1
df['cat2'] = categorical_col2
# Convert the string labels to categorical dtype and relabel the categories with
# integer codes (note that cat2 maps in reverse alphabetical order: Z -> 0, X -> 2).
df["cat1"] = df["cat1"].astype("category").cat.rename_categories({"A": 0, "B": 1, "C": 2})
df["cat2"] = df["cat2"].astype("category").cat.rename_categories({"Z": 0, "Y": 1, "X": 2})

# 80/20 split; fixed random_state keeps the same rows in each split across runs.
df_train, df_test = train_test_split(df, test_size=0.2, random_state=SEED)

## HBOS

In [3]:
# Build the HBOS detector with dynamic_bins=True and fit it on the training split.
# NOTE(review): nbins="fd" presumably selects the Freedman-Diaconis bin rule —
# confirm against the tinyshift documentation.
hbos = HBOS(dynamic_bins=True)
# Left as the last expression so the value returned by fit is displayed by the notebook.
hbos.fit(df_train, nbins="fd")

0,1,2
,dynamic_bins,True


In [4]:
# Apply the fitted HBOS detector to the held-out split; the displayed result is a
# boolean Series with one entry per df_test row.
# NOTE(review): presumably True marks an outlier — confirm against tinyshift.
hbos.predict(df_test)

75721    False
80184    False
19864    False
76699    False
92991    False
         ...  
32595    False
29313    False
37862    False
53421    False
42410    False
Length: 20000, dtype: bool

## SPAD

In [5]:
# SPAD with its default constructor arguments, fitted on the training split.
# NOTE(review): method="mad" presumably selects a median-absolute-deviation based
# rule and nbins="fd" the Freedman-Diaconis bin rule — confirm against tinyshift.
spad = SPAD()
spad.fit(df_train, nbins="fd", method="mad")

0,1,2
,plus,False


In [6]:
# Apply the fitted SPAD detector to the held-out split; the displayed result is a
# boolean Series with one entry per df_test row.
# NOTE(review): presumably True marks an outlier — confirm against tinyshift.
spad.predict(df_test)

75721    False
80184    False
19864    False
76699    False
92991    False
         ...  
32595    False
29313    False
37862    False
53421    False
42410    False
Length: 20000, dtype: bool

## SPAD+

In [7]:
# SPAD constructed with plus=True (the "SPAD+" variant named in the section heading).
# No fit options are passed here, so the library's defaults apply.
spad_plus = SPAD(plus=True)
spad_plus.fit(df_train)

0,1,2
,plus,True


In [8]:
# Apply the fitted SPAD+ detector to the held-out split; the displayed result is a
# boolean Series with one entry per df_test row.
# NOTE(review): presumably True marks an outlier — confirm against tinyshift.
spad_plus.predict(df_test)

75721    False
80184    False
19864    False
76699    False
92991    False
         ...  
32595    False
29313    False
37862    False
53421    False
42410    False
Length: 20000, dtype: bool

## PCAReconstructionError

In [9]:
# Fit the PCA reconstruction-error detector on the numeric features only:
# the two categorical columns are dropped before fitting.
model = PCAReconstructionError()
model.fit(df_train.drop(columns=["cat1", "cat2"]))

In [10]:
# Apply the fitted detector to the held-out split, again without the two categorical columns.
model.predict(df_test.drop(columns=["cat1", "cat2"]))

75721    False
80184    False
19864    False
76699    False
92991    False
         ...  
32595    False
29313    False
37862    False
53421    False
42410    False
Length: 20000, dtype: bool

## Hampel Filter

In [11]:
# Boolean mask produced by the Hampel filter on feature_0 with rolling_window=31.
# NOTE(review): df_train comes out of train_test_split in shuffled row order, so the
# rolling window runs over rows in that order rather than over a real time axis.
mask = hampel_filter(df_train["feature_0"], rolling_window=31)

In [12]:
# Inspect the mask: a boolean Series carrying df_train's index, one entry per training row.
mask

75220    False
48955    False
44966    False
13568    False
92727    False
         ...  
6265     False
54886    False
76820    False
860      False
15795    False
Length: 80000, dtype: bool

In [13]:
# Show the training rows selected by the boolean mask (explicit row-wise .loc selection).
df_train.loc[mask]

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,cat1,cat2
52795,2.249811,1.769962,-1.133707,-1.092061,2.132263,0.675658,1.794896,-0.209883,0.088248,1.862140,...,-0.482956,-0.094769,2.006993,-0.015849,0.923930,1.985005,-1.661885,-0.830211,1,1
63509,-1.700386,0.310031,1.020469,0.561973,1.941331,-1.053395,0.875382,-0.562172,-0.185595,1.637448,...,0.854262,0.202537,-0.516156,1.390298,0.820246,-0.301861,-1.608119,-0.715477,0,2
18305,3.275712,-0.330931,-0.681957,-1.000313,1.211448,-1.598008,1.178794,-0.519731,1.233146,3.383114,...,0.240360,0.981985,0.797859,0.825664,1.147904,2.181968,2.869432,-0.557981,2,2
93541,-2.617944,-1.215280,1.138909,0.057781,0.514659,0.628937,0.165770,0.227630,-0.186650,1.033453,...,-1.149154,-1.464689,-0.868170,-0.427337,-1.433856,-0.357814,0.556726,-0.177873,1,2
81204,-3.152698,-0.292751,0.158583,-0.153290,-0.911454,-1.881228,-0.465879,0.414919,-0.977661,-0.484612,...,0.640820,0.054296,-0.477392,0.024903,-0.282346,1.291195,1.221097,-0.130863,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90272,3.824992,-0.776902,1.138167,0.470020,-0.085914,0.687675,-0.194048,0.123542,0.347932,-1.055155,...,-0.761483,-1.060512,0.269134,-0.055415,-0.101634,-0.306009,-1.540617,0.762117,0,2
94179,3.329940,-1.380274,-0.659775,0.730667,-0.206522,-1.356045,-1.665993,-0.360154,0.635125,-0.265886,...,0.570982,-0.332362,-0.021239,0.168183,1.498932,-0.226665,0.020684,0.479702,2,2
50859,-2.589795,-0.232267,-0.954487,1.670590,1.292465,0.423638,-0.253321,1.437519,0.825140,1.387005,...,3.489848,-0.728712,-0.111361,0.652907,1.610334,-0.552116,-0.583731,0.347285,0,0
8392,-2.133624,-2.142353,0.221636,0.810008,-0.743554,-1.410799,-0.177610,-0.091637,0.482689,-1.541899,...,1.562888,0.874327,0.602895,-0.611166,0.461044,0.349691,-0.884398,0.715821,0,0


## Bollinger Bands

In [16]:
# Mask produced by bollinger_bands on feature_0 with window_size=20.
# This rebinds `mask`, replacing the Hampel-filter mask computed earlier.
# NOTE(review): presumably True marks values outside the bands — confirm against tinyshift.
mask = bollinger_bands(df_train["feature_0"], window_size=20)

In [17]:
# Show the training rows selected by the current mask (explicit row-wise .loc selection).
df_train.loc[mask]

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,cat1,cat2
91822,2.446897,-1.873173,-0.277207,-0.323350,-0.194161,-0.465009,-0.259840,-0.828421,0.852921,-0.937976,...,-1.903408,-1.397510,1.155421,0.914957,0.861792,-0.900985,-1.109002,-0.497740,0,0
80164,2.158321,0.088711,0.074423,0.922979,-2.120018,0.379830,-0.910783,-3.154454,0.184499,-3.037899,...,0.008467,0.155263,-0.005680,-1.697111,1.497193,-1.150032,-0.293651,1.161037,0,2
3676,-2.238795,1.513381,-0.171619,-0.383898,0.855568,1.235173,0.920553,-1.029712,1.194192,1.343726,...,-0.771300,2.161347,-0.076466,0.440215,-0.171268,-1.105015,0.311610,0.676825,2,1
28576,2.307311,0.032496,-1.685927,0.414169,0.924681,-0.214520,-0.520629,-0.075347,0.574153,1.303859,...,2.005095,-0.437482,1.239350,-1.757824,0.793733,0.001945,0.093357,-1.010856,2,1
58405,2.471928,-1.752529,0.901016,1.180065,-1.262996,-0.797453,-2.916988,1.314769,0.770943,-0.609714,...,-0.474826,-0.399541,-0.670467,-0.866592,-0.270076,1.188339,1.793447,2.252252,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8392,-2.133624,-2.142353,0.221636,0.810008,-0.743554,-1.410799,-0.177610,-0.091637,0.482689,-1.541899,...,1.562888,0.874327,0.602895,-0.611166,0.461044,0.349691,-0.884398,0.715821,0,0
39504,2.463999,-0.027669,0.665223,0.065668,-0.629223,2.005676,0.706197,-1.076758,-0.812484,-0.355699,...,-1.221804,0.236144,0.586282,0.484663,-0.243867,0.291738,0.808302,-1.598902,0,1
89475,-2.475341,-1.627642,0.902674,0.451601,0.485651,1.352948,-0.442088,0.960226,0.886217,0.173513,...,-1.374681,0.417602,-0.543764,1.771024,1.108823,-1.026496,-0.789566,-0.040948,1,2
56886,-2.191413,-0.493790,0.379632,0.538501,0.379134,2.067255,1.717376,-0.598492,0.628453,1.023933,...,-1.652189,-0.305734,0.344367,2.224179,0.887497,-0.763483,0.840864,-0.233993,1,1
