In [None]:
import pandas as pd
from evidently.test_suite import TestSuite
from evidently.test_preset import DataDriftTestPreset
from evidently.tests import *
from batchstream.history.base.history_manager import HistoryManager
from batchstream.monitoring.pipeline.model_monitoring_pipeline import ModelMonitoringPipeline
from batchstream.monitoring.pipeline.steps.batch.evidently_monitoring_step import EvidentlyMonitoringStep
from sklearn.datasets import load_breast_cancer
from batchstream.utils.logging.base.logger_factory import LoggerFactory
from batchstream.batch_monitoring_strategy.dummy_monitoring_strategy import DummyMonitoringStrategy
from batchstream.retraining_strategy.dummy_retraining_strategy import DummyRetrainingStrategy 
from batchstream.model_comparers.batch_comparer import BatchModelComparer
from batchstream.model_comparers.shadow_comparer import ShadowOnlineComparer
from batchstream.pipelines.batch.batch_pipeline import BatchPipeline
from batchstream.estimators.sklearn_estimator import SklearnEstimator
from batchstream.detectors.base.detector import DriftDetector
from batchstream.experiment.experiment import StreamExperiment
from river.metrics import Accuracy, ROCAUC
from batchstream.evaluation.model_evaluation_pipeline import ModelEvaluationPipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline



# Objects shared by the components built below.
history = HistoryManager()
logger_factory = LoggerFactory('test-2220')


### INPUT DRIFT DETECTION
# Detector 1.1 - Data Drift
# Evidently test suite that contains only the DataDriftTestPreset.
data_drift_test_suite = TestSuite(tests=[
   DataDriftTestPreset(),
])
d1 = DummyMonitoringStrategy(n_curr=120, n_ref=120)
ev1 = EvidentlyMonitoringStep(data_drift_test_suite, d1, logger_factory, min_instances=240, clock=120, name='data_drift_eval')

# Detector 1.2 - Target Drift
# Column-drift test restricted to the 'target' column.
target_drift = TestSuite(tests=[
    TestColumnDrift(column_name='target'),
])
d2 = DummyMonitoringStrategy(n_curr=120, n_ref=120, type='target')
ev2 = EvidentlyMonitoringStep(target_drift, d2, logger_factory, min_instances=240, clock=120, name='target_drift_eval')

# Both input steps are registered under their own names and paired with a retraining strategy.
input_monitoring = ModelMonitoringPipeline([(ev1._name, ev1), (ev2._name, ev2)])
input_drift_retraining_strategy = DummyRetrainingStrategy(n_last_retrain=120, n_last_test=0)
input_detector = DriftDetector(input_monitoring, input_drift_retraining_strategy)
###




### OUTPUT (PERFORMANCE) DRIFT DETECTION
# Detector 2.1 - Performance Drift

# Four classification-quality tests evaluated together in one suite.
performance_drift = TestSuite(tests=[
    TestPrecisionScore(),
    TestRecallScore(),
    TestF1Score(),
    TestAccuracyScore()
])
d3 = DummyMonitoringStrategy(n_curr=120, n_ref=120, type='prediction')
# NOTE(review): min_instances is 360 here but 240 for the two input steps — confirm this is intended.
ev3 = EvidentlyMonitoringStep(performance_drift, d3, logger_factory, min_instances=360, clock=120, name='performance_drift_eval')

output_monitoring = ModelMonitoringPipeline([(ev3._name, ev3)])
output_drift_retraining_strategy = DummyRetrainingStrategy(n_last_retrain=120, n_last_test=0)
output_detector = DriftDetector(output_monitoring, output_drift_retraining_strategy)
###

### Models comparison (after retraining)
# The batch comparer is kept as a commented-out alternative to the shadow comparer.
#model_comparer = BatchModelComparer()
model_comparer = ShadowOnlineComparer(n_online=20)
###


### Model's Performance Evaluation
# river metrics registered under the names 'accuracy' and 'roc_auc'.
acc = Accuracy()
roc_auc = ROCAUC()
eval_pipe = ModelEvaluationPipeline(metric_steps=[
    ('accuracy', acc),
    ('roc_auc', roc_auc)
])
###


### Model composition
# A one-step sklearn pipeline (random forest) wrapped in the project's estimator adapter.
sklearn_batch_classifier = SklearnEstimator(Pipeline([('rf', RandomForestClassifier())]))
batch_pipeline = BatchPipeline(
    sklearn_batch_classifier,
    input_drift_detector=input_detector,
    output_drift_detector=output_detector,
    history=history,
    logger_factory=logger_factory,
    model_comparer=model_comparer,
    min_samples_retrain=120,
    min_samples_first_fit=240
)
###

### Experiment
# Ties the batch pipeline, the evaluation pipeline and the logger factory together.
experiment = StreamExperiment(batch_pipeline, eval_pipe, logger_factory)


In [None]:
# Feature matrix and labels come back as two arrays; the labels are attached
# to the feature frame as a trailing 'target' column.
X, Y = load_breast_cancer(return_X_y=True)
df = pd.DataFrame(X).assign(target=Y)


In [None]:
# Feed the breast-cancer frame (features + 'target') to the configured StreamExperiment.
experiment.run(df)

In [None]:
from river.metrics import Accuracy
from river import utils

# Toy labels / predictions used to illustrate a windowed accuracy.
Y_true =    [1, 0, 1, 0, 1, 0, 0]
Y_predict = [1, 1, 0, 0, 1, 1, 1]


# Accuracy wrapped in a rolling window of size 3.
roll_acc = utils.Rolling(Accuracy(), window_size=3)
for y_t, y_p in zip(Y_true, Y_predict):
    # Update first, then read the value. This does not depend on `update`
    # returning the metric object (newer river releases return None instead).
    roll_acc.update(y_t, y_p)
    print(roll_acc.get())

In [None]:

from evidently.test_preset import DataDriftTestPreset



# Fresh suite holding only the DataDriftTestPreset; rebinds `data_drift_test_suite`
# so the following cells can inspect its internals.
data_drift_test_suite = TestSuite(tests=[
    DataDriftTestPreset(),
])

In [None]:
data_drift_test_suite._inner_suite.context.tests

In [None]:
from evidently.tests.base_test import Test

t: Test = data_drift_test_suite._inner_suite.context.tests[0] 

In [None]:
t.__dict__

In [None]:
from batchstream.pipelines.base.stream_pipeline import StreamPipeline
from batchstream.utils.logging.base.logger_factory import LoggerFactory


from typing import List


class CombinationPipeline(StreamPipeline):
    """Stream pipeline that hands every sample to a combiner over several member pipelines.

    Args:
        members: Pipelines passed to the combiner on every call.
        combiner: Object exposing ``combine(x, y, members)``; its result is
            returned unchanged from :meth:`handle`.
    """

    def __init__(self, members: List[StreamPipeline], combiner: object):
        # NOTE(review): StreamPipeline.__init__ is not invoked here — confirm the base class needs no setup.
        self._members = members
        self._combiner = combiner

    def handle(self, x, y) -> int:
        """Return whatever the combiner produces for sample ``x`` and label ``y``."""
        return self._combiner.combine(x, y, self._members)



In [None]:
import pandas as pd
from evidently.test_suite import TestSuite
from evidently.test_preset import DataDriftTestPreset
from evidently.tests import *
from batchstream.history.base.history_manager import HistoryManager
from batchstream.monitoring.pipeline.model_monitoring_pipeline import ModelMonitoringPipeline
from batchstream.monitoring.pipeline.steps.batch.evidently_monitoring_step import EvidentlyMonitoringStep
from sklearn.datasets import load_breast_cancer
from batchstream.utils.logging.base.logger_factory import LoggerFactory
from batchstream.batch_monitoring_strategy.simple_monitoring_strategy import SimpleMonitoringStrategy
from batchstream.retraining_strategy.simple_retraining_strategy import SimpleRetrainingStrategy 
from batchstream.model_comparers.batch_comparer import BatchModelComparer
from batchstream.model_comparers.shadow_comparer import ShadowOnlineComparer
from batchstream.pipelines.batch.batch_pipeline import BatchPipeline
from batchstream.estimators.sklearn_estimator import SklearnEstimator
from batchstream.detectors.base.drift_handler import DriftHandler
from batchstream.experiment.experiment import StreamExperiment
from river.metrics import Accuracy, ROCAUC
from batchstream.evaluation.river_evaluation_pipeline import RiverEvaluationPipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline



# One retraining strategy instance is shared by the input and the output drift handlers below.
logger_factory = LoggerFactory('test-2218')
retraining_strategy = SimpleRetrainingStrategy(
  n_last_retrain=500, n_last_test=0)


### INPUT DRIFT DETECTION
# Detector 1.1 - Data Drift
# Evidently test suite that contains only the DataDriftTestPreset.
data_drift_test_suite = TestSuite(tests=[
DataDriftTestPreset(),
])
d1 = SimpleMonitoringStrategy(n_curr=120, n_ref=120)
ev1 = EvidentlyMonitoringStep(data_drift_test_suite, d1, logger_factory, min_instances=240, clock=120, name='data_drift_eval')

# Detector 1.2 - Target Drift
# Column-drift test restricted to the 'target' column.
target_drift = TestSuite(tests=[
    TestColumnDrift(column_name='target'),
])
d2 = SimpleMonitoringStrategy(n_curr=120, n_ref=120, type='target')
ev2 = EvidentlyMonitoringStep(target_drift, d2, logger_factory, min_instances=240, clock=120, name='target_drift_eval')

# NOTE(review): `input_detector` is a single handler, while the BatchPipeline call at the
# end of this cell passes a list named `input_drift_handlers` — confirm which is intended.
input_monitoring = ModelMonitoringPipeline([(ev1._name, ev1), (ev2._name, ev2)])
input_detector = DriftHandler(input_monitoring, retraining_strategy)
###




### OUTPUT (PERFORMANCE) DRIFT DETECTION
# Detector 2.1 - Performance Drift

# Four classification-quality tests evaluated together in one suite.
performance_drift = TestSuite(tests=[
    TestPrecisionScore(),
    TestRecallScore(),
    TestF1Score(),
    TestAccuracyScore()
])
# The performance step uses larger windows (500/500) than the input steps (120/120).
d3 = SimpleMonitoringStrategy(n_curr=500, n_ref=500, type='prediction')
ev3 = EvidentlyMonitoringStep(performance_drift, d3, logger_factory,
  min_instances=1000, clock=500, name='performance_drift_eval')

# Output Drift Handler (Performance Drift + Retraining Strategy)
output_monitoring = ModelMonitoringPipeline([(ev3._name, ev3)])

output_drift_handlers = [
  DriftHandler(output_monitoring, retraining_strategy)
]
###

### Models comparison (after retraining)
# The batch comparer is kept as a commented-out alternative to the shadow comparer.
#model_comparer = BatchModelComparer()
model_comparer = ShadowOnlineComparer(n_online=100)
###


### Model's Performance Evaluation
# river metrics registered under the names 'accuracy' and 'roc_auc'.
acc = Accuracy()
roc_auc = ROCAUC()
eval_pipe = RiverEvaluationPipeline(metric_steps=[
    ('accuracy', acc),
    ('roc_auc', roc_auc)
])
###


### Model composition
# NOTE: rebinds `logger_factory`; the monitoring steps built earlier in this cell keep
# the factory they were constructed with.
logger_factory = LoggerFactory(experiment_id='rf_exp')
# The sklearn pipeline must be bound to a name and handed to the estimator wrapper,
# otherwise the wrapper is created without any model.
rf_model = Pipeline([('rf', RandomForestClassifier(max_depth=10))])
sklearn_batch_classifier = SklearnEstimator(rf_model)
# No `input_drift_handlers` exists above this point, so the handler built earlier in
# this cell is wrapped in a list, mirroring `output_drift_handlers`.
input_drift_handlers = [input_detector]
batch_pipeline = BatchPipeline(
    sklearn_batch_classifier,
    input_drift_handlers,
    output_drift_handlers,
    history,
    logger_factory,
    model_comparer,
    min_samples_retrain=500,
    min_samples_first_fit=1000
)

In [None]:
from river.metrics import Accuracy, MacroF1, MicroF1
from river.utils import Rolling



window_size = 1000
# Each metric is wrapped in a rolling window of `window_size` and registered
# under the name '<label>_preq_<window_size>'.
rolling_metric_classes = (
    ('acc', Accuracy),
    ('micro_f1', MicroF1),
    ('macro_f1', MacroF1),
)
eval_pipe = RiverEvaluationPipeline(metric_steps=[
    (f'{label}_preq_{window_size}', Rolling(metric_cls(), window_size))
    for label, metric_cls in rolling_metric_classes
])

In [None]:
from batchstream.pipelines.stream.model_river_pipeline import RiverPipeline


In [None]:
from river.forest import ARFClassifier
from river import ensemble
from river import evaluate
from river import metrics
from river.datasets import synth
from river import tree
# ADWIN is exposed by the `river.drift` submodule, not by the top-level package.
from river.drift import ADWIN
from river import naive_bayes


In [None]:
from river.ensemble import SRPClassifier

In [None]:
from river.forest import ARFClassifier


# ARF experiment: seeded ARFClassifier wrapped in the project's RiverPipeline and
# evaluated with the current `eval_pipe`. Rebinds the notebook-wide `logger_factory`.
logger_factory = LoggerFactory(experiment_id='arf_exp')
arf_model = ARFClassifier(seed=42, leaf_prediction="mc")
arf_pipe = RiverPipeline(arf_model)
arf_experiment = StreamExperiment(arf_pipe, eval_pipe, logger_factory)

In [None]:
# SRP ensemble of three Hoeffding trees, wrapped in the project's RiverPipeline.
# NOTE(review): nominal_attributes names 'age', 'car', 'zipcode' — confirm these
# columns exist in the datasets this pipeline is run on.
base_model = tree.HoeffdingTreeClassifier(grace_period=50, delta=0.01, nominal_attributes=['age', 'car', 'zipcode'])
srp_model = ensemble.SRPClassifier(model=base_model, n_models=3, seed=42)
srp_pipe = RiverPipeline(srp_model)

In [None]:
from river.tree import HoeffdingAdaptiveTreeClassifier



# Seeded Hoeffding Adaptive Tree with explicit hyper-parameters, wrapped in the project's RiverPipeline.
hat_model = HoeffdingAdaptiveTreeClassifier(
  grace_period=100,
  delta=1e-5,
  leaf_prediction='nb',
  nb_threshold=10,
  seed=42
)
hat_pipe = RiverPipeline(hat_model)

In [None]:
from river import naive_bayes



# Naive Bayes experiment: river's GaussianNB wrapped in the project's RiverPipeline and
# evaluated with the current `eval_pipe`. Rebinds the notebook-wide `logger_factory`.
logger_factory = LoggerFactory(experiment_id='nb_exp')
nb_model =  naive_bayes.GaussianNB()
nb_pipe = RiverPipeline(nb_model)
nb_experiment = StreamExperiment(nb_pipe, eval_pipe, logger_factory)

In [None]:
from evidently.test_suite import TestSuite
from evidently.test_preset import DataDriftTestPreset
from evidently.tests import *



### INPUT DRIFT DETECTION
# NOTE(review): this cell uses the Dummy* strategy classes imported by the first cell,
# while the second configuration cell imports Simple* variants — confirm which API is current.
# Detector 1.1 - Data Drift
data_drift_test_suite = TestSuite(tests=[
  DataDriftTestPreset(),
])
d1 = DummyMonitoringStrategy(n_curr=500, n_ref=500, type='data')
ev1 = EvidentlyMonitoringStep(data_drift_test_suite, d1, logger_factory,
  min_instances=1000, clock=500, name='data_drift_eval'
)

# Detector 1.2 - Target Drift
target_drift = TestSuite(tests=[
    TestColumnDrift(column_name='target'),
])
d2 = DummyMonitoringStrategy(n_curr=500, n_ref=500, type='target')
ev2 = EvidentlyMonitoringStep(target_drift, d2, logger_factory,
  min_instances=1000, clock=500, name='target_drift_eval'
)

# Input Drift Handler (Data + Target Drift + Retraining Strategy)
# The list defined here is the `input_drift_handlers` argument used by the BatchPipeline cells.
input_monitoring = ModelMonitoringPipeline([(ev1._name, ev1), (ev2._name, ev2)])
input_drift_retraining_strategy = DummyRetrainingStrategy(
  n_last_retrain=500, n_last_test=0
)
input_drift_handlers = [
  DriftHandler(input_monitoring, input_drift_retraining_strategy)
]


In [None]:
StreamExperiment()

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline



# Random-forest batch pipeline. Rebinds `logger_factory`, `sklearn_batch_classifier` and
# `batch_pipeline`, which the sibling model cells also overwrite.
logger_factory = LoggerFactory(experiment_id='rf_exp_all_drifts')
rf_model = Pipeline([('rf', RandomForestClassifier(max_depth=10))])
sklearn_batch_classifier = SklearnEstimator(rf_model)
# Drift handlers, history and model comparer come from earlier cells.
batch_pipeline = BatchPipeline(
    sklearn_batch_classifier,
    input_drift_handlers,
    output_drift_handlers,
    history,
    logger_factory,
    model_comparer,
    min_samples_retrain=1500,
    min_samples_first_fit=1000
)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline



# Logistic-regression batch pipeline. Rebinds `logger_factory`, `sklearn_batch_classifier`
# and `batch_pipeline`, which the sibling model cells also overwrite.
logger_factory = LoggerFactory(experiment_id='lr_exp_all_drifts')
lr_model = Pipeline([('lr', LogisticRegression())])
sklearn_batch_classifier = SklearnEstimator(lr_model)
# Drift handlers, history and model comparer come from earlier cells.
batch_pipeline = BatchPipeline(
    sklearn_batch_classifier,
    input_drift_handlers,
    output_drift_handlers,
    history,
    logger_factory,
    model_comparer,
    min_samples_retrain=1500,
    min_samples_first_fit=1000
)

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline



# sklearn Gaussian naive-Bayes batch pipeline. Rebinds `logger_factory`,
# `sklearn_batch_classifier` and `batch_pipeline`.
logger_factory = LoggerFactory(experiment_id='nb_exp_all_drifts')
# NOTE: this rebinds `nb_model`, which an earlier cell assigns to river's naive_bayes.GaussianNB.
nb_model = Pipeline([('nb', GaussianNB())])
sklearn_batch_classifier = SklearnEstimator(nb_model)
# Drift handlers, history and model comparer come from earlier cells.
batch_pipeline = BatchPipeline(
    sklearn_batch_classifier,
    input_drift_handlers,
    output_drift_handlers,
    history,
    logger_factory,
    model_comparer,
    min_samples_retrain=1500,
    min_samples_first_fit=1000
)

In [None]:
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline



# XGBoost batch pipeline. Rebinds `logger_factory`, `sklearn_batch_classifier` and
# `batch_pipeline`. The notebook's xgboost install cell sits after this one.
logger_factory = LoggerFactory(experiment_id='xgb_exp_all_drifts')
xgb_model = Pipeline([('xgb', XGBClassifier())])
sklearn_batch_classifier = SklearnEstimator(xgb_model)
# Drift handlers, history and model comparer come from earlier cells.
batch_pipeline = BatchPipeline(
    sklearn_batch_classifier,
    input_drift_handlers,
    output_drift_handlers,
    history,
    logger_factory,
    model_comparer,
    min_samples_retrain=1500,
    min_samples_first_fit=1000
)

In [None]:
!pip install xgboost

In [None]:
import pandas as pd

In [None]:
df = pd.DataFrame({'1': [1, 2, 3]})

In [None]:
df

In [None]:
df['dataset'] = 'x'

In [None]:
dataset

In [None]:
df

In [None]:
df.columns = range(len(df.columns))

In [None]:
df

In [None]:
import pandas as pd
from io import BytesIO, TextIOWrapper
from zipfile import ZipFile
from scipy.io.arff import loadarff
from os import path
from typing import Tuple



def get_covtype_dataset(data_path='./data') -> pd.DataFrame:
    """Load the normalised Covertype ARFF file from a zip archive into a DataFrame.

    Reads the member ``covtypeNorm.arff`` of
    ``<data_path>/COVTYPE/covtypeNorm.arff.zip``.

    Args:
        data_path: Directory that contains the ``COVTYPE`` folder.

    Returns:
        A single DataFrame (not a tuple): the ARFF attributes with byte-string
        columns decoded to ``str``, the ``class`` column moved to ``target``,
        every column from ``Wilderness_Area1`` onwards (``target`` included)
        cast to ``int``, and a constant ``dataset`` column set to
        ``'covtypeNorm'``.

    Raises:
        FileNotFoundError: If the zip archive does not exist.
        KeyError: If the archive has no ``covtypeNorm.arff`` member, or the
            data lacks a ``class`` or ``Wilderness_Area1`` column.
    """
    zip_path = path.join(data_path, 'COVTYPE/covtypeNorm.arff.zip')
    # The archive is only needed while parsing; everything else runs after it is closed.
    with ZipFile(zip_path, 'r') as zfile:
        in_mem_fo = TextIOWrapper(BytesIO(zfile.read('covtypeNorm.arff')), encoding='ascii')
        data = loadarff(in_mem_fo)
    df = pd.DataFrame(data[0])

    # Object-dtype columns hold byte strings after loadarff; decode them column by column.
    for col in df.select_dtypes([object]).columns:
        df[col] = df[col].str.decode('utf-8')

    # NOTE(review): 'noad'/'ad' do not look like Covertype labels; this mapping is a
    # no-op unless those values occur — confirm whether it can be dropped.
    df['target'] = df.pop('class').replace(['noad', 'ad'], [0, 1])

    # Assign whole columns instead of `df.loc[:, ...] = ...`: the loc form writes into
    # the existing object-dtype columns, so they would stay object dtype.
    int_cols = df.loc[:, "Wilderness_Area1":].columns
    df[int_cols] = df[int_cols].astype(str).astype(int)
    df['dataset'] = 'covtypeNorm'
    return df


In [None]:
df = get_covtype_dataset()

In [None]:
df.dtypes

In [None]:
from pathlib import Path
from batchstream.utils.visualization import visualize_results
from batchstream.utils.reading_logs import load_drift_history, get_metrics_vals



# NOTE(review): absolute, machine-specific path — consider a configurable base directory.
out_dir = Path(r'D:\Studia\praca\out\20230505_1848')
# Every entry of the output directory that is not a regular file is treated as one run.
dir_list = list(filter(lambda entry: not entry.is_file(), out_dir.resolve().glob('*')))
for d in dir_list:
    run_dir = str(d)
    drift_hist = load_drift_history(run_dir)
    res = get_metrics_vals(run_dir)
    print(run_dir)
    visualize_results(res, drift_hist, dataset_name='COVTYPE', metrics=['acc'])

    

In [None]:
# Feature drift does not occur
# Performance drift does not occur either (?)

***
# Elec

In [14]:
from pathlib import Path
from batchstream.utils.visualization import visualize_results
from batchstream.utils.reading_logs import load_drift_history, get_metrics_vals



# NOTE(review): absolute, machine-specific path — consider a configurable base directory.
out_dir = Path(r'D:\Studia\praca\out\20230506_1423')
# Every entry of the output directory that is not a regular file is treated as one run.
dir_list = list(filter(lambda entry: not entry.is_file(), out_dir.resolve().glob('*')))
for d in dir_list:
    run_dir = str(d)
    drift_hist = load_drift_history(run_dir)
    res = get_metrics_vals(run_dir)
    print(run_dir)
    visualize_results(res, drift_hist, dataset_name='elec', metrics=['acc'])

D:\Studia\praca\out\20230506_1423\0118962a_test_rf_target_b2d6_elec_20230506_133010


D:\Studia\praca\out\20230506_1423\02f9e928_test_rf_data_evidently_b2d6_elec_20230506_133010


D:\Studia\praca\out\20230506_1423\04309361_test_rf_target_b2d6_elec_20230506_133011


D:\Studia\praca\out\20230506_1423\09dbb700_test_rf_perf_evidently_b2d6_elec_20230506_133011


D:\Studia\praca\out\20230506_1423\2556a8d2_test_rf_data_evidently_b2d6_elec_20230506_133011


D:\Studia\praca\out\20230506_1423\3d7f738e_rf_adwin_target_b2d6_elec_20230506_133010


D:\Studia\praca\out\20230506_1423\45ebd42d_test_rf_data_evidently_b2d6_elec_20230506_133010


D:\Studia\praca\out\20230506_1423\4daba219_test_rf_perf_evidently_b2d6_elec_20230506_133010


D:\Studia\praca\out\20230506_1423\5b6d7c3d_rf_adwin_target_b2d6_elec_20230506_133010


D:\Studia\praca\out\20230506_1423\7bffd064_test_rf_target_b2d6_elec_20230506_133010


D:\Studia\praca\out\20230506_1423\7e140ed0_nb_75b0_elec_20230506_132616


D:\Studia\praca\out\20230506_1423\852ff558_arf_75b0_elec_20230506_132616


D:\Studia\praca\out\20230506_1423\9706ec8c_srp_75b0_elec_20230506_132616


D:\Studia\praca\out\20230506_1423\a2132125_rf_adwin_target_b2d6_elec_20230506_133010


D:\Studia\praca\out\20230506_1423\a35a58ac_test_rf_perf_evidently_b2d6_elec_20230506_133010


D:\Studia\praca\out\20230506_1423\cc4f6a57_test_rf_perf_evidently_b2d6_elec_20230506_133011


D:\Studia\praca\out\20230506_1423\d0adbd83_test_rf_data_evidently_b2d6_elec_20230506_133010


D:\Studia\praca\out\20230506_1423\d6daf20d_test_rf_target_b2d6_elec_20230506_133010


D:\Studia\praca\out\20230506_1423\f34728ae_ht_75b0_elec_20230506_132616


In [10]:
from river import drift

In [9]:
drift.ADWIN()

AttributeError: module 'river' has no attribute 'drift'

In [None]:
visualize_results(res, drift_hist, dataset_name='COVTYPE', metrics=['kappa'])

In [None]:
len(res)

In [None]:
tran

In [13]:
from evidently.test_suite import TestSuite
from evidently.test_preset import DataDriftTestPreset
from evidently.tests import *
from batchstream.history.base.history_manager import HistoryManager
from batchstream.monitoring.pipeline.drift_monitoring_pipeline import DriftMonitoringPipeline
from batchstream.monitoring.pipeline.steps.batch.evidently_monitoring_step import EvidentlyMonitoringStep
from batchstream.utils.logging.base.logger_factory import LoggerFactory
from batchstream.batch_monitoring_strategy.simple_monitoring_strategy import SimpleMonitoringStrategy
from batchstream.retraining_strategy.simple_retraining_strategy import SimpleRetrainingStrategy 
from batchstream.drift_handlers.base.drift_handler import DriftHandler
from tqdm import tqdm
from river import stream
from utils.read_data.get_dataset import get_dataset



# Tunable parameters of this drift-detection run.
n_start_detect=2_500
n_curr = 5_000
n_ref = 5_000
# `stattest` and `drift_share` are only interpolated into the run name below.
stattest = None
stattest_threshold = 0.04
dataset = 'rbf0.66_3'
drift_share = None


history = HistoryManager()
# NOTE(review): the run name says 'data_drift', but the test and the monitoring strategy
# below both target the 'target' column — confirm the label.
name = f'{dataset}_data_drift_{stattest}_{stattest_threshold}_{n_curr}_{n_ref}_{drift_share}'
logger_factory = LoggerFactory(name)

# The step receives the TestSuite keyword arguments as a dict rather than a TestSuite instance.
data_drift_test_suite_args = {
    'tests': [TestColumnDrift(column_name='target', stattest_threshold=stattest_threshold)]
}
d1 = SimpleMonitoringStrategy(n_curr=n_curr, n_ref=n_ref, type='target')
ev1 = EvidentlyMonitoringStep(data_drift_test_suite_args, d1, logger_factory, min_instances=n_curr, clock=n_curr, name=name)

input_monitoring = DriftMonitoringPipeline([(ev1._name, ev1)])
input_drift_retraining_strategy = SimpleRetrainingStrategy(n_last_retrain=n_curr, n_last_test=0)
input_detector = DriftHandler(input_monitoring, input_drift_retraining_strategy)

# Loading a stored dataset is disabled; the next cell generates a synthetic stream instead.
#df = get_dataset(dataset)


INFO:rbf0.66_3_data_drift_None_0.04_5000_5000_None_rbf0.66_3_data_drift_None_0.04_5000_5000_None:EvidentlyMonitoringStep - name:rbf0.66_3_data_drift_None_0.04_5000_5000_None - START


In [14]:
from river.datasets import synth


# NOTE: rebinds `dataset` (a dataset-name string in the setup cell) to the river stream object.
dataset = synth.RandomRBFDrift(seed_model=42, seed_sample=42,
    n_classes=4, n_features=4, n_centroids=20,
    change_speed=0.66, n_drift_centroids=10)

n_samples = 250_000
# enumerate replaces the hand-maintained counter; `total` lets tqdm render real
# progress, because the iterator returned by `take` has no length.
for i, (x, y) in enumerate(tqdm(dataset.take(n_samples), total=n_samples)):
    history.update_history_x(x)
    # Detection only starts once `n_start_detect` samples have been seen.
    if i >= n_start_detect:
        drift_detected = input_detector.detect(history)
    history.update_history_y(y)

9425it [00:12, 9108.79it/s] 

In [9]:
from batchstream.monitoring.pipeline.steps.online.river_monitoring_step import RiverMonitoringStep
from river import drift
from evidently.test_suite import TestSuite
from evidently.test_preset import DataDriftTestPreset
from evidently.tests import *
from batchstream.history.base.history_manager import HistoryManager
from batchstream.monitoring.pipeline.drift_monitoring_pipeline import DriftMonitoringPipeline
from batchstream.monitoring.pipeline.steps.batch.evidently_monitoring_step import EvidentlyMonitoringStep
from batchstream.utils.logging.base.logger_factory import LoggerFactory
from batchstream.batch_monitoring_strategy.simple_monitoring_strategy import SimpleMonitoringStrategy
from batchstream.retraining_strategy.simple_retraining_strategy import SimpleRetrainingStrategy 
from batchstream.drift_handlers.base.drift_handler import DriftHandler
from tqdm import tqdm
from river import stream
from utils.read_data.get_dataset import get_dataset




# Tunable parameters of the ADWIN-based run.
dataset = 'rbf_adwinx'
clock = 1500
grace_period = 100
min_window_length = 500

# Detection starts as soon as the grace period has elapsed.
n_start_detect = grace_period

history = HistoryManager()
name = f'{dataset}_adwin_data_drift_{clock}_{grace_period}_{min_window_length}'
logger_factory = LoggerFactory(name)

# One ADWIN-backed monitoring step per column id. The ids are integers, so no
# 'dataset' / 'target' filtering is needed here.
feature_columns = [0, 1, 2, 3]
adwins = []
for j, col in enumerate(feature_columns):
    detector = drift.ADWIN(clock=clock, grace_period=grace_period, min_window_length=min_window_length)
    adwins.append(RiverMonitoringStep(col, j, detector, logger_factory))

input_monitoring = DriftMonitoringPipeline([(a._name, a) for a in adwins])
input_drift_retraining_strategy = SimpleRetrainingStrategy(n_last_retrain=clock, n_last_test=0)
input_detector = DriftHandler(input_monitoring, input_drift_retraining_strategy)

In [10]:
from river.datasets import synth


# NOTE: rebinds `dataset` (a dataset-name string in the setup cell) to the river stream object.
dataset = synth.RandomRBFDrift(seed_model=42, seed_sample=42,
    n_classes=4, n_features=4, n_centroids=20,
    change_speed=0.66, n_drift_centroids=10)

n_samples = 250_000
# enumerate replaces the hand-maintained counter; `total` lets tqdm render real
# progress, because the iterator returned by `take` has no length.
for i, (x, y) in enumerate(tqdm(dataset.take(n_samples), total=n_samples)):
    history.update_history_x(x)
    # Detection only starts once `n_start_detect` samples have been seen.
    if i >= n_start_detect:
        drift_detected = input_detector.detect(history)
    history.update_history_y(y)

250000it [00:20, 12274.16it/s]


In [None]:
from river.datasets import synth

dataset = synth.RandomRBFDrift(seed_model=42, seed_sample=42,
    n_classes=4, n_features=4, n_centroids=20,
    change_speed=0.66, n_drift_centroids=10)

# Stand-alone ADWIN detector fed directly with feature 0 of each sample.
adwin = drift.ADWIN(clock=1500, min_window_length=250, grace_period=100)

n_samples = 250_000
# enumerate replaces the hand-maintained counter; `total` lets tqdm render real
# progress, because the iterator returned by `take` has no length.
for i, (x, y) in enumerate(tqdm(dataset.take(n_samples), total=n_samples)):
    val = x[0]
    adwin.update(val)
    if adwin.drift_detected:
        print(f"Change detected at index {i}, input value: {val}")


In [None]:

# Push every sample through a fresh HistoryManager and read a value back each step.
history = HistoryManager()
for x, y in tqdm(dataset.take(250_000)):
    history.update_history_x(x)
    # NOTE(review): iloc[0, -1] reads the FIRST row's last column, whereas the
    # HistoryManager2 cell reads the last row (x_history[-1][3]) — confirm which is intended.
    val_x = history.x_history.iloc[0, -1]
    history.update_history_y(y)
    val_y = history.y_history.iloc[-1]






In [4]:
from batchstream.history.base.history_manager2 import HistoryManager2

history = HistoryManager2()


In [5]:
from river.datasets import synth
from tqdm import tqdm

dataset = synth.RandomRBFDrift(seed_model=42, seed_sample=42,
    n_classes=4, n_features=4, n_centroids=20,
    change_speed=0.66, n_drift_centroids=10)

In [7]:
# Same per-sample loop against HistoryManager2, whose histories are indexed as plain sequences here.
i = 0  # NOTE(review): never read or incremented in this cell
for x, y in tqdm(dataset.take(250_000)):
    history.update_history_x(x)
    # Last stored sample, key 3.
    val_x = history.x_history[-1][3]
    history.update_history_y(y)
    val_y = history.y_history[-1]

132603it [00:06, 21990.57it/s]


KeyboardInterrupt: 

In [None]:
# RiverMonitoringStep

In [None]:
history.x_history.iloc[-1].loc[2]

In [None]:
history._x_history[-1][2]

In [None]:
history._y_history[-1]

In [None]:
n_to_stay = 10

In [None]:
history.y_history.iloc[-(n_to_stay):].to_list()


In [None]:
history._y_history[-(n_to_stay):]

In [None]:
history.prediction_history.iloc[-(n_to_stay):].to_list()

In [None]:
history.x_history.iloc[-(n_to_stay):, :]

In [None]:
history._counter - n_to_stay

In [None]:
history._counter

In [None]:
n_last_retrain = 10

In [None]:
history.x_history.iloc[:-1, :].iloc[-n_last_retrain:, :], history.y_history.iloc[-n_last_retrain:]

In [None]:
import pandas as pd
pd.DataFrame(history._x_history[:-1][-n_last_retrain:])

In [None]:
history._y_history[-n_last_retrain:]

In [2]:
list(zip([5_000, 2_500], [15, 30, 20], [0, 1]))

[(5000, 15, 0), (2500, 30, 1)]

In [3]:
from sklearn.model_selection import ParameterSampler

[{'stattest_threshold': 0.04, 'n_curr': 1000},
 {'stattest_threshold': 0.038, 'n_curr': 1000},
 {'stattest_threshold': 0.038, 'n_curr': 1000},
 {'stattest_threshold': 0.043, 'n_curr': 2500},
 {'stattest_threshold': 0.043, 'n_curr': 5000},
 {'stattest_threshold': 0.043, 'n_curr': 1000},
 {'stattest_threshold': 0.04, 'n_curr': 1000},
 {'stattest_threshold': 0.043, 'n_curr': 1000},
 {'stattest_threshold': 0.04, 'n_curr': 5000}]