In [1]:
%matplotlib notebook

In [2]:
import numpy as np
from river import tree
from river import synth
from river.drift import PageHinkley
from river import evaluate
from river import metrics

In [3]:
from river import ensemble
from river import linear_model
from river import tree
from river import naive_bayes

In [4]:
dataset = synth.ConceptDriftStream(stream=synth.SEA(seed=42, variant=0), drift_stream=synth.SEA(seed=42, variant=1), seed=1, position=5, width=2)

In [6]:
metric = metrics.F1()

# Example with trees

In [7]:
model = tree.HoeffdingTreeClassifier()

evaluate.progressive_val_score(dataset, model, metric, print_every=200)

[200] F1: 90.24%
[400] F1: 92.58%
[600] F1: 93.95%
[800] F1: 94.67%
[1,000] F1: 94.99%
[1,200] F1: 95.19%
[1,400] F1: 95.23%
[1,600] F1: 95.23%
[1,800] F1: 95.26%
[2,000] F1: 95.51%
[2,200] F1: 95.33%
[2,400] F1: 95.34%
[2,600] F1: 95.40%
[2,800] F1: 95.40%
[3,000] F1: 95.49%
[3,200] F1: 95.51%
[3,400] F1: 95.44%
[3,600] F1: 94.99%
[3,800] F1: 94.88%
[4,000] F1: 94.84%
[4,200] F1: 94.84%
[4,400] F1: 94.85%
[4,600] F1: 94.75%
[4,800] F1: 94.61%
[5,000] F1: 94.60%
[5,200] F1: 94.64%
[5,400] F1: 94.66%
[5,600] F1: 94.57%
[5,800] F1: 94.62%
[6,000] F1: 94.61%
[6,200] F1: 94.60%
[6,400] F1: 94.65%
[6,600] F1: 94.70%
[6,800] F1: 94.78%
[7,000] F1: 94.84%
[7,200] F1: 94.87%
[7,400] F1: 94.88%
[7,600] F1: 94.94%
[7,800] F1: 94.93%
[8,000] F1: 95.00%
[8,200] F1: 95.00%
[8,400] F1: 95.03%
[8,600] F1: 95.12%
[8,800] F1: 95.18%
[9,000] F1: 95.21%
[9,200] F1: 95.24%
[9,400] F1: 95.33%
[9,600] F1: 95.36%
[9,800] F1: 95.41%
[10,000] F1: 95.44%
[10,200] F1: 95.48%
[10,400] F1: 95.54%
[10,600] F1: 95.5

KeyboardInterrupt: 

# Drift detection

In [8]:
# Stream with binary labels
classes = 2
features = len(list(dataset.take(1))[0][0].keys())

In [9]:
phs = [[PageHinkley() for i in range(features)] for j in range(classes)]

In [10]:
# Only works for one-dimensional streams, each class and feature needs to be considered separately
samples = dataset.take(2000)
for i, sample in enumerate(samples):
    item = sample[0]
    label = int(sample[1])
    in_drifts = np.array(
        [
            ph.update(item[feature])[0] 
            for feature, ph in enumerate(phs[label])
        ]
    )
    if in_drifts.any():
        print('Change detected in data: ' + str(item) + ', of index: ' + str(i) + ', of class: ' + str(label) + ', in features: ' + str(in_drifts))

Change detected in data: {0: 7.5347512505938585, 1: 8.534479505691046, 2: 9.534303384701063}, of index: 164, of class: 1, in features: [False  True False]
Change detected in data: {0: 0.09397431454871041, 1: 8.046330769751464, 2: 9.01209423598883}, of index: 297, of class: 0, in features: [False  True False]
Change detected in data: {0: 6.777644586233595, 1: 7.20928376670709, 2: 6.591815403167537}, of index: 309, of class: 1, in features: [False False  True]
Change detected in data: {0: 8.289589728944645, 1: 7.387746721294828, 2: 6.854144402775763}, of index: 437, of class: 1, in features: [False False  True]
Change detected in data: {0: 1.7578870753886255, 1: 9.19634811850934, 2: 9.9717180308851}, of index: 612, of class: 1, in features: [False False  True]
Change detected in data: {0: 8.957647816143137, 1: 7.785382813284745, 2: 0.5859123027811852}, of index: 964, of class: 1, in features: [ True False False]
Change detected in data: {0: 8.341081186760043, 1: 2.5788635998777654, 2: 9.

# Example with ensembles

In [11]:
model = ensemble.VotingClassifier([
        linear_model.LogisticRegression(),
        tree.HoeffdingTreeClassifier(),
        naive_bayes.GaussianNB()
])

evaluate.progressive_val_score(dataset, model, metric, print_every=200)

[200] F1: 97.46%
[400] F1: 97.44%
[600] F1: 97.41%
[800] F1: 97.39%
[1,000] F1: 97.36%
[1,200] F1: 97.35%
[1,400] F1: 97.34%
[1,600] F1: 97.31%
[1,800] F1: 97.28%
[2,000] F1: 97.27%
[2,200] F1: 97.23%
[2,400] F1: 97.21%
[2,600] F1: 97.20%
[2,800] F1: 97.19%
[3,000] F1: 97.18%
[3,200] F1: 97.18%
[3,400] F1: 97.15%
[3,600] F1: 97.12%
[3,800] F1: 97.09%
[4,000] F1: 97.08%
[4,200] F1: 97.07%
[4,400] F1: 97.06%
[4,600] F1: 97.03%
[4,800] F1: 97.00%
[5,000] F1: 96.99%
[5,200] F1: 96.99%
[5,400] F1: 96.98%
[5,600] F1: 96.97%
[5,800] F1: 96.96%
[6,000] F1: 96.95%
[6,200] F1: 96.94%
[6,400] F1: 96.94%
[6,600] F1: 96.93%
[6,800] F1: 96.93%
[7,000] F1: 96.94%
[7,200] F1: 96.93%
[7,400] F1: 96.92%
[7,600] F1: 96.92%
[7,800] F1: 96.92%
[8,000] F1: 96.92%
[8,200] F1: 96.92%
[8,400] F1: 96.91%
[8,600] F1: 96.92%
[8,800] F1: 96.92%
[9,000] F1: 96.92%
[9,200] F1: 96.92%
[9,400] F1: 96.92%
[9,600] F1: 96.92%
[9,800] F1: 96.92%
[10,000] F1: 96.93%
[10,200] F1: 96.93%
[10,400] F1: 96.93%
[10,600] F1: 96.9

KeyboardInterrupt: 