# Building AdwinDo - A journey towards an improved adaptive windowing algorithm package based on ADWIN.

## BACKGROUND

The ADWIN (ADaptive WINdowing) algorithm, based on Bifet and Gavaldà (2007)[1], is "a new approach for dealing with distribution change and concept drift when learning from data sequences that may vary with time." The size of sliding windows used to measure statistics is "recomputed online according to the rate of change observed from the data in the window itself." The ADWIN algorithm provides "rigorous guarantees of performance, as bounds on the rates of false positives and false negatives". These bounds are defined by $\delta$ (delta). 

## 1. Setup

### import libraries

In [None]:
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotdata as plot
import testdata as td
from skmultiflow.drift_detection.adwin import ADWIN

### load data

In [None]:
# Each input file is comma-separated with no header row; only column 6 is
# read and it is labelled "input".
# NOTE: read_csv's `squeeze=True` keyword was deprecated in pandas 1.4 and
# removed in pandas 2.0. Squeezing the single-column frame afterwards yields
# the same Series on old and new pandas versions alike.

# ABRUPT DRIFTS
stream_name_abrupt1 = "data/moa-abrupt-1.txt"    # single abrupt up down
data_stream_abrupt1 = pd.read_csv(stream_name_abrupt1, sep=",", header=None, names=["input"], usecols=[6]).squeeze("columns")

stream_name_abrupt2 = "data/moa-abrupt-2.txt"    # double abrupt up down up
data_stream_abrupt2 = pd.read_csv(stream_name_abrupt2, sep=",", header=None, names=["input"], usecols=[6]).squeeze("columns")

# GRADUAL DRIFTS
stream_name_gradual = "data/moa-gradual-1.txt"   # single gradual up down
data_stream_gradual = pd.read_csv(stream_name_gradual, sep=",", header=None, names=["input"], usecols=[6]).squeeze("columns")

### helper function to run algorithm against provided input data

In [None]:
# Feed every stream element to ADWIN, log each detected drift, then plot the run
def start_adwin(stream, stream_name):
    """Run the notebook-level ``adwin`` detector over ``stream`` and plot the result.

    The function works on notebook globals rather than local state: it reads
    ``adwin`` and ``DELTA`` and appends to ``window_width``, ``vertical_lines``
    and ``width_vs_variance``. The calling cell is expected to (re)create
    those before each run.

    Parameters
    ----------
    stream : indexable sequence of numbers
        Values are read as ``stream[0] .. stream[len(stream) - 1]``.
    stream_name : str
        Label passed through to the plot and to the returned dict.

    Returns
    -------
    dict
        Keys ``stream``, ``window_width``, ``vertical_lines``, ``caption``
        and ``stream_name``; the two lists are the global accumulators.
    """
    for idx in range(len(stream)):
        adwin.add_element(stream[idx])
        # The width is recorded before detected_change() is queried.
        window_width.append(adwin.width)

        drift_found = adwin.detected_change()
        if drift_found:
            print(f"Change in index {idx} for stream value {stream[idx]}, width {adwin.width}, window mean {adwin.estimation}, window variance {adwin.variance}")
            vertical_lines.append(idx)    # drawn as a vertical marker in the plot

        snapshot = (adwin.width, adwin.variance, idx)
        width_vs_variance.append(snapshot)

    # plot stream and window
    caption = f"delta: {DELTA}\nbuckets: 5"
    plot.plot3data(stream, window_width, vertical_lines, caption, stream_name)

    return dict(
        stream=stream,
        window_width=window_width,
        vertical_lines=vertical_lines,
        caption=caption,
        stream_name=stream_name,
    )

## 2. Original ADWIN algorithm package in scikit-multiflow

### 2.1. Abrupt drift 1

In [None]:
data_stream_abrupt1

In [None]:
fig = plt.figure()
ax1 = plt.subplot()

color = 'tab:blue'
ax1.set_xlabel('time (t)')
ax1.set_ylabel('value')
ax1.plot(data_stream_abrupt1)
ax1.tick_params(axis='y', labelcolor=color)

plt.show()

In [None]:
DELTA = 0.002

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []

caption = ""

result_sk_abrupt1 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

 ### 2.2. Abrupt drift 2

In [None]:
data_stream_abrupt2

In [None]:
fig = plt.figure()
ax1 = plt.subplot()

color = 'tab:blue'
ax1.set_xlabel('time (t)')
ax1.set_ylabel('value')
ax1.plot(data_stream_abrupt2)
ax1.tick_params(axis='y', labelcolor=color)

plt.show()

In [None]:
DELTA = 0.002

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []

caption = ""

result_sk_abrupt2 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

### 2.3. Gradual drift

In [None]:
data_stream_gradual

In [None]:
fig = plt.figure()
ax1 = plt.subplot()

color = 'tab:blue'
ax1.set_xlabel('time (t)')
ax1.set_ylabel('value')
ax1.plot(data_stream_gradual)
ax1.tick_params(axis='y', labelcolor=color)

plt.show()

In [None]:
DELTA = 0.002

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []

caption = ""

result_sk_gradual = start_adwin(data_stream_gradual, stream_name_gradual)

## 3. Original ADWIN algorithm package in River (successor to scikit-multiflow)

According to https://github.com/scikit-multiflow/scikit-multiflow, "creme and scikit-multiflow are merging into a new project called River. ...Both projects will stop active development." The following is the baseline ADWIN algorithm output from the River library. Notice that it very closely matches the scikit output with a few differences in drifts detected.

#### Replace scikit package with River package

In [None]:
from river.drift.adwin import ADWIN

#### Update helper function to match River package format

In [None]:
# Feed every stream element to River's ADWIN, log each detected drift, then plot the run
def start_adwin(stream, stream_name):
    """Run the notebook-level River ``adwin`` detector over ``stream`` and plot the result.

    Same contract as the scikit-multiflow helper it replaces, except that the
    drift flag comes from the tuple returned by ``adwin.update``. Reads the
    notebook globals ``adwin`` and ``DELTA`` and appends to ``window_width``,
    ``vertical_lines`` and ``width_vs_variance``.

    Parameters
    ----------
    stream : indexable sequence of numbers
        Values are read as ``stream[0] .. stream[len(stream) - 1]``.
    stream_name : str
        Label passed through to the plot and to the returned dict.

    Returns
    -------
    dict
        Keys ``stream``, ``window_width``, ``vertical_lines``, ``caption``
        and ``stream_name``; the two lists are the global accumulators.
    """
    for idx in range(len(stream)):
        # update() is unpacked as (drift flag, warning flag); the warning flag is not used here
        in_drift, _ = adwin.update(stream[idx])
        window_width.append(adwin.width)

        if in_drift:
            print(f"Change in index {idx} for stream value {stream[idx]}, width {adwin.width}, window mean {adwin.estimation}, window variance {adwin.variance}")
            vertical_lines.append(idx)    # drawn as a vertical marker in the plot

        snapshot = (adwin.width, adwin.variance, idx)
        width_vs_variance.append(snapshot)

    # plot stream and window
    caption = f"delta: {DELTA}\nbuckets: 5"
    plot.plot3data(stream, window_width, vertical_lines, caption, stream_name)

    return dict(
        stream=stream,
        window_width=window_width,
        vertical_lines=vertical_lines,
        caption=caption,
        stream_name=stream_name,
    )

### 3.1. Abrupt drift 1

In [None]:
DELTA = 0.002

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []

caption = ""

result_adwog_abrupt1 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

In [None]:
plot.plotoutput21(result_adwog_abrupt1, "RIVER OUTPUT", result_sk_abrupt1, "SCIKIT OUTPUT")

### 3.2. Abrupt drift 2

In [None]:
DELTA = 0.002

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_adwog_abrupt2 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

In [None]:
# plot comparison of the River result with the scikit-multiflow result for abrupt drift 2
plot.plotoutput21(result_adwog_abrupt2, "RIVER OUTPUT", result_sk_abrupt2, "SCIKIT OUTPUT")

### 3.3. Gradual drift

In [None]:
DELTA = 0.002

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []

caption = ""

result_adwog_gradual = start_adwin(data_stream_gradual, stream_name_gradual)

In [None]:
# plot comparison with scikit result for gradual drift
plot.plotoutput21(result_adwog_gradual, "RIVER OUTPUT", result_sk_gradual, "SCIKIT OUTPUT")

## 4. Adjusting $\delta$

The only adjustable parameter the ADWIN module in the River package provides is $\delta$ (delta), which affects the threshold that a difference in bucket means must exceed to trigger a drift detection. Because $\delta$ bounds the false-positive rate, a lower $\delta$ requires a greater difference in means between two buckets to trigger a drift detection event, while a higher $\delta$ makes the detector more sensitive. The default value in the ADWIN module is 0.002.

### $\delta = 0.0001$

#### Abrupt drift 1 

In [None]:
DELTA = 0.0001

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_abrupt1_d0001 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

In [None]:
plot.plotoutput21(result_abrupt1_d0001, "ADWIN DELTA ADJUSTMENT", result_adwog_abrupt1, "ADWIN DELTA ADJUSTMENT")

#### Abrupt drift 2

In [None]:
DELTA = 0.0001

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_abrupt2_d0001 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

In [None]:
plot.plotoutput21(result_abrupt2_d0001, "ADWIN DELTA ADJUSTMENT", result_adwog_abrupt2, "ADWIN DELTA ADJUSTMENT")

#### Gradual drift

In [None]:
DELTA = 0.0001

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_gradual_d0001 = start_adwin(data_stream_gradual, stream_name_gradual)

In [None]:
plot.plotoutput21(result_gradual_d0001, "ADWIN DELTA ADJUSTMENT", result_adwog_gradual, "ADWIN DELTA ADJUSTMENT")

### $\delta = 0.01$

#### Abrupt drift 1

In [None]:
DELTA = 0.01

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_abrupt1_d01 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

#### Abrupt drift 2

In [None]:
# Delta used for this run; start_adwin reads it when building the plot caption.
DELTA = 0.01

# Fresh detector so no window state carries over from the previous run.
adwin = ADWIN(DELTA)

# Module-level accumulators that start_adwin appends to.
window_width = []
vertical_lines = []
width_vs_variance = []
# `i` and this module-level `caption` are not read by the visible start_adwin
# (it assigns its own local `caption`) — presumably leftovers; verify before removing.
i = 0
caption = ""

# NOTE(review): the chained assignment also binds `result_adwog_abrupt1_d01` to
# this *abrupt 2* result. The name suggests abrupt 1 and looks like a copy/paste
# leftover — confirm nothing later in the notebook relies on it before removing.
result_abrupt2_d01 = result_adwog_abrupt1_d01 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

#### Gradual drift

In [None]:
DELTA = 0.01

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_gradual_d01 = start_adwin(data_stream_gradual, stream_name_gradual)

### $\delta = 0.1$

#### Abrupt drift 1

In [None]:
DELTA = 0.1

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_abrupt1_d1 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

#### Abrupt drift 2

In [None]:
DELTA = 0.1

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_abrupt2_d1 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

#### Gradual drift

In [None]:
DELTA = 0.1

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_gradual_d1 = start_adwin(data_stream_gradual, stream_name_gradual)

#### Summary of Delta Ranges for Abrupt 1

In [None]:
plot.plotoutput41(result_abrupt1_d0001, result_adwog_abrupt1, result_abrupt1_d01, result_abrupt1_d1, "ABRUPT 1")

#### Summary of Delta Ranges for Abrupt 2

In [None]:
plot.plotoutput41(result_abrupt2_d0001, result_adwog_abrupt2, result_abrupt2_d01, result_abrupt2_d1, "ABRUPT 2" )

#### Summary of Delta Ranges for Gradual

In [None]:
plot.plotoutput41(result_gradual_d0001, result_adwog_gradual, result_gradual_d01, result_gradual_d1, "GRADUAL - RANGE OF DELTAS")

## 5. Rescaling the input data

According to Bifet and Gavaldà (2007)[1], the input values should be between 0 and 1 <i>(emphasis added)</i>:

>The inputs to the [ADWIN] algorithms are a confidence value $\delta \in (0,1)$ and a (possibly infinite) sequence of real values $x_1, x_2, x_3, ..., x_t, ...$ The value of $x_t$ is available only at time $t$. Each $x_t$ is generated according to some distribution $D_t$, independently for every $t$. We denote with $\mu_t$ the expected value when it is drawn according to $D_t$. <strong>We assume that $x_t$ is always in $[0,1]$</strong>; by an easy rescaling, we can handle any case in which we know an interval $[a,b]$ such that $a\le x_t\le b$.

Therefore, a scaling function is needed to transform all input values between 0 and 1.

In [None]:
fig = plt.figure()
ax1 = plt.subplot(311)

color = 'tab:blue'
ax1.set_xlabel('time (t)')
ax1.set_ylabel('value')
ax1.plot(data_stream_abrupt1)
ax1.tick_params(axis='y', labelcolor=color)

ax1 = plt.subplot(312)

color = 'tab:green'
ax1.set_xlabel('time (t)')
ax1.set_ylabel('value')
ax1.plot(data_stream_abrupt2, color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax1 = plt.subplot(313)

color = 'tab:purple'
ax1.set_xlabel('time (t)')
ax1.set_ylabel('value')
ax1.plot(data_stream_gradual, color=color)
ax1.tick_params(axis='y', labelcolor=color)

fig.tight_layout()
plt.show()

In [None]:
print(max(data_stream_abrupt1))
print(max(data_stream_abrupt2))
print(max(data_stream_gradual))

In [None]:
print(min(data_stream_abrupt1))
print(min(data_stream_abrupt2))
print(min(data_stream_gradual))

In [None]:
data_stream_gradual_hdrs = pd.read_csv("data/moa-single-gradual.txt", sep=",", usecols=[6])
data_stream_gradual_hdrs.columns

An analysis of the data characteristics shows that all values are between 45 and 79. Additionally, in an input file that includes headers, the label on the column refers to a percentage. Accordingly, all input values $x_t$ are divided by 100: assuming $x_t \in [0,100]$, the rescaled value is $x_t' = x_t/100 \in [0,1]$.

In [None]:
# normalize input stream
# ADWIN assumes inputs in [0, 1], so the data must be rescaled according to
# Bifet and Gavalda (2007. "Learning from time-changing data with adaptive windowing.")
# np.interp maps [0, 100] linearly onto [0, 1]; by NumPy's documented default,
# inputs outside [0, 100] are clamped to the end values (0 or 1).
# The results are NumPy arrays, not pandas Series.
stream_norm_abrupt1 = np.interp(data_stream_abrupt1, (0, 100), (0,1) )
stream_norm_abrupt2 = np.interp(data_stream_abrupt2, (0, 100), (0,1) )
stream_norm_gradual = np.interp(data_stream_gradual, (0, 100), (0,1) )

### 5.1. Abrupt drift 1 - normed

In [None]:
DELTA = 0.002

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_nrm_abrupt1 = start_adwin(stream_norm_abrupt1, stream_name_abrupt1)

#### Compared to original (unscaled) input

In [None]:
plot.plotoutput21(result_nrm_abrupt1, "ABRUPT 1\nScaled Values", result_adwog_abrupt1, "Original Values")

### 5.2. Abrupt drift 2 - normed

In [None]:
DELTA = 0.002

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_nrm_abrupt2 = start_adwin(stream_norm_abrupt2, stream_name_abrupt2)

#### Compared to original (unscaled) input

In [None]:
plot.plotoutput21(result_nrm_abrupt2, "ABRUPT 2\nScaled Values", result_adwog_abrupt2, "Original Values")

### 5.3. Gradual drift - normed

In [None]:
DELTA = 0.002

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_nrm_gradual = start_adwin(stream_norm_gradual, stream_name_gradual)

#### Compared to original (unscaled) input

In [None]:
plot.plotoutput21(result_nrm_gradual, "GRADUAL\nScaled Values", result_adwog_gradual, "Original Values")

### 5.4. Delta adjustments - normed

#### $\delta = 0.0001$

##### Abrupt drift 1 

In [None]:
DELTA = 0.0001

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_nrm_abrupt1_d0001 = start_adwin(stream_norm_abrupt1, stream_name_abrupt1)

##### Abrupt drift 2

In [None]:
DELTA = 0.0001

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_nrm_abrupt2_d0001 = start_adwin(stream_norm_abrupt2, stream_name_abrupt2)

##### Gradual drift

In [None]:
DELTA = 0.0001

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_nrm_gradual_d0001 = start_adwin(stream_norm_gradual, stream_name_gradual)

#### $\delta = 0.01$

##### Abrupt drift 1

In [None]:
DELTA = 0.01

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_nrm_abrupt1_d01 = start_adwin(stream_norm_abrupt1, stream_name_abrupt1)

##### Abrupt drift 2

In [None]:
DELTA = 0.01

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_nrm_abrupt2_d01 = start_adwin(stream_norm_abrupt2, stream_name_abrupt2)

##### Gradual drift

In [None]:
DELTA = 0.01

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_nrm_gradual_d01 = start_adwin(stream_norm_gradual, stream_name_gradual)

#### $\delta = 0.1$

##### Abrupt drift 1

In [None]:
DELTA = 0.1

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_nrm_abrupt1_d1 = start_adwin(stream_norm_abrupt1, stream_name_abrupt1)

##### Abrupt drift 2

In [None]:
DELTA = 0.1

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_nrm_abrupt2_d1 = start_adwin(stream_norm_abrupt2, stream_name_abrupt2)

##### Gradual drift

In [None]:
DELTA = 0.1

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_nrm_gradual_d1 = start_adwin(stream_norm_gradual, stream_name_gradual)

#### $\delta = 1.0$

##### Abrupt drift 1

In [None]:
DELTA = 1.0

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_nrm_abrupt1_d10 = start_adwin(stream_norm_abrupt1, stream_name_abrupt1)

##### Abrupt drift 2

In [None]:
DELTA = 1.0

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_nrm_abrupt2_d10 = start_adwin(stream_norm_abrupt2, stream_name_abrupt2)

##### Gradual drift

In [None]:
DELTA = 1.0

adwin = ADWIN(DELTA)

window_width = []
vertical_lines = []
width_vs_variance = []
i = 0
caption = ""

result_nrm_gradual_d10 = start_adwin(stream_norm_gradual, stream_name_gradual)

#### Summary of Delta Ranges for Scaled Input

In [None]:
plot.plotoutput51(result_nrm_abrupt1_d0001, result_nrm_abrupt1, result_nrm_abrupt1_d01, result_nrm_abrupt1_d1, result_nrm_abrupt1_d10, "ABRUPT 1 - SCALED [0,1]")

In [None]:
plot.plotoutput51(result_nrm_abrupt2_d0001, result_nrm_abrupt2, result_nrm_abrupt2_d01, result_nrm_abrupt2_d1, result_nrm_abrupt2_d10, "ABRUPT 2 - SCALED [0,1]")

In [None]:
plot.plotoutput51(result_nrm_gradual_d0001, result_nrm_gradual, result_nrm_gradual_d01, result_nrm_gradual_d1, result_nrm_gradual_d10, "GRADUAL - SCALED [0,1]")

## 6. Additional library AdwinDo

Provides interface to additional parameters of ADWIN algorithm

### import libraries

In [None]:
import adwinDo_c as awd
from datetime import datetime, timedelta

### redefine helper function to add stream elements to ADWIN or AdwinDo and detect if drift occurred

In [None]:
def start_adwin(stream, stream_name):
    """Stream ``stream`` through the notebook-level AdwinDo detector and report drifts.

    Steps performed:

    1. Rescale the raw values from [0, 100] to [0, 1] with ``np.interp``.
    2. Feed the rescaled values one at a time to the global ``adwin`` object,
       recording the window width after every update and printing a summary
       for every update that reports a drift.
    3. Write ``adwin.output_data_list`` and ``adwin.drift_comparisons`` to two
       CSV files in the ``output`` directory (created if missing).
    4. Print timing information and plot the run via ``plot.plotcomps``.

    Relies on the notebook globals ``adwin``, ``version``, ``DELTA``,
    ``EPSILON`` and ``GAMMA``.

    Parameters
    ----------
    stream : array-like of numbers
        Raw (unscaled) stream values; they are plotted as given.
    stream_name : str
        Source path of the stream. A ``"data/"`` prefix is removed to build
        the CSV file names; the full value is used as the plot label.

    Returns
    -------
    dict
        ``stream``, ``window_width`` (one entry per input element),
        ``vertical_lines`` (indices at which this run reported a drift),
        ``caption`` and ``stream_name``.
    """
    from pathlib import Path  # local import keeps this cell self-contained

    # start timer to calculate throughput
    timer_start = datetime.now()

    # Per-run accumulators. vertical_lines is local so that the returned dict
    # describes this run rather than whatever an earlier cell left in a global.
    window_width = []
    vertical_lines = []
    width_vs_variance = []

    # normalize input stream
    # need to normalize data to values between 0 and 1 according to
    # Bifet and Gavalda (2007. "Learning from time-changing data with adaptive windowing.")
    stream_norm = np.interp(stream, (0, 100), (0, 1))

    # source file name for labeling output files
    source_file = stream_name.replace("data/", "")

    for idx, value in enumerate(stream_norm):
        # update ADWIN window, calculate statistics, and determine if drift occurred
        in_drift, _in_warning = adwin.update(value)

        window_width.append(adwin.width)    # plots window width
        if in_drift:
            vertical_lines.append(idx)

            # Statistics of the window as it was before this drift; they stay
            # at 0 when the detector holds no previous-window data.
            prev_variance = 0.
            prev_mean = 0.
            prev_df = pd.DataFrame(adwin.prev_window.data)
            if len(prev_df) > 0:
                prev_variance = np.var(prev_df['stream_value'])
                prev_mean = np.mean(prev_df['stream_value'])
            print(f"  Drift detected in index {idx} for stream value {stream_norm[idx]},\
                  \n\twidth: {len(adwin.prev_window.data) + 1} => {int(adwin.width)},\
                  \n\tmean: {prev_mean} => {adwin.adwin_mean},\
                  \n\tvariance: {prev_variance} => {adwin.adwin_var}\n\n")

            width_vs_variance.append((adwin.width, adwin.adwin_var, idx))

    # write drift detection data to DataFrame then CSV
    data_array = [
        {
            "Stream_Index": dt_row.Stream_Index,
            "Stream_Value": dt_row.Stream_Value,
            "Is_Drift_Detection_Point": dt_row.Is_Drift_Detection_Point,
            "Drift_Count": dt_row.Drift_Count,
            "ADWIN_Window_Size": dt_row.ADWIN_Window_Size,
            "ADWIN_Mean": dt_row.ADWIN_Mean,
            "ADWIN_Variance": dt_row.ADWIN_Variance,
            "Rolling_Window_Size": dt_row.Rolling_Window_Size,
            "Rolling_Mean": dt_row.Rolling_Mean,
            "Rolling_Variance": dt_row.Rolling_Variance,
            "delta_of_Mean": dt_row.delta_Mean,
            "delta_of_Variance": dt_row.delta_Variance,
        }
        for dt_row in adwin.output_data_list
    ]

    # Build the paths with pathlib so they resolve to ./output/<file> on every
    # OS (a literal ".\\output\\..." is only a directory path on Windows), and
    # make sure the directory exists before writing.
    output_dir = Path("output")
    output_dir.mkdir(parents=True, exist_ok=True)

    drift_df = pd.DataFrame(data_array)
    drift_df.to_csv(output_dir / f"DriftsDetected-{source_file}-AdWinDo-v_{version}.csv")
    drift_comp_df = pd.DataFrame(adwin.drift_comparisons)
    drift_comp_df.to_csv(output_dir / f"DriftComparisons-{source_file}-AdWinDo-v_{version}.csv")

    timer_stop = datetime.now()
    timer_span = (timer_stop - timer_start) // timedelta(milliseconds=1)
    print(f"timer start: {timer_start}")
    print(f"timer stop: {timer_stop}")
    print(f"time elapsed (ms): {timer_span}")
    if timer_span > 0:
        print(f"throughput: {1000 * len(drift_df) / timer_span} data points / second\n")
    else:
        # The elapsed time floored to 0 ms; avoid dividing by zero.
        print("throughput: n/a (run finished in under 1 ms)\n")

    # plot stream and window
    caption = f"delta: {DELTA}    epsilon: {EPSILON}       rolling window size: {GAMMA}    ADWIN grace period:{adwin.grace_period}\n" \
            + f"buckets: {adwin.max_buckets}   clock: {adwin.clock}    algo: {adwin.algo}"
    mean_gap = abs(drift_df['ADWIN_Mean'] - drift_df['Rolling_Mean'])
    variance_gap = abs(drift_df['ADWIN_Variance'] - drift_df['Rolling_Variance'])
    plot.plotcomps(stream, window_width, adwin.novel_drifts, adwin.repeat_drifts, mean_gap, variance_gap,
                   caption, stream_name, 'data stream', 'adwin window width', 'μ_ADWIN - μ_ScndWIN', 'σ2_ADWIN - σ2_ScndWIN')

    # return stats for building comparison plots
    return {
        "stream": stream,
        "window_width": window_width,
        "vertical_lines": vertical_lines,
        "caption": caption,
        "stream_name": stream_name
    }

#### Configurable Parameters
The AdwinDo module provides the ability to configure the following parameters of the ADWIN algorithm. The values set below are the original values from the River ADWIN module.

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.002
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 10
# clock is the period (1/frequency) in number of data entry points for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 32
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 5
# min window length
MIN_WINDOW_LENGTH = 5
# the seed is the starting point in the "random" number generator table, setting the seed ensures repeatable "random" numbers for running experiments - cET
# np.random.seed() returns None, so the seed value is stored first and then applied;
# otherwise SEED would always be None.
SEED = 42
np.random.seed(SEED)

In [None]:
# Version tag embedded in the CSV file names written by the updated helper function
version = "0.9.9"

# Parameters not used in this part of the notebook are shown as "N/A" in plot captions
EPSILON = GAMMA = PHI = "N/A"

# Drift duplicate filter (described later in this notebook): per the original
# author's note, these probability parameters are set to values that turn the
# feature off.

# TURN OFF DRIFT DUPLICATE FILTER
ADWINDO_p, ADWINDO_q = 1.0, 1e-10

Create adwinDo object to track streaming data and raise alerts when a drift in the underlying population distribution is detected

In [None]:
adwin = awd.ADWINDO(delta = DELTA, clock = CLOCK, grace = GRACE, max_buckets = MAX_BUCKETS, min_window_length = MIN_WINDOW_LENGTH, algo='classic', adwindo_p = ADWINDO_p, adwindo_q = ADWINDO_q)

## 6.1. Rerun Analysis with Original Data
Rerun with original values; scaling is now done inside the helper function.

### Abrupt 1

In [None]:
# reset ADWIN
adwin = awd.ADWINDO(delta = DELTA, clock = CLOCK, grace = GRACE, max_buckets = MAX_BUCKETS, min_window_length = MIN_WINDOW_LENGTH, algo='classic', adwindo_p = ADWINDO_p, adwindo_q = ADWINDO_q)

result_awd_abrupt1 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

### Abrupt 2

In [None]:
#reset ADWIN
adwin = awd.ADWINDO(delta = DELTA, clock = CLOCK, grace = GRACE, max_buckets = MAX_BUCKETS, min_window_length = MIN_WINDOW_LENGTH, algo='classic', adwindo_p = ADWINDO_p, adwindo_q = ADWINDO_q)

#run
result_awd_abrupt2 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

### Gradual Drift

In [None]:
#reset ADWIN
adwin = awd.ADWINDO(delta = DELTA, clock = CLOCK, grace = GRACE, max_buckets = MAX_BUCKETS, min_window_length = MIN_WINDOW_LENGTH, algo='classic', adwindo_p = ADWINDO_p, adwindo_q = ADWINDO_q)

#run
result_awd_gradual = start_adwin(data_stream_gradual, stream_name_gradual)

## 6.2. Rerun Analysis with Higher-Resolution Clock ($T=10$)
Lower CLOCK parameter and rerun analysis.
At CLOCK = 10, the algorithm will check for a drift every 10 input points, as long as the ADWIN window width is greater than GRACE.

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.002
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 10
# clock is the period (1/frequency) in number of data entry points for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 10
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 5
# min window length
MIN_WINDOW_LENGTH = 5
# the seed is the starting point in the "random" number generator table, setting the seed ensures repeatable "random" numbers for running experiments - cET
# np.random.seed() returns None, so the seed value is stored first and then applied;
# otherwise SEED would always be None.
SEED = 42
np.random.seed(SEED)

### Abrupt 1

In [None]:
# reset ADWIN
adwin = awd.ADWINDO(delta = DELTA, clock = CLOCK, grace = GRACE, max_buckets = MAX_BUCKETS, min_window_length = MIN_WINDOW_LENGTH, algo = 'classic', adwindo_p = ADWINDO_p, adwindo_q = ADWINDO_q)

result_abrupt1_t10  = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

### Abrupt 2

In [None]:
#reset ADWIN
adwin = awd.ADWINDO(delta = DELTA, clock = CLOCK, grace = GRACE, max_buckets = MAX_BUCKETS, min_window_length = MIN_WINDOW_LENGTH, algo = 'classic', adwindo_p = ADWINDO_p, adwindo_q = ADWINDO_q)

#run
result_abrupt2_t10  = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

### Gradual Drift

In [None]:
#reset ADWIN
adwin = awd.ADWINDO(delta = DELTA, clock = CLOCK, grace = GRACE, max_buckets = MAX_BUCKETS, min_window_length = MIN_WINDOW_LENGTH, algo = 'classic', adwindo_p = ADWINDO_p, adwindo_q = ADWINDO_q)

#run
result_gradual_t10  = start_adwin(data_stream_gradual, stream_name_gradual)

## 6.3. Rerun Analysis with Higher-Resolution Clock ($T=1$)
Lower the CLOCK parameter and rerun the analysis. At CLOCK = 1, the algorithm will check for a drift on every input point, as long as the ADWIN window width is greater than GRACE.

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.002
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 10
# clock is the period (1/frequency) in number of data entry points for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 5
# min window length
MIN_WINDOW_LENGTH = 5
# the seed is the starting point in the "random" number generator table, setting the seed ensures repeatable "random" numbers for running experiments - cET
# np.random.seed() returns None, so the seed value is stored first and then applied;
# otherwise SEED would always be None.
SEED = 42
np.random.seed(SEED)

### Abrupt 1

In [None]:
# reset ADWIN
adwin = awd.ADWINDO(delta = DELTA, clock = CLOCK, grace = GRACE, max_buckets = MAX_BUCKETS, min_window_length = MIN_WINDOW_LENGTH, algo = 'classic', adwindo_p = ADWINDO_p, adwindo_q = ADWINDO_q)

result_abrupt1_t1 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

### Abrupt 2

In [None]:
#reset ADWIN
adwin = awd.ADWINDO(delta = DELTA, clock = CLOCK, grace = GRACE, max_buckets = MAX_BUCKETS, min_window_length = MIN_WINDOW_LENGTH, algo = 'classic', adwindo_p = ADWINDO_p, adwindo_q = ADWINDO_q)

#run
result_abrupt2_t1  = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

### Gradual Drift

In [None]:
#reset ADWIN
adwin = awd.ADWINDO(delta = DELTA, clock = CLOCK, grace = GRACE, max_buckets = MAX_BUCKETS, min_window_length = MIN_WINDOW_LENGTH, algo = 'classic', adwindo_p = ADWINDO_p, adwindo_q = ADWINDO_q)

#run
# Keep the result, as the sibling T=1 cells do (result_abrupt1_t1, result_abrupt2_t1);
# left unassigned, the returned dict is discarded and echoed as the cell's output.
result_gradual_t1 = start_adwin(data_stream_gradual, stream_name_gradual)

## 6.4. Rerun Analysis with Higher-Resolution Minimum Width ($n_{\mathrm{min}}= 1$)
Lower the MIN_WINDOW_LENGTH parameter and rerun the analysis. At MIN_WINDOW_LENGTH = 1, the algorithm will check for a drift by comparing two sub-windows where each sub-window has at least 1 element in it. MIN_WINDOW_LENGTH is set to 1 to allow all possible cuts of the ADWIN window. This increases the computational expense.

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.002
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 10
# clock is the period (1/frequency) in number of data entry points for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 5
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table, setting the seed ensures repeatable "random" numbers for running experiments - cET
# np.random.seed() returns None, so the seed value is stored first and then applied;
# otherwise SEED would always be None.
SEED = 42
np.random.seed(SEED)

### Abrupt 1

In [None]:
# reset ADWIN
adwin = awd.ADWINDO(delta = DELTA, clock = CLOCK, grace = GRACE, max_buckets = MAX_BUCKETS, min_window_length = MIN_WINDOW_LENGTH, algo = 'classic', adwindo_p = ADWINDO_p, adwindo_q = ADWINDO_q)

result_abrupt1_n1  = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

### Abrupt 2

In [None]:
#reset ADWIN
adwin = awd.ADWINDO(delta = DELTA, clock = CLOCK, grace = GRACE, max_buckets = MAX_BUCKETS, min_window_length = MIN_WINDOW_LENGTH, algo = 'classic', adwindo_p = ADWINDO_p, adwindo_q = ADWINDO_q)

#run
result_abrupt2_n1  = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_n1 = start_adwin(data_stream_gradual, stream_name_gradual)

## 6.5. Rerun Analysis with Higher-Resolution Grace Period
Lower the GRACE parameter and rerun the analysis. At GRACE = 1 (and CLOCK = 1), the algorithm checks for a drift at every input point (CLOCK = 1), as long as the ADWIN window width is greater than 1 (GRACE). GRACE = 1 is the minimum value, since the ADWIN window must be split into two buckets for statistical comparison. MIN_WINDOW_LENGTH is set to 1 to allow all possible cuts of the ADWIN window.

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.002
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 1
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 5
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_g1 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt2_g1 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_g1 = start_adwin(data_stream_gradual, stream_name_gradual)

## 6.6. Rerun Analysis with Higher-Resolution Maximum Buckets ($buckets_{\mathrm{max}}=50$)
Raise MAX_BUCKETS parameter and rerun analysis. At MAX_BUCKETS = 50, the algorithm will check for a drift by dividing the ADWIN window into up to 50 partitions. This increases the probability of correctly identifying a drift.

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.002
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 1
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 50
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_b50 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt2_b50 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_b50 = start_adwin(data_stream_gradual, stream_name_gradual)

## 6.7. Rerun Analysis with Higher-Resolution Maximum Buckets ($buckets_{\mathrm{max}}=100$)
Raise the MAX_BUCKETS parameter and rerun the analysis. At MAX_BUCKETS = 100, the algorithm will check for a drift by dividing the ADWIN window into up to 100 partitions. This increases the probability of correctly identifying a drift, but significantly increases the computational cost in both time and memory.

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.002
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 1
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 100
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_b100 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
# The sibling cells of this section name their results with a `_b100` suffix; the original
# `_b200` name here looks like a typo. Bind the consistent name and keep `_b200` as an
# alias so that any existing reference to it still works.
result_abrupt2_b100 = result_abrupt2_b200 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_b100 = start_adwin(data_stream_gradual, stream_name_gradual)

## 6.8. Rerun Analysis with Higher-Resolution Maximum Buckets ($buckets_{\mathrm{max}}=250$)
Raise MAX_BUCKETS parameter and rerun analysis. At MAX_BUCKETS = 250, the algorithm will check for a drift by dividing the ADWIN window into up to 250 partitions. This increases the probability of identifying a drift.

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.002
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 1
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 250
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_b250 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt2_b250 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_b250 = start_adwin(data_stream_gradual, stream_name_gradual)

## 6.9. Rerun Analysis with Higher-Resolution Maximum Buckets ($buckets_{\mathrm{max}}=500$)
Raise MAX_BUCKETS parameter and rerun analysis. At MAX_BUCKETS = 500, the algorithm will check for a drift by dividing the ADWIN window into up to 500 partitions. This increases the probability of identifying a drift.

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.002
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 1
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 500
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_b500 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt2_b500 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_b500 = start_adwin(data_stream_gradual, stream_name_gradual)

## 6.9. Rerun Analysis with Adjusted $\delta$

### 6.9.1. $\delta = 0.01, buckets_{\mathrm{max}}=50$

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.01
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 1
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 50
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

#### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_d01_b50 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

#### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt2_d01_b50 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

#### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_d01_b50 = start_adwin(data_stream_gradual, stream_name_gradual)

### 6.9.2. $\delta = 0.01, buckets_{\mathrm{max}}=100$

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.01
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 1
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 100
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

#### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_d01_b100 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

#### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt2_d01_b100 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

#### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_d01_b100 = start_adwin(data_stream_gradual, stream_name_gradual)

### 6.9.3. $\delta = 0.1, buckets_{\mathrm{max}}=50$

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.1
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 1
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 50
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

#### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_d1_b50 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

#### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt2_d1_b50 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

#### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_d1_b50 = start_adwin(data_stream_gradual, stream_name_gradual)

### 6.9.4. $\delta = 0.1, buckets_{\mathrm{max}}=100$

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.1
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 1
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 100
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

#### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_d1_b100 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

#### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt2_d1_b100 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

#### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_d1_b100 = start_adwin(data_stream_gradual, stream_name_gradual)

### 6.9.5. $\delta = 0.1, buckets_{\mathrm{max}}=250$

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.1
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 1
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 250
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

#### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_d1_b250 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

#### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt2_d1_b250 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

#### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_d1_b250 = start_adwin(data_stream_gradual, stream_name_gradual)

### 6.9.6. $\delta = 0.2, buckets_{\mathrm{max}}=100$

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.2
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 1
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 100
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

#### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_d2_b100 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

#### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt2_d2_b100 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

#### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_d2_b100 = start_adwin(data_stream_gradual, stream_name_gradual)

### 6.9.7. $\delta = 0.2, buckets_{\mathrm{max}}=400$

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.2
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 1
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 400
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

#### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_d2_b400 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

#### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt2_d2_b400 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

#### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_d2_b400 = start_adwin(data_stream_gradual, stream_name_gradual)

## 6.10. Rerun Analysis with Other Adjusted Parameters

### 6.10.1. $\delta = 0.1, buckets_{\mathrm{max}}=100, n_{\mathrm{min}}= 10$

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.1
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 1
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 100
# min window length
MIN_WINDOW_LENGTH = 10
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

#### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_d1_b100_n10 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

#### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt2_d1_b100_n10 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

#### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_d1_b100_n10 = start_adwin(data_stream_gradual, stream_name_gradual)

### 6.10.2. $\delta = 0.2, buckets_{\mathrm{max}}=400, grace = 20$

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.2
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 20
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 400
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

#### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_d2_b400_g20 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

#### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt2_d2_b400_g20 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

#### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_d2_b400_g20 = start_adwin(data_stream_gradual, stream_name_gradual)

### 6.10.3. $\delta = 0.2, buckets_{\mathrm{max}}=400, grace = 15, T = 32, n_{\mathrm{min}}=8$

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.2
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 15
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 32
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 400
# min window length
MIN_WINDOW_LENGTH = 8
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

#### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_d2_b400_g15_t32_n8 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

#### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt2_d2_b400_g15_t32_n8 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

#### Gradual Drift

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_gradual_d2_b400_g15_t32_n8 = start_adwin(data_stream_gradual, stream_name_gradual)

### 6.10.4. $\delta = 0.2, buckets_{\mathrm{max}}=400, grace = 15, T = 16, n_{\mathrm{min}}=8$

In [None]:
# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.2
# (grace period + 1) is the minimum width of the ADWIN window at which a drift analysis is run
GRACE = 15
# clock is the period (1/frequency), in number of data entry points, for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 16
# max buckets - more buckets lead to smoother approximation and more accurate drift detection at the
#   expense of additional computation
MAX_BUCKETS = 400
# min window length
MIN_WINDOW_LENGTH = 8
# the seed is the starting point in the "random" number generator table; setting it ensures repeatable
# "random" numbers for running experiments. np.random.seed() returns None, so keep the seed value in
# SEED and apply it in a separate step.
SEED = 42
np.random.seed(SEED)

#### Abrupt 1

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt1_d2_b400_g15_t16_n8 = start_adwin(data_stream_abrupt1, stream_name_abrupt1)

#### Abrupt 2

In [None]:
# Start from a fresh detector built with the parameters currently in effect.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Run the detector over the stream and keep the returned result.
result_abrupt2_d2_b400_g15_t16_n8 = start_adwin(data_stream_abrupt2, stream_name_abrupt2)

#### Gradual Drift

In [None]:
# Rebind the global `adwin` to a newly constructed detector (classic algorithm)
# configured from the current parameter constants.
adwin = awd.ADWINDO(
    delta=DELTA,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
)

# Process the gradual-drift stream and keep the value returned by start_adwin.
result_gradual_d2_b400_g15_t16_n8 = start_adwin(
    data_stream_gradual,
    stream_name_gradual,
)

## 7. Duplicate Drift Pattern Detection
Adjusting the parameters to shorten the response time to drifts also increases the number of positive responses. The most influential factor in this trade-off is the maximum number of buckets. An algorithm is added to detect when a drift matches the pattern of a previous drift. This is based on:

1. statistical tests to determine if the data points in the windows before the drift are from the same population
2. statistical tests to determine if the data points in the windows after the drift are from the same population
3. descriptive statistics to compare the rates of changes between the two drifts


### New parameters

The duplicate drift pattern detection utilizes two new parameters:

1. <strong>ADWINDO_p</strong>: the p-value threshold for statistical tests. If the combined p-value of either the pre-drift windows or post-drift windows is above this threshold, the drift is considered a duplicate.
2. <strong>ADWINDO_q</strong>: the average absolute difference threshold for descriptive statistics. If the average of the absolute differences between the two drifts is below q, the drift is considered a duplicate.

In [None]:
# Parameter set for the duplicate drift pattern detection experiment; the cells
# that follow read these constants when constructing the detector.

# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.2
# p is the threshold probability to label a drift detected as a repeat drift pattern
ADWINDO_p = 0.70
# q is the limit on average absolute deltas for descriptive statistic between two drifts
ADWINDO_q = 0.15
# grace period is minimum width of ADWIN window at which a drift analysis is run
GRACE = 1
# clock is the period (1/frequency) in number of data entry points for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection
#   at the expense of additional computation
MAX_BUCKETS = 400
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table; setting the seed
# ensures repeatable "random" numbers for running experiments - cET
# np.random.seed() returns None, so the seed value is stored in SEED first and the
# generator is seeded in a separate call (previously SEED ended up bound to None).
SEED = 42
np.random.seed(SEED)

### Abrupt 1

In [None]:
# Rebind the global `adwin` to a newly constructed detector (classic algorithm)
# that also receives the duplicate-pattern thresholds p and q.
adwin = awd.ADWINDO(
    delta=DELTA,
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
)

# Process the first abrupt-drift stream and keep the value returned by start_adwin.
result_abrupt1_d2_b400_g1_t1_n1_p7_q15 = start_adwin(
    data_stream_abrupt1,
    stream_name_abrupt1,
)

### Abrupt 2

In [None]:
# Rebind the global `adwin` to a newly constructed detector (classic algorithm)
# that also receives the duplicate-pattern thresholds p and q.
adwin = awd.ADWINDO(
    delta=DELTA,
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
)

# Process the second abrupt-drift stream and keep the value returned by start_adwin.
result_abrupt2_d2_b400_g1_t1_n1_p7_q15 = start_adwin(
    data_stream_abrupt2,
    stream_name_abrupt2,
)

### Gradual Drift

In [None]:
# Rebind the global `adwin` to a newly constructed detector (classic algorithm)
# that also receives the duplicate-pattern thresholds p and q.
adwin = awd.ADWINDO(
    delta=DELTA,
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
    algo='classic',
)

# Process the gradual-drift stream and keep the value returned by start_adwin.
result_gradual_d2_b400_g1_t1_n1_p7_q15 = start_adwin(
    data_stream_gradual,
    stream_name_gradual,
)

## 8. AdwinDo Algorithm
In addition to the classic ADWIN window ($W$), AdwinDo keeps a fixed-width rolling window of width $\gamma$ (gamma). When the difference between the mean of the ADWIN window and the mean of the rolling window reaches or exceeds $\epsilon$ (epsilon), $W$ is reduced to its most recent $\gamma \cdot \phi$ elements, where $\phi$ (phi) is the fraction of $\gamma$ that $W$ is reduced to.

In [None]:
# Parameter set for the AdwinDo (rolling-window) experiment; the cells that
# follow read these constants when constructing the detector.

# \delta is the ADWIN algorithm threshold; must be between 0 and 1
DELTA = 0.2
# \epsilon := ADWIN vs. rolling window mean difference threshold
EPSILON = 0.025
# \gamma := width of rolling window
GAMMA = 100
# \phi := fraction of \gamma to reset AdwinDo window width to on drift detection
#   (0.1 with \gamma = 100 keeps the most recent \gamma * \phi = 10 elements)
PHI = 0.1
# p is the threshold probability to label a drift detected as a repeat drift pattern
ADWINDO_p = 0.7
# q is the limit on average absolute deltas for descriptive statistic between two drifts
ADWINDO_q = 0.15
# grace period is minimum width of ADWIN window at which a drift analysis is run
GRACE = 1
# clock is the period (1/frequency) in number of data entry points for checking for drift
# higher means it will check less often
# e.g. if clock = 60, it only checks for a drift on every 60th data point
CLOCK = 1
# max buckets - more buckets lead to smoother approximation and more accurate drift detection
#   at the expense of additional computation
MAX_BUCKETS = 250
# min window length
MIN_WINDOW_LENGTH = 1
# the seed is the starting point in the "random" number generator table; setting the seed
# ensures repeatable "random" numbers for running experiments - cET
# np.random.seed() returns None, so the seed value is stored in SEED first and the
# generator is seeded in a separate call (previously SEED ended up bound to None).
SEED = 42
np.random.seed(SEED)

### Abrupt 1

In [None]:
# Rebind the global `adwin` to a newly constructed detector; no `algo` is passed,
# and the rolling-window parameters (epsilon, gamma, phi) are supplied.
adwin = awd.ADWINDO(
    delta=DELTA,
    epsilon=EPSILON,
    gamma=GAMMA,
    phi=PHI,
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
)

# Process the first abrupt-drift stream and keep the value returned by start_adwin.
result_roll_abrupt1_d2_b250_g1_t1_n1_p7_q15_e025_ga100_ph1 = start_adwin(
    data_stream_abrupt1,
    stream_name_abrupt1,
)

### Abrupt 2

In [None]:
# Rebind the global `adwin` to a newly constructed detector; no `algo` is passed,
# and the rolling-window parameters (epsilon, gamma, phi) are supplied.
adwin = awd.ADWINDO(
    delta=DELTA,
    epsilon=EPSILON,
    gamma=GAMMA,
    phi=PHI,
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
)

# Process the second abrupt-drift stream and keep the value returned by start_adwin.
result_roll_abrupt2_d2_b250_g1_t1_n1_p7_q15_e025_ga100_ph1 = start_adwin(
    data_stream_abrupt2,
    stream_name_abrupt2,
)

### Gradual Drift

In [None]:
# Rebind the global `adwin` to a newly constructed detector; no `algo` is passed,
# and the rolling-window parameters (epsilon, gamma, phi) are supplied.
adwin = awd.ADWINDO(
    delta=DELTA,
    epsilon=EPSILON,
    gamma=GAMMA,
    phi=PHI,
    adwindo_p=ADWINDO_p,
    adwindo_q=ADWINDO_q,
    clock=CLOCK,
    grace=GRACE,
    max_buckets=MAX_BUCKETS,
    min_window_length=MIN_WINDOW_LENGTH,
)

# Process the gradual-drift stream and keep the value returned by start_adwin.
result_roll_gradual_d2_b250_g1_t1_n1_p7_q15_e025_ga100_ph1 = start_adwin(
    data_stream_gradual,
    stream_name_gradual,
)

## References

1. Learning from Time-Changing Data with Adaptive Windowing  
   Albert Bifet and Ricard Gavaldà  
   Proceedings of the 2007 SIAM International Conference on Data Mining (SDM), 2007, pp. 443-448.