# Evaluation of MSSW

In [1]:
print('HEYYYY')

HEYYYY


## MSSW Algorithm

### MSSW Preprocessing

In [2]:
import numpy as np
import scipy
from sklearn.preprocessing import MinMaxScaler


def ptg_for_all(reference_data):
    """
    Calculate all P_tgs from reference data

    :param reference_data: array of shape (#points, #attributes) of reference data
    :return: array of shape (#points, #attribute) of corresponding P_tgs
    """
    column_sum = np.sum(reference_data, axis=0)
    return np.divide(reference_data, column_sum)


def information_utilities_for_all(ptgs):
    """
    Calculate information utility values from P_tgs

    :param ptgs: P_tgs as obtained from ptg_for_all(...)
    :return: array of shape (1, #attributes) of the information utility of each attribute
    """
    entropies = np.divide(scipy.stats.entropy(ptgs, axis=0), ptgs.shape[0])
    information_utilities = np.subtract(1, entropies).reshape((1, entropies.shape[0]))
    return information_utilities


def attribute_weights_for_all(information_utilities):
    """
    Calculate the weights of attributes from information utilities

    :param information_utilities: information utilities as obtained from information_utilities_for_all(...)
    :return: array of shape (1, #attributes) of the attribute weights of each attribute
    """
    attribute_weights = np.divide(information_utilities, np.sum(information_utilities))
    return attribute_weights


def get_attribute_weights_from(reference_data):
    """
    Calculate weights of attributes from reference (benchmark) data

    :param reference_data: array of shape (#points, #attributes)
    :return: array of shape (1, #attributes) of the attribute weights of each attribute
    """
    ptgs = ptg_for_all(reference_data)
    information_utilities = information_utilities_for_all(ptgs)
    attribute_weights = attribute_weights_for_all(information_utilities)
    return attribute_weights


def transform_data_by_attribute_weights(original_data, attribute_weights):
    """
    Transform data by the sqrt of attribute weights

    :param original_data: array of shape (#points, #attributes) to transform
    :param attribute_weights: array of shape (1, #attributes) to use for the transformation
    :return: array of shape (#points, #attributes) of weighted data
    """
    sqrt_attribute_weights = np.sqrt(attribute_weights)
    weighted_data = np.multiply(original_data, sqrt_attribute_weights)
    return weighted_data


def transform_batches_by_attribute_weights(original_batches, attribute_weights):
    """
    Transform multiple batches of data by the sqrt of attribute weights

    :param original_batches: list of arrays of shape (n_i, #attributes), i=batch number, n_i > 1
    :param attribute_weights: array of shape (1, #attributes) of weights to use for the transformation
    :return: list of arrays of shape(n_i, #attributes) of weighted data
    """
    weighted_batches = []
    for original_batch in original_batches:
        weighted_batches.append(transform_data_by_attribute_weights(original_batch, attribute_weights))
    return weighted_batches


def mssw_preprocess(reference_data_batches, testing_data_batches):
    """
    Preprocess data batches through minmax scaling, apply weighting so that Euclidean distance on this weighted data
    becomes the desired entropy-weighted distance on the original data

    :param reference_data_batches: list of arrays of shape (n_r_r, #attributes), r_r=reference batch number,
        n_r_r=#points in this batch
    :param testing_data_batches: list of arrays of shape (n_r_t, #attributes), r_t=testing batch number,
        n_r_t=#points in this batch
    :return: (array of shape (sum(n_r_r) #attributes) of joined reference data, weighted reference batches (same
        structure as reference_data_batches), weighted testing batches (same structure as testing_data_batches))
    """
    joined_reference_data = reference_data_batches[0]
    for reference_batch in reference_data_batches[1:]:
        np.append(joined_reference_data, reference_batch, axis=0)

    scaler = MinMaxScaler()
    scaler.fit(joined_reference_data)
    joined_reference_data = scaler.transform(joined_reference_data)
    reference_data_batches = [scaler.transform(batch) for batch in reference_data_batches]
    testing_data_batches = [scaler.transform(batch) for batch in testing_data_batches]

    attribute_weights = get_attribute_weights_from(joined_reference_data)
    weighted_joined_reference_data = transform_data_by_attribute_weights(joined_reference_data, attribute_weights)
    weighted_reference_batches =\
        [transform_data_by_attribute_weights(batch, attribute_weights) for batch in reference_data_batches]
    weighted_testing_batches =\
        [transform_data_by_attribute_weights(batch, attribute_weights) for batch in testing_data_batches]
    return weighted_joined_reference_data, weighted_reference_batches, weighted_testing_batches


### MSSW

In [3]:
"""
Drift detection algorithm from
[1] Y. Yuan, Z. Wang, and W. Wang,
“Unsupervised concept drift detection based on multi-scale slide windows,”
Ad Hoc Networks, vol. 111, p. 102325, Feb. 2021, doi: 10.1016/j.adhoc.2020.102325.

MSSW is an abbreviation for Multi-Scale Sliding Windows

- Unless specified otherwise, functions in this file work with numpy arrays
- The terms "benchmark data" and "reference data" mean the same thing, default is "reference data"
- The terms "slide data" and "testing data" mean the same thing, default is "testing data"
"""
import inspect
import time
import numpy as np
from sklearn.cluster import KMeans


def obtain_cluster_distances_and_sizes(weighted_sub_window, fitted_kmeans, n_clusters):
    """
    Get the sum of centroid distances and size for clusters formed by fitted_kmeans and weighted_sub_window

    :param weighted_sub_window: array of shape (#points, #attributes) of weighted data
    :param fitted_kmeans: fitted sklearn kmeans object to use for clustering of the weighted_sub_window
    :param n_clusters: number of clusters used to fit the kmeans object
    :return: (array of shape (1, n_clusters) of sums of centroid distances,
    array of shape (1, n_clusters) of cluster sizes)
    """
    centroids = fitted_kmeans.cluster_centers_
    predicted_cluster_labels = fitted_kmeans.predict(weighted_sub_window)

    centroid_distance_sums = np.zeros(n_clusters).reshape((1, n_clusters))
    num_points_in_clusters = np.zeros(n_clusters).reshape((1, n_clusters))
    for cluster_id in range(n_clusters):
        cluster_mask = predicted_cluster_labels == cluster_id
        cluster = weighted_sub_window[cluster_mask]

        num_points_in_clusters[0, cluster_id] = cluster.shape[0]

        centroid = centroids[cluster_id]
        centroid_diffs = np.subtract(cluster, centroid)
        euclideans = np.linalg.norm(centroid_diffs, axis=1)
        sum_euclideans = np.sum(euclideans)
        centroid_distance_sums[0, cluster_id] = sum_euclideans

    return centroid_distance_sums, num_points_in_clusters


def calculate_clustering_statistics(weighted_sub_window, fitted_kmeans, n_clusters):
    """
    Cluster the given weighted_sub_window, and then obtain JSEE, Av_ci for all i, and Av_sr from it

    :param weighted_sub_window: array of shape (#points, #attributes) of weighted data
    :param fitted_kmeans: fitted sklearn kmeans object to use for clustering of the weighted_sub_window
    :param n_clusters: number of clusters used to fit the kmeans object
    :return: (JSEE float, Av_ci array of shape (1, #attributes), Av_sr float)
    """
    centroid_distance_sums, num_points_in_clusters = obtain_cluster_distances_and_sizes(
        weighted_sub_window, fitted_kmeans, n_clusters
    )

    JSEE = np.sum(centroid_distance_sums)
    Av_c = np.divide(centroid_distance_sums, num_points_in_clusters)
    Av_sr = JSEE / weighted_sub_window.shape[0]
    return JSEE, Av_c, Av_sr


def get_s_s(weighted_reference_sub_windows, fitted_kmeans, n_clusters):
    """
    Get S_s = the total average distance sequence of sub-windows in reference (benchmark) data

    :param weighted_reference_sub_windows: list of arrays of shape (n_r, #attributes) of weighted reference data,
        r is the sub-window number, n_r=#points in this sub-window
    :param fitted_kmeans: sklearn kmeans object previously fitted to weighted reference (benchmark) data
    :param n_clusters: number of clusters used to fit the kmeans object
    :return: array of shape (1, len(weighted_reference_sub_windows))
    """
    num_sub_windows = len(weighted_reference_sub_windows)
    s_s = np.zeros(num_sub_windows).reshape((1, num_sub_windows))
    for i, weighted_reference_sub_window in enumerate(weighted_reference_sub_windows):
        _, _, Av_sr = calculate_clustering_statistics(weighted_reference_sub_window, fitted_kmeans, n_clusters)
        s_s[0, i] = Av_sr
    return s_s


def get_moving_ranges(s_s):
    """
    Get moving ranges (MR_i) for each sub-window from S_s

    :param s_s: s_s as obtained from get_s_s(...)
    :return: array of shape (1, len(s_s)-1)
    """
    moving_ranges = np.abs(np.subtract(s_s[:, 1:], s_s[:, :-1]))
    return moving_ranges


def get_mean_s_s_and_mean_moving_ranges(weighted_reference_sub_windows, fitted_kmeans, n_clusters):
    """
    Find the S_s and MR sequences and return their mean

    :param weighted_reference_sub_windows: list of arrays of shape (n_r, #attributes) of weighted reference data,
        r is the sub-window number, n_r=#points in this sub-window
    :param fitted_kmeans: sklearn kmeans object previously fitted to weighted reference (benchmark) data
    :param n_clusters: number of clusters used to fit the kmeans object
    :return: (mean of S_s as float, mean of MR as float)
    """
    s_s = get_s_s(weighted_reference_sub_windows, fitted_kmeans, n_clusters)
    moving_ranges = get_moving_ranges(s_s)
    return np.mean(s_s), np.mean(moving_ranges)


# - function to test for concept drift based on the total average distance from one testing (slide) sub-window
def concept_drift_detected(mean_av_s, mean_mr, weighted_testing_sub_window, fitted_kmeans, n_clusters, coeff):
    """
    Test for concept drift in one weighted testing sub-window

    :param mean_av_s: mean_s_s as obtained from get_mean_s_s_and_mean_moving_ranges(...)
    :param mean_mr: mean_mr as obtained from get_mean_s_s_and_mean_moving_ranges(...)
    :param weighted_testing_sub_window: array of shape (#points, #attributes) of one weighted testing sub-window
    :param fitted_kmeans: sklearn kmeans object previously fitted to weighted reference (benchmark) data
    :param n_clusters: number of clusters used to fit the kmeans object
    :param coeff: drift detection coefficient
    :return: True if drift is detected, False otherwise
    """
    UCL_Av_s = mean_av_s + coeff * mean_mr
    LCL_Av_s = mean_av_s - coeff * mean_mr
    _, _, test_Av_sr = calculate_clustering_statistics(weighted_testing_sub_window, fitted_kmeans, n_clusters)

    return not (LCL_Av_s < test_Av_sr < UCL_Av_s)


def all_drifting_batches(
        reference_data_batches,
        testing_data_batches,
        n_clusters=2,
        n_init=10,
        max_iter=300,
        tol=1e-4,
        random_state=None,
        coeff=2.66
):
    """
    Find all drift locations based on the given reference and testing batches

    :param reference_data_batches: list of arrays of shape (n_r_r, #attributes), r_r=reference batch number,
        n_r_r=#points in this batch
    :param testing_data_batches: list of arrays of shape (n_r_t, #attributes), r_t=testing batch number,
        n_r_t=#points in this batch
    :param n_clusters: desired number of clusters for kmeans
    :param random_state: used to potentially control randomness - see sklearn.cluster.KMeans random_state
    :param coeff: coeff used to detect drift, default=2.66
    :return: a boolean list, length=len(testing_data_batches),
        an entry is True if drift was detected there and False otherwise
    """
    weighted_joined_reference_data, weighted_reference_batches, weighted_testing_batches =\
        mssw_preprocess(reference_data_batches, testing_data_batches)

    fitted_kmeans = KMeans(
        n_clusters=n_clusters,
        n_init=n_init,
        max_iter=max_iter,
        tol=tol,
        random_state=random_state
    ).fit(weighted_joined_reference_data)
    mean_av_s, mean_mr = get_mean_s_s_and_mean_moving_ranges(weighted_reference_batches, fitted_kmeans, n_clusters)

    drifts_detected = []
    for weighted_testing_batch in weighted_testing_batches:
        drifts_detected.append(concept_drift_detected(
            mean_av_s, mean_mr, weighted_testing_batch, fitted_kmeans, n_clusters, coeff))
    return drifts_detected

## MSSW Evaluation Helpers

### MSSW Metrics

In [4]:
def fpr_and_latency_when_averaging(drift_locations, num_test_batches, true_drift_idx):
    """The inputs drift_locations and true_drift_idx are is zero-indexed"""
    fpr = 0
    latency = 1
    drift_locations_arr = np.array(drift_locations)
    signal_locations_before_drift = drift_locations_arr[drift_locations_arr < true_drift_idx]
    signal_locations_not_before_drift = drift_locations_arr[drift_locations_arr >= true_drift_idx]
    num_batches_after_first_drift = num_test_batches - (true_drift_idx + 1)
    drift_detected = False # says whether some drift detection was triggered at or after a drift occurrence

    if len(drift_locations) >= 1:
        if len(signal_locations_before_drift) > 0:
            fpr = len(signal_locations_before_drift) / true_drift_idx
        if len(signal_locations_not_before_drift) > 0:
            first_useful_drift_signal = signal_locations_not_before_drift[0]
            latency = (first_useful_drift_signal - true_drift_idx) / num_batches_after_first_drift
            drift_detected = True

    return fpr, latency, drift_detected

### MSSW Randomness-Robust Evaluation

In [5]:
def all_drifting_batches_randomness_robust(reference_data_batches, testing_data_batches, n_clusters=2, n_init=10,
                                           max_iter=300, tol=1e-4, coeff=2.66, true_drift_idx=2, first_random_state=0,
                                           min_runs=10, std_err_threshold=0.05):
    """
    Repeat running mssw.mssw.all_drifting_batches(...) until the s.e. of metrics from different runs is low enough

    :param n_init:
    :param max_iter:
    :param tol:
    :param reference_data_batches: list of arrays of shape (n_r_r, #attributes), r_r=reference batch number,
        n_r_r=#points in this batch
    :param testing_data_batches: list of arrays of shape (n_r_t, #attributes), r_t=testing batch number,
        n_r_t=#points in this batch
    :param n_clusters: desired number of clusters for kmeans
    :param first_random_state: random states used will be incremented from this one
    :param coeff: coeff used to detect drift, default=2.66
    :param std_err_threshold: threshold to stop executing the mssw algorithm
    :return: a list of lists from all_drifting_batches(...), and the mean and s.e. of FPR and latency
    """
    print('min_runs', min_runs)

    fprs = []
    latencies = []
    runs_results_bool = []
    fpr_std_err = -1
    latency_std_err = -1
    num_runs = 0
    random_state = first_random_state
    while num_runs < min_runs or max(fpr_std_err, latency_std_err) > std_err_threshold:
        drifting_batches_bool = mssw.mssw.all_drifting_batches(
            reference_data_batches,
            testing_data_batches,
            n_clusters=n_clusters,
            n_init=n_init,
            max_iter=max_iter,
            tol=tol,
            random_state=random_state,
            coeff=coeff
        )
        # print('drifting_batches_bool')
        # print(drifting_batches_bool)
        drift_locations = np.arange(len(drifting_batches_bool))[drifting_batches_bool]
        # print('drift_locations')
        # print(drift_locations)
        fpr, latency, _ = fpr_and_latency_when_averaging(
            drift_locations,
            len(testing_data_batches),
            true_drift_idx
        )
        fprs.append(fpr)
        latencies.append(latency)
        runs_results_bool.append(drifting_batches_bool)
        num_runs += 1
        random_state += n_init

        # print('number of runs', num_runs)
        if num_runs >= min_runs:
            fpr_std_err = np.std(fprs) / np.sqrt(len(fprs))
            latency_std_err = np.std(latencies) / np.sqrt(len(latencies))
        # print('fprs', fprs, 's.e.', fpr_std_err)
        # print('latencies', latencies, 's.e.', latency_std_err)

    final_fpr_mean = np.mean(fprs)
    final_latency_mean = np.mean(latencies)
    return runs_results_bool, final_fpr_mean, fpr_std_err, final_latency_mean, latency_std_err

## MSSW Evaluation on Local Datasets

## Synthetic Dataset Locations

In [6]:
abrupt_sea_path = '../Datasets_concept_drift/synthetic_data/abrupt_drift/sea_1_abrupt_drift_0_noise_balanced.arff'
abrupt_agraw1_path = 'Datasets_concept_drift/synthetic_data/abrupt_drift/agraw1_1_abrupt_drift_0_noise_balanced.arff'
abrupt_agraw2_path = 'Datasets_concept_drift/synthetic_data/abrupt_drift/agraw2_1_abrupt_drift_0_noise_balanced.arff'

gradual_sea_paths = [
    '../Datasets_concept_drift/synthetic_data/gradual_drift/sea_1_gradual_drift_0_noise_balanced_05.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/sea_1_gradual_drift_0_noise_balanced_1.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/sea_1_gradual_drift_0_noise_balanced_5.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/sea_1_gradual_drift_0_noise_balanced_10.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/sea_1_gradual_drift_0_noise_balanced_20.arff'
]

gradual_agraw1_paths = [
    'Datasets_concept_drift/synthetic_data/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_05.arff',
    'Datasets_concept_drift/synthetic_data/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_1.arff',
    'Datasets_concept_drift/synthetic_data/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_5.arff',
    'Datasets_concept_drift/synthetic_data/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_10.arff',
    'Datasets_concept_drift/synthetic_data/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_20.arff'
]

gradual_agraw2_paths = [
    'Datasets_concept_drift/synthetic_data/gradual_drift/agraw2_1_gradual_drift_0_noise_balanced_05.arff',
    'Datasets_concept_drift/synthetic_data/gradual_drift/agraw2_1_gradual_drift_0_noise_balanced_1.arff',
    'Datasets_concept_drift/synthetic_data/gradual_drift/agraw2_1_gradual_drift_0_noise_balanced_5.arff',
    'Datasets_concept_drift/synthetic_data/gradual_drift/agraw2_1_gradual_drift_0_noise_balanced_10.arff',
    'Datasets_concept_drift/synthetic_data/gradual_drift/agraw2_1_gradual_drift_0_noise_balanced_20.arff'
]

all_sea_data_paths = [abrupt_sea_path] + gradual_sea_paths
all_agraw1_data_paths = [abrupt_agraw1_path] + gradual_agraw1_paths
all_agraw2_data_paths = [abrupt_agraw2_path] + gradual_agraw2_paths

only_numerical_data_paths = [abrupt_sea_path] + gradual_sea_paths
only_mixed_data_paths = [abrupt_agraw1_path] + gradual_agraw1_paths + [abrupt_agraw2_path] + gradual_agraw2_paths

## Evaluating SEA

### Obtaining Batches

In [7]:
import sklearn
from scipy.io import arff
import pandas as pd
import numpy as np

def column_values_to_string(df, columns):
    for column in columns:
        df[column] = df[column].str.decode('utf-8')
    return df

sea_reference_batches = {}
sea_testing_batches = {}

for file_path in all_sea_data_paths:
    data = arff.loadarff(file_path)
    df = pd.DataFrame(data[0])
    
    df_y = column_values_to_string(df[['class']], ['class'])
    df_x = df.drop(columns='class')

    df_x_ref, df_x_test, df_y_ref, df_y_test = sklearn.model_selection.train_test_split(
        df_x, df_y, test_size=0.7, shuffle=False)
    
    reference_data = df_x_ref.to_numpy()
    testing_data = df_x_test.to_numpy()
    num_ref_batches = 3
    num_test_batches = 7
    ref_batches = np.array_split(reference_data, num_ref_batches)
    test_batches = np.array_split(testing_data, num_test_batches)
    
    sea_reference_batches[file_path] = ref_batches
    sea_testing_batches[file_path] = test_batches
    
print(sea_reference_batches)
print(sea_testing_batches)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


{'../Datasets_concept_drift/synthetic_data/abrupt_drift/sea_1_abrupt_drift_0_noise_balanced.arff': [array([[7.30878191, 4.10080811, 2.07714841],
       [5.83353857, 0.42298334, 7.61674693],
       [1.39762683, 6.9494798 , 8.05227771],
       ...,
       [2.44944393, 1.18319878, 0.66092076],
       [4.13414227, 5.6603355 , 1.40371031],
       [0.79410049, 1.95041641, 0.80560304]]), array([[9.04975341, 9.77234334, 8.45628655],
       [3.99493855, 2.50308712, 3.89978574],
       [7.38903589, 4.68834837, 3.15931014],
       ...,
       [2.35440974, 3.99262033, 6.30371527],
       [1.05072289, 8.06634262, 8.23389443],
       [4.37366571, 0.57454544, 6.17180829]]), array([[9.65495012, 0.09388668, 7.47905497],
       [2.45062219, 1.61325991, 9.40223334],
       [9.36198364, 7.80269722, 4.91227934],
       ...,
       [3.13653612, 3.78971742, 4.85247618],
       [0.82259566, 9.99325225, 6.16945617],
       [0.73806547, 5.02972058, 0.6496514 ]])], '../Datasets_concept_drift/synthetic_data/gradu

### Inspecting Clustering to use Reasonable Parameters

In [8]:
import sys
orig_stdout = sys.stdout
sys.stdout = open('output.txt', 'wt')

sea_abrupt_ref_batches = sea_reference_batches[abrupt_sea_path]
sea_abrupt_test_batches = sea_testing_batches[abrupt_sea_path]

weighted_joined_reference_data, weighted_reference_batches, weighted_testing_batches =\
        mssw_preprocess(sea_abrupt_ref_batches, sea_abrupt_test_batches)

fitted_kmeans = KMeans(
        n_clusters=2,
        n_init=100,
        max_iter=1000,
        tol=0.0000001,
        verbose=1,
        random_state=1053
    ).fit(weighted_joined_reference_data)

sys.stdout = orig_stdout
print('something')

something


### Using the Obtained Parameters to Evaluate the Algorithm on SEA Abrupt

In [14]:
sys.stdout = orig_stdout
sea_abrupt_all_drifting_batches = all_drifting_batches(sea_abrupt_ref_batches,
        sea_abrupt_test_batches,
        n_clusters=2,
        n_init=100,
        max_iter=500,
        tol=1e-7,
        random_state=0,
        coeff=2.66)

print('sea_abrupt_all_drifting_batches', sea_abrupt_all_drifting_batches)

sea_abrupt_all_drifting_batches [False, False, True, True, True, True, True]


### Using the Obtained Parameters to Evaluate the Algorithm on All SEA Datasets

In [15]:
sea_all_drifting_batches = {}
for sea_path in all_sea_data_paths:
    sea_ref_batches = sea_reference_batches[sea_path]
    sea_test_batches = sea_testing_batches[sea_path]
    
    sea_all_drifting_batches[sea_path] = all_drifting_batches(sea_ref_batches,
        sea_test_batches,
        n_clusters=2,
        n_init=100,
        max_iter=500,
        tol=1e-7,
        random_state=0,
        coeff=2.66)
    
print(sea_all_drifting_batches)

{'../Datasets_concept_drift/synthetic_data/abrupt_drift/sea_1_abrupt_drift_0_noise_balanced.arff': [False, False, True, True, True, True, True], '../Datasets_concept_drift/synthetic_data/gradual_drift/sea_1_gradual_drift_0_noise_balanced_05.arff': [False, False, True, True, True, True, True], '../Datasets_concept_drift/synthetic_data/gradual_drift/sea_1_gradual_drift_0_noise_balanced_1.arff': [False, False, True, True, True, True, True], '../Datasets_concept_drift/synthetic_data/gradual_drift/sea_1_gradual_drift_0_noise_balanced_5.arff': [False, False, True, True, True, True, True], '../Datasets_concept_drift/synthetic_data/gradual_drift/sea_1_gradual_drift_0_noise_balanced_10.arff': [False, False, False, True, True, True, True], '../Datasets_concept_drift/synthetic_data/gradual_drift/sea_1_gradual_drift_0_noise_balanced_20.arff': [False, False, False, True, True, True, True]}
